发布时间:2019-09-25 08:26:07 编辑:auto 阅读(2890)
#coding:utf8
import time
from urllib import request
from bs4 import BeautifulSoup
# Scrape book titles from Douban series listing pages and append them,
# numbered, to a text file.  For every page number and series id the script
# fetches https://book.douban.com/series/<id>?page=<page>, reads the ``title``
# attribute of the link inside each <h2>, and writes one line per book.
num = 1  # 1-based number of the next book to record; num - 1 books recorded so far
start_time = time.time()  # wall-clock start, used for the run-time report below

# ``with`` closes the output file even if a request or parse step raises; the
# explicit encoding keeps the Chinese titles writable regardless of locale.
with open('/root/桌面/豆瓣/1.txt', 'a', encoding='utf-8') as f:
    for i in range(1, 3):  # page numbers 1 and 2
        # NOTE(review): this iterates exactly three series ids (100, 400, 15).
        # If range(100, 400, 15) was intended, confirm before changing it.
        for e in (100, 400, 15):
            url = 'https://book.douban.com/series/%d?page=%d' % (e, i)
            # BeautifulSoup reads the whole response while constructing the
            # tree, so the connection can be closed right after parsing.
            with request.urlopen(url) as html:
                bsObj = BeautifulSoup(html, 'lxml')
            print("=============" + "第%d页" % i + "==============")
            for h2_node in bsObj.find_all('h2'):
                a_node = h2_node.a
                # Skip headings without a link, or whose link has no title
                # attribute, instead of crashing on them.
                if a_node is None or "title" not in a_node.attrs:
                    continue
                title = "<<" + a_node.attrs["title"] + ">>"
                print("第%d本书籍" % num, title, file=f)
                num += 1
            time.sleep(2)  # wait 2 seconds after each page fetch

end_time = time.time()
# Elapsed time is end minus start (the reverse order yields a negative value).
duration_time = end_time - start_time
print('运行时间一共%.2f: ' % duration_time + '秒')
# Parenthesise num - 1: ``'...' % num - 1`` would try to subtract 1 from a str.
print('共抓到%d本书籍' % (num - 1))
上一篇: Python3快速入门(十八)——PyI
下一篇: python3的time模块和datet
51929
51699
42018
38857
33345
30315
28954
23969
23885
22251
378°
2588°
3261°
2712°
2700°
3427°
2656°
3494°
5759°
5536°