发布时间: 2019-09-25 08:26:07 | 编辑: auto | 阅读(2351)
#coding:utf8
import time
from urllib import request
from bs4 import BeautifulSoup
num = 1#用来计算一共爬取了多少本书
start_time = time.time()#定位一个开始的时间
f = open('/root/桌面/豆瓣/1.txt','a')
for i in range(1,3):
for e in (100,400,15):
html = request.urlopen('https://book.douban.com/series/%d?page=%d'%(e,i))
bsObj = BeautifulSoup(html,'lxml')
print ("=============" + "第%d页"%i + "==============")
h2_list = bsObj.find_all('h2')
for h2_node in h2_list:
a_node = h2_node.a
#print (a_node)
title = a_node.attrs["title"]
title = "<<" + title + ">>"
print ("第%d本书籍"%num,title,file=f)
num +=1
time.sleep(2)
end_time = time.time()
duration_time = start_time - end_time
print ('运行时间一共%.2f: '%duration_time+'秒')
print ('共抓到%d本书籍'%num-1)
f.close()
上一篇: Python3快速入门(十八)——PyI
下一篇: python3的time模块和datet
48451
47338
38221
35462
29920
26626
25598
20545
20240
18666
59°
70°
174°
118°
144°
261°
336°
332°
309°
399°