Python3 爬取豆瓣书籍 Xpat

发布时间:2019-09-25 08:26:07编辑:auto阅读(2351)

    #coding:utf8
    # Scrape book titles from Douban book-series pages and append them, one
    # numbered line per book, to a local text file. Prints progress and a
    # final summary (elapsed time and number of books) to stdout.
    import time
    from urllib import request
    from bs4 import BeautifulSoup
    num = 1  # ordinal of the NEXT book to record; books recorded so far == num - 1
    start_time = time.time()  # start of the run, used for the elapsed-time report

    url = 'https://book.douban.com/series/128?page=1'

    # Debug aid: fetch one sample page and dump its parsed HTML to stdout.
    with request.urlopen(url) as html:
        bsObj = BeautifulSoup(html,'lxml')

    print (bsObj)

    # Append mode keeps results from earlier runs. The encoding is explicit so
    # that writing Chinese titles does not depend on the process locale.
    with open('/root/桌面/豆瓣/1.txt','a',encoding='utf-8') as f:
        for i in range(1,3):  # pages 1 and 2 of each series
            # NOTE(review): this iterates exactly three series IDs (100, 400, 15).
            # If a stepped range was intended, use range(100, 400, 15) -- confirm.
            for e in (100,400,15):
                page_url = 'https://book.douban.com/series/%d?page=%d'%(e,i)
                with request.urlopen(page_url) as html:
                    bsObj = BeautifulSoup(html,'lxml')
                print ("=============" + "第%d页"%i + "==============")
                h2_list = bsObj.find_all('h2')

                print (h2_list)

                for h2_node in h2_list:
                    a_node = h2_node.a
                    # Skip headings that carry no link or no title attribute
                    # instead of crashing on them.
                    if a_node is None:
                        continue
                    title = a_node.get("title")
                    if title is None:
                        continue
                    title = "<<" + title + ">>"
                    print ("第%d本书籍"%num,title,file=f)
                    num +=1
                # Pause between requests to avoid hammering the server.
                time.sleep(2)

    end_time = time.time()
    # Elapsed time is end minus start; the reverse yields a negative value.
    duration_time = end_time - start_time
    print ('运行时间一共%.2f: '%duration_time+'秒')
    # Parenthesised: '%' binds tighter than '-', so the unparenthesised form
    # tried to subtract 1 from a str and raised TypeError.
    print ('共抓到%d本书籍'%(num-1))

关键字