Python 多任务抓取图片

发布时间:2019-03-12 00:44:30 | 编辑:auto | 阅读(2016)

    import re
    import urllib.request
    import gevent
    
    
    def download(image_download, images_path, i):
        """Fetch one image over HTTP and save it as ``<image_download>/<i>.jpg``.

        Despite their names, ``image_download`` is the destination directory
        and ``images_path`` is the URL of a single image.

        Args:
            image_download: Directory the file is written into. It must
                already exist; this function does not create it.
            images_path: URL of the image to fetch.
            i: Value used (via ``str(i)``) as the file name stem.

        Raises:
            urllib.error.URLError: If the HTTP request fails.
            OSError: If the output file cannot be opened or written.
        """
        # The request carries a browser-like User-Agent header.
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
        req = urllib.request.Request(url=images_path, headers=headers)

        # Context managers release the HTTP response and the file handle even
        # when reading or writing raises part-way through.
        with urllib.request.urlopen(req) as response:
            image_content = response.read()

        image_name = image_download + "/" + str(i) + ".jpg"
        with open(image_name, "wb") as out_file:
            out_file.write(image_content)
    
    
    def main():
        """Download every ``.jpg`` URL found in ``douyu.html`` into ``images_douyu``.

        Reads ``douyu.html`` from the current working directory, extracts all
        ``https://....jpg`` matches, and spawns one gevent greenlet per URL.
        Images are numbered from 1 in the order the URLs appear in the page.

        Raises:
            OSError: If ``douyu.html`` cannot be read or the output directory
                cannot be created.
        """
        # Local import so this function does not depend on the file's import block.
        import os

        with open("douyu.html", "r", encoding="utf-8") as f:
            messages = f.read()
        images_path = re.findall(r"https://.*?\.jpg", messages)

        directory = "images_douyu"
        # Without this, every open(..., "wb") in download() fails with
        # FileNotFoundError on a fresh checkout where the directory is absent.
        os.makedirs(directory, exist_ok=True)

        # NOTE(review): no gevent monkey patching is visible in this file. Unless
        # it is applied elsewhere, the blocking urllib calls inside download()
        # will run one after another rather than concurrently -- confirm.
        gevent_list = [
            gevent.spawn(download, directory, image_path, i)
            for i, image_path in enumerate(images_path, 1)
        ]

        gevent.joinall(gevent_list)
        
    
    # Run the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()
    

      

关键字