爬虫——生产者消费者

发布时间:2019-03-14 23:32:31 编辑:auto 阅读(2351)

    结构

    生产者生成网址并放入队列

    多个消费者从队列中取出网址

     1 from queue import Queue
     2 import time, threading, requests
     3 
     4 url_base = 'http://www.qiushibaike.com/8hr/page/{}/'
     5 header = {}
     6 
     def load_data(pages=(1, 3, 6, 7)):
         """Build the list of page URLs to crawl.

         Args:
             pages: Iterable of page numbers substituted into ``url_base``.
                 Defaults to ``(1, 3, 6, 7)``, the selection that used to be
                 hard-coded, so existing callers get the same result.

         Returns:
             A list with one formatted URL per page number, in the given order.
         """
         return [url_base.format(page) for page in pages]
     9 
    # Producer
    def produce(q):
        """Put every URL returned by ``load_data()`` on the queue, then return.

        Args:
            q: Queue shared with the consumers; only its ``put`` method is
                used. With a bounded ``queue.Queue`` (as created in ``main``)
                ``put`` waits whenever the queue is full.
        """
        # Walk the list exactly once. Looping forever after the last URL was
        # queued would only keep the thread busy-spinning with nothing to do.
        for url in load_data():
            q.put(url)
    18 
    # Consumer
    def consume(q):
        """Take URLs off the queue forever and report each one.

        Args:
            q: Queue shared with the producer; only its ``get`` method is used.

        The function never returns on its own: it keeps asking the queue for
        the next URL and prints the current thread together with that URL.
        """
        template = 'thread is {} content is {}'
        while True:
            url = q.get()
            # The real download is left out of this demo:
            # requests.get(url, headers=header)
            worker = threading.current_thread()
            print(template.format(worker, url))
    25 
    def main():
        """Start one producer thread and two consumer threads on a shared queue."""
        # Bounded queue: holds at most 4 URLs at a time.
        q = Queue(4)
        # Creation and start order is producer first, then the two consumers.
        workers = [threading.Thread(target=produce, args=(q,))]
        workers += [threading.Thread(target=consume, args=(q,)) for _ in range(2)]
        for worker in workers:
            worker.start()


    # Run the demo only when this file is executed as a script.
    if __name__ == '__main__':
        main()

     

    爬虫类需要继承线程类 threading.Thread

    初始化方法中需要先调用父类的初始化方法(super().__init__())

    创建对象后调用 start(),新线程启动后会自动执行类中的 run 方法

     1 # class ConsumeSpider(threading.Thread):
     2 #     def __init__(self):
     3 #         super().__init__()
     4 #         pass
     5 #
     6 #     def run(self):
     7 #         pass
     8 #
     9 # c3 = ConsumeSpider()
    10 # c3.start()

    协程

    协程(coroutine):可以看作用户态的轻量级线程,任务之间的切换由程序自身控制,不需要操作系统进行线程上下文切换,是一种在单个线程内于多个任务之间调度的多任务方式,可以使用yield实现

     1 import time, threading
     2 
     def task_1(interval=1):
         """Endless generator task: print a marker, pause, then yield.

         Each ``next()`` call prints the ``-----1-----`` marker together with
         the current thread, sleeps for ``interval`` seconds, and hands control
         back to the caller.

         Args:
             interval: Seconds to sleep on every step. Defaults to 1, the value
                 that used to be hard-coded.

         Yields:
             None, once per step; the generator never finishes.
         """
         while True:
             print('-----1-----', threading.current_thread())
             time.sleep(interval)
             yield
     8 
     9 
    def task_2(interval=1):
        """Endless generator task: print a marker, pause, then yield.

        Each ``next()`` call prints the ``-----2-----`` marker together with
        the current thread, sleeps for ``interval`` seconds, and hands control
        back to the caller.

        Args:
            interval: Seconds to sleep on every step. Defaults to 1, the value
                that used to be hard-coded.

        Yields:
            None, once per step; the generator never finishes.
        """
        while True:
            print('-----2-----', threading.current_thread())
            time.sleep(interval)
            yield
    15 
    16 
    def main():
        """Alternate between the two generator tasks forever.

        Both generators are created up front; each round advances ``task_1``
        by one step and then ``task_2`` by one step.
        """
        tasks = (task_1(), task_2())
        while True:
            for task in tasks:
                next(task)


    # Run the demo only when this file is executed as a script.
    if __name__ == '__main__':
        main()
    27  

     

    请使用手机"扫一扫"x

关键字