发布时间: 2019-09-01 10:53:50 | 编辑: auto | 阅读: (2116)
# General threadpool usage pattern that the three listings below follow:
#
#     requests = makeRequests(some_callable, list_of_args, callback)
#     [pool.putRequest(req) for req in requests]
#     pool.wait()
#
# threadpool runs every queued request as:
#
#     result = request.callable(*request.args, **request.kwds)
#
# so each entry of ``list_of_args`` is either a single positional argument or
# an ``(args_list, kwds_dict)`` tuple.  Listing 1 uses the first form,
# listing 2 passes several positional arguments, listing 3 passes keywords.
#
# NOTE: this is Python 2 code (urllib2 / cookielib).

# ---------------------------------------------------------------------------
# Listing 1: one positional argument per request (the PDF URL).
# ---------------------------------------------------------------------------
import cookielib
import urllib2
import socket
import os
from bs4 import BeautifulSoup
import threadpool
import threading


def download(pdfUrl):
    """Download one PDF into the ``matlab_pdf`` folder.

    Args:
        pdfUrl: absolute URL of the PDF; the text after the last ``/`` is
            used as the local file name.

    Returns:
        ``(name, status)`` where ``status`` is True when the file was fetched
        and written, False when any error occurred (the error is printed).
    """
    folder = 'matlab_pdf'
    # Serialise the exists/makedirs pair so two workers cannot both try to
    # create the folder; ``with`` also releases the lock if makedirs raises.
    with mutex:
        if not os.path.exists(folder):
            os.makedirs(folder)
    name = pdfUrl.split('/')[-1]
    status = False
    try:
        # Fetch before opening the target so a failed download does not
        # leave an empty file behind.
        data = urllib2.urlopen(pdfUrl).read()
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(data)
        status = True
    except Exception as err:
        # Best effort: report the failure and let the other downloads go on.
        print(err)
    return (name, status)


def print_result(request, result):
    """threadpool callback: print the outcome returned by ``download``."""
    print("the %s is %s" % (result[0], 'downloaded!' if result[1] else 'can not find.'))


initUrl = r"http://blog.sina.com.cn/s/blog_740773f40100ywyg.html"
socket.setdefaulttimeout(10)

# Install a cookie-aware opener with a browser User-agent for all urlopen calls.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
urllib2.install_opener(opener)

# Collect every link on the page whose href ends in "pdf".
resp = urllib2.urlopen(initUrl).read()
a_list = BeautifulSoup(resp).find_all('a')
urls = [a.get('href') for a in a_list]
pdfUrls = [url for url in urls if url and len(url) > 3 and url.endswith('pdf')]

mutex = threading.Lock()
pool = threadpool.ThreadPool(30)
requests = threadpool.makeRequests(download, pdfUrls, print_result)
for req in requests:
    pool.putRequest(req)
pool.wait()

# ---------------------------------------------------------------------------
# Listing 2: several positional arguments per request.
# Each work item is the tuple ``([name, pdfUrl], None)``.
# ---------------------------------------------------------------------------
import cookielib
import urllib2
import socket
import os
from bs4 import BeautifulSoup
import threadpool
import threading


def download(name, pdfUrl):
    """Download one PDF into the ``matlab_pdf_test`` folder.

    Args:
        name: local file name to write inside the folder.
        pdfUrl: absolute URL of the PDF.

    Returns:
        ``(name, status)`` where ``status`` is True when the file was fetched
        and written, False when any error occurred (the error is printed).
    """
    folder = 'matlab_pdf_test'
    # Serialise the exists/makedirs pair; ``with`` releases the lock on error.
    with mutex:
        if not os.path.exists(folder):
            os.makedirs(folder)
    status = False
    try:
        # Fetch before opening the target so a failed download does not
        # leave an empty file behind.
        data = urllib2.urlopen(pdfUrl).read()
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(data)
        status = True
    except Exception as err:
        # Best effort: report the failure and let the other downloads go on.
        print(err)
    return (name, status)


def print_result(request, result):
    """threadpool callback: print the outcome returned by ``download``."""
    print("the %s is %s" % (result[0], 'downloaded!' if result[1] else 'can not find.'))


initUrl = r"http://blog.sina.com.cn/s/blog_740773f40100ywyg.html"
socket.setdefaulttimeout(10)

cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
urllib2.install_opener(opener)

resp = urllib2.urlopen(initUrl).read()
a_list = BeautifulSoup(resp).find_all('a')
urls = [a.get('href') for a in a_list]
pdfUrls = [url for url in urls if url and len(url) > 3 and url.endswith('pdf')]

# Positional form: first tuple element is the args list, second the kwds
# (None here, which threadpool treats as "no keyword arguments").
v = [[url.split('/')[-1], url] for url in pdfUrls]
u = [None] * len(pdfUrls)

mutex = threading.Lock()
pool = threadpool.ThreadPool(30)
requests = threadpool.makeRequests(download, zip(v, u), print_result)
for req in requests:
    pool.putRequest(req)
pool.wait()

# ---------------------------------------------------------------------------
# Listing 3: keyword arguments per request.
# Each work item is the tuple ``(None, {'name': ..., 'pdfUrl': ...})``.
# ---------------------------------------------------------------------------
import cookielib
import urllib2
import socket
import os
from bs4 import BeautifulSoup
import threadpool
import threading


def download(name, pdfUrl):
    """Download one PDF into the ``matlab_pdf_test`` folder.

    Args:
        name: local file name to write inside the folder.
        pdfUrl: absolute URL of the PDF.

    Returns:
        ``(name, status)`` where ``status`` is True when the file was fetched
        and written, False when any error occurred (the error is printed).
    """
    folder = 'matlab_pdf_test'
    # Serialise the exists/makedirs pair; ``with`` releases the lock on error.
    with mutex:
        if not os.path.exists(folder):
            os.makedirs(folder)
    status = False
    try:
        # Fetch before opening the target so a failed download does not
        # leave an empty file behind.
        data = urllib2.urlopen(pdfUrl).read()
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(data)
        status = True
    except Exception as err:
        # Best effort: report the failure and let the other downloads go on.
        print(err)
    return (name, status)


def print_result(request, result):
    """threadpool callback: print the outcome returned by ``download``."""
    print("the %s is %s" % (result[0], 'downloaded!' if result[1] else 'can not find.'))


initUrl = r"http://blog.sina.com.cn/s/blog_740773f40100ywyg.html"
socket.setdefaulttimeout(10)

cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
urllib2.install_opener(opener)

resp = urllib2.urlopen(initUrl).read()
a_list = BeautifulSoup(resp).find_all('a')
urls = [a.get('href') for a in a_list]
pdfUrls = [url for url in urls if url and len(url) > 3 and url.endswith('pdf')]

# Keyword form: the args slot is None and the dict supplies both parameters
# of ``download`` by name (listing 2 used a positional [name, url] list).
v = [{'name': url.split('/')[-1], 'pdfUrl': url} for url in pdfUrls]
u = [None] * len(pdfUrls)

mutex = threading.Lock()
pool = threadpool.ThreadPool(30)
requests = threadpool.makeRequests(download, zip(u, v), print_result)
for req in requests:
    pool.putRequest(req)
pool.wait()
上一篇: Python-延时操作
下一篇: python实现opencv学习一:安装
51208
50625
41247
38062
32524
29430
28295
23152
23104
21449
1501°
2218°
1839°
1774°
2077°
1833°
2509°
4215°
4076°
2914°