发布时间: 2019-09-01 10:53:50 | 编辑: auto | 阅读: 1712
# Usage pattern for the thread pool (some_callable, list_of_args and callback
# are placeholders): create the requests from a callable and an argument list,
# put each request on the pool, then call pool.wait().
requests = makeRequests(some_callable, list_of_args, callback)
[pool.putRequest(req) for req in requests]
pool.wait()
# NOTE(review): presumably the line a pool worker runs for each request --
# request.args is unpacked as positional arguments and request.kwds as keyword
# arguments; confirm against the threadpool source.
result = request.callable(*request.args, **request.kwds)
import cookielib
import urllib2
import socket
import os
from bs4 import BeautifulSoup
import threadpool
import threading
def download(pdfUrl):
    """Download one PDF into the ``matlab_pdf`` folder.

    Uses the module-level ``mutex`` lock to serialise creation of the target
    folder, so the function is safe to call from several threads at once.

    Args:
        pdfUrl: URL of the PDF. The text after the last ``/`` is used as the
            local file name.

    Returns:
        A ``(name, status)`` tuple. ``status`` is True only when the file was
        fetched and written completely; any download or write error is
        printed and reported as False.
    """
    folder = 'matlab_pdf'
    # ``with`` releases the lock even when makedirs raises; a manual
    # acquire()/release() pair would leave every other caller blocked.
    with mutex:
        if not os.path.exists(folder):
            os.makedirs(folder)

    name = pdfUrl.split('/')[-1]
    status = False
    try:
        # Fetch first so a failed request does not leave an empty file behind.
        response = urllib2.urlopen(pdfUrl)
        try:
            data = response.read()
        finally:
            response.close()
        # The context manager closes the file on every path, and there is no
        # handle to close at all if open() itself fails.
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(data)
        status = True
    except Exception as err:
        # Best effort: report the problem and let the caller see status False.
        print(err)
    return (name, status)
def print_result(request, result):
    """Print whether one download succeeded.

    Args:
        request: The finished work request (not used).
        result: A ``(name, status)`` pair; ``status`` is truthy on success.
    """
    name, succeeded = result[0], result[1]
    outcome = 'downloaded!' if succeeded else 'can not find.'
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("the %s is %s" % (name, outcome))
# Page whose <a> links are scanned for PDF files.
initUrl = r"http://blog.sina.com.cn/s/blog_740773f40100ywyg.html"
# Default timeout (seconds) for every socket created from here on.
socket.setdefaulttimeout(10)

# Install a global opener that keeps cookies and sends a browser User-agent.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
urllib2.install_opener(opener)

resp = urllib2.urlopen(initUrl).read()
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and bs4 does not warn).
a_list = BeautifulSoup(resp, 'html.parser').find_all('a')
urls = [a.get('href') for a in a_list]
# Keep non-empty hrefs longer than three characters that end in "pdf".
pdfUrls = [url for url in urls if url and len(url) > 3 and url.endswith('pdf')]

# Lock used by download() while it creates the output folder.
mutex = threading.Lock()
pool = threadpool.ThreadPool(30)
# Each URL string becomes the single positional argument of download().
requests = threadpool.makeRequests(download, pdfUrls, print_result)
for req in requests:
    pool.putRequest(req)
pool.wait()
import cookielib
import urllib2
import socket
import os
from bs4 import BeautifulSoup
import threadpool
import threading
def download(name, pdfUrl):
    """Download one PDF into the ``matlab_pdf_test`` folder.

    Uses the module-level ``mutex`` lock to serialise creation of the target
    folder, so the function is safe to call from several threads at once.

    Args:
        name: File name to store the download under, inside the folder.
        pdfUrl: URL of the PDF to fetch.

    Returns:
        A ``(name, status)`` tuple. ``status`` is True only when the file was
        fetched and written completely; any download or write error is
        printed and reported as False.
    """
    folder = 'matlab_pdf_test'
    # ``with`` releases the lock even when makedirs raises; a manual
    # acquire()/release() pair would leave every other caller blocked.
    with mutex:
        if not os.path.exists(folder):
            os.makedirs(folder)

    status = False
    try:
        # Fetch first so a failed request does not leave an empty file behind.
        response = urllib2.urlopen(pdfUrl)
        try:
            data = response.read()
        finally:
            response.close()
        # The context manager closes the file on every path, and there is no
        # handle to close at all if open() itself fails.
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(data)
        status = True
    except Exception as err:
        # Best effort: report the problem and let the caller see status False.
        print(err)
    return (name, status)
def print_result(request, result):
    """Print whether one download succeeded.

    Args:
        request: The finished work request (not used).
        result: A ``(name, status)`` pair; ``status`` is truthy on success.
    """
    name, succeeded = result[0], result[1]
    outcome = 'downloaded!' if succeeded else 'can not find.'
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("the %s is %s" % (name, outcome))
# Page whose <a> links are scanned for PDF files.
initUrl = r"http://blog.sina.com.cn/s/blog_740773f40100ywyg.html"
# Default timeout (seconds) for every socket created from here on.
socket.setdefaulttimeout(10)

# Install a global opener that keeps cookies and sends a browser User-agent.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
urllib2.install_opener(opener)

resp = urllib2.urlopen(initUrl).read()
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and bs4 does not warn).
a_list = BeautifulSoup(resp, 'html.parser').find_all('a')
urls = [a.get('href') for a in a_list]
# Keep non-empty hrefs longer than three characters that end in "pdf".
pdfUrls = [url for url in urls if url and len(url) > 3 and url.endswith('pdf')]

# Positional arguments for download(): [file name, url] for every PDF link.
v = [[url.split('/')[-1], url] for url in pdfUrls]
# No keyword arguments for any request.
u = [None] * len(pdfUrls)

# Lock used by download() while it creates the output folder.
mutex = threading.Lock()
pool = threadpool.ThreadPool(30)
# Per the threadpool documentation, a tuple item in the argument list is read
# as (positional-args list, keyword-args dict); here that is ([name, url], None).
requests = threadpool.makeRequests(download, zip(v, u), print_result)
for req in requests:
    pool.putRequest(req)
pool.wait()
import cookielib
import urllib2
import socket
import os
from bs4 import BeautifulSoup
import threadpool
import threading
def download(name, pdfUrl):
    """Download one PDF into the ``matlab_pdf_test`` folder.

    Uses the module-level ``mutex`` lock to serialise creation of the target
    folder, so the function is safe to call from several threads at once.

    Args:
        name: File name to store the download under, inside the folder.
        pdfUrl: URL of the PDF to fetch.

    Returns:
        A ``(name, status)`` tuple. ``status`` is True only when the file was
        fetched and written completely; any download or write error is
        printed and reported as False.
    """
    folder = 'matlab_pdf_test'
    # ``with`` releases the lock even when makedirs raises; a manual
    # acquire()/release() pair would leave every other caller blocked.
    with mutex:
        if not os.path.exists(folder):
            os.makedirs(folder)

    status = False
    try:
        # Fetch first so a failed request does not leave an empty file behind.
        response = urllib2.urlopen(pdfUrl)
        try:
            data = response.read()
        finally:
            response.close()
        # The context manager closes the file on every path, and there is no
        # handle to close at all if open() itself fails.
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(data)
        status = True
    except Exception as err:
        # Best effort: report the problem and let the caller see status False.
        print(err)
    return (name, status)
def print_result(request, result):
    """Print whether one download succeeded.

    Args:
        request: The finished work request (not used).
        result: A ``(name, status)`` pair; ``status`` is truthy on success.
    """
    name, succeeded = result[0], result[1]
    outcome = 'downloaded!' if succeeded else 'can not find.'
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("the %s is %s" % (name, outcome))
# Page whose <a> links are scanned for PDF files.
initUrl = r"http://blog.sina.com.cn/s/blog_740773f40100ywyg.html"
# Default timeout (seconds) for every socket created from here on.
socket.setdefaulttimeout(10)

# Install a global opener that keeps cookies and sends a browser User-agent.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')]
urllib2.install_opener(opener)

resp = urllib2.urlopen(initUrl).read()
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and bs4 does not warn).
a_list = BeautifulSoup(resp, 'html.parser').find_all('a')
urls = [a.get('href') for a in a_list]
# Keep non-empty hrefs longer than three characters that end in "pdf".
pdfUrls = [url for url in urls if url and len(url) > 3 and url.endswith('pdf')]

# Keyword arguments for download(): one {'name': ..., 'pdfUrl': ...} per link.
v = [{'name': url.split('/')[-1], 'pdfUrl': url} for url in pdfUrls]
# No positional arguments for any request.
u = [None] * len(pdfUrls)

# Lock used by download() while it creates the output folder.
mutex = threading.Lock()
pool = threadpool.ThreadPool(30)
# Per the threadpool documentation, a tuple item in the argument list is read
# as (positional-args list, keyword-args dict); here that is (None, kwargs).
requests = threadpool.makeRequests(download, zip(u, v), print_result)
for req in requests:
    pool.putRequest(req)
pool.wait()
上一篇: Python-延时操作
下一篇: python实现opencv学习一:安装
47868
46434
37321
34767
29337
25999
24952
19970
19568
18056
5811°
6436°
5953°
5979°
7083°
5929°
5970°
6462°
6427°
7804°