python多线程下载图片

发布时间:2019-08-06 09:14:09编辑:auto阅读(1450)

    功能:从 image.baidu.com 自动翻页下载图片的 Python 程序
    用法:运行程序后,输入关键字即可
    #!/usr/bin/python
    # filename: getbaidupic.py
    # description: get images from image.baidu.com
    # author: cjcse
    # version: v 0.21
    import urllib
    import htmllib
    import formatter
    import string
    import os
    import sys
    import time
    import thread
    #import threading
    class Parser(htmllib.HTMLParser):
    #return a dictionary mapping anchor texts to lists of associated hyperlinks
    def __init__(self, verbose=0):
    self.anchors = {}
    f = formatter.NullFormatter()
    htmllib.HTMLParser.__init__(self, f, verbose)
    def anchor_bgn(self, href, name, type):
    self.save_bgn()
    self.anchor = href
    def anchor_end(self):
    text = string.strip(self.save_end())
    if self.anchor and text:
    self.anchors[text] = self.anchors.get(text, []) + [self.anchor]
    def _UniquePath(path):
        """Return *path*, or a numbered variant of it that does not exist yet.

        The counter goes in front of the extension, so ``a.jpg`` becomes
        ``a_1.jpg``, then ``a_2.jpg``, and so on. Names with several dots keep
        their full stem and their extension.
        """
        stem, ext = os.path.splitext(path)
        candidate = path
        counter = 1
        while os.path.exists(candidate):
            candidate = "%s_%d%s" % (stem, counter, ext)
            counter += 1
        return candidate


    def GetJpg(url):
        """Download one image URL into the directory named by the global ``save``.

        The file is named after the last path segment of *url*; if that name
        is already taken a numeric suffix is added. Prints ``<url>\\t[OK]`` on
        success and ``<url>\\t[Failed]`` otherwise. Never raises for ordinary
        errors, so one bad URL cannot stop the caller's loop.

        Args:
            url: Address of the image to fetch.
        """
        global total
        # Responses shorter than this are reported as failed and not saved.
        # NOTE(review): presumably meant to skip thumbnails or error pages;
        # confirm the intended threshold.
        min_bytes = 11024
        try:
            # ``total`` is not initialised anywhere in this file, so a plain
            # ``total += 1`` raised NameError and every download was reported
            # as failed. Start from zero when the counter does not exist yet.
            total = globals().get("total", 0) + 1
            target = _UniquePath(os.path.join(save, url.split("/")[-1]))
            response = urllib.urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
            if len(data) < min_bytes:
                print(url + "\t[Failed]")
                return
            with open(target, "wb") as out:
                out.write(data)
            print(url + "\t[OK]")
        except Exception:
            # Best effort: network and disk errors are reported, not raised.
            # SystemExit and KeyboardInterrupt are deliberately not caught.
            print(url + "\t[Failed]")
    def GetBaiduNextPage(url):
        """Return *url* with the current paging parameters appended.

        Reads the module-level ``rn``, ``pn`` and ``ln`` values to build the
        suffix, then advances the global ``pn`` by 18 so that the next call
        produces the following offset.
        """
        global pn
        paged = url + "&rn=%r&pn=%r&ln=%r" % (rn, pn, ln)
        pn = pn + 18
        return paged
    def _ExtractJpgUrls(hrefs):
        """Return the distinct ``http://`` JPEG URLs embedded in *hrefs*, in order.

        Each href is split on ``&`` and ``=``; every resulting piece that
        contains both ``http://`` and ``.jpg`` (compared case-insensitively)
        is a candidate. The piece is returned with its original casing,
        because URL paths are case-sensitive.
        """
        found = []
        seen = set()
        for href in hrefs:
            if ".jpg" not in href.lower():
                continue
            for param in href.split("&"):
                for piece in param.split("="):
                    lowered = piece.lower()
                    if "http://" not in lowered or ".jpg" not in lowered:
                        continue
                    if piece in seen:
                        # The same image is often linked more than once.
                        continue
                    seen.add(piece)
                    found.append(piece)
        return found


    def GetAllJpg(url):
        """Fetch the page at *url* and download every JPEG it links to.

        The page is parsed with ``Parser``; each image URL found inside the
        collected hrefs is handed to ``GetJpg`` exactly once.

        Args:
            url: Address of the result page to scan.
        """
        page = urllib.urlopen(url)
        try:
            html = page.read()
        finally:
            page.close()

        parser = Parser()
        parser.feed(html)
        parser.close()

        hrefs = []
        for links in parser.anchors.values():
            hrefs.extend(links)

        for jpg_url in _ExtractJpgUrls(hrefs):
            try:
                GetJpg(jpg_url)
            except Exception:
                # Best effort: one bad image must not stop the rest of the page.
                continue
    # Script entry: ask for a keyword, then scan ``pages`` result pages in
    # parallel, one worker thread per page, and wait until all are done.
    import threading

    print("---------------------------------------------------------------------")
    print("Description: Get images from image.baidu.com. ")
    print("Author: cjcse from CU.")
    print("version: v 0.2.")
    print("---------------------------------------------------------------------")

    keyword = raw_input("Input your keywords: ")
    while not keyword:
        keyword = raw_input("Keyword: ")

    # The keyword is percent-encoded so spaces, "&" and non-ASCII bytes cannot
    # corrupt the query string. The host and ``tn`` value use "image"; the
    # previous "p_w_picpath" spelling does not name a reachable service.
    url = ("http://image.baidu.com/i?ct=201326592&cl=2&lm=-1&tn=baiduimage"
           "&pv=&word=" + urllib.quote(keyword) + "&z=5")

    # Read by GetJpg as the download directory.
    save = "c:\\p_w_picpath_baidu"
    try:
        if not os.path.exists(save):
            os.mkdir(save)
    except OSError:
        print("Failed to create directory in disk c:")
        sys.exit(1)
    print("The images will be stored in folder \"%s\"." % save)

    pages = 50
    # Paging parameters read (and, for ``pn``, advanced) by GetBaiduNextPage.
    rn = 21
    pn = 18
    ln = 2000

    workers = []
    page_url = url
    for _ in range(pages):
        worker = threading.Thread(target=GetAllJpg, args=(page_url,))
        # Daemon threads let Ctrl-C end the program while downloads run.
        worker.daemon = True
        worker.start()
        workers.append(worker)
        # Always derive the next page from the base URL; feeding the paged
        # URL back in would pile up duplicate rn/pn/ln parameters.
        page_url = GetBaiduNextPage(url)

    # Wait for the workers instead of spinning in a busy loop. The short
    # timeout keeps the main thread responsive to KeyboardInterrupt.
    for worker in workers:
        while worker.is_alive():
            worker.join(0.5)

关键字