python3使用urllib抓取用户

发布时间:2019-10-16 17:30:06 | 编辑:auto | 阅读(600)

    #python34
    # -*- coding: utf-8 -*-
    
    import http.cookiejar
    import urllib.error
    import urllib.parse
    import urllib.request
    
    # Endpoints: the login form target, and a second page that is requested
    # afterwards through the same cookie-aware opener.
    LOGIN_URL = r'http://......'
    get_url = 'http://.......'

    # Send a browser-like User-Agent with the login request.
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }

    # Prompt for the credentials (user name first, then password) and encode
    # them as a URL-encoded form body; Request needs the body as bytes.
    username = input('用户名:')
    password = input('密码:')
    postdata = urllib.parse.urlencode(
        {'username': username, 'password': password}
    ).encode()

    # Opener whose cookie processor records response cookies in cookie_jar and
    # replays them on later requests. The jar is bound to 'cookie_jar.txt' but
    # nothing is read from or written to that file here.
    cookie_jar = http.cookiejar.MozillaCookieJar('cookie_jar.txt')
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar)
    )

    # Supplying a body makes this a POST request.
    request = urllib.request.Request(LOGIN_URL, data=postdata, headers=headers)
    
    # Log in. The opener's cookie processor stores any cookies set by the
    # response in cookie_jar as a side effect of opening the request.
    try:
        response = opener.open(request)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        # It is the only one of the two that carries an HTTP status code.
        print(e.code, ':', e.reason)
    except urllib.error.URLError as e:
        # Plain URLError (DNS failure, refused connection, ...) has no .code
        # attribute; printing it here would raise AttributeError.
        print(e.reason)
    else:
        # The body is not needed; release the connection.
        response.close()

        # Save the cookies to the local file test.txt as a single
        # "name=value;name=value" line so they are easy to read back.
        # Cookie.value may be None, so fall back to an empty string.
        cookies = [item.name + '=' + (item.value or '') for item in cookie_jar]
        print(cookies)
        with open('test.txt', 'w', encoding='utf-8') as f:
            f.write(';'.join(cookies))

        # Alternative: persist the whole jar to cookie_jar.txt instead:
        #cookie_jar.save(ignore_discard=True, ignore_expires=True)
    
    # Test fetching data: request the second URL through the same opener so
    # the cookies collected during login are sent along.
    get_request = urllib.request.Request(get_url)
    with opener.open(get_request) as get_response:
        # Decode with the charset declared in the Content-Type header; fall
        # back to UTF-8 (the previous hard-coded behaviour) when none is given.
        charset = get_response.headers.get_content_charset() or 'utf-8'
        print(get_response.read().decode(charset))

关键字