python: 抓取免费代理ip

发布时间:2019-09-16 07:40:47 | 编辑:auto | 阅读(1512)

    通过抓取西刺网免费代理ip实现代理爬虫:

    from bs4 import BeautifulSoup
    import requests
    import random
    import telnetlib
    # Request headers carrying one User-Agent picked at random from the pool.
    headers = {
        'User-Agent': random.choice([
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
        ]),
    }
    # Accumulators that get_proxy() appends to: "http://ip:port" URLs and
    # plain "ip:port" strings respectively.
    proxy_list = []
    ip_list = []
    # NOTE: from here on the name `requests` refers to a Session object,
    # not the requests module.
    requests = requests.session()
    def get_proxy():
        """Scrape the xicidaili free-proxy listing and record reachable proxies.

        Downloads the listing page, reads the second and third ``<td>`` cell of
        every table row after the first as IP and port, and probes each address
        with a 2-second TCP connect. Every address that accepts the connection
        is appended to the module-level ``ip_list`` as ``"ip:port"`` and to
        ``proxy_list`` as ``"http://ip:port"``.

        Returns:
            None. Results are delivered through ``ip_list`` and ``proxy_list``.

        Raises:
            Any exception raised by the HTTP request for the listing page
            (including a timeout after 10 seconds) propagates to the caller.
        """
        # Local import: a plain socket probe replaces telnetlib.Telnet, which
        # was never closed here and is deprecated/removed in newer Pythons.
        import socket

        url = 'http://www.xicidaili.com/nn/'
        web_data = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(web_data.text, 'lxml')

        # Row 0 is skipped, as before (presumably the table's header row).
        for row in soup.find_all('tr')[1:]:
            tds = row.find_all('td')
            if len(tds) < 3:
                # Not a proxy row; indexing tds[1]/tds[2] would fail.
                continue
            ip = tds[1].text.strip()
            port = tds[2].text.strip()
            if not ip or not port:
                continue
            try:
                # The context manager closes the probe socket immediately.
                with socket.create_connection((ip, int(port)), timeout=2):
                    pass
            except (OSError, ValueError, OverflowError):
                # Unreachable host, timeout, or a malformed port: skip it.
                continue
            address = ip + ":" + port
            ip_list.append(address)
            # Appending here (instead of re-walking ip_list afterwards) keeps
            # repeated calls from duplicating earlier entries in proxy_list.
            proxy_list.append('http://' + address)
    # get_proxy() fills the module-level proxy_list and returns None, so its
    # result is not stored.
    get_proxy()

    # random.choice() fails with an opaque IndexError on an empty list; report
    # the real problem instead.
    if not proxy_list:
        raise RuntimeError('no reachable proxy was found')
    proxy_ip = random.choice(proxy_list)
    # requests expects a mapping from URL scheme to proxy URL.
    proxies = {'http': proxy_ip}
    print(proxies)
    

    代理IP的使用:把上面得到的 proxies 字典通过 proxies 参数传给 get 请求即可

    # `url` is the page to fetch and must be defined by the caller. Only
    # http:// URLs are routed through the proxy because `proxies` has just an
    # 'http' entry. The timeout keeps an unresponsive free proxy from hanging
    # the request indefinitely.
    requests.get(url, headers=headers, proxies=proxies, timeout=10)

关键字