发布时间: 2019-08-25 09:34:29 | 编辑: auto | 阅读 (1272)
"""Fetch the Douban home page with a browser cookie and dump the parsed HTML.

The script sends a single GET request to ``url`` with browser-like headers
(including a ``Cookie`` header), parses the response with BeautifulSoup using
the ``lxml`` parser, prints the encoding BeautifulSoup detected and the
document re-encoded as GB18030, then writes the document to ``title.txt``.

The code is written so that it parses under both Python 2 (the original
target, which uses ``urllib2``) and Python 3 (``urllib.request``).
"""
import io
from contextlib import closing

from bs4 import BeautifulSoup

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same Request/urlopen names under urllib.request.
    import urllib.request as urllib2

url = 'https://www.douban.com'

# SECURITY NOTE(review): this is a browser session cookie pasted into the
# source (it contains fields such as dbcl2 and ck). Anyone who can read this
# file can reuse it; prefer loading it from an environment variable or a
# private config file and rotating the one that was published.
cookie = 'll="118234"; __yadk_uid=FZYkMR92OctgDfVQxh7rgOvKAfSaAcF1; gr_user_id=30-b429-d8ac2b39f39e; _vwo_uuid_v2=62C802065BA1FE1E49689EB42248C9B5|86bc597a128b6ebcf16129a36961cd49; bid=Ogc8aq4tIsk; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1519356140%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DynTCvqw85IEmoWlag4b0hClM5qTjixjEN46Bbi_l7O1HuW1WreuRM_BxXp7M6Dyo%26wd%3D%26eqid%3Dd6c4a5f10001bb85000000025a8f88e8%22%5D; _pk_ses.100001.8cb4=*; __utma=30149280.832780041.1482799300.1517562754.1519356141.20; __utmc=30149280; __utmz=30149280.1519356141.20.19.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmt=1; _ga=GA1.2.832780041.1482799300; _gid=GA1.2.925059532.1519356149; _gat_UA-7019765-1=1; dbcl2="162182190:W4cfAVJjlD0"; ck=Pky-; _pk_id.100001.8cb4=599b9f4c8e87f346.1482799300.14.1519356150.1514105301.; push_noty_num=0; push_doumail_num=0; __utmv=30149280.16218; __utmb=30149280.3.10.1519356141'

# Headers sent with the request; they imitate a desktop Chrome browser and
# carry the cookie above.
send_headers = {
    'Host': 'www.douban.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Connection': 'keep-alive',
    'Cookie': cookie,
}

req = urllib2.Request(url, headers=send_headers)

# closing() releases the HTTP response even if parsing raises; the Python 2
# response object is not a context manager on its own.
with closing(urllib2.urlopen(req)) as page:
    soup = BeautifulSoup(page, 'lxml')

# Encoding BeautifulSoup detected for the downloaded document.
print(soup.original_encoding)

# Dump the whole document as GB18030 bytes (presumably so it displays on a
# Chinese-locale Windows console -- confirm). On Python 3 this prints the
# bytes repr rather than the raw bytes.
print(soup.encode('gb18030'))

# Write the document as UTF-8 text. soup.decode() yields the Unicode form of
# the same markup that str(soup) produced (as UTF-8 bytes) on Python 2, and
# the explicit encoding keeps the output independent of the locale default.
with io.open("title.txt", "w", encoding="utf-8") as out_file:
    out_file.write(soup.decode())

print('ok')
转载自阿飞的技术仓库
上一篇: python复杂网络处理模块networ
下一篇: python对比配置文件差异
46580
44483
35913
33478
28165
24734
23654
18861
18153
16834
4649°
5209°
4737°
4870°
5536°
4499°
4552°
5092°
4996°
6246°