Python爬虫之Requests库的基本使用

发布时间: 2019-04-10 21:14:17 | 编辑: auto | 阅读: 1879

      1 import requests
      2 response = requests.get('http://www.baidu.com/')  # send a plain GET request
      3 print(type(response))  # type of the object returned by requests.get
      4 print(response.status_code)  # HTTP status code of the reply
      5 print(type(response.text))  # type of the decoded body
      6 print(response.text)  # response body as text
      7 print(response.cookies)  # cookies attached to the response
      8 
      9 # The other HTTP request methods
     10 import requests
     11 requests.post('http://httpbin.org/post')
     12 requests.put('http://httpbin.org/put')
     13 requests.delete('http://httpbin.org/delete')
     14 requests.head('http://httpbin.org/get')
     15 requests.options('http://httpbin.org/get')
     16 
     17 # Basic GET request
     18 import requests
     19 response = requests.get('http://httpbin.org/get')
     20 print(response.text)  # print the response body
     21 
     22 # GET request with query parameters
     23 import requests
     24 response = requests.get('http://httpbin.org/get?name=germey&age=22')  # parameters written directly into the URL
     25 print(response.text)
     26 
     27 import requests
     28 data = {
     29     'name': 'germey',
     30     'age': 22
     31 }
     32 response = requests.get('http://httpbin.org/get', params = data)  # same parameters, passed as a dict via params
     33 print(response.text)
     34 
     35 # Parsing a JSON response
     36 import requests
     37 import json
     38 response = requests.get('http://httpbin.org/get')
     39 print(type(response.text))  # type of the raw text body
     40 print(response.json())  # body parsed by the response's own json() helper
     41 print(json.loads(response.text))  # the same text parsed manually with the json module
     42 print(type(response.json()))  # type of the parsed result
     43 
     44 # Fetching binary data
     45 import requests
     46 response = requests.get('http://github.com/favicon.ico')
     47 print(type(response.text), type(response.content))  # compare the types of the text and raw views of the body
     48 print(response.text)  # body decoded as text
     49 print(response.content)  # raw body content
     50 
     51 # 保存图片
     52 import requests
     53 response = requests.get('http://github.com/favicon.ico')
     54 with open('1.ico', 'wb') as f:
     55     f.write(response.content)
     56     f.close()
     57 
     58 # Adding headers (the author notes that the request fails without them)
     59 import requests
     60 response = requests.get('http://www.zhihu.com/explore')  # request sent without custom headers
     61 print(response.text)
     62 
     63 import requests
     64 headers = {
     65     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
     66 }
     67 response = requests.get('http://zhihu.com/explore', headers = headers)  # same page, now with a browser User-Agent
     68 print(response.text)
     69 
     70 # Basic POST request
     71 import requests
     72 data = {'name': 'germey', 'age': 22}
     73 response = requests.post('http://httpbin.org/post', data = data)  # dict passed as the request body via data
     74 print(response.text)
     75 
     76 import requests
     77 data = {'name':'germey', 'age':22}
     78 headers = {
     79     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
     80 }
     81 response = requests.post('http://httpbin.org/post', data = data, headers = headers)  # POST with both a body and custom headers
     82 print(response.json())  # print the reply parsed as JSON
     83 
     84 # Response attributes
     85 import requests
     86 response = requests.get('http://www.jianshu.com')
     87 print(type(response.status_code), response.status_code)  # status code and its type
     88 print(type(response.headers), response.headers)  # response headers and their container type
     89 print(type(response.cookies), response.cookies)  # cookies and their container type
     90 print(type(response.url), response.url)  # URL of the response
     91 print(type(response.history), response.history)  # request history (earlier responses, e.g. redirects)
     92 
     93 # 文件上传
     94 import requests
     95 files = {'file':open('1.ico', 'rb')}
     96 response = requests.post('http://httpbin.org/post', files = files)
     97 print(response.text)
     98 
     99 # 获取cookie
    100 import requests
    101 response = requests.get('http://www.baidu.com')
    102 print(response.cookies)
    103 for key, value in response.cookies.items():
    104     print(key + ' = ' + value)
    105 
    106 # 会话维持 模拟登陆
    107 import requests
    108 requests.get('http://httpbin.org/cookies/set/number/123456789')
    109 response = requests.get('http://httpbin.org/cookies')
    110 print(response.text)
    111 
    112 import requests
    113 s = requests.session()
    114 s.get('http://httpbin.org/cookies/set/number/123456789')
    115 response = s.get('http://httpbin.org/cookies')
    116 print(response.text)
    117 
    118 # Certificate verification
    119 import requests
    120 response = requests.get('https://www.12306.cn')  # NOTE(review): presumably chosen to show the HTTPS certificate check - confirm
    121 print(response.status_code)

     

关键字