刚开始学习python 勿喷
第一步：去官网下载 Python 3 及以上版本
下载链接：https://www.python.org/downloads/ 。如果是 Windows 系统，需要把 Python 添加到环境变量（PATH）中
下面是代码
Python 爬虫：梨视频
需要先安装 requests 模块
pip install requests
import requests
import re
import os
import time
from urllib.request import urlretrieve #下载模块
# Seconds to wait for any single HTTP response before giving up, so a stalled
# connection cannot hang the crawl forever.
_REQUEST_TIMEOUT = 10


def _css_url_target(raw):
    """Return the text inside the first pair of parentheses in *raw*.

    *raw* is what the background-image pattern captures, e.g. ``"(http://x/y.png)"``.
    An empty string is returned when *raw* contains no ``"("`` at all.
    """
    pieces = raw.split("(")
    if len(pieces) < 2:
        return ""
    return pieces[1].split(")")[0]


def _fetch_source_url(page_url, headers):
    """Download a video detail page and return the ``srcUrl="..."`` value found in it."""
    page = requests.get(page_url, headers=headers, timeout=_REQUEST_TIMEOUT).text
    # All matches are concatenated; presumably a page contains exactly one
    # srcUrl -- TODO confirm against a live page.
    return "".join(re.findall('srcUrl="(.*?)"', page))


def video_DL(url, output_path="test2.txt"):
    """Scrape one Pear Video listing page and append its entries to a text file.

    The listing HTML at *url* (for example
    ``https://www.pearvideo.com/category_6``) is searched for the video link,
    poster image, title, description and duration of each entry.  For every
    entry the detail page is fetched to look up the playable source URL, and
    a record of the form::

        title
        source url
        description
        poster image url
        duration
        (three blank lines)

    is appended to *output_path*.

    Args:
        url: Address of the listing page (or listing fragment) to scrape.
        output_path: File the records are appended to; written as UTF-8.

    Returns:
        None.

    Raises:
        requests.RequestException: if a page cannot be fetched in time.
        OSError: if *output_path* cannot be opened for appending.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0"}
    html = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT).text

    video_ids = re.findall('<a href="(.*?)" class="actplay">', html)
    images = re.findall('<div class="popularem-img" style="background-image: url(.*?);">', html)
    titles = re.findall('<h2 class="popularem-title">(.*?)</h2>', html)
    contents = re.findall('<p class="popularem-abs padshow">(.*?)</p>', html)
    durations = re.findall('<div class="cm-duration">(.*?)</div>', html)

    # The five lists are matched up by position.  zip() stops at the shortest
    # one, so an entry missing a field no longer raises IndexError; surplus
    # items in the longer lists are dropped.
    records = []
    for video_id, title, content, image, duration in zip(
        video_ids, titles, contents, images, durations
    ):
        page_url = "http://www.pearvideo.com/{}".format(video_id)
        source_url = _fetch_source_url(page_url, headers)
        fields = [title, source_url, content, _css_url_target(image), duration]
        records.append("\n".join(fields) + "\n\n\n\n")

    if not records:
        # Nothing was scraped: do not create or touch the output file.
        return

    # Open once, with an explicit encoding, so non-ASCII titles are written
    # correctly regardless of the platform's default locale.
    with open(output_path, "a", encoding="utf-8") as out:
        out.writelines(records)

    # NOTE: downloading the video files themselves (e.g. urlretrieve() into a
    # "video" directory created next to the script) is not done here.
def download():
    """Scrape the listing pages at start offsets 0, 12 and 24.

    Each page is requested from Pear Video's asynchronous "popular_loading"
    endpoint and handed to ``video_DL``; the function pauses one second
    before every call.
    """
    # Asynchronous listing endpoint of Pear Video; "start" is the paging offset.
    endpoint = "https://www.pearvideo.com/popular_loading.jsp?reqType=5&categoryId=10&start={}"
    for offset in range(0, 36, 12):
        page_url = endpoint.format(offset)
        time.sleep(1)
        video_DL(page_url)
# Script entry point: the crawl starts as soon as this file is run
# (it also runs if the file is imported, since the call is unguarded).
download()