使用python 爬梨视频

刚开始学习 Python，勿喷。
第一步：去官网下载 Python 3 或更高版本。
下载链接：https://www.python.org/downloads/ 。如果是 Windows 系统，还需要把 Python 添加到环境变量中。

下面是代码

Python 爬虫：梨视频

需要先安装 requests 模块：
pip install requests

import os
import re
import time
from urllib.request import urlretrieve  # download helper

import requests
def _css_url_target(raw):
    """Return the text between the first "(" and the next ")" in *raw*.

    The poster pattern captures the parenthesised part of a CSS
    ``url(...)`` value, parentheses included; this strips them.  An empty
    string is returned when *raw* contains no opening parenthesis.
    """
    return raw.partition("(")[2].partition(")")[0]


def video_DL(url: str) -> None:
    """Scrape one Pear Video listing page and append its entries to ``test2.txt``.

    The HTML returned for *url* is scanned with regular expressions for each
    video's detail-page path, poster image, title, summary and duration.
    Every detail page is then fetched to read its ``srcUrl="..."`` value,
    and one record per video (title, play URL, summary, poster URL and
    duration, one per line, followed by blank lines) is appended to
    ``test2.txt`` in the current working directory.

    Args:
        url: Address of the listing page to scrape, e.g.
            ``https://www.pearvideo.com/category_6`` or one of the
            ``popular_loading.jsp`` fragments.

    Raises:
        requests.RequestException: If a request cannot be completed or
            exceeds the timeout.
    """
    header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0"}
    # A timeout keeps a stalled connection from hanging the crawl forever.
    timeout = 10

    html = requests.get(url, headers=header, timeout=timeout).text

    video_ids = re.findall('<a href="(.*?)" class="actplay">', html)
    raw_posters = re.findall(
        '<div class="popularem-img" style="background-image: url(.*?);">', html
    )
    titles = re.findall('<h2 class="popularem-title">(.*?)</h2>', html)
    summaries = re.findall('<p class="popularem-abs padshow">(.*?)</p>', html)
    durations = re.findall('<div class="cm-duration">(.*?)</div>', html)

    posters = [_css_url_target(raw) for raw in raw_posters]

    # Resolve the playable address of every video from its detail page.
    # The same headers as the listing request are sent for consistency.
    play_urls = []
    for video_id in video_ids:
        detail_url = "http://www.pearvideo.com/{}".format(video_id)
        detail_html = requests.get(detail_url, headers=header, timeout=timeout).text
        # A page may contain several matches; they are concatenated as-is.
        play_urls.append(''.join(re.findall('srcUrl="(.*?)"', detail_html)))

    # zip() stops at the shortest list, so a page where one pattern matched
    # fewer items yields fewer records instead of raising IndexError midway.
    records = [
        title + '\n' + play_url + '\n' + summary + '\n' + poster + '\n' + duration + '\n\n\n\n'
        for title, play_url, summary, poster, duration
        in zip(titles, play_urls, summaries, posters, durations)
    ]

    # Only touch the file when there is something to write; UTF-8 is forced
    # so Chinese text does not depend on the platform's default codec.
    if records:
        with open("test2.txt", "a", encoding="utf-8") as f:
            f.writelines(records)

    # Optional: uncomment to also download every video into ./video.
    # os.makedirs("video", exist_ok=True)
    # for title, play_url in zip(titles, play_urls):
    #     urlretrieve(play_url, "video/%s.mp4" % title)
def download(total: int = 36, page_size: int = 12, category_id: int = 10, delay: float = 1) -> None:
    """Crawl consecutive pages of Pear Video's asynchronous listing endpoint.

    Pages are requested through ``popular_loading.jsp`` with ``start``
    offsets ``0, page_size, 2 * page_size, ...`` while the offset is below
    *total*.  Each page URL is handed to ``video_DL`` after pausing for
    *delay* seconds.  With the defaults this visits offsets 0, 12 and 24.

    Args:
        total: Exclusive upper bound for the ``start`` offset.
        page_size: Step between successive ``start`` offsets; must be > 0.
        category_id: Value sent as the ``categoryId`` query parameter.
        delay: Seconds to sleep before each page is scraped.

    Raises:
        ValueError: If *page_size* is not positive (the crawl would never
            advance).
    """
    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    for start in range(0, total, page_size):
        url = "https://www.pearvideo.com/popular_loading.jsp?reqType=5&categoryId={}&start={}".format(category_id, start)
        # Pause between pages to avoid hammering the server.
        time.sleep(delay)
        video_DL(url)
# Kick off the crawl when this file is run (there is no __main__ guard, so
# importing the module triggers it as well).
download()

使用python 爬梨视频

Python 爬虫 梨视频

Python 爬虫梨视频