使用python 爬梨视频

发布时间:2019-10-08 20:14:46编辑:auto阅读(1905)

    刚开始学习python 勿喷
    第一步 去官网下载python3^ 版本
    下载链接 https://www.python.org/downloads/ 如果是 Windows 系统，需要添加一下环境变量

    下面是代码

    python爬虫 梨视频

    需要先下载 requests 模块
    pip install requests

      import requests
    import re
    import os
    import time
    from urllib.request import urlretrieve  #下载模块
    def video_DL(url):
        """Scrape one Pear Video listing page and append its entries to test2.txt.

        The listing HTML at ``url`` is searched for each entry's detail-page
        path, poster image, title, summary and duration. Every detail page is
        then fetched to pull out the ``srcUrl`` playback address. One text
        record per entry (title, playback URL, summary, poster URL, duration,
        each on its own line, followed by three blank lines) is appended to
        ``test2.txt`` in the current working directory.

        Args:
            url: Address of the listing page (or listing fragment) to scrape.

        Returns:
            None. The only outputs are the lines appended to ``test2.txt``.

        Raises:
            requests.exceptions.RequestException: If a request fails or
                times out.
        """
        header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0"}
        timeout = 10  # seconds; keeps a stalled connection from hanging the crawl
        # url = "https://www.pearvideo.com/category_6"
        html = requests.get(url, headers=header, timeout=timeout).text

        video_ids = re.findall(r'<a href="(.*?)" class="actplay">', html)  # detail-page paths
        # The group captures the text between "url" and ";", i.e. "(<address>)".
        video_imgs = re.findall(r'<div class="popularem-img" style="background-image: url(.*?);">', html)
        video_titles = re.findall(r'<h2 class="popularem-title">(.*?)</h2>', html)
        video_contents = re.findall(r'<p class="popularem-abs padshow">(.*?)</p>', html)
        video_times = re.findall(r'<div class="cm-duration">(.*?)</div>', html)

        records = []
        # zip() stops at the shortest list, so a page where one of the patterns
        # matched fewer times no longer aborts the run with an IndexError.
        for video_id, img, title, content, duration in zip(
                video_ids, video_imgs, video_titles, video_contents, video_times):
            # Send the same User-Agent to the detail page as to the listing.
            detail_html = requests.get(
                "http://www.pearvideo.com/{}".format(video_id),
                headers=header,
                timeout=timeout,
            ).text
            # All srcUrl matches are concatenated; an empty string means none found.
            play_url = ''.join(re.findall(r'srcUrl="(.*?)"', detail_html))
            # Drop the parentheses that surround the captured poster address.
            poster = img.split("(")[1].split(")")[0]
            records.append('\n'.join([title, play_url, content, poster, duration]) + '\n\n\n\n')
            # Optional, disabled: save the video into a local "video" folder.
            # path = "video"
            # if path not in os.listdir():
            #     os.mkdir(path)
            # urlretrieve(play_url, path + "/%s.mp4" % title)

        if not records:
            # Nothing was extracted; leave the output file untouched.
            return

        # Open the file once, with an explicit encoding so Chinese text is
        # written the same way regardless of the platform's default locale.
        with open("test2.txt", 'a', encoding="utf-8") as f:
            f.writelines(records)
    def download():
        """Feed three consecutive pages of the category-10 popular list to video_DL.

        The ``start`` query parameter takes the values 0, 12 and 24. The
        function waits one second before handing each page URL to
        ``video_DL``.
        """
        # Pear Video's asynchronous loading endpoint; "start" is filled in per page.
        endpoint = "https://www.pearvideo.com/popular_loading.jsp?reqType=5&categoryId=10&start={}"
        for start in range(0, 36, 12):
            page_url = endpoint.format(start)
            time.sleep(1)
            video_DL(page_url)
    # Start the crawl as soon as this module's top-level code runs.
    download()
    

关键字