最近發現一個視頻網站,準備去爬取的時候,前面很順利:利用fiddler抓包獲取網站的post數據,loads為python字典數據,分析數據就能發現每個視頻的連接地址就在其中。發現這些都是m3u8文件流的形式,並且是用 key 加密的。最後實現代碼如下。下載下來後用暴風影音可以播放,其他播放器要用格式工廠轉 ...
最近發現一個視頻網站,準備去爬取的時候,前面很順利
利用fiddler抓包獲取網站的post數據loads為python字典數據,分析數據就能發現每個視頻的連接地址就在其中
發現這些都是m3u8文件流的形式並且加密的
key
最後實現代碼如下:
下載下來後用暴風影音可以播放,其他播放器要用格式工廠轉換下格式,兄弟們注意身體啊!
開發環境:windows+pyCharm+python3.5.2
第三方模塊:pip3 install pycryptodome
# Python learning/discussion QQ group: 906715085
"""Download AES-128 encrypted HLS (m3u8) video streams listed by a JSON API.

The script pages through a JSON listing endpoint, derives the m3u8 playlist
URL and the key URL for each entry, downloads every ``.ts`` segment, decrypts
it and appends it to a single output file.

Third-party requirements: ``requests`` and ``pycryptodome``
(pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple pycryptodome).
"""
import json
import os
import re
import shutil
import sys
import urllib.error
import urllib.request
from urllib.parse import urljoin

import requests
from Crypto.Cipher import AES

# Request headers shared by every HTTP call made through ``requests``.
# Defined at module level so the functions below also work when this file is
# imported rather than run as a script.
headers = {"User-Agent":"Mozilla/5.0 (Linux; Android 8.0.0; MIX 2S Build/OPR1.170623.032) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36",}

# Listing endpoint; ``%s`` is the 1-based page number.
_LIST_URL = "http://qqchub.com/index.php/ajax/data.html?mid=1&page=%s&limit=8&tid=all&by=t&level=1"

# Characters that cannot appear in a Windows file name.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def aes_decode(data, key):
    """Decrypt ``data`` with AES in CBC mode.

    :param data: encrypted bytes (length must be a multiple of 16)
    :param key: AES key bytes (16, 24 or 32 bytes); also used as the IV
    :return: decrypted bytes with trailing NUL bytes removed
    """
    # NOTE(review): the key doubles as the IV and padding is removed by
    # stripping NUL bytes. HLS streams commonly use an explicit IV and
    # PKCS#7 padding - confirm this matches what the target site serves.
    cryptor = AES.new(key, AES.MODE_CBC, key)
    plain_text = cryptor.decrypt(data)
    return plain_text.rstrip(b'\0')


def getUrlData(url, DOWNLOAD_PATH):
    """Open the m3u8 playlist at ``url``.

    :param url: URL of the m3u8 playlist that lists the ts segments
    :param DOWNLOAD_PATH: directory in which ``error.log`` is written on failure
    :return: the open HTTP response (iterable line by line, as bytes), or
        ``-1`` when the request failed
    """
    try:
        return urllib.request.urlopen(url, timeout=20)
    except Exception as err:
        # Network boundary: record the failure and report it to the caller
        # through the -1 sentinel instead of aborting the whole run.
        error_log = os.path.join(DOWNLOAD_PATH, 'error.log')
        with open(error_log, 'a+', encoding='utf-8') as f:
            # file.write() takes exactly one string, so format first.
            f.write('下載出錯 (%s)\n%s\r\n' % (url, err))
        print('下載出錯 (%s)\n'%url,err)
        return -1


def getDown_reqursts(url, file_path, key):
    """Download one ts segment, decrypt it and append it to ``file_path``.

    :param url: URL of the ts segment
    :param file_path: path of the temporary file the segment is appended to
    :param key: AES key bytes used to decrypt the segment
    :return: ``True`` when the segment was written, ``False`` otherwise
    """
    try:
        response = requests.get(url=url, timeout=120, headers=headers)
        # Do not decrypt and append an HTTP error page as if it were video.
        response.raise_for_status()
        data = aes_decode(response.content, key)
        with open(file_path, 'ab') as f:
            f.write(data)
    except Exception as e:
        # Deliberately best-effort: a failed segment is reported and skipped
        # so the remaining segments are still downloaded.
        print(e)
        return False
    return True


def _safe_filename(name):
    """Return ``name`` with characters that are illegal in file names replaced."""
    cleaned = _ILLEGAL_FILENAME_CHARS.sub('_', str(name)).strip(' .')
    return cleaned or 'video'


def getVideo_requests(url_m3u8, video_Name, key, DOWNLOAD_PATH):
    """Download every ts segment listed in a playlist into one mp4 file.

    :param url_m3u8: URL of the m3u8 playlist that lists the ts segments
    :param video_Name: title of the video, used as the output file name
    :param key: AES key bytes used to decrypt the segments
    :param DOWNLOAD_PATH: directory the video is saved into
    """
    print('>>> 開始下載 !\n')
    urlData = getUrlData(url_m3u8, DOWNLOAD_PATH)
    if urlData == -1:
        # The playlist could not be fetched (already logged); skip this video.
        return

    file_stem = _safe_filename(video_Name)
    tempName_video = os.path.join(DOWNLOAD_PATH, '%s.ts' % file_stem)
    # Create/truncate the temporary file so that an interrupted earlier run
    # does not leave stale segments that would be written twice.
    with open(tempName_video, "wb"):
        pass

    with urlData:  # make sure the playlist connection is closed
        for line in urlData:
            # The response yields raw bytes; decode and trim the line ending.
            url_ts = line.decode("utf-8").strip()
            # Skip blank lines, playlist tags/comments and non-segment lines.
            if not url_ts or url_ts.startswith('#') or '.ts' not in url_ts:
                continue
            if not url_ts.startswith('http'):
                # Relative segment path: resolve it against the playlist URL.
                url_ts = urljoin(url_m3u8, url_ts)
            print(url_ts)
            getDown_reqursts(url_ts, tempName_video, key)

    filename = os.path.join(DOWNLOAD_PATH, '%s.mp4' % file_stem)
    # The container is not converted; the file is only renamed to .mp4.
    shutil.move(tempName_video, filename)
    print('>>> %s.mp4 下載完成! '%video_Name)


def run(ret, start_url, DOWNLOAD_PATH):
    """Download every video described by one page of listing data.

    :param ret: parsed JSON of one listing page; ``ret["list"]`` holds the entries
    :param start_url: URL the listing page was fetched from (unused, kept for
        backward compatibility)
    :param DOWNLOAD_PATH: directory the videos are saved into
    """
    for line in ret["list"]:
        try:
            # The fourth '/'-separated piece of the picture URL is the
            # identifier shared by the playlist and key URLs.
            num = line["vod_pic"].split('/')[3]
            video_Name = line["vod_name"]
        except (KeyError, IndexError) as err:
            print('下載出錯 (%s)\n' % (line,), err)
            continue

        url_m3u8 = 'http://rzlkq.com:8091/%s/1000kb/hls/index.m3u8'%num
        key_url = 'http://rzlkq.com:8091/%s/1000kb/hls/key.key'%num
        try:
            key_response = requests.get(url=key_url, timeout=120, headers=headers)
            key_response.raise_for_status()
        except requests.RequestException as err:
            # Without the key nothing can be decrypted; skip this video.
            print('下載出錯 (%s)\n' % key_url, err)
            continue

        getVideo_requests(url_m3u8, video_Name, key_response.content, DOWNLOAD_PATH)


def url_inpur():
    """Prompt until the user enters an integer category number.

    :return: the number entered by the user
    """
    while True:
        try:
            return int(input("請輸入你要下載的分類:1.站長推薦 2.國產自拍 3.名優 4.亞洲無碼 \r\n>>>"))
        except ValueError:
            # Only a non-numeric answer is retried; EOF or Ctrl-C propagates
            # instead of looping forever.
            print("輸入錯誤!請輸入正確的數字選擇>>>")


def check_dir(download_path):
    """Create ``download_path`` if it does not exist yet.

    :param download_path: directory to create
    :return: ``download_path``, whether it was just created or already existed
    :raises OSError: if the path exists but is not a directory, or cannot be created
    """
    os.makedirs(download_path, exist_ok=True)
    return download_path


def _download_category(download_dir):
    """Page through the listing and download every video into ``download_dir``."""
    DOWNLOAD_PATH = check_dir(download_dir)
    page = 1
    while True:
        start_url = _LIST_URL % page
        response = requests.get(url=start_url, headers=headers, timeout=20)
        ret = json.loads(response.text)
        if not ret.get("list"):
            # An empty list means there are no more pages.
            break
        page += 1
        run(ret, start_url, DOWNLOAD_PATH)


# NOTE(review): z01-z04 all query the same listing URL and differ only in the
# target directory - confirm whether each was meant to use its own category.
def z01():
    """Download the listing into ``D:\\DownLoad\\z01``."""
    _download_category(r'D:\DownLoad\z01')


def z02():
    """Download the listing into ``D:\\DownLoad\\z02``."""
    _download_category(r'D:\DownLoad\z02')


def z03():
    """Download the listing into ``D:\\DownLoad\\z03``."""
    _download_category(r'D:\DownLoad\z03')


def z04():
    """Download the listing into ``D:\\DownLoad\\z04``."""
    _download_category(r'D:\DownLoad\z04')


if __name__ == "__main__":
    z01()
    z02()
    z03()
    z04()

# Source: https://www.cnblogs.com/chen0307/articles/9679139.html
最後
友情提醒,別把肝熬壞了,這密密麻麻的代碼就放在上面了,需要的可以自行領取。這一章到這裡就沒有了,下一章見。