廢話不多說,直接上代碼(使用 Selenium 與 requests 抓取並下載抖音搜索結果中的視頻,完整代碼見下文)。
廢話不多說,直接上代碼
import html
import os
import re
import time
import urllib.parse
import uuid

import requests
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Directory that downloaded videos are written to (created on demand).
VIDEO_DIR = r'C:\Users\lvye\Desktop\dou_yin\video'

# XPath locators used on the search-result page and the video detail page.
# NOTE(review): the login-button id looks auto-generated and may change
# between site deployments -- confirm against the live page.
LOGIN_BUTTON_XPATH = '//*[@id="qdblhsHs"]/button'
RELOAD_BUTTON_XPATH = '//*[@id="dark"]/div[2]/div/div[3]/div[2]/div/div/span'
DETAIL_LINK_XPATH = '//*[@id="dark"]/div[2]/div/div[3]/div[2]/ul/li/div/div/a[1]'
VIDEO_TAG_XPATH = (
    '//*[@id="root"]/div/div[2]/div/div/div[1]/div[1]/div[2]/div/div[1]'
    '/div/div[2]/div[2]/xg-video-container/video'
)

# Matches the first <source> tag of the player in the serialized page HTML.
_VIDEO_SRC_RE = re.compile(r'<source class="" src="(.*?)"')

# Scrolls the document far down so that more search results are lazy-loaded.
_SCROLL_TO_BOTTOM_JS = "var q=document.documentElement.scrollTop=10000"

option = ChromeOptions()
option.add_argument(
    'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"'
)
# Hide the usual automation markers so the site is less likely to detect
# that the browser is driven by a tool.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
# Make navigator.webdriver read as undefined on every new document.
browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
    'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
})
browser.maximize_window()


def _extract_video_url(page_source):
    """Return the absolute video URL found in a detail page's HTML.

    Raises:
        ValueError: if the page contains no matching ``<source>`` tag.
    """
    match = _VIDEO_SRC_RE.search(page_source)
    if match is None:
        raise ValueError('no <source> tag with a video URL found in the detail page')
    # Attribute values in serialized HTML have "&" escaped as "&amp;";
    # undo that so query parameters survive intact.
    src = html.unescape(match.group(1))
    if src.startswith(('http://', 'https://')):
        return src
    # Scheme-less value: prepend the scheme, as the original script did.
    return 'https:' + src


def _close_extra_windows(main_handle):
    """Close every window except ``main_handle`` and focus ``main_handle``."""
    for handle in browser.window_handles:
        if handle != main_handle:
            browser.switch_to.window(handle)
            browser.close()
    browser.switch_to.window(main_handle)


def douyincrawler(keyword):
    """Search Douyin for ``keyword`` and download every video that is found.

    Opens the video search page, clicks the login button and waits 15 seconds
    for a manual QR-code login, scrolls down five times to load more results,
    then opens each result in its own tab, extracts the video URL from the
    page source and passes it to ``savevideo``. A failure on one video is
    printed and does not stop the remaining ones.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the URL path.
    """
    url = (
        'https://www.douyin.com/search/'
        + urllib.parse.quote(keyword, safe='')
        + '?publish_time=0&sort_type=0&source=switch_tab&type=video'
    )
    browser.get(url)
    # Click "login"; the user then scans the QR code with the Douyin app.
    browser.find_element(By.XPATH, LOGIN_BUTTON_XPATH).click()
    time.sleep(15)  # time allowed for the manual QR-code login

    for _ in range(5):  # scroll down repeatedly to load more results
        time.sleep(5)
        browser.execute_script(_SCROLL_TO_BOTTOM_JS)
        # NOTE(review): these markers are written in Traditional Chinese;
        # confirm they match the text the site actually renders.
        if '服務出現異常' in browser.page_source:
            browser.refresh()
        if '服務異常,重新' in browser.page_source:
            # Click the "reload" control shown by the error banner.
            browser.find_element(By.XPATH, RELOAD_BUTTON_XPATH).click()

    detail_links = browser.find_elements(By.XPATH, DETAIL_LINK_XPATH)
    print('共計偵查到{}個視頻數據'.format(len(detail_links)))

    main_handle = browser.current_window_handle
    for link in detail_links:
        try:
            # Click through JavaScript: a native click can fail when the
            # element exists but is covered or off-screen.
            browser.execute_script("arguments[0].click();", link)
            # The detail page opens in a new tab; wait until it exists.
            WebDriverWait(browser, 10).until(
                lambda drv: len(drv.window_handles) > 1
            )
            new_handle = next(
                h for h in browser.window_handles if h != main_handle
            )
            browser.switch_to.window(new_handle)
            # Explicit wait for the <video> tag to be present.
            WebDriverWait(browser, 10).until(
                EC.presence_of_element_located((By.XPATH, VIDEO_TAG_XPATH))
            )
            savevideo(_extract_video_url(browser.page_source))
        except Exception as e:
            # Best effort: report the failure and carry on with the next video.
            print(e)
        finally:
            # Always close the detail tab and return to the result list, so a
            # failure cannot leave later iterations in the wrong window.
            _close_extra_windows(main_handle)


def savevideo(video_url, video_dir=VIDEO_DIR):
    """Download ``video_url`` into ``video_dir`` under a random ``.mp4`` name.

    Args:
        video_url: Absolute URL of the video file.
        video_dir: Target directory; created if it does not exist.

    Raises:
        requests.RequestException: on network errors, timeouts or a non-2xx
            response, so an error page is never saved as a video.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36",
    }
    os.makedirs(video_dir, exist_ok=True)
    video_full_path = os.path.join(video_dir, str(uuid.uuid4()) + '.mp4')
    # (connect timeout, read timeout) so a stalled server cannot hang the crawl.
    response = requests.get(url=video_url, headers=headers, timeout=(10, 60))
    response.raise_for_status()
    with open(video_full_path, 'wb') as f:
        f.write(response.content)
    print('已下載:{}'.format(video_url))


if __name__ == '__main__':
    try:
        douyincrawler('街拍美女')
    finally:
        # Shut down Chrome and chromedriver even when the crawl fails.
        browser.quit()
成果展示:
註:該代碼只做技術分享,不可用於違法犯罪