簡介 抖音,是一款可以拍短視頻的音樂創意短視頻社交軟體,該軟體於2016年9月上線,是一個專注年輕人的15秒音樂短視頻社區。用戶可以通過這款軟體選擇歌曲,拍攝15秒的音樂短視頻,形成自己的作品。此APP已在Android各大應用商店和App Store均有上線。 今天咱們就用Python爬取抖音視頻 ...
簡介
抖音,是一款可以拍短視頻的音樂創意短視頻社交軟體,該軟體於2016年9月上線,是一個專注年輕人的15秒音樂短視頻社區。用戶可以通過這款軟體選擇歌曲,拍攝15秒的音樂短視頻,形成自己的作品。此APP已在Android各大應用商店和App Store均有上線。
今天咱們就用Python爬取抖音視頻
準備:
環境:Python3.6+Windows
IDE:你開心就好,喜歡用哪個就用哪個
模塊:
1 from splinter.driver.webdriver.chrome import Options, Chrome 2 from splinter.browser import Browser 3 from contextlib import closing 4 import requests, json, time, re, os, sys, time 5 from bs4 import BeautifulSoup
獲得視頻播放地址
-
查詢的用戶ID
-
視頻名字列表
-
視頻鏈接列表
-
用戶昵稱
def get_video_urls(self, user_id, max_retries=3):
    """Look up a user and collect the share links of all their videos.

    The search endpoint is queried for ``user_id``; the returned users are
    scanned for one whose ``unique_id`` equals ``user_id``.  The user's post
    list is then fetched and a file name is derived for each video.

    Parameters:
        user_id: the ID typed by the user; compared against the
            ``unique_id`` field of each search result.
        max_retries: how many times the search request is attempted before
            giving up (the original code retried forever).

    Returns:
        video_names: list of file names (``<description>.mp4`` or
            ``<counter>.mp4`` for videos with the default description).
        video_urls: list of share URLs, parallel to ``video_names``.
        nickname: the nickname of the matched user.

    Raises:
        ValueError: if no search result matches ``user_id`` after
            ``max_retries`` attempts.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # The keyword comes straight from user input, so escape it before it is
    # spliced into the query string.
    search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % quote(str(user_id), safe='')

    user_info = None
    for _ in range(max_retries):
        # NOTE(review): verify=False disables TLS certificate checks.
        req = requests.get(url=search_url, verify=False)
        result = json.loads(req.text)
        for entry in result.get('user_list') or []:
            if entry['user_info']['unique_id'] == user_id:
                user_info = entry['user_info']
                break
        if user_info is not None:
            break
    if user_info is None:
        raise ValueError('no user with unique_id %r found' % (user_id,))

    # Ask for every post in one page by using the user's post count.
    user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (user_info['uid'], user_info['aweme_count'])
    req = requests.get(url=user_url, verify=False)
    posts = json.loads(req.text)

    video_names = []
    video_urls = []
    unnamed_index = 1
    for each in posts['aweme_list']:
        share_desc = each['share_info']['share_desc']
        # Videos carrying only the default description get a numeric name.
        # NOTE(review): this literal must match the API's default share text
        # exactly (Simplified vs. Traditional characters) - confirm against
        # a live response.
        if '抖音-原創音樂短視頻社區' == share_desc:
            video_names.append(str(unnamed_index) + '.mp4')
            unnamed_index += 1
        else:
            video_names.append(share_desc + '.mp4')
        video_urls.append(each['share_info']['share_url'])

    return video_names, video_urls, user_info['nickname']
獲得帶水印的視頻播放地址
-
video_url:帶水印的視頻播放地址
-
download_url: 帶水印的視頻下載地址
def get_download_url(self, video_url):
    """Extract the (watermarked) download address from a video share page.

    The share page is fetched, its last ``<script>`` element is searched for
    a ``var data = [...]`` assignment, and the JSON inside the brackets is
    parsed.

    Parameters:
        video_url: URL of the video's share page.

    Returns:
        download_url: the first entry of ``video.play_addr.url_list``.

    Raises:
        ValueError: if the page has no ``<script>`` element or the
            ``var data = [...]`` assignment is not found in the last one.
    """
    # NOTE(review): verify=False disables TLS certificate checks.
    req = requests.get(url=video_url, verify=False)
    bf = BeautifulSoup(req.text, 'lxml')
    scripts = bf.find_all('script')
    if not scripts:
        raise ValueError('no <script> element found in %s' % video_url)
    # Raw string: '\[' is an invalid escape sequence in a normal literal.
    matches = re.findall(r'var data = \[(.+)\];', str(scripts[-1]))
    if not matches:
        raise ValueError('video data not found in %s' % video_url)
    video_html = json.loads(matches[0])
    download_url = video_html['video']['play_addr']['url_list'][0]
    return download_url
視頻下載
-
video_url: 帶水印的視頻地址
-
video_name: 視頻名
-
watermark_flag: 是否下載不帶水印的視頻
def video_downloader(self, video_url, video_name, watermark_flag=True):
    """Download one video to disk while printing progress to stdout.

    Parameters:
        video_url: share URL of the (watermarked) video.
        video_name: path of the file to write.
        watermark_flag: when true the download address is resolved through
            ``remove_watermark``; otherwise through ``get_download_url``.

    Returns:
        None
    """
    if watermark_flag:
        video_url = self.remove_watermark(video_url)
    else:
        video_url = self.get_download_url(video_url)

    chunk_size = 1024
    size = 0
    # NOTE(review): verify=False disables TLS certificate checks.
    with closing(requests.get(video_url, stream=True, verify=False)) as response:
        if response.status_code != 200:
            # Report the failure instead of skipping the video silently.
            sys.stdout.write(' [下載失敗]:HTTP %d\n' % response.status_code)
            return

        # The header is absent for chunked responses; 0 means "unknown".
        content_size = int(response.headers.get('content-length', 0))
        if content_size:
            sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

        with open(video_name, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                size += len(data)
                if content_size:
                    sys.stdout.write(' [下載進度]:%.2f%%' % float(size / content_size * 100) + '\r')
                else:
                    # Total unknown: show the amount received so far.
                    sys.stdout.write(' [下載進度]:%0.2f MB' % (size / chunk_size / 1024) + '\r')
                sys.stdout.flush()
獲得無水印的視頻播放地址
def remove_watermark(self, video_url):
    """Resolve the watermark-free download address of a video.

    Drives the browser held in ``self.driver``: opens
    http://douyin.iiilab.com/, types the share URL into the page's input
    element, clicks the submit button and reads the link out of the first
    result paragraph.

    Parameters:
        video_url: share URL of the watermarked video.

    Returns:
        The ``href`` of the first anchor found in the result paragraph.
    """
    browser = self.driver
    browser.visit('http://douyin.iiilab.com/')
    browser.find_by_tag('input').fill(video_url)
    browser.find_by_xpath('//button[@class="btn btn-default"]').click()

    result_paragraphs = browser.find_by_xpath('//div[@class="thumbnail"]/div/p')
    soup = BeautifulSoup(result_paragraphs[0].html, 'lxml')
    anchor = soup.find('a')
    return anchor.get('href')
下載視頻
def run(self):
    """Interactive entry point: ask for a user ID and download every video.

    Prints the banner, reads the ID from stdin, resolves the user's videos
    with ``get_video_urls`` and saves each one into a directory named after
    the user's nickname.

    Parameters:
        None

    Returns:
        None
    """
    self.hello()
    user_id = input('請輸入ID(例如40103580):')
    video_names, video_urls, nickname = self.get_video_urls(user_id)
    # exist_ok avoids the separate "is it already there" directory listing.
    os.makedirs(nickname, exist_ok=True)
    print('視頻下載中:共有%d個作品!\n' % len(video_urls))
    for num, (raw_name, video_url) in enumerate(zip(video_names, video_urls), start=1):
        print(' 解析第%d個視頻鏈接 [%s] 中,請稍後!\n' % (num, video_url))
        # Strip BOTH kinds of path separator; the earlier if/elif chain
        # removed only one kind and left the other in the file name.
        video_name = raw_name.replace('\\', '').replace('/', '')
        self.video_downloader(video_url, os.path.join(nickname, video_name))
        print('\n')

    print('下載完成!')
全部代碼
# -*- coding:utf-8 -*-

# Python learning exchange group: 125240963

from contextlib import closing
import json
import os
import re
import sys
import time
from urllib.parse import quote

from bs4 import BeautifulSoup
import requests
from splinter.browser import Browser
from splinter.driver.webdriver.chrome import Options, Chrome


class DouYin(object):
    """Downloader for the videos of a Douyin user."""

    def __init__(self, width=500, height=300, executable_path='D:/chromedriver'):
        """Start the headless Chrome browser used to resolve download links.

        Parameters:
            width: not used by this class; kept for backward compatibility.
            height: not used by this class; kept for backward compatibility.
            executable_path: location of the chromedriver binary.
        """
        # Headless browser
        chrome_options = Options()
        chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
        self.driver = Browser(driver_name='chrome', executable_path=executable_path, options=chrome_options, headless=True)

    def get_video_urls(self, user_id, max_retries=3):
        """Look up a user and collect the share links of all their videos.

        Parameters:
            user_id: the ID typed by the user; compared against the
                ``unique_id`` field of each search result.
            max_retries: how many times the search request is attempted
                before giving up (the original code retried forever).

        Returns:
            video_names: list of file names for the videos.
            video_urls: list of share URLs, parallel to ``video_names``.
            nickname: the nickname of the matched user.

        Raises:
            ValueError: if no search result matches ``user_id`` after
                ``max_retries`` attempts.
        """
        # The keyword comes straight from user input, so escape it before it
        # is spliced into the query string.
        search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % quote(str(user_id), safe='')

        user_info = None
        for _ in range(max_retries):
            # NOTE(review): verify=False disables TLS certificate checks.
            req = requests.get(url=search_url, verify=False)
            result = json.loads(req.text)
            for entry in result.get('user_list') or []:
                if entry['user_info']['unique_id'] == user_id:
                    user_info = entry['user_info']
                    break
            if user_info is not None:
                break
        if user_info is None:
            raise ValueError('no user with unique_id %r found' % (user_id,))

        # Ask for every post in one page by using the user's post count.
        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (user_info['uid'], user_info['aweme_count'])
        req = requests.get(url=user_url, verify=False)
        posts = json.loads(req.text)

        video_names = []
        video_urls = []
        unnamed_index = 1
        for each in posts['aweme_list']:
            share_desc = each['share_info']['share_desc']
            # Videos carrying only the default description get a numeric name.
            # NOTE(review): this literal must match the API's default share
            # text exactly (Simplified vs. Traditional characters) - confirm
            # against a live response.
            if '抖音-原創音樂短視頻社區' == share_desc:
                video_names.append(str(unnamed_index) + '.mp4')
                unnamed_index += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(each['share_info']['share_url'])

        return video_names, video_urls, user_info['nickname']

    def get_download_url(self, video_url):
        """Extract the (watermarked) download address from a share page.

        Parameters:
            video_url: URL of the video's share page.

        Returns:
            download_url: the first entry of ``video.play_addr.url_list``.

        Raises:
            ValueError: if the page has no ``<script>`` element or the
                ``var data = [...]`` assignment is not found in the last one.
        """
        # NOTE(review): verify=False disables TLS certificate checks.
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')
        scripts = bf.find_all('script')
        if not scripts:
            raise ValueError('no <script> element found in %s' % video_url)
        # Raw string: '\[' is an invalid escape sequence in a normal literal.
        matches = re.findall(r'var data = \[(.+)\];', str(scripts[-1]))
        if not matches:
            raise ValueError('video data not found in %s' % video_url)
        video_html = json.loads(matches[0])
        download_url = video_html['video']['play_addr']['url_list'][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to disk while printing progress to stdout.

        Parameters:
            video_url: share URL of the (watermarked) video.
            video_name: path of the file to write.
            watermark_flag: when true the download address is resolved
                through ``remove_watermark``; otherwise through
                ``get_download_url``.

        Returns:
            None
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        chunk_size = 1024
        size = 0
        # NOTE(review): verify=False disables TLS certificate checks.
        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            if response.status_code != 200:
                # Report the failure instead of skipping the video silently.
                sys.stdout.write(' [下載失敗]:HTTP %d\n' % response.status_code)
                return

            # The header is absent for chunked responses; 0 means "unknown".
            content_size = int(response.headers.get('content-length', 0))
            if content_size:
                sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

            with open(video_name, "wb") as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    if content_size:
                        sys.stdout.write(' [下載進度]:%.2f%%' % float(size / content_size * 100) + '\r')
                    else:
                        # Total unknown: show the amount received so far.
                        sys.stdout.write(' [下載進度]:%0.2f MB' % (size / chunk_size / 1024) + '\r')
                    sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Resolve the watermark-free download address of a video.

        Opens http://douyin.iiilab.com/ in the browser, submits the share
        URL through the page's form and reads the link from the first result
        paragraph.

        Parameters:
            video_url: share URL of the watermarked video.

        Returns:
            The ``href`` of the first anchor found in the result paragraph.
        """
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    def run(self):
        """Interactive entry point: ask for a user ID and download the videos.

        Parameters:
            None

        Returns:
            None
        """
        self.hello()
        user_id = input('請輸入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        # exist_ok avoids the separate "is it already there" directory listing.
        os.makedirs(nickname, exist_ok=True)
        print('視頻下載中:共有%d個作品!\n' % len(video_urls))
        for num, (raw_name, video_url) in enumerate(zip(video_names, video_urls), start=1):
            print(' 解析第%d個視頻鏈接 [%s] 中,請稍後!\n' % (num, video_url))
            # Strip BOTH kinds of path separator; the earlier if/elif chain
            # removed only one kind and left the other in the file name.
            video_name = raw_name.replace('\\', '').replace('/', '')
            self.video_downloader(video_url, os.path.join(nickname, video_name))
            print('\n')

        print('下載完成!')

    def hello(self):
        """Print the welcome banner.

        Parameters:
            None

        Returns:
            None
        """
        print('*' * 100)
        print('\t\t\t\t抖音App視頻下載小助手')
        print('\t\t作者:Python學習交流群:125240963')
        print('*' * 100)


if __name__ == '__main__':
    douyin = DouYin()
    try:
        douyin.run()
    finally:
        # Shut the headless browser down so no Chrome process is left behind.
        douyin.driver.quit()