例:抓取PhotoShop視頻教程 網址http://www.mxiaobei.com/?id=424 BeautifulSoup: https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/ Requests: http://cn.python-requests.org/zh_CN/latest/
例:抓取PhotoShop視頻教程 網址http://www.mxiaobei.com/?id=424
import requests
import re
from bs4 import BeautifulSoup
import time
# Stage 1: walk the tutorial pages and collect "page title -> mp4 URL" pairs.
dicts = {}     # page title (text of the first <h2>) -> first mp4 URL found in the player div
list1 = set()  # page ids that were skipped because no usable video could be extracted
print('start')
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
urls = 'http://www.mxiaobei.com/?id='
# Compiled once outside the loop; non-greedy so it stops at the first "mp4".
mp4_pattern = re.compile(r'http.*?mp4')
for index in range(451, 565):
    try:
        # Without a timeout a single stalled connection would hang the crawl forever.
        r = requests.get(urls + str(index), headers={'user-agent': ua}, timeout=30)
    except requests.RequestException:
        # Network failure for this page: record the id and keep crawling.
        list1.add(index)
        continue
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'lxml')
    title = soup.find(name='h2')
    mp4url = soup.find('div', id='CuPlayer')
    # find() returns None when the tag is absent; both tags are needed below.
    if title is None or mp4url is None:
        list1.add(index)
        continue
    mpurl = mp4_pattern.search(mp4url.text)
    if mpurl is None:
        # The player div exists but carries no mp4 link.
        list1.add(index)
        continue
    dicts[title.text] = mpurl.group()
print(dicts)
print(list1)
# Stage 2: download every collected video into "<title>.mp4" in the working directory.
for name, video_url in dicts.items():
    # Titles come from scraped HTML; replace characters that are not valid in
    # file names (path separators and the Windows-reserved set) so open() works.
    filename = re.sub(r'[\\/:*?"<>|]', '_', name) + '.mp4'
    try:
        # The context manager releases the streamed connection even on errors.
        with requests.get(video_url, stream=True, timeout=30) as r:
            # Fail before creating the file so an HTTP error page is never saved as a video.
            r.raise_for_status()
            with open(filename, "wb") as mp4:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        mp4.write(chunk)
    except requests.RequestException as err:
        # One broken link must not abort the remaining downloads.
        # NOTE: a failure in mid-stream can leave a partial file behind.
        print(name + '下載失敗: ' + str(err))
        continue
    print(name + '下載完成')
print('end!')
- BeautifulSoup: https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/
- Requests: http://cn.python-requests.org/zh_CN/latest/