爬取圖片實例 •selenium+win32爬取圖片 Python學習交流Q群:903971231##### """爬取圖片""" import os import threading import time from ctypes import windll import requests imp ...
爬取圖片實例
•selenium+win32爬取圖片
# Python learning exchange QQ group: 903971231
"""Crawl wallpaper images from netbian.com with selenium + win32 automation.

A small PySide2 window shows a slideshow of the images already saved in the
image folder while a worker thread drives a visible Chrome window: it opens
each wallpaper page, right-clicks the picture, chooses "save image as" and
pastes the target path into the native save dialog.
"""
import os
import threading
import time
from collections import deque
from ctypes import windll

import requests
import win32api
import win32clipboard
import win32con
from PySide2 import QtWidgets
from requests_html import HTMLSession, HTML
from PySide2.QtGui import QPixmap, QColor, QStandardItemModel, QStandardItem
from PySide2.QtCore import QFile, Qt, QDateTime, QDate, QTime, QTimer, QStringListModel, QModelIndex
from PySide2.QtUiTools import QUiLoader
from PySide2.QtWidgets import QApplication, QTreeView, QTreeWidget, QHeaderView, QTreeWidgetItem, QWidget
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class Test:
    """Main window controller: image slideshow plus wallpaper crawler."""

    # Folder (relative to the working directory) where images are saved.
    IMAGE_DIR = '圖片'
    # Local cache of the first list page, so restarts do not re-fetch it.
    CACHE_FILE = '爬取圖片.html'
    BASE_URL = 'http://www.netbian.com'
    # Seconds each slideshow image stays on screen.
    SLIDE_INTERVAL = 3
    # Screen coordinates used by the simulated mouse. They depend on the
    # screen resolution and on where the Chrome window / save dialog appear.
    CONTEXT_MENU_POS = (500, 700)
    FILENAME_BOX_POS = (274, 449)
    # Characters Windows does not allow in file names.
    _ILLEGAL_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self):
        """Load the UI, start the browser, fetch the first page, start the slideshow.

        Raises:
            FileNotFoundError: if ``UI.ui`` cannot be opened.
            RuntimeError: if the UI file cannot be parsed.
        """
        super().__init__()
        ui_file = QFile('UI.ui')
        if not ui_file.open(QFile.ReadOnly):
            raise FileNotFoundError('cannot open UI.ui')
        loader = QUiLoader()
        try:
            # Load while the device is still open, then release it.
            self.ui = loader.load(ui_file)
        finally:
            ui_file.close()
        if self.ui is None:
            raise RuntimeError(loader.errorString())

        self.ui.B_start.clicked.connect(self.start)
        self.ui.B_left.clicked.connect(lambda: self.change_index('left'))
        self.ui.B_right.clicked.connect(lambda: self.change_index('right'))
        # File names currently found in the image folder.
        self.img_list = []
        # Scale the picture to the label size.
        self.ui.label.setScaledContents(True)
        # Index into img_list of the picture being displayed.
        self.index = 0
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
        }
        self.text = ''
        # The save dialog is driven with the paths below, so the folder must exist.
        os.makedirs(self.IMAGE_DIR, exist_ok=True)
        # The browser must be visible (not headless): images are saved by
        # simulating real mouse clicks and key presses on the Chrome window.
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 30)
        self.session = requests.Session()
        self.pull()
        self.start_show_pic()

    # ------------------------------------------------------------------
    # Slideshow
    # ------------------------------------------------------------------
    def _list_images(self):
        """Return the sorted file names in the image folder ([] if missing)."""
        try:
            names = os.listdir(self.IMAGE_DIR)
        except FileNotFoundError:
            return []
        return sorted(
            name for name in names
            if os.path.isfile(os.path.join(self.IMAGE_DIR, name))
        )

    def _show_current(self):
        """Display the image at ``self.index``, wrapping the index into range."""
        images = self.img_list  # snapshot: another thread may rebind the list
        if not images:
            return
        self.index %= len(images)
        self.ui.label.setPixmap(
            QPixmap(os.path.join(self.IMAGE_DIR, images[self.index]))
        )

    def change_index(self, button):
        """Step the displayed image one position.

        Args:
            button: ``'left'`` to go back; any other value goes forward.
        """
        self.index += -1 if button == 'left' else 1
        self._show_current()

    def start_show_pic(self):
        """Start the slideshow loop on a daemon thread."""
        threading.Thread(target=self.show_pic, daemon=True).start()

    def show_pic(self):
        """Loop forever: rescan the image folder and advance the slideshow.

        NOTE(review): this runs on a worker thread and touches Qt widgets
        directly, which Qt does not guarantee to be safe; moving it to a
        QTimer or signals would be the robust fix.
        """
        while True:
            self.img_list = self._list_images()
            self._show_current()
            # Sleep even when the folder is empty so the loop never busy-spins.
            time.sleep(self.SLIDE_INTERVAL)
            if self.img_list:
                self.index = (self.index + 1) % len(self.img_list)

    # ------------------------------------------------------------------
    # Crawling
    # ------------------------------------------------------------------
    def start(self):
        """Start crawling on a daemon thread (slot of the start button)."""
        threading.Thread(target=self.get_img, daemon=True).start()

    def pull(self):
        """Load the first list page into ``self.text``.

        The cached copy on disk is used when present; otherwise the page is
        fetched with the browser and written to the cache.
        """
        if os.path.exists(self.CACHE_FILE):
            with open(self.CACHE_FILE, 'r', encoding='utf8') as f:
                self.text = f.read()
        else:
            self.browser.get('http://www.netbian.com/')
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            with open(self.CACHE_FILE, 'w', encoding='utf8') as f:
                f.write(self.text)
        print(self.text)
        self.ui.B_start.setEnabled(True)

    def _log(self, message):
        """Append a line to the list widget and scroll to it."""
        self.ui.listWidget.addItem(message)
        self.ui.listWidget.setCurrentRow(self.ui.listWidget.count() - 1)

    def get_img(self):
        """Download every wallpaper linked from the list pages.

        Starts from the page held in ``self.text`` and follows the
        ``/index...`` pagination links. Pages are processed iteratively with
        a visited set, so the crawl cannot recurse without bound or visit
        the same list page twice.
        """
        pending_pages = deque()
        seen_pages = set()
        page_source = self.text
        while True:
            links = BeautifulSoup(page_source, 'lxml').select('.list ul li a')
            print(links)
            for a in links:
                href = a.get('href') or ''  # anchors without href are skipped
                if href.startswith('/desk'):
                    self._download_wallpaper(self.BASE_URL + href)
                elif href.startswith('/index'):
                    page_url = self.BASE_URL + href
                    if page_url not in seen_pages:
                        seen_pages.add(page_url)
                        pending_pages.append(page_url)
            if not pending_pages:
                break
            page_url = pending_pages.popleft()
            print(page_url)
            self.browser.get(page_url)
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = page_source = self.browser.page_source
            self._log('下一頁')

    def _safe_filename(self, title):
        """Replace characters Windows forbids in file names with ``_``."""
        return ''.join(
            '_' if ch in self._ILLEGAL_FILENAME_CHARS else ch for ch in title
        ).strip()

    def _download_wallpaper(self, url):
        """Open one wallpaper detail page and save its picture if not present."""
        self.browser.get(url)
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.pic')))
        soup = BeautifulSoup(self.browser.page_source, 'lxml')
        img = soup.select_one('#main > div.endpage > div > p > a > img')
        title = self._safe_filename(img.get('title') or '') if img is not None else ''
        if not title:
            # Page layout differs from what the selector expects; skip it
            # instead of crashing the whole crawl.
            print('no titled image found on', url)
            return
        path = os.path.join(os.getcwd(), self.IMAGE_DIR, title + '.jpg')

        # Close a stray second window/tab so the simulated clicks hit the page.
        if len(self.browser.window_handles) > 1:
            self.browser.switch_to.window(self.browser.window_handles[1])
            self.browser.close()
            self.browser.switch_to.window(self.browser.window_handles[0])
        time.sleep(1)

        if os.path.exists(path):
            self._log(title + '.jpg 已存在,不下載')
            return
        self._save_via_context_menu(path)
        self._log(title + '.jpg 下載完成')

    @staticmethod
    def _tap_key(vk_code):
        """Press and release one virtual key."""
        win32api.keybd_event(vk_code, 0, 0, 0)
        win32api.keybd_event(vk_code, 0, win32con.KEYEVENTF_KEYUP, 0)

    @staticmethod
    def _set_clipboard_text(text):
        """Put *text* on the clipboard, always releasing the clipboard."""
        win32clipboard.OpenClipboard()
        try:
            win32clipboard.EmptyClipboard()
            # Unicode format: the path contains non-ASCII characters.
            win32clipboard.SetClipboardText(text, win32clipboard.CF_UNICODETEXT)
        finally:
            win32clipboard.CloseClipboard()

    def _save_via_context_menu(self, path):
        """Save the picture under the cursor to *path* via Chrome's save dialog.

        Simulates: right-click on the picture, ``v`` (the context-menu
        accelerator pressed by the original script), click the file-name
        box, paste *path*, Enter.
        """
        # Right-click at the position where the picture is expected.
        windll.user32.SetCursorPos(*self.CONTEXT_MENU_POS)
        win32api.mouse_event(win32con.MOUSEEVENTF_RIGHTDOWN, 0, 0, 0)
        time.sleep(0.05)
        win32api.mouse_event(win32con.MOUSEEVENTF_RIGHTUP, 0, 0, 0)
        time.sleep(1)
        self._tap_key(ord('V'))

        self._set_clipboard_text(path)

        # Click the file-name input box of the save dialog.
        windll.user32.SetCursorPos(*self.FILENAME_BOX_POS)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, 0, 0, 0)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTUP, 0, 0, 0)
        time.sleep(1)

        # Ctrl+V to paste the path.
        win32api.keybd_event(win32con.VK_CONTROL, 0, 0, 0)
        self._tap_key(ord('V'))
        win32api.keybd_event(win32con.VK_CONTROL, 0, win32con.KEYEVENTF_KEYUP, 0)
        time.sleep(3)

        # Enter confirms the dialog; give the download a moment to finish.
        self._tap_key(win32con.VK_RETURN)
        time.sleep(2)


if __name__ == '__main__':
    app = QApplication([])
    # Use the Fusion style.
    app.setStyle('Fusion')
    window = Test()
    window.ui.show()
    try:
        app.exec_()
    finally:
        # Shut down Chrome and chromedriver instead of leaving them running.
        window.browser.quit()
最後
今天的分享到這裡就完了,祝大家五一快樂鴨!!!