Python版本:3.5.2 日期:2018/1/21 ~~~~ __Author__ = "Lance " coding = utf 8 from urllib import request from urllib import parse from http import cookiejar f ...
# Python版本:3.5.2
# 日期:2018/1/21
__Author__ = "Lance#"
# -*- coding: utf-8 -*-
from urllib import request
from urllib import parse
from http import cookiejar
from aip.ocr import AipOcr
import re
class Hust(object):
    """Scripted client for the jwzx.hrbust.edu.cn academic site.

    The intended call sequence is: ``ocr_captcha()`` to download and
    recognise the captcha, ``set_postdata()`` to copy it into the login
    form, ``login()`` to submit the form, then ``get_scoal()`` and
    ``display()`` to fetch and print the score table.

    Every request is sent through this instance's own opener so that the
    session cookie stays attached to the instance that logged in.
    """

    # Captures the content of each plain ``<td>`` cell; re.S lets a cell
    # span several lines.
    _CELL_PATTERN = re.compile(r'<td>(.*?)</td>', re.S)
    # Layout of the flat cell list consumed by display(): a few leading
    # cells are skipped, then every score row occupies a fixed cell count.
    _LEADING_CELLS = 3
    _CELLS_PER_ROW = 13

    def __init__(self, stu_id, passwd):
        """Prepare URLs, form data and a cookie-aware opener.

        Args:
            stu_id: Value sent as ``j_username`` in the login form.
            passwd: Value sent as ``j_password`` in the login form.
        """
        # Captcha image, login form target and score page.
        self.__url_check = "http://jwzx.hrbust.edu.cn/academic/getCaptcha.do"
        self.__url_login = "http://jwzx.hrbust.edu.cn/academic/j_acegi_security_check"
        self.__url_scoal = "http://jwzx.hrbust.edu.cn/academic/manager/score/studentOwnScore.do"
        # Request headers that imitate a desktop browser.
        self.__headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:57.0) Gecko/20100101 Firefox/57.0"
        }
        self.__captcha = ''
        # Credentials for the OCR service (placeholders to be filled in).
        self.__APP_ID = 'xxxxxx'
        self.__API_KEY = 'xxxxxx'
        self.__SECRET_KEY = 'xxxxxx'
        # Login form fields; "j_captcha" is filled in by set_postdata().
        self.__post_data = {
            "groupId": "",
            "j_username": stu_id,
            "j_password": passwd,
            "j_captcha": ''
        }
        # Cookie jar -> cookie processor -> opener, so cookies set by one
        # response are sent back with the following requests.
        self.__cookie = cookiejar.CookieJar()
        self.__cookieProc = request.HTTPCookieProcessor(self.__cookie)
        self.__opener = request.build_opener(self.__cookieProc)
        # Kept for backward compatibility: code that calls
        # request.urlopen() directly still goes through this opener.
        request.install_opener(self.__opener)

    def ocr_captcha(self):
        """Download the captcha image and store the recognised text.

        The image is sent to the OCR client and the first recognised line
        is kept for ``get_captcha()`` / ``set_postdata()``. When the OCR
        reply carries no recognised words (empty ``words_result`` or a
        reply without that key), an empty string is stored instead of
        raising ``IndexError`` / ``KeyError``.
        """
        req = request.Request(self.__url_check, headers=self.__headers)
        with self.__opener.open(req) as resp:
            image = resp.read()
        client = AipOcr(self.__APP_ID, self.__API_KEY, self.__SECRET_KEY)
        reply = client.basicGeneral(image)
        words = (reply or {}).get('words_result') or []
        # NOTE(review): an empty captcha is presumably rejected by the
        # site, which lets a retrying caller simply try again - confirm.
        self.__captcha = words[0]['words'] if words else ''

    def get_captcha(self):
        """Return the captcha text stored by the last ``ocr_captcha()`` call."""
        return self.__captcha

    def set_postdata(self):
        """Copy the stored captcha text into the login form data."""
        self.__post_data["j_captcha"] = self.__captcha

    def login(self):
        """Submit the login form and return the response body as text.

        Returns:
            The response body decoded as GBK.
        """
        # URL-encode the form fields and send them as the request body.
        payload = parse.urlencode(self.__post_data).encode()
        req = request.Request(self.__url_login, headers=self.__headers)
        with self.__opener.open(req, data=payload) as resp:
            # The login page is GBK-encoded.
            return resp.read().decode("GBK")

    def get_scoal(self):
        """Fetch the score page and return the text of its ``<td>`` cells.

        Returns:
            A list with one string per ``<td>...</td>`` cell, in page
            order, with every ``"\\n "`` sequence removed.
        """
        req = request.Request(self.__url_scoal, headers=self.__headers)
        with self.__opener.open(req) as resp:
            # NOTE(review): decoded with the default codec (UTF-8) while
            # the login page is decoded as GBK - confirm the page encoding.
            page = resp.read().decode()
        return [cell.replace("\n ", "")
                for cell in self._CELL_PATTERN.findall(page)]

    def display(self, list):
        """Print the score table built from a flat list of cell strings.

        Args:
            list: Cell strings as returned by ``get_scoal()``. The first
                3 cells are skipped; each following group of 13 cells is
                one row, and an incomplete trailing group is ignored. The
                parameter name shadows the builtin but is kept so that
                existing keyword callers continue to work.
        """
        lead = self._LEADING_CELLS
        width = self._CELLS_PER_ROW
        row_count = max(0, (len(list) - lead) // width)
        rows = [list[start:start + width]
                for start in range(lead, lead + row_count * width, width)]
        print("學年 學期 及格標誌 分數 學分 課程名")
        for item in rows:
            # The score cell may be wrapped in a red <span>; keep only
            # the text inside it.
            score = item[6].replace('<span style=" color:#FF0000">', "")
            score = score.replace("</span>", "")
            # Columns follow the header order: year, term, pass flag,
            # score, credits, course name.
            print("{} {} {:>5s} {:5s} {:^5s} {:^20s}".format(
                item[0], item[1], item[12], score, item[7], item[3]))
if __name__ == '__main__':
    # Script entry point: keep recognising a fresh captcha and logging in
    # until the response no longer contains the captcha-error message,
    # then fetch and print the scores.
    attempt = 1
    # Text the login response contains when the captcha was wrong.
    err_str = "輸入的驗證碼不正確!"
    # Replace the placeholders with a real student id and password.
    stu = Hust("xxxxxx", "xxxxxx")
    while True:
        stu.ocr_captcha()
        print("識別到的驗證碼為: %s ------ " % stu.get_captcha(), end="")
        stu.set_postdata()
        html = stu.login()
        if err_str not in html:
            print("驗證碼正確")
            break
        attempt += 1
        print("驗證碼錯誤,啟動第%d次識別" % attempt)
    print()
    print("Scoal Info".center(70, "-"))
    # Named "scores" rather than "list" so the builtin is not shadowed
    # at module level.
    scores = stu.get_scoal()
    stu.display(scores)
    print("End".center(70, "-"))
完成效果圖:
請自動忽略這個人掛科的消息,0.0
可能我理解不周,請謹慎參考,我會後期完善,謝謝支持!
歡迎探討。