正方教务爬虫

基于selenium的正方教务成绩爬虫

关于


一个基于 selenium 的正方教务系统爬虫,可以自动登录、跳转到成绩页面,并将成绩保存为 Excel 文件(score.xlsx)。


能力

自动登录✔
自动验证码识别填写✔
失败自动重试✔
自动保存成绩✔

使用

step 1 下载

直接复制或下载本仓库里的spider.py文件到本地

step 2 安装最新版Chrome浏览器(已有请跳过)

https://www.google.cn/intl/zh-CN/chrome/

step 3 下载对应的驱动

下载地址 http://chromedriver.storage.googleapis.com/index.html
驱动安装教程 https://blog.csdn.net/m0_67575344/article/details/126142295

step 4 安装所需模块

  1. ddddocr
  2. selenium
  3. openpyxl
命令行安装

pip install ddddocr
pip install selenium
pip install openpyxl
自动安装

使用pycharm自动安装

step 5 运行

先在脚本顶部的配置项中填写账号、密码、驱动路径和服务器地址,然后在 IDE(如 PyCharm)或命令行中直接运行 spider.py(例如:python spider.py)。

完整代码

# -*- coding: utf-8 -*-
import time
import datetime
import ddddocr
import openpyxl
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains

# Browser setup: runs at import time.
service = ChromeService(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe') # path to the chromedriver executable
# Shared WebDriver instance used by every function in this script.
chrome = webdriver.Chrome(service=service)

# Configuration

name = '123456' # account (typed into the login form's user-name field)
pwd = '132456l' # password
safe_time = 3 # pause between retries, passed to time.sleep() (seconds)
servernum = 0 # index into `servers` selecting which server to use: 0, 1, 2 or 3
retry = True # retry after a failed login / score query
# Run mode read by main(): 1 runs the score query loop, 2 only logs 'Exit'.
mode = 1
# Base URLs of the candidate servers; `servernum` picks one of them.
servers = ['http://127.0.0.1/', 'http://127.0.0.1/', 'http://127.0.0.1/', 'http://127.0.0.1/']


def print_INFO(message):
    """Print ``message`` to stdout, prefixed with the current ``[HH:MM:SS]`` time.

    Args:
        message: Text to log. Non-string values are formatted with ``str()``
            instead of raising ``TypeError`` during concatenation.
    """
    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
    print(f'[{timestamp}]{message}')


def print_ERROR(error):
    """Print ``error`` to stdout in bold red, prefixed with ``[HH:MM:SS]``.

    The text is wrapped in the ANSI sequences ``ESC[1;31m`` ... ``ESC[0m``.

    Args:
        error: Text to log. Non-string values are formatted with ``str()``
            instead of raising ``TypeError`` during concatenation.
    """
    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
    print(f'[{timestamp}]\033[1;31m{error} \033[0m')


def print_Exception(e):
    """Print a red exception banner followed by the exception itself.

    Args:
        e: The exception to report. When an exception *class* is passed
            (e.g. ``print_Exception(Exception)`` from inside an ``except``
            block), the exception currently being handled is reported
            instead, so the log shows the real error rather than
            ``<class 'Exception'>``.
    """
    import sys  # local import: only needed for the fallback below

    if isinstance(e, type):
        active = sys.exc_info()[1]
        if active is not None:
            e = active
    print("\033[1;31m异常!\033[0m\n")
    print(e)


# Lazily created ddddocr recognizer, shared by every recognize() call.
_OCR_ENGINE = None


def recognize():
    """Screenshot the captcha image and return the text ddddocr reads from it.

    The element with id ``icode`` is saved to ``img.png`` in the working
    directory, and that file is then passed to the OCR engine.

    Returns:
        The recognized captcha text, or ``None`` when the screenshot could
        not be written.

    Raises:
        Whatever Selenium raises when the ``icode`` element is missing.
    """
    global _OCR_ENGINE

    if not chrome.find_element(By.ID, 'icode').screenshot('img.png'):  # capture the captcha
        return None

    with open('img.png', 'rb') as f:
        img = f.read()
    # Build the recognizer once and reuse it: constructing DdddOcr loads its
    # model, which is wasteful to repeat on every login retry.
    if _OCR_ENGINE is None:
        _OCR_ENGINE = ddddocr.DdddOcr()
    result = _OCR_ENGINE.classification(img)
    print_INFO("验证码" + result)
    return result


def auto_Login():
    """Open the configured server and try to log in once.

    Loads ``servers[servernum]``, fills in the account, password and the
    OCR-recognized captcha, then clicks the login button.

    Returns:
        True when the page title after submitting is neither the error page
        nor the login page; False on any failure (page could not be loaded,
        form element missing, captcha not captured, or login rejected).
    """
    error_title = 'ERROR - 出错啦!'
    login_title = '欢迎使用正方教务管理系统!请登录'

    print_INFO("尝试登录" + str(servernum) + '号服务器')
    try:
        # Loading the page is inside the handler so that an unreachable
        # server counts as a failed attempt instead of aborting the script.
        chrome.get(servers[servernum])
        if chrome.title == error_title:
            return False
        chrome.find_element(By.ID, 'txtUserName').send_keys(name)
        chrome.find_element(By.ID, 'TextBox2').send_keys(pwd)
        captcha = recognize()
        if not captcha:
            # recognize() returns None when the screenshot failed; passing
            # that to send_keys() would raise a TypeError.
            print_ERROR('验证码识别失败')
            return False
        chrome.find_element(By.ID, 'txtSecretCode').send_keys(captcha)
        chrome.find_element(By.ID, 'Button1').click()
    except Exception as e:  # not a bare except: Ctrl-C must still stop the retry loops
        print_Exception(e)
        return False
    if chrome.title in (error_title, login_title):
        print_ERROR('跳转失败')
        return False
    return True


def get_score():
    """Navigate to the score page and export the score table to ``score.xlsx``.

    Steps: hover the fifth top-level menu entry, click its fourth sub-entry,
    wait, switch into the ``zhuti`` frame, click ``btn_zcj`` and copy the
    result table cell by cell into the first worksheet of a new workbook.

    If the session turns out to be on the error or login page, a re-login is
    attempted and False is returned so the caller can query again.

    Returns:
        True when ``score.xlsx`` was written; False otherwise.
    """
    bad_titles = ('ERROR - 出错啦!', '欢迎使用正方教务管理系统!请登录')
    rows_xpath = '/html/body/form/div[2]/div/span/div[1]/table[1]/tbody/tr'
    max_columns = 19  # only the first 19 columns of each row are exported

    try:
        hovertarget = chrome.find_element(By.XPATH, '/html/body/div/div[1]/ul/li[5]/a/span')
        ActionChains(chrome).move_to_element(hovertarget).perform()
        chrome.find_element(By.XPATH, '/html/body/div/div[1]/ul/li[5]/ul/li[4]/a').click()
    except Exception as e:  # not a bare except: Ctrl-C must still stop the retry loops
        print_Exception(e)
        print_ERROR('成绩查询按钮点击失败')
        return False
    if chrome.title in bad_titles:
        auto_Login()
        return False

    time.sleep(5)  # fixed wait for the score page to load
    try:
        chrome.switch_to.frame('zhuti')
        chrome.find_element(By.ID, 'btn_zcj').click()
        if chrome.title in bad_titles:
            chrome.switch_to.default_content()
            # Log in again, pausing between attempts and honouring the
            # global `retry` switch instead of hammering the server.
            while not auto_Login():
                if not retry:
                    break
                time.sleep(safe_time)
            return False

        # Save to Excel.
        work_book = openpyxl.Workbook()
        sheet = work_book.worksheets[0]
        rows = chrome.find_elements(By.XPATH, rows_xpath)
        for row_index, row in enumerate(rows, start=1):
            # Take the cells that actually exist in this row, so a short row
            # no longer aborts the whole export.
            cells = row.find_elements(By.XPATH, './td')[:max_columns]
            for col_index, cell in enumerate(cells, start=1):
                sheet.cell(row_index, col_index, cell.text)
        work_book.save('score.xlsx')
    except Exception as e:
        print_Exception(e)
        print_ERROR('成绩获取错误')
        return False
    finally:
        # Always leave the 'zhuti' frame; otherwise the next attempt would
        # look for the menu inside the frame and fail.
        try:
            chrome.switch_to.default_content()
        except Exception as e:
            print_Exception(e)
    return True


def _login_with_retry():
    """Log in, retrying while the global ``retry`` is set; return True once logged in."""
    attempts = 0
    while True:
        attempts += 1
        if auto_Login():
            print_INFO('登录成功')
            return True
        print_ERROR('尝试登录失败')
        if not retry:
            return False
        if attempts > 10:
            print('\033[0;32m已经为你尝试了' + str(
                attempts) + '次登录, 全部登录失败。建议更换服务器或检查你的账号密码是否正确。\033[0m')
        # NOTE(review): assumed to pause on every retry (as the score-query
        # loop does) — confirm against the original script.
        time.sleep(safe_time)


def _query_scores_with_retry(max_attempts=5):
    """Query the scores up to ``max_attempts`` times.

    Returns:
        True when every attempt failed and a fresh login should be tried;
        False when the query succeeded or retrying is disabled.
    """
    for _ in range(max_attempts):
        if get_score():
            print_INFO('查询成功')
            return False
        print_ERROR('查询失败')
        if not retry:
            return False
        time.sleep(safe_time)
    return True


def main():
    """Run the scraper according to the global ``mode``.

    Mode 1 maximizes the browser and loops: log in, query the scores, then
    wait for the user to press Enter before starting the next round. When
    all query attempts fail, the loop starts over with a new login at once.
    Mode 2 only logs ``Exit``. Any other mode does nothing.
    """
    if mode == 1:
        print_INFO('开始查询成绩')
        chrome.maximize_window()
        while True:
            # Without a session the score query cannot succeed, so it is
            # skipped when login failed and retrying is disabled.
            if _login_with_retry():
                time.sleep(1)
                if _query_scores_with_retry():
                    continue
            input()  # wait for Enter before the next round
    elif mode == 2:
        print_INFO('Exit')



# Script entry point: main() is called unconditionally at module level, so it
# runs whenever this file is executed (and also if it is imported).
main()