This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具v4/下载小闲答题数据.py

358 lines
14 KiB
Python

from PySide6.QtWidgets import QWidget, QApplication, QFileDialog, QTableWidgetItem
from PySide6.QtGui import QColor
from Ui_下载小闲答题数据 import Ui_Form
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
import re,os,sys
from datetime import datetime
import numpy as np
from time import sleep
from database_tools_2 import *
def GetPageInfo(driver):
    """Return the text of the pagination indicator on the report list page.

    Per the original note, the indicator shows the page number and the total
    page count; callers in this file slice off everything before the first
    "/" to obtain the current page.
    """
    indicator = driver.find_element(By.CSS_SELECTOR, "[class*=styles__page___]")
    return indicator.text
def SelectNext(driver):
    """Select the next class in the drop-down of an opened report page.

    The drop-down is opened with a click, moved one entry down with the
    arrow key and confirmed with Enter.

    Returns:
        The text shown by the class selector after the switch, or the
        integer ``1`` as a failure sentinel when any step raises.
    """
    selector_xpath = "/html/body/div[2]/div/div/div[2]/div[1]/div[2]/div/div/div"
    try:
        openlist = driver.find_element(By.XPATH, selector_xpath)
        sleep(0.1)
        openlist.click()
        sleep(0.1)
        openlist.send_keys(Keys.ARROW_DOWN)
        sleep(0.1)
        openlist.send_keys(Keys.ENTER)
        sleep(0.3)
        # Re-locate the element: the text is read from a fresh lookup, as in
        # the original code, rather than from the earlier handle.
        return driver.find_element(By.XPATH, selector_xpath).text
    except Exception:
        # Deliberate best-effort: callers test for the sentinel 1. Catching
        # Exception (not a bare except) keeps Ctrl+C / SystemExit working.
        return 1
def GetCommitDate(driver):
    """Return the submission date shown for the current class as ``YYYYMMDD``.

    The first ``YYYY-MM-DD`` occurrence in the element's text is used; an
    IndexError is raised when the text contains no such date.
    """
    stamp = driver.find_element(
        By.XPATH, "/html/body/div[2]/div/div/div[2]/div[1]/div[2]/span[2]"
    ).text
    first_match = re.findall(r"(\d{4})-(\d{2})-(\d{2})", stamp)[0]
    # The match is a (year, month, day) tuple of strings; glue them together.
    return "".join(first_match)
def GetCommitInfo(driver, max_failures=50):
    """Collect the submission date of every class on an opened report page.

    The class drop-down is advanced with SelectNext until a class name comes
    up that has already been recorded, which is taken to mean the list has
    wrapped around.

    Args:
        driver: Selenium driver positioned on a report detail page.
        max_failures: Number of consecutive SelectNext failures tolerated
            before giving up. Previously a persistently failing SelectNext
            made this loop spin forever.

    Returns:
        dict mapping class name -> submission date string (``YYYYMMDD``).

    Raises:
        RuntimeError: if SelectNext fails ``max_failures`` times in a row.
    """
    commit_dict = {}
    failures = 0
    classname = SelectNext(driver)
    sleep(0.5)
    while classname not in commit_dict:
        if classname == 1:
            # SelectNext signals failure with the sentinel 1; retry, but
            # only a bounded number of times.
            failures += 1
            if failures >= max_failures:
                raise RuntimeError(
                    f"SelectNext failed {failures} times in a row; giving up"
                )
        else:
            failures = 0
            commit_dict[classname] = GetCommitDate(driver)
        classname = SelectNext(driver)
    return commit_dict
def ToIntTime(string):
    """Convert a ``YYYYMMDD`` string to an integer timestamp.

    The string is parsed as a naive datetime at midnight, so the resulting
    timestamp is interpreted in the local timezone.
    """
    return int(datetime.strptime(string, "%Y%m%d").timestamp())
def MedianCommitDate(adict):
    """Return the median of the dates in *adict* as a ``YYYYMMDD`` string.

    *adict* maps arbitrary keys to ``YYYYMMDD`` strings; each value is turned
    into a timestamp, the median timestamp is taken with numpy and formatted
    back to a date.
    """
    stamps = [ToIntTime(day) for day in adict.values()]
    middle = np.median(stamps)
    return datetime.fromtimestamp(middle).strftime("%Y%m%d")
def gotopagenum(driver, num):
    """Jump the paginated list to page *num* using the page-number input.

    The list is first sent to page 1 and only then to the requested page
    (presumably to force a refresh of the list -- TODO confirm).
    """
    pager = driver.find_element(By.CSS_SELECTOR, "[class*=styles__pagination___]")
    page_box = pager.find_element(By.CSS_SELECTOR, "input[type=text]")
    page_box.clear()
    go_button = pager.find_element(By.CSS_SELECTOR, "[class*=tyles__button___]")

    # Step 1: go to the first page.
    page_box.send_keys("1")
    go_button.click()
    sleep(0.5)

    # Step 2: go to the requested page.
    page_box.clear()
    sleep(0.1)
    page_box.send_keys(f"{num}")
    go_button.click()
    sleep(0.5)
def ClickBack(driver):
    """Click the back button and wait one second."""
    driver.find_element(By.CSS_SELECTOR, '[class*=styles__back]').click()
    sleep(1)
def GetHomeworkNames(driver, starting_date="20000101"):
    """List the homework entries on the current page dated on/after a cutoff.

    Args:
        driver: Selenium driver showing the report list.
        starting_date: ``YYYYMMDD`` string; entries with an earlier date are
            skipped.

    Returns:
        A list of dicts with the keys ``name``, ``date`` (``YYYYMMDD``),
        ``time`` (``HHMMSS``) and ``grade`` (the grade text with its first
        three characters removed), in page order.
    """
    def _text_of(node, selector):
        # Text of the first descendant of *node* matching the CSS selector.
        return node.find_element(By.CSS_SELECTOR, selector).text

    collected = []
    for entry in driver.find_elements(By.CSS_SELECTOR, '[class*=styles__reportList]'):
        title = _text_of(entry, '[class*=styles__name]')
        stamp = _text_of(entry, '[class*=styles__time]')
        day = re.findall(r"\d{4}-\d{2}-\d{2}", stamp)[0].replace("-", "")
        clock = re.findall(r"\d{2}:\d{2}:\d{2}", stamp)[0].replace(":", "")
        level = _text_of(entry, '[class*=styles__grades]')[3:]
        if day < starting_date:
            continue
        collected.append({"name": title, "date": day, "time": clock, "grade": level})
    return collected
def GetHomeworkRoots(driver):
    """Return the root elements of all homework/exam entries on the page."""
    return driver.find_elements(By.CSS_SELECTOR, '[class*=styles__reportList]')
def ClickDetail(root):
    """Click the "view details" control of a homework entry."""
    detail_control = root.find_element(By.XPATH, "div[2]/div[2]")
    detail_control.click()
def ClickDownload(root):
    """Click the "report" (download) control of a homework entry."""
    report_control = root.find_element(By.XPATH, "div[2]/div[3]")
    report_control.click()
def StartandEndDownload(driver):
    """Press the second button of the download dialog, then dismiss it.

    The second button presumably starts the download (TODO confirm against
    the page); the dialog is then closed through QuitDownload.
    """
    second_button = driver.find_element(
        By.XPATH, "/html/body/div[4]/div/div[2]/div/div[2]/div[3]/div/button[2]"
    )
    second_button.click()
    QuitDownload(driver)
def QuitDownload(driver):
    """Leave the download dialog by clicking its cancel button until it is gone.

    The first dialog button is polled; while its text is still '取 消'
    (cancel) the function waits half a second and clicks it. A failed click
    is ignored and simply retried on the next poll.
    """
    cancel_xpath = "/html/body/div[4]/div/div[2]/div/div[2]/div[3]/div/button[1]"
    while driver.find_element(By.XPATH, cancel_xpath).text == '取 消':
        try:
            sleep(0.5)
            driver.find_element(By.XPATH, cancel_xpath).click()
        except Exception:
            # Best-effort retry. Exception instead of a bare except so that
            # KeyboardInterrupt/SystemExit can still break out of the loop.
            pass
def GenerateCommitMessages(driver, index):
    """Return the median submission date of one homework on the current page.

    Args:
        driver: Selenium driver showing the report list.
        index: 1-based position of the homework on the page.

    Returns:
        The median submission date across classes as ``YYYYMMDD``.

    The detail page is opened, read, and left again; afterwards the list is
    navigated back to the page that was showing at the start.
    """
    indicator = GetPageInfo(driver)
    current_page = indicator[:indicator.index("/")]
    entry = GetHomeworkRoots(driver)[index - 1]
    ClickDetail(entry)
    median_date = MedianCommitDate(GetCommitInfo(driver))
    sleep(0.1)
    ClickBack(driver)
    sleep(0.5)
    gotopagenum(driver, current_page)
    sleep(0.5)
    return median_date
def DownloadZipwithDetail(driver, index):
    """Download one homework's zip and return its median submission date.

    Args:
        driver: Selenium driver showing the report list.
        index: 1-based position of the homework on the page.

    Returns:
        The median submission date across classes as ``YYYYMMDD``.

    After the download dialog is dismissed, the homework's detail page is
    opened to read the per-class submission dates, and the list is finally
    navigated back to the page that was showing at the start.
    """
    indicator = GetPageInfo(driver)
    current_page = indicator[:indicator.index("/")]
    entry = GetHomeworkRoots(driver)[index - 1]
    # The second line of the entry's text is used as the homework name.
    homework_name = entry.text.split("\n")[1]

    # Download phase.
    ClickDownload(entry)
    sleep(0.5)
    StartandEndDownload(driver)
    print(f"已下载: {homework_name}")
    sleep(0.5)
    QuitDownload(driver)

    # Detail phase: read submission dates, then return to the list.
    ClickDetail(entry)
    median_date = MedianCommitDate(GetCommitInfo(driver))
    sleep(0.1)
    ClickBack(driver)
    sleep(0.5)
    gotopagenum(driver, current_page)
    sleep(0.5)
    return median_date
def RenameRecentzip(folder, message):
    """Append ``(message)`` to the name of the newest zip file in *folder*.

    Only regular files whose name ends in ``.zip`` (case-insensitive) are
    considered, so a newer non-zip file such as a partial download is never
    renamed by mistake.

    Args:
        folder: Directory to search.
        message: Text placed in parentheses before the ``.zip`` suffix.

    Returns:
        0 when a file was renamed, 1 when there is no zip file or the rename
        failed.
    """
    candidates = []
    for entry_name in os.listdir(folder):
        entry_path = os.path.join(folder, entry_name)
        if entry_name.lower().endswith(".zip") and os.path.isfile(entry_path):
            candidates.append(entry_path)
    if not candidates:
        return 1

    filepath = max(candidates, key=os.path.getmtime)
    stem, _ext = os.path.splitext(filepath)
    renamedfilepath = f"{stem}({message}).zip"
    try:
        os.rename(filepath, renamedfilepath)
    except OSError:
        return 1
    print(f"已重命名为: {os.path.split(renamedfilepath)[-1]}")
    return 0
def DownloadZips(driver, alist=None):
    """Download the zip files of selected homework entries on the current page.

    Args:
        driver: Selenium driver showing the report list.
        alist: 0-based positions of the entries to download. ``None`` or an
            empty sequence means every entry on the page.

    After each download the list is navigated back to the page that was
    showing at the start, and the entries are looked up again.
    """
    page_info = GetPageInfo(driver)
    page = page_info[:page_info.index("/")]
    if alist is None or len(alist) == 0:
        positions = range(len(GetHomeworkRoots(driver)))
    else:
        # Work on a private copy so the caller's sequence is never touched.
        positions = list(alist)
    for position in positions:
        # Re-locate the entries on every pass; the page is re-navigated at
        # the end of each iteration.
        root = GetHomeworkRoots(driver)[position]
        homework_name = root.text.split("\n")[1]
        ClickDownload(root)
        sleep(0.5)
        StartandEndDownload(driver)
        print(f"已下载: {homework_name}")
        sleep(0.5)
        gotopagenum(driver, page)
        sleep(0.5)
def DownloadandRenameZips(driver, message, folder, index):
    """Download one homework's zip and tag the newest file in *folder*.

    Args:
        driver: Selenium driver showing the report list.
        message: Text appended in parentheses to the downloaded file's name.
        folder: Directory the browser downloads into.
        index: 1-based position of the homework on the page.

    After a fixed five-second wait, the most recently modified entry of
    *folder* has its last four characters replaced by ``(message).zip``.
    """
    indicator = GetPageInfo(driver)
    current_page = indicator[:indicator.index("/")]
    entry = GetHomeworkRoots(driver)[index - 1]
    print(entry.text)
    ClickDownload(entry)
    sleep(0.5)
    StartandEndDownload(driver)
    sleep(5)

    def _modified_at(entry_name):
        # Modification time of a directory entry, used as the sort key.
        return os.path.getmtime(os.path.join(folder, entry_name))

    newest_name = sorted(os.listdir(folder), key=_modified_at, reverse=True)[0]
    filepath = os.path.join(folder, newest_name)
    renamedfilepath = filepath[:-4] + f"({message}).zip"
    os.rename(filepath, renamedfilepath)
    print(f"已重命名为: {os.path.split(renamedfilepath)[-1]}")
    sleep(0.5)
    gotopagenum(driver, current_page)
    sleep(0.5)
def getIndices(string):
    """Parse a comma-separated list of 1-based numbers into 0-based indices.

    A blank string yields an empty list. Any piece that is not an integer
    raises ValueError.
    """
    trimmed = string.strip()
    if not trimmed:
        return []
    return [int(piece) - 1 for piece in trimmed.split(",")]
def GetValidHomeworks(driver, startdate, enddate, graderegex):
    """Find the homeworks of matching grades inside a date range.

    The report list is walked page by page starting from page 1. The walk
    stops as soon as an entry older than *startdate* is met, so the list is
    assumed to be ordered newest first (TODO confirm against the site).

    Args:
        driver: Selenium driver showing the report list.
        startdate: Inclusive lower bound, ``YYYYMMDD``.
        enddate: Inclusive upper bound, ``YYYYMMDD``.
        graderegex: Regular expression searched for in each entry's grade.

    Returns:
        A list of ``(homework_info, page_number)`` tuples in the reverse of
        the order in which they were encountered.
    """
    validhomeworks = []
    gotopagenum(driver, 1)
    sleep(2)
    previous_page = None
    endflag = False
    while not endflag:
        currentpage = int(re.findall(r"^(\d+)/", GetPageInfo(driver))[0])
        if currentpage == previous_page:
            # Asking for the next page did not move the list: this is the
            # last page. Without this check the loop never ended when no
            # entry older than startdate exists.
            break
        previous_page = currentpage
        for item in GetHomeworkNames(driver):
            grade_matches = re.search(graderegex, item["grade"]) is not None
            if grade_matches and startdate <= item["date"] <= enddate:
                validhomeworks.append((item, currentpage))
                print(item)
            if startdate > item["date"]:
                endflag = True
                break
        gotopagenum(driver, currentpage + 1)
        sleep(2)
    return validhomeworks[::-1]
def gethmwkIndexonPage(driver, hmwkinfo):
    """Return the 1-based position of *hmwkinfo* on the current page, or -1.

    *hmwkinfo* is compared for equality against the dicts produced by
    GetHomeworkNames for the page currently displayed.
    """
    for position, entry in enumerate(GetHomeworkNames(driver), start=1):
        if entry == hmwkinfo:
            return position
    return -1
class MyWindow_xxxz(QWidget,Ui_Form):
    """Main window: configure the browser, list homeworks and download zips."""

    def __init__(self):
        super().__init__()
        # Give every attribute the slots read a defined value, so clicking
        # buttons out of order no longer ends in an AttributeError.
        self.chromepath = ""
        self.driverpath = ""
        self.outputfolder = ""
        self.driver = None
        self.homeworklist = []
        self.setupUi(self)
        self.bind()

    def getchromepath(self):
        """Load the remembered chrome path, if any, into the window."""
        try:
            self.chromepath = ReadTextFile("临时文件/browserrelated/chromepath.ini")
            self.label_chrome.setText(self.chromepath)
        except Exception:
            # Best-effort: nothing has been remembered yet.
            pass

    def getdriverpath(self):
        """Load the remembered chromedriver path, if any, into the window."""
        try:
            self.driverpath = ReadTextFile("临时文件/browserrelated/driverpath.ini")
            self.label_driver.setText(self.driverpath)
        except Exception:
            # Best-effort: nothing has been remembered yet.
            pass

    def bind(self):
        """Restore remembered settings, fill in defaults and wire the buttons."""
        makedir("临时文件/browserrelated")
        self.getchromepath()
        self.getdriverpath()
        self.lineEdit_startdate.setText(GetDate())
        self.lineEdit_enddate.setText(GetDate())
        self.pushButton_chrome.clicked.connect(self.getchromePath)
        self.pushButton_driver.clicked.connect(self.getdriverPath)
        self.pushButton_outputfolder.clicked.connect(self.getoutputfolder)
        self.pushButton_openbrowser.clicked.connect(self.openbrowser)
        self.pushButton_login.clicked.connect(self.login)
        self.pushButton_getlist.clicked.connect(self.getlist)
        self.pushButton_exec.clicked.connect(self.exec)
        self.tableWidget.setColumnWidth(0,350)
        self.tableWidget.setColumnWidth(1,80)

    def getchromePath(self):
        """Let the user pick chrome.exe and remember the choice."""
        chosen = QFileDialog.getOpenFileName(self,"选择文件",".","chrome.exe文件(chrome.exe);;所有文件(*)")[0]
        if not chosen:
            # Dialog cancelled: keep the previous path instead of saving "".
            return
        self.label_chrome.setText(chosen)
        self.chromepath = chosen
        SaveTextFile(self.chromepath,"临时文件/browserrelated/chromepath.ini")

    def getdriverPath(self):
        """Let the user pick chromedriver.exe and remember the choice."""
        chosen = QFileDialog.getOpenFileName(self,"选择文件",".","chromedriver.exe文件(chromedriver.exe);;所有文件(*)")[0]
        if not chosen:
            # Dialog cancelled: keep the previous path instead of saving "".
            return
        self.label_driver.setText(chosen)
        self.driverpath = chosen
        SaveTextFile(self.driverpath,"临时文件/browserrelated/driverpath.ini")

    def getoutputfolder(self):
        """Let the user pick the download folder."""
        chosen = QFileDialog.getExistingDirectory(None, "选择文件夹")
        if not chosen:
            # Dialog cancelled: keep the previous folder.
            return
        if sys.platform == "win32":
            # Qt returns forward slashes; the path is converted to
            # backslashes on Windows before it is handed to Chrome.
            chosen = chosen.replace("/","\\")
        self.outputfolder = chosen
        self.label_outputfolder.setText(self.outputfolder)

    def openbrowser(self):
        """Start Chrome with the chosen binary, driver and download folder."""
        if not (self.chromepath and self.driverpath and self.outputfolder):
            print("请先选择 chrome.exe, chromedriver.exe 和输出文件夹")
            return
        service = Service(executable_path=self.driverpath)
        options = webdriver.ChromeOptions()
        options.binary_location = self.chromepath
        prefs = {"download.default_directory": self.outputfolder}
        options.add_experimental_option("prefs", prefs)
        self.driver = webdriver.Chrome(service= service,options = options)
        self.driver.get("http://ls.xiaoxianai.cn")

    def login(self):
        """Fill in the login form and submit it."""
        if self.driver is None:
            print("请先打开浏览器")
            return
        loginform = self.driver.find_element(By.XPATH,'//*[@id="container"]/div/div/form')
        loginlist = loginform.find_elements(By.TAG_NAME,"div")
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to local configuration that is kept out of version control.
        loginlist[1].find_element(By.TAG_NAME,"input").send_keys("16621337584")
        loginlist[2].find_element(By.TAG_NAME,"input").send_keys("password")
        loginform.find_element(By.TAG_NAME,"button").click()

    def getlist(self):
        """Query the matching homeworks and show them in the table."""
        if self.driver is None:
            print("请先打开浏览器")
            return
        self.homeworklist = GetValidHomeworks(self.driver,self.lineEdit_startdate.text(),self.lineEdit_enddate.text(),self.lineEdit_graderegex.text())
        # Start from an empty table so a second query does not leave stale
        # rows that no longer line up with self.homeworklist.
        self.tableWidget.setRowCount(0)
        for row, (hmwk, _page) in enumerate(self.homeworklist):
            self.tableWidget.insertRow(row)
            self.tableWidget.setItem(row,0,QTableWidgetItem(f"{hmwk['name']} / {hmwk['grade']}"))
            self.tableWidget.setItem(row,1,QTableWidgetItem(""))

    def exec(self):
        """Download and rename the zip of every listed homework.

        Each table row's second cell turns green on success and red when the
        homework could not be found, downloaded or renamed.
        """
        for row, (hmwk, page) in enumerate(self.homeworklist):
            foundhmwk = False
            downloaded = False
            # Search from one page past the recorded page back towards page
            # 1 (presumably because entries may have shifted since the list
            # was built -- TODO confirm).
            for p in range(page+1,0,-1):
                gotopagenum(self.driver,p)
                sleep(2)
                hmwkindex = gethmwkIndexonPage(self.driver,hmwk)
                if hmwkindex > 0:
                    foundhmwk = True
                    print(f"已找到 {hmwk['name']} 于页码 {p}, 第 {hmwkindex}")
                    break
            if not foundhmwk:
                print(f"!!!!!!未找到 {hmwk['name']}")
            else:
                print(f"正在下载 {hmwk['name']} 的 zip 文件")
                try:
                    message = DownloadZipwithDetail(self.driver,hmwkindex)
                    renamed = RenameRecentzip(self.outputfolder,message)
                    if renamed == 0:
                        downloaded = True
                        self.tableWidget.item(row,1).setBackground(QColor("green"))
                except Exception as err:
                    # Report the actual error; the original printed the
                    # Exception class itself, which carried no information.
                    print(err)
            if not downloaded:
                print(f"@@@@@@未能下载并改名 {hmwk['name']}")
                self.tableWidget.item(row,1).setBackground(QColor("red"))
            # Let the table repaint between the long-running downloads.
            QApplication.processEvents()
# Script entry point: create the Qt application, show the main window and
# run the event loop.
if __name__ == '__main__':
    application = QApplication([])
    main_window = MyWindow_xxxz()
    main_window.show()
    application.exec()