"""GUI tool that downloads homework report zips from the Xiaoxian teacher site.

The module drives a Chrome browser through Selenium: it scans the homework
list for entries in a date range, downloads each report zip, reads the
per-class commit dates from the detail page and appends the median commit
date to the downloaded file's name.
"""
import os
import re
import sys
from datetime import datetime
from time import sleep

import numpy as np
from PySide6.QtGui import QColor
from PySide6.QtWidgets import QWidget, QApplication, QFileDialog, QTableWidgetItem
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from Ui_下载小闲答题数据 import Ui_Form
# NOTE: kept last (as in the original import order) so that any name it
# exports keeps shadowing the imports above exactly as before.
from database_tools_2 import *

# Absolute XPaths into the site's DOM; they break whenever the page layout changes.
_CLASS_DROPDOWN_XPATH = "/html/body/div[2]/div/div/div[2]/div[1]/div[2]/div/div/div"
_COMMIT_DATE_XPATH = "/html/body/div[2]/div/div/div[2]/div[1]/div[2]/span[2]"
_DIALOG_CANCEL_XPATH = "/html/body/div[4]/div/div[2]/div/div[2]/div[3]/div/button[1]"
_DIALOG_CONFIRM_XPATH = "/html/body/div[4]/div/div[2]/div/div[2]/div[3]/div/button[2]"

# Consecutive SelectNext failures tolerated before GetCommitInfo gives up.
_MAX_SELECT_FAILURES = 20


def GetPageInfo(driver):
    """Return the pager text of the report list page (current page / total pages)."""
    return driver.find_element(By.CSS_SELECTOR, "[class*=styles__page___]").text


def _current_page(driver):
    """Return the part of the pager text that precedes the first "/" (the current page)."""
    page_info = GetPageInfo(driver)
    return page_info[:page_info.index("/")]


def SelectNext(driver):
    """Select the next class in the dropdown of an opened report page.

    Returns:
        The dropdown text after the selection, or the sentinel ``1`` when any
        WebDriver interaction fails.
    """
    try:
        openlist = driver.find_element(By.XPATH, _CLASS_DROPDOWN_XPATH)
        sleep(0.1)
        openlist.click()
        sleep(0.1)
        openlist.send_keys(Keys.ARROW_DOWN)
        sleep(0.1)
        openlist.send_keys(Keys.ENTER)
        sleep(0.3)
        # Re-locate the element: the dropdown may have been re-rendered.
        return driver.find_element(By.XPATH, _CLASS_DROPDOWN_XPATH).text
    except WebDriverException:
        return 1


def GetCommitDate(driver):
    """Return the commit date shown for the current class as ``YYYYMMDD``.

    Raises:
        IndexError: if the displayed text contains no ``YYYY-MM-DD`` date.
    """
    commit_date = driver.find_element(By.XPATH, _COMMIT_DATE_XPATH).text
    year, month, date = re.findall(r"(\d{4})-(\d{2})-(\d{2})", commit_date)[0]
    return year + month + date


def GetCommitInfo(driver):
    """Collect ``{class name: commit date}`` by cycling through the class dropdown.

    Cycling stops as soon as a class name that was already recorded shows up
    again.

    Raises:
        RuntimeError: if the dropdown cannot be operated
            ``_MAX_SELECT_FAILURES`` times in a row (previously this looped
            forever).
    """
    commit_dict = {}
    failures = 0
    classname = SelectNext(driver)
    sleep(0.5)
    while classname not in commit_dict:
        if classname == 1:
            # SelectNext failed; retry a bounded number of times.
            failures += 1
            if failures >= _MAX_SELECT_FAILURES:
                raise RuntimeError("unable to switch class in the report page")
        else:
            failures = 0
            commit_dict[classname] = GetCommitDate(driver)
        classname = SelectNext(driver)
    return commit_dict


def ToIntTime(string):
    """Convert a ``YYYYMMDD`` string to an integer local-time timestamp."""
    date_obj = datetime.strptime(string, "%Y%m%d")
    return int(date_obj.timestamp())


def MedianCommitDate(adict):
    """Return the median of the ``YYYYMMDD`` values of ``adict`` as ``YYYYMMDD``.

    Raises:
        ValueError: if ``adict`` is empty.
    """
    if not adict:
        raise ValueError("no commit dates to take the median of")
    median_timestamp = np.median([ToIntTime(value) for value in adict.values()])
    return datetime.fromtimestamp(median_timestamp).strftime("%Y%m%d")


def gotopagenum(driver, num):
    """Jump to page ``num`` of the report list through the pager's input box.

    The pager is first sent to page 1 and then to ``num``.
    """
    pagecontrol = driver.find_element(By.CSS_SELECTOR, "[class*=styles__pagination___]")
    pageinput = pagecontrol.find_element(By.CSS_SELECTOR, "input[type=text]")
    pageinput.clear()
    # NOTE(review): "tyles__button___" looks like a typo for "styles__button___";
    # it still matches because [class*=...] is a substring match.
    submitbutton = pagecontrol.find_element(By.CSS_SELECTOR, "[class*=tyles__button___]")
    pageinput.send_keys(str(1))
    submitbutton.click()
    sleep(0.5)
    pageinput.clear()
    sleep(0.1)
    pageinput.send_keys(str(num))
    submitbutton.click()
    sleep(0.5)


def ClickBack(driver):
    """Click the back button and wait one second."""
    backbutton = driver.find_element(By.CSS_SELECTOR, '[class*=styles__back]')
    backbutton.click()
    sleep(1)


def GetHomeworkNames(driver, starting_date="20000101"):
    """Return info dicts for the homework entries on the current page.

    Each dict has the keys ``name``, ``date`` (``YYYYMMDD``), ``time``
    (``HHMMSS``) and ``grade``. Entries dated before ``starting_date`` are
    skipped.
    """
    hwk_list = []
    for homework_root in driver.find_elements(By.CSS_SELECTOR, '[class*=styles__reportList]'):
        name = homework_root.find_element(By.CSS_SELECTOR, '[class*=styles__name]').text
        date_raw = homework_root.find_element(By.CSS_SELECTOR, '[class*=styles__time]').text
        date = re.findall(r"\d{4}-\d{2}-\d{2}", date_raw)[0].replace("-", "")
        time_text = re.findall(r"\d{2}:\d{2}:\d{2}", date_raw)[0].replace(":", "")
        # The first three characters of the grade text are dropped
        # (presumably a fixed label prefix -- confirm against the page).
        grade = homework_root.find_element(By.CSS_SELECTOR, '[class*=styles__grades]').text[3:]
        if date >= starting_date:
            hwk_list.append({"name": name, "date": date, "time": time_text, "grade": grade})
    return hwk_list


def GetHomeworkRoots(driver):
    """Return the root elements of all homework/exam entries on the current page."""
    return driver.find_elements(By.CSS_SELECTOR, '[class*=styles__reportList]')


def ClickDetail(root):
    """Click the "view detail" control of a homework entry."""
    root.find_element(By.XPATH, "div[2]/div[2]").click()


def ClickDownload(root):
    """Click the "report" (download) control of a homework entry."""
    root.find_element(By.XPATH, "div[2]/div[3]").click()


def StartandEndDownload(driver):
    """Confirm the download dialog, then dismiss it."""
    driver.find_element(By.XPATH, _DIALOG_CONFIRM_XPATH).click()
    QuitDownload(driver)


def QuitDownload(driver):
    """Click the dialog's cancel button until it no longer reads '取 消'.

    A dialog that has been removed from the DOM counts as closed.
    """
    while True:
        try:
            if driver.find_element(By.XPATH, _DIALOG_CANCEL_XPATH).text != '取 消':
                return
        except NoSuchElementException:
            # The dialog is gone altogether: nothing left to dismiss.
            return
        try:
            sleep(0.5)
            driver.find_element(By.XPATH, _DIALOG_CANCEL_XPATH).click()
        except WebDriverException:
            # Best effort: the button may not be clickable yet; try again.
            pass


def GenerateCommitMessages(driver, index):
    """Return the median commit date of the ``index``-th (1-based) entry on this page.

    Opens the entry's detail page, reads the commit dates of all classes, then
    navigates back to the page that was shown before.
    """
    page = _current_page(driver)
    root = GetHomeworkRoots(driver)[index - 1]
    ClickDetail(root)
    adict = GetCommitInfo(driver)
    message = MedianCommitDate(adict)
    sleep(0.1)
    ClickBack(driver)
    sleep(0.5)
    gotopagenum(driver, page)
    sleep(0.5)
    return message


def DownloadZipwithDetail(driver, index):
    """Download the zip of the ``index``-th (1-based) entry and return its median commit date."""
    page = _current_page(driver)
    root = GetHomeworkRoots(driver)[index - 1]
    homework_name = root.text.split("\n")[1]
    ClickDownload(root)
    sleep(0.5)
    StartandEndDownload(driver)
    print(f"已下载: {homework_name}")
    sleep(0.5)
    QuitDownload(driver)
    ClickDetail(root)
    adict = GetCommitInfo(driver)
    message = MedianCommitDate(adict)
    sleep(0.1)
    ClickBack(driver)
    sleep(0.5)
    gotopagenum(driver, page)
    sleep(0.5)
    return message


def _rename_newest_zip(folder, message):
    """Append ``(message)`` to the name of the newest entry in ``folder``.

    Returns:
        The new file path.

    Raises:
        FileNotFoundError: if ``folder`` is empty or its newest entry is not a
            ``.zip`` file (for example a still-running ``.crdownload``).
        OSError: if listing the folder or renaming fails.
    """
    names = os.listdir(folder)
    if not names:
        raise FileNotFoundError(f"no files found in {folder!r}")
    newest = max(names, key=lambda name: os.path.getmtime(os.path.join(folder, name)))
    stem, extension = os.path.splitext(newest)
    if extension.lower() != ".zip":
        raise FileNotFoundError(f"newest file {newest!r} is not a finished .zip download")
    renamedfilepath = os.path.join(folder, f"{stem}({message}).zip")
    os.rename(os.path.join(folder, newest), renamedfilepath)
    return renamedfilepath


def RenameRecentzip(folder, message):
    """Append ``(message)`` to the newest zip in ``folder``.

    Returns:
        ``0`` on success, ``1`` if nothing could be renamed.
    """
    try:
        renamedfilepath = _rename_newest_zip(folder, message)
    except OSError as err:
        print(f"重命名失败: {err}")
        return 1
    print(f"已重命名为: {os.path.split(renamedfilepath)[-1]}")
    return 0


def DownloadZips(driver, alist=None):
    """Download the zips of the entries on the current page.

    Args:
        driver: Selenium WebDriver showing the report list.
        alist: 0-based indices of the entries to download; ``None`` or an
            empty list downloads every entry on the page.
    """
    page = _current_page(driver)
    roots_len = len(GetHomeworkRoots(driver))
    rangelist = list(alist) if alist else range(roots_len)
    for i in rangelist:
        # Re-query the entries each time: earlier references go stale after navigation.
        root = GetHomeworkRoots(driver)[i]
        homework_name = root.text.split("\n")[1]
        ClickDownload(root)
        sleep(0.5)
        StartandEndDownload(driver)
        print(f"已下载: {homework_name}")
        sleep(0.5)
        gotopagenum(driver, page)
        sleep(0.5)


def DownloadandRenameZips(driver, message, folder, index):
    """Download the ``index``-th (1-based) entry's zip and append ``(message)`` to its name.

    Raises:
        OSError: if the downloaded zip cannot be found in ``folder`` or renamed.
    """
    page = _current_page(driver)
    root = GetHomeworkRoots(driver)[index - 1]
    print(root.text)
    ClickDownload(root)
    sleep(0.5)
    StartandEndDownload(driver)
    sleep(5)  # fixed wait for the browser to finish writing the file
    renamedfilepath = _rename_newest_zip(folder, message)
    print(f"已重命名为: {os.path.split(renamedfilepath)[-1]}")
    sleep(0.5)
    gotopagenum(driver, page)
    sleep(0.5)


def getIndices(string):
    """Parse ``"1,3,5"`` into 0-based indices ``[0, 2, 4]``; blank tokens are ignored."""
    return [int(token) - 1 for token in string.split(",") if token.strip()]


def GetValidHomeworks(driver, startdate, enddate, graderegex):
    """Find the homeworks dated within ``[startdate, enddate]`` whose grade matches ``graderegex``.

    Pages are scanned from page 1 until an entry older than ``startdate`` is
    met, or until the pager stops advancing to a page not seen before.

    Returns:
        A list of ``(homework info dict, page number)`` tuples in the reverse
        of the order in which they were encountered.
    """
    grade_pattern = re.compile(graderegex)
    validhomeworks = []
    visited_pages = set()
    gotopagenum(driver, 1)
    sleep(2)
    while True:
        currentpage = int(re.findall(r"^(\d+)/", GetPageInfo(driver))[0])
        if currentpage in visited_pages:
            # The pager did not move to a new page: the list is exhausted.
            break
        visited_pages.add(currentpage)
        reached_older_entry = False
        for item in GetHomeworkNames(driver):
            if grade_pattern.search(item["grade"]) and startdate <= item["date"] <= enddate:
                validhomeworks.append((item, currentpage))
                print(item)
            if startdate > item["date"]:
                reached_older_entry = True
                break
        if reached_older_entry:
            break
        gotopagenum(driver, currentpage + 1)
        sleep(2)
    return validhomeworks[::-1]


def gethmwkIndexonPage(driver, hmwkinfo):
    """Return the 1-based position of ``hmwkinfo`` on the current page, or ``-1`` if absent."""
    for position, homework in enumerate(GetHomeworkNames(driver), start=1):
        if homework == hmwkinfo:
            return position
    return -1


class MyWindow_xxxz(QWidget, Ui_Form):
    """Main window: configure the browser, list matching homeworks and download them."""

    def __init__(self):
        super().__init__()
        # Defaults so the button handlers can run in any order without AttributeError.
        self.chromepath = ""
        self.driverpath = ""
        self.outputfolder = ""
        self.driver = None
        self.homeworklist = []
        self.setupUi(self)
        self.bind()

    def _has_driver(self):
        """Return True if a browser has been opened; otherwise tell the user and return False."""
        if self.driver is None:
            print("请先打开浏览器")
            return False
        return True

    def getchromepath(self):
        """Load the saved chrome path, if any, and show it in its label."""
        try:
            self.chromepath = ReadTextFile("临时文件/browserrelated/chromepath.ini")
            self.label_chrome.setText(self.chromepath)
        except Exception:
            # Best effort: the settings file does not exist before the first run.
            pass

    def getdriverpath(self):
        """Load the saved chromedriver path, if any, and show it in its label."""
        try:
            self.driverpath = ReadTextFile("临时文件/browserrelated/driverpath.ini")
            self.label_driver.setText(self.driverpath)
        except Exception:
            # Best effort: the settings file does not exist before the first run.
            pass

    def bind(self):
        """Restore saved settings, fill in default dates and connect the buttons."""
        makedir("临时文件/browserrelated")
        self.getchromepath()
        self.getdriverpath()
        self.lineEdit_startdate.setText(GetDate())
        self.lineEdit_enddate.setText(GetDate())
        self.pushButton_chrome.clicked.connect(self.getchromePath)
        self.pushButton_driver.clicked.connect(self.getdriverPath)
        self.pushButton_outputfolder.clicked.connect(self.getoutputfolder)
        self.pushButton_openbrowser.clicked.connect(self.openbrowser)
        self.pushButton_login.clicked.connect(self.login)
        self.pushButton_getlist.clicked.connect(self.getlist)
        self.pushButton_exec.clicked.connect(self.exec)
        self.tableWidget.setColumnWidth(0, 350)
        self.tableWidget.setColumnWidth(1, 80)

    def getchromePath(self):
        """Let the user pick chrome.exe and persist the choice."""
        pathlist = QFileDialog.getOpenFileName(self, "选择文件", ".", "chrome.exe文件(chrome.exe);;所有文件(*)")
        if not pathlist[0]:
            # Dialog cancelled: keep the previous setting.
            return
        self.label_chrome.setText(pathlist[0])
        self.chromepath = pathlist[0]
        SaveTextFile(self.chromepath, "临时文件/browserrelated/chromepath.ini")

    def getdriverPath(self):
        """Let the user pick chromedriver.exe and persist the choice."""
        pathlist = QFileDialog.getOpenFileName(self, "选择文件", ".", "chromedriver.exe文件(chromedriver.exe);;所有文件(*)")
        if not pathlist[0]:
            # Dialog cancelled: keep the previous setting.
            return
        self.label_driver.setText(pathlist[0])
        self.driverpath = pathlist[0]
        SaveTextFile(self.driverpath, "临时文件/browserrelated/driverpath.ini")

    def getoutputfolder(self):
        """Let the user pick the download folder."""
        folder = QFileDialog.getExistingDirectory(None, "选择文件夹")
        if not folder:
            # Dialog cancelled: keep the previous setting.
            return
        if sys.platform == "win32":
            # Qt returns forward slashes; use native separators on Windows.
            folder = folder.replace("/", "\\")
        self.outputfolder = folder
        self.label_outputfolder.setText(self.outputfolder)

    def openbrowser(self):
        """Start Chrome with the configured paths and open the site."""
        # Fall back to Selenium's own driver lookup when no driver path is configured.
        service = Service(executable_path=self.driverpath) if self.driverpath else Service()
        options = webdriver.ChromeOptions()
        if self.chromepath:
            options.binary_location = self.chromepath
        if self.outputfolder:
            prefs = {"download.default_directory": self.outputfolder}
            options.add_experimental_option("prefs", prefs)
        else:
            print("未选择输出文件夹, 将使用浏览器默认下载目录")
        self.driver = webdriver.Chrome(service=service, options=options)
        self.driver.get("http://ls.xiaoxianai.cn")

    def login(self):
        """Fill in the login form and submit it."""
        if not self._has_driver():
            return
        loginform = self.driver.find_element(By.XPATH, '//*[@id="container"]/div/div/form')
        loginlist = loginform.find_elements(By.TAG_NAME, "div")
        # NOTE(security): credentials are hard-coded in source; move them to
        # configuration that is not committed.
        loginlist[1].find_element(By.TAG_NAME, "input").send_keys("16621337584")
        loginlist[2].find_element(By.TAG_NAME, "input").send_keys("password")
        loginform.find_element(By.TAG_NAME, "button").click()

    def getlist(self):
        """Query the matching homeworks and show them in the table."""
        if not self._has_driver():
            return
        self.homeworklist = GetValidHomeworks(
            self.driver,
            self.lineEdit_startdate.text(),
            self.lineEdit_enddate.text(),
            self.lineEdit_graderegex.text(),
        )
        # Drop rows of a previous query so table rows stay aligned with homeworklist.
        self.tableWidget.setRowCount(0)
        for row, (hmwk, _page) in enumerate(self.homeworklist):
            self.tableWidget.insertRow(row)
            self.tableWidget.setItem(row, 0, QTableWidgetItem(f"{hmwk['name']} / {hmwk['grade']}"))
            self.tableWidget.setItem(row, 1, QTableWidgetItem(""))

    def exec(self):
        """Download and rename the zip of every listed homework, colouring its row by result."""
        if not self._has_driver():
            return
        for row, (hmwk, page) in enumerate(self.homeworklist):
            hmwkindex = -1
            downloaded = False
            # Search from the page after the recorded one back to page 1.
            for p in range(page + 1, 0, -1):
                gotopagenum(self.driver, p)
                sleep(2)
                hmwkindex = gethmwkIndexonPage(self.driver, hmwk)
                if hmwkindex > 0:
                    print(f"已找到 {hmwk['name']} 于页码 {p}, 第 {hmwkindex} 项")
                    break
            if hmwkindex <= 0:
                print(f"!!!!!!未找到 {hmwk['name']}")
            else:
                print(f"正在下载 {hmwk['name']} 的 zip 文件")
                try:
                    message = DownloadZipwithDetail(self.driver, hmwkindex)
                    if RenameRecentzip(self.outputfolder, message) == 0:
                        downloaded = True
                        self.tableWidget.item(row, 1).setBackground(QColor("green"))
                        QApplication.processEvents()
                except Exception as err:
                    # Keep going with the remaining homeworks, but report the real error.
                    print(f"下载或重命名出错: {err!r}")
            if not downloaded:
                print(f"@@@@@@未能下载并改名 {hmwk['name']}")
                self.tableWidget.item(row, 1).setBackground(QColor("red"))
                QApplication.processEvents()


if __name__ == '__main__':
    app = QApplication([])
    windows = MyWindow_xxxz()
    windows.show()
    app.exec()