# Helpers that turn a Xiaoxian-platform statistics export (an unpacked zip of
# per-class Excel files) into usage records for the problem bank.

# Matches a "step" sub-score column name such as "第1步".
_STEP_PATTERN = re.compile(r"第\d*步")
# Leading question number of a subjective column name (after the "主观"
# prefix), terminated by "(", a literal "$", or the end of the string.
_SUBJECTIVE_NUMBER = re.compile(r"^([\d\.]*)(?:[\($]|$)")
# Keywords that mark a column as holding a per-question score.
_SCORE_KEYWORDS = ("单选", "填空", "主观", "步")


def ParseZipname(zipfilename):
    """Return the paper number encoded at the start of a zip file name.

    The paper number is the run of digits before the first underscore of
    the file's base name.

    Raises:
        IndexError: if the base name does not start with ``<digits>_``.
    """
    basename = os.path.basename(zipfilename)
    return re.findall(r"^(\d*?)_", basename)[0]


def FindFile(dir, filename):
    """Return every directory under ``dir`` (inclusive) that contains ``filename``.

    Directories are listed in ``os.walk`` order; the result is an empty list
    when the file is not found anywhere.
    """
    return [path for path, _subdirs, filenames in os.walk(dir)
            if filename in filenames]


def FindPaper(xiaoxianpid, answersheetpath):
    """Look up the problem-bank paper that corresponds to a platform paper number.

    Every "答题纸对应.json" below ``answersheetpath`` is searched for the key
    ``xiaoxianpid``; the first file containing it wins.

    Returns:
        ``(pid, grade, idlist, marks)`` where ``pid`` is the entry's "id",
        ``grade`` is "20" plus the two-digit "NN届" fragment found in the
        directory path, ``idlist`` concatenates the problem IDs of the
        entry's "parts" (read from "校本材料.json" in the same directory) and
        ``marks`` is the entry's "marks" value or ``[]`` when absent.
        ``False`` when no mapping file contains ``xiaoxianpid``.
    """
    for folder in FindFile(answersheetpath, "答题纸对应.json"):
        anssheetjson = load_dict(os.path.join(folder, "答题纸对应.json"))
        if xiaoxianpid not in anssheetjson:
            continue
        entry = anssheetjson[xiaoxianpid]
        grade = "20" + re.findall(r"\d{2}届", folder)[0]
        pid = entry["id"]
        notesjson = load_dict(os.path.join(folder, "校本材料.json"))
        idlist = []
        for part in entry["parts"]:
            idlist.extend(notesjson["notes"][pid][part])
        marks = entry["marks"] if "marks" in entry else []
        return (pid, grade, idlist, marks)
    return False


def CheckPaperType(filepath, filename):
    """Classify the export found under ``filepath`` as daily work or an exam.

    Returns:
        ``False`` if ``filename`` is not found under ``filepath``;
        "考试卷" if any cell of row 1 of the first file found mentions a step
        ("第N步"); "日常卷" otherwise.
    """
    statsfilepathlist = FindFile(filepath, filename)
    if not statsfilepathlist:
        return False
    dfcurrent = pd.read_excel(os.path.join(statsfilepathlist[0], filename))
    # Test each cell on its own: the string form of a whole row is truncated
    # by pandas for long rows, which could hide the step columns.
    has_steps = any(_STEP_PATTERN.search(str(cell))
                    for cell in dfcurrent.loc[1, :])
    return "考试卷" if has_steps else "日常卷"


def generateColIndexandMarks(filepath, statsfilename, paperinfo):
    """Locate the score columns of the statistics sheet and their full marks.

    Args:
        filepath: list of directories containing ``statsfilename``; only the
            first one is read.
        statsfilename: name of the Excel statistics file.
        paperinfo: tuple as returned by ``FindPaper``; element 3 holds the
            configured full marks (``[]`` means one mark per column).

    Returns:
        ``(validcols, marks)`` with the positional indices of the usable
        columns and a full-mark list of the same length, or ``False`` when
        ``filepath`` is empty or the number of marks does not match the
        number of columns.
    """
    if not filepath:
        return False
    dfcurrent = pd.read_excel(os.path.join(filepath[0], statsfilename))
    cells = [dfcurrent.iloc[1, i] for i in range(len(dfcurrent.columns))]

    # Keep columns whose row-1 name carries a score keyword and no option
    # letter A-D.
    validcols = []
    for i, cell in enumerate(cells):
        colname = str(cell)
        if (any(key in colname for key in _SCORE_KEYWORDS)
                and not re.search("[ABCD]", colname)):
            validcols.append(i)

    # Walk backwards so popping does not disturb positions still to visit.
    # A subjective column is dropped when its question number reappears in
    # a later kept column.
    for pos in range(len(validcols) - 1, -1, -1):
        colname = str(cells[validcols[pos]])
        if "主观" not in colname:
            continue
        match = _SUBJECTIVE_NUMBER.match(colname[2:])
        if match is None:
            # Question number cannot be parsed: keep the column untouched.
            continue
        # NOTE(review): this is a plain substring test against the repr of
        # the later names, so "1" also matches inside "11" - confirm intent.
        later_names = str([cells[c] for c in validcols[pos + 1:]])
        if match.group(1) in later_names:
            validcols.pop(pos)

    if paperinfo[3] == []:
        marks = [1] * len(validcols)
    else:
        marks = paperinfo[3]
    if len(marks) == len(validcols):
        return (validcols, marks)
    return False


def CheckValidity(classpath, gradename, threshold):
    """Check whether enough students of one class submitted the paper.

    The class name is parsed from ``classpath`` ("高一3班" becomes
    ``gradename + "高一03班"``); the head counts are read from row 2 of
    "学科总体分析.xlsx" in the parent directory of ``classpath`` (second
    column: total, third column: submitted). A one-line summary is printed.

    Returns:
        ``(classname, classvalidflag)``; the flag is ``True`` when the
        submitted count is not less than ``threshold`` times the total.

    Raises:
        IndexError: if ``classpath`` contains no "高X<number>班" fragment.
    """
    grade_part, class_number = re.findall(r"(高[一二三])(\d*?)班", classpath)[0]
    classname = gradename + grade_part + class_number.zfill(2) + "班"
    summary = pd.read_excel(
        os.path.join(os.path.split(classpath)[0], "学科总体分析.xlsx"))
    totalstudents = summary.loc[2, summary.columns[1]]
    validstudents = summary.loc[2, summary.columns[2]]
    # "Not less than" the threshold: reaching it exactly counts as valid.
    classvalidflag = bool(validstudents >= threshold * totalstudents)
    if classvalidflag:
        print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents} 人")
    else:
        print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents} 人")
    return (classname, classvalidflag)


def generateIDtoUsageCorrespondence(idlist, validcols, names):
    """Map each problem ID to the positions of its score columns.

    Args:
        idlist: problem-bank IDs; the i-th ID is question number ``i + 1``.
        validcols: usable column indices (only its length is used).
        names: column names aligned with ``validcols``; a list or a pandas
            Series. The question number is the second dot-separated field of
            the name, taken before "(" or, for step columns, before "第".

    Returns:
        dict mapping each ID to the list of positions ``j`` (indices into
        ``validcols``) whose name carries that question number.
    """
    if not idlist:
        return {}
    # Plain list so names are addressed by position whatever index labels a
    # pandas Series argument carries.
    names = list(names)
    question_numbers = []
    for j in range(len(validcols)):
        n = names[j]
        if "步" not in n:
            label = re.findall(r"^([^\(]*)", n)[0]
        else:
            label = re.findall(r"^([^第]*)", n)[0]
        question_numbers.append(getindex(label))
    return {
        problem_id: [j for j, number in enumerate(question_numbers)
                     if number == i + 1]
        for i, problem_id in enumerate(idlist)
    }


def CalculateUsages(statsfilepathlist, statsfilename, gradename, threshold,
                    marks, correspondence_dict, validcols, date):
    """Build the usage-record text for every class with enough submissions.

    For each directory in ``statsfilepathlist`` whose class passes
    ``CheckValidity``, the mean score of each column in ``validcols`` (rows
    ``2:-2`` of the sheet) is divided by the matching entry of ``marks`` and
    written, per problem ID of ``correspondence_dict``, as
    ``"<id>\\n<date>\\t<classname>\\t<rate>[\\t<rate>...]\\n\\n\\n"``.

    Returns:
        The text, starting with ``"ans\\n\\n\\n"``, or ``False`` if any class
        shows a rate above 1 (which means the full marks are wrong).
    """
    chunks = ["ans\n\n\n"]
    validflag = True
    for classdir in statsfilepathlist:
        classname, valid = CheckValidity(classdir, gradename, threshold)
        if not valid:
            continue
        dfcurrent = pd.read_excel(os.path.join(classdir, statsfilename))
        means = dfcurrent.iloc[2:-2, validcols].mean() / marks
        # Element-wise test: unlike builtin max(), it does not depend on
        # where a NaN sits and it tolerates an empty column list.
        if (means > 1).any():
            print("满分数据有误!!!")
            validflag = False
            continue
        means_out = [f"{t:.3f}" for t in means]
        for problem_id, cols in correspondence_dict.items():
            diffs = "\t".join(means_out[u] for u in cols)
            usages = f"{date}\t{classname}\t{diffs}"
            chunks.append(f"{problem_id}\n{usages}\n\n\n")
    if validflag:
        return "".join(chunks)
    return False


def getindex(string, pos=2):
    """Return the ``pos``-th (1-based) dot-separated field of ``string`` as an int."""
    return int(string.split(".")[pos - 1])


if __name__ == "__main__":
    print("数据库工具, import用.")
database_tools import * import zipfile,shutil - -zipfilepath = r"D:\temp\222817032234165544977_G20260160选择性必修第四章数列复习_高一_数学.zip" -# zipfilepath = r"D:\temp\222817041862672707412_控江中学2023学年第一学期高一数学期末考试_高一_数学.zip" tempdir = "临时文件/zips" statsfilename = "小题分_按学号(数学).xlsx" -threshold = 0.96 #设置最低提交人数 answersheetseekingpath = "../备课组" -def ParseZipname(zipfilename): #小闲平台的zip文件中获得试卷编号, 返回试卷编号字符串 - xiaoxianpid = re.findall(r"^(\d*?)_",os.path.split(zipfilename)[1]) - return xiaoxianpid[0] - -def FindFile(dir,filename): #在指定目录及子目录下寻找特定文件名的文件, 返回文件所在的路径列表 - pathlist = [] - for path,m,filenames in os.walk(dir): - if filename in filenames: - pathlist.append(path) - return pathlist - -def FindPaper(xiaoxianpid, answersheetpath): #根据小闲的试卷编号和答题纸对应json的根目录寻找题库的试卷编号,届别,题号, 返回(题库试卷编号,届别,题号列表), 如果未找到则返回False - answersheetpathlist = FindFile(answersheetpath,"答题纸对应.json") - foundpid = False - for dir in answersheetpathlist: - filepath = os.path.join(dir,"答题纸对应.json") - anssheetjson = load_dict(filepath) - if xiaoxianpid in anssheetjson: - foundpid = True - grade = "20"+re.findall(r"\d{2}届",dir)[0] - pid = anssheetjson[xiaoxianpid]["id"] - notesjson = load_dict(os.path.join(dir,"校本材料.json")) - idlist = [] - for part in anssheetjson[xiaoxianpid]["parts"]: - idlist += notesjson["notes"][pid][part].copy() - if "marks" in anssheetjson[xiaoxianpid]: - marks = anssheetjson[xiaoxianpid]["marks"] - else: - marks = [] - break - if foundpid: - return(pid,grade,idlist,marks) - else: - return False - -def CheckPaperType(filepath,filename): #根据filepath(通常是小闲的zip解压出的目录)和filename(通常是"小题分_按学号(数学).xlsx")检测试卷类型, 未找到该文件则返回False, 找到文件且是日常试卷返回"日常卷", 找到文件且不是日常试卷返回"考试卷" - statsfilepathlist = FindFile(filepath,filename) - if statsfilepathlist == []: - return False - else: - dir = statsfilepathlist[0] - dfcurrent = pd.read_excel(os.path.join(dir,filename)) - if re.findall(r"第\d*步",str(dfcurrent.loc[1,:])) == []: - return "日常卷" - else: - return "考试卷" - -def generateColIndexandMarks(filepath,paperinfo): 
#根据filepath(是一个有statsfilename的文件夹列表)中第一个路径中的数据文件及paperinfo(FindPaper返回的结果)寻找excel文件中有效的列的位置和相应的满分分数 - dir = filepath[0] - dfcurrent = pd.read_excel(os.path.join(dir,statsfilename)) - validcols = [] - if papertype == "日常卷": - for i in range(len(dfcurrent.columns)): - colname = str(dfcurrent.iloc[1,i]) - if ("单选" in colname or "填空" in colname or "主观" in colname) and re.findall("[ABCD]",colname) == []: - validcols.append(i) - marks = [1] * len(validcols) - elif papertype == "考试卷": - for i in range(len(dfcurrent.columns)): - colname = str(dfcurrent.iloc[1,i]) - if ("单选" in colname or "填空" in colname or "主观" in colname or "步" in colname) and re.findall("[ABCD]",colname) == []: - validcols.append(i) - for col in range(len(validcols)-1,-1,-1): - colname = str(dfcurrent.iloc[1,validcols[col]]) - if "主观" in colname: - colname_main = re.findall(r"^([\d\.]*)[\($]",colname[2:])[0] - t = [dfcurrent.iloc[1,c] for c in validcols[col+1:]] - t = str(t) - if colname_main in t: - validcols.pop(col) - if paperinfo[3] == []: - marks = [1] * len(validcols) - else: - marks = paperinfo[3] - if len(marks) == len(validcols): - return (validcols,marks) - else: - return False - - - try: @@ -100,40 +19,28 @@ try: except: pass + + xiaoxianpid = ParseZipname(zipfilepath) paperinfo = FindPaper(xiaoxianpid, answersheetseekingpath) - - +gradename = paperinfo[1] +idlist = paperinfo[2] zf = zipfile.ZipFile(zipfilepath) zf.extractall(tempdir) #解压zip文件中的所有内容到tempdir -papertype = CheckPaperType(tempdir,statsfilename) +# papertype = CheckPaperType(tempdir,statsfilename) statsfilepathlist = FindFile(tempdir,statsfilename) +validcols,marks = generateColIndexandMarks(statsfilepathlist,statsfilename,paperinfo) +dfcurrent = pd.read_excel(os.path.join(statsfilepathlist[0],statsfilename)) +correspondence_dict = generateIDtoUsageCorrespondence(idlist,validcols,dfcurrent.iloc[1,validcols]) +output = CalculateUsages(statsfilepathlist,statsfilename,gradename,threshold,marks,correspondence_dict,validcols,date) 
+SaveTextFile(output,"文本文件/metadata.txt") +print("数据文件已输出至metadata.txt") -validcols,marks = generateColIndexandMarks(statsfilepathlist,paperinfo) + + - - - - - -pass - - -# print(ParseZipname(zipfilepath)) - -# df = pd.read_excel(os.path.join(os.path.split(dir)[0],"学科总体分析.xlsx")) -# totalstudents = df.loc[2,df.columns[1]] -# validstudents = df.loc[2,df.columns[2]] -# classname = re.findall(r"高[一二三]\d*?班",dir)[0] -# classvalidflag = False -# if threshold * totalstudents < validstudents: -# print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents} 人") -# classvalidflag = True -# else: -# print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents} 人") -# if classvalidflag: \ No newline at end of file