收集使用记录功能已能使用, 可自适应考试卷与平时卷

2024-01-27 15:19:20 +08:00 · 2024-01-27 15:19:20 +08:00 · 4d3ec1b020
parent 3cd434c5e6
commit 4d3ec1b020
2 changed files with 151 additions and 111 deletions
--- a/工具v2/database_tools.py
+++ b/工具v2/database_tools.py
@ -1475,5 +1475,138 @@ def ExtractProblemIDs(paperdict,pro_dict):#从备课组材料的每一张讲义
                    output.append(id)
    return(output)

+
+def ParseZipname(zipfilename):
+    """Extract the Xiaoxian paper ID from the name of a Xiaoxian-platform zip file.
+
+    The ID is the run of digits at the very start of the base file name that
+    is followed by the first underscore. It is returned as a string.
+    Raises IndexError when the base name does not start with ``<digits>_``.
+    """
+    basename = os.path.basename(zipfilename)
+    leading_ids = re.findall(r"^(\d*?)_", basename)
+    return leading_ids[0]
+
+def FindFile(dir,filename):
+    """Search ``dir`` and all of its subdirectories for files named ``filename``.
+
+    Returns the list of directories that directly contain such a file, in the
+    order os.walk visits them. The list is empty when nothing is found.
+    """
+    return [folder for folder, _subdirs, files in os.walk(dir) if filename in files]
+
+def FindPaper(xiaoxianpid, answersheetpath):
+    """Look up the question-bank paper that corresponds to a Xiaoxian paper ID.
+
+    Every directory under ``answersheetpath`` that holds a "答题纸对应.json"
+    file is checked in turn; the first file having ``xiaoxianpid`` as a key wins.
+
+    Returns ``(pid, grade, idlist, marks)``:
+      pid    -- the "id" value of the matching entry,
+      grade  -- "20" followed by the first ``NN届`` found in the directory path,
+      idlist -- problem IDs concatenated from "校本材料.json" for each listed part,
+      marks  -- the entry's "marks" value, or [] when the entry has none.
+    Returns False when no mapping file contains ``xiaoxianpid``.
+    """
+    for folder in FindFile(answersheetpath,"答题纸对应.json"):
+        sheetmap = load_dict(os.path.join(folder,"答题纸对应.json"))
+        if xiaoxianpid not in sheetmap:
+            continue
+        entry = sheetmap[xiaoxianpid]
+        grade = "20"+re.findall(r"\d{2}届",folder)[0]
+        pid = entry["id"]
+        notesjson = load_dict(os.path.join(folder,"校本材料.json"))
+        idlist = []
+        for part in entry["parts"]:
+            idlist.extend(notesjson["notes"][pid][part].copy())
+        marks = entry["marks"] if "marks" in entry else []
+        # Only the first matching mapping file is used.
+        return (pid,grade,idlist,marks)
+    return False
+
+def CheckPaperType(filepath,filename):
+    """Detect whether an extracted Xiaoxian export is a daily paper or an exam paper.
+
+    filepath -- directory to search (usually where the Xiaoxian zip was extracted).
+    filename -- stats workbook name (usually "小题分_按学号（数学）.xlsx").
+
+    Returns False when ``filename`` is not found under ``filepath``. Otherwise the
+    first copy found is read: "考试卷" is returned when any cell in row 1 contains
+    a "第N步" marker, and "日常卷" when none does.
+    """
+    statsfilepathlist = FindFile(filepath,filename)
+    if statsfilepathlist == []:
+        return False
+    dfcurrent = pd.read_excel(os.path.join(statsfilepathlist[0],filename))
+    # Inspect the cells one by one. str(Series) is a display repr that pandas
+    # abbreviates for long rows, so searching it can miss a step column on a
+    # wide sheet.
+    has_step_column = any(re.search(r"第\d*步",str(cell)) for cell in dfcurrent.loc[1,:])
+    return "考试卷" if has_step_column else "日常卷"
+
+def generateColIndexandMarks(filepath,statsfilename,paperinfo):
+    """Find the usable score columns of the stats workbook and their full marks.
+
+    filepath      -- list of directories containing ``statsfilename``; only the
+                     first one is read.
+    statsfilename -- name of the Excel stats workbook.
+    paperinfo     -- the tuple returned by FindPaper; item 3 is the list of full
+                     marks, where [] means every column is worth 1 point.
+
+    A column is usable when its title (row 1) contains "单选", "填空", "主观" or
+    "步" and none of the letters A-D. A "主观" column is then dropped when its
+    question number also appears in a later usable column's title.
+
+    Returns ``(validcols, marks)``, or False when the two lists differ in length.
+    """
+    dfcurrent = pd.read_excel(os.path.join(filepath[0],statsfilename))
+    titles = [str(dfcurrent.iloc[1,i]) for i in range(len(dfcurrent.columns))]
+    validcols = [
+        i for i,title in enumerate(titles)
+        if ("单选" in title or "填空" in title or "主观" in title or "步" in title)
+        and re.findall("[ABCD]",title) == []
+    ]
+    # Walk backwards so popping never shifts a position that is still to be visited.
+    for pos in range(len(validcols)-1,-1,-1):
+        title = titles[validcols[pos]]
+        if "主观" not in title:
+            continue
+        # "$" is a literal inside a character class, so the end-of-string case
+        # needs an explicit alternation. Without it a title with no "(" gives
+        # no match and indexing [0] raises IndexError.
+        found = re.findall(r"^([\d\.]*)(?:\(|$)",title[2:])
+        if not found or found[0] == "":
+            # No question number to compare (an empty string would match
+            # every later title), so keep the column.
+            continue
+        later_titles = str([dfcurrent.iloc[1,c] for c in validcols[pos+1:]])
+        # NOTE(review): this is a substring test, so "1" also matches a later
+        # "11..." title -- confirm against the real title format.
+        if found[0] in later_titles:
+            validcols.pop(pos)
+    if paperinfo[3] == []:
+        marks = [1] * len(validcols)
+    else:
+        marks = paperinfo[3]
+    if len(marks) == len(validcols):
+        return (validcols,marks)
+    else:
+        return False
+                
+
+def CheckValidity(classpath,gradename,threshold):
+    """Check whether enough students of one class submitted the paper.
+
+    classpath -- class folder path; it must contain a "高X...班" fragment, which
+                 is used to build the class name.
+    gradename -- prefix put in front of the class name.
+    threshold -- minimum accepted ratio of submitted to total students.
+
+    The counts are read from "学科总体分析.xlsx" in the parent of ``classpath``
+    (row 2; second column = total, third column = submitted). A result line is
+    printed either way.
+
+    Returns ``(classname, valid)``, where ``valid`` is True when the submitted
+    count is not less than ``threshold * total``.
+    Raises IndexError when ``classpath`` has no "高X...班" fragment.
+    """
+    grade_label, class_no = re.findall(r"(高[一二三])(\d*?)班",classpath)[0]
+    classname = gradename + grade_label + class_no.zfill(2) + "班"
+    df = pd.read_excel(os.path.join(os.path.split(classpath)[0],"学科总体分析.xlsx"))
+    totalstudents = df.loc[2,df.columns[1]]
+    validstudents = df.loc[2,df.columns[2]]
+    # The threshold is a minimum, so a class sitting exactly on it counts as valid.
+    classvalidflag = bool(validstudents >= threshold * totalstudents)
+    if classvalidflag:
+        print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents} 人")
+    else:
+        print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents} 人")
+    return (classname,classvalidflag)
+
+def generateIDtoUsageCorrespondence(idlist,validcols,names):
+    """Map every question-bank problem ID to the score columns that belong to it.
+
+    idlist    -- question-bank problem IDs in paper order; the i-th ID (0-based)
+                 is question number i+1.
+    validcols -- usable column positions; only its length is used here.
+    names     -- column titles, one per entry of ``validcols`` (a list or a
+                 pandas Series).
+
+    The question number of a title is read by getindex from the part before
+    "第" (titles containing "步") or before "(" (all other titles).
+
+    Returns a dict ``{problem_id: [j, ...]}`` where each ``j`` is a position
+    within ``validcols``, i.e. in the range 0..len(validcols)-1.
+    """
+    if len(idlist) == 0:
+        return {}
+    # Read the titles strictly by position. Integer indexing on a label-indexed
+    # Series relies on a deprecated positional fallback.
+    titles = list(names)
+    # Parse each title once rather than once per problem ID.
+    question_numbers = []
+    for j in range(len(validcols)):
+        title = titles[j]
+        if "步" not in title:
+            prefix = re.findall(r"^([^\(]*)",title)[0]
+        else:
+            prefix = re.findall(r"^([^第]*)",title)[0]
+        question_numbers.append(getindex(prefix))
+    corr_dict = {}
+    for i,problem_id in enumerate(idlist):
+        corr_dict[problem_id] = [j for j,number in enumerate(question_numbers) if number == i+1]
+    return corr_dict
+
+def CalculateUsages(statsfilepathlist,statsfilename,gradename,threshold,marks,correspondence_dict,validcols,date):
+    """Build the text of the usages metadata.txt file from the per-class stats workbooks.
+
+    statsfilepathlist   -- class folders that each contain ``statsfilename``.
+    statsfilename       -- name of the Excel stats workbook.
+    gradename/threshold -- passed on to CheckValidity; classes it reports as
+                           invalid are skipped.
+    marks               -- full marks, one per entry of ``validcols``.
+    correspondence_dict -- problem ID -> list of positions within ``validcols``.
+    validcols           -- usable column positions in the workbook.
+    date                -- date string written on every usage line.
+
+    For each valid class, the column means over the rows from index 2 up to
+    (not including) the last two are divided by ``marks``. One record is then
+    written per problem ID.
+
+    Returns the text, starting with "ans". Returns False when any valid class
+    has a ratio above 1, which means the full marks are wrong.
+    """
+    chunks = ["ans\n\n\n"]
+    all_ok = True
+    for folder in statsfilepathlist:
+        classname, valid = CheckValidity(folder,gradename,threshold)
+        if not valid:
+            continue
+        dfcurrent = pd.read_excel(os.path.join(folder,statsfilename))
+        means = dfcurrent.iloc[2:-2,validcols].mean()/marks
+        if max(means)>1:
+            print("满分数据有误!!!")
+            all_ok = False
+            continue
+        means_out = [f"{t:.3f}" for t in means]
+        for problem_id, cols in correspondence_dict.items():
+            diffs = "\t".join([means_out[u] for u in cols])
+            chunks.append(f"{problem_id}\n{date}\t{classname}\t{diffs}\n\n\n")
+    return "".join(chunks) if all_ok else False
+
+
+def getindex(string,pos = 2):
+    """Return the ``pos``-th (1-based) dot-separated field of ``string`` as an int."""
+    return int(string.split(".")[pos-1])
+
+
 if __name__ == "__main__":
    print("数据库工具, import用.")
--- a/工具v2/收集使用记录.py
+++ b/工具v2/收集使用记录.py
@ -1,97 +1,16 @@
-"""工程中"""
+zipfilepath = r"D:\temp\222817032234165544977_G20260160选择性必修第四章数列复习_高一_数学.zip"
+# zipfilepath = r"D:\temp\222817041862672707412_控江中学2023学年第一学期高一数学期末考试_高一_数学.zip"
+date = "20240126"
+threshold = 0.75 #设置最低提交人数比例
+

 from database_tools import *
 import zipfile,shutil

-
-zipfilepath = r"D:\temp\222817032234165544977_G20260160选择性必修第四章数列复习_高一_数学.zip"
-# zipfilepath = r"D:\temp\222817041862672707412_控江中学2023学年第一学期高一数学期末考试_高一_数学.zip"
 tempdir = "临时文件/zips"
 statsfilename = "小题分_按学号（数学）.xlsx"
-threshold = 0.96 #设置最低提交人数
 answersheetseekingpath = "../备课组"

-def ParseZipname(zipfilename): #小闲平台的zip文件中获得试卷编号, 返回试卷编号字符串
-    xiaoxianpid = re.findall(r"^(\d*?)_",os.path.split(zipfilename)[1])
-    return xiaoxianpid[0]
-
-def FindFile(dir,filename): #在指定目录及子目录下寻找特定文件名的文件, 返回文件所在的路径列表
-    pathlist = []
-    for path,m,filenames in os.walk(dir):
-        if filename in filenames:
-            pathlist.append(path)
-    return pathlist
-
-def FindPaper(xiaoxianpid, answersheetpath): #根据小闲的试卷编号和答题纸对应json的根目录寻找题库的试卷编号,届别,题号, 返回(题库试卷编号,届别,题号列表), 如果未找到则返回False
-    answersheetpathlist = FindFile(answersheetpath,"答题纸对应.json")
-    foundpid = False
-    for dir in answersheetpathlist:
-        filepath = os.path.join(dir,"答题纸对应.json")
-        anssheetjson = load_dict(filepath)
-        if xiaoxianpid in anssheetjson:
-            foundpid = True
-            grade = "20"+re.findall(r"\d{2}届",dir)[0]
-            pid = anssheetjson[xiaoxianpid]["id"]
-            notesjson = load_dict(os.path.join(dir,"校本材料.json"))
-            idlist = []
-            for part in anssheetjson[xiaoxianpid]["parts"]:
-                idlist += notesjson["notes"][pid][part].copy()
-            if "marks" in anssheetjson[xiaoxianpid]:
-                marks = anssheetjson[xiaoxianpid]["marks"]
-            else:
-                marks = []
-            break      
-    if foundpid:
-        return(pid,grade,idlist,marks)
-    else:
-        return False
-
-def CheckPaperType(filepath,filename): #根据filepath(通常是小闲的zip解压出的目录)和filename(通常是"小题分_按学号（数学）.xlsx")检测试卷类型, 未找到该文件则返回False, 找到文件且是日常试卷返回"日常卷", 找到文件且不是日常试卷返回"考试卷"
-    statsfilepathlist = FindFile(filepath,filename)
-    if statsfilepathlist == []:
-        return False
-    else:
-        dir = statsfilepathlist[0]
-        dfcurrent = pd.read_excel(os.path.join(dir,filename))
-        if re.findall(r"第\d*步",str(dfcurrent.loc[1,:])) == []:
-            return "日常卷"
-        else:
-            return "考试卷"
-
-def generateColIndexandMarks(filepath,paperinfo): #根据filepath(是一个有statsfilename的文件夹列表)中第一个路径中的数据文件及paperinfo(FindPaper返回的结果)寻找excel文件中有效的列的位置和相应的满分分数
-    dir = filepath[0]
-    dfcurrent = pd.read_excel(os.path.join(dir,statsfilename))
-    validcols = []
-    if papertype == "日常卷":
-        for i in range(len(dfcurrent.columns)):
-            colname = str(dfcurrent.iloc[1,i])
-            if ("单选" in colname or "填空" in colname or "主观" in colname) and re.findall("[ABCD]",colname) == []:
-                validcols.append(i)
-        marks = [1] * len(validcols)
-    elif papertype == "考试卷":
-        for i in range(len(dfcurrent.columns)):
-            colname = str(dfcurrent.iloc[1,i])
-            if ("单选" in colname or "填空" in colname or "主观" in colname or "步" in colname) and re.findall("[ABCD]",colname) == []:
-                validcols.append(i)
-        for col in range(len(validcols)-1,-1,-1):
-            colname = str(dfcurrent.iloc[1,validcols[col]])
-            if "主观" in colname:
-                colname_main = re.findall(r"^([\d\.]*)[\($]",colname[2:])[0]
-                t = [dfcurrent.iloc[1,c] for c in validcols[col+1:]]
-                t = str(t)
-                if colname_main in t:
-                    validcols.pop(col)
-    if paperinfo[3] == []:
-        marks = [1] * len(validcols)
-    else:
-        marks = paperinfo[3]
-    if len(marks) == len(validcols):
-        return (validcols,marks)
-    else:
-        return False
-                
-
-


 try:
@ -100,40 +19,28 @@ try:
 except:
    pass

+
+
 xiaoxianpid = ParseZipname(zipfilepath)
 paperinfo = FindPaper(xiaoxianpid, answersheetseekingpath)
-
-
+gradename = paperinfo[1]
+idlist = paperinfo[2]

 zf = zipfile.ZipFile(zipfilepath)
 zf.extractall(tempdir) #解压zip文件中的所有内容到tempdir

-papertype = CheckPaperType(tempdir,statsfilename)
+# papertype = CheckPaperType(tempdir,statsfilename)
 statsfilepathlist = FindFile(tempdir,statsfilename)
+validcols,marks = generateColIndexandMarks(statsfilepathlist,statsfilename,paperinfo)
+dfcurrent = pd.read_excel(os.path.join(statsfilepathlist[0],statsfilename))
+correspondence_dict = generateIDtoUsageCorrespondence(idlist,validcols,dfcurrent.iloc[1,validcols])
+output = CalculateUsages(statsfilepathlist,statsfilename,gradename,threshold,marks,correspondence_dict,validcols,date)
+SaveTextFile(output,"文本文件/metadata.txt")
+print("数据文件已输出至metadata.txt")

-validcols,marks = generateColIndexandMarks(statsfilepathlist,paperinfo)
+
+                      




-
-
-
-
-
-pass       
-
-
-# print(ParseZipname(zipfilepath))
-
-# df = pd.read_excel(os.path.join(os.path.split(dir)[0],"学科总体分析.xlsx"))
-#     totalstudents = df.loc[2,df.columns[1]]
-#     validstudents = df.loc[2,df.columns[2]]
-#     classname = re.findall(r"高[一二三]\d*?班",dir)[0]
-#     classvalidflag = False
-#     if threshold * totalstudents < validstudents:
-#         print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents} 人")
-#         classvalidflag = True
-#     else:
-#         print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents} 人")
-#     if classvalidflag: