diff --git a/工具v2/小闲平台使用数据导入.py b/工具v2/小闲平台使用数据导入.py
new file mode 100644
index 00000000..80dbbdc0
--- /dev/null
+++ b/工具v2/小闲平台使用数据导入.py
@@ -0,0 +1,163 @@
+"""Import usage data exported from the Xiaoxian platform into metadata.txt.
+
+The working directory ``filepath`` must contain exactly one .txt (or .tex)
+file plus the .zip exports; anything else in it is ignored.  Homework blocks
+in the text file are separated by blank lines: the first line is the homework
+id (it must occur in exactly one zip file path), the second line is the date,
+and each remaining line starts with an answer-sheet region number followed by
+whitespace and the problem id for that region.  A block whose first line
+starts with "#" is not read.
+"""
+import os
+import re
+import zipfile
+from pathlib import Path
+
+import pandas as pd
+
+
+# Working directory holding the description file and the zip exports.
+filepath = r"C:\Users\weiye\Documents\wwy sync\xiaoxian待导入"
+
+# Base year of the cohort label and the acceptance threshold.
+semester = 2023
+threshold = 0.5  # a class is valid when more than this share handed in
+
+# Years added to ``semester`` to get the cohort year of each grade.
+shiftdict = {"高一": 3, "高二": 2, "高三": 1}
+
+
+def stringcount(string, list):
+    """Count the items of ``list`` that contain ``string``.
+
+    Returns ``(count, item)`` where ``item`` is the last matching item, or
+    ``""`` when nothing matches.
+    """
+    matches = [item for item in list if string in item]
+    return (len(matches), matches[-1] if matches else "")
+
+
+def _load_sheet(zf, member, target):
+    """Copy ``member`` of the open zip ``zf`` to ``target`` and read it.
+
+    Writing the bytes straight to ``target`` replaces any stale scratch file
+    and avoids unpacking the member's folders into the current directory.
+    """
+    Path(target).write_bytes(zf.read(member))
+    return pd.read_excel(target)
+
+
+def _region_to_problem(regions, usage_lines):
+    """Map each answer-sheet region number to the problem id on its line."""
+    mapping = {}
+    for region in regions:
+        # Escape the region so characters such as "." or "(" match literally.
+        prefix = re.compile(re.escape(str(region)) + r"\s")
+        for line in usage_lines:
+            found = prefix.match(line)
+            if found:
+                mapping[region] = line[found.end():]
+    return mapping
+
+
+def _enough_handed_in(handindf):
+    """Tell whether the row at the ``threshold`` position is not marked absent.
+
+    NOTE(review): presumably the score sheet lists absent ("缺考") students
+    last, so this row decides whether enough students handed in -- confirm.
+    """
+    if handindf.empty:
+        return False
+    # Clamp so that ``threshold = 1`` checks the last row instead of raising.
+    row = min(int(len(handindf) * threshold), len(handindf) - 1)
+    return str(handindf.iloc[row]["Unnamed: 4"]) != "缺考"
+
+
+def _class_name(statfile):
+    """Build the class label (e.g. ``2026届高一03班``) from a stat-file name."""
+    gradename = re.findall(r"高[一二三]", statfile)[0]
+    number = re.findall(r"高[一二三]([\d]*?)班", statfile)[0]
+    return str(semester + shiftdict[gradename]) + "届" + gradename + number.zfill(2) + "班"
+
+
+# Locate the description file and the zip exports by file extension.
+files = [os.path.join(filepath, f) for f in os.listdir(filepath)]
+tex_file = [f for f in files if Path(f).suffix.lower() in (".tex", ".txt")][0]
+zip_files = [f for f in files if Path(f).suffix.lower() == ".zip"]
+
+# Split the description file into one block per homework.
+with open(tex_file, "r", encoding="utf8") as f:
+    tex_data = f.read().strip()
+tex_data = re.sub(r"\t+", " ", tex_data)
+homeworklist = re.split(r"\n{2,}", tex_data)
+
+# Read the first line (id) and second line (date) of every block.
+execflag = True
+homeworkdict = {}
+for block in homeworklist:
+    hwkdata = block.strip().split("\n")
+    hwk_id = hwkdata.pop(0).replace(" ", "")
+    if hwk_id.startswith("#"):
+        continue
+    if not hwkdata:
+        # Without a date line the block cannot be converted; abort the run.
+        execflag = False
+        print("!!!作业数据缺少日期行:", hwk_id)
+        continue
+    homeworkdict[hwk_id] = {"date": hwkdata.pop(0), "usage_data": hwkdata}
+
+# Check that every homework matches exactly one zip file.
+for hwk_id in homeworkdict:
+    if stringcount(hwk_id, zip_files)[0] == 1:
+        print("zip文件在文件夹中:", hwk_id)
+    else:
+        execflag = False
+        print("!!!zip文件个数不对:", hwk_id)
+
+
+if execflag:
+    records = ["usages\n\n"]
+    for hwk_id, homework in homeworkdict.items():
+        print("正在处理%s" % hwk_id)
+        date = homework["date"]
+        # The context manager closes every archive, not only the last one.
+        with zipfile.ZipFile(stringcount(hwk_id, zip_files)[1]) as zf:
+            # Files in the zip that hold the correct-rate data.
+            statfiles = [m.filename for m in zf.filelist if "试题分析" in m.filename]
+
+            # Region numbers are read from the first statistics sheet.
+            df = _load_sheet(zf, statfiles[0], "临时文件/statsfile.xlsx")
+            correspondence_dict = _region_to_problem(
+                list(df[df.columns[0]][2:]), homework["usage_data"]
+            )
+
+            for sf in statfiles:
+                classname = _class_name(sf)
+                df = _load_sheet(zf, sf, "临时文件/statsfile.xlsx")
+                handindf = _load_sheet(
+                    zf, sf[:sf.find("数学")] + "学生成绩.xlsx", "临时文件/handinfile.xlsx"
+                )
+                if not _enough_handed_in(handindf):
+                    print(classname, "无效")
+                    continue
+                difficulties = list(df[df.columns[9]][2:])
+                problems_indexes = list(df[df.columns[0]][2:])
+                # Collect the difficulties of this class per problem id.
+                class_difficulty = {}
+                for region, difficulty in zip(problems_indexes, difficulties):
+                    if region in correspondence_dict:
+                        class_difficulty.setdefault(
+                            correspondence_dict[region], []
+                        ).append(difficulty)
+                for problem_id, values in class_difficulty.items():
+                    records.append(
+                        problem_id + "\n" + date + "\t" + classname + "\t"
+                        + "\t".join("%.3f" % float(v) for v in values) + "\n\n"
+                    )
+                print(classname, "有效")
+
+    outputstr = "".join(records)
+    with open("临时文件/自动转换结果.txt", "w", encoding="utf8") as f:
+        f.write(outputstr)
+    with open("文本文件/metadata.txt", "w", encoding="utf8") as f:
+        f.write(outputstr)
\ No newline at end of file
diff --git a/工具v2/工具面板.py b/工具v2/工具面板.py
index 99ce74f6..9cd67d37 100644
--- a/工具v2/工具面板.py
+++ b/工具v2/工具面板.py
@@ -88,6 +88,8 @@ MaintainenceMenu.add_command(label = "转换手打答案至metadata", command =
 MaintainenceMenu.add_separator()
 MaintainenceMenu.add_command(label = "移除关联题号", command = lambda: SetButton("移除关联题号",["文本文件/metadata.txt"]))
 MaintainenceMenu.add_separator()
+MaintainenceMenu.add_command(label = "小闲平台使用数据导入", command = lambda: SetButton("小闲平台使用数据导入",["小闲平台使用数据导入.py"]))
+MaintainenceMenu.add_separator()
 MaintainenceMenu.add_command(label = "分类题号字典生成", command = lambda: SetButton("分类题号字典生成",["分类题号字典生成.py"]))