新增小闲平台大型考试数据导入功能并加入工具面板
This commit is contained in:
parent
7453b4a442
commit
b04f454a02
|
|
@ -0,0 +1,125 @@
|
||||||
|
import zipfile,os,re
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Working directory. It is expected to contain exactly one .txt (or .tex)
# file together with some .zip files; anything else in it is ignored.
# Homework records whose first line starts with "#" are not read.
filepath = r"C:\Users\weiye\Documents\wwy sync\xiaoxian待导入"

# Cohort base year (added to the per-grade shift to build the class name)
# and the acceptance-ratio threshold (currently disabled).
semester = 2023
# threshold = 0.5  # data counts as valid once a class's hand-in ratio exceeds this
|
||||||
|
|
||||||
|
def getindex(string, pos=2):
    """Return one dot-separated field of ``string`` converted to ``int``.

    ``pos`` is 1-based: ``pos=2`` (the default) selects the second field of
    a string such as ``"12.34.56"``.
    """
    fields = string.split(".")
    wanted = fields[pos - 1]
    return int(wanted)
|
||||||
|
|
||||||
|
def stringcount(string, list):
    """Count the entries of ``list`` that contain ``string`` as a substring.

    Returns a ``(count, item)`` tuple where ``item`` is the last matching
    entry, or the empty string when nothing matched.
    """
    hits = [entry for entry in list if string in entry]
    last_hit = hits[-1] if hits else ""
    return (len(hits), last_hit)
|
||||||
|
|
||||||
|
|
||||||
|
# Offset added to ``semester`` for each grade label when the class name is
# built from an exported sheet's file name.
shiftdict = {
    "高一": 3,
    "高二": 2,
    "高三": 1,
}

# Column-header patterns. Each captures (question number, second field):
# for the first two the second field is the bracketed number, for the third
# it is the literal "步" marker.
patterns = [
    r"填空([\d\.]+)\((\d+)\)",
    r"单选([\d\.]+)\((\d+)\)",
    r"^([\d\.]+)第\d+(步)",
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Import driver.
#
# Reads the homework list (.txt/.tex) in ``filepath``, pairs every record
# with exactly one zip export, computes for each class sheet in the zip the
# mean score of each mapped column divided by its full mark, and writes the
# result as a "usages" text file.
# ---------------------------------------------------------------------------

# Fixed location every extracted sheet is moved to before it is read.
_TEMP_SHEET = "临时文件/statsfile.xlsx"


def _load_score_sheet(zf, member):
    """Extract ``member`` from the open zip ``zf`` and read it with pandas.

    The member is extracted relative to the current working directory (the
    default of ``ZipFile.extract``) and then moved onto ``_TEMP_SHEET``,
    replacing any sheet left there by an earlier call. The first two rows of
    the sheet are skipped when reading.
    """
    os.replace(zf.extract(member), _TEMP_SHEET)
    return pd.read_excel(_TEMP_SHEET, skiprows=2)


# Locate the homework list and the zip exports. Extensions are compared on
# the end of the name so that e.g. "x.zip.part" is not mistaken for a zip.
files = [os.path.join(filepath, f) for f in os.listdir(filepath)]
text_candidates = [f for f in files if f.lower().endswith((".tex", ".txt"))]
if not text_candidates:
    raise FileNotFoundError("工作目录中未找到.txt或.tex文件: " + filepath)
tex_file = text_candidates[0]
zip_files = [f for f in files if f.lower().endswith(".zip")]

# Split the list into one block per homework; blocks are separated by one or
# more empty lines.
with open(tex_file, "r", encoding="utf8") as f:
    tex_data = f.read().strip()
tex_data = re.sub(r"\t+", r" ", tex_data)
homeworklist = re.split(r"\n{2,}", tex_data)

# First line of a block is the homework name (spaces removed), second line
# is the date, the remaining lines are the usage data.
homeworkdict = {}
for hwk in homeworklist:
    hwkdata = hwk.strip().split("\n")
    hwk_id = hwkdata.pop(0).replace(" ", "")
    if hwk_id.startswith("#"):
        # Commented-out record: skip before touching the date line, so a
        # lone "#..." line does not raise.
        continue
    hwk_date = hwkdata.pop(0)
    homeworkdict[hwk_id] = {"date": hwk_date, "usage_data": hwkdata}

# Check that every record matches exactly one zip file, remembering the
# matched path so it does not have to be looked up again.
execflag = True
zip_of = {}
for hwk_id in homeworkdict:
    match_count, matched_zip = stringcount(hwk_id, zip_files)
    if match_count == 1:
        zip_of[hwk_id] = matched_zip
        print("zip文件在文件夹中:", hwk_id)
    else:
        execflag = False
        print("!!!zip文件个数不对:", hwk_id)

if execflag:
    records = []
    for hid, homework in homeworkdict.items():
        print("正在处理%s" % hid)
        date = homework["date"]

        # ``zip_of`` already holds the full path; the context manager closes
        # every archive, not only the last one.
        with zipfile.ZipFile(zip_of[hid]) as zf:
            # Sheets inside the zip that carry the per-question scores.
            handinfiles = [m.filename for m in zf.filelist if "小题分_按学号" in m.filename]

            # Use the first sheet to learn which columns belong to which
            # question number and what each column's full mark is.
            df = _load_score_sheet(zf, handinfiles[0])
            indices = {}
            for col in df.columns:
                for pattern in patterns:
                    res = re.findall(pattern, str(col))
                    if not res:
                        continue
                    question_no, mark = res[0]
                    marks = indices.setdefault(question_no, {})
                    if "步" in mark:
                        # Step-scored columns carry no full mark in the
                        # header, so ask the operator for it.
                        marks[col] = int(input(f"{hid}-{col}的满分:"))
                    else:
                        marks[col] = int(mark)

            # Each usage line is "<question number> <output id>"; keep only
            # the question numbers that were found in the sheet header.
            corresp_dict = {}
            for rawline in homework["usage_data"]:
                fields = rawline.split()
                if not fields:
                    continue
                a, b = fields
                if a in indices:
                    corresp_dict[b] = indices[a].copy()

            for excelfile in handinfiles:
                # The last two rows of each sheet are dropped before averaging.
                df = _load_score_sheet(zf, excelfile)[:-2]
                gradename = re.findall(r"高[一二三]", excelfile)[0]
                classno = re.findall(r"高[一二三]([\d]*?)班", excelfile)[0].zfill(2)
                classname = str(semester + shiftdict[gradename]) + "届" + gradename + classno + "班"
                for problem_id, colandmarks in corresp_dict.items():
                    rates = "".join(
                        f"\t{df[col].mean() / mark:.3f}"
                        for col, mark in colandmarks.items()
                    )
                    records.append(f"{problem_id}\n{date}\t{classname}{rates}\n\n")

    outputstr = "usages\n\n" + "".join(records)
    with open("临时文件/自动转换结果.txt", "w", encoding="utf8") as f:
        f.write(outputstr)
    with open("文本文件/metadata.txt", "w", encoding="utf8") as f:
        f.write(outputstr)
|
||||||
|
|
@ -89,6 +89,7 @@ MaintainenceMenu.add_separator()
|
||||||
MaintainenceMenu.add_command(label = "移除关联题号", command = lambda: SetButton("移除关联题号",["文本文件/metadata.txt"]))
|
MaintainenceMenu.add_command(label = "移除关联题号", command = lambda: SetButton("移除关联题号",["文本文件/metadata.txt"]))
|
||||||
MaintainenceMenu.add_separator()
|
MaintainenceMenu.add_separator()
|
||||||
MaintainenceMenu.add_command(label = "小闲平台使用数据导入", command = lambda: SetButton("小闲平台使用数据导入",["小闲平台使用数据导入.py"]))
|
MaintainenceMenu.add_command(label = "小闲平台使用数据导入", command = lambda: SetButton("小闲平台使用数据导入",["小闲平台使用数据导入.py"]))
|
||||||
|
# Menu entry for the large-exam import tool: selecting it calls SetButton with
# this label and the script file name (presumably SetButton runs the listed
# script -- confirm against its definition, which is outside this excerpt).
MaintainenceMenu.add_command(label = "小闲平台大型考试数据导入", command = lambda: SetButton("小闲平台大型考试数据导入",["小闲平台大型考试数据导入.py"]))
|
||||||
MaintainenceMenu.add_separator()
|
MaintainenceMenu.add_separator()
|
||||||
MaintainenceMenu.add_command(label = "分类题号字典生成", command = lambda: SetButton("分类题号字典生成",["分类题号字典生成.py"]))
|
MaintainenceMenu.add_command(label = "分类题号字典生成", command = lambda: SetButton("分类题号字典生成",["分类题号字典生成.py"]))
|
||||||
|
|
||||||
|
|
|
||||||
Reference in New Issue