This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具v2/小闲平台大型考试数据导入.py

126 lines
4.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import zipfile,os,re
import pandas as pd
from pathlib import Path
# Working directory. It must contain exactly one .txt (or .tex) file plus some
# .zip files; anything else in the directory is ignored.
# Homework entries whose first line starts with "#" are not read.
filepath = r"C:\Users\weiye\Documents\wwy sync\xiaoxian待导入"
# Cohort setting and the accepted submission-ratio threshold.
# NOTE(review): `semester` is added to a per-grade offset when class labels are
# built further down; presumably it is the starting year of the school year — confirm.
semester = 2023
# threshold = 0.5  # data would count as valid once a class's submission ratio exceeds this
def getindex(string, pos=2):
    """Return the ``pos``-th dot-separated field of ``string`` as an int.

    ``pos`` is 1-based: ``getindex("3.12")`` yields ``12``. Because the field
    is looked up with ``pos - 1`` as a plain sequence index, ``pos <= 0``
    counts from the end, and a position past the last field raises
    ``IndexError``. A field that is not an integer literal raises
    ``ValueError``.
    """
    return int(string.split(".")[pos - 1])
def stringcount(string, list):
    """Count the entries of ``list`` that contain ``string``.

    Returns a ``(count, item)`` tuple where ``count`` is the number of entries
    containing ``string`` and ``item`` is the last such entry, or ``""`` when
    nothing matched.
    """
    hits = [entry for entry in list if string in entry]
    last_hit = hits[-1] if hits else ""
    return (len(hits), last_hit)
# Offset added to `semester` when a class label is built, keyed by grade name.
shiftdict = {"高一": 3, "高二": 2, "高三": 1}
# Regexes applied to the score-sheet column headers. Each one captures
# (question id, marks); the third captures the literal "步" instead of a number.
patterns = [r"填空([\d\.]+)\((\d+)\)",r"单选([\d\.]+)\((\d+)\)",r"^([\d\.]+)第\d+(步)"]
# Build tex_file and zip_files from the contents of the working directory.
files = [os.path.join(filepath, f) for f in os.listdir(filepath)]
# Match on the file extension rather than on a substring of the whole path, so
# that directory names or files such as "x.zip.bak" are not picked up by accident.
text_candidates = [f for f in files if f.lower().endswith((".tex", ".txt"))]
if not text_candidates:
    raise FileNotFoundError("工作目录中未找到.txt或.tex文件: " + filepath)
if len(text_candidates) > 1:
    print("!!!工作目录中有多个.txt/.tex文件, 仅使用:", text_candidates[0])
tex_file = text_candidates[0]
zip_files = [f for f in files if f.lower().endswith(".zip")]
# Split the usage file into one text block per homework.
# "utf-8-sig" reads plain UTF-8 as well, but also drops a leading BOM so that
# it cannot end up inside the first homework id.
with open(tex_file, "r", encoding="utf-8-sig") as f:
    tex_data = f.read().strip()
tex_data = re.sub(r"\t+", r" ", tex_data)
# Blocks are separated by one or more empty lines.
homeworklist = re.split(r"\n{2,}", tex_data)
# For every block read the first line (file name, used as the id) and the
# second line (date); the remaining lines are kept as usage data.
homeworkdict = {}
for hwk in homeworklist:
    hwkdata = hwk.strip().split("\n")
    if len(hwkdata) < 2:
        # An empty file or a block without a date line would otherwise fail
        # with an IndexError; report it and move on.
        if hwkdata[0]:
            print("!!!作业数据不完整(缺少日期行), 已跳过:", hwkdata[0])
        continue
    hwk_id = hwkdata[0].replace(" ", "")
    if hwk_id.startswith("#"):
        # Blocks whose first line starts with "#" are deliberately ignored.
        continue
    homeworkdict[hwk_id] = {"date": hwkdata[1], "usage_data": hwkdata[2:]}
# Check that every homework id is contained in the path of exactly one zip
# file; execflag stays True only if that holds for all of them.
# NOTE(review): the loop variable `id` shadows the builtin; the name is left
# unchanged because it is a module-level name.
execflag = True
for id in homeworkdict:
    zip_hits = stringcount(id, zip_files)[0]
    if zip_hits != 1:
        execflag = False
        print("!!!zip文件个数不对:",id)
        continue
    print("zip文件在文件夹中:",id)
# Scratch workbook that every score sheet is extracted to before being read.
STATS_FILE = "临时文件/statsfile.xlsx"


def _load_stats_sheet(archive, member):
    """Extract ``member`` from the open zip ``archive`` and load it with pandas.

    The member is extracted into the current working directory and then moved
    onto ``STATS_FILE``; ``Path.replace`` overwrites a workbook left behind by
    an earlier call. The first two rows of the sheet are skipped when reading.
    """
    Path(archive.extract(member)).replace(STATS_FILE)
    return pd.read_excel(STATS_FILE, skiprows=2)


# Main conversion: runs only when every homework id matched exactly one zip.
if execflag:
    sections = ["usages\n\n"]
    for hid in homeworkdict:
        print("正在处理%s"%hid)
        homework = homeworkdict[hid]
        date = homework["date"]
        # stringcount returns the matching entry of zip_files, which is
        # already a full path.
        zip_file = stringcount(hid, zip_files)[1]
        # The context manager closes each archive, also when an error occurs.
        with zipfile.ZipFile(zip_file) as zf:
            # Members holding the per-question scores (one workbook per class,
            # judging by the class name parsed from the member name below).
            # statfiles = [f.filename for f in zf.filelist if "试题分析" in f.filename]
            handinfiles = [info.filename for info in zf.filelist if "小题分_按学号" in info.filename]
            if not handinfiles:
                print("!!!zip文件中没有小题分数据, 已跳过:", hid)
                continue
            # Use the first workbook only to discover the question columns.
            df = _load_stats_sheet(zf, handinfiles[0])
            indices = {}  # question id -> {column label: full marks}
            for col in df.columns:
                for pattern in patterns:
                    # str() guards against non-string column labels.
                    res = re.findall(pattern, str(col))
                    if not res:
                        continue
                    qid, mark = res[0]
                    marks_by_col = indices.setdefault(qid, {})
                    if mark.isdigit():
                        # The header carries the full marks itself.
                        marks_by_col[col] = int(mark)
                    else:
                        # Step-type columns capture "步" instead of a number,
                        # so the full marks have to be asked for.
                        marks_by_col[col] = int(input(f"{hid}-{col}的满分:"))
            # Map the second field of each usage line to the columns of the
            # question named by the first field.
            corresp_dict = {}
            for rawline in homework["usage_data"]:
                fields = rawline.split()
                if not fields:
                    continue
                if len(fields) != 2:
                    raise ValueError(f"{hid}: 无法解析的数据行: {rawline!r}")
                a, b = fields
                if a in indices:
                    corresp_dict[b] = indices[a].copy()
            # print(corresp_dict)
            for excelfile in handinfiles:
                # The last two rows of each sheet are dropped before averaging
                # (presumably summary rows — confirm against a real export).
                df = _load_stats_sheet(zf, excelfile)[:-2]
                gradename = re.findall(r"高[一二三]",excelfile)[0]
                classno = re.findall(r"高[一二三]([\d]*?)班",excelfile)[0]
                # NOTE(review): the original concatenated two empty string
                # literals around the grade/class part; they look like
                # characters lost from the source — confirm the label format.
                classname = f"{semester + shiftdict[gradename]}{gradename}{classno.zfill(2)}"
                for pid, colandmarks in corresp_dict.items():
                    # Mean score divided by the full marks, per column.
                    ratios = "".join(
                        f"\t{df[col].mean() / mark:.3f}"
                        for col, mark in colandmarks.items()
                    )
                    sections.append(f"{pid}\n{date}\t{classname}{ratios}\n\n")
    outputstr = "".join(sections)
    with open("临时文件/自动转换结果.txt","w",encoding = "utf8") as f:
        f.write(outputstr)
    with open("文本文件/metadata.txt","w",encoding = "utf8") as f:
        f.write(outputstr)