# (code-viewer header: 123 lines, 5.1 KiB, Python)
import zipfile,os,re
|
||
import pandas as pd
|
||
from pathlib import Path
|
||
|
||
|
||
# Working directory. It must contain exactly one .txt (or .tex) file and
# some .zip files; whatever else is in there does not matter.
# A homework entry whose first line starts with "#" is not read.
filepath = r"C:\Users\weiye\Documents\wwy sync\xiaoxian待导入"

# Base year used for the class-year label, and the acceptance threshold.
semester = 2023
# A class's data counts as valid once the share of students who handed in
# exceeds this ratio.
threshold = 0.5
|
||
|
||
|
||
|
||
def stringcount(string, list):
    """Count the entries of ``list`` that contain ``string`` as a substring.

    Args:
        string: Substring to look for.
        list: Iterable of strings to search.

    Returns:
        A ``(count, item)`` tuple: the number of matching entries and the
        last matching entry, or ``""`` when nothing matched.
    """
    hits = [entry for entry in list if string in entry]
    last_hit = hits[-1] if hits else ""
    return (len(hits), last_hit)
|
||
|
||
|
||
# Per-grade offset that is added to `semester` when the class-year label
# ("<year>届...") is built; presumably the years left until graduation.
shiftdict = {
    "高一": 3,
    "高二": 2,
    "高三": 1,
}
|
||
|
||
# Locate the description file (tex_file) and the zip archives (zip_files)
# inside the working directory. All entries are kept as full paths.
files = [os.path.join(filepath, f) for f in os.listdir(filepath)]

# Test the *end* of the name rather than doing a substring search on the
# whole path: a substring test also matches directory names that happen to
# contain ".txt"/".zip" and files such as "x.zip.bak".
text_candidates = [f for f in files if f.endswith((".tex", ".txt"))]
if not text_candidates:
    # Fail with an explicit message instead of a bare IndexError.
    raise FileNotFoundError("在工作目录中找不到.txt或.tex文件: " + filepath)
# If several candidates exist the first one listed is used.
tex_file = text_candidates[0]
zip_files = [f for f in files if f.endswith(".zip")]
|
||
|
||
# Read the description file and split it into one chunk per homework.
# "utf-8-sig" reads plain UTF-8 as before but also drops a leading BOM;
# with "utf8" the BOM would stay glued to the first homework id and break
# both the "#" check and the zip-file lookup for that entry.
with open(tex_file, "r", encoding="utf-8-sig") as f:
    tex_data = f.read().strip()
# Collapse every run of tabs into a single space.
tex_data = re.sub(r"\t+", r" ", tex_data)
# Chunks are separated by one or more empty lines. Splitting on the pattern
# directly avoids routing through a sentinel string that could collide with
# text in the file.
homeworklist = re.split(r"\n{2,}", tex_data)
|
||
|
||
# Turn each homework chunk into a dictionary entry:
#   line 1 -> id (spaces removed; later matched against the zip file names)
#   line 2 -> date
#   rest   -> usage data lines
homeworkdict = {}
for hwk in homeworklist:
    hwkdata = hwk.strip().split("\n")
    hwk_id = hwkdata[0].replace(" ", "")
    # Check the "#" marker before touching the date line, so that a
    # commented-out chunk consisting of a single line is skipped instead of
    # crashing on the missing second line.
    if hwk_id.startswith("#"):
        continue
    if len(hwkdata) < 2:
        # Still an error, but one that says which chunk is malformed.
        raise ValueError("作业数据至少需要两行(文件名与日期): %r" % hwk)
    homeworkdict[hwk_id] = {
        "date": hwkdata[1],
        "usage_data": hwkdata[2:],
    }
|
||
|
||
# Check that every homework id matches exactly one zip file. execflag ends
# up False as soon as any id has zero or several matches.
execflag = True
for hwk_name in homeworkdict:
    match_count, _ = stringcount(hwk_name, zip_files)
    if match_count != 1:
        execflag = False
        print("!!!zip文件个数不对:", hwk_name)
        continue
    print("zip文件在文件夹中:", hwk_name)
|
||
|
||
|
||
def _extract_as(zf, member, target):
    """Extract one archive member and move it to a fixed temporary path.

    Args:
        zf: An open ``zipfile.ZipFile``.
        member: Name of the archive member to extract.
        target: Destination path; a file already there is overwritten.

    Returns:
        ``target``, so the call can be handed straight to ``pd.read_excel``.

    Raises:
        KeyError: If ``member`` is not present in the archive.
    """
    # ZipFile.extract unpacks relative to the current working directory and
    # returns the created path. Path.replace overwrites an existing target,
    # which makes a separate "list directory, remove old file" step unnecessary.
    Path(zf.extract(member)).replace(target)
    return target


if execflag:
    # Output is collected in pieces and joined once at the end.
    output_parts = ["usages\n\n"]
    for hwk_id in homeworkdict:
        print("正在处理%s"%hwk_id)
        date = homeworkdict[hwk_id]["date"]
        usage_lines = homeworkdict[hwk_id]["usage_data"]

        # Find the archive for this homework. The context manager closes
        # every archive, not just the last one opened, and avoids touching
        # an undefined handle when there is no homework at all.
        zip_file = os.path.join(filepath, stringcount(hwk_id, zip_files)[1])
        with zipfile.ZipFile(zip_file) as zf:
            # Members that hold the per-question statistics.
            statfiles = [name for name in zf.namelist() if "试题分析" in name]

            # Map answer-sheet area numbers to problem ids, using the first
            # statistics sheet for the list of area numbers.
            overview_df = pd.read_excel(
                _extract_as(zf, statfiles[0], "临时文件/statsfile.xlsx")
            )
            area_indexes = list(overview_df[overview_df.columns[0]][2:])
            correspondence_dict = {}
            for pind in area_indexes:
                # Escape the index so regex metacharacters in it are taken
                # literally, and stringify it so a non-text cell cannot break
                # the concatenation. re.match anchors at the line start.
                prefix = re.compile(re.escape(str(pind)) + r"\s")
                for line in usage_lines:
                    if prefix.match(line):
                        # Everything after the first space is the problem id.
                        correspondence_dict[pind] = line[line.index(" ")+1:]

            # Per-class sheets are the statistics members whose name
            # contains a digit.
            for sf in statfiles:
                if not re.search(r"\d", sf):
                    continue
                gradename = re.findall(r"高[一二三]", sf)[0]
                class_number = re.findall(r"高[一二三]([\d]*?)班", sf)[0].zfill(2)
                classname = str(semester + shiftdict[gradename]) + "届" + gradename + class_number + "班"

                df = pd.read_excel(_extract_as(zf, sf, "临时文件/statsfile.xlsx"))
                # The hand-in sheet name is derived from the statistics
                # member name up to "数学".
                # NOTE(review): if "数学" is absent, find() returns -1 and the
                # derived name is wrong - confirm all archives follow this naming.
                handin_member = sf[:sf.find("数学")] + "学生成绩.xlsx"
                handindf = pd.read_excel(
                    _extract_as(zf, handin_member, "临时文件/handinfile.xlsx")
                )

                # Look at the row located at the `threshold` fraction of the
                # hand-in sheet; if it is marked "缺考" the class is rejected.
                # NOTE(review): presumably the sheet lists absent students
                # last - confirm.
                marker = handindf.iloc[int(len(handindf)*threshold)]["Unnamed: 4"]
                if str(marker) == "缺考":
                    print(classname, "无效")
                    continue

                difficulties = list(df[df.columns[9]][2:])
                class_indexes = list(df[df.columns[0]][2:])
                # Group this class's values by problem id; one problem may
                # cover several answer-sheet areas.
                class_difficulty = {}
                for area, difficulty in zip(class_indexes, difficulties):
                    if area in correspondence_dict:
                        class_difficulty.setdefault(correspondence_dict[area], []).append(difficulty)

                # Append one record per problem. A dedicated loop name keeps
                # the outer homework id intact.
                for problem_id, values in class_difficulty.items():
                    output_parts.append(
                        problem_id + "\n" + date + "\t" + classname + "\t"
                        + "\t".join([("%.3f" % float(v)) for v in values]) + "\n\n"
                    )
                print(classname, "有效")

    outputstr = "".join(output_parts)
    # NOTE(review): the original indentation was lost; the results are
    # written once, after all homework has been processed - confirm.
    with open("临时文件/自动转换结果.txt","w",encoding = "utf8") as f:
        f.write(outputstr)
    with open("文本文件/metadata.txt","w",encoding = "utf8") as f:
        f.write(outputstr)