733 lines
35 KiB
Python
733 lines
35 KiB
Python
import json,re,os,Levenshtein,fitz,time,sys
|
||
|
||
def GetDate():
    """Return today's date as a zero-padded yyyymmdd string."""
    now = time.localtime()
    return "%04d%02d%02d" % (now.tm_year, now.tm_mon, now.tm_mday)
|
||
|
||
def ReadTextFile(filepath):
    """Return the full contents of the UTF-8 text file at *filepath*."""
    with open(filepath, "r", encoding="u8") as fh:
        return fh.read()
|
||
|
||
def SaveTextFile(data, filepath):
    """Write *data* to *filepath* as UTF-8 text and return the path."""
    with open(filepath, "w", encoding="u8") as fh:
        fh.write(data)
    return filepath
|
||
|
||
def SortDict(adict):
    """Return a new dict with the items of *adict* ordered by key."""
    return {key: adict[key] for key in sorted(adict)}
|
||
|
||
|
||
# JSON database load/save helpers (not limited to the problem-id database)
|
||
|
||
def load_dict(filename):
    """Load the JSON database at *filename* and return it as a Python dict."""
    with open(filename, "r", encoding="u8") as fh:
        return json.load(fh)
|
||
|
||
def save_dict(adict, filename):
    """Serialize *adict* as pretty-printed JSON into *filename*.

    ensure_ascii=False keeps Chinese text human-readable in the file.
    Returns 0 on success, 1 on any failure (unwritable path, value that
    json cannot serialize, ...).
    """
    try:
        with open(filename, "w", encoding="u8") as fh:
            json.dump(adict, fh, indent=4, ensure_ascii=False)
        return 0
    except Exception:
        # was a bare `except:`; Exception no longer swallows Ctrl-C/SystemExit
        return 1
|
||
|
||
|
||
def pre_treating(string):
    """Strip characters that carry no information for similarity comparison.

    Applied in order: centered environments, template macros, whitespace and
    LaTeX punctuation, newlines/tabs, common math commands, sentence
    punctuation. Returns the stripped string.
    """
    patterns = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)|(overline)",
        r"[,\.:;?]",
    )
    for pattern in patterns:
        string = re.sub(pattern, "", string)
    return string
|
||
|
||
def treat_dict(p_dict):
    """Pre-process every problem's content for comparison.

    Returns a new dict keyed by problem id with a stripped "content" field
    and the original "same" links carried over.
    """
    return {
        pid: {"content": pre_treating(entry["content"]), "same": entry["same"]}
        for pid, entry in p_dict.items()
    }
|
||
|
||
def detectmaxsim(currentid, excludelist, adict):
    """Find the problem most similar to *currentid*, skipping *excludelist*.

    Similarity is the Jaro distance between the pre-processed contents.
    Returns (max similarity, id of most similar problem); when no candidate
    qualifies the defaults (-1, "000000") are returned.
    """
    best_rate = -1
    best_id = "000000"
    reference = adict[currentid]["content"]
    for pid in adict:
        if pid in excludelist:
            continue
        rate = Levenshtein.jaro(adict[pid]["content"], reference)
        if rate > best_rate:
            best_rate = rate
            best_id = pid
    return (best_rate, best_id)
|
||
|
||
def generate_problem_series(startingid, length, adict):
    """Chain *length* problems starting from *startingid*.

    Each next problem is the one most similar to the previous one that is
    not already in the chain. Returns the ids joined with commas, in order.
    """
    chain = [startingid]
    cursor = startingid
    for _ in range(length):
        _, cursor = detectmaxsim(cursor, chain, adict)
        chain.append(cursor)
    return ",".join(chain)
|
||
|
||
|
||
def generate_number_set(string, *thedict):
    """Expand an id expression like "1:3,5" into 6-digit zero-padded ids.

    "a:b" tokens expand to the inclusive range; plain tokens are padded
    as-is. With one dict vararg, only ids present in that dict are kept.
    Any other vararg usage returns an error string.
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for token in string.split(","):
        if ":" in token:
            lo, hi = token.split(":")
            numbers_list.extend(str(n).zfill(6) for n in range(int(lo), int(hi) + 1))
        else:
            numbers_list.append(token.zfill(6))
    if len(thedict) == 0:
        return numbers_list
    if len(thedict) == 1 and type(thedict[0]) == dict:
        return [pid for pid in numbers_list if pid in thedict[0]]
    return "输入参数有误"
|
||
|
||
def generate_exp(id_list):
    """Compress a list of 6-digit ids into a ":"/"," expression.

    Consecutive ids collapse into "start:end"; isolated ids stay alone,
    e.g. ["000001","000002","000003","000005"] -> "000001:000003,000005".
    An empty list yields "无有效题号".
    """
    if not id_list:
        return "无有效题号"

    def close_run(lo, hi):
        # one run becomes either "lo:hi" or just "lo" when it has one element
        return lo if lo == hi else lo + ":" + hi

    runs = []
    run_start = run_end = id_list[0]
    for pid in id_list[1:]:
        if int(pid) == int(run_end) + 1:
            run_end = pid
        else:
            runs.append(close_run(run_start, run_end))
            run_start = run_end = pid
    runs.append(close_run(run_start, run_end))
    return ",".join(runs)
|
||
|
||
def parsePDF(filePath):
    """Extract the plain text of every page of a PDF, one page per chunk,
    each followed by a newline."""
    with fitz.open(filePath) as doc:
        return "".join(page.get_text() + "\n" for page in doc.pages())
|
||
|
||
def extractIDs(filePath):
    """Collect every "(dddddd)" problem id from a .txt/.tex/.pdf file.

    Returns the ids as a compact ":"/"," expression, or "格式不正确" for
    unsupported extensions.
    """
    if filePath.endswith((".txt", ".tex")):
        with open(filePath, "r", encoding="u8") as fh:
            data = fh.read()
    elif filePath.endswith(".pdf"):
        data = parsePDF(filePath)
    else:
        return "格式不正确"
    return generate_exp(re.findall(r"\((\d{6})\)", data))
|
||
|
||
|
||
def spareIDs(dictname):
    """Describe the free id ranges between the ids used in *dictname*.

    Returns a multi-line string; each line names one closed interval of
    unused ids, the last interval extending to 999999.
    """
    groups = generate_exp(list(dictname.keys())).split(",")
    lines = []
    for left, right in zip(groups, groups[1:]):
        # the gap starts right after the end of one group and stops right
        # before the start of the next
        lines.append("首个空闲id: %s, 直至: %s" % (
            str(int(left[-6:]) + 1).zfill(6),
            str(int(right[:6]) - 1).zfill(6)))
    lines.append("首个空闲id: %s, 直至: %s" % (str(int(groups[-1][-6:]) + 1).zfill(6), "999999"))
    return "\n".join(lines)
|
||
|
||
def parse_usage(datastring):
    """Tokenize one usage record into its parts.

    A token containing 高 or 班 is the class name; a token without a dot
    (and no class marker) is the date; everything else is a difficulty.
    Returns {"date": ..., "classname": ..., "difficulty": [...]}.
    """
    tokens = re.sub(r"\s+", "\t", datastring.strip()).split("\t")
    result = {"date": "", "classname": "", "difficulty": []}
    for token in tokens:
        if "高" in token or "班" in token:
            result["classname"] = token
        elif "." not in token:
            result["date"] = token
        else:
            result["difficulty"].append(token)
    return result
|
||
|
||
|
||
def GenerateProblemListFromString(data): # build a problem list from a .tex file string
    """Split a .tex document into (content, origin-suffix) tuples.

    Each ``\\item`` becomes one problem. A line of the form ``%...`` standing
    alone before an item is taken as that item's origin suffix (the last such
    line wins). Returns a list of tuples.
    """
    try:
        # keep only the document body when a full preamble is present
        data = re.findall(r"\\begin\{document\}([\s\S]*?)\\end\{document\}",data)[0]
    except:
        pass
    data = re.sub(r"\n{2,}","\n",data)
    # insert an explicit end marker before every \item and at \end{enumerate}
    # so each item's body can be captured non-greedily below
    data = re.sub(r"\\item",r"\\enditem\\item",data)
    data = re.sub(r"\\end\{enumerate\}",r"\\enditem",data) # cut irrelevant info, keep the essentials
    problempositions = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem",data):
        problempositions.append(item.regs[1]) # (start, end) span of each item's body
    problem_list = []
    for pos in problempositions:
        content = data[pos[0]:pos[1]].strip()
        content = re.sub(r"\n\%[\s\S]*$","",content) # drop a trailing %-comment block from the content
        subdata = data[:pos[0]] # search backwards from the item for the origin infix
        suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n",subdata)
        if len(suflist) == 0:
            suffix = ""
        else:
            suffix = suflist[-1].replace("%","").strip()
        problem_list.append((content,suffix))
    return problem_list # list of (problem content, origin suffix) tuples
|
||
|
||
|
||
def CreateEmptyProblem(problem):
    """Clone *problem* and blank every field according to its type.

    Strings become "", lists become fresh [], ints/floats become -1; fields
    of any other type are carried over unchanged. The caller fills in the
    id and content afterwards.
    """
    blank = problem.copy()
    for key in blank:
        value = blank[key]
        if type(value) == str:
            blank[key] = ""
        elif type(value) == list:
            blank[key] = []
        elif type(value) in (int, float):
            blank[key] = -1
    return blank
|
||
|
||
def CreateNewProblem(id, content, origin, dict, editor):
    """Build a fresh problem dict modeled on entry "000001" of *dict*.

    The new problem gets a zero-padded id, the given content and origin,
    and an edit history containing only *editor*.
    """
    problem = CreateEmptyProblem(dict["000001"])
    problem["id"] = str(id).zfill(6)
    problem["content"] = content
    problem["origin"] = origin
    problem["edit"] = [editor]
    return problem
|
||
|
||
|
||
def AddProblemstoDict(startingid,raworigin,problems,editor,indexdescription,thedict): # add problems from GenerateProblemListFromString into thedict
    """Insert *problems* (list of (content, origin-suffix) tuples) into
    *thedict* under consecutive 6-digit ids starting at *startingid*.

    The origin is raworigin + suffix + indexdescription + running index,
    and genre/space are inferred from "blank"/"bracket" markers in the
    content. Prints a per-problem report including the most similar existing
    problem. Returns 1 and stops on an id collision, otherwise 0.
    """
    id = int(startingid)
    currentsuffix = problems[0][1]
    problemindex = 0
    for p_and_suffix in problems:
        p, suffix = p_and_suffix
        pid = str(id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        else:
            # running index within one origin suffix
            # NOTE(review): currentsuffix is never updated after a suffix
            # change, so once the suffix differs the index resets to 1 on
            # every subsequent problem — confirm whether that is intended.
            if suffix == currentsuffix:
                problemindex += 1
            else:
                problemindex = 1
            origin = raworigin + suffix + indexdescription.strip() + ("" if indexdescription.strip() == "" else str(problemindex))
            newproblem = CreateNewProblem(pid,p.strip(),origin,thedict,GetDate() + "\t" + editor)
            # genre/space derived from the template markers in the content
            if "blank" in p:
                newproblem["genre"] = "填空题"
                newproblem["space"] = ""
            elif "bracket" in p:
                newproblem["genre"] = "选择题"
                newproblem["space"] = ""
            else:
                newproblem["genre"] = "解答题"
                newproblem["space"] = "4em"
            thedict[pid] = newproblem
            # report the closest existing problem as a duplicate hint
            maxsim,argmaxsim = detectmaxsim(pid,[pid],thedict)
            print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
            id += 1
    return 0
|
||
|
||
|
||
def CreateIDLinks(old_id_list, new_id_list, *thedict):
    """Pair old ids with new ids one-to-one.

    If a dict vararg is supplied, new ids already present in it are removed
    first. Returns a list of (old_id, new_id) tuples, or the error string
    "新ID个数不足." when there are not enough new ids.
    """
    if len(thedict) == 1 and type(thedict[0]) == dict:
        new_id_list = [pid for pid in new_id_list if pid not in thedict[0]]
    if len(old_id_list) > len(new_id_list):
        return "新ID个数不足."
    # zip truncates to len(old_id_list), matching the original index loop
    return list(zip(old_id_list, new_id_list))
|
||
|
||
|
||
def CreateRelatedProblems(links, thedict, filepath, editor):
    """Build an editable dict of new problems derived from existing ones.

    For each (old_id, new_id) in *links*: copy the old problem, mark its id
    ("new_id待替换") and content ("(待编辑)...") as pending edits, clear the
    usage/same/unrelated fields, stamp the edit history with date+editor and
    the origin with "-date修改", then save everything to *filepath*.
    Returns 0 on success, 1 on any error (unknown id, unwritable path, ...).
    """
    try:
        new_dict = {}
        for old_id, new_id in links:
            entry = thedict[old_id].copy()
            entry["id"] = new_id + "待替换"  # marker stripped again on import
            entry["content"] = "(待编辑)" + entry["content"]
            entry["usages"] = []
            entry["same"] = []
            entry["unrelated"] = []
            entry["edit"] = entry["edit"].copy() + [GetDate() + "\t" + editor]
            entry["origin"] += "-" + GetDate() + "修改"
            new_dict[old_id] = entry
        save_dict(new_dict, filepath)
    except Exception:
        # was a bare `except:`; Exception no longer swallows Ctrl-C/SystemExit
        return 1
    return 0
|
||
|
||
def ImportRelatedProblems(new_json, main_json):
    """Merge the edited related-problem file *new_json* into *main_json*.

    Each entry's "id" must be the new id followed by "待替换". The old and
    new problems are cross-linked through their "related" fields, and
    genre/space are re-derived from the content markers (same rules as
    AddProblemstoDict). The sorted database is saved back to *main_json*.
    Returns 0 on success; returns 1 without saving if a new id collides.

    Bug fix: each genre branch used to assign ``genre`` twice, so the second
    assignment ("", "" or "4em") clobbered the genre just set; the second
    assignment now targets the ``space`` field, matching AddProblemstoDict.
    """
    pro_dict = load_dict(main_json)
    new_dict = load_dict(new_json)
    for id in new_dict:
        new_id = new_dict[id]["id"].replace("待替换","")  # new id must carry the "待替换" marker
        if new_id in pro_dict:
            print("题号有重复")
            return 1
        else:
            pro_dict[new_id] = new_dict[id].copy()
            pro_dict[new_id]["id"] = new_id
            # mutual "related" links between the old and the new problem
            pro_dict[id]["related"] += [new_id]
            pro_dict[new_id]["related"] += [id]
            p = pro_dict[new_id]["content"]
            if "blank" in p:
                pro_dict[new_id]["genre"] = "填空题"
                pro_dict[new_id]["space"] = ""
            elif "bracket" in p:
                pro_dict[new_id]["genre"] = "选择题"
                pro_dict[new_id]["space"] = ""
            else:
                pro_dict[new_id]["genre"] = "解答题"
                pro_dict[new_id]["space"] = "4em"
            print("导入关联题目 %s -> %s 信息成功."%(id,new_id))
    save_dict(SortDict(pro_dict), main_json)  # persist into the main database
    return 0
|
||
|
||
|
||
def strip_suffix(originalString, suf_words_list):
    """Cut each suffix word (and any non-whitespace tail after it) off the
    end of *originalString*, applying the words in order.

    Bug fix: the previous version re-ran every substitution on the ORIGINAL
    string, so only the last word of *suf_words_list* had any effect, and an
    empty list left the result variable undefined (NameError).
    """
    output = originalString
    for sw in suf_words_list:
        output = re.sub(sw + r"[\S]*$", "", output)
    return output
|
||
|
||
def get_striped_origin(pro_dict, id, suf_words_list):
    """Return problem *id*'s origin with the given suffix words stripped."""
    origin = pro_dict[id]["origin"]
    return strip_suffix(origin, suf_words_list)
|
||
|
||
|
||
def SeperateFirstLine(string):
    """Split *string* at newlines into (first line, remaining lines).

    Every line is stripped of surrounding spaces; the remaining lines are
    re-joined with newlines.
    """
    first, *rest = string.split("\n")
    return (first.strip(), "\n".join(part.strip() for part in rest))
|
||
|
||
def ObtainDatatoModify(metadatafilepath,fields_dict): # collect the edits described in a metadata file
    """Parse a metadata file into (field, id, content) edit requests.

    The file separates entries with blank lines. A single-line entry whose
    text is a key of *fields_dict* switches the current field; an entry of
    two or more lines is a problem id on the first line followed by the new
    content. A one-line entry that is NOT a field name resets the context.
    Returns a list of (field, 6-digit id, content) triples.
    """
    # fields_dict is the field-info database (field name -> metadata)
    data = ReadTextFile(metadatafilepath)
    datalines = data.split("\n")
    data = ""
    for l in datalines:
        if l.strip() in fields_dict:
            # force a field-name line into its own blank-line-delimited entry
            data += l + "\n\n"
        else:
            data += l + "\n"
    data = re.sub(r"\n[\s\n]*\n","\n\n",data.strip()) # collapse stray blanks and extra newlines
    datalist = data.split("\n\n")
    to_modify_list = []
    currentfield = "NotAField"
    for line in datalist:
        if line.strip() in fields_dict:
            currentfield = line.strip()
        elif not "\n" in line:
            # one-line entry that is not a field name: invalidate the context
            currentfield = "NotAField"
        else:
            id,content = SeperateFirstLine(line)
            to_modify_list.append((currentfield,str(id).zfill(6),content))
    return to_modify_list # list of (field, 6-digit id, new content) triples
|
||
|
||
def FloatToInt(string):
    """Parse *string* as a number; snap to int when within 0.01 of one."""
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value
|
||
|
||
def OverwriteData(prodict, fieldsdict, field_id_and_content):
    """Overwrite one field of one problem with new content.

    "int"/"float" fields are converted through FloatToInt first; "str"
    fields are assigned as-is. Returns the (id, field, content) triple.
    """
    field, id, content = field_id_and_content
    fieldType = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中" % id)
    else:
        if fieldType == "str":
            prodict[id][field] = content
        elif fieldType in ("int", "float"):
            content = FloatToInt(content)
            prodict[id][field] = content
        # NB: reports success for any known id, even field types not written
        print("已覆盖 %s 的 %s 字段, 内容为 %s" % (id, field, content))
    return (id, field, content)
|
||
|
||
def AppendData(prodict, fieldsdict, field_id_and_content):
    """Append *content* to a problem field, skipping duplicates.

    "str" fields get the content appended on a new line (unless the stripped
    content is already a substring); "list" fields get one element per input
    line (unless already present). Returns (id, field, content).
    """
    field, id, content = field_id_and_content
    fieldType = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中" % id)
    elif fieldType == "str":
        if content.strip() in prodict[id][field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在" % (id, field, content))
        else:
            prodict[id][field] = (prodict[id][field].strip() + "\n" + content).strip()
            print("已于 %s 的 %s 字段执行添加, 内容为 %s" % (id, field, content))
    elif fieldType == "list":
        for line in (part.strip() for part in content.split("\n")):
            if line in prodict[id][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在" % (id, field, line))
            else:
                prodict[id][field] = prodict[id][field].copy() + [line]
                print("已于 %s 的 %s 字段执行添加, 内容为 %s" % (id, field, line))
    return (id, field, content)
|
||
|
||
def AppendMutualData(prodict, field_id_and_content):
    """Create symmetric links (same/related/unrelated) between *id* and each
    id listed in *content* (one per line, zero-padded to 6 digits).

    Unknown ids are reported and skipped; existing links are not duplicated.
    Returns (id, field, content).
    """
    field, id, content = field_id_and_content
    for other in [str(line).zfill(6) for line in content.split("\n")]:
        if id not in prodict:
            print("题号 %s 并不在数据库中" % id)
        elif other not in prodict:
            print("题号 %s 并不在数据库中" % other)
        else:
            # add the link in both directions, id -> other first
            for a, b in ((id, other), (other, id)):
                if b in prodict[a][field]:
                    print("题号 %s 的 %s 字段, 内容 %s 已存在" % (a, field, b))
                else:
                    prodict[a][field] = prodict[a][field] + [b]
                    print("已于 %s 的 %s 字段执行添加, 内容为 %s" % (a, field, b))
    return (id, field, content)
|
||
|
||
def AppendObjData(prodict, objdict, field_id_and_content):
    """Append target ids (keys of *objdict*) to a problem field.

    One id per input line; ids missing from *objdict* and duplicates are
    reported and skipped. Returns (id, field, content).
    """
    field, id, content = field_id_and_content
    if id not in prodict:
        print("题号 %s 并不在数据库中" % id)
    else:
        for objid in (line.strip() for line in content.split("\n")):
            if objid not in objdict:
                print("目标编号 %s 并不在数据库中" % objid)
            elif objid in prodict[id][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在" % (id, field, objid))
            else:
                prodict[id][field] = prodict[id][field] + [objid]
                print("已于 %s 的 %s 字段执行添加, 编号为 %s, 内容为 %s" % (id, field, objid, objdict[objid]["content"]))
    return (id, field, content)
|
||
|
||
def AppendUsageData(prodict,field_id_and_content): # append usage-record data
    """Append usage lines to prodict[id][field] with conflict handling.

    Normal lines are skipped when an identical record (ignoring the leading
    date) already exists; if the same class already has a record, the line
    goes to a pending list instead of being added. Lines containing "FORCE"
    are uppercased, stripped of the marker, and appended unconditionally.
    Returns (field, id, content, output) where *output* is a reviewable
    metadata string listing each pending new line (tagged FORCE) next to the
    old records it conflicts with.
    """
    field,id,content = field_id_and_content
    lines = [re.sub(r"\s+",r"\t",line.strip()) for line in content.split("\n")]
    pending_list = []
    for line in lines:
        if not "FORCE" in line.upper():
            time_stripped = re.sub(r"^\d{4,}\t","",line) # the record with its leading date removed
            if time_stripped in "\n".join(prodict[id][field]):
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,line))
            else:
                # the class-id token is the one containing 班/高/一/二/三
                classid = [info for info in line.split("\t") if len(re.findall(r"[班高一二三]",info))>0][0]
                if classid in "\n".join(prodict[id][field]):
                    # same class already recorded: defer to the pending list
                    print("班级 %s 在题号为 %s 的题目处已有使用记录, 在pending list中记录"%(classid,id))
                    oldinfo = [usage for usage in prodict[id][field] if classid in usage]
                    pending_list = pending_list + [(id,line,oldinfo)]
                else:
                    prodict[id][field].append(line)
                    print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,line))
        else:
            # FORCE marker: strip it and append without any duplicate checks
            line = re.sub(r"\s+",r"\t",re.sub(r"FORCE","",line.upper())).strip()
            prodict[id][field].append(line)
            print("已于 %s 的 %s 字段执行强制添加, 内容为 %s"%(id,field,line))
    # assemble the pending-review string: new line (FORCE-tagged) + old lines
    output = "usages\n\n"
    for item in pending_list:
        id,new,oldlist = item
        output += id + "\n"
        output += new+"\tFORCE\n"
        output += "\n".join(oldlist)+"\n\n"
    return (field,id,content,output) # note the order: (field, id, content, pending string)
|
||
|
||
def ImportMetadata(prodict,objdict,fieldsdict,metadatafilepath,pendingdatafilepath): # apply metadata edits, dispatching on each field's Method
    """Apply every edit described in the metadata file to *prodict*.

    Each (field, id, content) triple from ObtainDatatoModify is dispatched
    to the writer named by fieldsdict[field]["Method"]. "usageappend" also
    saves the pending-conflict string to *pendingdatafilepath*. Returns 1 on
    a bad field name, otherwise the feedback of the LAST processed item.
    The caller is responsible for saving *prodict* afterwards.
    """
    data_to_modify = ObtainDatatoModify(metadatafilepath,fieldsdict)
    for item in data_to_modify:
        field = item[0]
        if field == "NotAField":
            print("字段名有误")
            return 1
        else:
            method = fieldsdict[field]["Method"]
            if method == "overwrite":
                feedback = OverwriteData(prodict,fieldsdict,item)
            elif method == "append":
                feedback = AppendData(prodict,fieldsdict,item)
            elif method == "mutualappend":
                feedback = AppendMutualData(prodict,item)
            elif method == "objappend":
                feedback = AppendObjData(prodict,objdict,item)
            elif method == "usageappend":
                feedback = AppendUsageData(prodict,item)
                outputstring = feedback[3]
                # persist the conflicting usage records for manual review
                SaveTextFile(outputstring,pendingdatafilepath)
            elif method == "fixed":
                print("字段 %s 不可按此方式修改"%field)
    # NOTE(review): if the edit list is empty or only contains "fixed"
    # fields, `feedback` is unbound here and this raises NameError — confirm.
    return feedback # feedback of the last modification; 1 means a bad field name
|
||
|
||
def parseUsage(usagestring):
    """Tokenize one usage line into its structured parts.

    A leading token matching 20xx... with no CJK characters is the date; a
    following token containing 高/一/二/三/班 is the class id; everything
    left is parsed as per-sub-problem score rates. Returns a dict with keys
    date, classid, subproblems, difficulties and glossdiff (rounded mean).
    """
    tokens = re.sub(r"\s+", r"\t", usagestring.strip()).split("\t")
    out = {}
    looks_like_date = (re.findall(r"20[\d]{2,6}", tokens[0]) != []
                       and re.findall(r"[\u4e00-\u9fa5]", tokens[0]) == [])
    out["date"] = tokens.pop(0) if looks_like_date else ""
    has_class_marker = re.findall(r"[高一二三班]", tokens[0]) != []
    out["classid"] = tokens.pop(0) if has_class_marker else ""
    out["subproblems"] = len(tokens)
    out["difficulties"] = [float(t) for t in tokens]
    out["glossdiff"] = round(sum(out["difficulties"]) / out["subproblems"], 3)
    return out
|
||
|
||
def StringSubstitute(regex, template, stringtuple):
    """Replace each *regex* match in *template* with the corresponding entry
    of *stringtuple*, positionally.

    Replacement uses str.replace, so identical match texts are all replaced
    at once. Prints "长度不符." and returns 1 when the match count differs
    from len(stringtuple); otherwise returns the substituted string.
    """
    matches = re.findall(regex, template)
    if len(matches) != len(stringtuple):
        print("长度不符.")
        return 1
    output = template
    for found, replacement in zip(matches, stringtuple):
        output = output.replace(found, replacement)
    return output
|
||
|
||
def GenerateTexDataforEditing(id_string, prodict, templatefilepath, editor):
    """Render the problems selected by *id_string* into the editing template.

    Each problem contributes its content, answer, solution and remark as an
    enumerate item; the assembled body replaces the "<<...待替换...>>"
    placeholder of the template. Returns the .tex string to be saved.
    """
    body = "编辑者: " + editor + "\n\n\\begin{enumerate}\n\n"
    for pid in generate_number_set(id_string, prodict):
        entry = prodict[pid]
        body += ("\\item (%s) " % str(pid).zfill(6) + entry["content"] + "\n\n"
                 + "答案: " + entry["ans"] + "\n\n"
                 + "解答与提示: " + entry["solution"] + "\n\n"
                 + "备注: " + entry["remark"] + "\n\n")
    body += "\\end{enumerate}"
    template = ReadTextFile(templatefilepath)
    return StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>", template, [body])
|
||
|
||
|
||
def GetEditedProblems(string):
    """Extract the enumerate body of an edited .tex string and split it into
    a dict keyed by 6-digit problem id.

    Each value is the (content, ans, solution, remark) tuple produced by
    parseProblem. A sentinel "\\item" is appended so the last item is also
    captured by the non-greedy regex.
    """
    body = re.findall(r"\\begin\{enumerate\}([\s\S]*)\\end\{enumerate\}", string)[0] + r"\item"
    return {
        pid: parseProblem(text)
        for pid, text in re.findall(r"\((\d{6})\)([\s\S]*?)\\item", body)
    }
|
||
|
||
def parseProblem(string):
    """Split a four-paragraph problem string into its labeled parts.

    Paragraphs are separated by blank lines and expected in the order:
    content, "答案:...", "解答与提示:...", "备注:...". Returns the stripped
    (content, ans, solution, remark) tuple with the labels removed.
    """
    normalized = re.sub(r"\n{2,}", "\n\n", string.strip())
    content, ans, solution, remark = normalized.split("\n\n")
    return (
        content.strip(),
        re.sub("答案:", "", ans).strip(),
        re.sub("解答与提示:", "", solution).strip(),
        re.sub("备注:", "", remark).strip(),
    )
|
||
|
||
def ModifyProblembyTeX(id_string,prodict,toeditfilepath,editor): # edit problems' content/answer/solution via a .tex file opened in VS Code
    """Round-trip edit: export problems to .tex, let the user edit them in
    VS Code, then merge the changes back into *prodict*.

    Blocks until the editor window is closed (``code -w``). Changed problems
    get their edit history stamped with date+editor; the whole database is
    then saved. Returns the edited ids as a ":"/"," expression.
    """
    # read the id list and generate the .tex file to edit
    texdata = GenerateTexDataforEditing(id_string,prodict,"模板文件/题目编辑.txt",editor)
    SaveTextFile(texdata,toeditfilepath)
    # open the .tex file and wait for the user to finish
    print("编辑完成后保存文件, 关闭文件继续...")
    os.system("code -w -g "+toeditfilepath)
    # map edited ids to their new content/answer/solution/remark
    editor = GetDate() + "\t" + editor
    editedtexdata = ReadTextFile(toeditfilepath)
    edited_dict = GetEditedProblems(editedtexdata)
    editedIDList = []
    # write the .tex changes back to the database dict and save it
    for id in edited_dict:
        content, ans, solution, remark = edited_dict[id]
        # only problems whose four fields actually changed are touched
        if not (content == prodict[id]["content"] and ans == prodict[id]["ans"] and solution == prodict[id]["solution"] and remark == prodict[id]["remark"]):
            prodict[id]["content"] = content
            prodict[id]["ans"] = ans
            prodict[id]["solution"] = solution
            prodict[id]["remark"] = remark
            prodict[id]["edit"] = prodict[id]["edit"] + [editor]
            editedIDList.append(id)
    save_dict(prodict,"../题库0.3/problems.json")
    return generate_exp(editedIDList) # expression of the ids actually edited
|
||
|
||
|
||
def RemoveMutualLink(metadata, prodict):
    """Remove mutual id links (same/related/unrelated) between problem pairs.

    *metadata* holds one pair of problem ids per line, separated by spaces
    or tabs; ids are zero-padded to 6 digits. Both directions of the link
    are removed from all three fields. Returns 0.

    Bug fix: the old guard was ``if not line == []`` which compared a string
    to a list and was therefore always true, so a blank line crashed the
    two-value unpack below; blank lines are now skipped.
    """
    for raw in metadata.split("\n"):
        line = re.sub(r"[\s]+", r"\t", raw).strip()
        if not line:
            continue  # ignore empty/whitespace-only lines
        id1, id2 = line.split("\t")
        id1 = id1.zfill(6)
        id2 = id2.zfill(6)
        for field in ["same", "related", "unrelated"]:
            if id1 in prodict[id2][field]:
                prodict[id2][field].remove(id1)
            if id2 in prodict[id1][field]:
                prodict[id1][field].remove(id2)
    return 0
|
||
|
||
def jsonEditProblemMetadata(id_string,prodict,editor): # edit problem metadata as JSON in VS Code
    """Export the selected problems to a temp JSON file, open it in VS Code,
    and merge any edits back into *prodict*.

    Blocks until the editor window closes, then stamps every changed
    problem's edit history with date+editor. Returns the edited ids as a
    ":"/"," expression. Note: *prodict* is modified in place but not saved.
    """
    jsontoeditpath = "临时文件/problem_edit.json"
    idlist = generate_number_set(id_string,prodict)
    edit_dict = {}
    for id in idlist:
        edit_dict[id] = prodict[id].copy()
    save_dict(edit_dict,jsontoeditpath)
    # open the JSON file for editing (blocks until the window closes)
    os.system("code -w -g "+jsontoeditpath)
    # the steps below run automatically after the editor window is closed
    editeddict = load_dict(jsontoeditpath)
    editlist = []
    for id in editeddict:
        if not prodict[id] == editeddict[id]:
            prodict[id] = editeddict[id].copy()
            prodict[id]["edit"] = prodict[id]["edit"] + [GetDate() + "\t" + editor]
            editlist.append(id)
    return(generate_exp(editlist)) # expression of the edited ids
|
||
|
||
def GetSamePairs(prodict):
    """Collect the annotated groups of identical problems.

    A group is [id] + that problem's "same" list, included only when the
    anchor problem has a non-empty "same" list AND at least one usage.
    """
    return [
        [pid] + entry["same"]
        for pid, entry in prodict.items()
        if len(entry["same"]) > 0 and len(entry["usages"]) != 0
    ]
|
||
|
||
def ShareSameUsages(prodict, same_group):
    """Merge the usage records of every problem in *same_group* and write
    the merged (order-preserving, de-duplicated) list back to each of them.

    Returns (same_group, merged usage list).
    """
    merged = []
    for pid in same_group:
        for usage in prodict[pid]["usages"]:
            if usage not in merged:
                merged.append(usage)
    for pid in same_group:
        prodict[pid]["usages"] = merged.copy()
    return (same_group, merged)
|
||
|
||
def SortUsages(prodict):
    """Sort every problem's usage records lexicographically, in place.

    Each "usages" list is replaced by a fresh sorted copy. Returns 0.
    """
    for pid in prodict:
        prodict[pid]["usages"] = sorted(prodict[pid]["usages"])
    return 0
|
||
|
||
|
||
def SortUsagesbyAverage(theusages):
    """Order usage records by their mean score rate, highest first.

    Uses Python's stable sort, so records with equal means keep their
    original relative order. Returns a new sorted list.
    """
    return sorted(theusages, key=lambda usage: parseUsage(usage)["glossdiff"], reverse=True)
|
||
|
||
|
||
def StripSuffix(string, suf_words):
    """Trim surrounding whitespace, then cut each suffix word (and any
    non-whitespace tail after it) off the end of the string, in order."""
    result = string.strip()
    for word in suf_words:
        result = re.sub(word + r"[\S]*$", "", result)
    return result
|
||
|
||
def MatchCondition(problem,condition_dict): # does *problem* satisfy every filter in condition_dict?
    """Return True iff *problem* satisfies every condition in *condition_dict*.

    Keys are field names, optionally with digits appended (to allow several
    independent conditions on the same field) and/or a "_not" marker for
    negative conditions. Values are a regex string or list of regexes; a
    value of [""] disables the condition. A positive condition needs at
    least one regex to match; a negative condition fails when any regex
    matches. List-valued fields are joined with newlines before matching.
    """
    match = True # assume a match until some condition fails
    for fieldraw in [c for c in condition_dict if not "_not" in c and not condition_dict[c] == [""]]: # positive conditions ([""] = disabled)
        cond_list = condition_dict[fieldraw]
        if type(cond_list) == str:
            cond_list = [cond_list]
        field = re.sub(r"\d","",fieldraw) # digits only disambiguate duplicate keys
        if type(problem[field]) == list: # flatten list fields into one string
            string = "\n".join((problem[field]))
        else:
            string = str(problem[field])
        current_match = False
        for cond in cond_list: # one matching regex satisfies this field
            if len(re.findall(cond,string)) > 0:
                current_match = True
        if current_match == False:
            match = False # any unsatisfied positive condition fails the match
    for fieldraw in [c for c in condition_dict if "_not" in c and not condition_dict[c] == [""]]: # negative conditions ([""] = disabled)
        cond_list = condition_dict[fieldraw]
        fieldraw = fieldraw.replace("_not","")
        field = re.sub(r"\d","",fieldraw)
        if type(problem[field]) == list: # flatten list fields into one string
            string = "\n".join((problem[field]))
        else:
            string = str(problem[field])
        current_match = True
        for cond in cond_list: # any matching regex fails this field
            if len(re.findall(cond,string)) > 0:
                current_match = False
        if current_match == False:
            match = False
    return match # True iff every condition is satisfied
|
||
|
||
|
||
def get_color(value):
    """Map a score rate in [0, 1] to an "{r,g,0}" RGB triple string.

    High rates shade toward red, low rates toward green; 0.5 gives yellow.

    Bug fix: the format string used "%3f" (minimum width 3, default 6-digit
    precision) for the green channel instead of "%.3f", producing e.g.
    "1.000000" — both channels now use three decimal places.
    """
    value = float(value)
    if value >= 0.5:
        (r, g) = (1, 2 - 2 * value)
    else:
        (r, g) = (2 * value, 1)
    return "{%.3f,%.3f,0}" % (r, g)
|
||
|
||
def GenerateValueColorCode(matchobj):
    """LaTeX for a black-bordered box whose fill color encodes the rate.

    *matchobj* is a regex match whose group 1 is the three-decimal rate.
    """
    rate = matchobj.group(1)
    return "\t\\fcolorbox[rgb]{0,0,0}%s{%s}" % (get_color(rate), rate)
|
||
|
||
def StudentsGetAfterContent(id, prodict, answered, spaceflag):
    """Build the text that follows a problem in the student handout.

    *answered* adds a red answer line ("暂无答案" when the answer is empty);
    *spaceflag* adds the problem's vertical space when it has one.
    """
    parts = []
    if answered:
        ans = prodict[id]["ans"]
        parts.append("答案: \\textcolor{red}{%s}\n\n" % (ans if ans != "" else "暂无答案"))
    if spaceflag and prodict[id]["space"] != "":
        parts.append("\\vspace*{%s}\n\n" % prodict[id]["space"])
    return "".join(parts)
|
||
|
||
def XeLaTeXCompile(filedir, filename):
    """Run XeLaTeX twice on *filename*, outputting into *filedir*.

    Two passes are needed for correct cross-references and page numbers.
    Returns True only when every pass exits with status 0.
    """
    succeeded = True
    for attempt in range(2):
        status = os.system("xelatex -interaction=batchmode -output-directory=%s %s" % (filedir, filename))
        if status == 0:
            print("第%d次编译成功." % (attempt + 1))
        else:
            succeeded = False
    return succeeded
|
||
|
||
def GenerateStudentsBodyString(problems,sectiontitles,pro_dict,consecutivenumbering,answered,spaceflag): # build the body of the student-version .tex file
    """Render problem id expressions into the LaTeX body of a handout.

    When *problems* and *sectiontitles* have equal length, one \\section per
    title is emitted and numbering either continues across sections
    (*consecutivenumbering*) or restarts at each one. Otherwise all id
    expressions are merged into a single flat enumerate. *answered* and
    *spaceflag* control each problem's answer line and vertical space.
    """
    bodystring = ""
    if len(problems) == len(sectiontitles):
        count = 0
        for i in range(len(problems)):
            idlist = generate_number_set(problems[i],pro_dict)
            # \setcounter continues the item numbering when requested
            sectionstring = "\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(sectiontitles[i],count if consecutivenumbering else 0)
            for id in idlist:
                count += 1
                aftercontent = StudentsGetAfterContent(id,pro_dict,answered,spaceflag)
                sectionstring += "\\item {\\tiny(%s)} %s\n\n%s"%(id,pro_dict[id]["content"],aftercontent)
            sectionstring += "\\end{enumerate}"
            bodystring += sectionstring
    else:
        # length mismatch: fall back to one flat enumerate over all ids
        idstring = ",".join(problems)
        idlist = generate_number_set(idstring,pro_dict)
        sectionstring = "\\begin{enumerate}\n\n"
        for id in idlist:
            aftercontent = StudentsGetAfterContent(id,pro_dict,answered,spaceflag)
            sectionstring += "\\item {\\tiny(%s)} %s\n\n%s"%(id,pro_dict[id]["content"],aftercontent)
        sectionstring += "\\end{enumerate}"
        bodystring += sectionstring
    return bodystring # the LaTeX body string
|
||
|
||
if __name__ == "__main__":
    # This module is a library of database utilities; running it directly
    # only prints a usage notice.
    print("数据库工具, import用.")