This repository has been archived on 2024-06-23. You can view files and clone it, but cannot push or open issues or pull requests.
mathdeptv2/工具v2/database_tools.py

819 lines
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json,re,os,Levenshtein,fitz,time,sys
import pandas as pd
def GetDate():
    """Return the current local date formatted as ``yyyymmdd``."""
    return time.strftime("%Y%m%d", time.localtime())
def ReadTextFile(filepath):
    """Read a UTF-8 text file and return its whole content as a string."""
    with open(filepath, encoding="u8") as handle:
        return handle.read()
def SaveTextFile(data, filepath):
    """Write ``data`` to ``filepath`` as UTF-8 text, creating parent folders.

    Args:
        data: The text to write.
        filepath: Destination path; missing parent directories are created.

    Returns:
        ``filepath`` unchanged.
    """
    pathname = os.path.dirname(filepath)
    # A bare file name has an empty dirname; there is nothing to create then.
    if pathname:
        # makedirs handles nested missing folders, which os.mkdir cannot.
        os.makedirs(pathname, exist_ok=True)
    with open(filepath, "w", encoding="u8") as f:
        f.write(data)
    return filepath
def SortDict(adict):
    """Return a new dict holding the items of ``adict`` in ascending key order."""
    ordered = {}
    for key in sorted(adict):
        ordered[key] = adict[key]
    return ordered
#读取存储json数据库相关(不限于题号数据库)
def load_dict(filename):
    """Load the UTF-8 JSON file ``filename`` and return the decoded Python object."""
    with open(filename, "r", encoding="u8") as source:
        return json.load(source)
def save_dict(adict, filepath):
    """Serialize ``adict`` as indented, non-ASCII-escaped JSON and save it.

    Args:
        adict: The JSON-serializable object to store.
        filepath: Destination file path.

    Returns:
        ``filepath`` on success, or the integer ``1`` when serializing or
        writing fails (a message is printed in that case).
    """
    try:
        jsondata = json.dumps(adict, indent=4, ensure_ascii=False)
        SaveTextFile(jsondata, filepath)
        return filepath
    except Exception:
        # Best-effort save: report and signal failure, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("保存 %s 文件失败"%filepath)
        return 1
def pre_treating(string):
    """Strip markup and punctuation that is irrelevant when comparing problems.

    The removal passes run in a fixed order: centered blocks, answer
    placeholders / choice macros, whitespace and bracket-like characters,
    a few LaTeX command names, and finally basic ASCII punctuation.
    """
    removal_patterns = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)|(overline)",
        r"[,\.:;?]",
    )
    cleaned = string
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
def treat_dict(p_dict):
    """Build a reduced copy of the problem dict for similarity comparison.

    Each entry keeps only the pre-treated ``content`` and the original
    ``same`` field.
    """
    return {
        pid: {
            "content": pre_treating(entry["content"]),
            "same": entry["same"],
        }
        for pid, entry in p_dict.items()
    }
def detectmaxsim(currentid, excludelist, adict):
    """Find the entry most similar to ``currentid`` outside ``excludelist``.

    Similarity is the Jaro ratio between ``content`` strings.

    Returns:
        ``(best_ratio, best_id)``; ``(-1, "000000")`` when no candidate exists.
    """
    best_ratio, best_id = -1, "000000"
    for candidate in adict:
        if candidate in excludelist:
            continue
        ratio = Levenshtein.jaro(adict[candidate]["content"], adict[currentid]["content"])
        if ratio > best_ratio:
            best_ratio, best_id = ratio, candidate
    return (best_ratio, best_id)
def generate_problem_series(startingid, length, adict):
    """Chain ``length`` problems, each the closest match to the previous one.

    Returns:
        A comma-joined string of ids beginning with ``startingid``.
    """
    chain = [startingid]
    cursor = startingid
    for _ in range(length):
        _, cursor = detectmaxsim(cursor, chain, adict)
        chain.append(cursor)
    return ",".join(chain)
def generate_number_set(string, *thedict):
    """Expand an id expression such as ``"1:3,5"`` into six-digit ids.

    Whitespace is ignored; ``a:b`` denotes the inclusive range from a to b.
    When a single dict is passed as the extra argument, only ids that are
    keys of that dict are kept. Any other extra arguments yield the error
    string ``"输入参数有误"``.
    """
    compact = re.sub(r"[\n\s]", "", string)
    expanded = []
    for token in compact.split(","):
        if ":" in token:
            start, end = token.split(":")
            expanded.extend(str(number).zfill(6) for number in range(int(start), int(end) + 1))
        else:
            expanded.append(token.zfill(6))
    if len(thedict) == 0:
        return expanded
    if len(thedict) == 1 and type(thedict[0]) is dict:
        known = thedict[0]
        return [pid for pid in expanded if pid in known]
    return "输入参数有误"
def generate_exp(id_list):
    """Compress an id list into a ``start:end,single`` expression string.

    Consecutive ids (each exactly one larger than the previous) are merged
    into ``start:end``. An empty list yields ``"无有效题号"``.
    """
    if len(id_list) == 0:
        return "无有效题号"
    # Each run is [first_id, last_id]; a new run starts whenever the
    # sequence stops increasing by exactly one.
    runs = [[id_list[0], id_list[0]]]
    for pid in id_list[1:]:
        if int(pid) - 1 == int(runs[-1][1]):
            runs[-1][1] = pid
        else:
            runs.append([pid, pid])
    pieces = [first if first == last else first + ":" + last for first, last in runs]
    return ",".join(pieces).replace('"', "")
def parsePDF(filePath):
    """Extract the text of every page of a PDF, one newline after each page."""
    with fitz.open(filePath) as document:
        return "".join(page.get_text() + "\n" for page in document.pages())
def extractIDs(filePath):
    """Collect the six-digit ids written as ``(dddddd)`` in a file.

    ``.txt`` / ``.tex`` files are read as UTF-8 text and ``.pdf`` files go
    through ``parsePDF``; any other extension returns ``"格式不正确"``.

    Returns:
        The ids compressed by ``generate_exp``.
    """
    if filePath.endswith((".txt", ".tex")):
        with open(filePath, "r", encoding = "u8") as source:
            text = source.read()
    elif filePath.endswith(".pdf"):
        text = parsePDF(filePath)
    else:
        return "格式不正确"
    return generate_exp(re.findall(r"\((\d{6})\)", text))
def spareIDs(dictname):
    """Describe the unused id intervals of a problem dict.

    Args:
        dictname: Dict keyed by six-digit id strings. The keys are taken in
            iteration order, so they are presumably already sorted (verify
            against callers).

    Returns:
        A multi-line string; each line names one closed interval of free
        ids, the last one running up to ``999999``. An empty dict reports
        the whole range starting at ``000001``.
    """
    line_template = "首个空闲id: %s, 直至: %s"
    idlist = list(dictname.keys())
    if not idlist:
        # generate_exp would return a non-numeric placeholder here, which
        # the integer parsing below cannot handle.
        return line_template % ("000001", "999999")
    used_list = generate_exp(idlist).split(",")
    lines = []
    for current, following in zip(used_list, used_list[1:]):
        # The last 6 chars of a group are its final id; the first 6 chars of
        # the next group are its starting id.
        lines.append(line_template % (
            str(int(current[-6:]) + 1).zfill(6),
            str(int(following[:6]) - 1).zfill(6),
        ))
    lines.append(line_template % (str(int(used_list[-1][-6:]) + 1).zfill(6), "999999"))
    return "\n".join(lines)
def GenerateProblemListFromString(data):
    """Split the text of a .tex file into ``(content, source_prefix)`` tuples.

    Only the part between ``\\begin{document}`` and ``\\end{document}`` is
    used when present. Each ``\\item`` becomes one problem. The prefix is
    taken from the closest preceding line that consists of ``%`` followed
    by non-space text, or ``""`` when there is none.

    Args:
        data: The text of the .tex file (or a fragment of it).

    Returns:
        A list of ``(content, prefix)`` tuples in document order.
    """
    document_bodies = re.findall(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if document_bodies:
        data = document_bodies[0]
    data = re.sub(r"\n{2,}", "\n", data)
    # Insert an explicit terminator before every \item and at the end of
    # each enumerate so that each item body can be matched lazily.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)
    problem_list = []
    for found in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        begin, end = found.span(1)
        content = data[begin:end].strip()
        # Drop everything from the first trailing comment line onwards.
        content = re.sub(r"\n\%[\s\S]*$", "", content)
        # The most recent "%..." line before this item names its source.
        prefixes = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:begin])
        suffix = prefixes[-1].replace("%", "").strip() if prefixes else ""
        problem_list.append((content, suffix))
    return problem_list
def CreateEmptyProblem(problem):
    """Return a blank problem with the same fields as ``problem``.

    Strings become ``""``, lists become ``[]``, ints and floats become
    ``-1``; values of any other exact type are carried over unchanged.
    """
    blank = problem.copy()
    for field, value in problem.items():
        kind = type(value)
        if kind is str:
            blank[field] = ""
        elif kind is list:
            blank[field] = []
        elif kind is int or kind is float:
            blank[field] = -1
    return blank
def CreateNewProblem(id,content,origin,dict,editor):
    """Build a new problem record modelled on entry ``"000001"`` of ``dict``.

    The blank template gets a six-digit id, the given content and origin,
    and an edit history containing only ``editor``.
    """
    fresh = CreateEmptyProblem(dict["000001"])
    fresh.update({
        "id": str(id).zfill(6),
        "content": content,
        "origin": origin,
        "edit": [editor],
    })
    return fresh
def AddProblemstoDict(startingid, raworigin, problems, editor, indexdescription, thedict):
    """Append parsed problems to ``thedict`` with consecutive ids.

    Args:
        startingid: First id to assign (anything ``int()`` accepts).
        raworigin: Common origin text placed before each problem's prefix.
        problems: List of ``(content, prefix)`` tuples, e.g. the output of
            ``GenerateProblemListFromString``.
        editor: Editor name; stored in the edit history with today's date.
        indexdescription: Text placed before the running index inside the
            origin; when blank, no index is appended.
        thedict: Problem dict, modified in place.

    Returns:
        ``0`` on success, ``1`` as soon as a target id is already in use
        (problems added before that point stay in ``thedict``).
    """
    if not problems:
        # Nothing to add; avoids indexing into an empty list below.
        return 0
    id = int(startingid)
    currentsuffix = problems[0][1]
    problemindex = 0
    description = indexdescription.strip()
    for p, suffix in problems:
        pid = str(id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        if suffix == currentsuffix:
            problemindex += 1
        else:
            # A new source prefix starts a fresh numbering; remember it so
            # that the following problems of the same source keep counting.
            currentsuffix = suffix
            problemindex = 1
        origin = raworigin + suffix + description + ("" if description == "" else str(problemindex))
        newproblem = CreateNewProblem(pid, p.strip(), origin, thedict, GetDate() + "\t" + editor)
        # Guess the problem type from the placeholder macro in its text.
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"
        thedict[pid] = newproblem
        maxsim, argmaxsim = detectmaxsim(pid, [pid], thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        id += 1
    return 0
def CreateIDLinks(old_id_list, new_id_list, *thedict):
    """Pair each old id with a new id, position by position.

    When a single dict is passed as the extra argument, new ids that are
    already keys of it are skipped first.

    Returns:
        A list of ``(old_id, new_id)`` tuples, or the string
        ``"新ID个数不足."`` when there are fewer usable new ids than old ids.
    """
    candidates = new_id_list
    if len(thedict) == 1 and type(thedict[0]) is dict:
        taken = thedict[0]
        candidates = [new_id for new_id in new_id_list if new_id not in taken]
    if len(candidates) < len(old_id_list):
        return "新ID个数不足."
    return [(old_id, candidates[position]) for position, old_id in enumerate(old_id_list)]
def CreateRelatedProblems(links, thedict, filepath, editor):
    """Write a JSON file of draft problems derived from existing ones.

    For every ``(old_id, new_id)`` pair the old record is copied under the
    OLD id key, tagged with the new id plus the marker ``"待替换"``, has its
    content prefixed with ``"(待编辑)"``, its usage/same/unrelated lists
    cleared, and its edit history and origin extended with today's date.

    Args:
        links: Iterable of ``(old_id, new_id)`` pairs.
        thedict: Source problem dict (its records are not modified).
        filepath: Where the draft JSON is saved via ``save_dict``.
        editor: Editor name recorded in the edit history.

    Returns:
        ``0`` on success, ``1`` when any step raised an exception.
    """
    try:
        today = GetDate()
        new_dict = {}
        for old_id, new_id in links:
            draft = thedict[old_id].copy()
            draft["id"] = new_id + "待替换"
            draft["content"] = "(待编辑)" + draft["content"]
            draft["usages"] = []
            draft["same"] = []
            draft["unrelated"] = []
            # Build a new list so the source record's history stays intact.
            draft["edit"] = draft["edit"].copy() + [today + "\t" + editor]
            draft["origin"] += "-" + today + "修改"
            new_dict[old_id] = draft
        save_dict(new_dict, filepath)
    except Exception:
        # Signal failure to the caller without trapping KeyboardInterrupt
        # or SystemExit.
        return 1
    return 0
def ImportRelatedProblems(new_json, main_json):
    """Merge edited related-problem drafts into the main problem database.

    Each draft is keyed by the id of the problem it derives from, and its
    ``"id"`` field holds the new id followed by ``"待替换"``. The draft is
    stored under the new id, both problems are linked through ``related``,
    and ``genre`` / ``space`` are set from the placeholder macro found in
    the content.

    Args:
        new_json: Path of the edited draft JSON file.
        main_json: Path of the main database JSON file (rewritten, sorted).

    Returns:
        ``0`` on success; ``1`` when a new id already exists, in which case
        nothing is written to disk.
    """
    pro_dict = load_dict(main_json)
    new_dict = load_dict(new_json)
    for id in new_dict:
        new_id = new_dict[id]["id"].replace("待替换", "")
        if new_id in pro_dict:
            print("题号有重复")
            return 1
        pro_dict[new_id] = new_dict[id].copy()
        pro_dict[new_id]["id"] = new_id
        pro_dict[id]["related"] += [new_id]
        pro_dict[new_id]["related"] += [id]
        p = pro_dict[new_id]["content"]
        # The second assignment of each branch sets "space"; it used to
        # overwrite "genre" by mistake.
        if "blank" in p:
            pro_dict[new_id]["genre"] = "填空题"
            pro_dict[new_id]["space"] = ""
        elif "bracket" in p:
            pro_dict[new_id]["genre"] = "选择题"
            pro_dict[new_id]["space"] = ""
        else:
            pro_dict[new_id]["genre"] = "解答题"
            pro_dict[new_id]["space"] = "4em"
        print("导入关联题目 %s -> %s 信息成功."%(id,new_id))
    save_dict(SortDict(pro_dict), main_json)
    return 0
def strip_suffix(originalString, suf_words_list):
    """Cut ``originalString`` at the given suffix words.

    For every regex fragment in ``suf_words_list``, a match followed by
    non-space characters up to the end of the string is removed. The
    removals are applied one after another, so every listed word takes
    effect, and an empty list returns the string unchanged.

    Args:
        originalString: The text to trim.
        suf_words_list: Iterable of regex fragments marking suffix starts.

    Returns:
        The trimmed string.
    """
    output = originalString
    for sw in suf_words_list:
        output = re.sub(sw + r"[\S]*$", "", output)
    return output
def get_striped_origin(pro_dict,id,suf_words_list):
    """Return the ``origin`` of problem ``id`` after ``strip_suffix`` trimming."""
    origin_text = pro_dict[id]["origin"]
    return strip_suffix(origin_text, suf_words_list)
def SeperateFirstLine(string):
    """Split text into ``(first_line, remaining_lines)``.

    Every line is stripped of surrounding whitespace; the remaining lines
    are joined back with newlines.
    """
    head, *tail = string.split("\n")
    rest = "\n".join(row.strip() for row in tail)
    return (head.strip(), rest)
def ObtainDatatoModify(metadatafilepath, fields_dict):
    """Parse a metadata text file into ``(field, id, content)`` triples.

    Items are separated by blank lines. A single-line item that is a key of
    ``fields_dict`` selects the field for the items after it; any other
    single-line item resets the field to ``"NotAField"``. An item of two or
    more lines holds an id on its first line and the new content below it.

    Args:
        metadatafilepath: Path of the metadata text file.
        fields_dict: Dict whose keys are the valid field names.

    Returns:
        A list of ``(field, six_digit_id, content)`` tuples.
    """
    raw = ReadTextFile(metadatafilepath)
    # Force a blank line after every field-name line so that it always
    # forms an item of its own.
    pieces = []
    for row in raw.split("\n"):
        pieces.append(row + ("\n\n" if row.strip() in fields_dict else "\n"))
    # Collapse whitespace-only gaps into exactly one blank line.
    normalized = re.sub(r"\n[\s\n]*\n", "\n\n", "".join(pieces).strip())
    to_modify_list = []
    currentfield = "NotAField"
    for chunk in normalized.split("\n\n"):
        label = chunk.strip()
        if label in fields_dict:
            currentfield = label
        elif "\n" not in chunk:
            currentfield = "NotAField"
        else:
            pid, body = SeperateFirstLine(chunk)
            to_modify_list.append((currentfield, str(pid).zfill(6), body))
    return to_modify_list
def FloatToInt(string):
    """Parse a number, snapping it to an int when within 0.01 of one.

    Returns:
        The nearest ``int`` when the value is that close to it, else the
        parsed ``float``.
    """
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value
def OverwriteData(prodict, fieldsdict, field_id_and_content):
    """Overwrite one field of one problem.

    ``field_id_and_content`` is a ``(field, id, content)`` triple. Fields
    whose ``FieldType`` is ``"str"`` store the content as given; ``"int"``
    and ``"float"`` fields store it converted by ``FloatToInt``. An unknown
    id only prints a message.

    Returns:
        ``(id, field, content)``, where content is the converted value for
        numeric fields.
    """
    field, id, content = field_id_and_content
    kind = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    if kind == "str":
        prodict[id][field] = content
    elif kind in ("int", "float"):
        content = FloatToInt(content)
        prodict[id][field] = content
    print("已覆盖 %s 的 %s 字段, 内容为 %s"%(id,field,content))
    return (id, field, content)
def AppendData(prodict, fieldsdict, field_id_and_content):
    """Append content to a string or list field of one problem.

    ``field_id_and_content`` is a ``(field, id, content)`` triple. For
    ``"str"`` fields the content is added on a new line unless it already
    occurs in the field text; for ``"list"`` fields every stripped line
    not yet present is added as a new element.

    Returns:
        ``(id, field, content)``.
    """
    field, id, content = field_id_and_content
    kind = fieldsdict[field]["FieldType"]
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    record = prodict[id]
    if kind == "str":
        if content.strip() in record[field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,content))
        else:
            record[field] = (record[field].strip() + "\n" + content).strip()
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,content))
    elif kind == "list":
        for entry in (raw.strip() for raw in content.split("\n")):
            if entry in record[field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,entry))
            else:
                # Rebind to a new list rather than mutating in place.
                record[field] = record[field].copy() + [entry]
                print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,entry))
    return (id, field, content)
def AppendMutualData(prodict, field_id_and_content):
    """Record a two-way link between one problem and several others.

    ``field_id_and_content`` is ``(field, id, content)``; every line of the
    content is another id (zero-padded to six digits). For each pair whose
    ids both exist, each id is added to the other's ``field`` list unless
    it is already there.

    Returns:
        ``(id, field, content)``.
    """
    field, id, content = field_id_and_content
    partners = [str(row).zfill(6) for row in content.split("\n")]
    for id2 in partners:
        if id not in prodict:
            print("题号 %s 并不在数据库中"%id)
            continue
        if id2 not in prodict:
            print("题号 %s 并不在数据库中"%id2)
            continue
        # Apply the link in both directions with identical rules.
        for owner, other in ((id, id2), (id2, id)):
            if other in prodict[owner][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(owner,field,other))
            else:
                prodict[owner][field] = prodict[owner][field] + [other]
                print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(owner,field,other))
    return (id, field, content)
def AppendObjData(prodict, objdict, field_id_and_content):
    """Attach objective codes to one problem.

    ``field_id_and_content`` is ``(field, id, content)``; each stripped
    line of the content is an objective code. Codes missing from
    ``objdict`` or already present in the field are reported and skipped.

    Returns:
        ``(id, field, content)``.
    """
    field, id, content = field_id_and_content
    if id not in prodict:
        print("题号 %s 并不在数据库中"%id)
        return (id, field, content)
    for objid in map(str.strip, content.split("\n")):
        if objid not in objdict:
            print("目标编号 %s 并不在数据库中"%objid)
            continue
        if objid in prodict[id][field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,objid))
            continue
        prodict[id][field] = prodict[id][field] + [objid]
        print("已于 %s 的 %s 字段执行添加, 编号为 %s, 内容为 %s"%(id,field,objid,objdict[objid]["content"]))
    return (id, field, content)
def AppendUsageData(prodict, field_id_and_content):
    """Append usage records to one problem, deferring conflicting ones.

    ``field_id_and_content`` is ``(field, id, content)``; each content line
    is one usage record whose whitespace runs are normalised to tabs.

    * A line containing ``FORCE`` (any case) is upper-cased, stripped of
      the marker and appended unconditionally.
    * Otherwise a line whose text (minus a leading date) already occurs is
      skipped; a line whose class token already occurs is put on a pending
      list; anything else is appended.

    Returns:
        ``(field, id, content, pending_text)``. ``pending_text`` starts
        with ``"usages\\n\\n"`` and lists, per pending record, the id, the
        new record marked ``FORCE`` and the existing records of that class.
    """
    field, id, content = field_id_and_content
    rows = [re.sub(r"\s+",r"\t",raw.strip()) for raw in content.split("\n")]
    pending_list = []
    for row in rows:
        if "FORCE" in row.upper():
            forced = re.sub(r"\s+",r"\t",re.sub(r"FORCE","",row.upper())).strip()
            prodict[id][field].append(forced)
            print("已于 %s 的 %s 字段执行强制添加, 内容为 %s"%(id,field,forced))
            continue
        existing_text = "\n".join(prodict[id][field])
        undated = re.sub(r"^\d{4,}\t","",row)
        if undated in existing_text:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,row))
            continue
        # The class token is the first tab-separated part containing one
        # of the class/grade characters.
        classid = [part for part in row.split("\t") if len(re.findall(r"[班高一二三]",part))>0][0]
        if classid in existing_text:
            print("班级 %s 在题号为 %s 的题目处已有使用记录, 在pending list中记录"%(classid,id))
            previous = [usage for usage in prodict[id][field] if classid in usage]
            pending_list.append((id, row, previous))
        else:
            prodict[id][field].append(row)
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,row))
    blocks = ["usages\n\n"]
    for pending_id, new_record, old_records in pending_list:
        blocks.append(pending_id + "\n")
        blocks.append(new_record+"\tFORCE\n")
        blocks.append("\n".join(old_records)+"\n\n")
    return (field, id, content, "".join(blocks))
def ImportMetadata(prodict, objdict, fieldsdict, metadatafilepath, pendingdatafilepath):
    """Apply every item of a metadata file to the in-memory database.

    The update routine is chosen by the ``Method`` entry of each field in
    ``fieldsdict``: overwrite, append, mutualappend, objappend or
    usageappend. ``fixed`` fields are reported and left alone.

    Args:
        prodict: Problem dict, modified in place (the caller must save it).
        objdict: Objective dict used by ``objappend`` fields.
        fieldsdict: Field definitions providing ``Method`` (and the
            ``FieldType`` read by the helpers).
        metadatafilepath: Path of the metadata text file.
        pendingdatafilepath: File that receives the pending text returned
            by ``AppendUsageData``.

    Returns:
        ``1`` when an item has no valid field name; otherwise the result of
        the last update routine that ran, or ``0`` when none ran.
    """
    data_to_modify = ObtainDatatoModify(metadatafilepath, fieldsdict)
    # Defined up front so an empty file or a leading "fixed" field cannot
    # leave the return value unbound.
    feedback = 0
    for item in data_to_modify:
        field = item[0]
        if field == "NotAField":
            print("字段名有误")
            return 1
        method = fieldsdict[field]["Method"]
        if method == "overwrite":
            feedback = OverwriteData(prodict, fieldsdict, item)
        elif method == "append":
            feedback = AppendData(prodict, fieldsdict, item)
        elif method == "mutualappend":
            feedback = AppendMutualData(prodict, item)
        elif method == "objappend":
            feedback = AppendObjData(prodict, objdict, item)
        elif method == "usageappend":
            feedback = AppendUsageData(prodict, item)
            outputstring = feedback[3]
            # NOTE(review): the pending file is rewritten for every usage
            # item, so only the last item's pending text survives - confirm
            # whether accumulating was intended.
            SaveTextFile(outputstring, pendingdatafilepath)
        elif method == "fixed":
            print("字段 %s 不可按此方式修改"%field)
    return feedback
def parseUsage(usagestring):
    """Tokenise one usage record.

    An optional leading date token (contains ``20`` followed by 2-6 digits
    and no CJK character) and an optional class token (contains one of the
    class/grade characters) are taken off; every remaining token is parsed
    as a float score rate.

    Returns:
        A dict with ``date``, ``classid``, ``subproblems`` (count),
        ``difficulties`` (list of floats) and ``glossdiff`` (their mean
        rounded to 3 decimals).
    """
    tokens = re.sub(r"\s+",r"\t",usagestring.strip()).split("\t")
    head = tokens[0]
    has_date = bool(re.search(r"20[\d]{2,6}", head)) and not re.search(r"[\u4e00-\u9fa5]", head)
    date = tokens.pop(0) if has_date else ""
    classid = tokens.pop(0) if re.search(r"[高一二三班]", tokens[0]) else ""
    rates = [float(token) for token in tokens]
    return {
        "date": date,
        "classid": classid,
        "subproblems": len(tokens),
        "difficulties": rates,
        "glossdiff": round(sum(rates)/len(tokens),3),
    }
def StringSubstitute(regex, template, stringtuple):
    """Replace each regex match in ``template`` with the matching-position string.

    Returns:
        The substituted text, or ``1`` (after printing a notice) when the
        number of matches differs from ``len(stringtuple)``.
    """
    placeholders = re.findall(regex, template)
    if len(placeholders) != len(stringtuple):
        print("长度不符.")
        return 1
    result = template
    for position, placeholder in enumerate(placeholders):
        result = result.replace(placeholder, stringtuple[position])
    return result
def GenerateTexDataforEditing(id_string, prodict, templatefilepath, editor):
    """Render the selected problems into an editable TeX document.

    Each problem becomes an ``\\item`` with its id, content, answer,
    solution and remark. The resulting enumerate block replaces the
    ``<<...待替换...>>`` placeholder of the template file.

    Returns:
        The TeX text, or ``1`` if ``StringSubstitute`` reports a mismatch.
    """
    parts = ["编辑者: " + editor + "\n\n\\begin{enumerate}\n\n"]
    for pid in generate_number_set(id_string, prodict):
        entry = prodict[pid]
        content = entry["content"]
        answer = entry["ans"]
        solution = entry["solution"]
        remark = entry["remark"]
        parts.append(
            "\\item (%s) "%str(pid).zfill(6) + content + "\n\n"
            + "答案: " + answer + "\n\n"
            + "解答与提示: " + solution + "\n\n"
            + "备注: " + remark + "\n\n"
        )
    parts.append("\\end{enumerate}")
    template = ReadTextFile(templatefilepath)
    return StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>", template, ["".join(parts)])
def GetEditedProblems(string):
    """Extract the edited problems from the text of an editing .tex file.

    The content of the enumerate environment is split at ``(dddddd)`` id
    markers; each piece is parsed by ``parseProblem``.

    Returns:
        A dict mapping id to ``(content, answer, solution, remark)``.
    """
    # A trailing \item gives the last problem a terminator to match.
    body = re.findall(r"\\begin\{enumerate\}([\s\S]*)\\end\{enumerate\}",string)[0]+r"\item"
    pieces = re.findall(r"\((\d{6})\)([\s\S]*?)\\item",body)
    return {pid: parseProblem(text) for pid, text in pieces}
def parseProblem(string):
    """Split a four-paragraph problem text into its parts.

    Paragraphs are separated by two or more newlines. The labels
    ``答案:``, ``解答与提示:`` and ``备注:`` are removed from the second,
    third and fourth paragraph. Raises ``ValueError`` when the text does
    not have exactly four paragraphs.

    Returns:
        ``(content, answer, solution, remark)``.
    """
    paragraphs = re.sub(r"\n{2,}","\n\n",string.strip()).split("\n\n")
    content, ans, solution, remark = paragraphs
    return (
        content.strip(),
        re.sub("答案:","",ans).strip(),
        re.sub("解答与提示:","",solution).strip(),
        re.sub("备注:","",remark).strip(),
    )
def ModifyProblembyTeX(id_string, prodict, toeditfilepath, editor):
    """Edit problems in VS Code through a generated .tex file.

    Steps: write the editable file, open it with ``code -w`` (blocking
    until it is closed), read it back, copy changed content / answer /
    solution / remark into ``prodict`` with a dated edit entry, and save
    the database to ``../题库0.3/problems.json``.

    Returns:
        The ids that were changed, compressed by ``generate_exp``.
    """
    # Produce the file to edit from the fixed template.
    draft = GenerateTexDataforEditing(id_string, prodict, "模板文件/题目编辑.txt", editor)
    SaveTextFile(draft, toeditfilepath)
    print("编辑完成后保存文件, 关闭文件继续...")
    os.system("code -w -g "+toeditfilepath)
    # Read the result back and apply the differences.
    stamp = GetDate() + "\t" + editor
    edited_dict = GetEditedProblems(ReadTextFile(toeditfilepath))
    tracked_fields = ("content", "ans", "solution", "remark")
    changed_ids = []
    for pid, new_values in edited_dict.items():
        content, ans, solution, remark = new_values
        record = prodict[pid]
        unchanged = all(
            new == record[name]
            for name, new in zip(tracked_fields, (content, ans, solution, remark))
        )
        if unchanged:
            continue
        record["content"] = content
        record["ans"] = ans
        record["solution"] = solution
        record["remark"] = remark
        record["edit"] = record["edit"] + [stamp]
        changed_ids.append(pid)
    save_dict(prodict, "../题库0.3/problems.json")
    return generate_exp(changed_ids)
def RemoveMutualLink(metadata, prodict):
    """Remove two-way links (same / related / unrelated) between id pairs.

    Args:
        metadata: Text with one pair of ids per line, separated by spaces
            or tabs. Blank lines are ignored.
        prodict: Problem dict, modified in place.

    Returns:
        Always ``0``.
    """
    for raw in metadata.split("\n"):
        line = re.sub(r"[\s]+", r"\t", raw).strip()
        if not line:
            # Blank line: nothing to unpack. The old comparison against []
            # could never be true for a string.
            continue
        id1, id2 = line.split("\t")
        id1 = id1.zfill(6)
        id2 = id2.zfill(6)
        for field in ["same", "related", "unrelated"]:
            if id1 in prodict[id2][field]:
                prodict[id2][field].remove(id1)
            if id2 in prodict[id1][field]:
                prodict[id1][field].remove(id2)
    return 0
def jsonEditProblemMetadata(id_string, prodict, editor):
    """Edit whole problem records as JSON in VS Code.

    The selected records are dumped to ``临时文件/problem_edit.json``, the
    file is opened with ``code -w`` (blocking until closed), and every
    record that differs afterwards replaces the one in ``prodict`` with a
    dated edit entry appended.

    Returns:
        The ids that were changed, compressed by ``generate_exp``.
    """
    jsontoeditpath = "临时文件/problem_edit.json"
    selection = {pid: prodict[pid].copy() for pid in generate_number_set(id_string, prodict)}
    save_dict(selection, jsontoeditpath)
    os.system("code -w -g "+jsontoeditpath)
    # Execution resumes here once the editor window has been closed.
    edited = load_dict(jsontoeditpath)
    changed_ids = []
    for pid, new_record in edited.items():
        if prodict[pid] == new_record:
            continue
        prodict[pid] = new_record.copy()
        prodict[pid]["edit"] = prodict[pid]["edit"] + [GetDate() + "\t" + editor]
        changed_ids.append(pid)
    return generate_exp(changed_ids)
def GetSamePairs(prodict):
    """List the groups of problems marked as identical.

    A group ``[id] + same`` is produced for every problem that has both a
    non-empty ``same`` list and at least one usage record.
    """
    return [
        [pid] + entry["same"]
        for pid, entry in prodict.items()
        if len(entry["same"]) > 0 and len(entry["usages"]) != 0
    ]
def ShareSameUsages(prodict, same_group):
    """Give every problem of ``same_group`` the union of the group's usages.

    The union keeps first-seen order without duplicates; each problem gets
    its own copy of the list.

    Returns:
        ``(same_group, merged_usages)``.
    """
    merged = []
    for pid in same_group:
        merged = merged + [usage for usage in prodict[pid]["usages"] if usage not in merged]
    for pid in same_group:
        prodict[pid]["usages"] = list(merged)
    return (same_group, merged)
def SortUsages(prodict):
    """Sort the ``usages`` list of every problem as plain strings.

    Each problem receives a newly built sorted list. Always returns ``0``.
    """
    for entry in prodict.values():
        entry["usages"] = sorted(entry["usages"])
    return 0
def SortUsagesbyAverage(theusages):
    """Return the usage records ordered by mean score rate, highest first.

    Records with equal means keep their original relative order.
    """
    averages = [parseUsage(record)["glossdiff"] for record in theusages]
    order = sorted(range(len(theusages)), key=lambda position: averages[position], reverse=True)
    return [theusages[position] for position in order]
def StripSuffix(string, suf_words):
    """Strip outer whitespace, then cut the text at each suffix word in turn.

    For every regex fragment in ``suf_words``, a match followed by
    non-space characters up to the end of the string is removed.
    """
    trimmed = string.strip()
    for fragment in suf_words:
        pattern = fragment + r"[\S]*$"
        trimmed = re.sub(pattern, "", trimmed)
    return trimmed
def MatchCondition(problem, condition_dict):
    """Check whether ``problem`` satisfies every filter in ``condition_dict``.

    Keys are field names, optionally carrying digits (removed before the
    lookup) and/or the marker ``_not``. Values are a regex or a list of
    regexes; the value ``[""]`` disables a filter.

    * A positive filter passes when at least one regex matches the field.
    * A ``_not`` filter passes when none of its regexes matches.

    List fields are joined with newlines before matching; other values are
    converted with ``str``. A plain-string value is treated as one regex
    for both kinds of filter.

    Args:
        problem: One problem record.
        condition_dict: The filters described above.

    Returns:
        ``True`` when all active filters pass, else ``False``.
    """
    def field_text(fieldraw):
        # Digits only distinguish several filters on the same field.
        value = problem[re.sub(r"\d", "", fieldraw)]
        if isinstance(value, list):
            return "\n".join(value)
        return str(value)

    match = True
    for fieldraw, cond_list in condition_dict.items():
        if cond_list == [""]:
            continue
        if isinstance(cond_list, str):
            # Without wrapping, a string would be iterated char by char.
            cond_list = [cond_list]
        negated = "_not" in fieldraw
        string = field_text(fieldraw.replace("_not", ""))
        hit = any(re.search(cond, string) is not None for cond in cond_list)
        # Positive filters need a hit; negated filters must have none.
        if hit == negated:
            match = False
    return match
def get_color(value):
    """Map a score rate to an RGB triple formatted as ``{r,g,0}``.

    The color is ``{2v,1,0}`` below 0.5 and ``{1,2-2v,0}`` from 0.5 upward.
    Both components are printed with three decimals.

    Args:
        value: The score rate, as a number or numeric string.

    Returns:
        The brace-wrapped RGB string.
    """
    value = float(value)
    if value >= 0.5:
        r, g = 1, 2 - 2 * value
    else:
        r, g = 2 * value, 1
    # Both components use %.3f; the green one used to be "%3f", which is a
    # width specifier and printed six decimals.
    return "{%.3f,%.3f,0}" % (r, g)
def GenerateValueColorCode(matchobj):
    """Build the LaTeX ``\\fcolorbox`` code for a matched score rate.

    Group 1 of ``matchobj`` is the value; the box has a black frame and a
    fill color from ``get_color``, and is preceded by a tab.
    """
    rate = matchobj.group(1)
    fill = get_color(rate)
    return "\t\\fcolorbox[rgb]{0,0,0}%s{%s}"%(fill,rate)
def StudentsGetAfterContent(id,prodict,answered,spaceflag):
    """Build the text placed after a problem in the student handout.

    With ``answered`` the answer (or ``暂无答案`` when empty) is shown in
    red; with ``spaceflag`` a ``\\vspace*`` is added when the problem
    defines a non-empty ``space``.
    """
    entry = prodict[id]
    parts = []
    if answered:
        shown = entry["ans"] if entry["ans"] != "" else "暂无答案"
        parts.append("答案: \\textcolor{red}{%s}\n\n"%shown)
    if spaceflag and entry["space"] != "":
        parts.append("\\vspace*{%s}\n\n"%entry["space"])
    return "".join(parts)
def XeLaTeXCompile(filedir,filename):
    """Run XeLaTeX twice on ``filename``, writing output to ``filedir``.

    Two passes are run so that references and page numbers can resolve.

    Returns:
        ``True`` only when both passes exit with status 0.
    """
    every_pass_ok = True
    for attempt in (1, 2):
        status = os.system("xelatex -interaction=batchmode -output-directory=%s %s"%(filedir,filename))
        if status == 0:
            print("第%d次编译成功."%attempt)
        else:
            every_pass_ok = False
    return every_pass_ok
def GenerateStudentBodyString(problems,sectiontitles,pro_dict,consecutivenumbering,answered,spaceflag):
    """Build the body of the student-version .tex file.

    When ``problems`` and ``sectiontitles`` have the same length, each id
    expression becomes its own titled section; with
    ``consecutivenumbering`` the item numbers continue across sections.
    Otherwise all expressions are merged into one untitled enumerate.

    Returns:
        The LaTeX body text.
    """
    def render_item(pid):
        trailer = StudentsGetAfterContent(pid,pro_dict,answered,spaceflag)
        return "\\item {\\tiny(%s)} %s\n\n%s"%(pid,pro_dict[pid]["content"],trailer)

    if len(problems) != len(sectiontitles):
        merged_ids = generate_number_set(",".join(problems),pro_dict)
        return "\\begin{enumerate}\n\n" + "".join(render_item(pid) for pid in merged_ids) + "\\end{enumerate}"

    sections = []
    already_numbered = 0
    for id_expression, title in zip(problems, sectiontitles):
        section_ids = generate_number_set(id_expression,pro_dict)
        opening = "\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(title,already_numbered if consecutivenumbering else 0)
        sections.append(opening + "".join(render_item(pid) for pid in section_ids) + "\\end{enumerate}")
        already_numbered += len(section_ids)
    return "".join(sections)
def TeachersGetAfterContent(id,prodict,objdict,topandbottomusagestuple):
    """Build the annotations placed after a problem in the teacher handout.

    The parts appear in this order: objectives, tags, answer, solution,
    usage records, origin, remark. Empty answer / solution / remark are
    replaced by placeholder texts. ``topandbottomusagestuple`` is passed
    on to ``GenerateUsageTexCode``.
    """
    entry = prodict[id]
    objs = "目标:\n\n%s\n\n"%GenerateObjTexCode(id,prodict,objdict)
    if entry["tags"] == []:
        tags = "标签: \n\n"
    else:
        tags = "标签: \\textcolor[rgb]{0.5,0.6,0.8}{%s}\n\n"%("; ".join(entry["tags"]))
    ans = "答案: \\textcolor{red}{%s}\n\n"%(entry["ans"] if entry["ans"] != "" else "暂无答案")
    solution = "解答或提示: \\textcolor{magenta}{%s}\n\n"%(entry["solution"] if entry["solution"] != "" else "暂无解答")
    origin = "来源: %s\n\n"%entry["origin"]
    remark = "备注: \\textcolor[rgb]{0,0.5,0.2}{%s}\n\n"%(entry["remark"] if entry["remark"] != "" else "暂无备注")
    usages = "使用记录:\n\n%s\n\n"%GenerateUsageTexCode(id,prodict,topandbottomusagestuple)
    return "".join((objs, tags, ans, solution, usages, origin, remark))
def GenerateObjTexCode(id,prodict,objdict):
    """Render the objectives of a problem as blue LaTeX lines.

    Each line shows the objective code and its content from ``objdict``;
    the code ``KNONE`` (any case) shows a fixed "no objective" text.
    """
    def describe(objid):
        if objid.upper() == "KNONE":
            return "无当前有效关联目标"
        return objdict[objid]["content"]

    return "".join(
        "\\textcolor{blue}{%s\t%s}\n\n"%(objid,describe(objid))
        for objid in prodict[id]["objs"]
    )
def ChooseUsage(usages,topandbottomusagestuple):
    """Pick which usage records to display.

    ``topandbottomusagestuple`` is ``(top, bottom)``:

    * a negative number returns ``usages`` untouched;
    * ``top + bottom`` above the record count returns all records sorted
      by mean score rate, highest first;
    * otherwise the ``top`` highest and ``bottom`` lowest are returned.
    """
    top,bottom = topandbottomusagestuple
    if top < 0 or bottom < 0:
        return usages
    ranked = SortUsagesbyAverage(usages)
    if top + bottom > len(usages):
        return ranked
    return ranked[:top] + ranked[(len(usages)-bottom):]
def GenerateUsageTexCode(id, prodict, topandbottomusagestuple):
    """Render the selected usage records of a problem as LaTeX.

    The records chosen by ``ChooseUsage`` are joined with blank lines, and
    every tab followed by a one-digit decimal value is replaced by a
    colored box from ``GenerateValueColorCode``.

    Args:
        id: Problem id.
        prodict: Problem dict; the ``usages`` list is copied, not changed.
        topandbottomusagestuple: ``(top, bottom)`` selection passed on to
            ``ChooseUsage``.

    Returns:
        The LaTeX text of the usage records.
    """
    rawusages = prodict[id]["usages"].copy()
    usages = ChooseUsage(rawusages, topandbottomusagestuple)
    # Raw string: same regex as before, without invalid escape sequences
    # in a normal string literal.
    usagecode = re.sub(r"\t([\d]\.[\d]{0,10})", GenerateValueColorCode, "\n\n".join(usages))
    return usagecode
def GenerateTeacherBodyString(problems,sectiontitles,prodict,objdict,consecutivenumbering,topandbottomusagestuple):
    """Build the body of the teacher-version .tex file.

    When ``problems`` and ``sectiontitles`` have the same length, each id
    expression becomes its own titled section; with
    ``consecutivenumbering`` the item numbers continue across sections.
    Otherwise all expressions are merged into one untitled enumerate.
    Every item is followed by the output of ``TeachersGetAfterContent``.

    Returns:
        The LaTeX body text.
    """
    def render_item(pid):
        trailer = TeachersGetAfterContent(pid,prodict,objdict,topandbottomusagestuple)
        return "\\item (%s) %s\n\n%s"%(pid,prodict[pid]["content"],trailer)

    if len(problems) != len(sectiontitles):
        merged_ids = generate_number_set(",".join(problems),prodict)
        return "\\begin{enumerate}\n\n" + "".join(render_item(pid) for pid in merged_ids) + "\\end{enumerate}"

    sections = []
    already_numbered = 0
    for id_expression, title in zip(problems, sectiontitles):
        section_ids = generate_number_set(id_expression,prodict)
        opening = "\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(title,already_numbered if consecutivenumbering else 0)
        sections.append(opening + "".join(render_item(pid) for pid in section_ids) + "\\end{enumerate}")
        already_numbered += len(section_ids)
    return "".join(sections)
def GetValidTexFiles(path):
    """Find the .tex files under ``path`` that contain ``(dddddd)`` ids.

    Returns:
        ``(texlist, pathlist)``: the sorted unique file paths and the
        sorted unique directories holding them.
    """
    tex_files = set()
    tex_folders = set()
    for root, _folders, files in os.walk(path):
        for name in files:
            if name[-4:] != ".tex":
                continue
            full_path = os.path.join(root,name)
            if len(re.findall(r"\((\d{6})\)",ReadTextFile(full_path))) > 0:
                tex_folders.add(root)
                tex_files.add(full_path)
    return (sorted(tex_files), sorted(tex_folders))
def generate_lessonid_list(lessonidstr,lessonsdict):
    """Select the lesson codes that start with one of the given prefixes.

    ``lessonidstr`` is a comma-separated list. A part without ``:`` is
    used as a prefix as given; a part with ``:`` is a numeric range that is
    expanded through ``generate_number_set`` (after upper-casing and
    removing ``K``) into prefixes ``"K" + last four digits``.

    Returns:
        The matching keys of ``lessonsdict`` in dict order.
    """
    prefixes = []
    for part in lessonidstr.split(","):
        if ":" in part:
            prefixes.extend("K"+number[-4:] for number in generate_number_set(part.upper().replace("K","")))
        else:
            prefixes.append(part)
    return [
        lesson_id
        for lesson_id in lessonsdict
        if any(lesson_id.startswith(prefix) for prefix in prefixes)
    ]
# Script entry point: when the file is run directly it only prints a notice.
if __name__ == "__main__":
    print("数据库工具, import用.")