"""Helper functions for a JSON-backed problem bank (import-only toolkit).

The bank is a dict keyed by six-digit problem ids; every record is a dict
with fields such as "content", "ans", "solution", "origin", "usages",
"same", "related", "unrelated" and "edit".
"""
import json
import os
import re
import sys
import time

# Levenshtein and PyMuPDF are only needed by detectmaxsim() / parsePDF().
# Tolerating their absence keeps the rest of the toolkit importable; the two
# functions raise a clear ImportError when they are actually called.
try:
    import Levenshtein
except ImportError:
    Levenshtein = None

try:
    import fitz
except ImportError:
    fitz = None


def GetDate():
    """Return the current local date as a ``yyyymmdd`` string."""
    # One clock read, so the parts cannot straddle midnight.
    return time.strftime("%Y%m%d")


def ReadTextFile(filepath):
    """Read a UTF-8 text file and return its whole content."""
    with open(filepath, "r", encoding="u8") as f:
        return f.read()


def SaveTextFile(data, filepath):
    """Write *data* to *filepath* as UTF-8 text and return *filepath*."""
    with open(filepath, "w", encoding="u8") as f:
        f.write(data)
    return filepath


def SortDict(adict):
    """Return a new dict with the items of *adict* sorted by key."""
    return dict(sorted(adict.items()))


# --- JSON database I/O (not limited to the problem database) ---------------

def load_dict(filename):
    """Load the JSON file *filename* and return it as a Python object."""
    with open(filename, "r", encoding="u8") as f:
        return json.load(f)


def save_dict(adict, filename):
    """Serialize *adict* as indented JSON into *filename*.

    Returns 0 on success and 1 on failure.
    """
    try:
        # Serialize before opening the file so that a serialization error
        # does not truncate an existing database.
        text = json.dumps(adict, indent=4, ensure_ascii=False)
        with open(filename, "w", encoding="u8") as f:
            f.write(text)
    except (OSError, TypeError, ValueError):
        return 1
    return 0


# Patterns removed, in this order, by pre_treating().
_PRETREAT_PATTERNS = (
    r"\\begin\{center\}[\s\S]*?\\end\{center\}",
    r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
    r"[\s\\\{\}\$\(\)\[\]]",
    r"[\n\t]",
    r"(displaystyle)|(overrightarrow)|(overline)",
    r"[,\.:;?]",
)


def pre_treating(string):
    """Strip characters and LaTeX fragments that are noise for comparison.

    Removes center environments, blank/bracket/choice macros, whitespace,
    braces/brackets/dollar signs, a few formatting macro names and basic
    punctuation. Returns the reduced string.
    """
    for pattern in _PRETREAT_PATTERNS:
        string = re.sub(pattern, "", string)
    return string


def treat_dict(p_dict):
    """Return a dict holding, per id, the pre-treated "content" and "same"."""
    return {
        pid: {
            "content": pre_treating(record["content"]),
            "same": record["same"],
        }
        for pid, record in p_dict.items()
    }


def detectmaxsim(currentid, excludelist, adict):
    """Find the problem most similar to *currentid* outside *excludelist*.

    Similarity is ``Levenshtein.jaro`` on the "content" fields. Returns
    ``(maxsim, argmaxsim)``; when no candidate exists the result is
    ``(-1, "000000")``.

    Raises:
        ImportError: if the Levenshtein package is not installed.
    """
    if Levenshtein is None:
        raise ImportError("detectmaxsim requires the 'Levenshtein' package")
    excluded = set(excludelist)
    maxsim = -1
    argmaxsim = "000000"
    for pid in adict:
        if pid in excluded:
            continue
        simrate = Levenshtein.jaro(adict[pid]["content"],
                                   adict[currentid]["content"])
        if simrate > maxsim:
            maxsim = simrate
            argmaxsim = pid
    return (maxsim, argmaxsim)


def generate_problem_series(startingid, length, adict):
    """Chain *length* problems, each the most similar to its predecessor.

    Starts at *startingid*; ids already in the chain are excluded from later
    searches. Returns the ids joined with ",".
    """
    series = [startingid]
    currentid = startingid
    for _ in range(length):
        _, currentid = detectmaxsim(currentid, series, adict)
        series.append(currentid)
    return ",".join(series)


def generate_number_set(string, *thedict):
    """Expand an id expression into a list of six-digit ids.

    ``"1:3,5"`` becomes ``["000001", "000002", "000003", "000005"]``.
    Whitespace is ignored and empty tokens (e.g. a trailing comma) are
    skipped. If one dict is passed as extra argument, only ids that are keys
    of that dict are kept. Any other extra arguments yield the string
    "输入参数有误".
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for token in string.split(","):
        if not token:
            # An empty token would otherwise turn into the bogus id "000000".
            continue
        if ":" not in token:
            numbers_list.append(token.zfill(6))
        else:
            start, end = token.split(":")
            numbers_list.extend(
                str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
            )
    if len(thedict) == 0:
        return numbers_list
    if len(thedict) == 1 and isinstance(thedict[0], dict):
        return [pid for pid in numbers_list if pid in thedict[0]]
    return "输入参数有误"


def generate_exp(id_list):
    """Compress a list of ids into an expression using ":" and ",".

    ``["000001", "000002", "000003", "000005"]`` becomes
    ``"000001:000003,000005"``. An empty list yields "无有效题号".
    """
    if not id_list:
        return "无有效题号"
    ranges = []
    start = end = id_list[0]
    for pid in id_list[1:]:
        if int(pid) - 1 == int(end):
            end = pid  # extends the current consecutive run
        else:
            ranges.append(start if start == end else start + ":" + end)
            start = end = pid
    ranges.append(start if start == end else start + ":" + end)
    return ",".join(ranges)


def parsePDF(filePath):
    """Return the text of every page of a PDF, one "\\n" after each page.

    Raises:
        ImportError: if PyMuPDF (``fitz``) is not installed.
    """
    if fitz is None:
        raise ImportError("parsePDF requires the 'fitz' (PyMuPDF) package")
    with fitz.open(filePath) as doc:
        return "".join(page.get_text() + "\n" for page in doc.pages())


def extractIDs(filePath):
    """Extract the ids written as ``(dddddd)`` in a .txt, .tex or .pdf file.

    Returns an id expression (see generate_exp), or "格式不正确" for any
    other file extension.
    """
    if filePath.endswith((".txt", ".tex")):
        with open(filePath, "r", encoding="u8") as f:
            data = f.read()
    elif filePath.endswith(".pdf"):
        data = parsePDF(filePath)
    else:
        return "格式不正确"
    return generate_exp(re.findall(r"\((\d{6})\)", data))


def spareIDs(dictname):
    """Describe the free id ranges that follow each used range.

    Returns a multi-line string; each line names the first free id after a
    used range and the last free id before the next one (the final line runs
    up to 999999). The keys are sorted first so the result does not depend on
    the dict's insertion order.
    """
    used_list = generate_exp(sorted(dictname)).split(",")
    lines = []
    for current, following in zip(used_list, used_list[1:]):
        lines.append("首个空闲id: %s, 直至: %s" % (
            str(int(current[-6:]) + 1).zfill(6),
            str(int(following[:6]) - 1).zfill(6),
        ))
    lines.append("首个空闲id: %s, 直至: %s" % (
        str(int(used_list[-1][-6:]) + 1).zfill(6), "999999"))
    return "\n".join(lines)


def parse_usage(datastring):
    """Tokenize one usage record into date, class name and difficulties.

    Tokens containing "高" or "班" are the class name, tokens containing "."
    are difficulty values, anything else is taken as the date. Returns a dict
    with keys "date", "classname" and "difficulty" (list of strings).
    """
    date = ""
    classname = ""
    diff = []
    for item in re.sub(r"\s+", "\t", datastring.strip()).split("\t"):
        is_class = "高" in item or "班" in item
        if "." not in item and not is_class:
            date = item
        elif is_class:
            classname = item
        else:
            diff.append(item)
    return {"date": date, "classname": classname, "difficulty": diff}


def GenerateProblemListFromString(data):
    """Split the text of a .tex file into ``(content, source_suffix)`` tuples.

    Each ``\\item`` is one problem. The suffix is the last ``%``-comment line
    found before the item, with "%" removed (empty if there is none).
    """
    # Keep only the document body when a document environment is present.
    body = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", data)
    if body:
        data = body.group(1)
    data = re.sub(r"\n{2,}", "\n", data)
    # Insert an explicit end marker before every \item and at the end of the
    # list so that each problem can be matched as \item ... \enditem.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)

    problem_list = []
    for found in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        begin, end = found.span(1)
        content = data[begin:end].strip()
        # Drop a trailing comment line (it belongs to the next problem).
        content = re.sub(r"\n\%[\s\S]*$", "", content)
        suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n", data[:begin])
        suffix = suflist[-1].replace("%", "").strip() if suflist else ""
        problem_list.append((content, suffix))
    return problem_list


def CreateEmptyProblem(problem):
    """Return a copy of *problem* with every field blanked by type.

    str -> "", list -> [], int/float -> -1; other types are left as copied.
    """
    NewProblem = problem.copy()
    for field, value in NewProblem.items():
        kind = type(value)
        if kind is str:
            NewProblem[field] = ""
        elif kind is list:
            NewProblem[field] = []
        elif kind is int or kind is float:
            NewProblem[field] = -1
    return NewProblem


def CreateNewProblem(id, content, origin, dict, editor):
    """Build a new problem record using ``dict["000001"]`` as the template.

    Sets "id" (zero-padded to six digits), "content", "origin" and "edit"
    (a one-element list holding *editor*).
    """
    NewProblem = CreateEmptyProblem(dict["000001"])
    NewProblem["id"] = str(id).zfill(6)
    NewProblem["content"] = content
    NewProblem["origin"] = origin
    NewProblem["edit"] = [editor]
    return NewProblem


def AddProblemstoDict(startingid, raworigin, problems, editor,
                      indexdescription, thedict):
    """Add the problems from GenerateProblemListFromString to *thedict*.

    Ids are assigned consecutively from *startingid*. The origin is
    ``raworigin + suffix + indexdescription`` followed by a running index
    that restarts whenever the suffix changes (the index is omitted when
    *indexdescription* is blank). The genre is guessed from the content:
    "blank" -> 填空题, "bracket" -> 选择题, otherwise 解答题.

    Returns 0 on success, 1 as soon as an id is already in use (problems
    added before that point stay in *thedict*).
    """
    if not problems:
        return 0
    next_id = int(startingid)
    currentsuffix = problems[0][1]
    problemindex = 0
    description = indexdescription.strip()
    for p, suffix in problems:
        pid = str(next_id).zfill(6)
        if pid in thedict:
            print("ID %s 已被使用."%pid)
            return 1
        if suffix == currentsuffix:
            problemindex += 1
        else:
            # Restart numbering and remember the new suffix; without this
            # update every later problem of the new source would get index 1.
            problemindex = 1
            currentsuffix = suffix
        origin = raworigin + suffix + description + (
            "" if description == "" else str(problemindex))
        newproblem = CreateNewProblem(pid, p.strip(), origin, thedict,
                                      GetDate() + "\t" + editor)
        if "blank" in p:
            newproblem["genre"] = "填空题"
            newproblem["space"] = ""
        elif "bracket" in p:
            newproblem["genre"] = "选择题"
            newproblem["space"] = ""
        else:
            newproblem["genre"] = "解答题"
            newproblem["space"] = "4em"
        thedict[pid] = newproblem
        maxsim, argmaxsim = detectmaxsim(pid, [pid], thedict)
        print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p))
        next_id += 1
    return 0


def CreateIDLinks(old_id_list, new_id_list, *thedict):
    """Pair every old id with a new id, in order.

    If a dict is passed as extra argument, new ids that already exist in it
    are discarded first. Returns a list of ``(old_id, new_id)`` tuples, or
    the string "新ID个数不足." when there are fewer new ids than old ones.
    """
    if len(thedict) == 1 and isinstance(thedict[0], dict):
        new_id_list = [pid for pid in new_id_list if pid not in thedict[0]]
    if len(old_id_list) > len(new_id_list):
        return "新ID个数不足."
    return list(zip(old_id_list, new_id_list))


def CreateRelatedProblems(links, thedict, filepath, editor):
    """Write drafts of related problems, derived from *links*, to *filepath*.

    Each draft is keyed by the old id and is a copy of the old record with a
    provisional id (``new_id + "待替换"``), a "(待编辑)" content prefix,
    emptied "usages"/"same"/"unrelated", an extra "edit" entry and a dated
    origin suffix.

    Returns 0 on success, 1 if the links/records are malformed or the file
    could not be saved.
    """
    try:
        today = GetDate()
        new_dict = {}
        for old_id, new_id in links:
            draft = thedict[old_id].copy()
            draft["id"] = new_id + "待替换"
            draft["content"] = "(待编辑)" + draft["content"]
            draft["usages"] = []
            draft["same"] = []
            draft["unrelated"] = []
            draft["edit"] = draft["edit"].copy() + [today + "\t" + editor]
            draft["origin"] += "-" + today + "修改"
            new_dict[old_id] = draft
    except (KeyError, TypeError, ValueError, AttributeError):
        return 1
    # Propagate a failed write instead of reporting success.
    return save_dict(new_dict, filepath)


def ImportRelatedProblems(new_json, main_json):
    """Import edited related-problem drafts from *new_json* into *main_json*.

    The draft's "id" must be the new id followed by "待替换". Old and new
    problems are cross-linked through "related", and genre/space are derived
    from the content like in AddProblemstoDict.

    Returns 1 (without saving) if a new id already exists, otherwise the
    result of saving the sorted database (0 on success, 1 on failure).
    """
    pro_dict = load_dict(main_json)
    new_dict = load_dict(new_json)
    for old_id, draft in new_dict.items():
        new_id = draft["id"].replace("待替换", "")
        if new_id in pro_dict:
            print("题号有重复")
            return 1
        record = draft.copy()
        record["id"] = new_id
        pro_dict[new_id] = record
        pro_dict[old_id]["related"] += [new_id]
        record["related"] += [old_id]
        p = record["content"]
        # The second field is "space": the original assigned "genre" twice,
        # which wiped the genre and never set the answer space.
        if "blank" in p:
            record["genre"] = "填空题"
            record["space"] = ""
        elif "bracket" in p:
            record["genre"] = "选择题"
            record["space"] = ""
        else:
            record["genre"] = "解答题"
            record["space"] = "4em"
        print("导入关联题目 %s -> %s 信息成功."%(old_id,new_id))
    return save_dict(SortDict(pro_dict), main_json)


def strip_suffix(originalString, suf_words_list):
    """Cut each regex in *suf_words_list*, and what follows it, off the end.

    For every pattern, the match together with the trailing run of
    non-whitespace characters up to the end of the string is removed. The
    patterns are applied one after another; an empty list returns the string
    unchanged.
    """
    output = originalString
    for sw in suf_words_list:
        output = re.sub(sw + r"[\S]*$", "", output)
    return output


def get_striped_origin(pro_dict, id, suf_words_list):
    """Return the "origin" of problem *id* with the given suffixes removed."""
    return strip_suffix(pro_dict[id]["origin"], suf_words_list)


def SeperateFirstLine(string):
    """Split a multi-line string into ``(first_line, remaining_lines)``.

    Every line is stripped of surrounding whitespace; the remaining lines
    are re-joined with "\\n".
    """
    first, *rest = string.split("\n")
    return (first.strip(), "\n".join(line.strip() for line in rest))


def ObtainDatatoModify(metadatafilepath, fields_dict):
    """Parse a metadata text file into ``(field, id, content)`` triples.

    Entries are separated by blank lines. A single-line entry that is a key
    of *fields_dict* selects the field for the entries after it; any other
    single-line entry resets the field to "NotAField". An entry with two or
    more lines has the problem id on its first line and the content on the
    rest.
    """
    raw = ReadTextFile(metadatafilepath)
    # Force a blank line after every field-name line so that it always forms
    # an entry of its own.
    padded = [
        line + ("\n\n" if line.strip() in fields_dict else "\n")
        for line in raw.split("\n")
    ]
    # Collapse whitespace-only gaps into exactly one blank line.
    data = re.sub(r"\n[\s\n]*\n", "\n\n", "".join(padded).strip())

    to_modify_list = []
    currentfield = "NotAField"
    for entry in data.split("\n\n"):
        if entry.strip() in fields_dict:
            currentfield = entry.strip()
        elif "\n" not in entry:
            currentfield = "NotAField"
        else:
            pid, content = SeperateFirstLine(entry)
            to_modify_list.append((currentfield, str(pid).zfill(6), content))
    return to_modify_list


def FloatToInt(string):
    """Convert *string* to a float, or to an int if within 0.01 of one."""
    f = float(string)
    if abs(f - round(f)) < 0.01:
        f = round(f)
    return f


def OverwriteData(prodict, fieldsdict, field_id_and_content):
    """Overwrite one field of one problem.

    *field_id_and_content* is ``(field, id, content)``. "str" fields take the
    content as is; "int"/"float" fields take ``FloatToInt(content)``.
    Returns ``(id, field, content)`` with the content as stored.
    """
    field, pid, content = field_id_and_content
    fieldType = fieldsdict[field]["FieldType"]
    if pid not in prodict:
        print("题号 %s 并不在数据库中"%pid)
    else:
        if fieldType == "str":
            prodict[pid][field] = content
        elif fieldType == "int" or fieldType == "float":
            content = FloatToInt(content)
            prodict[pid][field] = content
        print("已覆盖 %s 的 %s 字段, 内容为 %s"%(pid,field,content))
    return (pid, field, content)


def AppendData(prodict, fieldsdict, field_id_and_content):
    """Append content to a "str" or "list" field of one problem.

    For "str" fields the content is added on a new line unless it already
    occurs in the field; for "list" fields each line is added unless it is
    already an element. Returns ``(id, field, content)``.
    """
    field, pid, content = field_id_and_content
    fieldType = fieldsdict[field]["FieldType"]
    if pid not in prodict:
        print("题号 %s 并不在数据库中"%pid)
    elif fieldType == "str":
        if content.strip() in prodict[pid][field]:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(pid,field,content))
        else:
            prodict[pid][field] = (
                prodict[pid][field].strip() + "\n" + content).strip()
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(pid,field,content))
    elif fieldType == "list":
        for line in (raw.strip() for raw in content.split("\n")):
            if line in prodict[pid][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(pid,field,line))
            else:
                prodict[pid][field] = prodict[pid][field].copy() + [line]
                print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(pid,field,line))
    return (pid, field, content)


def AppendMutualData(prodict, field_id_and_content):
    """Add a two-way link (same / related / unrelated) between problems.

    The content holds one partner id per line; each link is written on both
    problems. Returns ``(id, field, content)``.
    """
    field, pid, content = field_id_and_content
    partners = [str(line).strip().zfill(6) for line in content.split("\n")]
    for id2 in partners:
        if pid not in prodict:
            print("题号 %s 并不在数据库中"%pid)
        elif id2 not in prodict:
            print("题号 %s 并不在数据库中"%id2)
        else:
            for source, target in ((pid, id2), (id2, pid)):
                if target in prodict[source][field]:
                    print("题号 %s 的 %s 字段, 内容 %s 已存在"%(source,field,target))
                else:
                    prodict[source][field] = prodict[source][field] + [target]
                    print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(source,field,target))
    return (pid, field, content)


def AppendObjData(prodict, objdict, field_id_and_content):
    """Append objective ids (keys of *objdict*) to a field of one problem.

    Unknown objective ids and ids already present are reported and skipped.
    Returns ``(id, field, content)``.
    """
    field, pid, content = field_id_and_content
    if pid not in prodict:
        print("题号 %s 并不在数据库中"%pid)
    else:
        for objid in (line.strip() for line in content.split("\n")):
            if objid not in objdict:
                print("目标编号 %s 并不在数据库中"%objid)
            elif objid in prodict[pid][field]:
                print("题号 %s 的 %s 字段, 内容 %s 已存在"%(pid,field,objid))
            else:
                prodict[pid][field] = prodict[pid][field] + [objid]
                print("已于 %s 的 %s 字段执行添加, 编号为 %s, 内容为 %s"%(pid,field,objid,objdict[objid]["content"]))
    return (pid, field, content)


def AppendUsageData(prodict, field_id_and_content):
    """Append usage records to one problem.

    A line containing "FORCE" is added unconditionally (uppercased, with the
    keyword removed). Any other line is skipped if it already exists
    (ignoring a leading date); if its class already has a record it is put
    on a pending list instead of being added.

    Returns ``(field, id, content, pending)`` - note the order differs from
    the other Append* functions. *pending* is text in metadata format
    starting with "usages"; per pending record it lists the id, the new line
    with "FORCE" appended, and the existing lines of that class.

    Raises:
        IndexError: if a non-FORCE line contains no class token (a token
            with one of the characters 班高一二三).
    """
    field, pid, content = field_id_and_content
    output = "usages\n\n"
    if pid not in prodict:
        # Same reporting as the other Append* functions, instead of KeyError.
        print("题号 %s 并不在数据库中"%pid)
        return (field, pid, content, output)

    pending_list = []
    for raw in content.split("\n"):
        line = re.sub(r"\s+", r"\t", raw.strip())
        if "FORCE" in line.upper():
            line = re.sub(r"\s+", r"\t",
                          re.sub(r"FORCE", "", line.upper())).strip()
            prodict[pid][field].append(line)
            print("已于 %s 的 %s 字段执行强制添加, 内容为 %s"%(pid,field,line))
            continue
        existing = "\n".join(prodict[pid][field])
        time_stripped = re.sub(r"^\d{4,}\t", "", line)  # record without date
        if time_stripped in existing:
            print("题号 %s 的 %s 字段, 内容 %s 已存在"%(pid,field,line))
            continue
        classid = [info for info in line.split("\t")
                   if re.search(r"[班高一二三]", info)][0]
        if classid in existing:
            print("班级 %s 在题号为 %s 的题目处已有使用记录, 在pending list中记录"%(classid,pid))
            oldinfo = [usage for usage in prodict[pid][field]
                       if classid in usage]
            pending_list.append((pid, line, oldinfo))
        else:
            prodict[pid][field].append(line)
            print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(pid,field,line))

    for pending_id, new, oldlist in pending_list:
        output += pending_id + "\n"
        output += new + "\tFORCE\n"
        output += "\n".join(oldlist) + "\n\n"
    return (field, pid, content, output)


def ImportMetadata(prodict, objdict, fieldsdict, metadatafilepath,
                   pendingdatafilepath):
    """Apply a metadata file to *prodict*, dispatching on each field's Method.

    Methods: overwrite, append, mutualappend, objappend, usageappend; "fixed"
    fields are reported and left alone. Pending usage records of all
    usageappend entries are accumulated in *pendingdatafilepath*.

    Only the in-memory dict is changed; the caller still has to save it.
    Returns 1 if an entry has no valid field name (entries before it stay
    applied), otherwise the feedback tuple of the last applied entry (None
    if nothing was applied).
    """
    header = "usages\n\n"
    pending_entries = ""
    feedback = None
    for item in ObtainDatatoModify(metadatafilepath, fieldsdict):
        field = item[0]
        if field == "NotAField":
            print("字段名有误")
            return 1
        method = fieldsdict[field]["Method"]
        if method == "overwrite":
            feedback = OverwriteData(prodict, fieldsdict, item)
        elif method == "append":
            feedback = AppendData(prodict, fieldsdict, item)
        elif method == "mutualappend":
            feedback = AppendMutualData(prodict, item)
        elif method == "objappend":
            feedback = AppendObjData(prodict, objdict, item)
        elif method == "usageappend":
            feedback = AppendUsageData(prodict, item)
            pending = feedback[3]
            if pending.startswith(header):
                pending = pending[len(header):]
            # Accumulate: writing only the latest output would discard the
            # pending records of earlier problems.
            pending_entries += pending
            SaveTextFile(header + pending_entries, pendingdatafilepath)
        elif method == "fixed":
            print("字段 %s 不可按此方式修改"%field)
    return feedback


def parseUsage(usagestring):
    """Tokenize one usage line.

    Returns a dict with "date" (leading token matching ``20dd...`` without
    CJK characters, else ""), "classid" (next token containing one of
    高一二三班, else ""), "subproblems" (number of remaining tokens),
    "difficulties" (those tokens as floats) and "glossdiff" (their mean,
    rounded to 3 places).

    Raises IndexError / ZeroDivisionError when no difficulty values remain
    and ValueError when one is not numeric.
    """
    datalist = re.sub(r"\s+", r"\t", usagestring.strip()).split("\t")
    usagedict = {}
    head = datalist[0]
    if re.search(r"20[\d]{2,6}", head) and not re.search(r"[\u4e00-\u9fa5]", head):
        usagedict["date"] = datalist.pop(0)
    else:
        usagedict["date"] = ""
    if re.search(r"[高一二三班]", datalist[0]):
        usagedict["classid"] = datalist.pop(0)
    else:
        usagedict["classid"] = ""
    usagedict["subproblems"] = len(datalist)
    usagedict["difficulties"] = [float(u) for u in datalist]
    usagedict["glossdiff"] = round(
        sum(usagedict["difficulties"]) / usagedict["subproblems"], 3)
    return usagedict


def StringSubstitute(regex, template, stringtuple):
    """Replace the matches of *regex* in *template* one by one.

    The i-th match is replaced by ``stringtuple[i]``. Prints "长度不符." and
    returns 1 when the number of matches differs from ``len(stringtuple)``;
    otherwise returns the substituted string.
    """
    toSub = re.findall(regex, template)
    if len(toSub) != len(stringtuple):
        print("长度不符.")
        return 1
    output = template
    for placeholder, replacement in zip(toSub, stringtuple):
        # count=1: identical placeholders must each get their own value.
        output = output.replace(placeholder, replacement, 1)
    return output


def GenerateTexDataforEditing(id_string, prodict, templatefilepath, editor):
    """Build an editable .tex document for the problems in *id_string*.

    Every problem contributes its content, answer, solution and remark as
    four paragraphs. The list is inserted into the template at its
    ``<<...待替换...>>`` placeholder. Returns the tex string, or 1 if the
    template does not contain exactly one placeholder (see StringSubstitute).
    """
    parts = ["编辑者: " + editor + "\n\n\\begin{enumerate}\n\n"]
    for pid in generate_number_set(id_string, prodict):
        record = prodict[pid]
        parts.append(
            "\\item (%s) "%str(pid).zfill(6) + record["content"] + "\n\n"
            + "答案: " + record["ans"] + "\n\n"
            + "解答与提示: " + record["solution"] + "\n\n"
            + "备注: " + record["remark"] + "\n\n"
        )
    parts.append("\\end{enumerate}")
    template = ReadTextFile(templatefilepath)
    return StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>", template,
                            ["".join(parts)])


def GetEditedProblems(string):
    """Parse an edited .tex string into ``{id: (content, ans, solution, remark)}``.

    Raises IndexError if the string has no enumerate environment.
    """
    # A sentinel \item terminates the last problem for the regex below.
    bodydata = re.findall(
        r"\\begin\{enumerate\}([\s\S]*)\\end\{enumerate\}", string)[0] + r"\item"
    problems = re.findall(r"\((\d{6})\)([\s\S]*?)\\item", bodydata)
    return {pid: parseProblem(pro_string) for pid, pro_string in problems}


def parseProblem(string):
    """Split a four-paragraph problem text into its parts.

    Paragraphs are separated by blank lines; the labels "答案:",
    "解答与提示:" and "备注:" are removed from paragraphs two to four.
    Returns ``(content, ans, solution, remark)``.

    Raises ValueError if the text does not have exactly four paragraphs.
    """
    data = re.sub(r"\n{2,}", "\n\n", string.strip())
    content, ans, solution, remark = data.split("\n\n")
    return (
        content.strip(),
        ans.replace("答案:", "").strip(),
        solution.replace("解答与提示:", "").strip(),
        remark.replace("备注:", "").strip(),
    )


def ModifyProblembyTeX(id_string, prodict, toeditfilepath, editor):
    """Edit content/answer/solution/remark of problems in VS Code via .tex.

    Writes the editable file, blocks on ``code -w`` until it is closed,
    applies every changed problem to *prodict* (adding an "edit" entry) and
    saves the bank to "../题库0.3/problems.json". Returns the expression of
    the edited ids.
    """
    texdata = GenerateTexDataforEditing(
        id_string, prodict, "模板文件/题目编辑.txt", editor)
    SaveTextFile(texdata, toeditfilepath)
    print("编辑完成后保存文件, 关闭文件继续...")
    os.system("code -w -g "+toeditfilepath)

    stamp = GetDate() + "\t" + editor
    edited_dict = GetEditedProblems(ReadTextFile(toeditfilepath))
    editedIDList = []
    fields = ("content", "ans", "solution", "remark")
    for pid, new_values in edited_dict.items():
        record = prodict[pid]
        if all(record[f] == v for f, v in zip(fields, new_values)):
            continue  # untouched problem
        for f, v in zip(fields, new_values):
            record[f] = v
        record["edit"] = record["edit"] + [stamp]
        editedIDList.append(pid)
    save_dict(prodict, "../题库0.3/problems.json")
    return generate_exp(editedIDList)


def RemoveMutualLink(metadata, prodict):
    """Remove two-way links (same, related, unrelated) between id pairs.

    *metadata* has one pair per line, separated by spaces or tabs; blank
    lines are ignored. Returns 0.
    """
    for raw in metadata.split("\n"):
        line = re.sub(r"[\s]+", r"\t", raw).strip()
        if not line:
            continue
        id1, id2 = (part.zfill(6) for part in line.split("\t"))
        for field in ["same", "related", "unrelated"]:
            if id1 in prodict[id2][field]:
                prodict[id2][field].remove(id1)
            if id2 in prodict[id1][field]:
                prodict[id1][field].remove(id2)
    return 0


def jsonEditProblemMetadata(id_string, prodict, editor):
    """Edit whole problem records as JSON in VS Code.

    Dumps the selected records to "临时文件/problem_edit.json", blocks on
    ``code -w`` until the file is closed, then copies every changed record
    back into *prodict* with an added "edit" entry. Returns the expression
    of the edited ids.
    """
    jsontoeditpath = "临时文件/problem_edit.json"
    edit_dict = {pid: prodict[pid].copy()
                 for pid in generate_number_set(id_string, prodict)}
    save_dict(edit_dict, jsontoeditpath)
    os.system("code -w -g "+jsontoeditpath)

    editlist = []
    for pid, edited in load_dict(jsontoeditpath).items():
        if prodict[pid] == edited:
            continue
        prodict[pid] = edited.copy()
        prodict[pid]["edit"] = prodict[pid]["edit"] + [GetDate() + "\t" + editor]
        editlist.append(pid)
    return generate_exp(editlist)


def GetSamePairs(prodict):
    """Return ``[id] + same`` for every problem with "same" links and usages."""
    return [
        [pid] + record["same"]
        for pid, record in prodict.items()
        if len(record["same"]) > 0 and len(record["usages"]) != 0
    ]


def ShareSameUsages(prodict, same_group):
    """Give every id in *same_group* the union of the group's usage records.

    Order of first appearance is kept. Returns ``(same_group, usages)``.
    """
    current_usages = []
    for pid in same_group:
        for usage in prodict[pid]["usages"]:
            if usage not in current_usages:
                current_usages.append(usage)
    for pid in same_group:
        prodict[pid]["usages"] = current_usages.copy()
    return (same_group, current_usages)


def SortUsages(prodict):
    """Sort the "usages" list of every problem as strings. Returns 0."""
    for record in prodict.values():
        record["usages"] = sorted(record["usages"])
    return 0


def SortUsagesbyAverage(theusages):
    """Return the usages sorted by their mean difficulty value, highest first."""
    # sorted() is stable also with reverse=True, so ties keep their order.
    return sorted(theusages,
                  key=lambda usage: parseUsage(usage)["glossdiff"],
                  reverse=True)


def StripSuffix(string, suf_words):
    """Strip *string*, then cut each regex in *suf_words* and its tail.

    Like strip_suffix, but surrounding whitespace is removed first.
    """
    string = string.strip()
    for sw in suf_words:
        string = re.sub(sw + r"[\S]*$", "", string)
    return string


def _condition_list(conditions):
    """Wrap a single regex string into a list; pass lists through."""
    return [conditions] if isinstance(conditions, str) else conditions


def _field_text(problem, fieldraw):
    """Return the problem field named by *fieldraw* (digits removed) as text."""
    value = problem[re.sub(r"\d", "", fieldraw)]
    return "\n".join(value) if type(value) == list else str(value)


def MatchCondition(problem, condition_dict):
    """Tell whether *problem* satisfies every condition in *condition_dict*.

    Keys are field names, optionally with digits (ignored) to allow several
    conditions on one field. A key containing "_not" is a negative
    condition. Values are a regex or a list of regexes; ``[""]`` disables
    the condition. A positive condition holds if any regex matches the
    field; a negative one holds if none does.
    """
    for fieldraw, conditions in condition_dict.items():
        if conditions == [""]:
            continue
        negative = "_not" in fieldraw
        text = _field_text(problem, fieldraw.replace("_not", ""))
        found = any(re.search(cond, text)
                    for cond in _condition_list(conditions))
        if found == negative:
            return False
    return True


def get_color(value):
    """Map a score rate to an rgb triple ``"{r,g,0}"``, 3 decimals each.

    Values from 0.5 upwards fix red at 1 and lower green; values below 0.5
    fix green at 1 and lower red.
    """
    value = float(value)
    if value >= 0.5:
        r, g = 1, 2 - 2 * value
    else:
        r, g = 2 * value, 1
    return "{%.3f,%.3f,0}" % (r, g)


def GenerateValueColorCode(matchobj):
    """Return LaTeX for a black-framed box around the matched value.

    The value is group 1 of *matchobj*; the fill colour comes from
    get_color.
    """
    value = matchobj.group(1)
    return "\t\\fcolorbox[rgb]{0,0,0}%s{%s}"%(get_color(value),value)


def StudentsGetAfterContent(id, prodict, answered, spaceflag):
    """Return what follows a problem in the student handout.

    With *answered* the answer is printed in red ("暂无答案" if empty); with
    *spaceflag* a ``\\vspace*`` of the problem's "space" is added when that
    field is not empty.
    """
    pieces = []
    if answered:
        answer = prodict[id]["ans"]
        pieces.append("答案: \\textcolor{red}{%s}\n\n"%(answer if answer != "" else "暂无答案"))
    if spaceflag and prodict[id]["space"] != "":
        pieces.append("\\vspace*{%s}\n\n"%prodict[id]["space"])
    return "".join(pieces)


def XeLaTeXCompile(filedir, filename):
    """Run XeLaTeX twice on *filename*, writing output into *filedir*.

    Two passes are run so that references and page numbers can resolve.
    Returns True only if both passes exit with status 0.
    """
    flagsuc = True
    for attempt in (1, 2):
        status = os.system("xelatex -interaction=batchmode -output-directory=%s %s"%(filedir,filename))
        if status == 0:
            print("第%d次编译成功."%attempt)
        else:
            flagsuc = False
    return flagsuc


def _student_items(idlist, pro_dict, answered, spaceflag):
    """Return the concatenated \\item entries for the given ids."""
    return "".join(
        "\\item {\\tiny(%s)} %s\n\n%s"%(
            pid, pro_dict[pid]["content"],
            StudentsGetAfterContent(pid, pro_dict, answered, spaceflag))
        for pid in idlist
    )


def GenerateStudentsBodyString(problems, sectiontitles, pro_dict,
                               consecutivenumbering, answered, spaceflag):
    """Build the body of the student-version .tex file.

    If there is one title per id expression, each expression becomes a
    section with its own enumerate; with *consecutivenumbering* the item
    counter continues across sections. Otherwise all expressions are merged
    into a single enumerate without section headings.
    """
    if len(problems) == len(sectiontitles):
        sections = []
        count = 0
        for id_string, title in zip(problems, sectiontitles):
            idlist = generate_number_set(id_string, pro_dict)
            sections.append(
                "\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(title,count if consecutivenumbering else 0)
                + _student_items(idlist, pro_dict, answered, spaceflag)
                + "\\end{enumerate}"
            )
            count += len(idlist)
        return "".join(sections)
    idlist = generate_number_set(",".join(problems), pro_dict)
    return ("\\begin{enumerate}\n\n"
            + _student_items(idlist, pro_dict, answered, spaceflag)
            + "\\end{enumerate}")


if __name__ == "__main__":
    print("数据库工具, import用.")