import json,re,os,Levenshtein,fitz,time,sys,win32con import pandas as pd import win32clipboard as wc def GetDate(): #获得当前日期 currentdate = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2) return currentdate #返回当前日期yyyymmdd def GetTime(): #获得当前时间 t = time.localtime() currenttime = f"{t.tm_hour:0>2}{t.tm_min:0>2}{t.tm_sec:0>2}" return currenttime #返回当前时间hhmmss def ReadTextFile(filepath): #读取文本格式的文件 with open(filepath,"r",encoding="u8") as f: data = f.read() return data #返回文本格式文件的内容 def SaveTextFile(data,filepath): #写入文本格式的文件 pathname = os.path.dirname(filepath) if not os.path.exists(pathname): os.mkdir(pathname) with open(filepath,"w",encoding="u8") as f: f.write(data) return filepath #返回文件名 def AppendTextFile(string,filepath): #在文本文件后添加内容 with open(filepath,"a",encoding = "u8") as f: f.write(string.strip()+"\n\n") return filepath #返回文件名 def SortDict(adict): #按字典项顺序排序字典 return dict(sorted(adict.items())) #返回排序后的字典 #读取存储json数据库相关(不限于题号数据库) def load_dict(filename): #根据filename读取json数据库并转化为python字典 with open(filename,"r",encoding = "u8") as f: adict = json.loads(f.read()) return adict #返回python字典 def save_dict(adict,filepath): #将adict字典转化为json文件并保存至filename文件中 try: jsondata = json.dumps(adict,indent=4,ensure_ascii=False) SaveTextFile(jsondata,filepath) return filepath # 成功则返回文件路径 except: print("保存 %s 文件失败"%filepath) return 1 # 失败则返回1 def pre_treating(string): #删除字符串中对比较无用的字符, 以供比较 string = re.sub(r"\\begin\{center\}[\s\S]*?\\end\{center\}","",string) string = re.sub(r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)","",string) string = re.sub(r"[\s\\\{\}\$\(\)\[\]]","",string) string = re.sub(r"[\n\t]","",string) string = re.sub(r"(displaystyle)|(overrightarrow)|(overline)","",string) string = re.sub(r"[,\.:;?]","",string) return string #返回处理后的字符串 def treat_dict(p_dict): #对整个题库字典中的内容部分进行预处理,删除无用字符 treated_dict = {} for id in p_dict: treated_dict[id] = {} treated_dict[id]["content"] = 
pre_treating(p_dict[id]["content"]) treated_dict[id]["same"] = p_dict[id]["same"] return treated_dict #返回处理后的字典, 含内容字段及相同题目字段 def detectmaxsim(currentid,excludelist,adict): #检测与已知题目关联程度最大的题目(除外列表之外的部分) maxsim = -1 argmaxsim = "000000" for id in adict: if not id in excludelist and not "OBSOLETE" in adict[id]["content"]: simrate = Levenshtein.jaro(adict[id]["content"],adict[currentid]["content"]) if simrate > maxsim: maxsim = simrate argmaxsim = id return (maxsim,argmaxsim) #返回最大关联系数与关联程度最大的题号 def stringmaxsim(string,adict,listlength): # maxsim = -1 # argmaxsim = "000000" maxsimlist = [] string = pre_treating(string) for id in adict: if not "OBSOLETE" in adict[id]["content"]: simrate = Levenshtein.jaro(adict[id]["content"],string) # if simrate > maxsim: # maxsim = simrate # argmaxsim = id if len(maxsimlist) < listlength: maxsimlist.append((id,simrate)) elif simrate > maxsimlist[-1][1]: maxsimlist = maxsimlist[:-1] maxsimlist.append((id,simrate)) maxsimlist = sorted(maxsimlist,key = lambda x: x[1], reverse = True) return maxsimlist #返回最大关联的listlength个题号及关联系数 def generate_sim_group(startingids,prodict,max_size,threshold): #生成与已知题块startingids(冒号和逗号连接的字符串)关联程度超过threshold的题目组, 超过max_size即停止 id_list = generate_number_set(startingids) output_list = [] treated_dict = treat_dict(prodict) continue_flag = True while continue_flag: appending_id_list = [] for oldid in id_list: for newid in [id for id in prodict if not id in id_list and not id in output_list and not "OBSOLETE" in treated_dict[id]["content"]]: simrate = Levenshtein.jaro(treated_dict[oldid]["content"],treated_dict[newid]["content"]) if simrate >= threshold: appending_id_list.append(newid) output_list = output_list + id_list.copy() id_list = appending_id_list.copy() if len(appending_id_list) == 0 or len(output_list)>max_size: continue_flag = False return generate_exp(sorted(output_list)) #返回冒号和逗号连接的题目组字符串 def generate_problem_series(startingid,length,adict): #在adict字典里返回从startingid开始的一系列题号, 每一题都是与上一题的关联程度最大的 
excludelist = [startingid] currentid = startingid for i in range(length): maxsim,currentid = detectmaxsim(currentid,excludelist,adict) excludelist.append(currentid) return ",".join(excludelist) #返回按顺序的题号列表 def generate_number_set(string,*thedict): #根据可能含有":"和","的题号字符串生成一个用逗号分隔的六位题号列表, 例如"1:3,5"会生成["000001","000002","000003","000005"] #可变参数*dict如果存在, 将只生成dict的keys中包含的题号列表 string = re.sub(r"[\n\s]","",string).strip() while not string[-1] in "0123456789": string = string[:-1] string_list = string.split(",") numbers_list = [] for s in string_list: if not ":" in s: numbers_list.append(s.zfill(6)) else: start,end = s.split(":") for ind in range(int(start),int(end)+1): numbers_list.append(str(ind).zfill(6)) if len(thedict) == 0: return numbers_list #返回六位题号列表 elif len(thedict) == 1 and type(thedict[0]) == dict: numbers_list = [id for id in numbers_list if id in thedict[0]] return numbers_list #返回字典中存在的六位题号列表 else: return "输入参数有误" def generate_id_set(string,*thedict): #除了生成题号列表外, 还能根据首字母生成基础知识编号列表或课时目标列表 if re.findall(r"[BXK]",string) == []: if thedict == (): return generate_number_set(string) else: return generate_number_set(string,thedict) else: if string[0] == "B": string = re.sub("B","",string) return ["B"+i[-5:] for i in generate_number_set(string)] elif string[0] == "K": suffix = string.strip()[-1] string = re.sub("[KBX]","",string) return ["K"+i.zfill(7)+suffix for i in generate_number_set(string)] def generate_exp(id_list): #根据题号列表生成字符串式的含":"和","的题号字符串, 例如["000001","000002","000003","000005"]生成"000001:000003,000005", 若列表为空则生成"无有效题号" if not len(id_list) == 0: exp_list = [] start = id_list[0] current = start end = start for id in id_list[1:]: # print(id,current) if int(id)-1 == int(current): current = id end = id else: if not start == end: exp_list.append('"'+start+":"+end+'"') else: exp_list.append('"'+start+'"') start = id current = id end = id if not start == end: exp_list.append('"'+start+":"+end+'"') else: exp_list.append('"'+start+'"') exp_str = 
",".join(exp_list).replace('"',"") else: exp_str = "无有效题号" return exp_str #返回含有":"或","的题号字符串 def parsePDF(filePath): #提取pdf文件中的字符 with fitz.open(filePath) as doc: text = "" for page in doc.pages(): text += page.get_text() + "\n" return text def extractIDs(filePath): #提取.txt,.tex或.pdf文件中的题号, 返回含有":"或","的题号字符串 if filePath[-4:] == ".txt" or filePath[-4:] == ".tex": with open(filePath,"r",encoding = "u8") as f: data = f.read() elif filePath[-4:] == ".pdf": data = parsePDF(filePath) else: return "格式不正确" ids = re.findall(r"\((\d{6})\)",data) return generate_exp(ids) def spareIDs(dictname): #返回空闲题号 idlist = list(dictname.keys()) used_str = generate_exp(idlist) used_list = used_str.split(",") output = "" for group in range(len(used_list)-1): output += "首个空闲id: %s, 直至: %s"%(str(int(used_list[group][-6:])+1).zfill(6),str(int(used_list[group+1][:6])-1).zfill(6)) + "\n" output += "首个空闲id: %s, 直至: %s"%(str(int(used_list[-1][-6:])+1).zfill(6),"999999") return output #返回的是一个多行的字符串, 每一行中含有一个空闲题号的闭区间 def NextSpareID(num,adict): #返回adict中下一个空闲的题号 num = int(num) while str(num).zfill(6) in adict: num += 1 return num def NextSpareIDBlock(num,adict): #返回adict中下一个空闲的题号块 start = NextSpareID(num,adict) larger_ID_list = [int(id) for id in adict.keys() if int(id)>start] if larger_ID_list == []: end = 999999 else: end = min(larger_ID_list) - 1 return str(start)+":"+str(end) def GenerateProblemListFromString(data): #从来自.tex文件的字符串生成题目列表, 每个item是一道题目, 新一行的%用作前缀 try: data = re.findall(r"\\begin\{document\}([\s\S]*?)\\end\{document\}",data)[0] except: pass data = re.sub(r"\n{2,}","\n",data) data = re.sub(r"\\item",r"\\enditem\\item",data) data = re.sub(r"\\end\{enumerate\}",r"\\enditem",data) #切除无关信息, 保留关键信息 problempositions = [] for item in re.finditer(r"\\item([\s\S]*?)\\enditem",data): problempositions.append(item.regs[1]) #确定题目内容所在位置 problem_list = [] for pos in problempositions: content = data[pos[0]:pos[1]].strip() content = re.sub(r"\n\%[\s\S]*$","",content) #题目内容 content = 
re.sub(r"\\\\$","",content) # 删去题目最后一行处可能存在的\\ subdata = data[:pos[0]] #开始寻找出处中缀 suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n",subdata) if len(suflist) == 0: suffix = "" else: suffix = suflist[-1].replace("%","").strip() problem_list.append((content,suffix)) return problem_list #返回一个列表, 每一项是一个由 题目内容 和 题目来源前缀 组成的元组 def GenerateProblemListFromString2024(data): #从来自.tex文件的字符串生成题目列表, 每个item是一道题目, 新一行的%用作前缀, item后面的方括号放与题库的交互信息(如[rep3214]表示不添加, 用003214代替; [s2521;r354;u10021,20024]表示和2521相同, 和354相关, 和10021,20024无关), 返回三元组(题目内容, 题目来源前缀, 交互信息列表)的字典 try: data = re.findall(r"\\begin\{document\}([\s\S]*?)\\end\{document\}",data)[0] except: pass data = re.sub(r"\n{2,}","\n",data) data = re.sub(r"\\begin\{tcolorbox\}[\s\S]*?\\end\{tcolorbox\}","\n",data) data = re.sub(r"\\item",r"\\enditem\\item",data) data = re.sub(r"\\end\{enumerate\}",r"\\enditem",data) #切除无关信息, 保留关键信息 problempositions = [] for item in re.finditer(r"\\item([\s\S]*?)\\enditem",data): problempositions.append(item.regs[1]) #确定题目内容所在位置 problem_list = [] for pos in problempositions: content_raw = data[pos[0]:pos[1]].strip() content_raw = re.sub(r"\n\%[\s\S]*$","",content_raw) #题目内容 content_raw = re.sub(r"\\\\$","",content_raw) # 删去题目最后一行处可能存在的\\ content_raw = content_raw.strip() # 删去前后多余的空格 if not content_raw[0] == "[": # 根据方括号内的内容生成交互信息, 这是无方括号的内容, 无meta content = content_raw meta = {} elif re.findall(r"^\[rep(\d+)\]",content_raw) != []: # 方括号以rep开始, 用已有的题号代替, 字典中显示唯一的题号 content = re.sub(r"^\[rep(\d+)\]","",content_raw).strip() meta = {"rep":re.findall(r"\[rep(\d+)\]",content_raw)[0].zfill(6)} else: # 方括号不以rep开始, 用s,r,u分别表示相同, 相关, 无关, 返回有这三个字段的字典 content = re.sub(r"^\[.*?\]","",content_raw).strip() metaraw = re.findall(r"^\[(.*?)\]",content_raw)[0] sameidraw = re.findall(r"s([\d,:]*)",metaraw) if not sameidraw == []: same_id_list = generate_number_set(sameidraw[0]) else: same_id_list = [] relatedidraw = re.findall(r"r([\d,:]*)",metaraw) if not relatedidraw == []: related_id_list = 
generate_number_set(relatedidraw[0]) else: related_id_list = [] unrelatedidraw = re.findall(r"u([\d,:]*)",metaraw) if not unrelatedidraw == []: unrelated_id_list = generate_number_set(unrelatedidraw[0]) else: unrelated_id_list = [] meta = {"same":same_id_list,"related":related_id_list,"unrelated":unrelated_id_list} subdata = data[:pos[0]] #开始寻找出处中缀 suflist = re.findall(r"\n(\%\s{0,}[\S]+)\n",subdata) if len(suflist) == 0: suffix = "" else: suffix = suflist[-1].replace("%","").strip() problem_list.append((content,suffix,meta)) return problem_list #返回一个列表, 每一项是一个由 题目内容 和 题目来源前缀 和 交互信息字典 组成的元组 def CreateEmptyProblem(problem): # 根据已有的题目创建新的空题目 NewProblem = problem.copy() for field in NewProblem: if type(NewProblem[field]) == str: NewProblem[field] = "" elif type(NewProblem[field]) == list: NewProblem[field] = [] elif type(NewProblem[field]) == int or type(NewProblem[field]) == float: NewProblem[field] = -1 return NewProblem #返回一个空题目的字典, ID和内容待赋值 def CreateNewProblem(id,content,origin,dict,editor): # 构建一道新题目的字典 NewProblem = CreateEmptyProblem(dict["000001"]) NewProblem["id"] = str(id).zfill(6) NewProblem["content"] = content NewProblem["origin"] = origin NewProblem["edit"] = [editor] return NewProblem # 返回一道新题目的字典, 已赋新的ID, 内容, 来源和编辑者 def AddProblemstoDict(startingid,raworigin,problems,editor,indexdescription,thedict): #将来自GenerateProblemListFromString的列表中的题目添加到thedict字典 id = int(startingid) currentsuffix = problems[0][1] problemindex = 0 for p_and_suffix in problems: p, suffix = p_and_suffix pid = str(id).zfill(6) if pid in thedict: print("ID %s 已被使用."%pid) return 1 else: if suffix == currentsuffix: problemindex += 1 else: problemindex = 1 currentsuffix = suffix origin = raworigin + suffix + indexdescription.strip() + ("" if indexdescription.strip() == "" else str(problemindex)) newproblem = CreateNewProblem(pid,p.strip(),origin,thedict,GetDate() + "\t" + editor) if "blank" in p: newproblem["genre"] = "填空题" newproblem["space"] = "" elif "bracket" in p: 
newproblem["genre"] = "选择题" newproblem["space"] = "" else: newproblem["genre"] = "解答题" newproblem["space"] = "4em" thedict[pid] = newproblem maxsim,argmaxsim = detectmaxsim(pid,[pid],thedict) print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p)) id += 1 return 0 def AddProblemstoDict2024(startingid,raworigin,problems,editor,indexdescription,thedict): #将来自GenerateProblemListFromString的列表中的题目添加到thedict字典, 返回题号列表(包括用老题号替代的题目) idlist = [] id = int(startingid) currentsuffix = problems[0][1] problemindex = 0 for p_and_suffix_and_meta in problems: p, suffix, meta = p_and_suffix_and_meta pid = str(id).zfill(6) if pid in thedict: print("ID %s 已被使用."%pid) return 1 else: if suffix == currentsuffix: problemindex += 1 else: problemindex = 1 currentsuffix = suffix origin = raworigin + suffix + indexdescription.strip() + ("" if indexdescription.strip() == "" else str(problemindex)) if not "rep" in meta: newproblem = CreateNewProblem(pid,p.strip(),origin,thedict,GetDate() + "\t" + editor) if "blank" in p: newproblem["genre"] = "填空题" newproblem["space"] = "" elif "bracket" in p: newproblem["genre"] = "选择题" newproblem["space"] = "" else: newproblem["genre"] = "解答题" newproblem["space"] = "4em" if "same" in meta: for sid in meta["same"]: thedict[sid]["same"].append(pid) newproblem["same"].append(sid) if "related" in meta: for sid in meta["related"]: thedict[sid]["related"].append(pid) newproblem["related"].append(sid) if "unrelated" in meta: for sid in meta["unrelated"]: thedict[sid]["unrelated"].append(pid) newproblem["unrelated"].append(sid) thedict[pid] = newproblem maxsim,argmaxsim = detectmaxsim(pid,[pid],thedict) print("已收录题号: %s, 最接近题目: %s, 相似程度: %.3f, 题目类型: %s, 题目来源: %s, 题目内容: %s"%(pid,argmaxsim,maxsim,newproblem["genre"],origin,p)) id += 1 idlist.append(pid) else: idlist.append(meta["rep"]) print(f"该题 {idlist[-1]} {p} 已在题库中, 不必收录.") return idlist def CreateIDLinks(old_id_list,new_id_list,*thedict): 
#建立已有id和新id之间的联系, thedict为可选, 选中的话即为当前字典, 会从new_id_list中排除当前字典中有的项 if len(thedict) == 1 and type(thedict[0]) == dict: new_id_list = [id for id in new_id_list if not id in thedict[0]] if len(old_id_list)>len(new_id_list): return "新ID个数不足." else: id_links = [] for i in range(len(old_id_list)): id_links.append((old_id_list[i],new_id_list[i])) return id_links # 返回id联系, 每个元组表示一对id, 前者是旧id, 后者是新id def CreateRelatedProblems(links,thedict,filepath,editor): # 根据links关联生成待编辑的新题目字典, 等待编辑修改 try: new_dict = {} for item in links: old_id,new_id = item new_dict[old_id] = thedict[old_id].copy() new_dict[old_id]["id"] = new_id + "待替换" new_dict[old_id]["content"] = new_dict[old_id]["content"] new_dict[old_id]["usages"] = [] new_dict[old_id]["same"] = [] new_dict[old_id]["unrelated"] = [] new_dict[old_id]["edit"] = new_dict[old_id]["edit"].copy() + [GetDate()+"\t"+editor] new_dict[old_id]["origin"] += "-" + GetDate() + "修改" save_dict(new_dict,filepath) except: return 1 #异常返回1 return 0 #正常返回0 def ImportRelatedProblems(new_json,main_json): # 导入编辑过的关联题目json文件到主数据库 pro_dict = load_dict(main_json) new_dict = load_dict(new_json) for id in new_dict: new_id = new_dict[id]["id"].replace("待替换","") #新题号后需要跟"待替换"字样 if new_id in pro_dict: print("题号有重复") return 1 else: pro_dict[new_id] = new_dict[id].copy() pro_dict[new_id]["id"] = new_id pro_dict[id]["related"] += [new_id] pro_dict[new_id]["related"] += [id] p = pro_dict[new_id]["content"] if "blank" in p: pro_dict[new_id]["genre"] = "填空题" pro_dict[new_id]["genre"] = "" elif "bracket" in p: pro_dict[new_id]["genre"] = "选择题" pro_dict[new_id]["genre"] = "" else: pro_dict[new_id]["genre"] = "解答题" pro_dict[new_id]["genre"] = "4em" print("导入关联题目 %s -> %s 信息成功."%(id,new_id)) save_dict(SortDict(pro_dict),main_json) #保存至目标pro_dict文件 return 0 #正常返回0 def strip_suffix(originalString, suf_words_list): # 字符串去除指定后缀 for sw in suf_words_list: output = re.sub(sw+r"[\S]*$","",originalString) return(output) # 返回原字符串中截去suf_words_list及之后字符的部分 def 
get_striped_origin(pro_dict,id,suf_words_list): # 题目来源去除指定后缀 return strip_suffix(pro_dict[id]["origin"],suf_words_list) # 返回去除指定后缀后的题目来源 def SeperateFirstLine(string): # 切割一个含有换行的字符串 thelist = string.split("\n") firstline = thelist[0].strip() contents = "\n".join([lines.strip() for lines in thelist[1:]]) return (firstline,contents) # 返回第一行, 其余行形成的二元组, 每一行前后的空格都被删去 def ObtainDatatoModify(metadatafilepath,fields_dict): #从metadata文件中获取需要修改的综合信息, metadata文件用双回车区分项, 单行的项表示此后的字段, 不小于两行的项中第一行是题目id, 第二行起是要修改的内容. #fieldsdictpath是字段信息数据库文件路径 data = ReadTextFile(metadatafilepath) datalines = data.split("\n") data = "" for l in datalines: if l.strip() in fields_dict: data += l + "\n\n" else: data += l + "\n" data = re.sub(r"\n[\s\n]*\n","\n\n",data.strip()) #去除一些无意义的空格和多余的回车 datalist = data.split("\n\n") to_modify_list = [] currentfield = "NotAField" for line in datalist: if line.strip() in fields_dict: currentfield = line.strip() elif not "\n" in line: currentfield = "NotAField" else: id,content = SeperateFirstLine(line) to_modify_list.append((currentfield,str(id).zfill(6),content)) return to_modify_list #返回一个列表, 每一项是一个三元组, 三项依次为字段, 题号, 要修改的内容 def FloatToInt(string): #从字符串返回浮点数,如果值非常接近整数则返回该整数 f = float(string) if abs(f-round(f))<0.01: f = round(f) return f #返回浮点数或整数 def OverwriteData(prodict,fieldsdict,field_id_and_content): #用覆盖方式修改题目某字段信息 field,id,content = field_id_and_content fieldType = fieldsdict[field]["FieldType"] if not id in prodict: print("题号 %s 并不在数据库中"%id) else: if fieldType == "str": prodict[id][field] = content elif fieldType == "int" or fieldType == "float": content = FloatToInt(content) prodict[id][field] = content print("已覆盖 %s 的 %s 字段, 内容为 %s"%(id,field,content)) return (id,field,content) #返回三元组: 题号, 字段, 覆盖内容 def AppendData(prodict,fieldsdict,field_id_and_content): #用添加方式修改题目某字段信息 field,id,content = field_id_and_content fieldType = fieldsdict[field]["FieldType"] if not id in prodict: print("题号 %s 并不在数据库中"%id) else: if fieldType == "str": if content.strip() 
in prodict[id][field]: print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,content)) else: prodict[id][field] = (prodict[id][field].strip() + "\n" + content).strip() print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,content)) elif fieldType == "list": lines = [line.strip() for line in content.split("\n")] for line in lines: if line in prodict[id][field]: print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,line)) else: prodict[id][field] = prodict[id][field].copy() + [line] print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,line)) return (id,field,content) #返回三元组: 题号, 字段, 内容 def AppendMutualData(prodict,field_id_and_content): #添加两个id之间的same, related, unrelated关联 field,id,content = field_id_and_content lines = [str(line).zfill(6) for line in content.split("\n")] for id2 in lines: if not id in prodict: print("题号 %s 并不在数据库中"%id) elif not id2 in prodict: print("题号 %s 并不在数据库中"%id2) else: if id2 in prodict[id][field]: print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,id2)) else: prodict[id][field] = prodict[id][field] + [id2] print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,id2)) if id in prodict[id2][field]: print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id2,field,id)) else: prodict[id2][field] = prodict[id2][field] + [id] print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id2,field,id)) return (id,field,content) #返回三元组: 题号, 字段, 内容 def AppendObjData(prodict,objdict,field_id_and_content): #添加目标编号数据 field,id,content = field_id_and_content if not id in prodict: print("题号 %s 并不在数据库中"%id) else: lines = [line.strip() for line in content.split("\n")] for objid in lines: if not objid in objdict: print("目标编号 %s 并不在数据库中"%objid) elif objid in prodict[id][field]: print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,objid)) else: prodict[id][field] = prodict[id][field] + [objid] print("已于 %s 的 %s 字段执行添加, 编号为 %s, 内容为 %s"%(id,field,objid,objdict[objid]["content"])) return (id,field,content) #返回三元组: 题号, 字段, 内容 def AppendUsageData(prodict,field_id_and_content): #添加使用记录数据 field,id,content = field_id_and_content lines = [re.sub(r"\s+",r"\t",line.strip()) for line in 
content.strip().split("\n")] pending_list = [] for line in lines: if not "FORCE" in line.upper(): time_stripped = re.sub(r"^\d{4,}\t","",line) #读取去除时间的数据 if time_stripped in "\n".join(prodict[id][field]): print("题号 %s 的 %s 字段, 内容 %s 已存在"%(id,field,line)) else: classid = [info for info in line.split("\t") if len(re.findall(r"[班高一二三]",info))>0][0] # 读取班级号 if classid in "\n".join(prodict[id][field]): print("班级 %s 在题号为 %s 的题目处已有使用记录, 在pending list中记录"%(classid,id)) oldinfo = [usage for usage in prodict[id][field] if classid in usage] pending_list = pending_list + [(id,line,oldinfo)] else: prodict[id][field].append(line) print("已于 %s 的 %s 字段执行添加, 内容为 %s"%(id,field,line)) else: line = re.sub(r"\s+",r"\t",re.sub(r"FORCE","",line.upper())).strip() prodict[id][field].append(line) print("已于 %s 的 %s 字段执行强制添加, 内容为 %s"%(id,field,line)) output = "" for item in pending_list: id,new,oldlist = item output += id + "\n" output += new+"\tFORCE\n" output += "\n".join(oldlist)+"\n\n" return (field,id,content,output) #返回四元组: 题号, 字段, 内容, 待确定是否要添加的字符串(不含FORCE字样的行为旧结果,含FORCE字样的行为新结果,FORCE是运行后强制添加) def ImportMetadata(prodict,objdict,fieldsdict,metadatafilepath,pendingdatafilepath): #metadata自动修改, 根据字段自适应修改, 参数为题库字典, 目标字典, 字段字典, metadata文本文件路径, 待确定是否替换的内容的存放路径 data_to_modify = ObtainDatatoModify(metadatafilepath,fieldsdict) outputstring = "usages\n\n" for item in data_to_modify: field = item[0] if field == "NotAField": print("字段名有误") return 1 else: method = fieldsdict[field]["Method"] if method == "overwrite": feedback = OverwriteData(prodict,fieldsdict,item) elif method == "append": feedback = AppendData(prodict,fieldsdict,item) elif method == "mutualappend": feedback = AppendMutualData(prodict,item) elif method == "objappend": feedback = AppendObjData(prodict,objdict,item) elif method == "usageappend": feedback = AppendUsageData(prodict,item) outputstring += feedback[3] elif method == "fixed": print("字段 %s 不可按此方式修改"%field) SaveTextFile(outputstring,pendingdatafilepath) return outputstring # 
已在数据库中修改, 之后需要将数据库写入一次, 返回1表示字段名有误, 返回其他表示成功进行了修改 def parseUsage(usagestring): #对单行usage信息进行分词 usagedict = {} datalist = re.sub(r"\s+",r"\t",usagestring.strip()).split("\t") if re.findall(r"20[\d]{2,6}",datalist[0]) != [] and re.findall(r"[\u4e00-\u9fa5]",datalist[0]) == []: date = datalist.pop(0) else: date = "" usagedict["date"] = date if re.findall(r"[高一二三班]",datalist[0]) != []: classid = datalist.pop(0) else: classid = "" usagedict["classid"] = classid usagedict["subproblems"] = len(datalist) usagedict["difficulties"] = [float(u) for u in datalist] usagedict["glossdiff"] = round(sum(usagedict["difficulties"])/usagedict["subproblems"],3) return usagedict #返回词典, 含date日期, classid班级标识, subproblems小题数目, difficulties得分率, glossdiff小题平均得分率 def StringSubstitute(regex,template,stringtuple): # 用stringtuple里的字符串逐个替换template中的符合某种regex规则的字符串 toSub = re.findall(regex,template) if not len(toSub) == len(stringtuple): print("长度不符.") return 1 #若长度不符则显示“长度不符”并返回1 else: output = template for i in range(len(toSub)): output = output.replace(toSub[i],stringtuple[i]) return output #若长度符合则返回替换后的字符串 def GenerateTexDataforEditing(id_string,prodict,templatefilepath,editor): # 根据id_string的题号列表在prodict中生成一个可修改的tex文件, 包含题目内容,答案和解答,editor表示编辑者 id_list = generate_number_set(id_string,prodict) output = "编辑者: " + editor + "\n\n\\begin{enumerate}\n\n" for id in id_list: content = prodict[id]["content"] answer = prodict[id]["ans"] solution = prodict[id]["solution"] remark = prodict[id]["remark"] output += "\\item (%s) "%str(id).zfill(6) + content + "\n\n" + "答案: " + answer + "\n\n" + "解答与提示: " + solution + "\n\n" + "备注: " + remark + "\n\n" output += "\\end{enumerate}" template = ReadTextFile(templatefilepath) texdata = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,[output]) return texdata # 返回Tex文件的字符串, 待保存至tex文件 def GetEditedProblems(string): #从.tex文件所读取的字符串中取出正文内容, 并切割成以题号为key的字典, 内容为[题目内容, 答案, 解答与提示, 备注]四元数组 bodydata = 
re.findall(r"\\begin\{enumerate\}([\s\S]*)\\end\{enumerate\}",string)[0]+r"\item" problems = re.findall(r"\((\d{6})\)([\s\S]*?)\\item",bodydata) edited_dict = {} for problem in problems: id, pro_string = problem edited_dict[id] = parseProblem(pro_string) return edited_dict # 返回以题号为key, 内容为[题目内容, 答案, 解答与提示, 备注]四元数组的字典 def parseProblem(string): # 对以不小于两个回车切分的四段式字符串进行分词(通常第二段有"答案:", 第三段有"解答与提示:", 第四段有"备注:") data = string.strip() data = re.sub(r"\n{2,}","\n\n",data) content,ans,solution,remark = data.split("\n\n") content = content.strip() ans = re.sub("答案:","",ans).strip() solution = re.sub("解答与提示:","",solution).strip() remark = re.sub("备注:","",remark).strip() return (content,ans,solution,remark) # 返回四元组(题目内容, 答案, 解答与提示, 备注) def ModifyProblembyTeX(id_string,prodict,toeditfilepath,editor): # vscode打开.tex文件修改题目的内容, 答案与解答 # 读取题号列表并生成待编辑的.tex文件 texdata = GenerateTexDataforEditing(id_string,prodict,"模板文件/题目编辑.txt",editor) SaveTextFile(texdata,toeditfilepath) # 打开待编辑的.tex文件 print("编辑完成后保存文件, 关闭文件继续...") os.system("code -w -g "+toeditfilepath) # 生成修改后的题号与内容, 答案, 解答与提示的对应 editor = GetDate() + "\t" + editor editedtexdata = ReadTextFile(toeditfilepath) edited_dict = GetEditedProblems(editedtexdata) editedIDList = [] # 将.tex文件中的修改反映到原题库字典, 并保存 for id in edited_dict: content, ans, solution, remark = edited_dict[id] if not (content == prodict[id]["content"] and ans == prodict[id]["ans"] and solution == prodict[id]["solution"] and remark == prodict[id]["remark"]): prodict[id]["content"] = content prodict[id]["ans"] = ans prodict[id]["solution"] = solution prodict[id]["remark"] = remark prodict[id]["edit"] = prodict[id]["edit"] + [editor] editedIDList.append(id) save_dict(prodict,"../题库0.3/problems.json") return generate_exp(editedIDList) # 返回编辑过的字典 def RemoveMutualLink(metadata,prodict): # 删除双向关联id(same,related,unrelated), 输入为一个字符串, 每行表示一对题号, 用空格或制表符隔开 lines = [re.sub(r"[\s]+",r"\t",line).strip() for line in metadata.split("\n")] for line in lines: if not line == []: id1,id2 = 
line.split("\t") id1 = id1.zfill(6) id2 = id2.zfill(6) for field in ["same","related","unrelated"]: if id1 in prodict[id2][field]: prodict[id2][field].remove(id1) if id2 in prodict[id1][field]: prodict[id1][field].remove(id2) return 0 # 返回0 def jsonEditProblemMetadata(id_string,prodict,editor): #用vscode在json模式下编辑题目综合信息 jsontoeditpath = "临时文件/problem_edit.json" idlist = generate_number_set(id_string,prodict) edit_dict = {} for id in idlist: edit_dict[id] = prodict[id].copy() save_dict(edit_dict,jsontoeditpath) #打开待编辑的json文件 os.system("code -w -g "+jsontoeditpath) #编辑后关闭文件窗口自动执行下面步骤 editeddict = load_dict(jsontoeditpath) editlist = [] for id in editeddict: if not prodict[id] == editeddict[id]: prodict[id] = editeddict[id].copy() prodict[id]["edit"] = prodict[id]["edit"] + [GetDate() + "\t" + editor] editlist.append(id) return(generate_exp(editlist)) # 返回编辑过的题号字符串 def GetSamePairs(prodict): #获取已标注的相同题目组 same_groups = [] for id in prodict: same = prodict[id]["same"] if len(same) > 0 and not len(prodict[id]["usages"]) == 0: same_groups.append([id]+same) return(same_groups) #返回相同题目组, 每组由一些相同的题目构成 def ShareSameUsages(prodict,same_group): #对same_group中的所有题号共享使用记录 current_usages = [] for id in same_group: for usage in prodict[id]["usages"]: if not usage in current_usages: current_usages = current_usages + [usage] for id in same_group: prodict[id]["usages"] = current_usages.copy() return((same_group,current_usages)) #返回same_group中的题号列表 及 所有题号记录 组成的二元组 def SortUsages(prodict): #对使用记录按字符串进行排序 for id in prodict: usages = prodict[id]["usages"].copy() usages.sort() prodict[id]["usages"] = usages.copy() return 0 #返回0 def SortUsagesbyAverage(theusages): #根据使用记录每一条的平均值进行排序, 越高的在越前面 glossdifflist = [] for i in range(len(theusages)): glossdiff = parseUsage(theusages[i])["glossdiff"] if glossdiff <= 0.999: #去除全对的使用记录 glossdifflist = glossdifflist + [(i,glossdiff)] sortedglossdifflist = sorted(glossdifflist, key = lambda x:x[1], reverse = True) newusages = [theusages[i[0]] for i in 
sortedglossdifflist] return(newusages) # 返回排序后的list def StripSuffix(string, suf_words): #除去字符串前后的空格及suf_words中每一个符合regex的字符串及其之后的部分 string = string.strip() for sw in suf_words: string = re.sub(sw+r"[\S]*$","",string) return(string) # 返回处理以后的字符串 def MatchCondition(problem,condition_dict): #判断problem这一字典是否符合condition_dict中的所有筛选条件 match = True #初始设定符合条件 for fieldraw in [c for c in condition_dict if not "_not" in c and not condition_dict[c] == [""]]: #选出正向的条件([""]表示该条件不起作用) cond_list = condition_dict[fieldraw] if type(cond_list) == str: cond_list = [cond_list] field = re.sub(r"\d","",fieldraw) if type(problem[field]) == list: #将题库字典中的相应字段转化成字符串string string = "\n".join((problem[field])) else: string = str(problem[field]) current_match = False for cond in cond_list: #有一个条件被满足, 就认为当前字段满足条件了 if len(re.findall(cond,string)) > 0: current_match = True if current_match == False: match = False # 如果某个正向条件未满足, 那么match就赋值为False; 如果每一个正向条件都被满足, 那么match仍为True for fieldraw in [c for c in condition_dict if "_not" in c and not condition_dict[c] == [""]]: #选出反向的条件([""]表示该条件不起作用) cond_list = condition_dict[fieldraw] fieldraw = fieldraw.replace("_not","") field = re.sub(r"\d","",fieldraw) if type(problem[field]) == list: #将题库字典中的相应字段转化成字符串string string = "\n".join((problem[field])) else: string = str(problem[field]) current_match = True for cond in cond_list: #有一个条件被满足, 就认为当前字段不被满足了 if len(re.findall(cond,string)) > 0: current_match = False if current_match == False: match = False return match #返回是否符合条件 def get_color(value): # 根据得分率获得rgb颜色代码 value = float(value) if value>=0.5: (r,g)=(1,2-2*value) else: (r,g)=(2*value,1) return "{%.3f,%.3f,0}"%(r,g) #返回用大括号包围的rgb数值 def GenerateValueColorCode(matchobj): # 生成三位小数代表的颜色方框(黑色边框, 难度对应的底色)的LaTeX代码 value = matchobj.group(1) return "\t\\fcolorbox[rgb]{0,0,0}%s{%s}"%(get_color(value),value) #返回代码 def StudentsGetAfterContent(id,prodict,answered,spaceflag): #生成学生版讲义后的答案及空格, answered表示有无答案, spaceflag表示有无空格 string = "" if "ans" in prodict[id]: if 
answered: string += "答案: \\textcolor{red}{%s}\n\n"%(prodict[id]["ans"] if prodict[id]["ans"] != "" else "暂无答案") if spaceflag: if "space" in prodict[id]: if prodict[id]["space"] != "": string += "\\vspace*{%s}\n\n"%prodict[id]["space"] return string #生成学生讲义后的答案及空格 def XeLaTeXCompile(filedir,filename): #在filedir目录中用XeLaTeX编译filename文件两次(能编译出正确的引用和页码) flagsuc = True for i in range(2): if os.system("xelatex -interaction=batchmode -output-directory=%s %s"%(filedir,filename)) == 0: print("第%d次编译成功."%(i+1)) else: flagsuc = False return flagsuc # 若第二次编译成功则返回True, 否则返回False def GenerateStudentBodyString(problems,sectiontitles,pro_dict,consecutivenumbering= True,answered= True,spaceflag=True): #生成学生版的.tex文件的主体内容 bodystring = "" if len(problems) == len(sectiontitles): count = 0 for i in range(len(problems)): idlist = generate_number_set(problems[i],pro_dict) sectionstring = "\\section{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(sectiontitles[i],count if consecutivenumbering else 0) for id in idlist: count += 1 aftercontent = StudentsGetAfterContent(id,pro_dict,answered,spaceflag) sectionstring += "\\item {\\tiny(%s)} %s\n\n%s"%(id,pro_dict[id]["content"],aftercontent) sectionstring += "\\end{enumerate}\n\n" bodystring += sectionstring else: idstring = ",".join(problems) idlist = generate_number_set(idstring,pro_dict) sectionstring = "\\begin{enumerate}\n\n" for id in idlist: aftercontent = StudentsGetAfterContent(id,pro_dict,answered,spaceflag) sectionstring += "\\item {\\tiny(%s)} %s\n\n%s"%(id,pro_dict[id]["content"],aftercontent) sectionstring += "\\end{enumerate}\n\n" bodystring += sectionstring return bodystring #返回主题内容字符串 def TeachersGetAfterContentColored(id,prodict,objdict,topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True): #生成教师版讲义后的答案及空格, topandbottomusagestuple表示保留得分率最高的使用记录与最低的使用记录的个数, 有负数表示不排列, 彩色背景 string = "" objs = 
def TeachersGetAfterContentPlain(id,prodict,objdict,topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True):
    """Build the teacher-edition material that follows one problem, as plain coloured text.

    topandbottomusagestuple is forwarded to GenerateUsageTexCode.  Each
    ``show*`` flag switches one part on or off; a part that is switched
    off is not evaluated.  Output order: objectives, tags, answer,
    solution, usages, origin, remark.
    """
    def field(key, fallback):
        # Stored value, or the placeholder when the stored value is "".
        value = prodict[id][key]
        return value if value != "" else fallback

    objs = tags = ans = solution = origin = remark = usages = ""
    if showobjs:
        objs = "目标:\n\n%s\n\n"%GenerateObjTexCode(id,prodict,objdict)
    if showtags:
        if not prodict[id]["tags"] == []:
            tags = "标签: \\textcolor[rgb]{0.5,0.6,0.8}{%s}\n\n"%("; ".join(prodict[id]["tags"]))
        else:
            tags = "标签: \n\n"
    if showans:
        ans = "答案: \\textcolor{red}{%s}\n\n"%field("ans","暂无答案")
    if showsolution:
        solution = "解答或提示: \\textcolor{magenta}{%s}\n\n"%field("solution","暂无解答")
    if showorigin:
        origin = "来源: %s\n\n"%prodict[id]["origin"]
    if showremark:
        remark = "备注: \\textcolor[rgb]{0,0.5,0.2}{%s}\n\n"%field("remark","暂无备注")
    if showusages:
        usages = "使用记录:\n\n%s\n\n"%GenerateUsageTexCode(id,prodict,topandbottomusagestuple)
    return objs + tags + ans + solution + usages + origin + remark


def GenerateObjTexCode(id,prodict,objdict):
    """Return a blue tcolorbox listing the learning objectives linked to problem ``id``.

    Each objective code in prodict[id]["objs"] is printed with its text
    from objdict; the code "KNONE" (any letter case) gets a fixed
    "no valid objective" text, and an empty list yields a "not linked" note.
    """
    string = "\n\\begin{tcolorbox}[colback = blue!10!white, colframe = blue!10!white, breakable]"
    for objid in prodict[id]["objs"]:
        if objid.upper() == "KNONE":
            string += "\n\n%s\t%s"%(objid,"无当前有效关联目标")
        else:
            string += "\n\n%s\t%s"%(objid,objdict[objid]["content"])
    if prodict[id]["objs"] == []:
        string += "暂未关联\n"
    string += "\n\n\\end{tcolorbox}\n"
    return string


def ChooseUsage(usages,topandbottomusagestuple):
    """Select which usage records of a problem to display.

    topandbottomusagestuple is (top, bottom).  If either number is
    negative the records are returned untouched and unsorted.  Otherwise
    they are ordered by SortUsagesbyAverage; when top + bottom exceeds the
    number of records the whole ordered list is returned, else its first
    ``top`` and last ``bottom`` entries.
    """
    top,bottom = topandbottomusagestuple
    if top < 0 or bottom < 0:
        return usages
    ranked = SortUsagesbyAverage(usages)  # sort once, reuse for both slices
    if top + bottom > len(usages):
        return ranked
    return ranked[:top] + ranked[(len(usages)-bottom):]


def GenerateUsageTexCode(id,prodict,topandbottomusagestuple):
    """Return the reduced usage-record list of problem ``id`` as LaTeX text.

    Records are chosen by ChooseUsage and joined with blank lines; every
    tab-prefixed decimal (one digit, a dot, up to ten digits) is rewritten
    by GenerateValueColorCode.
    """
    rawusages = prodict[id]["usages"].copy()
    usages = ChooseUsage(rawusages,topandbottomusagestuple)
    # Raw string: same pattern as before, without invalid "\d" / "\." escapes.
    usagecode = re.sub(r"\t([\d]\.[\d]{0,10})",GenerateValueColorCode,"\n\n".join(usages))
    return usagecode
def GenerateTeacherBodyString(problems,sectiontitles,prodict,objdict,consecutivenumbering = True,topandbottomusagestuple = (3,3),sectionname = "section", showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True, colored = False):
    """Build the body of a teacher handout as LaTeX source.

    Mirrors the student version: one ``\\<sectionname>`` per id expression
    when len(problems) == len(sectiontitles), otherwise a single untitled
    enumerate over all expressions joined with ",".  ``colored`` chooses
    between the tcolorbox and the plain after-content renderer; the
    ``show*`` flags and topandbottomusagestuple are forwarded to it.
    """
    if colored:
        GetAfterContent = TeachersGetAfterContentColored
    else:
        GetAfterContent = TeachersGetAfterContentPlain
    display = dict(topandbottomusagestuple = topandbottomusagestuple, showobjs = showobjs, showtags = showtags, showans = showans, showsolution = showsolution, showusages = showusages, showorigin = showorigin, showremark = showremark)

    def render_item(pid):
        # One \item: id in parentheses, problem text, teacher material.
        aftercontent = GetAfterContent(pid,prodict,objdict,**display)
        return "\\item (%s) %s\n\n%s"%(pid,prodict[pid]["content"],aftercontent)

    chunks = []
    if len(problems) == len(sectiontitles):
        count = 0  # running item total, seeds \setcounter for the next section
        for idexp, title in zip(problems, sectiontitles):
            idlist = generate_number_set(idexp,prodict)
            header = "\\%s{%s}\n\\begin{enumerate}\n\\setcounter{enumi}{%d}\n\n"%(sectionname,title,count if consecutivenumbering else 0)
            items = [render_item(pid) for pid in idlist]
            count += len(items)
            chunks.append(header + "".join(items) + "\\end{enumerate}\n\n")
    else:
        idlist = generate_number_set(",".join(problems),prodict)
        items = [render_item(pid) for pid in idlist]
        chunks.append("\\begin{enumerate}\n\n" + "".join(items) + "\\end{enumerate}\n\n")
    return "".join(chunks)


def GetValidTexFiles(path):
    """Collect the .tex files under ``path`` that contain problem ids.

    A file counts when its text contains a six-digit number in
    parentheses.  Returns (sorted unique file paths, sorted unique
    directories holding such files).
    """
    texfiles = set()
    folders_with_ids = set()
    for root, _folders, files in os.walk(path):
        for file in files:
            if file[-4:] != ".tex":
                continue
            fullpath = os.path.join(root,file)
            if len(re.findall(r"\((\d{6})\)",ReadTextFile(fullpath))) > 0:
                folders_with_ids.add(root)
                texfiles.add(fullpath)
    return (sorted(texfiles),sorted(folders_with_ids))
def generate_lessonid_list(lessonidstr,lessonsdict):
    """Return the lesson codes in ``lessonsdict`` selected by ``lessonidstr``.

    ``lessonidstr`` is a comma-separated list of criteria.  A criterion
    without ":" is used as a literal prefix.  A criterion containing ":"
    is upper-cased, stripped of "K", expanded by generate_number_set, and
    each result becomes the prefix "K" + its last four characters.
    A lesson code is selected when it starts with any prefix; order follows
    ``lessonsdict``.
    """
    criterion_list = []
    for cr in lessonidstr.split(","):
        if not ":" in cr:
            criterion_list.append(cr)
        else:
            criterion_list = criterion_list + ["K"+c[-4:] for c in generate_number_set(cr.upper().replace("K",""))]
    lessons_list = []
    for id in lessonsdict:
        for cr in criterion_list:
            if id.startswith(cr):
                lessons_list.append(id)
                break  # one hit is enough; avoids duplicate entries
    return lessons_list


def GenerateLessonPreparationDraft(notetitle, outputdir, adict, prodict, objdict, lessonsdict, topandbottomusagestuple = (3,3), showobjs = True, showtags = True, showans = True, showsolution = True, showusages = True, showorigin = True, showremark = True):
    """Generate and compile a lesson-preparation draft, one section per lesson.

    adict maps lesson code -> problem-id expression.  For each lesson the
    draft lists every objective in objdict whose code starts with the
    lesson code, then the referenced problems in teacher layout.  The body
    is substituted into the template "模板文件/讲义模板.txt", saved as
    <outputdir>/<notetitle>.tex and compiled with XeLaTeXCompile.

    Returns 0 on successful compilation, otherwise the LaTeX source.
    """
    output = "\\tableofcontents\n\\newpage\n\n"
    outputfilepath = os.path.join(outputdir,notetitle+".tex")
    for lid in adict:
        # "\\ " is an explicit LaTeX control space (was the invalid escape "\ ").
        output += "\\section{%s \\ %s}\n\n"%(lid,lessonsdict[lid]["name"])
        output += "\\subsection{课时目标}\n\n"
        output += "\\begin{enumerate}\n\n"
        for objid in objdict:
            if objid.startswith(lid):
                output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
        output += "\\end{enumerate}\n\n"
        output += GenerateTeacherBodyString([adict[lid]],["参考题目资源"],prodict,objdict,sectionname = "subsection",topandbottomusagestuple= topandbottomusagestuple, showobjs = showobjs, showtags = showtags, showans = showans, showsolution = showsolution, showusages = showusages, showorigin = showorigin, showremark = showremark)
        output += "\\newpage\n\n"
    latex_raw = ReadTextFile("模板文件/讲义模板.txt")
    # Replace the name / student-number line of the template with the school name.
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform != "win32":
        # Off Windows: use the fandol font set and comment out \setCJKmainfont.
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
    # Fill the template placeholders with the title and the body.
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    if XeLaTeXCompile(outputdir,notetitle+".tex"):
        print("编译成功")
        return 0
    else:
        print("编译失败")
        return latex_data
def GenerateLessonPreparation(notetitle, outputdir, adict, prodict, objdict, basicknowledgedict, homeworkspaces = False):
    """Generate and compile a lesson-plan booklet, one section per lesson in ``adict``.

    Every adict entry supplies "name", "objects", "basicknowledges",
    "examples_basic", "examples_adv", "work_inclass", "homework" and
    "remarks".  A lesson whose name contains "复习" is rendered as a review
    sheet (homework problems only); any other lesson gets objectives,
    basic knowledge, examples, in-class work, remarks and homework.
    ``homeworkspaces`` puts the homework on a new page and enables answer
    space there.

    The body is substituted into "模板文件/讲义模板.txt", saved as
    <outputdir>/<notetitle>.tex and compiled.  Returns 0 on success,
    otherwise the LaTeX source.
    """
    output = "\\tableofcontents\n\\newpage\n\n"
    outputfilepath = os.path.join(outputdir,notetitle+".tex")
    for key in adict:
        lessonid = key
        lessonname = adict[key]["name"]
        objlist = adict[key]["objects"]
        bnlist = adict[key]["basicknowledges"]
        eblist = adict[key]["examples_basic"]
        ealist = adict[key]["examples_adv"]
        worklist = adict[key]["work_inclass"]
        homeworklist = adict[key]["homework"]
        remarks = adict[key]["remarks"]
        if not "复习" in lessonname:
            # "\\ " is an explicit LaTeX control space (was the invalid escape "\ ").
            output += "\\section{%s \\ %s}\n\n"%(lessonid,lessonname)
            if not objlist == []:
                # Learning objectives.
                output += "\\subsection{课时目标}\n\n"
                output += "\\begin{enumerate}\n\n"
                for objid in objlist:
                    output += "\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"])
                output += "\\end{enumerate}\n\n"
            if not bnlist == []:
                # Basic-knowledge review.
                output += "\\subsection{双基梳理}\n\n"
                basic_body = ""
                for bnid in bnlist:
                    basic_body += "\\item %s\n\n"%basicknowledgedict[bnid]["content"]
                output += "\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body
            output += "\\subsection{知识体验}\n\n"
            output += "\\subsubsection{必讲例题}\n\n"
            output += GenerateStudentBodyString(eblist,[],prodict,consecutivenumbering=False,answered=False)
            output += "\\subsubsection{选讲例题}\n\n"
            output += GenerateStudentBodyString(ealist,[],prodict,consecutivenumbering=False,answered=False)
            output += "\\subsection{巩固新知}\n\n"
            output += GenerateStudentBodyString(worklist,[],prodict,consecutivenumbering=False,answered=False)
            if not remarks == "":
                output += "\\subsection{备注}\n\n"
                output += remarks + "\n\n"
            if homeworkspaces:
                output += "\\newpage\n\n"
            output += "\\subsection{课后作业}\n\n"
            output += GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered=False,spaceflag=homeworkspaces)
            output += "\\newpage\n\n"
        else:
            output += "\\section{%s \\ %s}\n\n"%(lessonid,lessonname)
            output += "\\subsection{复习题}\n\n"
            output += GenerateStudentBodyString(homeworklist,[],prodict,consecutivenumbering=False,answered = False,spaceflag=homeworkspaces)
            output += "\\newpage\n\n"
    latex_raw = ReadTextFile("模板文件/讲义模板.txt")
    # Replace the name / student-number line of the template with the school name.
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform != "win32":
        # Off Windows: use the fandol font set and comment out \setCJKmainfont.
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
    # Fill the template placeholders with the title and the body.
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    if XeLaTeXCompile(outputdir,notetitle+".tex"):
        print("编译成功")
        return 0
    else:
        print("编译失败")
        return latex_data
def GenerateSingleLessonPreparation(lessonid, outputdir, adict, prodict, objdict, basicknowledgedict, homeworkspaces = False, filename = "default"):
    """Generate and compile the lesson plan of the single lesson ``lessonid``.

    adict[lessonid] supplies the same fields as in the booklet generator.
    With filename == "default" the output file is named
    <lessonid><lesson name>.tex, otherwise <filename>.tex, inside
    ``outputdir``.  Review lessons (name contains "复习") contain only the
    review problems.  Returns 0 on successful compilation, otherwise the
    LaTeX source.
    """
    lesson = adict[lessonid]
    notetitle = lessonid + r" \ " + lesson["name"]
    if filename == "default":
        filename = lessonid + lesson["name"]
    filename = filename + ".tex"
    outputfilepath = os.path.join(outputdir,filename)
    lessonname = lesson["name"]
    objlist = lesson["objects"]
    bnlist = lesson["basicknowledges"]
    eblist = lesson["examples_basic"]
    ealist = lesson["examples_adv"]  # read but not rendered: the optional-examples section is disabled here
    worklist = lesson["work_inclass"]
    homeworklist = lesson["homework"]
    remarks = lesson["remarks"]

    def student_body(idlist, spaceflag = True):
        # Unanswered, per-section numbering; all ids go into one enumerate.
        return GenerateStudentBodyString(idlist,[],prodict,consecutivenumbering=False,answered=False,spaceflag=spaceflag)

    pieces = []
    if "复习" in lessonname:
        pieces.append("\\section{复习题}\n\n")
        pieces.append(student_body(homeworklist,homeworkspaces))
    else:
        if objlist != []:
            # Learning objectives.
            pieces.append("\\section{课时目标}\n\n")
            pieces.append("\\begin{enumerate}\n\n")
            for objid in objlist:
                pieces.append("\\item %s \\ %s \n\n"%(objid,objdict[objid]["content"]))
            pieces.append("\\end{enumerate}\n\n")
        if bnlist != []:
            # Basic-knowledge review.
            pieces.append("\\section{双基梳理}\n\n")
            basic_body = "".join(["\\item %s\n\n"%basicknowledgedict[bnid]["content"] for bnid in bnlist])
            pieces.append("\\begin{enumerate}\n\n %s\n\n\\end{enumerate}\n\n"%basic_body)
        pieces.append("\\section{知识体验}\n\n")
        pieces.append(student_body(eblist))
        pieces.append("\\section{巩固新知}\n\n")
        pieces.append(student_body(worklist))
        if remarks != "":
            pieces.append("\\section{备注}\n\n")
            pieces.append(remarks + "\n\n")
        if homeworkspaces:
            pieces.append("\\newpage\n\n")
        pieces.append("\\section{课后作业}\n\n")
        pieces.append(student_body(homeworklist,homeworkspaces))
    output = "".join(pieces)

    template = ReadTextFile("模板文件/讲义模板.txt")
    # Swap the name / student-number line for the school name.
    template = template.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform != "win32":
        # Off Windows: fandol font set, \setCJKmainfont commented out.
        template = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",template)
        template = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",template)
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",template,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    if not XeLaTeXCompile(outputdir,filename):
        print("编译失败")
        return latex_data
    print("编译成功")
    return 0
def getCopy():
    """Return the clipboard text with "\\r\\n" line ends converted to "\\n".

    The clipboard is closed even when reading fails, so a failed read does
    not leave it locked for other programs.
    """
    wc.OpenClipboard()
    try:
        t = wc.GetClipboardData(win32con.CF_UNICODETEXT)
    finally:
        wc.CloseClipboard()
    t = t.replace("\r\n","\n")
    return t


def setCopy(string):
    """Replace the clipboard contents with ``string`` (closing the clipboard even on error)."""
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, string)
    finally:
        wc.CloseClipboard()


def RefineMathpix(raw_string):
    """Clean up text recognised by Mathpix into the house LaTeX style.

    Applies a fixed, order-dependent sequence of regular-expression
    rewrites (delimiters, punctuation, common misrecognitions, problem
    numbering, multiple-choice options, blanks and brackets) and then the
    structural passes SplitMathComma, MergeMathComma, RefineCasesEnv,
    RefineChineseComma and RefineInterval.  Returns the rewritten string.
    """
    # Full-width punctuation -> half-width replacement (keys are used as regex patterns).
    puctuationsfulltosemi = {"\u3000": " ","。": ". ",".": ". ",",": ", ",":": ": ",";": "; ","(": "(",")": ")","?": "? ","“": "``","”": "''", "【": "[", "】": "]"}
    # LaTeX commands rewritten to the preferred form (regex pattern -> replacement template).
    replacestrings = {r"\\overparen": r"\\overset\\frown", "eqslant": "eq", r"\\vec": r"\\overrightarrow ", r"\\bar": r"\\overline", r"\\lim": r"\\displaystyle\\lim", r"\\sum":r"\\displaystyle\\sum", r"\\prod":r"\\displaystyle\\prod", r"\\mid":"|", r"\^\{\\prime\}":"'",r"e\^":r"\\mathrm{e}^",r"/\s*/":r"\\parallel "}
    # Frequent character misrecognitions (regex pattern -> correct text).
    wrongrecog = {"雉":"锥","[粗秿]圆":"椭圆","投郑":"投掷","抛郑":"抛掷","范目":"范围","揷":"插","末见":"未见","末成":"未成","针角":"钝角","幕函数":"幂函数","末知":"未知","阀值":"阈值","祖[桓晅]":"祖暅","图象":"图像","末使用":"未使用","末来":"未来"}
    string = raw_string
    # Drop \left / \right in front of ( [ | and remove "\left." / "\right." entirely.
    string = re.sub(r"\\left([\.\(\[|])",lambda matchobj: "" if matchobj.group(1) == "." else matchobj.group(1),string)
    string = re.sub(r"\\right([\.\)\]|])",lambda matchobj: "" if matchobj.group(1) == "." else matchobj.group(1),string)
    # Same for braces.
    string = re.sub(r"\\left\\\{","\\{",string)
    string = re.sub(r"\\right\\\}","\\}",string)
    for s in puctuationsfulltosemi:
        string = re.sub(s,puctuationsfulltosemi[s],string)
    for s in replacestrings:
        string = re.sub(s,replacestrings[s],string)
    for s in wrongrecog:
        string = re.sub(s,wrongrecog[s],string)
    # Remove whitespace around quotes and braces.
    string = re.sub(r"[\s]*(``|''|\}|\{)[\s]*",lambda matchobj: matchobj.group(1),string)
    # Turn problem numbers ("3.", "例 2", "[例 1]") into \item.
    string = re.sub(r"(?:(?:^|\n)+[例]*[\s]*[0-9]+[\s]*[\.、\s]+|\[[\s]*例[\s]*[0-9]*[\s]*\][\s]*)","\\n\\\\item ",string)
    # Display math -> inline math.
    string = re.sub(r"\$\$","$",string)
    # Merge adjacent math environments separated only by whitespace.
    string = re.sub(r"\$\s+\$"," ",string)
    # Separate punctuation from a following $.
    string = re.sub(r"([,.:;?!])\$",lambda x:x.group(1)+" $",string)
    # \frac -> \dfrac.
    string = re.sub(r"\\frac",r"\\dfrac",string)
    # Multiple-choice options A-D on one problem -> \fourch{..}{..}{..}{..}.
    string = re.sub(r"\n(?:A\.|\(A\))([\s\S]*?)(?:B\.|\(B\))([\s\S]*?)(?:C\.|\(C\))([\s\S]*?)(?:D\.|\(D\))([\s\S]*?)\n",lambda matchobj: "\n\\fourch{%s}{%s}{%s}{%s}\n"%(matchobj.group(1).strip(),matchobj.group(2).strip(),matchobj.group(3).strip(),matchobj.group(4).strip()),string+"\n\n")
    # Drop a trailing period / semicolon at the end of an option.
    string = re.sub(r"[\.;](\}\{|\}\n)",lambda matchobj: matchobj.group(1),string)
    # Collapse blank lines and leading whitespace.
    string = re.sub(r"\n\s+","\n",string)
    # Remove \quad, \qquad, ...
    string = re.sub(r"\\q+uad","",string)
    # Remove ~.
    string = re.sub(r"~","",string)
    # Empty parentheses -> \bracket{20}; add a period when they end a line.
    string = re.sub(r"\s*\([\s]{,10}\)",r"\\bracket{20}",string)
    string = re.sub(r"\s*\\bracket\{20\}\s*\n",r"\\bracket{20}.\n",string)
    # Remove braces around single-character sub/superscripts (twice, for overlapping matches).
    for i in range(2):
        string = re.sub(r"(\^|_)\{([0-9a-zA-Z])\}",lambda matchobj: matchobj.group(1)+matchobj.group(2),string)
    # Join digits / capitals separated by stray spaces.
    for i in range(2):
        string = re.sub(r"([0-9A-Z])\s+([0-9A-Z])",lambda matchobj: matchobj.group(1)+matchobj.group(2),string)
    # Join CJK characters separated by stray spaces.
    for i in range(2):
        string = re.sub(r"([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])",lambda matchobj: matchobj.group(1)+matchobj.group(2),string)
    # Upright C / P for combination and permutation numbers.
    string = re.sub(r"([CP])(_[^_\^]{,5}\^)",lambda x:r"\mathrm{"+x.group(1)+"}"+x.group(2),string)
    # Fractions that start a sub/superscript go back to \frac.
    string = re.sub(r"([\^_])\{([-]{0,1})\\dfrac",lambda matchobj: matchobj.group(1) + "{" + matchobj.group(2) + "\\frac",string)
    # ldots -> cdots.
    string = re.sub(r"ldots",r"cdots",string)
    string = re.sub(r"\\text\s*\{,\s*当\s*\}",", ",string)
    # array environments (piecewise functions) -> cases.
    string = re.sub(r"[\\{]*\\(begin|end)\{array\}(?:\{[rcl]*\}){0,1}",lambda matchobj: "\\" + matchobj.group(1) + "{cases}",string)
    # A problem ending in CJK text or math gets a fill-in blank before the next \item.
    string = re.sub(r"([\u4e00-\u9fa5\$])[\s]*\n\\item",lambda matchobj: matchobj.group(1)+"\\blank{50}.\n\\item",string)
    # Insert a blank after "是", "为" or "=$" when punctuation or a line end follows.
    string = re.sub(r"(是|为|(?:=\$))\s*([,.;\n])",lambda matchobj: matchobj.group(1) + "\\blank{50}" + ("." if matchobj.group(2) == "\n" else "") + matchobj.group(2),string)
    if not "\\bracket" in string:
        # No bracket anywhere yet: add one before each option list.
        string = re.sub(r"([\u4e00-\u9fa5\$])(?:\\bracket\{20\})*[\.]*[\s]*\n\\fourch",lambda matchobj: matchobj.group(1)+"\\bracket{20}.\n\\fourch",string)
    # Move a bracket that sits just before a closing $ to after it.
    string = re.sub(r"\\bracket\{(\d*)\}\$",lambda matchobj: "$"+"\\bracket{"+matchobj.group(1)+"}",string)
    # Comment lines get no blank.
    string = re.sub(r"(%[^\n]*)\\blank\{50\}\.",lambda matchobj:matchobj.group(1),string)
    # A blank directly before an option list becomes a bracket.
    string = re.sub(r"\\blank\{50\}\.\s*\n\\fourch",r"\\bracket{20}."+"\n"+r"\\fourch",string)
    # Sub-question labels "(1)" on a new line get an explicit line break before them.
    string = re.sub(r"[\\\\]*\n(\(\d{1,2}\))(?:(?!\n)\s)*",lambda matchobj: "\\\\\n"+matchobj.group(1)+" ",string)
    # A parenthesised condition at the end of a formula is moved outside the math environment.
    string = re.sub(r"\(([^\(\)]*(?:\\in|=|\\ge|\\le|\\ne|>|<|\\parallel|\\perp)[^\(\)]*)\)\$",lambda matchobj: "$($" + matchobj.group(1) + "$)",string)
    # Remove the double $ the previous step can create.
    string = re.sub(r"\$\$\(","(",string)
    # Remove the space between "(" and a following $.
    string = re.sub(r"\(\s\$","($",string)
    # "(\begin{cases}" ... "\end{cases})" was a matrix, not a case distinction.
    string = re.sub(r"\(\\begin\{cases\}",r"\\begin{pmatrix}",string)
    string = re.sub(r"\\end\{cases\}\)",r"\\end{pmatrix}",string)
    string = SplitMathComma(string)      # split math environments at commas where appropriate
    string = MergeMathComma(string)      # merge math environments around commas where appropriate
    string = RefineCasesEnv(string)      # tidy cases environments
    string = RefineChineseComma(string)  # enumeration commas inside math
    string = RefineInterval(string)      # fix $ / bracket order around intervals
    return string
def RefineInterval(string):
    """Move interval brackets that ended up outside the math environment back inside.

    Handles three shapes:
      * ``[$a, +\\infty$)``  ->  ``$[a, +\\infty)$``   (also with "(")
      * ``($-\\infty, b$]``  ->  ``$(-\\infty, b]$``   (also with ")")
      * ``($-\\infty$, $+\\infty$)``  ->  ``$(-\\infty, +\\infty)$``
    A candidate whose matching ``$`` or bracket does not exist is left
    unchanged.  Returns the corrected string.
    """
    newstring = string

    # Right end: "+\infty$)" whose opening "$" is preceded by "[" or "(".
    poslist = [(match.start(),match.end()) for match in re.finditer(r"\+\\infty\$\)",newstring)]
    for s,e in reversed(poslist):  # back to front, so earlier offsets stay valid
        leftdollar = newstring.rfind("$",0,s)
        if leftdollar < 1:
            continue  # no opening "$", or nothing in front of it
        leftbracket = leftdollar-1
        lb = newstring[leftbracket]
        if lb in ["[","("]:
            newstring = newstring[:leftbracket] + "$"+lb+newstring[leftbracket+2:e-2] + ")$" + newstring[e:]

    # Left end: "($-\infty" whose closing "$" is followed by "]" or ")".
    poslist = [(match.start(),match.end()) for match in re.finditer(r"\(\$-\\infty",newstring)]
    for s,e in reversed(poslist):
        rightdollar = newstring.find("$",e)
        rightbracket = rightdollar+1
        if rightdollar == -1 or rightbracket >= len(newstring):
            continue  # no closing "$", or nothing after it
        rb = newstring[rightbracket]
        if rb in ["]",")"]:
            newstring = newstring[:s]+"$("+newstring[s+2:rightbracket-1]+rb+"$"+newstring[rightbracket+1:]

    newstring = newstring.replace("($-\\infty$, $+\\infty$)","$(-\\infty, +\\infty)$")
    return newstring
def RefineChineseComma(string): # If an enumeration comma "、" lies inside a math environment, close and reopen the math around it
    CommaPositions = [match.start() for match in re.finditer("、",string)]
    CommaPositions.reverse() # work from the end so earlier positions stay valid
    for pos in CommaPositions:
        if string[:pos].count("$") % 2 == 1: # odd number of $ before it: the comma is inside math
            string = string[:pos].rstrip() + "$、$" + string[(pos+1):].lstrip()
    return string # the processed string

def SplitMathComma(string): # Decide for each "," inside a math environment whether to split the environment there ("$, $"), and do so
    # Delimiter pairs (as regexes) that must not be torn apart by a split.
    UnsplittedPairs = [(r"[\(\[]",r"[\)\]]"),(r"\{",r"\}"),(r"\\begin\{cases\}",r"\\end\{cases\}")]
    # lmatter: part still to be scanned; rmatter: already finished tail.
    lmatter,rmatter = (string,"")
    while "," in lmatter:
        pos = lmatter.rfind(",") # right-most comma not yet handled
        if lmatter[:pos].count("$") % 2 == 1: # comma is inside math
            stringtemp = lmatter+rmatter
            start = stringtemp[:pos].rfind("$")
            end = stringtemp[pos:].find("$")+pos # the math environment around the comma
            stringtemp[start:end+1] # expression statement without effect
            locallmatter = stringtemp[start+1:pos] # math text left of the comma
            localrmatter = stringtemp[pos+1:end] # math text right of the comma
            tosplit = True
            for p1,p2 in UnsplittedPairs:
                p1l = SubstringOccurence(p1,locallmatter)
                p1r = SubstringOccurence(p1,localrmatter)
                p2l = SubstringOccurence(p2,locallmatter)
                p2r = SubstringOccurence(p2,localrmatter)
                if len(p1l)>0 and len(p2l) == 0: # opener on the left that is never closed there
                    tosplit = False
                if len(p2r)>0 and len(p1r) == 0: # closer on the right that was never opened there
                    tosplit = False
                if len(p1l)*len(p2l)>0:
                    if p1l[-1]>p2l[-1]: # last opener on the left comes after the last closer
                        tosplit = False
                if len(p1r)*len(p2r)>0:
                    if p1r[0]>p2r[0]: # first closer on the right comes before the first opener
                        tosplit = False
            # Split only when both sides contain a relation symbol.
            if len(SubstringOccurence(r"(?:=|\\ge|\\le|\\ne|\\in|>|<|\\parallel|\\perp)",locallmatter))*len(SubstringOccurence(r"(?:=|\\ge|\\le|\\ne|\\in|>|<|\\parallel|\\perp)",localrmatter)) == 0:
                tosplit = False
            if tosplit:
                rmatter = ", $"+lmatter[pos+1:].lstrip()+rmatter.lstrip()
                lmatter = lmatter[:pos]+"$"
            else:
                # Keep the comma: move it (and the character before it) to the finished tail.
                rmatter = lmatter[pos-1:]+rmatter
                lmatter = lmatter[:pos-1]
        else:
            # Comma outside math: move it (and the character before it) to the finished tail.
            rmatter = lmatter[pos-1:]+rmatter
            lmatter = lmatter[:pos-1]
    return lmatter+rmatter # the processed string

def MergeMathComma(string): # Decide for each "$, $" whether the two math environments belong together, and merge them if so
    # Delimiter pairs (as regexes) whose opener/closer may be separated by the "$, $".
    UnsplittedPairs = [(r"[\(\[]",r"[\)\]]"),(r"\{",r"\}"),(r"\\begin\{cases\}",r"\\end\{cases\}")]
    # lmatter: part still to be scanned; rmatter: already finished tail.
    lmatter,rmatter = (string,"")
    while not [item for item in re.finditer(r"\$,\s*\$",lmatter)] == []:
        CommaPos = [(item.start(),item.end()) for item in re.finditer(r"\$,\s*\$",lmatter)]
        ThePos = CommaPos[-1] # right-most "$, $" not yet handled
        tempstring = lmatter+rmatter
        lpos = tempstring[:ThePos[0]].rfind("$")
        rpos = tempstring[ThePos[1]+1:].find("$") + ThePos[1] + 1
        locallmatter = tempstring[lpos+1:ThePos[0]] # math text left of "$, $"
        localrmatter = tempstring[ThePos[1]:rpos] # math text right of "$, $"
        tomerge = False
        for p1,p2 in UnsplittedPairs:
            # Exactly one unclosed opener on the left and one unopened closer on the right.
            if len(SubstringOccurence(p1,locallmatter))-len(SubstringOccurence(p2,locallmatter)) == 1 and len(SubstringOccurence(p2,localrmatter))-len(SubstringOccurence(p1,localrmatter)) == 1:
                tomerge = True
        if tomerge:
            rmatter = ", "+lmatter[ThePos[1]:]+rmatter
            lmatter = lmatter[:ThePos[0]]
        else:
            rmatter = lmatter[ThePos[0]:]+rmatter
            lmatter = lmatter[:ThePos[0]]
    return lmatter+rmatter # the processed string
def ReverseMatchBrackets(string):
    """Locate the last ")" in ``string`` and the "(" that matches it.

    Returns (startpos, endpos); startpos is -1 when no matching "(" exists.
    Returns the plain integer -1 when ``string`` contains no ")" at all
    (kept for backward compatibility).
    """
    if not ")" in string:
        return -1
    endpos = string.rfind(")")
    startpos = -1
    depth = 0  # "(" minus ")" seen so far while walking leftwards
    for i in range(endpos-1,-1,-1):
        ch = string[i]
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        if depth == 1:
            startpos = i
            break
    return (startpos,endpos)


def RefineCasesEnv(string):
    """Tidy every ``cases`` environment in ``string``.

    Inside each environment the rows (separated by ``\\\\``) are stripped;
    a row ending in a parenthesised condition containing a relation
    (=, <, >, \\ge, \\le, \\in) has the parentheses replaced by commas;
    every comma gets an ``&`` column separator, and separators at the end
    of a row are removed again.  Empty rows (for example after a trailing
    ``\\\\``) are passed through unchanged.  Returns the processed string.
    """
    CasesPos = [(item.start(1),item.end(1)) for item in re.finditer(r"\\begin\{cases\}(.*?)\\end\{cases\}",string, re.DOTALL)]
    CasesPos.reverse()  # back to front, so earlier offsets stay valid
    for start,end in CasesPos:
        lmatter = string[:start]
        rmatter = string[end:]
        content = string[start:end]
        lines = [item.strip() for item in content.split(r"\\")]
        newlist = []
        for line in lines:
            newline = line
            if line.endswith(")"):  # safe for empty rows, unlike line[-1]
                lbracket,rbracket = ReverseMatchBrackets(line)
                if re.findall(r"(?:=|<|>|\\ge|\\le|\\in)",line[lbracket:rbracket]) != []:
                    # Replace the parentheses around the condition by commas.
                    newline = line[:lbracket] + ", " + line[lbracket+1:rbracket] + ","
            newline = re.sub(r",",",&",newline)
            newlist.append(newline)
        newcontent = r"\\".join(newlist)
        # Drop the separator again where nothing follows it in the row.
        newcontent = re.sub(r",[\s]*&[\s]*\\",r",\\",newcontent)
        newcontent = re.sub(r",[\s]*&[\s]*$",",",newcontent)
        string = lmatter + newcontent + rmatter
    # A row that already had a separator now has two: keep one.
    string = re.sub(r"&[,\s]&","& ",string)
    return string
def SubstringOccurence(regex,string):
    """Return the start positions of all matches of ``regex`` in ``string``."""
    poslist = [item.start() for item in re.finditer(regex,string)]
    return poslist


def select_grade_from_pro_dict(prodict,grades):
    """Return a problem dictionary whose usage records are limited to ``grades``.

    Each grade gets the suffix "届" if it lacks one; a usage record is kept
    (once) when it contains any of the grades.  With an empty ``grades``
    the original dictionary is returned as is.  Otherwise the result holds
    copies of the records, so ``prodict`` itself is left unmodified.
    """
    if len(grades) == 0:
        return prodict
    gradelist = [g if "届" in g else g+"届" for g in grades]
    adict = prodict.copy()
    for id in prodict:
        record = prodict[id].copy()  # copy: do not filter the caller's record in place
        record["usages"] = [u for u in prodict[id]["usages"] if any(g in u for g in gradelist)]
        adict[id] = record
    return adict


def GenerateSingleLessonNote(id,notesdict,metadict,templatepath,outputfilepath,consecutivenumbering = False, answered = False):
    """Generate and compile one handout from the notes database (2023-12-15 layout).

    notesdict["notes"][id] holds the handout (its "name" and one problem-id
    list per part); notesdict["structures"][<first letter of id, upper
    case>]["structure"] lists the parts with their display "name" and
    "spaceflag".  Empty parts are skipped.  Section headings are omitted
    when only one part is present; answer space is removed from parts whose
    spaceflag is false, and from all parts when ``answered`` is true.

    The body is substituted into the template at ``templatepath``, saved to
    ``outputfilepath`` and compiled.  Returns the LaTeX source in both the
    success and the failure case.
    """
    notetitle = id + r" \ " + notesdict["notes"][id]["name"]
    structure = notesdict["structures"][id[0].upper()]["structure"]
    note_contents = notesdict["notes"][id]
    output = ""
    sections_list = []
    problems_list = []
    for key in structure:
        if not len(note_contents[key]) == 0:
            sections_list.append(key)
            problems_list.append(",".join(note_contents[key]))
    rawoutput = GenerateStudentBodyString(problems=problems_list,sectiontitles=sections_list,pro_dict=metadict,consecutivenumbering= consecutivenumbering, answered= answered, spaceflag = True)
    # Re-split the body at \section so each part can be post-processed on its own.
    paragraphs = [p for p in rawoutput.split("\\section") if not p.strip() == ""]
    for item in paragraphs:
        sectionkey, content = re.findall(r"\{([\S]*)\}\n([\S\s]*)$",item)[0]
        if not len(paragraphs) == 1:
            output += "\\section{" + structure[sectionkey]["name"] + "}\n\n"
        if not structure[sectionkey]["spaceflag"] or answered:
            # This part gets no answer space: strip the \vspace commands.
            content = re.sub(r"\\vspace[\*]?\{[\S]*\}","\n",content)
        output += content + "\n\n"
    latex_raw = ReadTextFile(templatepath)
    # Replace the name / student-number line of the template with the school name.
    latex_raw = latex_raw.replace(r"学号\blank{50} \ 姓名\blank{80}","上海市控江中学")
    if sys.platform != "win32":
        # Off Windows: use the fandol font set and comment out \setCJKmainfont.
        latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
        latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
    # Fill the template placeholders with the title and the body.
    latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,(notetitle,output))
    SaveTextFile(latex_data,outputfilepath)
    outputdir,filename = os.path.split(outputfilepath)
    print(f"{filename}编译中...")
    if XeLaTeXCompile(outputdir,filename):
        print("编译成功")
        return latex_data
    else:
        print("编译失败")
        return latex_data
def getUnit(n):
    """Return the unit name for a number from 0 to 9 (None for anything outside that range)."""
    unitnames = ("暂无对应","第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元")
    if not (0<=n<=9):
        return None
    return unitnames[n]


def ExtractProblemIDs(paperdict,pro_dict):
    """Collect the problem ids stored in one handout record.

    Every value of ``paperdict`` that is a list whose entries are all keys
    of ``pro_dict`` is treated as an id list; the ids are returned in
    encounter order.
    """
    output = []
    for candidate in paperdict.values():
        if type(candidate) != list:
            continue
        if all(pid in pro_dict for pid in candidate):
            output.extend(candidate)
    return output


def ParseZipname(zipfilename):
    """Return the paper number (the digits before the first "_") from a zip file name."""
    basename = os.path.split(zipfilename)[1]
    return re.findall(r"^(\d*?)_",basename)[0]


def FindFile(dir,filename):
    """Return the directories under ``dir`` (recursively) that contain a file named ``filename``."""
    return [path for path, _subdirs, filenames in os.walk(dir) if filename in filenames]


def FindPaper(xiaoxianpid, answersheetpath):
    """Look up a platform paper number in the answer-sheet mapping files.

    Searches every "答题纸对应.json" below ``answersheetpath``.  For the
    first file that lists ``xiaoxianpid`` it returns
    (paper id, grade, problem id list, marks); the grade is "20" plus the
    two-digit "NN届" found in the directory path, the id list is either the
    stored "idlist" or assembled from the listed parts of "校本材料.json",
    and marks defaults to [].  Returns False when no file lists the number.
    """
    for folder in FindFile(answersheetpath,"答题纸对应.json"):
        anssheetjson = load_dict(os.path.join(folder,"答题纸对应.json"))
        if not xiaoxianpid in anssheetjson:
            continue
        entry = anssheetjson[xiaoxianpid]
        grade = "20"+re.findall(r"\d{2}届",folder)[0]
        pid = entry["id"]
        notesjson = load_dict(os.path.join(folder,"校本材料.json"))
        if "idlist" in entry:
            idlist = entry["idlist"]
        else:
            idlist = []
            for part in entry["parts"]:
                idlist += notesjson["notes"][pid][part].copy()
        marks = entry["marks"] if "marks" in entry else []
        return (pid,grade,idlist,marks)
    return False
def CheckPaperType(filepath,filename): # Detect the paper type from the statistics workbook `filename` found under `filepath`: False if the file is not found, "日常卷" if row 1 has no "第N步" step columns, "考试卷" otherwise
    statsfilepathlist = FindFile(filepath,filename)
    if statsfilepathlist == []:
        return False
    else:
        dir = statsfilepathlist[0] # only the first directory containing the file is inspected
        dfcurrent = pd.read_excel(os.path.join(dir,filename))
        if re.findall(r"第\d*步",str(dfcurrent.loc[1,:])) == []:
            return "日常卷"
        else:
            return "考试卷"

def generateColIndexandMarks(filepath,statsfilename,paperinfo): # From the workbook in the first directory of `filepath` and `paperinfo` (the tuple returned by FindPaper), find the usable column positions and their full marks; returns (validcols, marks), or False if the two lengths disagree
    dir = filepath[0]
    dfcurrent = pd.read_excel(os.path.join(dir,statsfilename))
    validcols = []
    for i in range(len(dfcurrent.columns)):
        colname = str(dfcurrent.iloc[1,i]) # column titles are read from data row 1
        # Keep single-choice / fill-in / subjective / step columns, but not columns whose title contains A, B, C or D.
        if ("单选" in colname or "填空" in colname or "主观" in colname or "步" in colname) and re.findall("[ABCD]",colname) == []:
            validcols.append(i)
    for col in range(len(validcols)-1,-1,-1): # back to front, so pop() does not disturb the positions still to visit
        colname = str(dfcurrent.iloc[1,validcols[col]])
        if "主观" in colname:
            colname_main = re.findall(r"^([\d\.]*)[\($]",colname[2:])[0] # the number after the two-character prefix
            t = [dfcurrent.iloc[1,c] for c in validcols[col+1:]]
            t = str(t)
            if colname_main in t: # the same number appears in a later kept column: drop this one
                validcols.pop(col)
    if paperinfo[3] == []:
        marks = [1] * len(validcols) # no marks recorded: assume 1 point per column
    else:
        marks = paperinfo[3]
    if len(marks) == len(validcols):
        return (validcols,marks)
    else:
        return False

def CheckValidity(classpath,gradename,threshold): # From the class folder `classpath`, the grade name and the submission ratio `threshold`, decide whether enough students submitted; returns (class name, valid flag)
    classname_raw = re.findall(r"(高[一二三])(\d*?)班",classpath)[0]
    classname = gradename + classname_raw[0] + classname_raw[1].zfill(2) + "班" # class number padded to two digits
    df = pd.read_excel(os.path.join(os.path.split(classpath)[0],"学科总体分析.xlsx")) # workbook sits in the parent directory of classpath
    totalstudents = df.loc[2,df.columns[1]]
    validstudents = df.loc[2,df.columns[2]]
    classvalidflag = False
    # NOTE(review): strict comparison - a class whose ratio equals threshold exactly counts as invalid, although the original header comment says "not less than"; confirm which is intended.
    if threshold * totalstudents < validstudents:
        print(f"{classname} 有效, 共 {totalstudents} 人, 提交 {validstudents} 人")
        classvalidflag = True
    else:
        print(f"!!! {classname} 无效, 共 {totalstudents} 人, 提交 {validstudents} 人")
    return (classname,classvalidflag)
def generateIDtoUsageCorrespondence(idlist,validcols,names):
    """Map each problem id to the positions (within ``validcols``) of its score columns.

    The k-th id (counting from 1) owns every column whose title carries k
    as its second dot-separated number.  The title is cut at "第" when it
    contains "步", otherwise at the first "(".
    """
    def question_index(title):
        # Part of the title that holds the dotted question number.
        pattern = r"^([^第]*)" if "步" in title else r"^([^\(]*)"
        return getindex(re.findall(pattern,title)[0])

    corr_dict = {}
    for ordinal, pid in enumerate(idlist, start=1):
        corr_dict[pid] = [j for j in range(len(validcols)) if question_index(names[j]) == ordinal]
    return corr_dict


def CalculateUsages(statsfilepathlist,statsfilename,gradename,threshold,marks,correspondence_dict,validcols,date):
    """Build the text of a usages metadata file from per-class score workbooks.

    For every directory whose class passes CheckValidity, the mean score of
    each valid column (rows 2 to the third-last) is divided by its full
    mark; each problem in ``correspondence_dict`` with at least one column
    gets a record "<date>\\t<class>\\t<rates...>".  Returns the text, or
    False if any class shows a rate above 1.
    """
    output = "ans\n\n\n"
    validflag = True
    for folder in statsfilepathlist:
        classname, valid = CheckValidity(folder,gradename,threshold)
        if not valid:
            continue
        dfcurrent = pd.read_excel(os.path.join(folder,statsfilename))
        means = dfcurrent.iloc[2:-2,validcols].mean()/marks
        if max(means)>1:
            # A rate above 1 means the full marks are wrong.
            print("满分数据有误!!!")
            validflag = False
            continue
        means_out = [f"{t:.3f}" for t in means]
        for pid, cols in correspondence_dict.items():
            if len(cols) == 0:
                continue
            diffs = "\t".join([means_out[u] for u in cols])
            usages = f"{date}\t{classname}\t{diffs}"
            output += f"{pid}\n{usages}\n\n\n"
    if not validflag:
        return False
    return output


def getindex(string,pos = 2):
    """Return the ``pos``-th (1-based) dot-separated field of ``string`` as an int."""
    fields = string.split(".")
    return int(fields[pos-1])


if __name__ == "__main__":
    print("数据库工具, import用.")