From 918d1108f93854ad408718a519f291ff8cc8704f Mon Sep 17 00:00:00 2001 From: wangweiye7840 Date: Tue, 16 Apr 2024 14:38:49 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8D=95=E5=85=83=E6=8C=82=E9=92=A9=20?= =?UTF-8?q?=E8=BF=81=E7=A7=BB=E5=88=B0=E5=B7=A5=E5=85=B7v3=20=E5=AE=8C?= =?UTF-8?q?=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 工具v3/database_tools_2.py | 68 +++++++++++++++++++++++++++++--------- 工具v3/单元挂钩.py | 4 ++- 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/工具v3/database_tools_2.py b/工具v3/database_tools_2.py index 64afcf01..c403864c 100644 --- a/工具v3/database_tools_2.py +++ b/工具v3/database_tools_2.py @@ -100,11 +100,13 @@ def pre_treating(string): #删除字符串中对比较无用的字符, 以供比 string = re.sub(r"[,\.:;?]","",string) return string #返回处理后的字符串 -def findsame(id,same_list): #在same_list中寻找与id相同的题号 - set1 = set([pair[1] for pair in same_list if pair[0] == id]) - set2 = set([pair[0] for pair in same_list if pair[1] == id]) +def findsru(id,alist): #在same_list中寻找与id相同的题号 + set1 = set([pair[1] for pair in alist if pair[0] == id]) + set2 = set([pair[0] for pair in alist if pair[1] == id]) return sorted(list(set1 | set2)) + + def findsameinDB(id): #在数据库中寻找与id相同的题号 mydb = connect(hostname = "wwylss.synology.me", port = "13306", username="tikuuser", pwd="Kjmathds_2024", db = "tikutest") mycursor = mydb.cursor() @@ -129,18 +131,54 @@ def findrelatedinDB(id): #在数据库中寻找与id关联的题号 set2 = set([ret[0] for ret in mycursor.fetchall()]) return sorted(list(set1 | set2)) +def get_unit_tags(id): + mydb = connect(hostname = "wwylss.synology.me", port = "13306", username="tikuuser", pwd="Kjmathds_2024", db = "tikutest") + mycursor = mydb.cursor() + sql = "SELECT tagname FROM tagcorresp WHERE ID = (%s);" + val = (id,) + mycursor.execute(sql,val) + unittags = [ret[0] for ret in mycursor.fetchall() if "单元" in ret[0]] + mydb.close() + return unittags + +def get_problem_content(id): + mydb = connect(hostname = "wwylss.synology.me", port = "13306", username="tikuuser", pwd="Kjmathds_2024", db = "tikutest") + mycursor = mydb.cursor() + sql = "SELECT content FROM problems WHERE ID = (%s);" + val = (id,) + mycursor.execute(sql,val) + content = mycursor.fetchall()[0][0] + mydb.close() + return content + + +def generate_same_list(): + mydb = connect(hostname = "wwylss.synology.me", port = "13306", username="tikuuser", pwd="Kjmathds_2024", db = "tikutest") + mycursor = mydb.cursor() + mycursor.execute("SELECT ID,SAME_ID FROM same;") + same_list = [(ret[0],ret[1]) for ret in mycursor.fetchall()] + mydb.close() + return same_list.copy() + +def generate_related_list(): + mydb = connect(hostname = "wwylss.synology.me", port = "13306", username="tikuuser", pwd="Kjmathds_2024", db = "tikutest") + mycursor = mydb.cursor() + mycursor.execute("SELECT ID,RELATED_ID FROM related;") + related_list = [(ret[0],ret[1]) for ret in mycursor.fetchall()] + mydb.close() + return related_list.copy() + def treat_dict(): #对整个题库字典中的内容部分进行预处理,删除无用字符 treated_dict = {} mydb = connect(hostname = "wwylss.synology.me", port = "13306", username="tikuuser", pwd="Kjmathds_2024", db = "tikutest") mycursor = mydb.cursor() mycursor.execute("SELECT ID,content FROM problems;") p_dict = {id:content for id,content in mycursor.fetchall()} - mycursor.execute("SELECT ID,SAME_ID FROM same;") - same_list = [(ret[0],ret[1]) for ret in mycursor.fetchall()] + # mycursor.execute("SELECT ID,SAME_ID FROM same;") + # same_list = [(ret[0],ret[1]) for ret in mycursor.fetchall()] for id in p_dict: treated_dict[id] = {} - treated_dict[id]["content"] = pre_treating(p_dict[id]) - treated_dict[id]["same"] = findsame(id,same_list).copy() + treated_dict[id] = pre_treating(p_dict[id]) mydb.close() return treated_dict #返回处理后的字典, 含内容字段及相同题目字段 @@ -2195,26 +2233,26 @@ def unUnitted(idexp): #返回adict中未赋单元的id列表 return ununittedids -def AutoAssignTagNotoLaTeX(newlist): #根据题库中已有的单元挂钩和字符串相似比对, 给newlist中的每一个ID在可能的情况下自动适配单元号码, 可能有误, 需人工审核, 返回(LaTeX代码的body(从\begin{enumerate}到\end{enumerate}的部分),题号和单元号码的对应)组成的二元tuple +def AutoAssignTagNotoLaTeX(newlist,allsamelist,allrelatedlist): output = "\\begin{enumerate}\n\n" tagrecord = "" print("正在生成简化题目的字典...") treateddict = treat_dict() print("简化题目字典生成完毕.") for id in newlist: - samelist = findsameinDB(id) - relatedlist = findrelatedinDB(id) + samelist = findsru(id,allsamelist) + relatedlist = findsru(id,allrelatedlist) if not samelist == []: - unittags = [i for i in adict[samelist[0]]["tags"] if "单元" in i] + unittags = get_unit_tags(samelist[0]) elif not relatedlist == []: - unittags = [i for i in adict[relatedlist[0]]["tags"] if "单元" in i] + unittags = get_unit_tags(relatedlist[0]) else: - simgroup = stringmaxsim(treateddict[id]["content"],treateddict,10) + simgroup = stringmaxsim(treateddict[id],treateddict,10) unittags_raw = {} for g in simgroup: rid,sim = g if not rid == id and sim > 0.75: - for t in [i for i in adict[rid]["tags"] if "单元" in i]: + for t in get_unit_tags(rid): if t in unittags_raw: unittags_raw[t] += 1 else: @@ -2226,7 +2264,7 @@ def AutoAssignTagNotoLaTeX(newlist): #根据题库中已有的单元挂钩和字 tagnumbers = "" for u in unittags: tagnumbers += str(getUnitNumber(u)) - output += f"\\item ({id})[{tagnumbers}] {adict[id]['content']}\n\n" + output += f"\\item ({id})[{tagnumbers}] {get_problem_content(id)}\n\n" tagrecord += f"{id}\t{tagnumbers}\n" output += "\\end{enumerate}\n\n" return (output,tagrecord) diff --git a/工具v3/单元挂钩.py b/工具v3/单元挂钩.py index 1168f6a7..7880e11d 100644 --- a/工具v3/单元挂钩.py +++ b/工具v3/单元挂钩.py @@ -14,13 +14,15 @@ class MyWindow(QWidget,Ui_Form): def exec(self): # pro_dict = load_dict("../题库0.3/Problems.json") allids = getAllIDsExp() + all_same_list = generate_same_list() + all_related_list = generate_related_list() if self.radioButton_phase1.isChecked(): idexp = self.lineEdit_ids.text() if idexp.strip() == "": idlist = unUnitted(allids) else: idlist = generate_number_set(idexp) - latex_body,tagrec = AutoAssignTagNotoLaTeX(idlist,pro_dict) + latex_body,tagrec = AutoAssignTagNotoLaTeX(idlist,all_same_list,all_related_list) latex_raw = ReadTextFile("模板文件/讲义模板.txt") latex_data = StringSubstitute(r"<<[\s\S]*?待替换[\s\S]*?>>",latex_raw,("待检查单元",latex_body)) SaveTextFile(tagrec,"临时文件/单元对应.txt")