From 963b8f8423da6f078ecfa89ad999464eaf82fa80 Mon Sep 17 00:00:00 2001
From: "weiye.wang"
Date: Wed, 7 Feb 2024 09:32:31 +0800
Subject: [PATCH] =?UTF-8?q?database=5Ftools=E4=B8=AD=E6=96=B0=E5=A2=9E?=
 =?UTF-8?q?=E5=87=A0=E4=B8=AA=E5=92=8C=E5=8D=95=E5=85=83=E6=8C=82=E9=92=A9?=
 =?UTF-8?q?=E6=9C=89=E5=85=B3=E7=9A=84functions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the added functions were revised by hand (English docstrings,
no shadowing of the builtin id, whitespace-safe digit parsing in
UnitRectoMetadataText). The post-image blob id on the "index" line therefore
no longer matches; regenerate with git format-patch after applying.
Context-line indentation was reconstructed as four spaces.

 工具v2/database_tools.py | 74 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/工具v2/database_tools.py b/工具v2/database_tools.py
index bb93b240..d191bfe7 100644
--- a/工具v2/database_tools.py
+++ b/工具v2/database_tools.py
@@ -1829,5 +1829,79 @@ def generateAnswerList(string,anspreamble = "答案: \\textcolor{red}{"): #从La
     return anslist
 
 
+def unUnitted(adict):
+    """Return the IDs in adict that have no unit assigned yet.
+
+    An entry counts as assigned when the concatenation of its "tags"
+    contains "单元" or "暂无".
+    """
+    unassigned = []
+    for qid, entry in adict.items():
+        joined = "".join(entry["tags"])
+        if "单元" not in joined and "暂无" not in joined:
+            unassigned.append(qid)
+    return unassigned
+
+
+def AutoAssignTagNotoLaTeX(newlist, adict):
+    """Suggest unit numbers for the IDs in newlist and render them as LaTeX.
+
+    Unit tags (tags containing "单元") are copied from the first entry of the
+    ID's "same" list, else from the first entry of its "related" list, else
+    the most frequent unit tag among the stringmaxsim results whose second
+    element exceeds 0.75 is used. Suggestions may be wrong; review manually.
+
+    Returns a tuple (body, record): body is the enumerate environment with
+    one item per ID, record holds one "ID<TAB>unit numbers" line per ID.
+    """
+    items = []
+    records = []
+    treateddict = treat_dict(adict)
+    for qid in newlist:
+        samelist = adict[qid]["same"]
+        relatedlist = adict[qid]["related"]
+        if samelist != []:
+            unittags = [t for t in adict[samelist[0]]["tags"] if "单元" in t]
+        elif relatedlist != []:
+            unittags = [t for t in adict[relatedlist[0]]["tags"] if "单元" in t]
+        else:
+            # No linked question: let sufficiently similar questions vote.
+            votes = {}
+            similar = stringmaxsim(treateddict[qid]["content"], treateddict, 10)
+            for rid, sim in similar:
+                if rid != qid and sim > 0.75:
+                    for t in adict[rid]["tags"]:
+                        if "单元" in t:
+                            votes[t] = votes.get(t, 0) + 1
+            # max() keeps the first-seen tag on ties, as dicts preserve order.
+            unittags = [max(votes, key=votes.get)] if votes else []
+        # Unit numbers are concatenated without a separator.
+        tagnumbers = "".join(str(getUnitNumber(u)) for u in unittags)
+        items.append(f"\\item ({qid})[{tagnumbers}] {adict[qid]['content']}\n\n")
+        records.append(f"{qid}\t{tagnumbers}\n")
+    output = "\\begin{enumerate}\n\n" + "".join(items) + "\\end{enumerate}\n\n"
+    return (output, "".join(records))
+
+
+def UnitRectoMetadataText(data):
+    """Turn "ID<TAB>unit numbers" records into the tags text for metadata.txt.
+
+    Blank lines in data are skipped. Each character of the unit-number field
+    is passed through getUnit(int(...)), so the field is read one digit at a
+    time. The ID is left-padded with zeros to six characters.
+    """
+    output = "tags\n\n"
+    for line in data.split("\n"):
+        if line.strip() == "":
+            continue
+        qid, unitno = line.split("\t")
+        # Strip first so a stray "\r" or space never reaches int().
+        units = "".join(f"{getUnit(int(n))}\n" for n in unitno.strip())
+        output += f"{qid.zfill(6)}\n"
+        output += f"{units}\n\n"
+    return output
+
+
+
 if __name__ == "__main__":
     print("数据库工具, import用.")
\ No newline at end of file