From 963b8f8423da6f078ecfa89ad999464eaf82fa80 Mon Sep 17 00:00:00 2001
From: "weiye.wang" <kj_wangweiye@126.com>
Date: Wed, 7 Feb 2024 09:32:31 +0800
Subject: [PATCH] =?UTF-8?q?database=5Ftools=E4=B8=AD=E6=96=B0=E5=A2=9E?=
 =?UTF-8?q?=E5=87=A0=E4=B8=AA=E5=92=8C=E5=8D=95=E5=85=83=E6=8C=82=E9=92=A9?=
 =?UTF-8?q?=E6=9C=89=E5=85=B3=E7=9A=84functions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 工具v2/database_tools.py | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/工具v2/database_tools.py b/工具v2/database_tools.py
index bb93b240..d191bfe7 100644
--- a/工具v2/database_tools.py
+++ b/工具v2/database_tools.py
@@ -1829,5 +1829,59 @@ def generateAnswerList(string,anspreamble = "答案: \\textcolor{red}{"): #从La
     return anslist
 
 
+def unUnitted(adict): # Return the IDs in adict whose concatenated tags contain neither "单元" (unit) nor "暂无" (none yet), i.e. entries with no unit assigned
+    return [key for key, entry in adict.items() if all(mark not in "".join(entry["tags"]) for mark in ("单元", "暂无"))]
+
+
+def AutoAssignTagNotoLaTeX(newlist,adict): # Suggest unit numbers for the IDs in newlist from existing unit tags and string similarity (needs manual review)
+    """Build a LaTeX enumerate body that proposes unit numbers for new problems.
+
+    For each ID in newlist the unit tags (tags containing "单元") are taken from
+    the first entry of its "same" list, else the first entry of its "related"
+    list, else the most frequent unit tag among the matches returned by
+    stringmaxsim (other IDs with similarity above 0.75). The guesses may be
+    wrong and need manual review.
+
+    Returns (LaTeX body, text with one "id<TAB>unit numbers" line per ID).
+    """
+    items = []
+    records = []
+    treated = treat_dict(adict)
+    for pid in newlist:
+        same, related = adict[pid]["same"], adict[pid]["related"]
+        source = same if same != [] else related  # "same" takes priority over "related"
+        if source != []:
+            unittags = [t for t in adict[source[0]]["tags"] if "单元" in t]
+        else:
+            votes = {}  # unit tag -> number of similar problems carrying it
+            for rid, sim in stringmaxsim(treated[pid]["content"], treated, 10):
+                if rid == pid or not sim > 0.75:
+                    continue
+                for t in adict[rid]["tags"]:
+                    if "单元" in t:
+                        votes[t] = votes.get(t, 0) + 1
+            unittags = [max(votes, key = votes.get)] if votes else []
+        tagnumbers = "".join(str(getUnitNumber(u)) for u in unittags)
+        items.append(f"\\item ({pid})[{tagnumbers}] {adict[pid]['content']}\n\n")
+        records.append(f"{pid}\t{tagnumbers}\n")
+    output = "\\begin{enumerate}\n\n" + "".join(items) + "\\end{enumerate}\n\n"
+    return (output, "".join(records))
+
+
+def UnitRectoMetadataText(data): # Convert "id<TAB>unit digits" records into the unit-tag text block used by metadata.txt
+    """Return "tags" followed, per record, by the ID zero-padded to 6 characters and one getUnit(int(c)) line per character c of the unit field.
+
+    Blank lines and records with an empty unit field are skipped. Both fields are stripped first, so a trailing space or carriage return (CRLF input) no longer reaches int().
+    """
+    blocks = ["tags\n\n"]
+    for line in filter(str.strip, data.split("\n")):  # filter(str.strip, ...) drops blank lines
+        pid, unitno = (part.strip() for part in line.split("\t"))  # still raises ValueError unless the line has exactly one tab
+        if unitno:
+            units = "".join(f"{getUnit(int(n))}\n" for n in unitno)
+            blocks.append(f"{pid.zfill(6)}\n{units}\n\n")
+    return "".join(blocks)
+    
+
+
 if __name__ == "__main__":
     print("数据库工具, import用.")
\ No newline at end of file