From 528a29b2cad15e3df7dfcd0e064ca988837d3e2c Mon Sep 17 00:00:00 2001
From: "weiye.wang" <kj_wangweiye@126.com>
Date: Thu, 22 Jun 2023 14:26:08 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=B0=E5=AF=8Cdatabase=20tools?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 工具/database_tools.py | 51 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/工具/database_tools.py b/工具/database_tools.py
index a655533e..b45c79ca 100644
--- a/工具/database_tools.py
+++ b/工具/database_tools.py
@@ -1,4 +1,6 @@
-import json,re,os,Levenshtein
+import json,re,os,Levenshtein,fitz
+
+#Reading and storing JSON databases (not limited to the problem-ID database)
 
 def load_dict(filename): #根据filename读取json数据库并转化为python字典
     with open(filename,"r",encoding = "u8") as f:
@@ -51,6 +53,53 @@ def generate_problem_series(startingid,length,adict): #在adict字典里返回
     return ",".join(excludelist) #返回按顺序的题号列表
 
 
+def generate_number_set(string): #Expand an ID expression that may contain ":" (inclusive range) and "," (separator) into a list of 6-character zero-padded ID strings, e.g. "1:3,5" -> ["000001","000002","000003","000005"]
+    string = re.sub(r"[\n\s]","",string) #remove all whitespace before parsing (\s already matches \n)
+    string_list = string.split(",")
+    numbers_list = []
+    for s in string_list:
+        if not ":" in s:
+            numbers_list.append(s.zfill(6)) #single item: left-pad with zeros to 6 characters; not validated as a number, so an empty item (e.g. from a trailing comma) becomes "000000"
+        else:
+            start,end = s.split(":") #raises ValueError if the item contains more than one ":"
+            for ind in range(int(start),int(end)+1): #both endpoints are included; int() raises ValueError on non-numeric endpoints
+                numbers_list.append(str(ind).zfill(6))
+    return numbers_list #list of zero-padded ID strings, in the order they appear in the input
+
+def generate_exp(id_list): #Compress a list of ID strings into an expression using ":" and ",": runs of consecutive IDs become "start:end", e.g. ["000001","000002","000003","000005"] -> "000001:000003,000005"; an empty list yields "无有效题号"
+    if not len(id_list) == 0:
+        exp_list = []
+        start = id_list[0] #first ID of the run currently being built
+        current = start #most recently seen ID of the run
+        end = start #last ID of the run; always assigned together with current
+        for id in id_list[1:]: #NOTE(review): the loop variable shadows the builtin id() inside this function
+            # only adjacent elements are compared, so the list is presumably expected to be sorted ascending - verify against callers
+            if int(id)-1 == int(current): #id is exactly one greater than the previous element: extend the run
+                current = id
+                end = id
+            else: #gap found: emit the finished run, then start a new one at id
+                if not start == end:
+                    exp_list.append('"'+start+":"+end+'"') #the surrounding quotes are stripped again after the join below
+                else:
+                    exp_list.append('"'+start+'"')
+                start = id
+                current = id
+                end = id
+        if not start == end: #emit the final run, which the loop never flushes
+            exp_list.append('"'+start+":"+end+'"')
+        else:
+            exp_list.append('"'+start+'"')
+        exp_str = ",".join(exp_list).replace('"',"") #join the runs with "," and remove every '"' character from the result
+    else:
+        exp_str = "无有效题号"
+    return exp_str #expression string containing ":" and/or ",", or the placeholder text for an empty list
+
+def parsePDF(filePath): #Extract the text of the PDF file at filePath and return it as one string
+    with fitz.open(filePath) as doc: #fitz is the import name of PyMuPDF; the with-block closes the document on exit
+        text = ""
+        for page in doc.pages():
+            text += page.get_text() + "\n" #append each page's text followed by a newline
+    return text
 
 if __name__ == "__main__":
     print("数据库工具, import用.")
\ No newline at end of file