# Database helper functions, reconstructed from the added lines of a git patch
# to 工具/database_tools.py (commit 528a29b2cad15e3df7dfcd0e064ca988837d3e2c,
# author "weiye.wang", 2023-06-22, subject "丰富database tools").
#
# Only definitions that are fully contained in the patch are reproduced here.
# The patch also shows fragments of load_dict and generate_problem_series as
# diff context; their bodies are cut off by the hunk boundaries, so they are
# left out, together with the json/os/Levenshtein imports that none of the
# functions below use.

import re


def generate_number_set(string):
    """Expand a problem-ID expression into a list of six-digit ID strings.

    The expression is a comma-separated list whose items are either a single
    ID or an inclusive range written ``start:end``.  All whitespace is
    ignored.  Example: ``"1:3,5"`` gives
    ``["000001", "000002", "000003", "000005"]``.

    Args:
        string: The expression to expand.

    Returns:
        A list of IDs, each left-padded with zeros to six characters, in the
        order they appear in the expression.  Empty items (empty input,
        trailing or doubled commas) are skipped instead of being padded into
        a bogus ``"000000"`` ID.  A range whose start is greater than its end
        contributes nothing.

    Raises:
        ValueError: If a range item has more than one ``":"`` or a
            non-integer bound.
    """
    compact = re.sub(r"\s", "", string)
    numbers_list = []
    for token in compact.split(","):
        if not token:
            # Nothing between two commas (or empty input): not an ID.
            continue
        if ":" not in token:
            numbers_list.append(token.zfill(6))
            continue
        start, end = token.split(":")
        numbers_list.extend(
            str(number).zfill(6) for number in range(int(start), int(end) + 1)
        )
    return numbers_list


def generate_exp(id_list):
    """Compress a list of problem IDs into a ``start:end,...`` expression.

    Neighbouring entries whose integer values increase by exactly one are
    merged into a ``start:end`` range; everything else is emitted as a single
    ID.  Example: ``["000001", "000002", "000003", "000005"]`` gives
    ``"000001:000003,000005"``.

    The list is processed in the given order and is not sorted or
    de-duplicated, and the ID strings are emitted exactly as received.

    Args:
        id_list: Sequence of ID strings; each must be parseable by ``int``
            when the sequence has more than one element.

    Returns:
        The comma-separated expression, or ``"无有效题号"`` when ``id_list``
        is empty.
    """
    if not id_list:
        return "无有效题号"

    def render(first, last):
        # A run of length one is written as a bare ID, not "x:x".
        return first if first == last else first + ":" + last

    pieces = []
    run_start = run_end = id_list[0]
    for item in id_list[1:]:
        if int(item) - 1 == int(run_end):
            # Still consecutive: extend the current run.
            run_end = item
        else:
            pieces.append(render(run_start, run_end))
            run_start = run_end = item
    pieces.append(render(run_start, run_end))
    return ",".join(pieces)


def parsePDF(filePath):
    """Return the text of every page of a PDF, one newline after each page.

    Args:
        filePath: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The concatenation of ``page.get_text() + "\\n"`` over all pages.
    """
    # Imported here rather than at module level so the ID helpers above stay
    # importable when the fitz (PyMuPDF) package is not installed.
    import fitz

    with fitz.open(filePath) as doc:
        return "".join(page.get_text() + "\n" for page in doc.pages())


if __name__ == "__main__":
    print("数据库工具, import用.")