丰富database tools
This commit is contained in:
parent
9490e03460
commit
528a29b2ca
|
|
@ -1,4 +1,6 @@
|
||||||
import json,re,os,Levenshtein
|
import json,re,os,Levenshtein,fitz
|
||||||
|
|
||||||
|
#读取存储json数据库相关(不限于题号数据库)
|
||||||
|
|
||||||
def load_dict(filename): #根据filename读取json数据库并转化为python字典
|
def load_dict(filename): #根据filename读取json数据库并转化为python字典
|
||||||
with open(filename,"r",encoding = "u8") as f:
|
with open(filename,"r",encoding = "u8") as f:
|
||||||
|
|
@ -51,6 +53,53 @@ def generate_problem_series(startingid,length,adict): #在adict字典里返回
|
||||||
return ",".join(excludelist) #返回按顺序的题号列表
|
return ",".join(excludelist) #返回按顺序的题号列表
|
||||||
|
|
||||||
|
|
||||||
|
def generate_number_set(string):
    """Expand a problem-ID expression into a list of six-character ID strings.

    The expression is a comma-separated list of items. Each item is either a
    single number or an inclusive range written ``start:end``. All whitespace
    (including newlines) is removed before parsing. For example ``"1:3,5"``
    yields ``["000001", "000002", "000003", "000005"]``.

    Args:
        string: Expression that may contain ":" and ",", e.g. ``"1:3,5"``.

    Returns:
        A list of IDs, each left-padded with zeros to six characters, in the
        order the items appear in the expression. Empty items (an empty
        expression, a trailing or doubled comma) contribute nothing.

    Raises:
        ValueError: If a range item does not contain exactly one ":" or its
            bounds are not integers.
    """
    compact = re.sub(r"\s", "", string)
    numbers_list = []
    for item in compact.split(","):
        if not item:
            # "".zfill(6) would otherwise inject a bogus "000000" ID.
            continue
        if ":" not in item:
            numbers_list.append(item.zfill(6))
            continue
        start, end = item.split(":")
        numbers_list.extend(
            str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
        )
    return numbers_list
|
||||||
|
|
||||||
|
def generate_exp(id_list):
    """Compress a list of problem IDs into a ":" / "," expression string.

    Runs of IDs whose integer values increase by exactly one are collapsed
    into ``first:last``; runs are joined with ",". For example
    ``["000001", "000002", "000003", "000005"]`` gives
    ``"000001:000003,000005"``. The list is processed in the given order and
    is neither sorted nor de-duplicated.

    Args:
        id_list: List of ID strings; each must be parseable by ``int`` when
            the list has more than one element.

    Returns:
        The compressed expression, or ``"无有效题号"`` when ``id_list`` is empty.

    Raises:
        ValueError: If an ID that has to be compared is not an integer string.
    """
    if not id_list:
        return "无有效题号"

    def _format_run(first, last):
        # A run of length one is written as the bare ID.
        return first if first == last else first + ":" + last

    exp_list = []
    start = end = id_list[0]
    for problem_id in id_list[1:]:
        if int(problem_id) - 1 == int(end):
            # Still consecutive: extend the current run.
            end = problem_id
            continue
        exp_list.append(_format_run(start, end))
        start = end = problem_id
    # Flush the run that was still open when the input ended.
    exp_list.append(_format_run(start, end))
    return ",".join(exp_list)
|
||||||
|
|
||||||
|
def parsePDF(filePath):
    """Extract the text of every page of a PDF file.

    Args:
        filePath: Path of the document; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages in page order, with a newline appended after
        each page's text.
    """
    with fitz.open(filePath) as doc:
        # Build the result in one join instead of growing a string per page;
        # the join finishes before the document is closed.
        return "".join(page.get_text() + "\n" for page in doc.pages())
|
||||||
|
|
||||||
if __name__ == "__main__":
    # This module is meant to be imported; run directly it only prints a notice.
    print("数据库工具, import用.")
|
||||||
Reference in New Issue