20230403 afternoon

This commit is contained in:
WangWeiye 2023-04-03 19:31:52 +08:00
parent 3a14be5815
commit 69687a0601
31 changed files with 2361 additions and 5310 deletions

View File

@ -19,7 +19,7 @@
"source": [
"import os,re,json\n",
"\"\"\"这里编辑题号(列表)后将在vscode中打开窗口, 编辑后保存关闭, 随后运行第二个代码块\"\"\"\n",
"problems = \"31158:31196\"\n",
"problems = \"14085\"\n",
"\n",
"def generate_number_set(string,dict):\n",
" string = re.sub(r\"[\\n\\s]\",\"\",string)\n",

View File

@ -1,6 +1,6 @@
import os,re,json
"""这里编辑题号(列表)后将在vscode中打开窗口, 编辑后保存关闭, 随后运行第二个代码块"""
problems = "1"
problems = "10000,10003"
def generate_number_set(string,dict):
string = re.sub(r"[\n\s]","",string)

View File

@ -0,0 +1,102 @@
import os,re,difflib,Levenshtein,time,json
# Similarity threshold: a pair scoring strictly above it is recorded as identical
threshold = 0.99
# Text file that receives the groups of identical problem ids
outputfile = r"临时文件/相同题目列表.txt"
#Build the id list: commas separate segments, "a:b" inside a segment is the closed integer range a..b
def generate_number_set(string):
    """Expand an id specification into a list of 6-character, zero-padded ids.

    Args:
        string: Specification such as ``"1,3:5"``. All whitespace is removed
            before parsing.

    Returns:
        The ids in specification order, each left-padded with zeros to six
        characters, e.g. ``["000001", "000003", "000004", "000005"]``.

    Raises:
        ValueError: If a range segment does not consist of exactly two
            integers separated by one colon.
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for segment in string.split(","):
        if not segment:
            # An empty segment (empty input, trailing or doubled comma) used
            # to be padded into the bogus id "000000"; skip it instead.
            continue
        if ":" in segment:
            start, end = segment.split(":")
            numbers_list.extend(
                str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
            )
        else:
            numbers_list.append(segment.zfill(6))
    return numbers_list
#String normalisation applied before comparison
def pre_treating(string):
    """Strip layout and markup noise from a problem text.

    The removals run in this order: ``\\begin{center}...\\end{center}`` spans,
    the tokens ``bracket{n}``, ``blank{n}``, ``fourch``, ``twoch`` and
    ``onech``, all whitespace together with backslashes, braces, dollar signs,
    parentheses and square brackets, the words ``displaystyle`` and
    ``overrightarrow``, and finally the punctuation ``, . : ; ?``.

    Returns:
        The reduced string.
    """
    removal_patterns = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)",
        r"[,\.:;?]",
    )
    cleaned = string
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
#String comparison based on difflib
def difflab_get_equal_rate(str1, str2):
    """Return the ``difflib.SequenceMatcher`` ratio of the two strings.

    The strings are compared exactly as given; no normalisation happens here.
    """
    matcher = difflib.SequenceMatcher(None, str1, str2)
    return matcher.ratio()
#String comparison based on Levenshtein.jaro
def jaro_get_equal_rate(str1,str2):
    """Return ``Levenshtein.jaro(str1, str2)``.

    The strings are compared exactly as given; no normalisation happens here.
    """
    similarity = Levenshtein.jaro(str1, str2)
    return similarity
#String comparison based on Levenshtein.ratio
def Lev_get_equal_rate(str1,str2):
    """Return ``Levenshtein.ratio(str1, str2)``.

    The strings are compared exactly as given; no normalisation happens here.
    """
    similarity = Levenshtein.ratio(str1, str2)
    return similarity
#Choose the comparison method
sim_test = jaro_get_equal_rate

#Load the problem database
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

#Shallow-copy every record and replace its "content" with the normalised text
pro_dict_treated = {}
for pid, record in pro_dict.items():
    treated_record = record.copy()
    treated_record["content"] = pre_treating(treated_record["content"])
    pro_dict_treated[pid] = treated_record
print("题目数:",len(pro_dict))

#Record the start time
starttime = time.time()
group_lines = []
count = 0
keys = list(pro_dict_treated.keys())
while len(keys) >= 2:
    count += 1
    if count % 500 == 0:
        print(count)  # progress indicator
    currentid = keys.pop(0)
    content1 = pro_dict_treated[currentid]["content"]
    # Ids still in the queue that score above the threshold and are not yet
    # listed under the current problem's "same" or "related" fields.
    same = [
        other
        for other in keys
        if other not in pro_dict[currentid]["same"]
        and other not in pro_dict[currentid]["related"]
        and sim_test(content1, pro_dict_treated[other]["content"]) > threshold
    ]
    if same:
        # Matched ids leave the queue so they do not start a group of their own.
        for matched in same:
            keys.remove(matched)
        group_lines.append(currentid + "," + ",".join(same) + "\n\n")
alike_problems = "".join(group_lines)
endtime = time.time()
print("耗时: %.3f" %(endtime-starttime))

with open(outputfile,"w",encoding = "u8") as f:
    f.write(alike_problems)

View File

@ -2,7 +2,7 @@ import os,re,json
"""---设置关键字, 同一field下不同选项为or关系, 同一字典中不同字段间为and关系, 不同字典间为or关系, _not表示列表中的关键字都不含, 同一字典中的数字用来供应同一字段不同的条件之间的and---"""
keywords_dict_table = [
{"origin":["一模"]}
{"origin":["杨浦"]}
]
"""---关键字设置完毕---"""
# 示例: keywords_dict_table = [

View File

@ -14,7 +14,7 @@
"filepath = \"数据导入作业文件\"\n",
"\n",
"# date = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2)\n",
"date = \"20230329\"\n",
"date = \"20230331\"\n",
"\n",
"#生成文件名tex_file和zip_file\n",
"files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",

View File

@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os,re\n",
"import win32clipboard as wc\n",
"import win32con\n",
"\n",
"# 获取剪切板内容\n",
"def getCopy():\n",
" wc.OpenClipboard()\n",
" t = wc.GetClipboardData(win32con.CF_UNICODETEXT)\n",
" wc.CloseClipboard()\n",
" return t\n",
"\n",
"# 写入剪切板内容\n",
"def setCopy(str):\n",
" wc.OpenClipboard()\n",
" wc.EmptyClipboard()\n",
" wc.SetClipboardData(win32con.CF_UNICODETEXT, str)\n",
" wc.CloseClipboard()\n",
"\n",
"def dollared(string):\n",
" flag = True\n",
" for c in string:\n",
" if not c in \"1234567890.+-:[]()\":\n",
" flag = False\n",
" break\n",
" if flag:\n",
" string = \"$\" + string + \"$\"\n",
" return string\n",
"\n",
"\n",
"\n",
"data = getCopy()\n",
"\n",
"data1 = \"\"\n",
"for c in data:\n",
" if 65296 <= ord(c) < 65306:\n",
" data1 += str(ord(c)-65296)\n",
" else:\n",
" data1 += c\n",
"data = data1\n",
"data = data.replace(\"\",\".\").replace(\"\",\":\")\n",
"elements = data.split(\"\\n\")\n",
"elements_per_line = int(elements.pop(-1)) #这里需要修改\n",
"contents = \"\\\\begin{center}\\n\\\\begin{tabular}{|\"\n",
"for i in range(elements_per_line):\n",
" contents += \"c|\"\n",
"contents += \"}\\n\"\n",
"contents += r\"\\hline\"+\"\\n\"\n",
"col = 1\n",
"for element in elements:\n",
" if col != 1:\n",
" contents += \" & \"\n",
" contents += dollared(element.strip())\n",
" if col == elements_per_line:\n",
" contents += r\" \\\\ \\hline\"+\"\\n\"\n",
" col += 1\n",
" if col > elements_per_line:\n",
" col = 1\n",
"contents += \"\\\\end{tabular}\" + \"\\n\" + \"\\\\end{center}\"\n",
"\n",
"with open(\"临时文件/tablefile.txt\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(contents)\n",
"\n",
"setCopy(contents)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "pythontest",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "91219a98e0e9be72efb992f647fe78b593124968b75db0b865552d6787c8db93"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,63 @@
import os,re
import win32clipboard as wc
import win32con
# Read the clipboard contents
def getCopy():
    """Return the clipboard data requested as ``win32con.CF_UNICODETEXT``.

    The clipboard is closed in a ``finally`` clause, so an exception raised
    while reading no longer leaves it open.
    """
    wc.OpenClipboard()
    try:
        return wc.GetClipboardData(win32con.CF_UNICODETEXT)
    finally:
        wc.CloseClipboard()
# Write text to the clipboard
def setCopy(str):
    """Replace the clipboard contents with *str* as ``win32con.CF_UNICODETEXT``.

    Args:
        str: Text to place on the clipboard. The parameter name shadows the
            builtin but is kept so existing callers are unaffected.

    The clipboard is closed in a ``finally`` clause, so an exception raised
    while emptying or writing no longer leaves it open.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, str)
    finally:
        wc.CloseClipboard()
def dollared(string):
    """Wrap *string* in ``$...$`` when it consists only of ``1234567890.+-:[]()``.

    Any other character leaves the string unchanged. An empty string contains
    no disallowed character and therefore becomes ``"$$"``.
    """
    if all(c in "1234567890.+-:[]()" for c in string):
        return "$" + string + "$"
    return string
# Turn clipboard text into a LaTeX tabular wrapped in a center environment,
# write it to a text file and put it back on the clipboard.
data = getCopy()
# Replace the fullwidth digits (code points 65296..65305) by ASCII digits.
data1 = ""
for c in data:
if 65296 <= ord(c) < 65306:
data1 += str(ord(c)-65296)
else:
data1 += c
data = data1
# NOTE(review): as shown here both search strings are empty, which would make
# str.replace insert "." and ":" around every character. Presumably the
# original literals held characters that were lost in extraction - confirm
# against the real file before running.
data = data.replace("",".").replace("",":")
# One table cell per clipboard line; the last line is parsed as the column count.
elements = data.split("\n")
elements_per_line = int(elements.pop(-1)) # this value needs to be adjusted
contents = "\\begin{center}\n\\begin{tabular}{|"
# One centred column, followed by a vertical rule, per table column.
for i in range(elements_per_line):
contents += "c|"
contents += "}\n"
contents += r"\hline"+"\n"
# col is the 1-based column of the cell being written.
col = 1
for element in elements:
if col != 1:
contents += " & "
contents += dollared(element.strip())
if col == elements_per_line:
contents += r" \\ \hline"+"\n"
col += 1
# Wrap around to the first column after a full row.
if col > elements_per_line:
col = 1
contents += "\\end{tabular}" + "\n" + "\\end{center}"
with open("临时文件/tablefile.txt","w",encoding = "utf8") as f:
f.write(contents)
setCopy(contents)

View File

@ -0,0 +1,129 @@
import os,re,difflib,Levenshtein,time,json
# Important!!! Id ranges of the new and old problems (an id present in both ranges counts as new)
id_new_problems = "40000:41000"
id_old_problems = "1:30000"
# Similarity threshold: a pair scoring strictly above it is reported as similar
threshold = 0.99
#Build the id list: commas separate segments, "a:b" inside a segment is the closed integer range a..b
def generate_number_set(string):
    """Expand an id specification into a list of 6-character, zero-padded ids.

    Args:
        string: Specification such as ``"40000:41000"`` or ``"1,3:5"``. All
            whitespace is removed before parsing.

    Returns:
        The ids in specification order, each left-padded with zeros to six
        characters.

    Raises:
        ValueError: If a range segment does not consist of exactly two
            integers separated by one colon.
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for segment in string.split(","):
        if not segment:
            # An empty segment (empty input, trailing or doubled comma) used
            # to be padded into the bogus id "000000"; skip it instead.
            continue
        if ":" in segment:
            start, end = segment.split(":")
            numbers_list.extend(
                str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
            )
        else:
            numbers_list.append(segment.zfill(6))
    return numbers_list
#String normalisation applied before comparison
def pre_treating(string):
    """Strip layout and markup noise from a problem text.

    The removals run in this order: ``\\begin{center}...\\end{center}`` spans,
    the tokens ``bracket{n}``, ``blank{n}``, ``fourch``, ``twoch`` and
    ``onech``, all whitespace together with backslashes, braces, dollar signs,
    parentheses and square brackets, the words ``displaystyle`` and
    ``overrightarrow``, and finally the punctuation ``, . : ; ?``.

    Returns:
        The reduced string.
    """
    steps = [
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)",
        r"[,\.:;?]",
    ]
    result = string
    for step in steps:
        result = re.sub(step, "", result)
    return result
#String comparison based on difflib
def difflab_get_equal_rate(str1, str2):
    """Return the ``difflib.SequenceMatcher`` ratio of the two strings.

    The strings are compared exactly as given; no normalisation happens here.
    """
    matcher = difflib.SequenceMatcher(None, str1, str2)
    return matcher.ratio()
#String comparison based on Levenshtein.jaro
def jaro_get_equal_rate(str1,str2):
    """Return ``Levenshtein.jaro(str1, str2)``.

    The strings are compared exactly as given; no normalisation happens here.
    """
    similarity = Levenshtein.jaro(str1, str2)
    return similarity
#String comparison based on Levenshtein.ratio
def Lev_get_equal_rate(str1,str2):
    """Return ``Levenshtein.ratio(str1, str2)``.

    The strings are compared exactly as given; no normalisation happens here.
    """
    similarity = Levenshtein.ratio(str1, str2)
    return similarity
#Choose the comparison method
sim_test = jaro_get_equal_rate

#Load the problem database
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)


def _already_marked(id_a, problem_a, id_b, problem_b):
    """Return True when either record lists the other id under "related" or "same"."""
    return (
        id_a in problem_b["related"]
        or id_a in problem_b["same"]
        or id_b in problem_a["related"]
        or id_b in problem_a["same"]
    )


def _report_entry(similar_rate, id_a, problem_a, id_b, problem_b):
    """Format one report entry: the score, then both ids with their original content."""
    return (
        ("%.4f" % similar_rate) + "\n\n"
        + id_a + " " + problem_a["content"] + "\n\n"
        + id_b + " " + problem_b["content"] + "\n\n"
    )


#Build the old-problem and new-problem dictionaries
new_id_list_raw = generate_number_set(id_new_problems)
old_id_list_raw = generate_number_set(id_old_problems)
# Sets give constant-time membership; the raw lists can hold tens of thousands
# of ids and were previously scanned once per database key.
_new_id_set = set(new_id_list_raw)
_old_id_set = set(old_id_list_raw)
new_id_list = [pid for pid in pro_dict if pid in _new_id_set]
# An id that falls in both ranges counts as new only.
old_id_list = [pid for pid in pro_dict if pid in _old_id_set and pid not in _new_id_set]
new_problems_dict = {pid: pro_dict[pid] for pid in new_id_list}
old_problems_dict = {pid: pro_dict[pid] for pid in old_id_list}
# Normalised texts, which are what the similarity function actually compares.
new_problems_dict_content = {pid: pre_treating(pro_dict[pid]["content"]) for pid in new_id_list}
old_problems_dict_content = {pid: pre_treating(pro_dict[pid]["content"]) for pid in old_id_list}
print("旧题目数:",len(old_problems_dict),", 新题目数:",len(new_problems_dict))

#Record the start time
start_time = time.time()
suspect_count = 0  # pairs that are similar or already marked
remarked = 0       # pairs already marked as related/same in the database
report_entries = []

#Compare every new problem with every old problem
count = 0
print("开始新题与旧题的比对")
for id_new, new_problem in new_problems_dict.items():
    count += 1
    if count % 50 == 0:
        print(count)  # progress indicator
    for id_old, old_problem in old_problems_dict.items():
        if _already_marked(id_new, new_problem, id_old, old_problem):
            # A marked pair is counted but never reported, so its score is
            # not needed and the comparison is skipped.
            suspect_count += 1
            remarked += 1
            continue
        similar_rate = sim_test(new_problems_dict_content[id_new],old_problems_dict_content[id_old])
        if similar_rate > threshold:
            suspect_count += 1
            report_entries.append(_report_entry(similar_rate, id_new, new_problem, id_old, old_problem))

#Compare the new problems with each other (each unordered pair once)
count = 0
print("开始新题之间的比对")
pending = list(new_problems_dict.items())
for count, (current_id, current_problem) in enumerate(pending[:-1], start=1):
    if count % 50 == 0:
        print(count)  # progress indicator
    current_problem_content = new_problems_dict_content[current_id]
    for id_new, new_problem in pending[count:]:
        if _already_marked(id_new, new_problem, current_id, current_problem):
            suspect_count += 1
            remarked += 1
            continue
        similar_rate = sim_test(new_problems_dict_content[id_new],current_problem_content)
        if similar_rate > threshold:
            suspect_count += 1
            report_entries.append(_report_entry(similar_rate, id_new, new_problem, current_id, current_problem))
alike_problems = "".join(report_entries)

#Record the end time and show the results
end_time = time.time()
print("总耗时:",end_time-start_time,"秒.")
print("发现相似: ",suspect_count,", 其中已标注: ",remarked,".")

with open("临时文件/相似题目.txt","w",encoding="utf8") as f:
    f.write(alike_problems)

View File

@ -47,7 +47,7 @@ def run_command1():
call(["python","试卷答案生成.py"])
elif selectedtool == "单元标记转换":
call(["python","单元标记转换.py"])
elif selectedtool == "目标清点":
elif selectedtool == "目标清点及清单生成":
call(["python","单元课时目标题目数据清点.py"])
elif selectedtool == "目标表体生成":
call(["python","根据范围提取课时目标.py"])
@ -59,6 +59,28 @@ def run_command1():
call(["python","讲义生成.py"])
elif selectedtool == "课时目标划分信息汇总":
call(["python","课时目标划分信息汇总.py"])
elif selectedtool == "识别题目类型":
call(["python","识别题目类型.py"])
elif selectedtool == "批量添加字段数据":
call(["python","批量添加字段数据.py"])
elif selectedtool == "目标挂钩清点":
call(["python","目标挂钩清点.py"])
elif selectedtool == "批量题号选题pdf生成":
call(["python","批量生成题目pdf.py"])
elif selectedtool == "目标pdf生成":
call(["python","课时目标pdf生成.py"])
elif selectedtool == "剪贴板表格整理":
call(["python","剪贴板表格整理.py"])
elif selectedtool == "手动统计结果转换":
call(["python","手动统计结果转换.py"])
elif selectedtool == "新题相似相同比对":
call(["python","新题相似相同比对.py"])
elif selectedtool == "全局未标注相同题目检测":
call(["python","全局未标注相同题目检测.py"])
elif selectedtool == "局部相似题目检测":
call(["python","局部相似题目检测.py"])
elif selectedtool == "相同相似题目标注":
call(["python","相同相似题目标注.py"])
LabelTool.config(text = selectedtool+"STEP1命令执行完毕")
button1.place_forget()
@ -95,7 +117,12 @@ ImportMenu.add_command(label = "添加关联题目", command = lambda: SetButton
# 设置 维护 菜单项
MaintainenceMenu = Menu(menubar, tearoff = False)
menubar.add_cascade(label = "维护", menu = MaintainenceMenu)
MaintainenceMenu.add_command(label = "批量添加字段数据", command = lambda: SetButton("批量添加字段数据",1,["文本文件/metadata.txt","批量添加字段数据.py"]))
MaintainenceMenu.add_command(label = "手动统计结果转换", command = lambda: SetButton("手动统计结果转换",1,["文本文件/metadata.txt","文本文件/手动统计结果.txt"]))
MaintainenceMenu.add_separator()
MaintainenceMenu.add_command(label = "修改题目", command = lambda: SetButton("修改题目数据库",2,["修改题目数据库.py"]))
MaintainenceMenu.add_command(label = "识别题目类型", command = lambda: SetButton("识别题目类型",1,[]))
# 设置 使用 菜单项
UseMenu = Menu(menubar, tearoff = False)
@ -104,6 +131,7 @@ UseMenu.add_command(label = "关键字筛选题号", command = lambda: SetButton
UseMenu.add_separator()
UseMenu.add_command(label = "讲义试卷生成", command = lambda: SetButton("讲义生成",1,["讲义生成.py"]))
UseMenu.add_command(label = "题号选题pdf生成", command = lambda: SetButton("题号选题pdf生成",1,["题号选题pdf生成.py"]))
UseMenu.add_command(label = "批量题号选题pdf生成", command = lambda: SetButton("批量题号选题pdf生成",1,["批量生成题目pdf.py"]))
UseMenu.add_command(label = "试卷答案生成", command = lambda: SetButton("试卷答案生成",1,["试卷答案生成.py"]))
# 设置 目标及标签 菜单项
@ -111,16 +139,32 @@ ObjTagMenu = Menu(menubar, tearoff = False)
menubar.add_cascade(label = "目标及标签", menu = ObjTagMenu)
ObjTagMenu.add_command(label = "单元标记转换", command = lambda: SetButton("单元标记转换",1,["单元标记转换.py"]))
ObjTagMenu.add_separator()
ObjTagMenu.add_command(label = "目标清点", command = lambda: SetButton("目标清点",1,[]))
ObjTagMenu.add_command(label = "目标表体生成", command = lambda: SetButton("目标表体生成",1,["根据范围提取课时目标.py"]))
ObjTagMenu.add_command(label = "目标pdf生成", command = lambda: SetButton("目标pdf生成",1,["课时目标pdf生成.py"]))
ObjTagMenu.add_command(label = "课时目标寻找题目", command = lambda: SetButton("课时目标寻找题目",1,["课时目标寻找题目.py"]))
ObjTagMenu.add_command(label = "课时目标划分信息汇总", command = lambda: SetButton("课时目标划分信息汇总",1,[]))
ObjTagMenu.add_separator()
ObjTagMenu.add_command(label = "目标挂钩基础清点", command = lambda: SetButton("目标挂钩清点",1,[]))
ObjTagMenu.add_command(label = "目标清点及清单生成", command = lambda: SetButton("目标清点及清单生成",1,[]))
# 设置 相同相似比对 菜单项
SimRelMenu = Menu(menubar, tearoff= False)
menubar.add_cascade(label = "相同相似", menu = SimRelMenu)
SimRelMenu.add_command(label = "新题相似相同比对", command = lambda: SetButton("新题相似相同比对",1,["新题相似相同比对.py"]))
SimRelMenu.add_separator()
SimRelMenu.add_command(label = "局部相似题目检测", command = lambda: SetButton("局部相似题目检测",1,["局部相似题目检测.py"]))
SimRelMenu.add_command(label = "相同相似题目标注", command = lambda: SetButton("相同相似题目标注",1,["相同相似题目标注.py"]))
SimRelMenu.add_separator()
SimRelMenu.add_command(label = "全局未标注相同题目检测", command = lambda: SetButton("全局未标注相同题目检测",1,[]))
# 设置 其他 菜单项
OtherMenu = Menu(menubar, tearoff = False)
menubar.add_cascade(label = "其他", menu = OtherMenu)
OtherMenu.add_command(label = "mathpix预处理", command = lambda: SetButton("mathpix预处理",1,[]))
OtherMenu.add_command(label = "带圈数字处理", command = lambda: SetButton("带圈数字处理",1,[]))
OtherMenu.add_command(label = "剪贴板mathpix预处理", command = lambda: SetButton("mathpix预处理",1,[]))
OtherMenu.add_command(label = "剪贴板带圈数字处理", command = lambda: SetButton("带圈数字处理",1,[]))
OtherMenu.add_command(label = "剪贴板表格整理", command = lambda: SetButton("剪贴板表格整理",1,[]))
menubar.add_command(label = "退出", command = root.destroy)

View File

@ -17,7 +17,7 @@
" numerals_list[i] = str_numeral\n",
" return \"\\t\".join(numerals_list)\n",
"\n",
"with open(\"临时文件/统计结果.txt\",\"r\",encoding = \"utf8\") as f:\n",
"with open(\"文本文件/手动统计结果.txt\",\"r\",encoding = \"utf8\") as f:\n",
" data = f.read()\n",
"\n",
"blocks = re.findall(r\"\\[BEGIN\\]([\\s\\S]*?)\\[END\\]\",data)\n",
@ -43,7 +43,7 @@
" output_data += \"\\n\".join(results_dict[id])\n",
" output_data += \"\\n\\n\"\n",
"\n",
"with open(\"临时文件/字段数据.txt\",\"w\",encoding = \"utf8\") as f:\n",
"with open(\"文本文件/metadata.txt\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(output_data)\n"
]
},

View File

@ -0,0 +1,39 @@
import os,re,json
def form_decimals(string):
    """Reformat whitespace-separated numbers with three decimals, joined by tabs.

    Args:
        string: Numbers separated by any run of whitespace.

    Returns:
        The numbers formatted with ``"%.3f"`` and joined by single tab
        characters; an empty string when there are no numbers.

    Raises:
        ValueError: If a token cannot be converted by ``float``.
    """
    tokens = [tok for tok in re.split(r"[\s]+", string) if len(tok) > 0]
    return "\t".join("%.3f" % float(tok) for tok in tokens)
# Convert the manually recorded statistics into "usages" metadata records.
with open("文本文件/手动统计结果.txt","r",encoding = "utf8") as f:
    data = f.read()

# Every [BEGIN]...[END] span is processed independently.
blocks = re.findall(r"\[BEGIN\]([\s\S]*?)\[END\]",data)

results_dict = {}
# Header values apply to the data lines that follow them; None means no
# header has been seen yet.
date = None
current_class = None

for block in blocks:
    for line in (raw.strip() for raw in block.split("\n")):
        if line == "":
            continue
        if line[:2] == "##":
            # "##" line: date for the following data lines
            date = line[2:].strip()
        elif line[:2] == "**":
            # "**" line: class name for the following data lines
            current_class = line[2:].strip()
        else:
            # Data line: a problem id, whitespace, then one or more numbers.
            parts = line.split(None, 1)
            if len(parts) < 2 or date is None or current_class is None:
                # Previously such a line crashed the script (no whitespace
                # after the id, or no preceding "##"/"**" header).
                print("无法处理的行:",line)
                continue
            problem_id = parts[0].zfill(6)
            entry = date + "\t" + current_class + "\t" + form_decimals(parts[1])
            results_dict.setdefault(problem_id, []).append(entry)

# Output layout: field name on the first line, then one blank-line-terminated
# record per problem id.
output_data = "usages\n" + "".join(
    problem_id + "\n" + "\n".join(entries) + "\n\n"
    for problem_id, entries in results_dict.items()
)

with open("文本文件/metadata.txt","w",encoding = "utf8") as f:
    f.write(output_data)

View File

@ -0,0 +1,83 @@
import os,re,json
"""---明确数据文件位置---"""
# Path of the data file to import
datafile = "文本文件/metadata.txt"
# Records are separated by a blank line (double newline); list items inside a record are separated by single newlines; the first line is the field name
"""---文件位置结束---"""
def trim(string):
    """Return *string* without leading and trailing spaces, tabs and newlines.

    Other whitespace characters (for example carriage returns) are kept.
    """
    return string.strip(" \t\n")
def FloatToInt(string):
    """Parse *string* as a float and snap it to an int when it is nearly integral.

    Returns:
        ``round(value)`` (an ``int``) when the parsed value differs from it by
        less than 0.01, otherwise the parsed ``float``.

    Raises:
        ValueError: If *string* cannot be converted by ``float``.
    """
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value
# Import the records of the data file into one field of the problem database.
with open(datafile,"r",encoding="utf8") as f:
    data = f.read().strip()

# The first line names the target field; the rest holds the records. partition
# also copes with a file that consists of the field name only.
field, _, appending_data = data.partition("\n")
field = field.strip()

with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
    database = f.read()
obj_dict = json.loads(database)

#This field list may need to be updated
fields = ["content","objs","tags","genre","ans","solution","duration","usages","origin","edit","same","related","remark","space"]
# String fields whose value is replaced rather than appended to.
overwritten_fields = ("ans", "space")

if field in fields:
    # Problem "000001" serves as the exemplar for the field's data type.
    field_type = type(pro_dict["000001"][field])
    datalist = [record.strip() for record in appending_data.split("\n\n") if len(trim(record)) > 0]
    for record in datalist:
        id_match = re.match(r"[\d]{1,}", record)
        if id_match is None:
            # Previously an IndexError; report the malformed record and go on.
            print("记录缺少题号:",record)
            continue
        raw_id = id_match.group(0)
        value = record[len(raw_id):].strip()
        problem_id = raw_id.zfill(6)
        if problem_id not in pro_dict:
            print("题号:",problem_id,"不在数据库中.")
            break
        problem = pro_dict[problem_id]
        #String fields
        if field_type == str:
            if value in problem[field]:
                print("题号:",problem_id,", 字段:",field,"中已有该数据:",value)
            elif field in overwritten_fields:
                # The original condition read `A and B and field == "ans" or
                # field == "space"`, which Python parses as
                # `(A and B and ...) or field == "space"`: "space" was
                # overwritten with a raw string whatever its type. The
                # overwrite now only applies to string fields.
                problem[field] = value
                print("题号:",problem_id,", 字段:",field,"中已修改数据:",value)
            else:
                problem[field] = trim(problem[field] + "\n" + value)
                print("题号:",problem_id,", 字段:",field,"中已添加数据:",value)
        #Numeric fields
        elif field_type == int or field_type == float:
            if abs(float(value) - problem[field]) < 0.01:
                print("题号:",problem_id,", 字段:",field,"中已有该数据:",FloatToInt(value))
            else:
                problem[field] = FloatToInt(value)
                print("题号:",problem_id,", 字段:",field,"中已修改数据:",FloatToInt(value))
        #List fields: every line of the record is one list item
        elif field_type == list:
            for cell_data in (cell.strip() for cell in value.split("\n")):
                if cell_data in problem[field]:
                    print("题号:",problem_id,", 字段:",field,"中已有该数据:",cell_data)
                elif not field == "objs":
                    problem[field].append(cell_data)
                    print("题号:",problem_id,", 字段:",field,"中已添加数据:",cell_data)
                elif not cell_data in obj_dict and not cell_data.upper() == "KNONE":
                    # "objs" items must be a key of LessonObj.json or "KNONE"
                    print("题号:",problem_id,", 字段:",field,"目标编号有误:",cell_data)
                else:
                    problem[field].append(cell_data.upper())
                    print("题号:",problem_id,", 字段:",field,"中已添加数据:",cell_data.upper())
else:
    print("字段名有误")

with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
    f.write(json.dumps(pro_dict,indent=4,ensure_ascii=False))

View File

@ -9,111 +9,126 @@
"name": "stdout",
"output_type": "stream",
"text": [
"题号: 040422 , 字段: ans 中已修改数据: $(-1,3)$\n",
"题号: 040423 , 字段: ans 中已修改数据: $(-\\infty,-1)$\n",
"题号: 040424 , 字段: ans 中已修改数据: $\\sqrt{5}$\n",
"题号: 040425 , 字段: ans 中已修改数据: $5$\n",
"题号: 040426 , 字段: ans 中已修改数据: $\\dfrac{24}{7}$\n",
"题号: 040427 , 字段: ans 中已修改数据: $0.12$\n",
"题号: 040428 , 字段: ans 中已修改数据: $52$\n",
"题号: 040429 , 字段: ans 中已修改数据: $2$\n",
"题号: 040430 , 字段: ans 中已修改数据: $60^{\\circ}$\n",
"题号: 040431 , 字段: ans 中已修改数据: $\\dfrac{4}{5}$\n",
"题号: 040432 , 字段: ans 中已修改数据: $\\dfrac{\\sqrt{10}}{10}$\n",
"题号: 040433 , 字段: ans 中已修改数据: $(-\\infty,-\\frac{2}{\\mathrm{e}}) \\cup(\\frac{2}{\\mathrm{e}},+\\infty)$\n",
"题号: 040434 , 字段: ans 中已修改数据: D\n",
"题号: 040435 , 字段: ans 中已修改数据: C\n",
"题号: 040436 , 字段: ans 中已修改数据: B\n",
"题号: 040437 , 字段: ans 中已修改数据: D\n",
"题号: 040438 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{\\sqrt{15}}{15}$\n",
"题号: 040439 , 字段: ans 中已修改数据: (1) $a_1+a_2=6$, 公差为$4$; (2) $S_n=\\begin{cases}n^2+n, & n=2 k, \\\\ n^2+n+2, & n=2 k-1\\end{cases}$, $k$为正整数\n",
"题号: 040440 , 字段: ans 中已修改数据: (1) $y=0.1 x-0.2$; (2) (i) $5900$万元; (ii) $[\\dfrac{1}{3}, \\dfrac{5}{8}]$\n",
"题号: 040441 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{8}+\\dfrac{y^2}{4}=1$; (2) $y= \\pm \\dfrac{\\sqrt{30}}{10} x+1$; (3) $-\\dfrac{1}{2}$\n",
"题号: 040442 , 字段: ans 中已修改数据: (1) $y=(1+a) x$; (2) $-1$; (3) $(-\\infty,-1)$\n",
"题号: 040443 , 字段: ans 中已修改数据: $\\{1,2,3\\}$\n",
"题号: 040444 , 字段: ans 中已修改数据: $\\sqrt{10}$\n",
"题号: 040445 , 字段: ans 中已修改数据: $2 \\pi$\n",
"题号: 040446 , 字段: ans 中已修改数据: $-\\dfrac{1}{3}$\n",
"题号: 040447 , 字段: ans 中已修改数据: $24$\n",
"题号: 040448 , 字段: ans 中已修改数据: $-5$\n",
"题号: 040449 , 字段: ans 中已修改数据: $(\\dfrac{7}{2}, 0, \\dfrac{7}{2})$\n",
"题号: 040450 , 字段: ans 中已修改数据: $(-\\dfrac{1}{3}, 1)$\n",
"题号: 040451 , 字段: ans 中已修改数据: $17$或$18$\n",
"题号: 040452 , 字段: ans 中已修改数据: 支持\n",
"题号: 040453 , 字段: ans 中已修改数据: $\\sqrt{6}-\\sqrt{3}$\n",
"题号: 040454 , 字段: ans 中已修改数据: $\\{(4,-4),(4,1)\\}$\n",
"题号: 040455 , 字段: ans 中已修改数据: C\n",
"题号: 040456 , 字段: ans 中已修改数据: A\n",
"题号: 040457 , 字段: ans 中已修改数据: B\n",
"题号: 040458 , 字段: ans 中已修改数据: B\n",
"题号: 040459 , 字段: ans 中已修改数据: (1) $[-\\dfrac{\\pi}{2}+k \\pi, k \\pi]$, $k \\in \\mathbf{Z}$; (2) $\\dfrac{15 \\sqrt{3}}{4}$\n",
"题号: 040460 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{1}{2}$\n",
"题号: 040461 , 字段: ans 中已修改数据: (1) $\\dfrac{3}{256}$; (2) 选择A同学\n",
"题号: 040462 , 字段: ans 中已修改数据: (1) $\\sqrt{3}$; (2) 略; (3) 存在, $k=\\dfrac{\\sqrt{3}}{2}$, $P(-3 \\sqrt{3}, 0)$\n",
"题号: 040463 , 字段: ans 中已修改数据: (1) $y=2 x$; (2) $(-\\infty, 0] \\cup[1,+\\infty)$; (3) $(-\\infty,\\dfrac{2}{\\mathrm{e}})$\n",
"题号: 040464 , 字段: ans 中已修改数据: $(-2,1]$\n",
"题号: 040465 , 字段: ans 中已修改数据: $1$\n",
"题号: 040466 , 字段: ans 中已修改数据: $\\dfrac{2 \\pi}{3}$\n",
"题号: 040467 , 字段: ans 中已修改数据: $\\sqrt{3}$\n",
"题号: 040468 , 字段: ans 中已修改数据: $(-\\infty,-2] \\cup[0,2]$\n",
"题号: 040469 , 字段: ans 中已修改数据: $y^2=4 x$\n",
"题号: 040470 , 字段: ans 中已修改数据: $\\dfrac{14}{15}$\n",
"题号: 040471 , 字段: ans 中已修改数据: $5$\n",
"题号: 040472 , 字段: ans 中已修改数据: $\\dfrac{6+2 \\sqrt{3}}{5}$\n",
"题号: 040473 , 字段: ans 中已修改数据: $1-2^{2023}$\n",
"题号: 040474 , 字段: ans 中已修改数据: $\\dfrac{2 \\sqrt{3}}{3}$\n",
"题号: 040475 , 字段: ans 中已修改数据: $6+4 \\sqrt{2}$\n",
"题号: 040476 , 字段: ans 中已修改数据: B\n",
"题号: 040477 , 字段: ans 中已修改数据: D\n",
"题号: 040478 , 字段: ans 中已修改数据: C\n",
"题号: 040479 , 字段: ans 中已修改数据: B\n",
"题号: 040480 , 字段: ans 中已修改数据: (1) 略; (2) $\\arccos \\dfrac{2}{3}$\n",
"题号: 040481 , 字段: ans 中已修改数据: (1) $[k \\pi-\\dfrac{3 \\pi}{8}, k \\pi+\\dfrac{\\pi}{8}]$, $k \\in \\mathbf{Z}$; (2) $\\{x | x=\\dfrac{k \\pi}{2}+\\dfrac{\\pi}{8},\\ k \\in \\mathbf{Z}\\}$\n",
"题号: 040482 , 字段: ans 中已修改数据: (1) $1.4$米; (2) $\\arcsin \\dfrac{5 \\sqrt{3}}{14}$\n",
"题号: 040483 , 字段: ans 中已修改数据: (1) $\\dfrac{6 \\sqrt{5}}{5}$; (2) 略; (3) $\\dfrac{3 \\sqrt{3}}{4}$\n",
"题号: 040484 , 字段: ans 中已修改数据: (1) 略; (2) $a_n=\\dfrac{(n+3)(n+4)}{2}$, $b_n=\\dfrac{(n+4)^2}{2}$; (3) $(-\\infty,1]$\n",
"题号: 040485 , 字段: ans 中已修改数据: $4$\n",
"题号: 040486 , 字段: ans 中已修改数据: 四\n",
"题号: 040487 , 字段: ans 中已修改数据: $2$\n",
"题号: 040488 , 字段: ans 中已修改数据: $\\dfrac 23$\n",
"题号: 040489 , 字段: ans 中已修改数据: $12$\n",
"题号: 040490 , 字段: ans 中已修改数据: $40$\n",
"题号: 040491 , 字段: ans 中已修改数据: $2$\n",
"题号: 040492 , 字段: ans 中已修改数据: $72$\n",
"题号: 040493 , 字段: ans 中已修改数据: $3$\n",
"题号: 040494 , 字段: ans 中已修改数据: $\\dfrac{4\\pi}3$\n",
"题号: 040495 , 字段: ans 中已修改数据: $\\sqrt{3}$\n",
"题号: 040496 , 字段: ans 中已修改数据: $-2+\\sqrt{7}$\n",
"题号: 040497 , 字段: ans 中已修改数据: B\n",
"题号: 040498 , 字段: ans 中已修改数据: C\n",
"题号: 040499 , 字段: ans 中已修改数据: D\n",
"题号: 040500 , 字段: ans 中已修改数据: D\n",
"题号: 040501 , 字段: ans 中已修改数据: (1) 证明略; (2) $\\dfrac{\\sqrt{14}}7$\n",
"题号: 040502 , 字段: ans 中已修改数据: (1) $y=0.072x-0.046$; (2) (i) $1060$万; (ii) $(\\dfrac 13,\\dfrac 58]$\n",
"题号: 040503 , 字段: ans 中已修改数据: (1) $[k\\pi,\\dfrac\\pi 4+k\\pi]$, $k\\in \\mathbf{Z}$; (2) $\\dfrac\\pi 4$\n",
"题号: 040504 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{4}-y^2=1$; (2) 存在, $t=\\pm \\sqrt{3}$或$t=\\pm \\sqrt{\\dfrac{76}{15}}$; (3) $(-8-5\\sqrt{3},-\\dfrac 12)\\cup (\\dfrac 12,-8+5\\sqrt{3})$\n",
"题号: 040505 , 字段: ans 中已修改数据: (1) $f(g(x))$的导函数为$y=-a\\sin x$, $g(f(x))$的导函数为$y=-a\\sin (ax)$; (2) $(-\\infty,-1]$; (3) $[\\dfrac 32,3)$\n",
"题号: 040506 , 字段: ans 中已修改数据: $4$\n",
"题号: 040507 , 字段: ans 中已修改数据: $1$\n",
"题号: 040508 , 字段: ans 中已修改数据: $24$\n",
"题号: 040509 , 字段: ans 中已修改数据: $x^2+(y-1)^2=4$\n",
"题号: 040510 , 字段: ans 中已修改数据: $(-1,1]$\n",
"题号: 040511 , 字段: ans 中已修改数据: $300(4+\\sqrt{3})$\n",
"题号: 040512 , 字段: ans 中已修改数据: $\\dfrac{1}{2}$\n",
"题号: 040513 , 字段: ans 中已修改数据: $92$\n",
"题号: 040514 , 字段: ans 中已修改数据: $\\dfrac{1}{2}$\n",
"题号: 040515 , 字段: ans 中已修改数据: $\\dfrac{\\sqrt{21}}{6}$\n",
"题号: 040516 , 字段: ans 中已修改数据: $2 \\sqrt{2}$\n",
"题号: 040517 , 字段: ans 中已修改数据: $2$\n",
"题号: 040518 , 字段: ans 中已修改数据: C\n",
"题号: 040519 , 字段: ans 中已修改数据: A\n",
"题号: 040520 , 字段: ans 中已修改数据: A\n",
"题号: 040521 , 字段: ans 中已修改数据: C\n",
"题号: 040522 , 字段: ans 中已修改数据: (1) $2n$; (2) $n^2+n+\\dfrac{4^{n+1}}{3}-\\dfrac{4}{3}$\n",
"题号: 040523 , 字段: ans 中已修改数据: (1) $\\begin{pmatrix}0 & 1 & 2 \\\\ \\dfrac{7}{15} & \\dfrac{7}{15} & \\dfrac{1}{15}\\end{pmatrix}$, 期望为$\\dfrac{3}{5}$; (2) $\\chi^2 \\approx 0.794<3.841$, 不能认为有关\n",
"题号: 040524 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{\\sqrt{10}}{5}$\n",
"题号: 040525 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{4}+y^2=1$; (2) 最大值为$3$; 最小值为$\\dfrac{\\sqrt{6}}{3}$; (3) $(0,3)$\n",
"题号: 040526 , 字段: ans 中已修改数据: (1) $y=x+1$; (2) 略; (3) $2$\n"
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t0.000\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t1.000\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t0.000\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t0.000\t1.000\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t0.000\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.800\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.350\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.850\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.750\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.950\t0.900\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.950\t0.750\t0.850\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.900\t0.650\t0.350\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.650\t0.600\t0.600\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.900\t0.600\t0.350\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.842\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.105\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.842\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.421\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.895\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\t0.789\t0.895\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.947\t0.000\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.895\t0.842\t0.895\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.789\t0.842\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.893\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.071\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.893\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.714\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\t0.786\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t1.000\t0.929\t0.821\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\t0.929\t0.000\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\t0.893\t0.821\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\t0.750\t0.821\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.391\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.870\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.652\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\t0.870\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t1.000\t0.957\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t0.913\t0.000\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.696\t0.783\t0.739\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t0.826\t0.696\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.938\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.875\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.000\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.875\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.562\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.750\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.812\t0.812\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.938\t0.062\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.812\t0.688\t0.750\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.938\t0.812\t0.438\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.865\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.973\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.135\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.946\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.730\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t1.000\t1.000\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.973\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.946\t0.838\t0.946\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.351\t0.919\t0.378\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.378\t0.513\t0.622\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.892\t0.622\t0.513\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.846\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.179\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.769\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\t0.974\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\t0.872\t0.974\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\t1.000\t0.051\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.692\t0.718\t0.795\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.667\t0.872\t0.590\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.889\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.037\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.889\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.667\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.963\t0.926\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\t0.852\t0.704\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.963\t0.963\t0.037\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.852\t0.778\t0.778\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t1.000\t0.556\t0.518\n",
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.842\n",
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\n",
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.053\n",
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.895\n",
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.632\n",
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\t0.895\n",
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.947\n",
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\n",
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\t0.842\t0.947\n",
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.895\t0.895\t0.105\n",
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.789\t0.632\t0.526\n",
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.842\t0.421\t0.105\n"
]
}
],
@ -196,6 +211,8 @@
" else:\n",
" pro_dict[id][field].append(cell_data.upper())\n",
" print(\"题号:\",id,\", 字段:\",field,\"中已添加数据:\",cell_data.upper())\n",
"else:\n",
" print(\"字段名有误\")\n",
"\n",
"with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(json.dumps(pro_dict,indent=4,ensure_ascii=False))"

View File

@ -0,0 +1,197 @@
import os,re,time,json,sys
"""
模板文件目录下 题目清单.txt 文件不能缺失
"""
"""---设置是否在学生版中提供答案---"""
# Whether the student version also shows the answers.
answered = True
"""---设置文件保存路径---"""
# Output directory. Always use "/" between directories and files.
directory = "临时文件/"
# filename = "高三二模前易错题"
# Base name of the generated .tex files (a role tag and a date stamp are appended later).
filename = "测试"
"""---设置文件名结束---"""
"""---设置题目列表---"""
# Each key is used as a section title; its value lists the problem numbers of that section
# (comma-separated chunks, where "a:b" denotes the closed integer range a..b).
problems_dict = {
"函数1":"778,1253,1262,1325,1339,1352,2918,2968,3747,4009,4157,4435,4721,5123,5650,7939,10197,10938,11148,12179,12277,12543,12859,12902,13721,30060,30398,30406,10796,12104,30051,30327,30337,30356,31321",
"函数2":"87,342,655,1277,1329,2831,2851,2859,2905,2959,3648,4153,4320,4359,11066,11079,11144,11999,12063,12064,12178,12192,12842,12856,12903,13747,13840,14191,30410,12549,12717,30377,30381,30411,30416,30420,30424,30426",
"数列":"321,403,1781,1788,1810,3210,3219,3283,3309,3310,3312,4472,4476,6793,10777,10942,12067,12112,12239,12933,12979,13921,13925,30072,30473,30499",
"解析几何":"248,282,363,625,960,2162,2252,2268,2278,2372,2418,3421,3437,3438,4246,8760,8866,8912,9784,10951,10958,12199,12213,12255,12548,13060,13063,13069,13097,13118,13134,13982,14505,31229,31233,31323",
"概率统计":"332,340,401,412,654,2586,2605,2664,3574,3585,3640,4575,4584,7361,7423,7502,7630,10868,11993,14091,30227,30275,30495,30520,30540,31158,31196,31320"
# "2024届高二下学期周末卷01":"40001:40017",
# "2025届高一下学期周末卷01":"40018:40036",
# "2024届高二下学期周末卷02":"40037:40056",
# "2025届高一下学期周末卷02":"40057:40082",
# "2025届高一下学期周末卷03":"40083:40104",
# "2025届高一下学期周末卷03小测":"40105:40112",
# "2025届高一下学期周末卷04旧版":"40113:40130",
# "2025届高一下学期周末卷04小测":"40131:40139",
# "2024届高二下学期周末卷03":"40140:40160",
# "2024届高二上学期期末考试":"31267:31287",
# "2025届高一上学期期末考试":"31288:31308",
# "2024届高二下学期周末卷04":"40161:40180",
# "2025届高一下学期周末卷04":"40181:40201",
# "2024届高二下学期周末卷05":"40202:40225",
# "2025届高一下学期周末卷05":"40226:40245",
# "2024届空间向量校本作业":"22048:22083",
# "2024届二项式定理校本作业":"22084:22105",
# "2025届高一下学期周末卷05小测":"40246:40255",
# "2025届高一下学期周末卷06":"40256:40273",
# "2025届高一下学期周末卷06小测":"40274:40282",
# "2025届高一下学期期中复习一(集合逻辑不等式)":"40283:40298",
# "2024届高二下学期周末卷06":"40299:40316",
# "2024届高二下学期周末卷07":"40317:40335",
# "2025届高一下学期测验01":"40336:40349",
# "2025届高一下学期测验02":"40350:40367",
# "2025届高一下学期期中复习二(幂指对函数)":"40368:40386",
# "2025届高一下学期周末卷02小测":"40387:40395",
# "2025届高一下学期周末卷07":"40396:40413",
# "2025届高一下学期周末卷07小测":"40414:40421"
}
"""---设置题目列表结束---"""
# Make sure the output directory ends with "/" so file names can be appended to it directly.
# endswith() replaces directory[-1], which raised IndexError for an empty setting; an empty
# setting is left untouched instead of being turned into the filesystem root "/".
if directory and not directory.endswith("/"):
    directory += "/"
def generate_number_set(string,dict):
    """Expand a problem-number specification into a list of 6-digit id strings.

    The specification is a comma-separated list of chunks. A chunk is either a
    single number or ``start:end``, which denotes the closed integer range
    ``start..end``. All whitespace is ignored and every id is left-padded with
    zeros to six characters.

    Args:
        string: The specification, e.g. ``"1, 3:5"``.
        dict: Not used by this function; kept so existing two-argument calls
            keep working.

    Returns:
        The ids in the order they were specified, e.g.
        ``["000001", "000003", "000004", "000005"]``.

    Raises:
        ValueError: If a range chunk does not consist of exactly two integers.
    """
    string = re.sub(r"\s", "", string)
    numbers_list = []
    for chunk in string.split(","):
        # An empty chunk (empty input, trailing or doubled comma) used to be
        # padded into the bogus id "000000"; skip it instead.
        if not chunk:
            continue
        if ":" in chunk:
            start, end = chunk.split(":")
            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start), int(end) + 1))
        else:
            numbers_list.append(chunk.zfill(6))
    return numbers_list
def get_color(value):
    """Turn a rate into an rgb triple string for LaTeX's ``[rgb]`` colour model.

    The red channel rises linearly from 0 to 1 while the rate goes from 0 to
    0.5 and stays at 1 afterwards; the green channel stays at 1 up to 0.5 and
    then falls linearly to 0 at 1. Blue is always 0. So 0 maps to green, 0.5 to
    yellow and 1 to red.

    Args:
        value: The rate, as a number or a numeric string.

    Returns:
        A string of the form ``"{r,g,0}"`` with r and g printed to 3 decimals.
    """
    rate = float(value)
    upper_half = rate >= 0.5
    red = 1 if upper_half else 2 * rate
    green = 2 - 2 * rate if upper_half else 1
    return "{" + "%.3f" % red + "," + "%.3f" % green + ",0}"
def color_value(matchobj):
    """``re.sub`` callback that wraps a matched rate in a coloured LaTeX box.

    Args:
        matchobj: A match whose group 1 is the rate text (e.g. ``"0.871"``).

    Returns:
        A tab followed by ``\\fcolorbox[rgb]{0,0,0}{<colour>}{<rate>}``, where the
        fill colour comes from ``get_color`` and the frame colour is black.
    """
    rate_text = matchobj.group(1)
    pieces = ["\t", "\\fcolorbox[rgb]{0,0,0}", get_color(rate_text), "{", rate_text, "}"]
    return "".join(pieces)
# Load the problem database (JSON) into a dict; problems are looked up by their id key.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    pro_dict = json.load(f)
# Load the lesson-objective database (JSON) into a dict keyed by objective id.
with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
    obj_dict = json.load(f)
# Create the output directory when it does not exist yet. exist_ok=True covers the
# "already there" case without hiding other errors the way a bare except did.
if directory:
    os.makedirs(directory, exist_ok=True)
# Date stamp (_YYYYMMDD, local time) appended to the generated file names.
current_time = time.localtime()
time_string = time.strftime("_%Y%m%d", current_time)
data_teachers = ""
data_students = ""
teachers_latex_file = directory + filename + "_教师用" + time_string + ".tex"
students_latex_file = directory + filename + "_学生用" + time_string + ".tex"
# One rate (e.g. 0.871) preceded by a tab or a space inside a usage record.
# A single pass with this pattern replaces the two former passes (tab only, then tab or space).
usage_value_pattern = re.compile(r"[\t ](\d\.\d{0,10})")
for section_name in problems_dict:
    problems = problems_dict[section_name]
    # Every section starts on a new page and restarts the enumerate counter.
    section_header = r"\newpage" + "\n\n" + r"\section{" + section_name +"}\n\n" + r"\setcounter{enumi}{0}"+"\n\n"
    data_teachers += section_header
    data_students += section_header
    # Keep only the requested ids that actually exist in the database.
    problem_list = [pid for pid in generate_number_set(problems.strip(),pro_dict) if pid in pro_dict]
    # Build the teacher and student LaTeX snippets for every problem of the section.
    for pid in problem_list:
        problemset = pro_dict[pid]
        problem = problemset["content"]
        solution = (problemset["solution"] if problemset["solution"] != "" else "暂无解答与提示")
        answer = "\\textcolor{red}{" + (problemset["ans"] if problemset["ans"] != "" else "暂无答案") + "}"
        # NOTE: remarks is computed but not written to either output.
        remarks = (problemset["remark"] if problemset["remark"] != "" else "暂无备注")
        usages_list = problemset["usages"]
        if len(usages_list) > 0:
            # Colour every rate in the usage records according to its value.
            usage = usage_value_pattern.sub(color_value,"\n\n".join(usages_list))
        else:
            usage = "暂无使用记录"
        origin = (problemset["origin"] if problemset["origin"] != "" else "出处不详")
        objects = problemset["objs"]
        if len(objects) == 0:
            objects = "暂未关联目标\n\n"
        elif "KNONE" in [o.upper() for o in objects]:
            objects = "该题的考查目标不在目前的集合中\n\n"
        else:
            objects_string = ""
            for obj in objects:
                if obj not in obj_dict:
                    # An unknown objective id replaces the whole list with an error note.
                    objects_string = "目标" + obj + "有误\n\n"
                    break
                objects_string += "\\textcolor{blue}{" + obj + "|" + obj_dict[obj]["content"] + "}\n\n"
            objects = objects_string
        # Answer space is only reserved in the student version when no answers are printed.
        space = ("" if problemset["space"] == "" or answered else r"\vspace*{"+problemset["space"]+"}\n")
        # Fixed: the fallback used to test problemset["origin"], so problems without tags but
        # with an origin got an empty tag line, and tagged problems without origin lost their tags.
        tags = ("|".join(problemset["tags"]) if len(problemset["tags"])>0 else "暂无标签")
        raw_string = "\\item " + "{\\tiny ("+pid+")} "+problem
        teachers_string = raw_string.replace("\\tiny","")+"\n\n关联目标:\n\n"+ objects + "\n\n标签: " + tags + "\n\n答案: "+answer + "\n\n" + "解答或提示: " + solution + "\n\n使用记录:\n\n"+ usage + "\n" + "\n\n出处: "+origin + "\n\n"
        students_string = raw_string + space + "\n\n"
        if answered:
            # answer is already wrapped in \textcolor{red}{...}; the nested wrapper is kept as is.
            students_string += "答案: \\textcolor{red}{"+answer + "}\n\n"
        data_teachers += teachers_string
        data_students += students_string
# Drop the leading "\newpage\n\n" (10 characters) so the first section does not force a page break.
data_teachers = data_teachers[10:]
data_students = data_students[10:]
# Fill the LaTeX template and compile both versions.
with open("模板文件/题目清单.txt","r",encoding = "utf8") as f:
    latex_raw = f.read()
# On non-Windows systems switch to the fandol font set and comment out \setCJKmainfont.
if sys.platform != "win32":
    latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
    latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)
latex_teachers = latex_raw.replace("编译模板",data_teachers)
with open(teachers_latex_file,"w",encoding = "utf8") as f:
    f.write(latex_teachers)
print("开始编译教师版本pdf文件: ", teachers_latex_file)
# xelatex is run twice (presumably so that references settle - confirm); the exit status
# of the second run is printed.
os.system("xelatex -interaction=batchmode -output-directory=" + directory + " "+ teachers_latex_file)
print(os.system("xelatex -interaction=batchmode -output-directory=" + directory + " "+ teachers_latex_file))
latex_students = latex_raw.replace("编译模板",data_students)
with open(students_latex_file,"w",encoding = "utf8") as f:
    f.write(latex_students)
print("开始编译学生版本pdf文件: ", students_latex_file)
os.system("xelatex -interaction=batchmode -output-directory=" + directory + " "+ students_latex_file)
print(os.system("xelatex -interaction=batchmode -output-directory=" + directory + " "+ students_latex_file))

View File

@ -1,18 +1,235 @@
tags
usages
001050
20220906 2023届高三2班 0.871
1
第一单元
第二单元
第三单元
009446
20220906 2023届高三2班 0.355
001072
20220906 2023届高三2班 0.774
2
第三单元
010060
20220906 2023届高三2班 0.742
000033
20220906 2023届高三2班 0.903
4
第三单元
第五单元
第六单元
001073
20220906 2023届高三2班 0.935
000023
20220906 2023届高三2班 0.774
002773
20220906 2023届高三2班 0.742
002775
20220906 2023届高三2班 0.645
002784
20220906 2023届高三2班 0.903
007991
20220906 2023届高三2班 0.581
005150
20220906 2023届高三2班 0.903
002790
20220906 2023届高三2班 0.774
002791
20220906 2023届高三2班 0.774
000757
20220906 2023届高三2班 0.968
002793
20220906 2023届高三2班 0.968
000389
20220906 2023届高三2班 0.871
002800
20220906 2023届高三2班 0.903
000778
20220907 2023届高三2班 0.750
001262
20220907 2023届高三2班 0.656
000342
20220907 2023届高三2班 0.125
001238
20220907 2023届高三2班 0.906
001239
20220907 2023届高三2班 0.875
001242
20220907 2023届高三2班 0.844
002831
20220907 2023届高三2班 0.594 0.406
002968
20220907 2023届高三2班 0.531 0.063
009508
20220907 2023届高三2班 1.000
003936
20220907 2023届高三2班 0.906
007984
20220907 2023届高三2班 1.000
009511
20220907 2023届高三2班 0.844
005508
20220907 2023届高三2班 0.844
000734
20220907 2023届高三2班 0.469
002856
20220907 2023届高三2班 0.781 0.906
000474
20220907 2023届高三2班 0.813
002847
20220907 2023届高三2班 0.844 0.938
002851
20220907 2023届高三2班 0.500 0.281
001286
20220907 2023届高三2班 0.906
001292
20220907 2023届高三2班 0.906
010110
20220907 2023届高三2班 1.000 1.000 0.875 0.906
000058
20220907 2023届高三2班 1.000
010114
20220907 2023届高三2班 0.844
001296
20220907 2023届高三2班 1.000
005610
20220907 2023届高三2班 0.906
001353
20220907 2023届高三2班 0.813
001305
20220907 2023届高三2班 0.813 0.813 0.969 0.969 0.969
010125
20220907 2023届高三2班 0.969
001309
20220907 2023届高三2班 1.000 0.750
005123
20220907 2023届高三2班 0.563
005678
20220907 2023届高三2班 0.469
001340
20220908 2023届高三2班 0.788 0.818 0.879 0.727 0.788
002925
20220908 2023届高三2班 0.939
002911
20220908 2023届高三2班 0.879
002918
20220908 2023届高三2班 0.727
003815
20220908 2023届高三2班 0.939
005568
20220908 2023届高三2班 0.939
000954
20220908 2023届高三2班 1.000
001324
20220908 2023届高三2班 0.879
001326
20220908 2023届高三2班 0.939 0.970
002871
20220908 2023届高三2班 0.818
002878
20220908 2023届高三2班 0.818 0.818
002898
20220908 2023届高三2班 0.727
000362
20220908 2023届高三2班 0.879
001351
20220908 2023届高三2班 0.848
003747
20220908 2023届高三2班 0.727
005199
20220908 2023届高三2班 0.970
009490
20220908 2023届高三2班 0.879
009517
20220909 2023届高三2班 0.656
007941
20220909 2023届高三2班 0.813
000092
20220909 2023届高三2班 0.625
001270
20220909 2023届高三2班 1.000 1.000 0.969 0.781
002888
20220909 2023届高三2班 0.719
001331
20220909 2023届高三2班 0.156
002894
20220909 2023届高三2班 0.844 0.750
001211
20220909 2023届高三2班 0.969 0.906 0.875 0.844 0.750
001218
20220909 2023届高三2班 0.906 0.750 0.625
002893
20220909 2023届高三2班 0.813
002895
20220909 2023届高三2班 0.688
009522
20220909 2023届高三2班 0.563 0.250

View File

@ -0,0 +1,107 @@
[BEGIN]
## 20220906
** 2023届高三2班
1050 0.871
9446 0.355
1072 0.774
10060 0.742
33 0.903
1073 0.935
23 0.774
2773 0.742
2775 0.645
2784 0.903
7991 0.581
5150 0.903
2790 0.774
2791 0.774
757 0.968
2793 0.968
389 0.871
2800 0.903
[END]
[BEGIN]
## 20220907
** 2023届高三2班
778 0.750
1262 0.656
342 0.125
1238 0.906
1239 0.875
1242 0.844
2831 0.594 0.406
2968 0.531 0.063
9508 1.000
3936 0.906
7984 1.000
9511 0.844
5508 0.844
734 0.469
2856 0.781 0.906
474 0.813
2847 0.844 0.938
2851 0.500 0.281
[END]
[BEGIN]
## 20220907
** 2023届高三2班
1286 0.906
1292 0.906
10110 1.000 1.000 0.875 0.906
58 1.000
10114 0.844
1296 1.000
5610 0.906
1353 0.813
1305 0.813 0.813 0.969 0.969 0.969
10125 0.969
1309 1.000 0.750
5123 0.563
5678 0.469
[END]
[BEGIN]
## 20220908
** 2023届高三2班
1340 0.788 0.818 0.879 0.727 0.788
2925 0.939
2911 0.879
2918 0.727
3815 0.939
5568 0.939
954 1.000
1324 0.879
1326 0.939 0.970
2871 0.818
2878 0.818 0.818
2898 0.727
362 0.879
1351 0.848
3747 0.727
5199 0.970
9490 0.879
[END]
[BEGIN]
## 20220909
** 2023届高三2班
9517 0.656
7941 0.813
92 0.625
1270 1.000 1.000 0.969 0.781
2888 0.719
1331 0.156
2894 0.844 0.750
1211 0.969 0.906 0.875 0.844 0.750
1218 0.906 0.750 0.625
2893 0.813
2895 0.688
9522 0.563 0.250
[END]

File diff suppressed because one or more lines are too long

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@ -13,61 +13,11 @@
"0.805\t2\t006207\n",
"0.812\t3\t006137\n",
"0.798\t4\t012269\n",
"0.831\t5\t040063\n",
"0.817\t5\t010989\n",
"0.888\t6\t021503\n",
"0.932\t7\t040057\n",
"0.815\t7\t021484\n",
"0.959\t8\t003206\n",
"0.817\t9\t011090\n",
"0.796\t10\t003058\n",
"0.768\t11\t005984\n",
"0.744\t12\t021463\n",
"0.872\t13\t008169\n",
"0.843\t14\t021559\n",
"0.758\t15\t006230\n",
"0.888\t16\t008317\n",
"0.682\t17\t012005\n",
"0.630\t18\t006107\n",
"0.765\t19\t021581\n",
"0.763\t20\t014242\n",
"0.661\t21\t013849\n",
"0.716\t22\t008191\n",
"0.504\t23\t003537\n",
"0.934\t24\t000818\n",
"0.755\t25\t012596\n",
"0.893\t26\t012355\n",
"0.835\t27\t000479\n",
"0.950\t28\t001529\n",
"0.797\t29\t000577\n",
"0.737\t30\t003191\n",
"1.000\t31\t012594\n",
"0.824\t32\t003150\n",
"1.000\t33\t004066\n",
"0.707\t34\t003140\n",
"0.648\t35\t030294\n",
"0.752\t36\t014186\n",
"0.621\t37\t014223\n",
"0.700\t38\t004442\n",
"0.813\t39\t030808\n",
"0.669\t40\t013843\n",
"0.654\t41\t000738\n",
"0.682\t42\t013354\n",
"0.830\t43\t030042\n",
"0.977\t44\t020411\n",
"0.661\t45\t011133\n",
"0.681\t46\t004118\n",
"0.667\t47\t011943\n",
"0.683\t48\t005600\n",
"0.691\t49\t011386\n",
"0.711\t50\t002922\n",
"1.000\t51\t020452\n",
"0.736\t52\t020356\n",
"0.666\t53\t020430\n",
"0.685\t54\t020357\n",
"0.713\t55\t005540\n",
"0.643\t56\t012868\n",
"0.845\t57\t005598\n",
"0.860\t58\t002958\n",
"0.646\t59\t004184\n"
"0.839\t9\t011090\n"
]
}
],
@ -75,7 +25,7 @@
"import os,re,difflib,Levenshtein,time,json\n",
"\n",
"# 重要!!! 范围\n",
"old_problems_range = \"1:999999\"\n",
"old_problems_range = \"1:30000\"\n",
"threshold = 0.85\n",
"\n",
"# 待比对的文件\n",
@ -117,11 +67,11 @@
"def Lev_get_equal_rate(str1,str2):\n",
" return Levenshtein.ratio(str1,str2)\n",
"\n",
"def GenerateProblemListFromString(data):\n",
"def GenerateProblemListFromString(problem_string):\n",
" try:\n",
" data = re.findall(r\"\\\\begin\\{document\\}([\\s\\S]*?)\\\\end\\{document\\}\",problems_string)[0]\n",
" data = re.findall(r\"\\\\begin\\{document\\}([\\s\\S]*?)\\\\end\\{document\\}\",problem_string)[0]\n",
" except:\n",
" pass\n",
" data = problem_string\n",
" data = re.sub(r\"\\n{2,}\",\"\\n\",data)\n",
" data = re.sub(r\"\\\\item\",r\"\\\\enditem\\\\item\",data)\n",
" data = re.sub(r\"\\\\end\\{enumerate\\}\",r\"\\\\enditem\",data)\n",
@ -220,7 +170,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.8.15"
},
"orig_nbformat": 4,
"vscode": {

View File

@ -0,0 +1,108 @@
import os,re,difflib,Levenshtein,time,json
# IMPORTANT: id range of the existing problems to compare against
# (comma-separated chunks; "a:b" denotes the closed integer range a..b).
old_problems_range = "1:30000"
# NOTE(review): threshold is not referenced anywhere else in this script.
threshold = 0.85
# File containing the new problems to be compared.
filename = r"C:\Users\weiye\Documents\wwy sync\临时工作区\自拟题目9.tex"
def generate_number_set(string):
    """Expand a problem-number specification into a list of 6-digit id strings.

    The specification is a comma-separated list of chunks. A chunk is either a
    single number or ``start:end``, which denotes the closed integer range
    ``start..end``. All whitespace is ignored and every id is left-padded with
    zeros to six characters.

    Args:
        string: The specification, e.g. ``"1, 3:5"``.

    Returns:
        The ids in the order they were specified, e.g.
        ``["000001", "000003", "000004", "000005"]``.

    Raises:
        ValueError: If a range chunk does not consist of exactly two integers.
    """
    string = re.sub(r"\s", "", string)
    numbers_list = []
    for chunk in string.split(","):
        # An empty chunk (empty input, trailing or doubled comma) used to be
        # padded into the bogus id "000000"; skip it instead.
        if not chunk:
            continue
        if ":" in chunk:
            start, end = chunk.split(":")
            numbers_list.extend(str(ind).zfill(6) for ind in range(int(start), int(end) + 1))
        else:
            numbers_list.append(chunk.zfill(6))
    return numbers_list
def pre_treating(string):
    """Normalise a LaTeX problem text so that two problems can be compared.

    The removals run in a fixed order, because later steps delete the braces
    and backslashes that earlier patterns rely on:

    1. whole ``center`` environments;
    2. the layout keywords ``bracket{n}``, ``blank{n}``, ``fourch``, ``twoch``,
       ``onech``, ``mathrm`` and ``text``;
    3. whitespace, backslashes, braces, dollar signs, parentheses and brackets;
    4. newlines and tabs;
    5. the words ``displaystyle`` and ``overrightarrow``;
    6. the ASCII punctuation ``, . : ; ?``.

    Args:
        string: The raw problem text.

    Returns:
        The text with all of the above removed.
    """
    removal_steps = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)|(mathrm)|(text)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)",
        r"[,\.:;?]",
    )
    cleaned = string
    for pattern in removal_steps:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
def difflab_get_equal_rate(str1, str2):
    """Return the difflib ``SequenceMatcher`` similarity ratio of two strings."""
    matcher = difflib.SequenceMatcher(a=str1, b=str2)
    return matcher.ratio()
def jaro_get_equal_rate(str1,str2):
    """Return the similarity of two strings as computed by ``Levenshtein.jaro``."""
    similarity = Levenshtein.jaro(str1, str2)
    return similarity
def Lev_get_equal_rate(str1,str2):
    """Return the similarity of two strings as computed by ``Levenshtein.ratio``."""
    similarity = Levenshtein.ratio(str1, str2)
    return similarity
def GenerateProblemListFromString(problem_string):
    """Split LaTeX source into its ``\\item`` problems with their suffix markers.

    When the text contains a ``document`` environment only its body is used,
    otherwise the whole text is. Every ``\\item`` is closed by the next
    ``\\item`` or by ``\\end{enumerate}``; an item that is followed by neither
    is not captured.

    The suffix of a problem is the last marker line of the form ``%<letters or
    digits>`` that appears before that item, or ``""`` when there is none.

    Args:
        problem_string: The LaTeX source text.

    Returns:
        A list of ``(content, suffix)`` tuples in document order, where content
        is the stripped item text.
    """
    document = re.search(r"\\begin\{document\}([\s\S]*?)\\end\{document\}", problem_string)
    data = document.group(1) if document else problem_string
    data = re.sub(r"\n{2,}", "\n", data)
    # Insert an explicit end marker before every item and at the end of each list.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)
    ProblemsList = []
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        # Use the position of this very match. Searching for the item text with
        # data.index() returned the first occurrence, which gave duplicated or
        # empty items the suffix of an earlier place in the document.
        markers = re.findall(r"\n\%[\dA-Za-z]+", data[:item.start(1)])
        suffix = markers[-1].replace("%", "").strip() if markers else ""
        ProblemsList.append((item.group(1).strip(), suffix))
    return ProblemsList
# Similarity measure used for the comparison.
sim_test = jaro_get_equal_rate
# Load the problem database.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    pro_dict = json.load(f)
# Read the file holding the new problems and split it into (content, suffix) pairs.
with open(filename,"r",encoding="u8") as f:
    newdatabase = f.read()
new_pro_list = GenerateProblemListFromString(newdatabase)
# Pre-treat the existing problems whose id lies in old_problems_range.
# A set makes every membership test O(1) instead of scanning the whole id list.
idrange_raw = set(generate_number_set(old_problems_range))
idrange = [pid for pid in pro_dict if pid in idrange_raw]
pro_dict_treated = {p: pre_treating(pro_dict[p]["content"]) for p in idrange}
# New problems are numbered from 1 in order of appearance.
new_dict_treated = {i: pre_treating(content) for i, (content, _suffix) in enumerate(new_pro_list, 1)}
output_lines = []
for i, new_p in new_dict_treated.items():
    maxsim = 0
    # Reset for every new problem: previously argmax kept the id found for the preceding
    # problem (or was unbound on the first one) when no similarity exceeded 0.
    argmax = ""
    for p, old_p in pro_dict_treated.items():
        sim = sim_test(new_p,old_p)
        if sim > maxsim:
            maxsim = sim
            argmax = p
    line = "%.3f\t%d\t%s" %(maxsim,i,argmax)
    print(line)
    output_lines.append(line + "\n")
output = "".join(output_lines)
with open("临时文件/新题相似相同.txt","w",encoding = "u8") as f:
    f.write(output)

View File

@ -2,9 +2,26 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'openpyxl'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 63\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[39mfor\u001b[39;00m t \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(results)):\n\u001b[0;32m 61\u001b[0m df[\u001b[39mid\u001b[39m\u001b[39m+\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m_\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m+\u001b[39m\u001b[39mstr\u001b[39m(t\u001b[39m+\u001b[39m\u001b[39m1\u001b[39m)][c] \u001b[39m=\u001b[39m \u001b[39mfloat\u001b[39m(results[t])\n\u001b[1;32m---> 63\u001b[0m df\u001b[39m.\u001b[39;49mto_excel(outputfile,index \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m)\n",
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\core\\generic.py:2374\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[0;32m 2361\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mio\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mformats\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexcel\u001b[39;00m \u001b[39mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m 2363\u001b[0m formatter \u001b[39m=\u001b[39m ExcelFormatter(\n\u001b[0;32m 2364\u001b[0m df,\n\u001b[0;32m 2365\u001b[0m na_rep\u001b[39m=\u001b[39mna_rep,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2372\u001b[0m inf_rep\u001b[39m=\u001b[39minf_rep,\n\u001b[0;32m 2373\u001b[0m )\n\u001b[1;32m-> 2374\u001b[0m formatter\u001b[39m.\u001b[39;49mwrite(\n\u001b[0;32m 2375\u001b[0m excel_writer,\n\u001b[0;32m 2376\u001b[0m sheet_name\u001b[39m=\u001b[39;49msheet_name,\n\u001b[0;32m 2377\u001b[0m startrow\u001b[39m=\u001b[39;49mstartrow,\n\u001b[0;32m 2378\u001b[0m startcol\u001b[39m=\u001b[39;49mstartcol,\n\u001b[0;32m 2379\u001b[0m freeze_panes\u001b[39m=\u001b[39;49mfreeze_panes,\n\u001b[0;32m 2380\u001b[0m engine\u001b[39m=\u001b[39;49mengine,\n\u001b[0;32m 2381\u001b[0m storage_options\u001b[39m=\u001b[39;49mstorage_options,\n\u001b[0;32m 2382\u001b[0m )\n",
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\io\\formats\\excel.py:918\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[0;32m 914\u001b[0m need_save \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 915\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 916\u001b[0m \u001b[39m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[0;32m 917\u001b[0m \u001b[39m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[1;32m--> 918\u001b[0m writer \u001b[39m=\u001b[39m ExcelWriter( \u001b[39m# type: ignore[abstract]\u001b[39;49;00m\n\u001b[0;32m 919\u001b[0m writer, engine\u001b[39m=\u001b[39;49mengine, storage_options\u001b[39m=\u001b[39;49mstorage_options\n\u001b[0;32m 920\u001b[0m )\n\u001b[0;32m 921\u001b[0m need_save \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m 923\u001b[0m \u001b[39mtry\u001b[39;00m:\n",
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:56\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[0;32m 44\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 45\u001b[0m path: FilePath \u001b[39m|\u001b[39m WriteExcelBuffer \u001b[39m|\u001b[39m ExcelWriter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 54\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 55\u001b[0m \u001b[39m# Use the openpyxl module as the Excel writer.\u001b[39;00m\n\u001b[1;32m---> 56\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mopenpyxl\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mworkbook\u001b[39;00m \u001b[39mimport\u001b[39;00m Workbook\n\u001b[0;32m 58\u001b[0m engine_kwargs \u001b[39m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[0;32m 60\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[0;32m 61\u001b[0m path,\n\u001b[0;32m 62\u001b[0m mode\u001b[39m=\u001b[39mmode,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 65\u001b[0m engine_kwargs\u001b[39m=\u001b[39mengine_kwargs,\n\u001b[0;32m 66\u001b[0m )\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'"
]
}
],
"source": [
"import os,re,json\n",
"import pandas as pd\n",
@ -71,26 +88,6 @@
"df.to_excel(outputfile,index = False)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.231"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"float(\"0.231\")"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -115,7 +112,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {

View File

@ -0,0 +1,27 @@
import json
# File that receives the comma-separated ids of the problems without any tag.
filename = r"文本文件/题号筛选.txt"
# Same forward-slash path and file-name casing as the other scripts, so the file is also
# found on systems where "\" is not a path separator or file names are case-sensitive.
with open(r"../题库0.3/Problems.json","r",encoding = "u8") as f:
    pro_dict = json.load(f)
units = ["第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元","暂无对应"]
# count1[u]: problems carrying unit u; count2[u]: those of them that also have objectives.
count1 = [0]*len(units)
count2 = [0]*len(units)
# count3: problems without any tag at all; their ids are collected in untagged.
count3 = 0
untagged = []
for id in pro_dict:
    tags = pro_dict[id]["tags"]
    joined_tags = "".join(tags)
    has_objs = len(pro_dict[id]["objs"]) > 0
    for u, unit in enumerate(units):
        if unit in joined_tags:
            count1[u] += 1
            if has_objs:
                count2[u] += 1
    if len(tags) == 0:
        count3 += 1
        untagged.append(id)
for u in range(len(units)):
    print(units[u],". 总题数:",count1[u],", 完成对应题数:",count2[u])
print("题库总题数: %d, 已有单元标签题数: %d, 未赋单元标签题数: %d."%(len(pro_dict),len(pro_dict)-count3,count3))
with open (filename,"w",encoding = "u8") as f:
    f.write(",".join(untagged))
print("未赋标签的题号列表已保存至: %s" %filename)

View File

@ -2,70 +2,56 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json,os\n",
"with open(r\"..\\题库0.3\\problems.json\",\"r\",encoding = \"u8\") as f:\n",
" database = f.read()\n",
"pro_dict = json.loads(database)\n",
"units = [\"第一单元\",\"第二单元\",\"第三单元\",\"第四单元\",\"第五单元\",\"第六单元\",\"第七单元\",\"第八单元\",\"第九单元\"]\n",
"count1 = [0]*9\n",
"count2 = [0]*9\n",
"for id in pro_dict:\n",
" for u in range(9):\n",
" unit = units[u]\n",
" if unit in \"\".join(pro_dict[id][\"tags\"]):\n",
" count1[u] += 1\n",
" if len(pro_dict[id][\"objs\"]) > 0:\n",
" count2[u] += 1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"第一单元 . 总题数: 2014 , 完成对应题数: 1400\n",
"第二单元 . 总题数: 3111 , 完成对应题数: 2127\n",
"第三单元 . 总题数: 2218 , 完成对应题数: 395\n",
"第四单元 . 总题数: 1444 , 完成对应题数: 1036\n",
"第五单元 . 总题数: 1433 , 完成对应题数: 306\n",
"第六单元 . 总题数: 1383 , 完成对应题数: 431\n",
"第七单元 . 总题数: 1712 , 完成对应题数: 957\n",
"第八单元 . 总题数: 1378 , 完成对应题数: 522\n",
"第九单元 . 总题数: 422 , 完成对应题数: 101\n"
"第一单元 . 总题数: 2213 , 完成对应题数: 1401\n",
"第二单元 . 总题数: 3472 , 完成对应题数: 2135\n",
"第三单元 . 总题数: 2426 , 完成对应题数: 395\n",
"第四单元 . 总题数: 1657 , 完成对应题数: 1037\n",
"第五单元 . 总题数: 1590 , 完成对应题数: 307\n",
"第六单元 . 总题数: 1555 , 完成对应题数: 431\n",
"第七单元 . 总题数: 1996 , 完成对应题数: 958\n",
"第八单元 . 总题数: 1526 , 完成对应题数: 525\n",
"第九单元 . 总题数: 473 , 完成对应题数: 101\n",
"暂无对应 . 总题数: 344 , 完成对应题数: 121\n",
"题库总题数: 18795, 已有单元标签题数: 16941, 未赋单元标签题数: 1854.\n",
"未赋标签的题号列表已保存至: 文本文件/题号筛选.txt\n"
]
}
],
"source": [
"import json,os\n",
"\n",
"filename = r\"文本文件/题号筛选.txt\"\n",
"with open(r\"..\\题库0.3\\problems.json\",\"r\",encoding = \"u8\") as f:\n",
" database = f.read()\n",
"pro_dict = json.loads(database)\n",
"units = [\"第一单元\",\"第二单元\",\"第三单元\",\"第四单元\",\"第五单元\",\"第六单元\",\"第七单元\",\"第八单元\",\"第九单元\",\"暂无对应\"]\n",
"count1 = [0]*10\n",
"count2 = [0]*10\n",
"count3 = 0\n",
"untagged = []\n",
"for id in pro_dict:\n",
" for u in range(10):\n",
" unit = units[u]\n",
" if unit in \"\".join(pro_dict[id][\"tags\"]):\n",
" count1[u] += 1\n",
" if len(pro_dict[id][\"objs\"]) > 0:\n",
" count2[u] += 1\n",
" if len(pro_dict[id][\"tags\"]) == 0:\n",
" count3 += 1\n",
" untagged.append(id)\n",
"for u in range(len(units)):\n",
" print(units[u],\". 总题数:\",count1[u],\", 完成对应题数:\",count2[u])\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(12684, 7269)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(sum(count1),sum(count2))"
" print(units[u],\". 总题数:\",count1[u],\", 完成对应题数:\",count2[u])\n",
"print(\"题库总题数: %d, 已有单元标签题数: %d, 未赋单元标签题数: %d.\"%(len(pro_dict),len(pro_dict)-count3,count3))\n",
"with open (filename,\"w\",encoding = \"u8\") as f:\n",
" f.write(\",\".join(untagged))\n",
"print(\"未赋标签的题号列表已保存至: %s\" %(filename))\n"
]
},
{
@ -92,7 +78,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15 (main, Nov 24 2022, 14:39:17) [MSC v.1916 64 bit (AMD64)]"
"version": "3.9.15"
},
"orig_nbformat": 4,
"vscode": {

View File

@ -2,12 +2,22 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"相同题目已标注: 000100 031239\n",
"关联题目已标注: 000466 030610\n"
]
}
],
"source": [
"import os,re,json\n",
"\n",
"filename = \"临时文件/相似题目.txt\"\n",
"\n",
"# 读取题库数据并转换为字典\n",
"with open(r\"../题库0.3/Problems.json\",\"r\",encoding = \"utf8\") as f:\n",
@ -15,7 +25,7 @@
"pro_dict = json.loads(database)\n",
"\n",
"# 读取已分类的相似文件列表\n",
"with open(\"临时文件/相似题目.txt\",\"r\",encoding = \"utf8\") as f:\n",
"with open(filename,\"r\",encoding = \"utf8\") as f:\n",
" similar_text = \"\\n\"+f.read()\n",
"\n",
"similar_types = re.findall(r\"\\n[\\d]\\.[\\d]{4}[\\s]*([srSRnN ])[\\s]*\\n\",similar_text)\n",
@ -47,6 +57,13 @@
"with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(database)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -65,7 +82,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.8.15"
},
"orig_nbformat": 4,
"vscode": {

View File

@ -9,7 +9,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"旧题目数: 0 , 新题目数: 18100\n",
"旧题目数: 1907 , 新题目数: 500\n",
"开始新题与旧题的比对\n",
"50\n",
"100\n",
@ -21,358 +21,6 @@
"400\n",
"450\n",
"500\n",
"550\n",
"600\n",
"650\n",
"700\n",
"750\n",
"800\n",
"850\n",
"900\n",
"950\n",
"1000\n",
"1050\n",
"1100\n",
"1150\n",
"1200\n",
"1250\n",
"1300\n",
"1350\n",
"1400\n",
"1450\n",
"1500\n",
"1550\n",
"1600\n",
"1650\n",
"1700\n",
"1750\n",
"1800\n",
"1850\n",
"1900\n",
"1950\n",
"2000\n",
"2050\n",
"2100\n",
"2150\n",
"2200\n",
"2250\n",
"2300\n",
"2350\n",
"2400\n",
"2450\n",
"2500\n",
"2550\n",
"2600\n",
"2650\n",
"2700\n",
"2750\n",
"2800\n",
"2850\n",
"2900\n",
"2950\n",
"3000\n",
"3050\n",
"3100\n",
"3150\n",
"3200\n",
"3250\n",
"3300\n",
"3350\n",
"3400\n",
"3450\n",
"3500\n",
"3550\n",
"3600\n",
"3650\n",
"3700\n",
"3750\n",
"3800\n",
"3850\n",
"3900\n",
"3950\n",
"4000\n",
"4050\n",
"4100\n",
"4150\n",
"4200\n",
"4250\n",
"4300\n",
"4350\n",
"4400\n",
"4450\n",
"4500\n",
"4550\n",
"4600\n",
"4650\n",
"4700\n",
"4750\n",
"4800\n",
"4850\n",
"4900\n",
"4950\n",
"5000\n",
"5050\n",
"5100\n",
"5150\n",
"5200\n",
"5250\n",
"5300\n",
"5350\n",
"5400\n",
"5450\n",
"5500\n",
"5550\n",
"5600\n",
"5650\n",
"5700\n",
"5750\n",
"5800\n",
"5850\n",
"5900\n",
"5950\n",
"6000\n",
"6050\n",
"6100\n",
"6150\n",
"6200\n",
"6250\n",
"6300\n",
"6350\n",
"6400\n",
"6450\n",
"6500\n",
"6550\n",
"6600\n",
"6650\n",
"6700\n",
"6750\n",
"6800\n",
"6850\n",
"6900\n",
"6950\n",
"7000\n",
"7050\n",
"7100\n",
"7150\n",
"7200\n",
"7250\n",
"7300\n",
"7350\n",
"7400\n",
"7450\n",
"7500\n",
"7550\n",
"7600\n",
"7650\n",
"7700\n",
"7750\n",
"7800\n",
"7850\n",
"7900\n",
"7950\n",
"8000\n",
"8050\n",
"8100\n",
"8150\n",
"8200\n",
"8250\n",
"8300\n",
"8350\n",
"8400\n",
"8450\n",
"8500\n",
"8550\n",
"8600\n",
"8650\n",
"8700\n",
"8750\n",
"8800\n",
"8850\n",
"8900\n",
"8950\n",
"9000\n",
"9050\n",
"9100\n",
"9150\n",
"9200\n",
"9250\n",
"9300\n",
"9350\n",
"9400\n",
"9450\n",
"9500\n",
"9550\n",
"9600\n",
"9650\n",
"9700\n",
"9750\n",
"9800\n",
"9850\n",
"9900\n",
"9950\n",
"10000\n",
"10050\n",
"10100\n",
"10150\n",
"10200\n",
"10250\n",
"10300\n",
"10350\n",
"10400\n",
"10450\n",
"10500\n",
"10550\n",
"10600\n",
"10650\n",
"10700\n",
"10750\n",
"10800\n",
"10850\n",
"10900\n",
"10950\n",
"11000\n",
"11050\n",
"11100\n",
"11150\n",
"11200\n",
"11250\n",
"11300\n",
"11350\n",
"11400\n",
"11450\n",
"11500\n",
"11550\n",
"11600\n",
"11650\n",
"11700\n",
"11750\n",
"11800\n",
"11850\n",
"11900\n",
"11950\n",
"12000\n",
"12050\n",
"12100\n",
"12150\n",
"12200\n",
"12250\n",
"12300\n",
"12350\n",
"12400\n",
"12450\n",
"12500\n",
"12550\n",
"12600\n",
"12650\n",
"12700\n",
"12750\n",
"12800\n",
"12850\n",
"12900\n",
"12950\n",
"13000\n",
"13050\n",
"13100\n",
"13150\n",
"13200\n",
"13250\n",
"13300\n",
"13350\n",
"13400\n",
"13450\n",
"13500\n",
"13550\n",
"13600\n",
"13650\n",
"13700\n",
"13750\n",
"13800\n",
"13850\n",
"13900\n",
"13950\n",
"14000\n",
"14050\n",
"14100\n",
"14150\n",
"14200\n",
"14250\n",
"14300\n",
"14350\n",
"14400\n",
"14450\n",
"14500\n",
"14550\n",
"14600\n",
"14650\n",
"14700\n",
"14750\n",
"14800\n",
"14850\n",
"14900\n",
"14950\n",
"15000\n",
"15050\n",
"15100\n",
"15150\n",
"15200\n",
"15250\n",
"15300\n",
"15350\n",
"15400\n",
"15450\n",
"15500\n",
"15550\n",
"15600\n",
"15650\n",
"15700\n",
"15750\n",
"15800\n",
"15850\n",
"15900\n",
"15950\n",
"16000\n",
"16050\n",
"16100\n",
"16150\n",
"16200\n",
"16250\n",
"16300\n",
"16350\n",
"16400\n",
"16450\n",
"16500\n",
"16550\n",
"16600\n",
"16650\n",
"16700\n",
"16750\n",
"16800\n",
"16850\n",
"16900\n",
"16950\n",
"17000\n",
"17050\n",
"17100\n",
"17150\n",
"17200\n",
"17250\n",
"17300\n",
"17350\n",
"17400\n",
"17450\n",
"17500\n",
"17550\n",
"17600\n",
"17650\n",
"17700\n",
"17750\n",
"17800\n",
"17850\n",
"17900\n",
"17950\n",
"18000\n",
"18050\n",
"18100\n",
"开始新题之间的比对\n",
"50\n",
"100\n",
@ -383,370 +31,18 @@
"350\n",
"400\n",
"450\n",
"500\n",
"550\n",
"600\n",
"650\n",
"700\n",
"750\n",
"800\n",
"850\n",
"900\n",
"950\n",
"1000\n",
"1050\n",
"1100\n",
"1150\n",
"1200\n",
"1250\n",
"1300\n",
"1350\n",
"1400\n",
"1450\n",
"1500\n",
"1550\n",
"1600\n",
"1650\n",
"1700\n",
"1750\n",
"1800\n",
"1850\n",
"1900\n",
"1950\n",
"2000\n",
"2050\n",
"2100\n",
"2150\n",
"2200\n",
"2250\n",
"2300\n",
"2350\n",
"2400\n",
"2450\n",
"2500\n",
"2550\n",
"2600\n",
"2650\n",
"2700\n",
"2750\n",
"2800\n",
"2850\n",
"2900\n",
"2950\n",
"3000\n",
"3050\n",
"3100\n",
"3150\n",
"3200\n",
"3250\n",
"3300\n",
"3350\n",
"3400\n",
"3450\n",
"3500\n",
"3550\n",
"3600\n",
"3650\n",
"3700\n",
"3750\n",
"3800\n",
"3850\n",
"3900\n",
"3950\n",
"4000\n",
"4050\n",
"4100\n",
"4150\n",
"4200\n",
"4250\n",
"4300\n",
"4350\n",
"4400\n",
"4450\n",
"4500\n",
"4550\n",
"4600\n",
"4650\n",
"4700\n",
"4750\n",
"4800\n",
"4850\n",
"4900\n",
"4950\n",
"5000\n",
"5050\n",
"5100\n",
"5150\n",
"5200\n",
"5250\n",
"5300\n",
"5350\n",
"5400\n",
"5450\n",
"5500\n",
"5550\n",
"5600\n",
"5650\n",
"5700\n",
"5750\n",
"5800\n",
"5850\n",
"5900\n",
"5950\n",
"6000\n",
"6050\n",
"6100\n",
"6150\n",
"6200\n",
"6250\n",
"6300\n",
"6350\n",
"6400\n",
"6450\n",
"6500\n",
"6550\n",
"6600\n",
"6650\n",
"6700\n",
"6750\n",
"6800\n",
"6850\n",
"6900\n",
"6950\n",
"7000\n",
"7050\n",
"7100\n",
"7150\n",
"7200\n",
"7250\n",
"7300\n",
"7350\n",
"7400\n",
"7450\n",
"7500\n",
"7550\n",
"7600\n",
"7650\n",
"7700\n",
"7750\n",
"7800\n",
"7850\n",
"7900\n",
"7950\n",
"8000\n",
"8050\n",
"8100\n",
"8150\n",
"8200\n",
"8250\n",
"8300\n",
"8350\n",
"8400\n",
"8450\n",
"8500\n",
"8550\n",
"8600\n",
"8650\n",
"8700\n",
"8750\n",
"8800\n",
"8850\n",
"8900\n",
"8950\n",
"9000\n",
"9050\n",
"9100\n",
"9150\n",
"9200\n",
"9250\n",
"9300\n",
"9350\n",
"9400\n",
"9450\n",
"9500\n",
"9550\n",
"9600\n",
"9650\n",
"9700\n",
"9750\n",
"9800\n",
"9850\n",
"9900\n",
"9950\n",
"10000\n",
"10050\n",
"10100\n",
"10150\n",
"10200\n",
"10250\n",
"10300\n",
"10350\n",
"10400\n",
"10450\n",
"10500\n",
"10550\n",
"10600\n",
"10650\n",
"10700\n",
"10750\n",
"10800\n",
"10850\n",
"10900\n",
"10950\n",
"11000\n",
"11050\n",
"11100\n",
"11150\n",
"11200\n",
"11250\n",
"11300\n",
"11350\n",
"11400\n",
"11450\n",
"11500\n",
"11550\n",
"11600\n",
"11650\n",
"11700\n",
"11750\n",
"11800\n",
"11850\n",
"11900\n",
"11950\n",
"12000\n",
"12050\n",
"12100\n",
"12150\n",
"12200\n",
"12250\n",
"12300\n",
"12350\n",
"12400\n",
"12450\n",
"12500\n",
"12550\n",
"12600\n",
"12650\n",
"12700\n",
"12750\n",
"12800\n",
"12850\n",
"12900\n",
"12950\n",
"13000\n",
"13050\n",
"13100\n",
"13150\n",
"13200\n",
"13250\n",
"13300\n",
"13350\n",
"13400\n",
"13450\n",
"13500\n",
"13550\n",
"13600\n",
"13650\n",
"13700\n",
"13750\n",
"13800\n",
"13850\n",
"13900\n",
"13950\n",
"14000\n",
"14050\n",
"14100\n",
"14150\n",
"14200\n",
"14250\n",
"14300\n",
"14350\n",
"14400\n",
"14450\n",
"14500\n",
"14550\n",
"14600\n",
"14650\n",
"14700\n",
"14750\n",
"14800\n",
"14850\n",
"14900\n",
"14950\n",
"15000\n",
"15050\n",
"15100\n",
"15150\n",
"15200\n",
"15250\n",
"15300\n",
"15350\n",
"15400\n",
"15450\n",
"15500\n",
"15550\n",
"15600\n",
"15650\n",
"15700\n",
"15750\n",
"15800\n",
"15850\n",
"15900\n",
"15950\n",
"16000\n",
"16050\n",
"16100\n",
"16150\n",
"16200\n",
"16250\n",
"16300\n",
"16350\n",
"16400\n",
"16450\n",
"16500\n",
"16550\n",
"16600\n",
"16650\n",
"16700\n",
"16750\n",
"16800\n",
"16850\n",
"16900\n",
"16950\n",
"17000\n",
"17050\n",
"17100\n",
"17150\n",
"17200\n",
"17250\n",
"17300\n",
"17350\n",
"17400\n",
"17450\n",
"17500\n",
"17550\n",
"17600\n",
"17650\n",
"17700\n",
"17750\n",
"17800\n",
"17850\n",
"17900\n",
"17950\n",
"18000\n",
"18050\n",
"总耗时: 384.6457269191742 秒.\n",
"发现相似: 2800 , 其中已标注: 2223 .\n"
"总耗时: 3.233793258666992 秒.\n",
"发现相似: 30 , 其中已标注: 28 .\n"
]
}
],
"source": [
"# from hashlib import new\n",
"import os,re,difflib,Levenshtein,time,json\n",
"\n",
"# 重要!!! 新题目的范围\n",
"id_new_problems = \"1:50000\"\n",
"threshold = 0.99\n",
"# 重要!!! 新旧题目的范围(有重复默认为新题)\n",
"id_new_problems = \"1:500\"\n",
"id_old_problems = \"30000:50000\"\n",
"threshold = 0.95\n",
"\n",
"#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间\n",
"def generate_number_set(string):\n",
@ -804,15 +100,16 @@
"#生成旧题目数据库字典与新题目数据库字典\n",
"new_id_list_raw = generate_number_set(id_new_problems)\n",
"new_id_list = [id for id in pro_dict if id in new_id_list_raw]\n",
"old_id_list_raw = generate_number_set(id_old_problems)\n",
"old_id_list = [id for id in pro_dict if (id in old_id_list_raw and not id in new_id_list_raw)]\n",
"old_problems_dict = {}\n",
"new_problems_dict = {}\n",
"old_problems_dict_content = {}\n",
"new_problems_dict_content = {}\n",
"for id in pro_dict:\n",
" if id in new_id_list:\n",
"for id in new_id_list:\n",
" new_problems_dict[id] = pro_dict[id]\n",
" new_problems_dict_content[id] = pre_treating(pro_dict[id][\"content\"])\n",
" else:\n",
"for id in old_id_list:\n",
" old_problems_dict[id] = pro_dict[id]\n",
" old_problems_dict_content[id] = pre_treating(pro_dict[id][\"content\"])\n",
"print(\"旧题目数:\",len(old_problems_dict),\", 新题目数:\",len(new_problems_dict))\n",
@ -876,7 +173,60 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"#记录起始时间\n",
"start_time = time.time()\n",
"suspect_count = 0\n",
"remarked = 0\n",
"\n",
"alike_problems = \"\"\n",
"\n",
"\n",
"\n",
"#开始新题与旧题的比对\n",
"count = 0\n",
"print(\"开始新题与旧题的比对\")\n",
"for id_new in new_problems_dict:\n",
" count += 1\n",
" if count % 50 == 0:\n",
" print(count)\n",
" for id_old in old_problems_dict:\n",
" similar_rate = sim_test(new_problems_dict_content[id_new],old_problems_dict_content[id_old])\n",
" if similar_rate > threshold or id_new in old_problems_dict[id_old][\"related\"] or id_new in old_problems_dict[id_old][\"same\"] or id_old in new_problems_dict[id_new][\"related\"] or id_old in new_problems_dict[id_new][\"same\"]:\n",
" suspect_count += 1\n",
" if not (id_new in old_problems_dict[id_old][\"related\"] or id_new in old_problems_dict[id_old][\"same\"] or id_old in new_problems_dict[id_new][\"related\"] or id_old in new_problems_dict[id_new][\"same\"]):\n",
" alike_problems += (\"%.4f\" %similar_rate) + \"\\n\\n\" + id_new + \" \" + new_problems_dict[id_new][\"content\"] + \"\\n\\n\" + id_old + \" \" + old_problems_dict[id_old][\"content\"] + \"\\n\\n\"\n",
" else:\n",
" remarked += 1\n",
"\n",
"#开始新题之间的比对\n",
"count = 0\n",
"print(\"开始新题之间的比对\")\n",
"while len(new_problems_dict) >= 2:\n",
" count += 1\n",
" if count % 50 == 0:\n",
" print(count)\n",
" keys = list(new_problems_dict.keys())\n",
" current_problem = new_problems_dict.pop(keys[0])\n",
" current_problem_content = new_problems_dict_content[current_problem[\"id\"]]\n",
" for id_new in new_problems_dict:\n",
" similar_rate = sim_test(new_problems_dict_content[id_new],current_problem_content)\n",
" if similar_rate > threshold or id_new in current_problem[\"related\"] or id_new in current_problem[\"same\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"related\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"same\"]:\n",
" suspect_count += 1\n",
" if not (id_new in current_problem[\"related\"] or id_new in current_problem[\"same\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"related\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"same\"]):\n",
" alike_problems += (\"%.4f\" %similar_rate) + \"\\n\\n\" + id_new + \" \" + new_problems_dict[id_new][\"content\"] + \"\\n\\n\" + current_problem[\"id\"] + \" \" + current_problem[\"content\"] + \"\\n\\n\"\n",
" else:\n",
" remarked += 1\n",
"\n",
"\n",
"#记录终止时间及显示结果\n",
"end_time = time.time()\n",
"print(\"总耗时:\",end_time-start_time,\"秒.\")\n",
"print(\"发现相似: \",suspect_count,\", 其中已标注: \",remarked,\".\")\n",
"\n",
"with open(\"临时文件/相似题目.txt\",\"w\",encoding=\"utf8\") as f:\n",
" f.write(alike_problems)"
]
}
],
"metadata": {
@ -895,7 +245,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.8.15"
},
"orig_nbformat": 4,
"vscode": {

View File

@ -0,0 +1,41 @@
import os,re,json
# Reviewed similarity list: each entry is a score line (optionally followed by
# a mark letter) and two lines that start with a six-digit problem id.
filename = "临时文件/相似题目.txt"

# Load the problem database into a dict keyed by problem id.
with open(r"../题库0.3/Problems.json", "r", encoding="utf8") as f:
    pro_dict = json.load(f)

# Read the reviewed list; the leading newline lets both patterns below match
# an entry that sits on the very first line of the file.
with open(filename, "r", encoding="utf8") as f:
    similar_text = "\n" + f.read()

# One mark per score line, and the ids of the problems listed under it.
similar_types = re.findall(r"\n[\d]\.[\d]{4}[\s]*([srSRnN ])[\s]*\n", similar_text)
similar_problems = re.findall(r"\n([\d]{6}) ", similar_text)

# Mark letter -> (database field to update, message printed after updating).
# Any other mark (N or blank) leaves the pair untouched.
mark_actions = {
    "S": ("same", "相同题目已标注:"),
    "R": ("related", "关联题目已标注:"),
}

if len(similar_problems) == 2 * len(similar_types):
    # Consecutive ids form the pair belonging to the corresponding mark.
    id_pairs = zip(similar_problems[0::2], similar_problems[1::2])
    for mark, (id1, id2) in zip(similar_types, id_pairs):
        action = mark_actions.get(mark.upper())
        if action is None:
            continue
        field, message = action
        # Record the relation in both directions, avoiding duplicates.
        if id2 not in pro_dict[id1][field]:
            pro_dict[id1][field].append(id2)
        if id1 not in pro_dict[id2][field]:
            pro_dict[id2][field].append(id1)
        print(message, id1, id2)
else:
    # The number of marks and the number of ids do not line up.
    print("相似程度数据:",len(similar_types),"个, 相似题目:",len(similar_problems),"题. 数据有问题, 请检查.")

# Serialize the database and write it back to its original location.
database = json.dumps(pro_dict, indent=4, ensure_ascii=False)
with open(r"../题库0.3/Problems.json", "w", encoding="utf8") as f:
    f.write(database)

File diff suppressed because it is too large Load Diff

View File

@ -2,22 +2,14 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"040387 填空题\n",
"040388 填空题\n",
"040389 填空题\n",
"040390 填空题\n",
"040391 填空题\n",
"040392 填空题\n",
"040393 填空题\n",
"040394 填空题\n",
"040395 解答题\n"
"均已确定题目类型\n"
]
}
],
@ -29,9 +21,11 @@
" database = f.read()\n",
"pro_dict = json.loads(database)\n",
"\n",
"count = 0\n",
"#根据特征字符识别题目类型\n",
"for p in pro_dict:\n",
" if pro_dict[p][\"genre\"] == \"\":\n",
" count += 1\n",
" if \"bracket\" in pro_dict[p][\"content\"]:\n",
" pro_dict[p][\"genre\"] = \"选择题\"\n",
" print(p,\"选择题\")\n",
@ -46,7 +40,11 @@
"#将修改结果写入json数据库\n",
"database = json.dumps(pro_dict,indent = 4, ensure_ascii= False)\n",
"with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(database)"
" f.write(database)\n",
"if count > 0:\n",
" print(\"为 %d 道题目确定类型\" %count)\n",
"else:\n",
" print(\"均已确定题目类型\")"
]
},
{

View File

@ -0,0 +1,31 @@
import os,re,json
# Load the problem database into a dict keyed by problem id.
with open(r"../题库0.3/Problems.json", "r", encoding="utf8") as f:
    pro_dict = json.load(f)

# Number of problems whose genre was empty and is filled in by this run.
count = 0

# Infer the genre from marker words in the problem content.
for problem_id, problem in pro_dict.items():
    if problem["genre"] != "":
        continue  # already classified
    count += 1
    content = problem["content"]
    if "bracket" in content:
        genre = "选择题"
    elif "blank" in content:
        genre = "填空题"
    else:
        genre = "解答题"
        # Problems of this genre additionally get a "space" value.
        problem["space"] = "12ex"
    problem["genre"] = genre
    print(problem_id, genre)

# Write the updated database back to the JSON file.
database = json.dumps(pro_dict, indent=4, ensure_ascii=False)
with open(r"../题库0.3/Problems.json", "w", encoding="utf8") as f:
    f.write(database)

if count > 0:
    print("%d 道题目确定类型" %count)
else:
    print("均已确定题目类型")

View File

@ -0,0 +1,66 @@
import os,re,json,sys
# Objective range: replace this definition before each use.
# It is split on "," further down; each entry is matched by within_range
# either as an exact id or as a "start:end" closed range.
"""使用前替换范围定义"""
obj_range = "K0810:K0817999"
"""范围定义到此结束"""
# File naming: `index` and `title` form the output file name and are also
# substituted into the template's placeholders further down.
"""规定文件名"""
index = "38"
title = "计数原理与排列组合"
"""文件名到此结束"""
# Output .tex path and the LaTeX template it is generated from.
filename = index+"_"+title
outputfile = r"临时文件/"+filename+".tex"
template_file = r"模板文件/复习课目标模板.tex"
def within_range(string,list):
    """Return True if *string* is covered by any entry of *list*.

    Each entry is either a single value, matched exactly, or a closed range
    written as ``start:end``, matched with ordinary string comparison
    (``start <= string <= end``). Whitespace around an entry and around
    either range endpoint is ignored, so ``"K0810 : K0817"`` behaves the
    same as ``"K0810:K0817"``.

    Args:
        string: The value to look up.
        list: Iterable of entry strings. The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        True if some entry matches, otherwise False (also for an empty list).

    Raises:
        ValueError: If an entry contains more than one ``:``.
    """
    for item in list:
        item = item.strip()
        if string == item:
            return True
        if ":" in item:
            # Strip both endpoints: a stray space would otherwise shift the
            # bound and exclude the range's own first value.
            start, end = (bound.strip() for bound in item.split(":"))
            if start <= string <= end:
                return True
    return False
# Load the lesson-objective database into a dict keyed by objective id.
with open(r"../题库0.3/LessonObj.json", "r", encoding="utf8") as f:
    obj_dict = json.load(f)

# Split the configured range expression into individual entries.
obj_range_list = obj_range.split(",")

# Build one table row per selected objective: id, content, and an empty cell.
table_rows = [
    obj_id + " & " + obj_dict[obj_id]["content"] + " & " + r"\\ \hline" + "\n"
    for obj_id in obj_dict
    if within_range(obj_id, obj_range_list)
]
output_string = "".join(table_rows)

# Read the LaTeX template.
with open(template_file, "r", encoding="utf8") as f:
    latex_raw = f.read()

# Outside Windows, switch the font set to fandol and comment out the
# \setCJKmainfont lines of the template.
if sys.platform != "win32":
    latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
    latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

# Fill in the template placeholders.
substitutions = (
    ("编号待替换", index),
    ("标题待替换", title),
    ("内容待替换", output_string),
)
for placeholder, value in substitutions:
    latex_raw = latex_raw.replace(placeholder, value)

# Write the generated document into the temporary folder.
with open(outputfile, "w", encoding="utf8") as f:
    f.write(latex_raw)

print("开始编译目标pdf文件: ", outputfile)
# xelatex is run twice; the exit status of the second run is printed.
compile_command = "xelatex -interaction=batchmode -output-directory=" + "临时文件" + " " + outputfile
os.system(compile_command)
print(os.system(compile_command))

View File

@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os,re\n",
"import win32clipboard as wc\n",
"import win32con\n",
"\n",
"# 获取剪切板内容\n",
"def getCopy():\n",
" wc.OpenClipboard()\n",
" t = wc.GetClipboardData(win32con.CF_UNICODETEXT)\n",
" wc.CloseClipboard()\n",
" return t\n",
"\n",
"# 写入剪切板内容\n",
"def setCopy(str):\n",
" wc.OpenClipboard()\n",
" wc.EmptyClipboard()\n",
" wc.SetClipboardData(win32con.CF_UNICODETEXT, str)\n",
" wc.CloseClipboard()\n",
"\n",
"def dollared(string):\n",
" flag = True\n",
" for c in string:\n",
" if not c in \"1234567890.+-:[]()\":\n",
" flag = False\n",
" break\n",
" if flag:\n",
" string = \"$\" + string + \"$\"\n",
" return string\n",
"\n",
"\n",
"\n",
"data = getCopy()\n",
"\n",
"data1 = \"\"\n",
"for c in data:\n",
" if 65296 <= ord(c) < 65306:\n",
" data1 += str(ord(c)-65296)\n",
" else:\n",
" data1 += c\n",
"data = data1\n",
"data = data.replace(\"\",\".\").replace(\"\",\":\")\n",
"elements = data.split(\"\\n\")\n",
"elements_per_line = int(elements.pop(-1)) #这里需要修改\n",
"contents = \"\\\\begin{center}\\n\\\\begin{tabular}{|\"\n",
"for i in range(elements_per_line):\n",
" contents += \"c|\"\n",
"contents += \"}\\n\"\n",
"contents += r\"\\hline\"+\"\\n\"\n",
"col = 1\n",
"for element in elements:\n",
" if col != 1:\n",
" contents += \" & \"\n",
" contents += dollared(element)\n",
" if col == elements_per_line:\n",
" contents += r\" \\\\ \\hline\"+\"\\n\"\n",
" col += 1\n",
" if col > elements_per_line:\n",
" col = 1\n",
"contents += \"\\\\end{tabular}\" + \"\\n\" + \"\\\\end{center}\"\n",
"\n",
"with open(\"临时文件/tablefile.txt\",\"w\",encoding = \"utf8\") as f:\n",
" f.write(contents)\n",
"\n",
"setCopy(contents)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "pythontest",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "91219a98e0e9be72efb992f647fe78b593124968b75db0b865552d6787c8db93"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large Load Diff