20230403 afternoon
This commit is contained in:
parent
3a14be5815
commit
69687a0601
|
|
@ -19,7 +19,7 @@
|
|||
"source": [
|
||||
"import os,re,json\n",
|
||||
"\"\"\"这里编辑题号(列表)后将在vscode中打开窗口, 编辑后保存关闭, 随后运行第二个代码块\"\"\"\n",
|
||||
"problems = \"31158:31196\"\n",
|
||||
"problems = \"14085\"\n",
|
||||
"\n",
|
||||
"def generate_number_set(string,dict):\n",
|
||||
" string = re.sub(r\"[\\n\\s]\",\"\",string)\n",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import os,re,json
|
||||
"""这里编辑题号(列表)后将在vscode中打开窗口, 编辑后保存关闭, 随后运行第二个代码块"""
|
||||
problems = "1"
|
||||
problems = "10000,10003"
|
||||
|
||||
def generate_number_set(string,dict):
|
||||
string = re.sub(r"[\n\s]","",string)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
import os,re,difflib,Levenshtein,time,json
|
||||
|
||||
# 相同题目的阈值
|
||||
threshold = 0.99
|
||||
|
||||
outputfile = r"临时文件/相同题目列表.txt"
|
||||
|
||||
#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间
|
||||
def generate_number_set(string):
    """Expand a problem-id expression into a list of 6-digit id strings.

    The expression is a comma-separated list of segments. A segment is
    either a single number or ``start:end``, which denotes the closed
    integer interval from ``start`` to ``end``. All whitespace in the
    expression is ignored.

    Args:
        string: The id expression, e.g. ``"1:3,5"``.

    Returns:
        A list of ids, each left-padded with zeros to six characters, in
        the order the segments appear.

    Raises:
        ValueError: If a range bound is not an integer or a segment
            contains more than one ``:``.
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for s in string.split(","):
        if not s:
            # An empty segment (empty input, doubled or trailing comma)
            # would otherwise be padded into the bogus id "000000".
            continue
        if ":" in s:
            start, end = s.split(":")
            numbers_list.extend(
                str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
            )
        else:
            numbers_list.append(s.zfill(6))
    return numbers_list
|
||||
|
||||
#字符串预处理
|
||||
def pre_treating(string):
    """Strip layout and markup noise from a problem text before comparison.

    The following are deleted, in this order: every
    ``\\begin{center}...\\end{center}`` span; the tokens ``bracket{N}``,
    ``blank{N}``, ``fourch``, ``twoch`` and ``onech``; whitespace together
    with backslashes, braces, dollar signs, parentheses and square
    brackets; newlines and tabs; the words ``displaystyle`` and
    ``overrightarrow``; and the punctuation marks ``, . : ; ?``.

    Args:
        string: The raw problem text.

    Returns:
        The text with all of the above removed.
    """
    removal_patterns = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)",
        r"[,\.:;?]",
    )
    cleaned = string
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
|
||||
|
||||
#difflab字符串比较
|
||||
def difflab_get_equal_rate(str1, str2):
    """Return the ``difflib.SequenceMatcher`` ratio of the two strings.

    The strings are compared exactly as passed in; no pre-processing is
    applied inside this function.
    """
    matcher = difflib.SequenceMatcher(None, str1, str2)
    return matcher.ratio()
|
||||
|
||||
#Levenshtein jaro字符串比较
|
||||
def jaro_get_equal_rate(str1,str2):
    """Return ``Levenshtein.jaro`` of the two strings.

    The strings are compared exactly as passed in; no pre-processing is
    applied inside this function.
    """
    similarity = Levenshtein.jaro(str1, str2)
    return similarity
|
||||
|
||||
#Levenshtein 字符串比较
|
||||
def Lev_get_equal_rate(str1,str2):
    """Return ``Levenshtein.ratio`` of the two strings.

    The strings are compared exactly as passed in; no pre-processing is
    applied inside this function.
    """
    similarity = Levenshtein.ratio(str1, str2)
    return similarity
|
||||
|
||||
|
||||
|
||||
|
||||
#指定对比方法
|
||||
# Similarity function used for every pairwise comparison below.
sim_test = jaro_get_equal_rate

# Load the problem database.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Shallow copy of every record with its "content" replaced by the
# pre-treated text, so pre-treatment runs once per problem.
pro_dict_treated = {}
for problem_id, record in pro_dict.items():
    treated_record = record.copy()
    treated_record["content"] = pre_treating(treated_record["content"])
    pro_dict_treated[problem_id] = treated_record

print("题目数:",len(pro_dict))

# Record the start time.
starttime = time.time()
alike_problems = ""

count = 0
keys = list(pro_dict_treated.keys())
while len(keys) >= 2:
    count += 1
    if count % 500 == 0:
        print(count)

    # Take the first remaining id and compare it with every id still queued.
    currentid = keys.pop(0)
    content1 = pro_dict_treated[currentid]["content"]
    current_record = pro_dict[currentid]
    # Ids already listed under "same" or "related" are skipped; the rest
    # are kept when their similarity exceeds the threshold.
    same = [
        other_id
        for other_id in keys
        if other_id not in current_record["same"]
        and other_id not in current_record["related"]
        and sim_test(content1, pro_dict_treated[other_id]["content"]) > threshold
    ]
    if same:
        # Matched ids leave the queue so they do not start a group of
        # their own later.
        for matched_id in same:
            keys.remove(matched_id)
        alike_problems += currentid + "," + ",".join(same) + "\n\n"

endtime = time.time()
print("耗时: %.3f秒" %(endtime-starttime))

with open(outputfile,"w",encoding = "u8") as f:
    f.write(alike_problems)
|
||||
|
|
@ -2,7 +2,7 @@ import os,re,json
|
|||
|
||||
"""---设置关键字, 同一field下不同选项为or关系, 同一字典中不同字段间为and关系, 不同字典间为or关系, _not表示列表中的关键字都不含, 同一字典中的数字用来供应同一字段不同的条件之间的and---"""
|
||||
keywords_dict_table = [
|
||||
{"origin":["一模"]}
|
||||
{"origin":["杨浦"]}
|
||||
]
|
||||
"""---关键字设置完毕---"""
|
||||
# 示例: keywords_dict_table = [
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
"filepath = \"数据导入作业文件\"\n",
|
||||
"\n",
|
||||
"# date = str(time.localtime().tm_year)+str(time.localtime().tm_mon).zfill(2)+str(time.localtime().tm_mday).zfill(2)\n",
|
||||
"date = \"20230329\"\n",
|
||||
"date = \"20230331\"\n",
|
||||
"\n",
|
||||
"#生成文件名tex_file和zip_file\n",
|
||||
"files = [os.path.join(filepath,f) for f in os.listdir(filepath)]\n",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,116 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os,re\n",
|
||||
"import win32clipboard as wc\n",
|
||||
"import win32con\n",
|
||||
"\n",
|
||||
"# 获取剪切板内容\n",
|
||||
"def getCopy():\n",
|
||||
" wc.OpenClipboard()\n",
|
||||
" t = wc.GetClipboardData(win32con.CF_UNICODETEXT)\n",
|
||||
" wc.CloseClipboard()\n",
|
||||
" return t\n",
|
||||
"\n",
|
||||
"# 写入剪切板内容\n",
|
||||
"def setCopy(str):\n",
|
||||
" wc.OpenClipboard()\n",
|
||||
" wc.EmptyClipboard()\n",
|
||||
" wc.SetClipboardData(win32con.CF_UNICODETEXT, str)\n",
|
||||
" wc.CloseClipboard()\n",
|
||||
"\n",
|
||||
"def dollared(string):\n",
|
||||
" flag = True\n",
|
||||
" for c in string:\n",
|
||||
" if not c in \"1234567890.+-:[]()\":\n",
|
||||
" flag = False\n",
|
||||
" break\n",
|
||||
" if flag:\n",
|
||||
" string = \"$\" + string + \"$\"\n",
|
||||
" return string\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"data = getCopy()\n",
|
||||
"\n",
|
||||
"data1 = \"\"\n",
|
||||
"for c in data:\n",
|
||||
" if 65296 <= ord(c) < 65306:\n",
|
||||
" data1 += str(ord(c)-65296)\n",
|
||||
" else:\n",
|
||||
" data1 += c\n",
|
||||
"data = data1\n",
|
||||
"data = data.replace(\".\",\".\").replace(\":\",\":\")\n",
|
||||
"elements = data.split(\"\\n\")\n",
|
||||
"elements_per_line = int(elements.pop(-1)) #这里需要修改\n",
|
||||
"contents = \"\\\\begin{center}\\n\\\\begin{tabular}{|\"\n",
|
||||
"for i in range(elements_per_line):\n",
|
||||
" contents += \"c|\"\n",
|
||||
"contents += \"}\\n\"\n",
|
||||
"contents += r\"\\hline\"+\"\\n\"\n",
|
||||
"col = 1\n",
|
||||
"for element in elements:\n",
|
||||
" if col != 1:\n",
|
||||
" contents += \" & \"\n",
|
||||
" contents += dollared(element.strip())\n",
|
||||
" if col == elements_per_line:\n",
|
||||
" contents += r\" \\\\ \\hline\"+\"\\n\"\n",
|
||||
" col += 1\n",
|
||||
" if col > elements_per_line:\n",
|
||||
" col = 1\n",
|
||||
"contents += \"\\\\end{tabular}\" + \"\\n\" + \"\\\\end{center}\"\n",
|
||||
"\n",
|
||||
"with open(\"临时文件/tablefile.txt\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(contents)\n",
|
||||
"\n",
|
||||
"setCopy(contents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "pythontest",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "91219a98e0e9be72efb992f647fe78b593124968b75db0b865552d6787c8db93"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
import os,re
|
||||
import win32clipboard as wc
|
||||
import win32con
|
||||
|
||||
# 获取剪切板内容
|
||||
def getCopy():
    """Return the clipboard contents in the ``CF_UNICODETEXT`` format.

    The clipboard is closed in a ``finally`` clause, so it is released
    even when ``GetClipboardData`` raises.

    Returns:
        Whatever ``wc.GetClipboardData(win32con.CF_UNICODETEXT)`` returns.
    """
    wc.OpenClipboard()
    try:
        return wc.GetClipboardData(win32con.CF_UNICODETEXT)
    finally:
        wc.CloseClipboard()
|
||||
|
||||
# 写入剪切板内容
|
||||
def setCopy(str):
    """Replace the clipboard contents with the given text.

    The clipboard is emptied and then filled with ``str`` in the
    ``CF_UNICODETEXT`` format. It is closed in a ``finally`` clause, so it
    is released even when emptying or setting raises.

    Args:
        str: The text to place on the clipboard. The parameter name
            shadows the builtin and is kept for compatibility with
            existing callers.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, str)
    finally:
        wc.CloseClipboard()
|
||||
|
||||
def dollared(string):
    """Wrap ``string`` in ``$...$`` when it uses only numeric-style characters.

    The string is wrapped when every character is one of
    ``1234567890.+-:[]()``; otherwise it is returned unchanged. An empty
    string passes the check vacuously and comes back as ``"$$"``.
    """
    if all(ch in "1234567890.+-:[]()" for ch in string):
        return "$" + string + "$"
    return string
|
||||
|
||||
|
||||
|
||||
data = getCopy()

# Map code points 65296-65305 (full-width digits) to ASCII digits.
data1 = "".join(
    str(ord(ch) - 65296) if 65296 <= ord(ch) < 65306 else ch
    for ch in data
)
data = data1
# NOTE(review): as written, both replace calls substitute a character with
# itself; presumably the search strings were meant to be the full-width
# forms of "." and ":" -- confirm against the original file.
data = data.replace(".",".").replace(":",":")
elements = data.split("\n")
# The last clipboard line holds the number of columns.
elements_per_line = int(elements.pop(-1)) # this needs to be modified
contents = "\\begin{center}\n\\begin{tabular}{|"
contents += "c|" * elements_per_line
contents += "}\n"
contents += r"\hline"+"\n"

# Emit the cells row by row; col is the 1-based column of the current cell.
col = 1
for element in elements:
    if col > 1:
        contents += " & "
    contents += dollared(element.strip())
    if col == elements_per_line:
        contents += r" \\ \hline"+"\n"
    col = col + 1 if col < elements_per_line else 1
contents += "\\end{tabular}" + "\n" + "\\end{center}"

with open("临时文件/tablefile.txt","w",encoding = "utf8") as f:
    f.write(contents)

setCopy(contents)
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
import os,re,difflib,Levenshtein,time,json
|
||||
|
||||
# 重要!!! 新旧题目的范围(有重复默认为新题)
|
||||
id_new_problems = "40000:41000"
|
||||
id_old_problems = "1:30000"
|
||||
threshold = 0.99
|
||||
|
||||
#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间
|
||||
def generate_number_set(string):
    """Expand a problem-id expression into a list of 6-digit id strings.

    The expression is a comma-separated list of segments. A segment is
    either a single number or ``start:end``, which denotes the closed
    integer interval from ``start`` to ``end``. All whitespace in the
    expression is ignored.

    Args:
        string: The id expression, e.g. ``"40000:41000"``.

    Returns:
        A list of ids, each left-padded with zeros to six characters, in
        the order the segments appear.

    Raises:
        ValueError: If a range bound is not an integer or a segment
            contains more than one ``:``.
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for s in string.split(","):
        if not s:
            # An empty segment (empty input, doubled or trailing comma)
            # would otherwise be padded into the bogus id "000000".
            continue
        if ":" in s:
            start, end = s.split(":")
            numbers_list.extend(
                str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
            )
        else:
            numbers_list.append(s.zfill(6))
    return numbers_list
|
||||
|
||||
#字符串预处理
|
||||
def pre_treating(string):
    """Strip layout and markup noise from a problem text before comparison.

    The following are deleted, in this order: every
    ``\\begin{center}...\\end{center}`` span; the tokens ``bracket{N}``,
    ``blank{N}``, ``fourch``, ``twoch`` and ``onech``; whitespace together
    with backslashes, braces, dollar signs, parentheses and square
    brackets; newlines and tabs; the words ``displaystyle`` and
    ``overrightarrow``; and the punctuation marks ``, . : ; ?``.

    Args:
        string: The raw problem text.

    Returns:
        The text with all of the above removed.
    """
    removal_patterns = (
        r"\\begin\{center\}[\s\S]*?\\end\{center\}",
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)",
        r"[\s\\\{\}\$\(\)\[\]]",
        r"[\n\t]",
        r"(displaystyle)|(overrightarrow)",
        r"[,\.:;?]",
    )
    cleaned = string
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned
|
||||
|
||||
#difflab字符串比较
|
||||
def difflab_get_equal_rate(str1, str2):
    """Return the ``difflib.SequenceMatcher`` ratio of the two strings.

    The strings are compared exactly as passed in; no pre-processing is
    applied inside this function.
    """
    matcher = difflib.SequenceMatcher(None, str1, str2)
    return matcher.ratio()
|
||||
|
||||
#Levenshtein jaro字符串比较
|
||||
def jaro_get_equal_rate(str1,str2):
    """Return ``Levenshtein.jaro`` of the two strings.

    The strings are compared exactly as passed in; no pre-processing is
    applied inside this function.
    """
    similarity = Levenshtein.jaro(str1, str2)
    return similarity
|
||||
|
||||
#Levenshtein 字符串比较
|
||||
def Lev_get_equal_rate(str1,str2):
    """Return ``Levenshtein.ratio`` of the two strings.

    The strings are compared exactly as passed in; no pre-processing is
    applied inside this function.
    """
    similarity = Levenshtein.ratio(str1, str2)
    return similarity
|
||||
|
||||
|
||||
|
||||
|
||||
#指定对比方法
|
||||
# Similarity function used for every comparison in this script.
sim_test = jaro_get_equal_rate

# Load the problem database.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Build the new-problem and old-problem dictionaries. The id ranges are held
# as sets so each membership test is O(1); the ranges can contain tens of
# thousands of ids and are probed once per problem in the database.
new_id_set = set(generate_number_set(id_new_problems))
old_id_set = set(generate_number_set(id_old_problems))
new_id_list = [pid for pid in pro_dict if pid in new_id_set]
# An id that falls in both ranges counts as new.
old_id_list = [pid for pid in pro_dict if pid in old_id_set and pid not in new_id_set]

new_problems_dict = {pid: pro_dict[pid] for pid in new_id_list}
old_problems_dict = {pid: pro_dict[pid] for pid in old_id_list}
# Pre-treated contents, computed once per problem.
new_problems_dict_content = {pid: pre_treating(pro_dict[pid]["content"]) for pid in new_id_list}
old_problems_dict_content = {pid: pre_treating(pro_dict[pid]["content"]) for pid in old_id_list}
print("旧题目数:",len(old_problems_dict),", 新题目数:",len(new_problems_dict))


def _is_marked(id_a, record_a, id_b, record_b):
    """Return True if either record lists the other id under "related" or "same"."""
    return (
        id_a in record_b["related"]
        or id_a in record_b["same"]
        or id_b in record_a["related"]
        or id_b in record_a["same"]
    )


# Record the start time.
start_time = time.time()
suspect_count = 0   # pairs above the threshold or already marked
remarked = 0        # of those, pairs that were already marked
report_parts = []   # joined once at the end instead of repeated string +=

# Compare every new problem with every old problem.
count = 0
print("开始新题与旧题的比对")
for id_new, new_problem in new_problems_dict.items():
    count += 1
    if count % 50 == 0:
        print(count)
    new_content = new_problems_dict_content[id_new]
    for id_old, old_problem in old_problems_dict.items():
        similar_rate = sim_test(new_content, old_problems_dict_content[id_old])
        if _is_marked(id_new, new_problem, id_old, old_problem):
            suspect_count += 1
            remarked += 1
        elif similar_rate > threshold:
            suspect_count += 1
            report_parts.append(
                ("%.4f" %similar_rate) + "\n\n"
                + id_new + " " + new_problem["content"] + "\n\n"
                + id_old + " " + old_problem["content"] + "\n\n"
            )

# Compare the new problems with each other.
count = 0
print("开始新题之间的比对")
while len(new_problems_dict) >= 2:
    count += 1
    if count % 50 == 0:
        print(count)
    # Remove the first remaining problem and compare it with all the others;
    # next(iter(...)) avoids copying the whole key list on every pass.
    current_problem = new_problems_dict.pop(next(iter(new_problems_dict)))
    current_id = current_problem["id"]
    current_problem_content = new_problems_dict_content[current_id]
    for id_new, other_problem in new_problems_dict.items():
        similar_rate = sim_test(new_problems_dict_content[id_new], current_problem_content)
        if _is_marked(id_new, other_problem, current_id, current_problem):
            suspect_count += 1
            remarked += 1
        elif similar_rate > threshold:
            suspect_count += 1
            report_parts.append(
                ("%.4f" %similar_rate) + "\n\n"
                + id_new + " " + other_problem["content"] + "\n\n"
                + current_id + " " + current_problem["content"] + "\n\n"
            )

alike_problems = "".join(report_parts)

# Record the end time and show the results.
end_time = time.time()
print("总耗时:",end_time-start_time,"秒.")
print("发现相似: ",suspect_count,", 其中已标注: ",remarked,".")

with open("临时文件/相似题目.txt","w",encoding="utf8") as f:
    f.write(alike_problems)
|
||||
52
工具/工具面板.py
52
工具/工具面板.py
|
|
@ -47,7 +47,7 @@ def run_command1():
|
|||
call(["python","试卷答案生成.py"])
|
||||
elif selectedtool == "单元标记转换":
|
||||
call(["python","单元标记转换.py"])
|
||||
elif selectedtool == "目标清点":
|
||||
elif selectedtool == "目标清点及清单生成":
|
||||
call(["python","单元课时目标题目数据清点.py"])
|
||||
elif selectedtool == "目标表体生成":
|
||||
call(["python","根据范围提取课时目标.py"])
|
||||
|
|
@ -59,6 +59,28 @@ def run_command1():
|
|||
call(["python","讲义生成.py"])
|
||||
elif selectedtool == "课时目标划分信息汇总":
|
||||
call(["python","课时目标划分信息汇总.py"])
|
||||
elif selectedtool == "识别题目类型":
|
||||
call(["python","识别题目类型.py"])
|
||||
elif selectedtool == "批量添加字段数据":
|
||||
call(["python","批量添加字段数据.py"])
|
||||
elif selectedtool == "目标挂钩清点":
|
||||
call(["python","目标挂钩清点.py"])
|
||||
elif selectedtool == "批量题号选题pdf生成":
|
||||
call(["python","批量生成题目pdf.py"])
|
||||
elif selectedtool == "目标pdf生成":
|
||||
call(["python","课时目标pdf生成.py"])
|
||||
elif selectedtool == "剪贴板表格整理":
|
||||
call(["python","剪贴板表格整理.py"])
|
||||
elif selectedtool == "手动统计结果转换":
|
||||
call(["python","手动统计结果转换.py"])
|
||||
elif selectedtool == "新题相似相同比对":
|
||||
call(["python","新题相似相同比对.py"])
|
||||
elif selectedtool == "全局未标注相同题目检测":
|
||||
call(["python","全局未标注相同题目检测.py"])
|
||||
elif selectedtool == "局部相似题目检测":
|
||||
call(["python","局部相似题目检测.py"])
|
||||
elif selectedtool == "相同相似题目标注":
|
||||
call(["python","相同相似题目标注.py"])
|
||||
LabelTool.config(text = selectedtool+"STEP1命令执行完毕")
|
||||
button1.place_forget()
|
||||
|
||||
|
|
@ -95,7 +117,12 @@ ImportMenu.add_command(label = "添加关联题目", command = lambda: SetButton
|
|||
# 设置 维护 菜单项
|
||||
MaintainenceMenu = Menu(menubar, tearoff = False)
|
||||
menubar.add_cascade(label = "维护", menu = MaintainenceMenu)
|
||||
MaintainenceMenu.add_command(label = "批量添加字段数据", command = lambda: SetButton("批量添加字段数据",1,["文本文件/metadata.txt","批量添加字段数据.py"]))
|
||||
MaintainenceMenu.add_command(label = "手动统计结果转换", command = lambda: SetButton("手动统计结果转换",1,["文本文件/metadata.txt","文本文件/手动统计结果.txt"]))
|
||||
MaintainenceMenu.add_separator()
|
||||
MaintainenceMenu.add_command(label = "修改题目", command = lambda: SetButton("修改题目数据库",2,["修改题目数据库.py"]))
|
||||
MaintainenceMenu.add_command(label = "识别题目类型", command = lambda: SetButton("识别题目类型",1,[]))
|
||||
|
||||
|
||||
# 设置 使用 菜单项
|
||||
UseMenu = Menu(menubar, tearoff = False)
|
||||
|
|
@ -104,6 +131,7 @@ UseMenu.add_command(label = "关键字筛选题号", command = lambda: SetButton
|
|||
UseMenu.add_separator()
|
||||
UseMenu.add_command(label = "讲义试卷生成", command = lambda: SetButton("讲义生成",1,["讲义生成.py"]))
|
||||
UseMenu.add_command(label = "题号选题pdf生成", command = lambda: SetButton("题号选题pdf生成",1,["题号选题pdf生成.py"]))
|
||||
UseMenu.add_command(label = "批量题号选题pdf生成", command = lambda: SetButton("批量题号选题pdf生成",1,["批量生成题目pdf.py"]))
|
||||
UseMenu.add_command(label = "试卷答案生成", command = lambda: SetButton("试卷答案生成",1,["试卷答案生成.py"]))
|
||||
|
||||
# 设置 目标及标签 菜单项
|
||||
|
|
@ -111,16 +139,32 @@ ObjTagMenu = Menu(menubar, tearoff = False)
|
|||
menubar.add_cascade(label = "目标及标签", menu = ObjTagMenu)
|
||||
ObjTagMenu.add_command(label = "单元标记转换", command = lambda: SetButton("单元标记转换",1,["单元标记转换.py"]))
|
||||
ObjTagMenu.add_separator()
|
||||
ObjTagMenu.add_command(label = "目标清点", command = lambda: SetButton("目标清点",1,[]))
|
||||
ObjTagMenu.add_command(label = "目标表体生成", command = lambda: SetButton("目标表体生成",1,["根据范围提取课时目标.py"]))
|
||||
ObjTagMenu.add_command(label = "目标pdf生成", command = lambda: SetButton("目标pdf生成",1,["课时目标pdf生成.py"]))
|
||||
ObjTagMenu.add_command(label = "课时目标寻找题目", command = lambda: SetButton("课时目标寻找题目",1,["课时目标寻找题目.py"]))
|
||||
ObjTagMenu.add_command(label = "课时目标划分信息汇总", command = lambda: SetButton("课时目标划分信息汇总",1,[]))
|
||||
ObjTagMenu.add_separator()
|
||||
ObjTagMenu.add_command(label = "目标挂钩基础清点", command = lambda: SetButton("目标挂钩清点",1,[]))
|
||||
ObjTagMenu.add_command(label = "目标清点及清单生成", command = lambda: SetButton("目标清点及清单生成",1,[]))
|
||||
|
||||
# 设置 相同相似比对 菜单项
|
||||
SimRelMenu = Menu(menubar, tearoff= False)
|
||||
menubar.add_cascade(label = "相同相似", menu = SimRelMenu)
|
||||
SimRelMenu.add_command(label = "新题相似相同比对", command = lambda: SetButton("新题相似相同比对",1,["新题相似相同比对.py"]))
|
||||
SimRelMenu.add_separator()
|
||||
SimRelMenu.add_command(label = "局部相似题目检测", command = lambda: SetButton("局部相似题目检测",1,["局部相似题目检测.py"]))
|
||||
SimRelMenu.add_command(label = "相同相似题目标注", command = lambda: SetButton("相同相似题目标注",1,["相同相似题目标注.py"]))
|
||||
SimRelMenu.add_separator()
|
||||
SimRelMenu.add_command(label = "全局未标注相同题目检测", command = lambda: SetButton("全局未标注相同题目检测",1,[]))
|
||||
|
||||
|
||||
|
||||
# 设置 其他 菜单项
|
||||
OtherMenu = Menu(menubar, tearoff = False)
|
||||
menubar.add_cascade(label = "其他", menu = OtherMenu)
|
||||
OtherMenu.add_command(label = "mathpix预处理", command = lambda: SetButton("mathpix预处理",1,[]))
|
||||
OtherMenu.add_command(label = "带圈数字处理", command = lambda: SetButton("带圈数字处理",1,[]))
|
||||
OtherMenu.add_command(label = "剪贴板mathpix预处理", command = lambda: SetButton("mathpix预处理",1,[]))
|
||||
OtherMenu.add_command(label = "剪贴板带圈数字处理", command = lambda: SetButton("带圈数字处理",1,[]))
|
||||
OtherMenu.add_command(label = "剪贴板表格整理", command = lambda: SetButton("剪贴板表格整理",1,[]))
|
||||
|
||||
|
||||
menubar.add_command(label = "退出", command = root.destroy)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
" numerals_list[i] = str_numeral\n",
|
||||
" return \"\\t\".join(numerals_list)\n",
|
||||
"\n",
|
||||
"with open(\"临时文件/统计结果.txt\",\"r\",encoding = \"utf8\") as f:\n",
|
||||
"with open(\"文本文件/手动统计结果.txt\",\"r\",encoding = \"utf8\") as f:\n",
|
||||
" data = f.read()\n",
|
||||
"\n",
|
||||
"blocks = re.findall(r\"\\[BEGIN\\]([\\s\\S]*?)\\[END\\]\",data)\n",
|
||||
|
|
@ -43,7 +43,7 @@
|
|||
" output_data += \"\\n\".join(results_dict[id])\n",
|
||||
" output_data += \"\\n\\n\"\n",
|
||||
"\n",
|
||||
"with open(\"临时文件/字段数据.txt\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
"with open(\"文本文件/metadata.txt\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(output_data)\n"
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
import os,re,json
|
||||
|
||||
def form_decimals(string):
    """Reformat whitespace-separated numbers to three decimal places.

    Args:
        string: Text holding numbers separated by any run of whitespace.

    Returns:
        The numbers, each formatted with ``"%.3f"``, joined by single tabs.
        An input with no numbers gives an empty string.

    Raises:
        ValueError: If a token cannot be converted with ``float``.
    """
    tabbed = re.sub(r"[\s]+",r"\t",string)
    return "\t".join(
        "%.3f" % float(token) for token in tabbed.split("\t") if len(token) > 0
    )
|
||||
|
||||
with open("文本文件/手动统计结果.txt","r",encoding = "utf8") as f:
    data = f.read()

# Each record group sits between a [BEGIN] and an [END] marker.
blocks = re.findall(r"\[BEGIN\]([\s\S]*?)\[END\]",data)

# Maps a zero-padded problem id to its list of "date<TAB>class<TAB>numbers" lines.
results_dict = {}
for block in blocks:
    temp_list = [l.strip() for l in block.split("\n") if l.strip() != ""]
    for line in temp_list:
        if line[:2] == "##":
            # "##" lines set the date applied to the data lines that follow.
            date = line[2:].strip()
        elif line[:2] == "**":
            # "**" lines set the class applied to the data lines that follow.
            current_class = line[2:].strip()
        else:
            # Data line: the problem id runs up to the first whitespace and
            # the numbers follow. Raw strings are used for the patterns
            # because "\s" is an invalid escape in a normal string literal.
            separating_pos = re.search(r"\s",line).span(0)[0]
            problem_id = line[:separating_pos].zfill(6)
            usage = date + "\t" + current_class + "\t" + form_decimals(re.sub(r"\s+?","\t",line[separating_pos:])).strip()
            results_dict.setdefault(problem_id, []).append(usage)

# The first output line is the field name; each record is the id followed by
# its usage lines, and records are separated by a blank line.
output_data = "usages\n"
for id in results_dict:
    output_data += id + "\n"
    output_data += "\n".join(results_dict[id])
    output_data += "\n\n"

with open("文本文件/metadata.txt","w",encoding = "utf8") as f:
    f.write(output_data)
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
import os,re,json
|
||||
|
||||
"""---明确数据文件位置---"""
|
||||
datafile = "文本文件/metadata.txt"
|
||||
# 双回车分隔,记录内单回车分隔列表,首行为字段名
|
||||
"""---文件位置结束---"""
|
||||
|
||||
def trim(string):
    """Remove leading and trailing spaces, tabs and newlines from ``string``.

    Only those three characters are removed; other whitespace such as a
    carriage return is left in place.
    """
    without_leading = re.sub(r"^[ \t\n]*","",string)
    return re.sub(r"[ \t\n]*$","",without_leading)
|
||||
def FloatToInt(string):
    """Parse ``string`` as a number, returning an int when it is nearly whole.

    Args:
        string: Text accepted by ``float``.

    Returns:
        The rounded integer when the parsed value is within 0.01 of it,
        otherwise the parsed float.
    """
    value = float(string)
    nearest = round(value)
    return nearest if abs(value - nearest) < 0.01 else value
|
||||
|
||||
# Read the data file: the first line is the field name and the rest are
# records separated by blank lines.
with open(datafile,"r",encoding="utf8") as f:
    data = f.read().strip()
pos = data.index("\n")
field = data[:pos].strip()
appending_data = data[pos:]

with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)
with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
    database = f.read()
obj_dict = json.loads(database)

# This field list may need to be updated.
fields = ["content","objs","tags","genre","ans","solution","duration","usages","origin","edit","same","related","remark","space"]

if field in fields:
    # The field's type is taken from problem "000001" and decides how the
    # new data is merged.
    field_type = type(pro_dict["000001"][field])
    datalist = [record.strip() for record in appending_data.split("\n\n") if len(trim(record)) > 0]
    for record in datalist:
        # Each record starts with the problem number; the rest is the data.
        id = re.findall(r"^[\d]{1,}",record)[0]
        data = record[len(id):].strip()
        id = id.zfill(6)
        if not id in pro_dict:
            print("题号:",id,"不在数据库中.")
            # Processing stops at the first unknown id; the changes made so
            # far are still written back below.
            break

        # String fields
        elif field_type == str:
            if data in pro_dict[id][field]:
                print("题号:",id,", 字段:",field,"中已有该数据:",data)
            elif field == "ans" or field == "space":
                # "ans" and "space" are overwritten rather than appended to.
                # The original condition lacked parentheses around the
                # ans/space test, so "space" bypassed the string-type guard.
                pro_dict[id][field] = data
                print("题号:",id,", 字段:",field,"中已修改数据:",data)
            else:
                # Other string fields get the data appended on a new line.
                origin_data = pro_dict[id][field]
                new_data = trim(origin_data + "\n" + data)
                pro_dict[id][field] = new_data
                print("题号:",id,", 字段:",field,"中已添加数据:",data)

        # Numeric fields
        elif field_type == int or field_type == float:
            if abs(float(data) - pro_dict[id][field])<0.01:
                print("题号:",id,", 字段:",field,"中已有该数据:",FloatToInt(data))
            else:
                pro_dict[id][field] = FloatToInt(data)
                print("题号:",id,", 字段:",field,"中已修改数据:",FloatToInt(data))

        # List fields: each line of the record is one item.
        elif field_type == list:
            cell_data_list = [d.strip() for d in data.split("\n")]
            for cell_data in cell_data_list:
                if cell_data in pro_dict[id][field]:
                    print("题号:",id,", 字段:",field,"中已有该数据:",cell_data)
                elif not field == "objs":
                    pro_dict[id][field].append(cell_data)
                    print("题号:",id,", 字段:",field,"中已添加数据:",cell_data)
                else:
                    # "objs" items must be keys of obj_dict or "KNONE"
                    # (case-insensitive); accepted items are stored upper-cased.
                    if not cell_data in obj_dict and not cell_data.upper() == "KNONE":
                        print("题号:",id,", 字段:",field,"目标编号有误:",cell_data)
                    else:
                        pro_dict[id][field].append(cell_data.upper())
                        print("题号:",id,", 字段:",field,"中已添加数据:",cell_data.upper())
else:
    print("字段名有误")

with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
    f.write(json.dumps(pro_dict,indent=4,ensure_ascii=False))
|
||||
|
|
@ -9,111 +9,126 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"题号: 040422 , 字段: ans 中已修改数据: $(-1,3)$\n",
|
||||
"题号: 040423 , 字段: ans 中已修改数据: $(-\\infty,-1)$\n",
|
||||
"题号: 040424 , 字段: ans 中已修改数据: $\\sqrt{5}$\n",
|
||||
"题号: 040425 , 字段: ans 中已修改数据: $5$\n",
|
||||
"题号: 040426 , 字段: ans 中已修改数据: $\\dfrac{24}{7}$\n",
|
||||
"题号: 040427 , 字段: ans 中已修改数据: $0.12$\n",
|
||||
"题号: 040428 , 字段: ans 中已修改数据: $52$\n",
|
||||
"题号: 040429 , 字段: ans 中已修改数据: $2$\n",
|
||||
"题号: 040430 , 字段: ans 中已修改数据: $60^{\\circ}$\n",
|
||||
"题号: 040431 , 字段: ans 中已修改数据: $\\dfrac{4}{5}$\n",
|
||||
"题号: 040432 , 字段: ans 中已修改数据: $\\dfrac{\\sqrt{10}}{10}$\n",
|
||||
"题号: 040433 , 字段: ans 中已修改数据: $(-\\infty,-\\frac{2}{\\mathrm{e}}) \\cup(\\frac{2}{\\mathrm{e}},+\\infty)$\n",
|
||||
"题号: 040434 , 字段: ans 中已修改数据: D\n",
|
||||
"题号: 040435 , 字段: ans 中已修改数据: C\n",
|
||||
"题号: 040436 , 字段: ans 中已修改数据: B\n",
|
||||
"题号: 040437 , 字段: ans 中已修改数据: D\n",
|
||||
"题号: 040438 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{\\sqrt{15}}{15}$\n",
|
||||
"题号: 040439 , 字段: ans 中已修改数据: (1) $a_1+a_2=6$, 公差为$4$; (2) $S_n=\\begin{cases}n^2+n, & n=2 k, \\\\ n^2+n+2, & n=2 k-1\\end{cases}$, $k$为正整数\n",
|
||||
"题号: 040440 , 字段: ans 中已修改数据: (1) $y=0.1 x-0.2$; (2) (i) $5900$万元; (ii) $[\\dfrac{1}{3}, \\dfrac{5}{8}]$\n",
|
||||
"题号: 040441 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{8}+\\dfrac{y^2}{4}=1$; (2) $y= \\pm \\dfrac{\\sqrt{30}}{10} x+1$; (3) $-\\dfrac{1}{2}$\n",
|
||||
"题号: 040442 , 字段: ans 中已修改数据: (1) $y=(1+a) x$; (2) $-1$; (3) $(-\\infty,-1)$\n",
|
||||
"题号: 040443 , 字段: ans 中已修改数据: $\\{1,2,3\\}$\n",
|
||||
"题号: 040444 , 字段: ans 中已修改数据: $\\sqrt{10}$\n",
|
||||
"题号: 040445 , 字段: ans 中已修改数据: $2 \\pi$\n",
|
||||
"题号: 040446 , 字段: ans 中已修改数据: $-\\dfrac{1}{3}$\n",
|
||||
"题号: 040447 , 字段: ans 中已修改数据: $24$\n",
|
||||
"题号: 040448 , 字段: ans 中已修改数据: $-5$\n",
|
||||
"题号: 040449 , 字段: ans 中已修改数据: $(\\dfrac{7}{2}, 0, \\dfrac{7}{2})$\n",
|
||||
"题号: 040450 , 字段: ans 中已修改数据: $(-\\dfrac{1}{3}, 1)$\n",
|
||||
"题号: 040451 , 字段: ans 中已修改数据: $17$或$18$\n",
|
||||
"题号: 040452 , 字段: ans 中已修改数据: 支持\n",
|
||||
"题号: 040453 , 字段: ans 中已修改数据: $\\sqrt{6}-\\sqrt{3}$\n",
|
||||
"题号: 040454 , 字段: ans 中已修改数据: $\\{(4,-4),(4,1)\\}$\n",
|
||||
"题号: 040455 , 字段: ans 中已修改数据: C\n",
|
||||
"题号: 040456 , 字段: ans 中已修改数据: A\n",
|
||||
"题号: 040457 , 字段: ans 中已修改数据: B\n",
|
||||
"题号: 040458 , 字段: ans 中已修改数据: B\n",
|
||||
"题号: 040459 , 字段: ans 中已修改数据: (1) $[-\\dfrac{\\pi}{2}+k \\pi, k \\pi]$, $k \\in \\mathbf{Z}$; (2) $\\dfrac{15 \\sqrt{3}}{4}$\n",
|
||||
"题号: 040460 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{1}{2}$\n",
|
||||
"题号: 040461 , 字段: ans 中已修改数据: (1) $\\dfrac{3}{256}$; (2) 选择A同学\n",
|
||||
"题号: 040462 , 字段: ans 中已修改数据: (1) $\\sqrt{3}$; (2) 略; (3) 存在, $k=\\dfrac{\\sqrt{3}}{2}$, $P(-3 \\sqrt{3}, 0)$\n",
|
||||
"题号: 040463 , 字段: ans 中已修改数据: (1) $y=2 x$; (2) $(-\\infty, 0] \\cup[1,+\\infty)$; (3) $(-\\infty,\\dfrac{2}{\\mathrm{e}})$\n",
|
||||
"题号: 040464 , 字段: ans 中已修改数据: $(-2,1]$\n",
|
||||
"题号: 040465 , 字段: ans 中已修改数据: $1$\n",
|
||||
"题号: 040466 , 字段: ans 中已修改数据: $\\dfrac{2 \\pi}{3}$\n",
|
||||
"题号: 040467 , 字段: ans 中已修改数据: $\\sqrt{3}$\n",
|
||||
"题号: 040468 , 字段: ans 中已修改数据: $(-\\infty,-2] \\cup[0,2]$\n",
|
||||
"题号: 040469 , 字段: ans 中已修改数据: $y^2=4 x$\n",
|
||||
"题号: 040470 , 字段: ans 中已修改数据: $\\dfrac{14}{15}$\n",
|
||||
"题号: 040471 , 字段: ans 中已修改数据: $5$\n",
|
||||
"题号: 040472 , 字段: ans 中已修改数据: $\\dfrac{6+2 \\sqrt{3}}{5}$\n",
|
||||
"题号: 040473 , 字段: ans 中已修改数据: $1-2^{2023}$\n",
|
||||
"题号: 040474 , 字段: ans 中已修改数据: $\\dfrac{2 \\sqrt{3}}{3}$\n",
|
||||
"题号: 040475 , 字段: ans 中已修改数据: $6+4 \\sqrt{2}$\n",
|
||||
"题号: 040476 , 字段: ans 中已修改数据: B\n",
|
||||
"题号: 040477 , 字段: ans 中已修改数据: D\n",
|
||||
"题号: 040478 , 字段: ans 中已修改数据: C\n",
|
||||
"题号: 040479 , 字段: ans 中已修改数据: B\n",
|
||||
"题号: 040480 , 字段: ans 中已修改数据: (1) 略; (2) $\\arccos \\dfrac{2}{3}$\n",
|
||||
"题号: 040481 , 字段: ans 中已修改数据: (1) $[k \\pi-\\dfrac{3 \\pi}{8}, k \\pi+\\dfrac{\\pi}{8}]$, $k \\in \\mathbf{Z}$; (2) $\\{x | x=\\dfrac{k \\pi}{2}+\\dfrac{\\pi}{8},\\ k \\in \\mathbf{Z}\\}$\n",
|
||||
"题号: 040482 , 字段: ans 中已修改数据: (1) $1.4$米; (2) $\\arcsin \\dfrac{5 \\sqrt{3}}{14}$\n",
|
||||
"题号: 040483 , 字段: ans 中已修改数据: (1) $\\dfrac{6 \\sqrt{5}}{5}$; (2) 略; (3) $\\dfrac{3 \\sqrt{3}}{4}$\n",
|
||||
"题号: 040484 , 字段: ans 中已修改数据: (1) 略; (2) $a_n=\\dfrac{(n+3)(n+4)}{2}$, $b_n=\\dfrac{(n+4)^2}{2}$; (3) $(-\\infty,1]$\n",
|
||||
"题号: 040485 , 字段: ans 中已修改数据: $4$\n",
|
||||
"题号: 040486 , 字段: ans 中已修改数据: 四\n",
|
||||
"题号: 040487 , 字段: ans 中已修改数据: $2$\n",
|
||||
"题号: 040488 , 字段: ans 中已修改数据: $\\dfrac 23$\n",
|
||||
"题号: 040489 , 字段: ans 中已修改数据: $12$\n",
|
||||
"题号: 040490 , 字段: ans 中已修改数据: $40$\n",
|
||||
"题号: 040491 , 字段: ans 中已修改数据: $2$\n",
|
||||
"题号: 040492 , 字段: ans 中已修改数据: $72$\n",
|
||||
"题号: 040493 , 字段: ans 中已修改数据: $3$\n",
|
||||
"题号: 040494 , 字段: ans 中已修改数据: $\\dfrac{4\\pi}3$\n",
|
||||
"题号: 040495 , 字段: ans 中已修改数据: $\\sqrt{3}$\n",
|
||||
"题号: 040496 , 字段: ans 中已修改数据: $-2+\\sqrt{7}$\n",
|
||||
"题号: 040497 , 字段: ans 中已修改数据: B\n",
|
||||
"题号: 040498 , 字段: ans 中已修改数据: C\n",
|
||||
"题号: 040499 , 字段: ans 中已修改数据: D\n",
|
||||
"题号: 040500 , 字段: ans 中已修改数据: D\n",
|
||||
"题号: 040501 , 字段: ans 中已修改数据: (1) 证明略; (2) $\\dfrac{\\sqrt{14}}7$\n",
|
||||
"题号: 040502 , 字段: ans 中已修改数据: (1) $y=0.072x-0.046$; (2) (i) $1060$万; (ii) $(\\dfrac 13,\\dfrac 58]$\n",
|
||||
"题号: 040503 , 字段: ans 中已修改数据: (1) $[k\\pi,\\dfrac\\pi 4+k\\pi]$, $k\\in \\mathbf{Z}$; (2) $\\dfrac\\pi 4$\n",
|
||||
"题号: 040504 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{4}-y^2=1$; (2) 存在, $t=\\pm \\sqrt{3}$或$t=\\pm \\sqrt{\\dfrac{76}{15}}$; (3) $(-8-5\\sqrt{3},-\\dfrac 12)\\cup (\\dfrac 12,-8+5\\sqrt{3})$\n",
|
||||
"题号: 040505 , 字段: ans 中已修改数据: (1) $f(g(x))$的导函数为$y=-a\\sin x$, $g(f(x))$的导函数为$y=-a\\sin (ax)$; (2) $(-\\infty,-1]$; (3) $[\\dfrac 32,3)$\n",
|
||||
"题号: 040506 , 字段: ans 中已修改数据: $4$\n",
|
||||
"题号: 040507 , 字段: ans 中已修改数据: $1$\n",
|
||||
"题号: 040508 , 字段: ans 中已修改数据: $24$\n",
|
||||
"题号: 040509 , 字段: ans 中已修改数据: $x^2+(y-1)^2=4$\n",
|
||||
"题号: 040510 , 字段: ans 中已修改数据: $(-1,1]$\n",
|
||||
"题号: 040511 , 字段: ans 中已修改数据: $300(4+\\sqrt{3})$\n",
|
||||
"题号: 040512 , 字段: ans 中已修改数据: $\\dfrac{1}{2}$\n",
|
||||
"题号: 040513 , 字段: ans 中已修改数据: $92$\n",
|
||||
"题号: 040514 , 字段: ans 中已修改数据: $\\dfrac{1}{2}$\n",
|
||||
"题号: 040515 , 字段: ans 中已修改数据: $\\dfrac{\\sqrt{21}}{6}$\n",
|
||||
"题号: 040516 , 字段: ans 中已修改数据: $2 \\sqrt{2}$\n",
|
||||
"题号: 040517 , 字段: ans 中已修改数据: $2$\n",
|
||||
"题号: 040518 , 字段: ans 中已修改数据: C\n",
|
||||
"题号: 040519 , 字段: ans 中已修改数据: A\n",
|
||||
"题号: 040520 , 字段: ans 中已修改数据: A\n",
|
||||
"题号: 040521 , 字段: ans 中已修改数据: C\n",
|
||||
"题号: 040522 , 字段: ans 中已修改数据: (1) $2n$; (2) $n^2+n+\\dfrac{4^{n+1}}{3}-\\dfrac{4}{3}$\n",
|
||||
"题号: 040523 , 字段: ans 中已修改数据: (1) $\\begin{pmatrix}0 & 1 & 2 \\\\ \\dfrac{7}{15} & \\dfrac{7}{15} & \\dfrac{1}{15}\\end{pmatrix}$, 期望为$\\dfrac{3}{5}$; (2) $\\chi^2 \\approx 0.794<3.841$, 不能认为有关\n",
|
||||
"题号: 040524 , 字段: ans 中已修改数据: (1) 略; (2) $\\dfrac{\\sqrt{10}}{5}$\n",
|
||||
"题号: 040525 , 字段: ans 中已修改数据: (1) $\\dfrac{x^2}{4}+y^2=1$; (2) 最大值为$3$; 最小值为$\\dfrac{\\sqrt{6}}{3}$; (3) $(0,3)$\n",
|
||||
"题号: 040526 , 字段: ans 中已修改数据: (1) $y=x+1$; (2) 略; (3) $2$\n"
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t0.000\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t1.000\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t0.000\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t0.000\t1.000\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三10班\t1.000\t1.000\t0.000\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.800\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.350\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.850\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.750\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.950\t0.900\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.950\t0.750\t0.850\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.900\t0.650\t0.350\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.650\t0.600\t0.600\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三11班\t0.900\t0.600\t0.350\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.842\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.105\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.842\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.421\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.895\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t1.000\t0.789\t0.895\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.947\t0.000\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.895\t0.842\t0.895\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三12班\t0.947\t0.789\t0.842\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.893\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.071\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.893\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.714\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\t0.786\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t1.000\t0.929\t0.821\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\t0.929\t0.000\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.964\t0.893\t0.821\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三02班\t0.929\t0.750\t0.821\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.391\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.870\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.652\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\t0.870\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t1.000\t0.957\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t0.913\t0.000\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.696\t0.783\t0.739\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三03班\t0.913\t0.826\t0.696\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.938\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.875\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.000\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.875\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.562\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.750\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.812\t0.812\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t1.000\t0.938\t0.062\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.812\t0.688\t0.750\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三04班\t0.938\t0.812\t0.438\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.865\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.973\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.135\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.946\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.730\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t1.000\t1.000\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.973\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.946\t0.838\t0.946\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.351\t0.919\t0.378\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.378\t0.513\t0.622\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三05班\t0.892\t0.622\t0.513\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.846\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.179\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.769\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\t0.974\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.974\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\t0.872\t0.974\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t1.000\t1.000\t0.051\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.692\t0.718\t0.795\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三06班\t0.667\t0.872\t0.590\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.889\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.037\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.889\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.667\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.963\t0.926\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.926\t0.852\t0.704\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.963\t0.963\t0.037\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t0.852\t0.778\t0.778\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三07班\t1.000\t0.556\t0.518\n",
|
||||
"题号: 013235 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.842\n",
|
||||
"题号: 013218 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\n",
|
||||
"题号: 013237 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.053\n",
|
||||
"题号: 014640 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.895\n",
|
||||
"题号: 014642 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.632\n",
|
||||
"题号: 014643 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\t0.895\n",
|
||||
"题号: 014636 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.947\n",
|
||||
"题号: 014088 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\n",
|
||||
"题号: 014085 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t1.000\t0.842\t0.947\n",
|
||||
"题号: 014644 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.895\t0.895\t0.105\n",
|
||||
"题号: 014645 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.789\t0.632\t0.526\n",
|
||||
"题号: 014646 , 字段: usages 中已有该数据: 20230331\t2023届高三09班\t0.842\t0.421\t0.105\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -196,6 +211,8 @@
|
|||
" else:\n",
|
||||
" pro_dict[id][field].append(cell_data.upper())\n",
|
||||
" print(\"题号:\",id,\", 字段:\",field,\"中已添加数据:\",cell_data.upper())\n",
|
||||
"else:\n",
|
||||
" print(\"字段名有误\")\n",
|
||||
"\n",
|
||||
"with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(json.dumps(pro_dict,indent=4,ensure_ascii=False))"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,197 @@
|
|||
import os,re,time,json,sys
|
||||
|
||||
"""
|
||||
模板文件目录下 题目清单.txt 文件不能缺失
|
||||
"""
|
||||
|
||||
"""---设置是否在学生版中提供答案---"""
|
||||
# Whether the students' edition also prints the answers. When True, the
# per-problem \vspace answer gap is omitted further down in this script.
answered = True

"""---设置文件保存路径---"""
# Always use "/" to separate directories and file names.
directory = "临时文件/"
# filename = "高三二模前易错题"
filename = "测试"
"""---设置文件名结束---"""

"""---设置题目列表---"""
# Each key names one problem set; its value is the ID specification of the
# problems in that set (comma-separated numbers, "a:b" for a closed range).

problems_dict = {
    "函数1":"778,1253,1262,1325,1339,1352,2918,2968,3747,4009,4157,4435,4721,5123,5650,7939,10197,10938,11148,12179,12277,12543,12859,12902,13721,30060,30398,30406,10796,12104,30051,30327,30337,30356,31321",
    "函数2":"87,342,655,1277,1329,2831,2851,2859,2905,2959,3648,4153,4320,4359,11066,11079,11144,11999,12063,12064,12178,12192,12842,12856,12903,13747,13840,14191,30410,12549,12717,30377,30381,30411,30416,30420,30424,30426",
    "数列":"321,403,1781,1788,1810,3210,3219,3283,3309,3310,3312,4472,4476,6793,10777,10942,12067,12112,12239,12933,12979,13921,13925,30072,30473,30499",
    "解析几何":"248,282,363,625,960,2162,2252,2268,2278,2372,2418,3421,3437,3438,4246,8760,8866,8912,9784,10951,10958,12199,12213,12255,12548,13060,13063,13069,13097,13118,13134,13982,14505,31229,31233,31323",
    "概率统计":"332,340,401,412,654,2586,2605,2664,3574,3585,3640,4575,4584,7361,7423,7502,7630,10868,11993,14091,30227,30275,30495,30520,30540,31158,31196,31320"


    # Other problem sets, kept commented out so they can be re-enabled by
    # moving an entry above this note.
    # "2024届高二下学期周末卷01":"40001:40017",
    # "2025届高一下学期周末卷01":"40018:40036",
    # "2024届高二下学期周末卷02":"40037:40056",
    # "2025届高一下学期周末卷02":"40057:40082",
    # "2025届高一下学期周末卷03":"40083:40104",
    # "2025届高一下学期周末卷03小测":"40105:40112",
    # "2025届高一下学期周末卷04旧版":"40113:40130",
    # "2025届高一下学期周末卷04小测":"40131:40139",
    # "2024届高二下学期周末卷03":"40140:40160",
    # "2024届高二上学期期末考试":"31267:31287",
    # "2025届高一上学期期末考试":"31288:31308",
    # "2024届高二下学期周末卷04":"40161:40180",
    # "2025届高一下学期周末卷04":"40181:40201",
    # "2024届高二下学期周末卷05":"40202:40225",
    # "2025届高一下学期周末卷05":"40226:40245",
    # "2024届空间向量校本作业":"22048:22083",
    # "2024届二项式定理校本作业":"22084:22105",
    # "2025届高一下学期周末卷05小测":"40246:40255",
    # "2025届高一下学期周末卷06":"40256:40273",
    # "2025届高一下学期周末卷06小测":"40274:40282",
    # "2025届高一下学期期中复习一(集合逻辑不等式)":"40283:40298",
    # "2024届高二下学期周末卷06":"40299:40316",
    # "2024届高二下学期周末卷07":"40317:40335",
    # "2025届高一下学期测验01":"40336:40349",
    # "2025届高一下学期测验02":"40350:40367",
    # "2025届高一下学期期中复习二(幂指对函数)":"40368:40386",
    # "2025届高一下学期周末卷02小测":"40387:40395",
    # "2025届高一下学期周末卷07":"40396:40413",
    # "2025届高一下学期周末卷07小测":"40414:40421"

}

"""---设置题目列表结束---"""


# Paths below are built by plain string concatenation, so make sure the
# output directory ends with a "/".
if directory[-1] != "/":
    directory += "/"
|
||||
|
||||
|
||||
#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间
|
||||
def generate_number_set(string,dict):
    """Expand a compact ID specification into a list of 6-digit ID strings.

    The specification is a comma-separated list of chunks. A chunk is either
    a single number ("87") or a closed integer range written "start:end"
    ("40001:40017"). All whitespace is ignored, and every resulting ID is
    left-padded with zeros to six characters.

    Empty chunks (an empty specification, a trailing comma, or ",,") are
    skipped instead of producing a bogus "000000" entry.

    Args:
        string: The ID specification, e.g. "1, 3:5".
        dict: Unused; kept so existing two-argument calls keep working.

    Returns:
        The zero-padded ID strings, in the order they were specified.

    Raises:
        ValueError: If a range chunk does not consist of exactly two
            integer endpoints.
    """
    # \s already covers newlines, so one class removes all whitespace.
    compact = re.sub(r"\s", "", string)
    numbers_list = []
    for chunk in compact.split(","):
        if not chunk:
            # "".zfill(6) would yield "000000", which is not a requested ID.
            continue
        if ":" in chunk:
            start, end = chunk.split(":")
            numbers_list.extend(
                str(ind).zfill(6) for ind in range(int(start), int(end) + 1)
            )
        else:
            numbers_list.append(chunk.zfill(6))
    return numbers_list
|
||||
|
||||
#将正确率转化为含颜色代码的字符串
|
||||
def get_color(value):
    """Return a LaTeX rgb triple "{r,g,0}" for a rate.

    The rate is converted with float(). For rates of 0.5 and above red is 1
    and green is 2 - 2*rate; below 0.5 red is 2*rate and green is 1. Blue is
    always 0. Red and green are printed with three decimals.
    """
    rate = float(value)
    red, green = (1, 2 - 2 * rate) if rate >= 0.5 else (2 * rate, 1)
    return "{%.3f,%.3f,0}" % (red, green)
|
||||
|
||||
|
||||
def color_value(matchobj):
    """re.sub callback that boxes a captured rate in its colour.

    Takes group 1 of the match as the rate text and returns a tab followed
    by an fcolorbox command (black frame, fill colour from get_color) that
    contains the rate text unchanged.
    """
    rate_text = matchobj.group(1)
    fill = get_color(rate_text)
    return "\t\\fcolorbox[rgb]{0,0,0}%s{%s}" % (fill, rate_text)
|
||||
|
||||
|
||||
# Load the problem database (JSON) into a dict.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Load the lesson-objective database (JSON) into a dict.
with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
    database = f.read()
obj_dict = json.loads(database)

# Create the output directory if needed. exist_ok tolerates an existing
# directory, while real failures (e.g. missing permissions) are raised
# instead of being silently swallowed.
os.makedirs(directory, exist_ok=True)

# Date suffix for the output file names, e.g. "_20230403".
current_time = time.localtime()
time_string = time.strftime("_%Y%m%d", current_time)

# Accumulators for the LaTeX bodies of the two editions.
data_teachers = ""
data_students = ""

teachers_latex_file = directory + filename + "_教师用" + time_string + ".tex"
students_latex_file = directory + filename + "_学生用" + time_string + ".tex"
|
||||
|
||||
# Build the LaTeX body of both editions, one \section per problems_dict entry.
for section_name in problems_dict:
    problems = problems_dict[section_name]

    # Every section starts on a new page and restarts the item counter.
    section_header = (
        r"\newpage" + "\n\n" + r"\section{" + section_name + "}\n\n"
        + r"\setcounter{enumi}{0}" + "\n\n"
    )
    data_teachers += section_header
    data_students += section_header

    # Expand the ID specification; IDs missing from the database are dropped.
    problem_list = [
        problem_id
        for problem_id in generate_number_set(problems.strip(), pro_dict)
        if problem_id in pro_dict
    ]

    # Render each problem for the teachers' and the students' edition.
    for problem_id in problem_list:
        problemset = pro_dict[problem_id]
        problem = problemset["content"]
        solution = (problemset["solution"] if problemset["solution"] != "" else "暂无解答与提示")
        # The answer is already wrapped in red here; do not wrap it again.
        answer = "\\textcolor{red}{" + (problemset["ans"] if problemset["ans"] != "" else "暂无答案") + "}"
        # Read but not emitted in either edition at the moment.
        remarks = (problemset["remark"] if problemset["remark"] != "" else "暂无备注")

        usages_list = problemset["usages"]
        if len(usages_list) > 0:
            # Colour every rate preceded by a tab, then those preceded by a
            # space. Raw strings keep the regex escapes (\d, \.) valid.
            usage = re.sub(r"\t([\d]\.[\d]{0,10})", color_value, "\n\n".join(usages_list))
            usage = re.sub(r"[\t ]([\d]\.[\d]{0,10})", color_value, usage)
        else:
            usage = "暂无使用记录"

        origin = (problemset["origin"] if problemset["origin"] != "" else "出处不详")

        objects = problemset["objs"]
        if len(objects) == 0:
            objects = "暂未关联目标\n\n"
        elif "KNONE" in [o.upper() for o in objects]:
            objects = "该题的考查目标不在目前的集合中\n\n"
        else:
            objects_string = ""
            for obj in objects:
                if obj not in obj_dict:
                    # One unknown objective replaces the whole list with an error note.
                    objects_string = "目标" + obj + "有误\n\n"
                    break
                objects_string += "\\textcolor{blue}{" + obj + "|" + obj_dict[obj]["content"] + "}\n\n"
            objects = objects_string

        # The answer gap is only needed when answers are not printed.
        space = ("" if problemset["space"] == "" or answered else r"\vspace*{"+problemset["space"]+"}\n")

        # Fall back to the placeholder when the *tags* list is empty
        # (the previous code tested the "origin" field by mistake).
        tags = ("|".join(problemset["tags"]) if len(problemset["tags"]) > 0 else "暂无标签")

        raw_string = "\\item " + "{\\tiny ("+problem_id+")} "+problem
        teachers_string = (
            raw_string.replace("\\tiny","")
            + "\n\n关联目标:\n\n" + objects
            + "\n\n标签: " + tags
            + "\n\n答案: " + answer
            + "\n\n" + "解答或提示: " + solution
            + "\n\n使用记录:\n\n" + usage + "\n"
            + "\n\n出处: " + origin + "\n\n"
        )
        students_string = raw_string + space + "\n\n"
        if answered:
            students_string += "答案: " + answer + "\n\n"

        data_teachers += teachers_string
        data_students += students_string
|
||||
|
||||
# Drop the leading "\newpage" + two newlines (10 characters) so the document
# does not start with an empty page.
data_teachers = data_teachers[10:]
data_students = data_students[10:]

# Read the LaTeX template.
with open("模板文件/题目清单.txt","r",encoding = "utf8") as f:
    latex_raw = f.read()

# Outside Windows, switch to the fandol font set and comment out the
# \setCJKmainfont line of the template.
if sys.platform != "win32":
    latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
    latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

# Substitute the generated bodies for the template placeholder.
latex_teachers = latex_raw.replace("编译模板",data_teachers)
latex_students = latex_raw.replace("编译模板",data_students)

# Write each edition and run xelatex on it twice; the exit status of the
# second run is printed.
for announcement, tex_path, tex_source in (
    ("开始编译教师版本pdf文件: ", teachers_latex_file, latex_teachers),
    ("开始编译学生版本pdf文件: ", students_latex_file, latex_students),
):
    with open(tex_path,"w",encoding = "utf8") as f:
        f.write(tex_source)
    print(announcement, tex_path)
    compile_command = "xelatex -interaction=batchmode -output-directory=" + directory + " " + tex_path
    os.system(compile_command)
    print(os.system(compile_command))
|
||||
|
|
@ -1,18 +1,235 @@
|
|||
tags
|
||||
usages
|
||||
001050
|
||||
20220906 2023届高三2班 0.871
|
||||
|
||||
1
|
||||
第一单元
|
||||
第二单元
|
||||
第三单元
|
||||
009446
|
||||
20220906 2023届高三2班 0.355
|
||||
|
||||
001072
|
||||
20220906 2023届高三2班 0.774
|
||||
|
||||
2
|
||||
第三单元
|
||||
010060
|
||||
20220906 2023届高三2班 0.742
|
||||
|
||||
000033
|
||||
20220906 2023届高三2班 0.903
|
||||
|
||||
4
|
||||
第三单元
|
||||
第五单元
|
||||
第六单元
|
||||
001073
|
||||
20220906 2023届高三2班 0.935
|
||||
|
||||
000023
|
||||
20220906 2023届高三2班 0.774
|
||||
|
||||
002773
|
||||
20220906 2023届高三2班 0.742
|
||||
|
||||
002775
|
||||
20220906 2023届高三2班 0.645
|
||||
|
||||
002784
|
||||
20220906 2023届高三2班 0.903
|
||||
|
||||
007991
|
||||
20220906 2023届高三2班 0.581
|
||||
|
||||
005150
|
||||
20220906 2023届高三2班 0.903
|
||||
|
||||
002790
|
||||
20220906 2023届高三2班 0.774
|
||||
|
||||
002791
|
||||
20220906 2023届高三2班 0.774
|
||||
|
||||
000757
|
||||
20220906 2023届高三2班 0.968
|
||||
|
||||
002793
|
||||
20220906 2023届高三2班 0.968
|
||||
|
||||
000389
|
||||
20220906 2023届高三2班 0.871
|
||||
|
||||
002800
|
||||
20220906 2023届高三2班 0.903
|
||||
|
||||
000778
|
||||
20220907 2023届高三2班 0.750
|
||||
|
||||
001262
|
||||
20220907 2023届高三2班 0.656
|
||||
|
||||
000342
|
||||
20220907 2023届高三2班 0.125
|
||||
|
||||
001238
|
||||
20220907 2023届高三2班 0.906
|
||||
|
||||
001239
|
||||
20220907 2023届高三2班 0.875
|
||||
|
||||
001242
|
||||
20220907 2023届高三2班 0.844
|
||||
|
||||
002831
|
||||
20220907 2023届高三2班 0.594 0.406
|
||||
|
||||
002968
|
||||
20220907 2023届高三2班 0.531 0.063
|
||||
|
||||
009508
|
||||
20220907 2023届高三2班 1.000
|
||||
|
||||
003936
|
||||
20220907 2023届高三2班 0.906
|
||||
|
||||
007984
|
||||
20220907 2023届高三2班 1.000
|
||||
|
||||
009511
|
||||
20220907 2023届高三2班 0.844
|
||||
|
||||
005508
|
||||
20220907 2023届高三2班 0.844
|
||||
|
||||
000734
|
||||
20220907 2023届高三2班 0.469
|
||||
|
||||
002856
|
||||
20220907 2023届高三2班 0.781 0.906
|
||||
|
||||
000474
|
||||
20220907 2023届高三2班 0.813
|
||||
|
||||
002847
|
||||
20220907 2023届高三2班 0.844 0.938
|
||||
|
||||
002851
|
||||
20220907 2023届高三2班 0.500 0.281
|
||||
|
||||
001286
|
||||
20220907 2023届高三2班 0.906
|
||||
|
||||
001292
|
||||
20220907 2023届高三2班 0.906
|
||||
|
||||
010110
|
||||
20220907 2023届高三2班 1.000 1.000 0.875 0.906
|
||||
|
||||
000058
|
||||
20220907 2023届高三2班 1.000
|
||||
|
||||
010114
|
||||
20220907 2023届高三2班 0.844
|
||||
|
||||
001296
|
||||
20220907 2023届高三2班 1.000
|
||||
|
||||
005610
|
||||
20220907 2023届高三2班 0.906
|
||||
|
||||
001353
|
||||
20220907 2023届高三2班 0.813
|
||||
|
||||
001305
|
||||
20220907 2023届高三2班 0.813 0.813 0.969 0.969 0.969
|
||||
|
||||
010125
|
||||
20220907 2023届高三2班 0.969
|
||||
|
||||
001309
|
||||
20220907 2023届高三2班 1.000 0.750
|
||||
|
||||
005123
|
||||
20220907 2023届高三2班 0.563
|
||||
|
||||
005678
|
||||
20220907 2023届高三2班 0.469
|
||||
|
||||
001340
|
||||
20220908 2023届高三2班 0.788 0.818 0.879 0.727 0.788
|
||||
|
||||
002925
|
||||
20220908 2023届高三2班 0.939
|
||||
|
||||
002911
|
||||
20220908 2023届高三2班 0.879
|
||||
|
||||
002918
|
||||
20220908 2023届高三2班 0.727
|
||||
|
||||
003815
|
||||
20220908 2023届高三2班 0.939
|
||||
|
||||
005568
|
||||
20220908 2023届高三2班 0.939
|
||||
|
||||
000954
|
||||
20220908 2023届高三2班 1.000
|
||||
|
||||
001324
|
||||
20220908 2023届高三2班 0.879
|
||||
|
||||
001326
|
||||
20220908 2023届高三2班 0.939 0.970
|
||||
|
||||
002871
|
||||
20220908 2023届高三2班 0.818
|
||||
|
||||
002878
|
||||
20220908 2023届高三2班 0.818 0.818
|
||||
|
||||
002898
|
||||
20220908 2023届高三2班 0.727
|
||||
|
||||
000362
|
||||
20220908 2023届高三2班 0.879
|
||||
|
||||
001351
|
||||
20220908 2023届高三2班 0.848
|
||||
|
||||
003747
|
||||
20220908 2023届高三2班 0.727
|
||||
|
||||
005199
|
||||
20220908 2023届高三2班 0.970
|
||||
|
||||
009490
|
||||
20220908 2023届高三2班 0.879
|
||||
|
||||
009517
|
||||
20220909 2023届高三2班 0.656
|
||||
|
||||
007941
|
||||
20220909 2023届高三2班 0.813
|
||||
|
||||
000092
|
||||
20220909 2023届高三2班 0.625
|
||||
|
||||
001270
|
||||
20220909 2023届高三2班 1.000 1.000 0.969 0.781
|
||||
|
||||
002888
|
||||
20220909 2023届高三2班 0.719
|
||||
|
||||
001331
|
||||
20220909 2023届高三2班 0.156
|
||||
|
||||
002894
|
||||
20220909 2023届高三2班 0.844 0.750
|
||||
|
||||
001211
|
||||
20220909 2023届高三2班 0.969 0.906 0.875 0.844 0.750
|
||||
|
||||
001218
|
||||
20220909 2023届高三2班 0.906 0.750 0.625
|
||||
|
||||
002893
|
||||
20220909 2023届高三2班 0.813
|
||||
|
||||
002895
|
||||
20220909 2023届高三2班 0.688
|
||||
|
||||
009522
|
||||
20220909 2023届高三2班 0.563 0.250
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,107 @@
|
|||
|
||||
|
||||
|
||||
[BEGIN]
|
||||
## 20220906
|
||||
** 2023届高三2班
|
||||
1050 0.871
|
||||
9446 0.355
|
||||
1072 0.774
|
||||
10060 0.742
|
||||
33 0.903
|
||||
1073 0.935
|
||||
23 0.774
|
||||
2773 0.742
|
||||
2775 0.645
|
||||
2784 0.903
|
||||
7991 0.581
|
||||
5150 0.903
|
||||
2790 0.774
|
||||
2791 0.774
|
||||
757 0.968
|
||||
2793 0.968
|
||||
389 0.871
|
||||
2800 0.903
|
||||
[END]
|
||||
|
||||
|
||||
[BEGIN]
|
||||
## 20220907
|
||||
** 2023届高三2班
|
||||
778 0.750
|
||||
1262 0.656
|
||||
342 0.125
|
||||
1238 0.906
|
||||
1239 0.875
|
||||
1242 0.844
|
||||
2831 0.594 0.406
|
||||
2968 0.531 0.063
|
||||
9508 1.000
|
||||
3936 0.906
|
||||
7984 1.000
|
||||
9511 0.844
|
||||
5508 0.844
|
||||
734 0.469
|
||||
2856 0.781 0.906
|
||||
474 0.813
|
||||
2847 0.844 0.938
|
||||
2851 0.500 0.281
|
||||
[END]
|
||||
|
||||
[BEGIN]
|
||||
## 20220907
|
||||
** 2023届高三2班
|
||||
1286 0.906
|
||||
1292 0.906
|
||||
10110 1.000 1.000 0.875 0.906
|
||||
58 1.000
|
||||
10114 0.844
|
||||
1296 1.000
|
||||
5610 0.906
|
||||
1353 0.813
|
||||
1305 0.813 0.813 0.969 0.969 0.969
|
||||
10125 0.969
|
||||
1309 1.000 0.750
|
||||
5123 0.563
|
||||
5678 0.469
|
||||
[END]
|
||||
|
||||
[BEGIN]
|
||||
## 20220908
|
||||
** 2023届高三2班
|
||||
1340 0.788 0.818 0.879 0.727 0.788
|
||||
2925 0.939
|
||||
2911 0.879
|
||||
2918 0.727
|
||||
3815 0.939
|
||||
5568 0.939
|
||||
954 1.000
|
||||
1324 0.879
|
||||
1326 0.939 0.970
|
||||
2871 0.818
|
||||
2878 0.818 0.818
|
||||
2898 0.727
|
||||
362 0.879
|
||||
1351 0.848
|
||||
3747 0.727
|
||||
5199 0.970
|
||||
9490 0.879
|
||||
[END]
|
||||
|
||||
|
||||
[BEGIN]
|
||||
## 20220909
|
||||
** 2023届高三2班
|
||||
9517 0.656
|
||||
7941 0.813
|
||||
92 0.625
|
||||
1270 1.000 1.000 0.969 0.781
|
||||
2888 0.719
|
||||
1331 0.156
|
||||
2894 0.844 0.750
|
||||
1211 0.969 0.906 0.875 0.844 0.750
|
||||
1218 0.906 0.750 0.625
|
||||
2893 0.813
|
||||
2895 0.688
|
||||
9522 0.563 0.250
|
||||
[END]
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -2,7 +2,7 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -13,61 +13,11 @@
|
|||
"0.805\t2\t006207\n",
|
||||
"0.812\t3\t006137\n",
|
||||
"0.798\t4\t012269\n",
|
||||
"0.831\t5\t040063\n",
|
||||
"0.817\t5\t010989\n",
|
||||
"0.888\t6\t021503\n",
|
||||
"0.932\t7\t040057\n",
|
||||
"0.815\t7\t021484\n",
|
||||
"0.959\t8\t003206\n",
|
||||
"0.817\t9\t011090\n",
|
||||
"0.796\t10\t003058\n",
|
||||
"0.768\t11\t005984\n",
|
||||
"0.744\t12\t021463\n",
|
||||
"0.872\t13\t008169\n",
|
||||
"0.843\t14\t021559\n",
|
||||
"0.758\t15\t006230\n",
|
||||
"0.888\t16\t008317\n",
|
||||
"0.682\t17\t012005\n",
|
||||
"0.630\t18\t006107\n",
|
||||
"0.765\t19\t021581\n",
|
||||
"0.763\t20\t014242\n",
|
||||
"0.661\t21\t013849\n",
|
||||
"0.716\t22\t008191\n",
|
||||
"0.504\t23\t003537\n",
|
||||
"0.934\t24\t000818\n",
|
||||
"0.755\t25\t012596\n",
|
||||
"0.893\t26\t012355\n",
|
||||
"0.835\t27\t000479\n",
|
||||
"0.950\t28\t001529\n",
|
||||
"0.797\t29\t000577\n",
|
||||
"0.737\t30\t003191\n",
|
||||
"1.000\t31\t012594\n",
|
||||
"0.824\t32\t003150\n",
|
||||
"1.000\t33\t004066\n",
|
||||
"0.707\t34\t003140\n",
|
||||
"0.648\t35\t030294\n",
|
||||
"0.752\t36\t014186\n",
|
||||
"0.621\t37\t014223\n",
|
||||
"0.700\t38\t004442\n",
|
||||
"0.813\t39\t030808\n",
|
||||
"0.669\t40\t013843\n",
|
||||
"0.654\t41\t000738\n",
|
||||
"0.682\t42\t013354\n",
|
||||
"0.830\t43\t030042\n",
|
||||
"0.977\t44\t020411\n",
|
||||
"0.661\t45\t011133\n",
|
||||
"0.681\t46\t004118\n",
|
||||
"0.667\t47\t011943\n",
|
||||
"0.683\t48\t005600\n",
|
||||
"0.691\t49\t011386\n",
|
||||
"0.711\t50\t002922\n",
|
||||
"1.000\t51\t020452\n",
|
||||
"0.736\t52\t020356\n",
|
||||
"0.666\t53\t020430\n",
|
||||
"0.685\t54\t020357\n",
|
||||
"0.713\t55\t005540\n",
|
||||
"0.643\t56\t012868\n",
|
||||
"0.845\t57\t005598\n",
|
||||
"0.860\t58\t002958\n",
|
||||
"0.646\t59\t004184\n"
|
||||
"0.839\t9\t011090\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -75,7 +25,7 @@
|
|||
"import os,re,difflib,Levenshtein,time,json\n",
|
||||
"\n",
|
||||
"# 重要!!! 范围\n",
|
||||
"old_problems_range = \"1:999999\"\n",
|
||||
"old_problems_range = \"1:30000\"\n",
|
||||
"threshold = 0.85\n",
|
||||
"\n",
|
||||
"# 待比对的文件\n",
|
||||
|
|
@ -117,11 +67,11 @@
|
|||
"def Lev_get_equal_rate(str1,str2):\n",
|
||||
" return Levenshtein.ratio(str1,str2)\n",
|
||||
"\n",
|
||||
"def GenerateProblemListFromString(data):\n",
|
||||
"def GenerateProblemListFromString(problem_string):\n",
|
||||
" try:\n",
|
||||
" data = re.findall(r\"\\\\begin\\{document\\}([\\s\\S]*?)\\\\end\\{document\\}\",problems_string)[0]\n",
|
||||
" data = re.findall(r\"\\\\begin\\{document\\}([\\s\\S]*?)\\\\end\\{document\\}\",problem_string)[0]\n",
|
||||
" except:\n",
|
||||
" pass\n",
|
||||
" data = problem_string\n",
|
||||
" data = re.sub(r\"\\n{2,}\",\"\\n\",data)\n",
|
||||
" data = re.sub(r\"\\\\item\",r\"\\\\enditem\\\\item\",data)\n",
|
||||
" data = re.sub(r\"\\\\end\\{enumerate\\}\",r\"\\\\enditem\",data)\n",
|
||||
|
|
@ -220,7 +170,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15"
|
||||
"version": "3.8.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,108 @@
|
|||
import os,re,difflib,Levenshtein,time,json
|
||||
|
||||
# IMPORTANT: inclusive ID range of the existing problems that new problems are compared against
|
||||
old_problems_range = "1:30000"
|
||||
threshold = 0.85
|
||||
|
||||
# TeX file containing the new problems to be compared
|
||||
filename = r"C:\Users\weiye\Documents\wwy sync\临时工作区\自拟题目9.tex"
|
||||
|
||||
# Build the list of IDs: chunks are comma separated, "a:b" is a closed integer interval
def generate_number_set(string):
    """Expand an ID specification into a list of zero-padded ID strings.

    The specification is a comma-separated list of chunks. A chunk is either
    a single ID (``"58"``) or a closed integer interval written ``"a:b"``.
    All whitespace is ignored. Every resulting ID is left-padded with zeros
    to at least six characters.

    Args:
        string: The specification, e.g. ``"1,3:5"``.

    Returns:
        The IDs in specification order, e.g.
        ``["000001", "000003", "000004", "000005"]``. Empty chunks (empty
        input, trailing or doubled commas) contribute nothing.

    Raises:
        ValueError: If an interval does not consist of exactly two integers.
    """
    string = re.sub(r"[\n\s]", "", string)
    numbers_list = []
    for chunk in string.split(","):
        if not chunk:
            # "".zfill(6) would otherwise fabricate the ID "000000".
            continue
        if ":" in chunk:
            start, end = chunk.split(":")
            numbers_list.extend(
                str(number).zfill(6) for number in range(int(start), int(end) + 1)
            )
        else:
            numbers_list.append(chunk.zfill(6))
    return numbers_list
|
||||
|
||||
# String normalisation applied before similarity comparison
def pre_treating(string):
    """Strip layout and markup noise from a problem text before comparison.

    The passes run in a fixed order, and the order matters: a later pass can
    match text that only becomes contiguous after an earlier pass removed
    characters (for example ``"display style"`` becomes ``"displaystyle"``
    once whitespace is gone, and is then removed).

    Args:
        string: Raw problem text (LaTeX source).

    Returns:
        The text with ``center`` environments, selected macro names,
        whitespace, backslashes, braces, dollar signs, round and square
        brackets, and the punctuation ``, . : ; ?`` removed.
    """
    # 1. Drop whole center environments together with their content.
    string = re.sub(r"\\begin\{center\}[\s\S]*?\\end\{center\}", "", string)
    # 2. Drop answer-slot and choice-layout macro names plus font macros.
    string = re.sub(
        r"(bracket\{\d+\})|(blank\{\d+\})|(fourch)|(twoch)|(onech)|(mathrm)|(text)",
        "",
        string,
    )
    # 3. Drop whitespace and structural characters. \s already covers \n and
    #    \t, so no separate newline/tab pass is needed afterwards.
    string = re.sub(r"[\s\\\{\}\$\(\)\[\]]", "", string)
    # 4. Drop macro names whose backslash was removed in the previous pass.
    string = re.sub(r"(displaystyle)|(overrightarrow)", "", string)
    # 5. Drop ASCII punctuation.
    string = re.sub(r"[,\.:;?]", "", string)
    return string
|
||||
|
||||
# String comparison based on difflib
def difflab_get_equal_rate(str1, str2):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of two strings.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        The value of ``SequenceMatcher(None, str1, str2).ratio()``.
    """
    return difflib.SequenceMatcher(None, str1, str2).ratio()
|
||||
|
||||
# String comparison based on Levenshtein.jaro
def jaro_get_equal_rate(str1, str2):
    """Return ``Levenshtein.jaro(str1, str2)`` for the two strings.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        Whatever ``Levenshtein.jaro`` returns for the pair.
    """
    return Levenshtein.jaro(str1, str2)
|
||||
|
||||
# String comparison based on Levenshtein.ratio
def Lev_get_equal_rate(str1, str2):
    """Return ``Levenshtein.ratio(str1, str2)`` for the two strings.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        Whatever ``Levenshtein.ratio`` returns for the pair.
    """
    return Levenshtein.ratio(str1, str2)
|
||||
|
||||
def GenerateProblemListFromString(problem_string):
    r"""Split LaTeX source into ``(problem_text, section_marker)`` pairs.

    If the text contains a ``document`` environment only its body is used;
    otherwise the whole string is used. Runs of blank lines are collapsed,
    then every ``\item`` up to the next ``\item`` or ``\end{enumerate}`` is
    taken as one problem.

    A section marker is a line of the form ``%<letters/digits>``. Each
    problem is paired with the last marker that appears before its
    ``\item``, or ``""`` when there is none.

    Args:
        problem_string: LaTeX source text.

    Returns:
        A list of ``(text, marker)`` tuples in document order, where ``text``
        is the whitespace-stripped item body.
    """
    document = re.search(
        r"\\begin\{document\}([\s\S]*?)\\end\{document\}", problem_string
    )
    data = document.group(1) if document else problem_string
    data = re.sub(r"\n{2,}", "\n", data)
    # Terminate every item explicitly so one non-greedy pattern can cut them out.
    data = re.sub(r"\\item", r"\\enditem\\item", data)
    data = re.sub(r"\\end\{enumerate\}", r"\\enditem", data)

    # (end offset, marker text) of every section marker, in document order.
    markers = [
        (found.end(), found.group().replace("%", "").strip())
        for found in re.finditer(r"\n\%[\dA-Za-z]+", data)
    ]

    ProblemsList = []
    suffix = ""
    next_marker = 0
    for item in re.finditer(r"\\item([\s\S]*?)\\enditem", data):
        # Use the item's real offset rather than searching for its text:
        # searching returns the first occurrence, which is wrong for
        # duplicated or empty items.
        body_start = item.start(1)
        while next_marker < len(markers) and markers[next_marker][0] <= body_start:
            suffix = markers[next_marker][1]
            next_marker += 1
        ProblemsList.append((item.group(1).strip(), suffix))
    return ProblemsList
|
||||
|
||||
|
||||
# Similarity function used for every comparison below
sim_test = jaro_get_equal_rate

# Load the problem database
with open(r"../题库0.3/Problems.json", "r", encoding="utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Parse the new problems out of the file under test
with open(filename, "r", encoding="u8") as f:
    newdatabase = f.read()
new_pro_list = GenerateProblemListFromString(newdatabase)

# Normalise the existing problems whose ID lies in the configured range.
# A set keeps the membership test O(1) per database entry.
idrange_raw = set(generate_number_set(old_problems_range))
idrange = [pid for pid in pro_dict if pid in idrange_raw]
pro_dict_treated = {}
for p in idrange:
    pro_dict_treated[p] = pre_treating(pro_dict[p]["content"])

# Normalise the new problems, numbered from 1 in file order
new_dict_treated = {}
for i, (content, _marker) in enumerate(new_pro_list, start=1):
    new_dict_treated[i] = pre_treating(content)

# For every new problem report the most similar existing problem
output_lines = []
for i, new_p in new_dict_treated.items():
    maxsim = 0
    # Reset per problem so a problem with no positive similarity reports an
    # empty ID instead of the previous problem's best match.
    argmax = ""
    for p, old_p in pro_dict_treated.items():
        sim = sim_test(new_p, old_p)
        if sim > maxsim:
            maxsim = sim
            argmax = p
    line = "%.3f\t%d\t%s" % (maxsim, i, argmax)
    print(line)
    output_lines.append(line)
output = "".join(line + "\n" for line in output_lines)

with open("临时文件/新题相似相同.txt", "w", encoding="u8") as f:
    f.write(output)
|
||||
|
||||
|
|
@ -2,9 +2,26 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'openpyxl'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[1], line 63\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[39mfor\u001b[39;00m t \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39mlen\u001b[39m(results)):\n\u001b[0;32m 61\u001b[0m df[\u001b[39mid\u001b[39m\u001b[39m+\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m_\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m+\u001b[39m\u001b[39mstr\u001b[39m(t\u001b[39m+\u001b[39m\u001b[39m1\u001b[39m)][c] \u001b[39m=\u001b[39m \u001b[39mfloat\u001b[39m(results[t])\n\u001b[1;32m---> 63\u001b[0m df\u001b[39m.\u001b[39;49mto_excel(outputfile,index \u001b[39m=\u001b[39;49m \u001b[39mFalse\u001b[39;49;00m)\n",
|
||||
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
|
||||
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
|
||||
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\core\\generic.py:2374\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[0;32m 2361\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mio\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mformats\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexcel\u001b[39;00m \u001b[39mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m 2363\u001b[0m formatter \u001b[39m=\u001b[39m ExcelFormatter(\n\u001b[0;32m 2364\u001b[0m df,\n\u001b[0;32m 2365\u001b[0m na_rep\u001b[39m=\u001b[39mna_rep,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2372\u001b[0m inf_rep\u001b[39m=\u001b[39minf_rep,\n\u001b[0;32m 2373\u001b[0m )\n\u001b[1;32m-> 2374\u001b[0m formatter\u001b[39m.\u001b[39;49mwrite(\n\u001b[0;32m 2375\u001b[0m excel_writer,\n\u001b[0;32m 2376\u001b[0m sheet_name\u001b[39m=\u001b[39;49msheet_name,\n\u001b[0;32m 2377\u001b[0m startrow\u001b[39m=\u001b[39;49mstartrow,\n\u001b[0;32m 2378\u001b[0m startcol\u001b[39m=\u001b[39;49mstartcol,\n\u001b[0;32m 2379\u001b[0m freeze_panes\u001b[39m=\u001b[39;49mfreeze_panes,\n\u001b[0;32m 2380\u001b[0m engine\u001b[39m=\u001b[39;49mengine,\n\u001b[0;32m 2381\u001b[0m storage_options\u001b[39m=\u001b[39;49mstorage_options,\n\u001b[0;32m 2382\u001b[0m )\n",
|
||||
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\io\\formats\\excel.py:918\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[0;32m 914\u001b[0m need_save \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 915\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 916\u001b[0m \u001b[39m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[0;32m 917\u001b[0m \u001b[39m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[1;32m--> 918\u001b[0m writer \u001b[39m=\u001b[39m ExcelWriter( \u001b[39m# type: ignore[abstract]\u001b[39;49;00m\n\u001b[0;32m 919\u001b[0m writer, engine\u001b[39m=\u001b[39;49mengine, storage_options\u001b[39m=\u001b[39;49mstorage_options\n\u001b[0;32m 920\u001b[0m )\n\u001b[0;32m 921\u001b[0m need_save \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m 923\u001b[0m \u001b[39mtry\u001b[39;00m:\n",
|
||||
"File \u001b[1;32mc:\\Users\\weiye\\.conda\\envs\\pythontest\\lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:56\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[0;32m 44\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 45\u001b[0m path: FilePath \u001b[39m|\u001b[39m WriteExcelBuffer \u001b[39m|\u001b[39m ExcelWriter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 54\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 55\u001b[0m \u001b[39m# Use the openpyxl module as the Excel writer.\u001b[39;00m\n\u001b[1;32m---> 56\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mopenpyxl\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mworkbook\u001b[39;00m \u001b[39mimport\u001b[39;00m Workbook\n\u001b[0;32m 58\u001b[0m engine_kwargs \u001b[39m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[0;32m 60\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\n\u001b[0;32m 61\u001b[0m path,\n\u001b[0;32m 62\u001b[0m mode\u001b[39m=\u001b[39mmode,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 65\u001b[0m engine_kwargs\u001b[39m=\u001b[39mengine_kwargs,\n\u001b[0;32m 66\u001b[0m )\n",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os,re,json\n",
|
||||
"import pandas as pd\n",
|
||||
|
|
@ -71,26 +88,6 @@
|
|||
"df.to_excel(outputfile,index = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.231"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"float(\"0.231\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
|
@ -115,7 +112,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
"version": "3.9.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
import json
|
||||
|
||||
# Output file receiving the comma-separated IDs of problems that have no tags
filename = r"文本文件/题号筛选.txt"
with open(r"..\题库0.3\problems.json", "r", encoding="u8") as f:
    database = f.read()
pro_dict = json.loads(database)
units = ["第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元","暂无对应"]
# Counters are sized from `units` so adding a unit needs no further edits.
count1 = [0] * len(units)  # problems whose tags mention the unit
count2 = [0] * len(units)  # ... of which have a non-empty "objs" entry
count3 = 0                 # problems with no tags at all
untagged = []
for pid, problem in pro_dict.items():
    # Join once per problem instead of once per unit.
    joined_tags = "".join(problem["tags"])
    for u, unit in enumerate(units):
        if unit in joined_tags:
            count1[u] += 1
            if len(problem["objs"]) > 0:
                count2[u] += 1
    if len(problem["tags"]) == 0:
        count3 += 1
        untagged.append(pid)
for unit, total, finished in zip(units, count1, count2):
    print(unit, ". 总题数:", total, ", 完成对应题数:", finished)
print("题库总题数: %d, 已有单元标签题数: %d, 未赋单元标签题数: %d."%(len(pro_dict),len(pro_dict)-count3,count3))
with open(filename, "w", encoding="u8") as f:
    f.write(",".join(untagged))
print("未赋标签的题号列表已保存至: %s" %filename)
|
||||
|
|
@ -2,70 +2,56 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json,os\n",
|
||||
"with open(r\"..\\题库0.3\\problems.json\",\"r\",encoding = \"u8\") as f:\n",
|
||||
" database = f.read()\n",
|
||||
"pro_dict = json.loads(database)\n",
|
||||
"units = [\"第一单元\",\"第二单元\",\"第三单元\",\"第四单元\",\"第五单元\",\"第六单元\",\"第七单元\",\"第八单元\",\"第九单元\"]\n",
|
||||
"count1 = [0]*9\n",
|
||||
"count2 = [0]*9\n",
|
||||
"for id in pro_dict:\n",
|
||||
" for u in range(9):\n",
|
||||
" unit = units[u]\n",
|
||||
" if unit in \"\".join(pro_dict[id][\"tags\"]):\n",
|
||||
" count1[u] += 1\n",
|
||||
" if len(pro_dict[id][\"objs\"]) > 0:\n",
|
||||
" count2[u] += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"第一单元 . 总题数: 2014 , 完成对应题数: 1400\n",
|
||||
"第二单元 . 总题数: 3111 , 完成对应题数: 2127\n",
|
||||
"第三单元 . 总题数: 2218 , 完成对应题数: 395\n",
|
||||
"第四单元 . 总题数: 1444 , 完成对应题数: 1036\n",
|
||||
"第五单元 . 总题数: 1433 , 完成对应题数: 306\n",
|
||||
"第六单元 . 总题数: 1383 , 完成对应题数: 431\n",
|
||||
"第七单元 . 总题数: 1712 , 完成对应题数: 957\n",
|
||||
"第八单元 . 总题数: 1378 , 完成对应题数: 522\n",
|
||||
"第九单元 . 总题数: 422 , 完成对应题数: 101\n"
|
||||
"第一单元 . 总题数: 2213 , 完成对应题数: 1401\n",
|
||||
"第二单元 . 总题数: 3472 , 完成对应题数: 2135\n",
|
||||
"第三单元 . 总题数: 2426 , 完成对应题数: 395\n",
|
||||
"第四单元 . 总题数: 1657 , 完成对应题数: 1037\n",
|
||||
"第五单元 . 总题数: 1590 , 完成对应题数: 307\n",
|
||||
"第六单元 . 总题数: 1555 , 完成对应题数: 431\n",
|
||||
"第七单元 . 总题数: 1996 , 完成对应题数: 958\n",
|
||||
"第八单元 . 总题数: 1526 , 完成对应题数: 525\n",
|
||||
"第九单元 . 总题数: 473 , 完成对应题数: 101\n",
|
||||
"暂无对应 . 总题数: 344 , 完成对应题数: 121\n",
|
||||
"题库总题数: 18795, 已有单元标签题数: 16941, 未赋单元标签题数: 1854.\n",
|
||||
"未赋标签的题号列表已保存至: 文本文件/题号筛选.txt\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json,os\n",
|
||||
"\n",
|
||||
"filename = r\"文本文件/题号筛选.txt\"\n",
|
||||
"with open(r\"..\\题库0.3\\problems.json\",\"r\",encoding = \"u8\") as f:\n",
|
||||
" database = f.read()\n",
|
||||
"pro_dict = json.loads(database)\n",
|
||||
"units = [\"第一单元\",\"第二单元\",\"第三单元\",\"第四单元\",\"第五单元\",\"第六单元\",\"第七单元\",\"第八单元\",\"第九单元\",\"暂无对应\"]\n",
|
||||
"count1 = [0]*10\n",
|
||||
"count2 = [0]*10\n",
|
||||
"count3 = 0\n",
|
||||
"untagged = []\n",
|
||||
"for id in pro_dict:\n",
|
||||
" for u in range(10):\n",
|
||||
" unit = units[u]\n",
|
||||
" if unit in \"\".join(pro_dict[id][\"tags\"]):\n",
|
||||
" count1[u] += 1\n",
|
||||
" if len(pro_dict[id][\"objs\"]) > 0:\n",
|
||||
" count2[u] += 1\n",
|
||||
" if len(pro_dict[id][\"tags\"]) == 0:\n",
|
||||
" count3 += 1\n",
|
||||
" untagged.append(id)\n",
|
||||
"for u in range(len(units)):\n",
|
||||
" print(units[u],\". 总题数:\",count1[u],\", 完成对应题数:\",count2[u])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(12684, 7269)"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"(sum(count1),sum(count2))"
|
||||
" print(units[u],\". 总题数:\",count1[u],\", 完成对应题数:\",count2[u])\n",
|
||||
"print(\"题库总题数: %d, 已有单元标签题数: %d, 未赋单元标签题数: %d.\"%(len(pro_dict),len(pro_dict)-count3,count3))\n",
|
||||
"with open (filename,\"w\",encoding = \"u8\") as f:\n",
|
||||
" f.write(\",\".join(untagged))\n",
|
||||
"print(\"未赋标签的题号列表已保存至: %s\" %(filename))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -92,7 +78,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15 (main, Nov 24 2022, 14:39:17) [MSC v.1916 64 bit (AMD64)]"
|
||||
"version": "3.9.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
|
|
|||
|
|
@ -2,12 +2,22 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"相同题目已标注: 000100 031239\n",
|
||||
"关联题目已标注: 000466 030610\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os,re,json\n",
|
||||
"\n",
|
||||
"filename = \"临时文件/相似题目.txt\"\n",
|
||||
"\n",
|
||||
"# 读取题库数据并转换为字典\n",
|
||||
"with open(r\"../题库0.3/Problems.json\",\"r\",encoding = \"utf8\") as f:\n",
|
||||
|
|
@ -15,7 +25,7 @@
|
|||
"pro_dict = json.loads(database)\n",
|
||||
"\n",
|
||||
"# 读取已分类的相似文件列表\n",
|
||||
"with open(\"临时文件/相似题目.txt\",\"r\",encoding = \"utf8\") as f:\n",
|
||||
"with open(filename,\"r\",encoding = \"utf8\") as f:\n",
|
||||
" similar_text = \"\\n\"+f.read()\n",
|
||||
"\n",
|
||||
"similar_types = re.findall(r\"\\n[\\d]\\.[\\d]{4}[\\s]*([srSRnN ])[\\s]*\\n\",similar_text)\n",
|
||||
|
|
@ -47,6 +57,13 @@
|
|||
"with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(database)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
@ -65,7 +82,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
"version": "3.8.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
|
|
|||
782
工具/相似题目检测.ipynb
782
工具/相似题目检测.ipynb
|
|
@ -9,7 +9,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"旧题目数: 0 , 新题目数: 18100\n",
|
||||
"旧题目数: 1907 , 新题目数: 500\n",
|
||||
"开始新题与旧题的比对\n",
|
||||
"50\n",
|
||||
"100\n",
|
||||
|
|
@ -21,358 +21,6 @@
|
|||
"400\n",
|
||||
"450\n",
|
||||
"500\n",
|
||||
"550\n",
|
||||
"600\n",
|
||||
"650\n",
|
||||
"700\n",
|
||||
"750\n",
|
||||
"800\n",
|
||||
"850\n",
|
||||
"900\n",
|
||||
"950\n",
|
||||
"1000\n",
|
||||
"1050\n",
|
||||
"1100\n",
|
||||
"1150\n",
|
||||
"1200\n",
|
||||
"1250\n",
|
||||
"1300\n",
|
||||
"1350\n",
|
||||
"1400\n",
|
||||
"1450\n",
|
||||
"1500\n",
|
||||
"1550\n",
|
||||
"1600\n",
|
||||
"1650\n",
|
||||
"1700\n",
|
||||
"1750\n",
|
||||
"1800\n",
|
||||
"1850\n",
|
||||
"1900\n",
|
||||
"1950\n",
|
||||
"2000\n",
|
||||
"2050\n",
|
||||
"2100\n",
|
||||
"2150\n",
|
||||
"2200\n",
|
||||
"2250\n",
|
||||
"2300\n",
|
||||
"2350\n",
|
||||
"2400\n",
|
||||
"2450\n",
|
||||
"2500\n",
|
||||
"2550\n",
|
||||
"2600\n",
|
||||
"2650\n",
|
||||
"2700\n",
|
||||
"2750\n",
|
||||
"2800\n",
|
||||
"2850\n",
|
||||
"2900\n",
|
||||
"2950\n",
|
||||
"3000\n",
|
||||
"3050\n",
|
||||
"3100\n",
|
||||
"3150\n",
|
||||
"3200\n",
|
||||
"3250\n",
|
||||
"3300\n",
|
||||
"3350\n",
|
||||
"3400\n",
|
||||
"3450\n",
|
||||
"3500\n",
|
||||
"3550\n",
|
||||
"3600\n",
|
||||
"3650\n",
|
||||
"3700\n",
|
||||
"3750\n",
|
||||
"3800\n",
|
||||
"3850\n",
|
||||
"3900\n",
|
||||
"3950\n",
|
||||
"4000\n",
|
||||
"4050\n",
|
||||
"4100\n",
|
||||
"4150\n",
|
||||
"4200\n",
|
||||
"4250\n",
|
||||
"4300\n",
|
||||
"4350\n",
|
||||
"4400\n",
|
||||
"4450\n",
|
||||
"4500\n",
|
||||
"4550\n",
|
||||
"4600\n",
|
||||
"4650\n",
|
||||
"4700\n",
|
||||
"4750\n",
|
||||
"4800\n",
|
||||
"4850\n",
|
||||
"4900\n",
|
||||
"4950\n",
|
||||
"5000\n",
|
||||
"5050\n",
|
||||
"5100\n",
|
||||
"5150\n",
|
||||
"5200\n",
|
||||
"5250\n",
|
||||
"5300\n",
|
||||
"5350\n",
|
||||
"5400\n",
|
||||
"5450\n",
|
||||
"5500\n",
|
||||
"5550\n",
|
||||
"5600\n",
|
||||
"5650\n",
|
||||
"5700\n",
|
||||
"5750\n",
|
||||
"5800\n",
|
||||
"5850\n",
|
||||
"5900\n",
|
||||
"5950\n",
|
||||
"6000\n",
|
||||
"6050\n",
|
||||
"6100\n",
|
||||
"6150\n",
|
||||
"6200\n",
|
||||
"6250\n",
|
||||
"6300\n",
|
||||
"6350\n",
|
||||
"6400\n",
|
||||
"6450\n",
|
||||
"6500\n",
|
||||
"6550\n",
|
||||
"6600\n",
|
||||
"6650\n",
|
||||
"6700\n",
|
||||
"6750\n",
|
||||
"6800\n",
|
||||
"6850\n",
|
||||
"6900\n",
|
||||
"6950\n",
|
||||
"7000\n",
|
||||
"7050\n",
|
||||
"7100\n",
|
||||
"7150\n",
|
||||
"7200\n",
|
||||
"7250\n",
|
||||
"7300\n",
|
||||
"7350\n",
|
||||
"7400\n",
|
||||
"7450\n",
|
||||
"7500\n",
|
||||
"7550\n",
|
||||
"7600\n",
|
||||
"7650\n",
|
||||
"7700\n",
|
||||
"7750\n",
|
||||
"7800\n",
|
||||
"7850\n",
|
||||
"7900\n",
|
||||
"7950\n",
|
||||
"8000\n",
|
||||
"8050\n",
|
||||
"8100\n",
|
||||
"8150\n",
|
||||
"8200\n",
|
||||
"8250\n",
|
||||
"8300\n",
|
||||
"8350\n",
|
||||
"8400\n",
|
||||
"8450\n",
|
||||
"8500\n",
|
||||
"8550\n",
|
||||
"8600\n",
|
||||
"8650\n",
|
||||
"8700\n",
|
||||
"8750\n",
|
||||
"8800\n",
|
||||
"8850\n",
|
||||
"8900\n",
|
||||
"8950\n",
|
||||
"9000\n",
|
||||
"9050\n",
|
||||
"9100\n",
|
||||
"9150\n",
|
||||
"9200\n",
|
||||
"9250\n",
|
||||
"9300\n",
|
||||
"9350\n",
|
||||
"9400\n",
|
||||
"9450\n",
|
||||
"9500\n",
|
||||
"9550\n",
|
||||
"9600\n",
|
||||
"9650\n",
|
||||
"9700\n",
|
||||
"9750\n",
|
||||
"9800\n",
|
||||
"9850\n",
|
||||
"9900\n",
|
||||
"9950\n",
|
||||
"10000\n",
|
||||
"10050\n",
|
||||
"10100\n",
|
||||
"10150\n",
|
||||
"10200\n",
|
||||
"10250\n",
|
||||
"10300\n",
|
||||
"10350\n",
|
||||
"10400\n",
|
||||
"10450\n",
|
||||
"10500\n",
|
||||
"10550\n",
|
||||
"10600\n",
|
||||
"10650\n",
|
||||
"10700\n",
|
||||
"10750\n",
|
||||
"10800\n",
|
||||
"10850\n",
|
||||
"10900\n",
|
||||
"10950\n",
|
||||
"11000\n",
|
||||
"11050\n",
|
||||
"11100\n",
|
||||
"11150\n",
|
||||
"11200\n",
|
||||
"11250\n",
|
||||
"11300\n",
|
||||
"11350\n",
|
||||
"11400\n",
|
||||
"11450\n",
|
||||
"11500\n",
|
||||
"11550\n",
|
||||
"11600\n",
|
||||
"11650\n",
|
||||
"11700\n",
|
||||
"11750\n",
|
||||
"11800\n",
|
||||
"11850\n",
|
||||
"11900\n",
|
||||
"11950\n",
|
||||
"12000\n",
|
||||
"12050\n",
|
||||
"12100\n",
|
||||
"12150\n",
|
||||
"12200\n",
|
||||
"12250\n",
|
||||
"12300\n",
|
||||
"12350\n",
|
||||
"12400\n",
|
||||
"12450\n",
|
||||
"12500\n",
|
||||
"12550\n",
|
||||
"12600\n",
|
||||
"12650\n",
|
||||
"12700\n",
|
||||
"12750\n",
|
||||
"12800\n",
|
||||
"12850\n",
|
||||
"12900\n",
|
||||
"12950\n",
|
||||
"13000\n",
|
||||
"13050\n",
|
||||
"13100\n",
|
||||
"13150\n",
|
||||
"13200\n",
|
||||
"13250\n",
|
||||
"13300\n",
|
||||
"13350\n",
|
||||
"13400\n",
|
||||
"13450\n",
|
||||
"13500\n",
|
||||
"13550\n",
|
||||
"13600\n",
|
||||
"13650\n",
|
||||
"13700\n",
|
||||
"13750\n",
|
||||
"13800\n",
|
||||
"13850\n",
|
||||
"13900\n",
|
||||
"13950\n",
|
||||
"14000\n",
|
||||
"14050\n",
|
||||
"14100\n",
|
||||
"14150\n",
|
||||
"14200\n",
|
||||
"14250\n",
|
||||
"14300\n",
|
||||
"14350\n",
|
||||
"14400\n",
|
||||
"14450\n",
|
||||
"14500\n",
|
||||
"14550\n",
|
||||
"14600\n",
|
||||
"14650\n",
|
||||
"14700\n",
|
||||
"14750\n",
|
||||
"14800\n",
|
||||
"14850\n",
|
||||
"14900\n",
|
||||
"14950\n",
|
||||
"15000\n",
|
||||
"15050\n",
|
||||
"15100\n",
|
||||
"15150\n",
|
||||
"15200\n",
|
||||
"15250\n",
|
||||
"15300\n",
|
||||
"15350\n",
|
||||
"15400\n",
|
||||
"15450\n",
|
||||
"15500\n",
|
||||
"15550\n",
|
||||
"15600\n",
|
||||
"15650\n",
|
||||
"15700\n",
|
||||
"15750\n",
|
||||
"15800\n",
|
||||
"15850\n",
|
||||
"15900\n",
|
||||
"15950\n",
|
||||
"16000\n",
|
||||
"16050\n",
|
||||
"16100\n",
|
||||
"16150\n",
|
||||
"16200\n",
|
||||
"16250\n",
|
||||
"16300\n",
|
||||
"16350\n",
|
||||
"16400\n",
|
||||
"16450\n",
|
||||
"16500\n",
|
||||
"16550\n",
|
||||
"16600\n",
|
||||
"16650\n",
|
||||
"16700\n",
|
||||
"16750\n",
|
||||
"16800\n",
|
||||
"16850\n",
|
||||
"16900\n",
|
||||
"16950\n",
|
||||
"17000\n",
|
||||
"17050\n",
|
||||
"17100\n",
|
||||
"17150\n",
|
||||
"17200\n",
|
||||
"17250\n",
|
||||
"17300\n",
|
||||
"17350\n",
|
||||
"17400\n",
|
||||
"17450\n",
|
||||
"17500\n",
|
||||
"17550\n",
|
||||
"17600\n",
|
||||
"17650\n",
|
||||
"17700\n",
|
||||
"17750\n",
|
||||
"17800\n",
|
||||
"17850\n",
|
||||
"17900\n",
|
||||
"17950\n",
|
||||
"18000\n",
|
||||
"18050\n",
|
||||
"18100\n",
|
||||
"开始新题之间的比对\n",
|
||||
"50\n",
|
||||
"100\n",
|
||||
|
|
@ -383,370 +31,18 @@
|
|||
"350\n",
|
||||
"400\n",
|
||||
"450\n",
|
||||
"500\n",
|
||||
"550\n",
|
||||
"600\n",
|
||||
"650\n",
|
||||
"700\n",
|
||||
"750\n",
|
||||
"800\n",
|
||||
"850\n",
|
||||
"900\n",
|
||||
"950\n",
|
||||
"1000\n",
|
||||
"1050\n",
|
||||
"1100\n",
|
||||
"1150\n",
|
||||
"1200\n",
|
||||
"1250\n",
|
||||
"1300\n",
|
||||
"1350\n",
|
||||
"1400\n",
|
||||
"1450\n",
|
||||
"1500\n",
|
||||
"1550\n",
|
||||
"1600\n",
|
||||
"1650\n",
|
||||
"1700\n",
|
||||
"1750\n",
|
||||
"1800\n",
|
||||
"1850\n",
|
||||
"1900\n",
|
||||
"1950\n",
|
||||
"2000\n",
|
||||
"2050\n",
|
||||
"2100\n",
|
||||
"2150\n",
|
||||
"2200\n",
|
||||
"2250\n",
|
||||
"2300\n",
|
||||
"2350\n",
|
||||
"2400\n",
|
||||
"2450\n",
|
||||
"2500\n",
|
||||
"2550\n",
|
||||
"2600\n",
|
||||
"2650\n",
|
||||
"2700\n",
|
||||
"2750\n",
|
||||
"2800\n",
|
||||
"2850\n",
|
||||
"2900\n",
|
||||
"2950\n",
|
||||
"3000\n",
|
||||
"3050\n",
|
||||
"3100\n",
|
||||
"3150\n",
|
||||
"3200\n",
|
||||
"3250\n",
|
||||
"3300\n",
|
||||
"3350\n",
|
||||
"3400\n",
|
||||
"3450\n",
|
||||
"3500\n",
|
||||
"3550\n",
|
||||
"3600\n",
|
||||
"3650\n",
|
||||
"3700\n",
|
||||
"3750\n",
|
||||
"3800\n",
|
||||
"3850\n",
|
||||
"3900\n",
|
||||
"3950\n",
|
||||
"4000\n",
|
||||
"4050\n",
|
||||
"4100\n",
|
||||
"4150\n",
|
||||
"4200\n",
|
||||
"4250\n",
|
||||
"4300\n",
|
||||
"4350\n",
|
||||
"4400\n",
|
||||
"4450\n",
|
||||
"4500\n",
|
||||
"4550\n",
|
||||
"4600\n",
|
||||
"4650\n",
|
||||
"4700\n",
|
||||
"4750\n",
|
||||
"4800\n",
|
||||
"4850\n",
|
||||
"4900\n",
|
||||
"4950\n",
|
||||
"5000\n",
|
||||
"5050\n",
|
||||
"5100\n",
|
||||
"5150\n",
|
||||
"5200\n",
|
||||
"5250\n",
|
||||
"5300\n",
|
||||
"5350\n",
|
||||
"5400\n",
|
||||
"5450\n",
|
||||
"5500\n",
|
||||
"5550\n",
|
||||
"5600\n",
|
||||
"5650\n",
|
||||
"5700\n",
|
||||
"5750\n",
|
||||
"5800\n",
|
||||
"5850\n",
|
||||
"5900\n",
|
||||
"5950\n",
|
||||
"6000\n",
|
||||
"6050\n",
|
||||
"6100\n",
|
||||
"6150\n",
|
||||
"6200\n",
|
||||
"6250\n",
|
||||
"6300\n",
|
||||
"6350\n",
|
||||
"6400\n",
|
||||
"6450\n",
|
||||
"6500\n",
|
||||
"6550\n",
|
||||
"6600\n",
|
||||
"6650\n",
|
||||
"6700\n",
|
||||
"6750\n",
|
||||
"6800\n",
|
||||
"6850\n",
|
||||
"6900\n",
|
||||
"6950\n",
|
||||
"7000\n",
|
||||
"7050\n",
|
||||
"7100\n",
|
||||
"7150\n",
|
||||
"7200\n",
|
||||
"7250\n",
|
||||
"7300\n",
|
||||
"7350\n",
|
||||
"7400\n",
|
||||
"7450\n",
|
||||
"7500\n",
|
||||
"7550\n",
|
||||
"7600\n",
|
||||
"7650\n",
|
||||
"7700\n",
|
||||
"7750\n",
|
||||
"7800\n",
|
||||
"7850\n",
|
||||
"7900\n",
|
||||
"7950\n",
|
||||
"8000\n",
|
||||
"8050\n",
|
||||
"8100\n",
|
||||
"8150\n",
|
||||
"8200\n",
|
||||
"8250\n",
|
||||
"8300\n",
|
||||
"8350\n",
|
||||
"8400\n",
|
||||
"8450\n",
|
||||
"8500\n",
|
||||
"8550\n",
|
||||
"8600\n",
|
||||
"8650\n",
|
||||
"8700\n",
|
||||
"8750\n",
|
||||
"8800\n",
|
||||
"8850\n",
|
||||
"8900\n",
|
||||
"8950\n",
|
||||
"9000\n",
|
||||
"9050\n",
|
||||
"9100\n",
|
||||
"9150\n",
|
||||
"9200\n",
|
||||
"9250\n",
|
||||
"9300\n",
|
||||
"9350\n",
|
||||
"9400\n",
|
||||
"9450\n",
|
||||
"9500\n",
|
||||
"9550\n",
|
||||
"9600\n",
|
||||
"9650\n",
|
||||
"9700\n",
|
||||
"9750\n",
|
||||
"9800\n",
|
||||
"9850\n",
|
||||
"9900\n",
|
||||
"9950\n",
|
||||
"10000\n",
|
||||
"10050\n",
|
||||
"10100\n",
|
||||
"10150\n",
|
||||
"10200\n",
|
||||
"10250\n",
|
||||
"10300\n",
|
||||
"10350\n",
|
||||
"10400\n",
|
||||
"10450\n",
|
||||
"10500\n",
|
||||
"10550\n",
|
||||
"10600\n",
|
||||
"10650\n",
|
||||
"10700\n",
|
||||
"10750\n",
|
||||
"10800\n",
|
||||
"10850\n",
|
||||
"10900\n",
|
||||
"10950\n",
|
||||
"11000\n",
|
||||
"11050\n",
|
||||
"11100\n",
|
||||
"11150\n",
|
||||
"11200\n",
|
||||
"11250\n",
|
||||
"11300\n",
|
||||
"11350\n",
|
||||
"11400\n",
|
||||
"11450\n",
|
||||
"11500\n",
|
||||
"11550\n",
|
||||
"11600\n",
|
||||
"11650\n",
|
||||
"11700\n",
|
||||
"11750\n",
|
||||
"11800\n",
|
||||
"11850\n",
|
||||
"11900\n",
|
||||
"11950\n",
|
||||
"12000\n",
|
||||
"12050\n",
|
||||
"12100\n",
|
||||
"12150\n",
|
||||
"12200\n",
|
||||
"12250\n",
|
||||
"12300\n",
|
||||
"12350\n",
|
||||
"12400\n",
|
||||
"12450\n",
|
||||
"12500\n",
|
||||
"12550\n",
|
||||
"12600\n",
|
||||
"12650\n",
|
||||
"12700\n",
|
||||
"12750\n",
|
||||
"12800\n",
|
||||
"12850\n",
|
||||
"12900\n",
|
||||
"12950\n",
|
||||
"13000\n",
|
||||
"13050\n",
|
||||
"13100\n",
|
||||
"13150\n",
|
||||
"13200\n",
|
||||
"13250\n",
|
||||
"13300\n",
|
||||
"13350\n",
|
||||
"13400\n",
|
||||
"13450\n",
|
||||
"13500\n",
|
||||
"13550\n",
|
||||
"13600\n",
|
||||
"13650\n",
|
||||
"13700\n",
|
||||
"13750\n",
|
||||
"13800\n",
|
||||
"13850\n",
|
||||
"13900\n",
|
||||
"13950\n",
|
||||
"14000\n",
|
||||
"14050\n",
|
||||
"14100\n",
|
||||
"14150\n",
|
||||
"14200\n",
|
||||
"14250\n",
|
||||
"14300\n",
|
||||
"14350\n",
|
||||
"14400\n",
|
||||
"14450\n",
|
||||
"14500\n",
|
||||
"14550\n",
|
||||
"14600\n",
|
||||
"14650\n",
|
||||
"14700\n",
|
||||
"14750\n",
|
||||
"14800\n",
|
||||
"14850\n",
|
||||
"14900\n",
|
||||
"14950\n",
|
||||
"15000\n",
|
||||
"15050\n",
|
||||
"15100\n",
|
||||
"15150\n",
|
||||
"15200\n",
|
||||
"15250\n",
|
||||
"15300\n",
|
||||
"15350\n",
|
||||
"15400\n",
|
||||
"15450\n",
|
||||
"15500\n",
|
||||
"15550\n",
|
||||
"15600\n",
|
||||
"15650\n",
|
||||
"15700\n",
|
||||
"15750\n",
|
||||
"15800\n",
|
||||
"15850\n",
|
||||
"15900\n",
|
||||
"15950\n",
|
||||
"16000\n",
|
||||
"16050\n",
|
||||
"16100\n",
|
||||
"16150\n",
|
||||
"16200\n",
|
||||
"16250\n",
|
||||
"16300\n",
|
||||
"16350\n",
|
||||
"16400\n",
|
||||
"16450\n",
|
||||
"16500\n",
|
||||
"16550\n",
|
||||
"16600\n",
|
||||
"16650\n",
|
||||
"16700\n",
|
||||
"16750\n",
|
||||
"16800\n",
|
||||
"16850\n",
|
||||
"16900\n",
|
||||
"16950\n",
|
||||
"17000\n",
|
||||
"17050\n",
|
||||
"17100\n",
|
||||
"17150\n",
|
||||
"17200\n",
|
||||
"17250\n",
|
||||
"17300\n",
|
||||
"17350\n",
|
||||
"17400\n",
|
||||
"17450\n",
|
||||
"17500\n",
|
||||
"17550\n",
|
||||
"17600\n",
|
||||
"17650\n",
|
||||
"17700\n",
|
||||
"17750\n",
|
||||
"17800\n",
|
||||
"17850\n",
|
||||
"17900\n",
|
||||
"17950\n",
|
||||
"18000\n",
|
||||
"18050\n",
|
||||
"总耗时: 384.6457269191742 秒.\n",
|
||||
"发现相似: 2800 , 其中已标注: 2223 .\n"
|
||||
"总耗时: 3.233793258666992 秒.\n",
|
||||
"发现相似: 30 , 其中已标注: 28 .\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# from hashlib import new\n",
|
||||
"import os,re,difflib,Levenshtein,time,json\n",
|
||||
"\n",
|
||||
"# 重要!!! 新题目的范围\n",
|
||||
"id_new_problems = \"1:50000\"\n",
|
||||
"threshold = 0.99\n",
|
||||
"# 重要!!! 新旧题目的范围(有重复默认为新题)\n",
|
||||
"id_new_problems = \"1:500\"\n",
|
||||
"id_old_problems = \"30000:50000\"\n",
|
||||
"threshold = 0.95\n",
|
||||
"\n",
|
||||
"#生成数码列表, 逗号分隔每个区块, 区块内部用:表示整数闭区间\n",
|
||||
"def generate_number_set(string):\n",
|
||||
|
|
@ -804,15 +100,16 @@
|
|||
"#生成旧题目数据库字典与新题目数据库字典\n",
|
||||
"new_id_list_raw = generate_number_set(id_new_problems)\n",
|
||||
"new_id_list = [id for id in pro_dict if id in new_id_list_raw]\n",
|
||||
"old_id_list_raw = generate_number_set(id_old_problems)\n",
|
||||
"old_id_list = [id for id in pro_dict if (id in old_id_list_raw and not id in new_id_list_raw)]\n",
|
||||
"old_problems_dict = {}\n",
|
||||
"new_problems_dict = {}\n",
|
||||
"old_problems_dict_content = {}\n",
|
||||
"new_problems_dict_content = {}\n",
|
||||
"for id in pro_dict:\n",
|
||||
" if id in new_id_list:\n",
|
||||
"for id in new_id_list:\n",
|
||||
" new_problems_dict[id] = pro_dict[id]\n",
|
||||
" new_problems_dict_content[id] = pre_treating(pro_dict[id][\"content\"])\n",
|
||||
" else:\n",
|
||||
"for id in old_id_list:\n",
|
||||
" old_problems_dict[id] = pro_dict[id]\n",
|
||||
" old_problems_dict_content[id] = pre_treating(pro_dict[id][\"content\"])\n",
|
||||
"print(\"旧题目数:\",len(old_problems_dict),\", 新题目数:\",len(new_problems_dict))\n",
|
||||
|
|
@ -876,7 +173,60 @@
|
|||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"#记录起始时间\n",
|
||||
"start_time = time.time()\n",
|
||||
"suspect_count = 0\n",
|
||||
"remarked = 0\n",
|
||||
"\n",
|
||||
"alike_problems = \"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#开始新题与旧题的比对\n",
|
||||
"count = 0\n",
|
||||
"print(\"开始新题与旧题的比对\")\n",
|
||||
"for id_new in new_problems_dict:\n",
|
||||
" count += 1\n",
|
||||
" if count % 50 == 0:\n",
|
||||
" print(count)\n",
|
||||
" for id_old in old_problems_dict:\n",
|
||||
" similar_rate = sim_test(new_problems_dict_content[id_new],old_problems_dict_content[id_old])\n",
|
||||
" if similar_rate > threshold or id_new in old_problems_dict[id_old][\"related\"] or id_new in old_problems_dict[id_old][\"same\"] or id_old in new_problems_dict[id_new][\"related\"] or id_old in new_problems_dict[id_new][\"same\"]:\n",
|
||||
" suspect_count += 1\n",
|
||||
" if not (id_new in old_problems_dict[id_old][\"related\"] or id_new in old_problems_dict[id_old][\"same\"] or id_old in new_problems_dict[id_new][\"related\"] or id_old in new_problems_dict[id_new][\"same\"]):\n",
|
||||
" alike_problems += (\"%.4f\" %similar_rate) + \"\\n\\n\" + id_new + \" \" + new_problems_dict[id_new][\"content\"] + \"\\n\\n\" + id_old + \" \" + old_problems_dict[id_old][\"content\"] + \"\\n\\n\"\n",
|
||||
" else:\n",
|
||||
" remarked += 1\n",
|
||||
"\n",
|
||||
"#开始新题之间的比对\n",
|
||||
"count = 0\n",
|
||||
"print(\"开始新题之间的比对\")\n",
|
||||
"while len(new_problems_dict) >= 2:\n",
|
||||
" count += 1\n",
|
||||
" if count % 50 == 0:\n",
|
||||
" print(count)\n",
|
||||
" keys = list(new_problems_dict.keys())\n",
|
||||
" current_problem = new_problems_dict.pop(keys[0])\n",
|
||||
" current_problem_content = new_problems_dict_content[current_problem[\"id\"]]\n",
|
||||
" for id_new in new_problems_dict:\n",
|
||||
" similar_rate = sim_test(new_problems_dict_content[id_new],current_problem_content)\n",
|
||||
" if similar_rate > threshold or id_new in current_problem[\"related\"] or id_new in current_problem[\"same\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"related\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"same\"]:\n",
|
||||
" suspect_count += 1\n",
|
||||
" if not (id_new in current_problem[\"related\"] or id_new in current_problem[\"same\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"related\"] or current_problem[\"id\"] in new_problems_dict[id_new][\"same\"]):\n",
|
||||
" alike_problems += (\"%.4f\" %similar_rate) + \"\\n\\n\" + id_new + \" \" + new_problems_dict[id_new][\"content\"] + \"\\n\\n\" + current_problem[\"id\"] + \" \" + current_problem[\"content\"] + \"\\n\\n\"\n",
|
||||
" else:\n",
|
||||
" remarked += 1\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#记录终止时间及显示结果\n",
|
||||
"end_time = time.time()\n",
|
||||
"print(\"总耗时:\",end_time-start_time,\"秒.\")\n",
|
||||
"print(\"发现相似: \",suspect_count,\", 其中已标注: \",remarked,\".\")\n",
|
||||
"\n",
|
||||
"with open(\"临时文件/相似题目.txt\",\"w\",encoding=\"utf8\") as f:\n",
|
||||
" f.write(alike_problems)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
@ -895,7 +245,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15"
|
||||
"version": "3.8.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
import os,re,json
|
||||
|
||||
filename = "临时文件/相似题目.txt"

# Load the problem database and parse it into a dict keyed by problem id.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

# Read the hand-classified list of similar problems. A "\n" is prepended
# because both patterns below start with "\n"; without it an entry on the
# very first line of the file could not match.
with open(filename,"r",encoding = "utf8") as f:
    similar_text = "\n"+f.read()

# One classification mark per pair: a line holding a rate such as "0.9876"
# followed by a single mark character (s / r / n, either case, or a space).
similar_types = re.findall(r"\n[\d]\.[\d]{4}[\s]*([srSRnN ])[\s]*\n",similar_text)
# Every line that starts with a six-digit problem id followed by a space.
similar_problems = re.findall(r"\n([\d]{6}) ",similar_text)

# Database field and console message for each mark that triggers an
# annotation. Any other mark ("N" or a space) leaves the pair untouched.
annotations = {
    "S": ("same","相同题目已标注:"),
    "R": ("related","关联题目已标注:"),
}

if len(similar_types) * 2 == len(similar_problems):
    # Passing the same iterator twice makes zip() hand out consecutive ids
    # as (id1, id2) pairs, replacing the repeated O(n) list.pop(0) calls.
    id_iter = iter(similar_problems)
    for mark, id1, id2 in zip(similar_types, id_iter, id_iter):
        if mark.upper() not in annotations:
            continue
        field, message = annotations[mark.upper()]
        # Record the relation in both directions, skipping existing entries.
        for src, dst in ((id1, id2), (id2, id1)):
            if dst not in pro_dict[src][field]:
                pro_dict[src][field].append(dst)
        # NOTE(review): nesting was reconstructed from a flattened listing;
        # the message is assumed to print for every S/R pair, including
        # pairs that were already annotated - confirm.
        print(message,id1,id2)
else:
    # Each rate line must be followed by exactly two problem entries.
    print("相似程度数据:",len(similar_types),"个, 相似题目:",len(similar_problems),"题. 数据有问题, 请检查.")


# Serialize the (possibly updated) dict and write it back in place.
database = json.dumps(pro_dict,indent=4,ensure_ascii=False)
with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
    f.write(database)
|
||||
4209
工具/相同题目检测.ipynb
4209
工具/相同题目检测.ipynb
File diff suppressed because it is too large
Load Diff
|
|
@ -2,22 +2,14 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"040387 填空题\n",
|
||||
"040388 填空题\n",
|
||||
"040389 填空题\n",
|
||||
"040390 填空题\n",
|
||||
"040391 填空题\n",
|
||||
"040392 填空题\n",
|
||||
"040393 填空题\n",
|
||||
"040394 填空题\n",
|
||||
"040395 解答题\n"
|
||||
"均已确定题目类型\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -29,9 +21,11 @@
|
|||
" database = f.read()\n",
|
||||
"pro_dict = json.loads(database)\n",
|
||||
"\n",
|
||||
"count = 0\n",
|
||||
"#根据特征字符识别题目类型\n",
|
||||
"for p in pro_dict:\n",
|
||||
" if pro_dict[p][\"genre\"] == \"\":\n",
|
||||
" count += 1\n",
|
||||
" if \"bracket\" in pro_dict[p][\"content\"]:\n",
|
||||
" pro_dict[p][\"genre\"] = \"选择题\"\n",
|
||||
" print(p,\"选择题\")\n",
|
||||
|
|
@ -46,7 +40,11 @@
|
|||
"#将修改结果写入json数据库\n",
|
||||
"database = json.dumps(pro_dict,indent = 4, ensure_ascii= False)\n",
|
||||
"with open(r\"../题库0.3/Problems.json\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(database)"
|
||||
" f.write(database)\n",
|
||||
"if count > 0:\n",
|
||||
" print(\"为 %d 道题目确定类型\" %count)\n",
|
||||
"else:\n",
|
||||
" print(\"均已确定题目类型\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -0,0 +1,31 @@
|
|||
import os,re,json
|
||||
|
||||
# Load the problem database and parse it into a dict keyed by problem id.
with open(r"../题库0.3/Problems.json","r",encoding = "utf8") as f:
    database = f.read()
pro_dict = json.loads(database)

count = 0
# Fill in the genre of every problem whose "genre" field is still empty,
# based on marker text found in the problem content.
for p in pro_dict:
    problem = pro_dict[p]
    if problem["genre"] != "":
        continue
    count += 1
    if "bracket" in problem["content"]:
        genre = "选择题"
    elif "blank" in problem["content"]:
        genre = "填空题"
    else:
        genre = "解答题"
        # Only this fallback genre also gets a "space" value.
        problem["space"] = "12ex"
    problem["genre"] = genre
    print(p,genre)

# Write the result back to the JSON database (done even when count is 0).
database = json.dumps(pro_dict,indent = 4, ensure_ascii= False)
with open(r"../题库0.3/Problems.json","w",encoding = "utf8") as f:
    f.write(database)
if count > 0:
    print("为 %d 道题目确定类型" %count)
else:
    print("均已确定题目类型")
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
import os,re,json,sys
|
||||
|
||||
# The range definition below must be replaced before each use.
# NOTE(review): the bare string statements look like begin/end markers,
# presumably located by another tool - keep them verbatim; confirm.
"""使用前替换范围定义"""
obj_range = "K0810:K0817999"
"""范围定义到此结束"""
# Output file name: "<index>_<title>".
"""规定文件名"""
index = "38"
title = "计数原理与排列组合"
"""文件名到此结束"""

filename = index+"_"+title
outputfile = r"临时文件/"+filename+".tex"

template_file = r"模板文件/复习课目标模板.tex"
|
||||
|
||||
|
||||
# 检查某一字符串是否在由,:的表达式给出的范围内
|
||||
def within_range(string,list):
    """Return True if *string* is selected by one of the items in *list*.

    Each item is either a single identifier, which must equal *string*, or
    a closed range written ``start:end``. Range membership is decided by
    plain string comparison (``start <= string <= end``), not numerically.
    Whitespace around an item and around each range endpoint is ignored.

    Raises:
        ValueError: if an item contains more than one ``:``.
    """
    for item in list:
        item = item.strip()
        if string == item:
            return True
        if ":" in item:
            # Strip the endpoints as well: previously only the exact-match
            # comparison ignored whitespace, so "a: b" never matched.
            start, end = (part.strip() for part in item.split(":"))
            if start <= string <= end:
                return True
    return False
|
||||
|
||||
# Load the lesson-objective database and parse it into a dict keyed by
# objective id.
with open(r"../题库0.3/LessonObj.json","r",encoding = "utf8") as f:
    database = f.read()
obj_dict = json.loads(database)

# Split the range expression into its comma-separated items.
obj_range_list = obj_range.split(",")

# Build one table row per selected objective and join them once
# (avoids repeated string concatenation inside the loop).
output_string = "".join(
    obj_id + " & " + obj_dict[obj_id]["content"] + " & " + r"\\ \hline" + "\n"
    for obj_id in obj_dict
    if within_range(obj_id,obj_range_list)
)

# Read the LaTeX template.
with open(template_file,"r",encoding="utf8") as f:
    latex_raw = f.read()

# Outside Windows, switch the font set to fandol and comment out every
# \setCJKmainfont command in the template.
if sys.platform != "win32":
    latex_raw = re.sub(r"fontset[\s]*=[\s]*none","fontset = fandol",latex_raw)
    latex_raw = re.sub(r"\\setCJKmainfont",r"% \\setCJKmainfont",latex_raw)

# Fill the template placeholders, in this order.
exec_list = [("编号待替换",index),("标题待替换",title),("内容待替换",output_string)]
for placeholder, value in exec_list:
    latex_raw = latex_raw.replace(placeholder,value)

# Write the .tex file into the temporary folder and compile it.
with open(outputfile,"w",encoding = "utf8") as f:
    f.write(latex_raw)
print("开始编译目标pdf文件: ", outputfile)
compile_command = "xelatex -interaction=batchmode -output-directory=" + "临时文件" + " "+ outputfile
# xelatex is run twice; only the status of the second run is printed.
# NOTE(review): presumably the second pass lets LaTeX settle - confirm.
os.system(compile_command)
print(os.system(compile_command))
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os,re\n",
|
||||
"import win32clipboard as wc\n",
|
||||
"import win32con\n",
|
||||
"\n",
|
||||
"# 获取剪切板内容\n",
|
||||
"def getCopy():\n",
|
||||
" wc.OpenClipboard()\n",
|
||||
" t = wc.GetClipboardData(win32con.CF_UNICODETEXT)\n",
|
||||
" wc.CloseClipboard()\n",
|
||||
" return t\n",
|
||||
"\n",
|
||||
"# 写入剪切板内容\n",
|
||||
"def setCopy(str):\n",
|
||||
" wc.OpenClipboard()\n",
|
||||
" wc.EmptyClipboard()\n",
|
||||
" wc.SetClipboardData(win32con.CF_UNICODETEXT, str)\n",
|
||||
" wc.CloseClipboard()\n",
|
||||
"\n",
|
||||
"def dollared(string):\n",
|
||||
" flag = True\n",
|
||||
" for c in string:\n",
|
||||
" if not c in \"1234567890.+-:[]()\":\n",
|
||||
" flag = False\n",
|
||||
" break\n",
|
||||
" if flag:\n",
|
||||
" string = \"$\" + string + \"$\"\n",
|
||||
" return string\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"data = getCopy()\n",
|
||||
"\n",
|
||||
"data1 = \"\"\n",
|
||||
"for c in data:\n",
|
||||
" if 65296 <= ord(c) < 65306:\n",
|
||||
" data1 += str(ord(c)-65296)\n",
|
||||
" else:\n",
|
||||
" data1 += c\n",
|
||||
"data = data1\n",
|
||||
"data = data.replace(\"\\uff0e\",\".\").replace(\"\\uff1a\",\":\")\n",
|
||||
"elements = data.split(\"\\n\")\n",
|
||||
"elements_per_line = int(elements.pop(-1)) #这里需要修改\n",
|
||||
"contents = \"\\\\begin{center}\\n\\\\begin{tabular}{|\"\n",
|
||||
"for i in range(elements_per_line):\n",
|
||||
" contents += \"c|\"\n",
|
||||
"contents += \"}\\n\"\n",
|
||||
"contents += r\"\\hline\"+\"\\n\"\n",
|
||||
"col = 1\n",
|
||||
"for element in elements:\n",
|
||||
" if col != 1:\n",
|
||||
" contents += \" & \"\n",
|
||||
" contents += dollared(element)\n",
|
||||
" if col == elements_per_line:\n",
|
||||
" contents += r\" \\\\ \\hline\"+\"\\n\"\n",
|
||||
" col += 1\n",
|
||||
" if col > elements_per_line:\n",
|
||||
" col = 1\n",
|
||||
"contents += \"\\\\end{tabular}\" + \"\\n\" + \"\\\\end{center}\"\n",
|
||||
"\n",
|
||||
"with open(\"临时文件/tablefile.txt\",\"w\",encoding = \"utf8\") as f:\n",
|
||||
" f.write(contents)\n",
|
||||
"\n",
|
||||
"setCopy(contents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "pythontest",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.15"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "91219a98e0e9be72efb992f647fe78b593124968b75db0b865552d6787c8db93"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue