新增生成赋能题组功能

This commit is contained in:
weiye.wang 2024-05-03 18:08:24 +08:00
parent 39c60191db
commit 0f5fd9882d
1 changed file with 80 additions and 1 deletion

View File

@ -184,7 +184,7 @@ def treat_dict(database): #对整个题库字典中的内容部分进行预处
# mycursor.execute("SELECT ID,SAME_ID FROM same;")
# same_list = [(ret[0],ret[1]) for ret in mycursor.fetchall()]
for id in p_dict:
treated_dict[id] = {}
# treated_dict[id] = {}
treated_dict[id] = pre_treating(p_dict[id])
mydb.close()
return treated_dict #返回处理后的字典, 含内容字段及相同题目字段
@ -3078,5 +3078,84 @@ def generateProDict(cursor): #从数据库的cursor中得到problems字典的con
return prodict
def generate_diff_dict(pro_dict):
    """Build a difficulty summary for every problem from its usage records.

    Each raw usage record is parsed with ``parseUsage``; the mean of its
    ``"difficulties"`` entry is that usage's difficulty. The per-usage
    values are then summarised per problem.

    Args:
        pro_dict: Mapping of problem id -> problem record. Every record must
            have a ``"usages"`` entry holding raw usage records understood by
            ``parseUsage``. An empty list, empty tuple or ``None`` all mean
            "never used".

    Returns:
        dict: problem id -> ``{"min", "average", "median", "max"}`` over the
        per-usage mean difficulties. Problems without any usable difficulty
        data get ``-1`` in all four fields.
    """
    diff_dict = {}
    for pid, problem in pro_dict.items():
        diff_list = []
        # `or ()` treats None / [] / () alike instead of only matching `[]`.
        for usage_raw in problem["usages"] or ():
            difficulties = parseUsage(usage_raw)["difficulties"]
            # np.mean of an empty sequence is nan (plus a RuntimeWarning),
            # which would make min()/max() below order-dependent garbage.
            if np.size(difficulties) > 0:
                diff_list.append(np.mean(difficulties))
        if diff_list:
            diff_dict[pid] = {
                "min": min(diff_list),
                "average": np.mean(diff_list),
                "median": np.median(diff_list),
                "max": max(diff_list),
            }
        else:
            # Sentinel meaning "no difficulty data"; a fresh dict per problem
            # so callers mutating one entry do not affect the others.
            diff_dict[pid] = {"min": -1, "average": -1, "median": -1, "max": -1}
    return diff_dict
def _pick_dissimilar_ids(candidates, wanted, already_chosen, treated_dict, sim_threshold):
    """Randomly draw up to `wanted` ids from `candidates`, avoiding look-alikes.

    A drawn id is rejected when the Jaro similarity between its treated
    content and that of any id in `already_chosen` or any id picked so far
    exceeds `sim_threshold`. Once the remaining pool is only just large
    enough to reach `wanted`, the rest of the pool is taken without a
    similarity check so the request can still be filled.

    Args:
        candidates: List of candidate ids (not modified).
        wanted: Number of ids requested; negative values are treated as 0.
        already_chosen: List of ids selected earlier that new picks must also
            differ from.
        treated_dict: Mapping id -> treated content string for every id in
            `candidates` and `already_chosen`.
        sim_threshold: Similarity above which two problems count as duplicates.

    Returns:
        list: The selected ids, at most `wanted` of them.
    """
    wanted = max(wanted, 0)
    pool = list(candidates)
    picked = []
    while len(pool) + len(picked) > wanted and len(picked) != wanted:
        new_id = pool.pop(np.random.randint(len(pool)))
        too_similar = any(
            Levenshtein.jaro(treated_dict[old_id], treated_dict[new_id]) > sim_threshold
            for old_id in already_chosen + picked
        )
        if not too_similar:
            picked.append(new_id)
    # `<=` rather than `==`: if fewer candidates exist than requested, hand
    # back all of them instead of an empty list.
    if len(pool) + len(picked) <= wanted:
        picked += pool
    return picked


def generate_adaptive_problemIDs(pro_dict, units_list, genre_tuple, diffinterval, method="min", excludeids=None, multianswers=False, sim_threshold=0.8):
    """Generate an adaptive problem set: random fill-in and multiple-choice ids.

    Problems are first filtered by unit tag, difficulty, exclusion list and
    "same problem" links, then split by genre, and finally drawn at random
    while rejecting problems whose content is too similar to one already
    drawn.

    Args:
        pro_dict: Mapping problem id -> record with the keys "tags", "usages",
            "same", "genre" and "content".
        units_list: Unit tags to draw from; a problem qualifies when it
            carries at least one of them. Empty means all nine units.
        genre_tuple: ``(number of fill-in problems, number of multiple-choice
            problems)`` requested.
        diffinterval: ``(lowest difficulty, highest difficulty)``, inclusive.
        method: Which statistic from generate_diff_dict to compare against
            `diffinterval`: "min", "average", "median" or "max".
        excludeids: Container of problem ids that must not be selected.
        multianswers: When true, problems with several answer items are
            allowed; otherwise only problems whose usages report exactly one
            sub-problem are kept.
        sim_threshold: Jaro similarity above which two problems are regarded
            as duplicates of each other.

    Returns:
        tuple: ``(fill-in id list, multiple-choice id list)``. When fewer
        problems qualify than requested, all qualifying ones are returned.
    """
    diff_dict = generate_diff_dict(pro_dict)
    if excludeids is None:
        excludeids = ()
    if not units_list:
        units_list = ["第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元"]
    wanted_units = set(units_list)
    diff_low, diff_high = diffinterval[0], diffinterval[1]

    # Every problem meeting the unit / difficulty / exclusion conditions,
    # regardless of genre or count. Accepting a problem blocks the problems
    # recorded as identical to it from being accepted later.
    raw_output_list = []
    same_ids = set()
    for pid, problem in pro_dict.items():
        if pid in excludeids or pid in same_ids:
            continue
        if wanted_units.isdisjoint(problem["tags"]):
            continue
        if not diff_low <= diff_dict[pid][method] <= diff_high:
            continue
        raw_output_list.append(pid)
        same_ids.update(problem["same"])

    if multianswers:
        # Multi-answer problems are acceptable: keep everything.
        output_list = raw_output_list.copy()
    else:
        # Drop problems with more than one answer item.
        output_list = []
        for pid in raw_output_list:
            subproblem_counts = [parseUsage(u)["subproblems"] for u in pro_dict[pid]["usages"] or ()]
            # default=0: a problem without usage records has no evidence of
            # being single-answer, so it is left out rather than crashing
            # max() on an empty list.
            if max(subproblem_counts, default=0) == 1:
                output_list.append(pid)

    # Split the survivors into fill-in and multiple-choice candidates.
    tiankong_list_raw = [pid for pid in output_list if pro_dict[pid]["genre"] == "填空题"]
    xuanze_list_raw = [pid for pid in output_list if pro_dict[pid]["genre"] == "选择题"]
    print(f"满足条件的填空有 {len(tiankong_list_raw)} 题, 选择有 {len(xuanze_list_raw)} 题.")

    # Only the candidates are ever compared, so only they are pre-treated.
    # NOTE(review): pre_treating presumably normalises the content text for
    # comparison - confirm against its definition.
    treated_dict = {pid: pre_treating(pro_dict[pid]["content"]) for pid in tiankong_list_raw + xuanze_list_raw}

    tiankong_list = _pick_dissimilar_ids(tiankong_list_raw, genre_tuple[0], [], treated_dict, sim_threshold)
    # Multiple-choice picks must also differ from the fill-in picks.
    xuanze_list = _pick_dissimilar_ids(xuanze_list_raw, genre_tuple[1], tiankong_list, treated_dict, sim_threshold)
    return tiankong_list, xuanze_list
if __name__ == "__main__":
    # This module is meant to be imported; running it directly only prints a
    # notice saying so ("database tools, for import use").
    print("数据库工具, import用.")