Add generation of adaptive problem sets (赋能题组)
parent 39c60191db
commit 0f5fd9882d
@@ -184,7 +184,7 @@ def treat_dict(database): # preprocess the content portion of the whole problem-bank dict
     # mycursor.execute("SELECT ID,SAME_ID FROM same;")
     # same_list = [(ret[0],ret[1]) for ret in mycursor.fetchall()]
     for id in p_dict:
-        treated_dict[id] = {}
+        # treated_dict[id] = {}
         treated_dict[id] = pre_treating(p_dict[id])
     mydb.close()
     return treated_dict # return the processed dict, with the content field and the same-problem field
@@ -3078,5 +3078,84 @@ def generateProDict(cursor): # build the problems dict (con…) from the database cursor
     return prodict
 
 
+
+def generate_diff_dict(pro_dict): # build the diff_dict difficulty dictionary from the usage data in pro_dict
+    diff_dict = {}
+    for id in pro_dict:
+        usages = pro_dict[id]["usages"]
+        if usages == []:
+            diff_dict[id] = {"min":-1,"average":-1,"median":-1,"max":-1}
+        else:
+            diff_list = []
+            for usage_raw in usages:
+                usage = parseUsage(usage_raw)['difficulties']
+                diff_list.append(np.mean(usage))
+            diff_dict[id] = {"min":min(diff_list),"average":np.mean(diff_list),"median":np.median(diff_list),"max":max(diff_list)}
+    return diff_dict
+
+
+
+def generate_adaptive_problemIDs(pro_dict,units_list,genre_tuple,diffinterval,method="min",excludeids = None,multianswers = False, sim_threshold = 0.8): # generate an adaptive problem set; genre_tuple is a pair (number of fill-in-the-blank problems, number of multiple-choice problems), diffinterval is a pair (minimum difficulty, maximum difficulty)
+    diff_dict = generate_diff_dict(pro_dict)
+    treated_dict = {id:pre_treating(pro_dict[id]["content"]) for id in pro_dict}
+    if excludeids is None:
+        excludeids = ()
+    if len(units_list) == 0:
+        units_list = ["第一单元","第二单元","第三单元","第四单元","第五单元","第六单元","第七单元","第八单元","第九单元"]
+    # treated_dict = {id:pre_treating(pro_dict[id]["content"]) for id in pro_dict}
+    raw_output_list = []
+    same_list = []
+    for id in pro_dict: # build raw_output_list: every problem that meets the conditions, regardless of count or genre
+        tags = pro_dict[id]["tags"]
+        diff_info = diff_dict[id]
+        if len(set(units_list) & set(tags)) > 0 and diffinterval[0] <= diff_info[method] <= diffinterval[1] and not id in excludeids and not id in same_list:
+            raw_output_list.append(id)
+            same_list += pro_dict[id]["same"].copy()
+    if multianswers == True: # exclude problems with multiple answer items, unless multianswers is True
+        output_list = raw_output_list.copy()
+    else:
+        output_list = []
+        for id in raw_output_list:
+            subproblemsfromusage = [parseUsage(u)["subproblems"] for u in pro_dict[id]["usages"]]
+            if max(subproblemsfromusage) == 1:
+                output_list.append(id)
+    tiankong_list_raw = []
+    xuanze_list_raw = []
+    for id in output_list: # split into a fill-in-the-blank list and a multiple-choice list
+        if pro_dict[id]["genre"] == "填空题":
+            tiankong_list_raw.append(id)
+        if pro_dict[id]["genre"] == "选择题":
+            xuanze_list_raw.append(id)
+    print(f"满足条件的填空有 {len(tiankong_list_raw)} 题, 选择有 {len(xuanze_list_raw)} 题.")
+    tiankong_list = []
+    while len(tiankong_list_raw) + len(tiankong_list) > genre_tuple[0] and not len(tiankong_list) == genre_tuple[0]:
+        poping_index = np.random.randint(len(tiankong_list_raw))
+        new_id = tiankong_list_raw.pop(poping_index)
+        newid_applicable = True
+        for id in tiankong_list:
+            if Levenshtein.jaro(treated_dict[id],treated_dict[new_id]) > sim_threshold:
+                newid_applicable = False
+                break
+        if newid_applicable:
+            tiankong_list.append(new_id)
+    if len(tiankong_list_raw) + len(tiankong_list) == genre_tuple[0]:
+        tiankong_list = tiankong_list + tiankong_list_raw
+    xuanze_list = []
+    while len(xuanze_list_raw) + len(xuanze_list) > genre_tuple[1] and not len(xuanze_list) == genre_tuple[1]:
+        poping_index = np.random.randint(len(xuanze_list_raw))
+        new_id = xuanze_list_raw.pop(poping_index)
+        newid_applicable = True
+        for id in tiankong_list + xuanze_list:
+            if Levenshtein.jaro(treated_dict[id],treated_dict[new_id]) > sim_threshold:
+                newid_applicable = False
+                break
+        if newid_applicable:
+            xuanze_list.append(new_id)
+    if len(xuanze_list_raw) + len(xuanze_list) == genre_tuple[1]:
+        xuanze_list = xuanze_list + xuanze_list_raw
+
+    return tiankong_list,xuanze_list
+
+
 if __name__ == "__main__":
     print("数据库工具, import用.")
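Reviewer note: a minimal, self-contained sketch of the aggregation generate_diff_dict performs per problem, assuming parseUsage(u)["difficulties"] yields a list of difficulty scores for one usage record. The difficulty values below are invented for illustration; parseUsage and the real usage format are not needed to show the idea.

import numpy as np

# Invented per-usage difficulty lists for one problem (stand-ins for parseUsage(u)["difficulties"]).
difficulties_per_usage = [[0.4, 0.5, 0.6], [0.3, 0.35], [0.7, 0.65, 0.8]]

# Same aggregation as generate_diff_dict: one mean per usage record,
# then min/average/median/max over those means.
diff_list = [np.mean(d) for d in difficulties_per_usage]
summary = {"min": min(diff_list), "average": np.mean(diff_list),
           "median": np.median(diff_list), "max": max(diff_list)}
print(summary)  # here: min 0.325, median 0.5; problems with no usages get -1 in all four fields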
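And a hedged usage sketch for the new entry point. It assumes pro_dict has already been built (for example via generateProDict on a live cursor) and that the module's dependencies (numpy as np, Levenshtein, parseUsage, pre_treating) are in scope; the argument values and the excluded ID are illustrative, not defaults taken from the source.

# Hypothetical call; pro_dict must come from the existing database helpers in this module.
tiankong_ids, xuanze_ids = generate_adaptive_problemIDs(
    pro_dict,
    units_list=["第一单元", "第二单元"],  # restrict to problems tagged with these units
    genre_tuple=(10, 5),                  # 10 fill-in-the-blank + 5 multiple-choice problems
    diffinterval=(0.3, 0.7),              # keep problems whose difficulty lies in this range
    method="average",                     # compare the "average" statistic against diffinterval (default is "min")
    excludeids=(1024,),                   # skip IDs already used elsewhere (hypothetical value)
    sim_threshold=0.8,                    # drop near-duplicates by Jaro similarity of the treated text
)

The first return value is the list of fill-in-the-blank problem IDs, the second the list of multiple-choice IDs; both have already been filtered for near-duplicates against sim_threshold.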