在相似题目检测中考虑unrelated字段的影响
This commit is contained in:
parent
2cd90643dc
commit
4ecacfeeed
|
|
@ -95,7 +95,7 @@ for id_new in new_problems_dict:
|
|||
similar_rate = sim_test(new_problems_dict_content[id_new],old_problems_dict_content[id_old])
|
||||
if similar_rate > threshold or id_new in old_problems_dict[id_old]["related"] or id_new in old_problems_dict[id_old]["same"] or id_old in new_problems_dict[id_new]["related"] or id_old in new_problems_dict[id_new]["same"]:
|
||||
suspect_count += 1
|
||||
if not (id_new in old_problems_dict[id_old]["related"] or id_new in old_problems_dict[id_old]["same"] or id_old in new_problems_dict[id_new]["related"] or id_old in new_problems_dict[id_new]["same"]):
|
||||
if not (id_new in old_problems_dict[id_old]["related"] or id_new in old_problems_dict[id_old]["same"] or id_new in old_problems_dict[id_old]["unrelated"] or id_old in new_problems_dict[id_new]["related"] or id_old in new_problems_dict[id_new]["same"] or id_old in new_problems_dict[id_new]["unrelated"]):
|
||||
alike_problems += ("%.4f" %similar_rate) + "\n\n" + id_new + " " + new_problems_dict[id_new]["content"] + "\n\n" + id_old + " " + old_problems_dict[id_old]["content"] + "\n\n"
|
||||
else:
|
||||
remarked += 1
|
||||
|
|
@ -114,7 +114,7 @@ while len(new_problems_dict) >= 2:
|
|||
similar_rate = sim_test(new_problems_dict_content[id_new],current_problem_content)
|
||||
if similar_rate > threshold or id_new in current_problem["related"] or id_new in current_problem["same"] or current_problem["id"] in new_problems_dict[id_new]["related"] or current_problem["id"] in new_problems_dict[id_new]["same"]:
|
||||
suspect_count += 1
|
||||
if not (id_new in current_problem["related"] or id_new in current_problem["same"] or current_problem["id"] in new_problems_dict[id_new]["related"] or current_problem["id"] in new_problems_dict[id_new]["same"]):
|
||||
if not (id_new in current_problem["related"] or id_new in current_problem["same"] or id_new in current_problem["unrelated"] or current_problem["id"] in new_problems_dict[id_new]["related"] or current_problem["id"] in new_problems_dict[id_new]["same"] or current_problem["id"] in new_problems_dict[id_new]["unrelated"]):
|
||||
alike_problems += ("%.4f" %similar_rate) + "\n\n" + id_new + " " + new_problems_dict[id_new]["content"] + "\n\n" + current_problem["id"] + " " + current_problem["content"] + "\n\n"
|
||||
else:
|
||||
remarked += 1
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.15"
|
||||
"version": "3.9.15"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
|
|
|
|||
Reference in New Issue