From 9bd0b4e6b3aec84e69137d40965546c40e9aa775 Mon Sep 17 00:00:00 2001 From: "weiye.wang" Date: Wed, 8 Feb 2023 23:22:09 +0800 Subject: [PATCH] 20230208 night --- 工具/相似题目检测.ipynb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/工具/相似题目检测.ipynb b/工具/相似题目检测.ipynb index 8bac3b05..cb7b4779 100644 --- a/工具/相似题目检测.ipynb +++ b/工具/相似题目检测.ipynb @@ -47,10 +47,12 @@ "\n", "#字符串预处理\n", "def pre_treating(string):\n", - " string = re.sub(r\"[\\s\\\\\\{\\}\\$\\(\\)\\[\\]]\",\"\",string)\n", - " string = re.sub(r\"[\\\\n\\\\t]\",\"\",string)\n", - " string = re.sub(r\"(displaystyle)|(overrightarrow)\",\"\",string)\n", " string = re.sub(r\"\\\\begin\\{center\\}[\\s\\S]*?\\\\end\\{center\\}\",\"\",string)\n", + " string = re.sub(r\"[\\s\\\\\\{\\}\\$\\(\\)\\[\\]]\",\"\",string)\n", + " string = re.sub(r\"[\\n\\t]\",\"\",string)\n", + " string = re.sub(r\"(displaystyle)|(overrightarrow)\",\"\",string)\n", + " string = re.sub(r\"(bracket)|(blank)|(fourch)|(twoch)|(onech)\",\"\",string)\n", + " string = re.sub(r\"[,\\.:;?]\",\"\",string)\n", " return string\n", "\n", "#difflab字符串比较\n", @@ -72,6 +74,8 @@ " return Levenshtein.ratio(str1,str2)\n", "\n", "\n", + "\n", + "\n", "#指定对比方法\n", "sim_test = jaro_get_equal_rate\n", "\n",