#60 compare 支持交换句子
This commit is contained in:
parent
c580b3d82d
commit
4a44eff88f
@ -1,3 +1,5 @@
|
||||
# 3.6
|
||||
* Fix Bug: compare 保证交换两个句子后分数一致 [#60](https://github.com/huyingxi/Synonyms/issues/60)
|
||||
|
||||
# 3.5
|
||||
* 根据实际情况,降低向量距离对近似度分数的影响
|
||||
|
@ -1 +1 @@
|
||||
synonyms>=3.5
|
||||
synonyms>=3.6
|
9
demo.py
9
demo.py
@ -114,6 +114,15 @@ class Test(unittest.TestCase):
|
||||
r = synonyms.compare(sen1, sen2, seg=False)
|
||||
print("%s vs %s" % (sen1, sen2), r)
|
||||
|
||||
|
||||
def test_swap_sent(self):
|
||||
print("test_swap_sent")
|
||||
s1 = synonyms.compare("教学", "老师")
|
||||
s2 = synonyms.compare("老师", "教学")
|
||||
print('"教学", "老师": %s ' % s1)
|
||||
print('"老师", "教学": %s ' % s2)
|
||||
assert s1 == s2, "Scores should be the same after swap sents"
|
||||
|
||||
def test_nearby(self):
|
||||
synonyms.display("奥运") # synonyms.display calls synonyms.nearby
|
||||
synonyms.display("北新桥") # synonyms.display calls synonyms.nearby
|
||||
|
2
setup.py
2
setup.py
@ -13,7 +13,7 @@ Welcome
|
||||
|
||||
setup(
|
||||
name='synonyms',
|
||||
version='3.5.0',
|
||||
version='3.6.0',
|
||||
description='Chinese Synonyms for Natural Language Processing and Understanding',
|
||||
long_description=LONGDOC,
|
||||
author='Hai Liang Wang, Hu Ying Xi',
|
||||
|
@ -211,28 +211,28 @@ def _nearby_levenshtein_distance(s1, s2):
|
||||
使用空间距离近的词汇优化编辑距离计算
|
||||
'''
|
||||
s1_len, s2_len = len(s1), len(s2)
|
||||
maxlen = max(s1_len, s2_len)
|
||||
first, second = (s2, s1) if s1_len == maxlen else (s1, s2)
|
||||
ft_1 = set() # all related words with first sentence
|
||||
maxlen = s1_len
|
||||
if s1_len == s2_len:
|
||||
first, second = sorted([s1, s2])
|
||||
elif s1_len < s2_len:
|
||||
first = s1
|
||||
second = s2
|
||||
maxlen = s2_len
|
||||
else:
|
||||
first = s2
|
||||
second = s1
|
||||
|
||||
ft = set() # all related words with first sentence
|
||||
for x in first:
|
||||
ft_1.add(x)
|
||||
ft.add(x)
|
||||
n, _ = nearby(x)
|
||||
for o in n[:5]:
|
||||
ft_1.add(o)
|
||||
|
||||
ft_2 = set() # all related words with second sentence
|
||||
for x in second:
|
||||
ft_2.add(x)
|
||||
n, _ = nearby(x)
|
||||
for o in n[:5]:
|
||||
ft_2.add(0)
|
||||
|
||||
for o in n[:10]:
|
||||
ft.add(o)
|
||||
|
||||
scores = []
|
||||
if len(ft_1) == 0 or len(ft_2) == 0: return 0.0 # invalid length
|
||||
for x in ft_1:
|
||||
for y in ft_2:
|
||||
scores.append([_levenshtein_distance(x, y)])
|
||||
s = np.sum(scores) / (s1_len * s2_len)
|
||||
for x in second:
|
||||
scores.append(max([_levenshtein_distance(x, y) for y in ft]))
|
||||
s = np.sum(scores) / maxlen
|
||||
return s
|
||||
|
||||
def _similarity_distance(s1, s2, ignore):
|
||||
|
Loading…
Reference in New Issue
Block a user