nearby, display: add optional `size` parameter (default 10)

This commit is contained in:
corey@cn 2019-06-11 14:51:20 +08:00 committed by GitHub
parent fe7450d51d
commit 6f6abdc877
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -287,22 +287,23 @@ Public Methods
'''
seg = _segment_words # word segmenter
def nearby(word, size=10):
    '''
    Look up the neighbouring words of `word`, using a module-level cache.

    word: the query word; it is normalised with any2unicode before lookup.
    size: forwarded as the second argument to _vectors.neighbours
          (presumably the number of neighbours to return -- confirm
          against the vectors implementation). Defaults to 10.

    Returns a tuple (words, scores) of two parallel lists. When the lookup
    fails (e.g. the word is out of vocabulary) the lists hold whatever was
    collected before the failure, which is normally nothing.
    '''
    w = any2unicode(word)
    # size is part of the key so that queries for the same word with
    # different sizes do not share a cache entry
    wk = w + '-' + str(size)
    # read from cache
    if wk in _cache_nearby:
        return _cache_nearby[wk]
    words, scores = [], []
    try:
        for x in _vectors.neighbours(w, size):
            # read both fields before appending so the two lists can never
            # end up with different lengths if an entry is malformed
            neighbour, score = x[0], x[1]
            words.append(neighbour)
            scores.append(score)
    except Exception:
        # best effort: ignore key error (OOV) and other lookup failures,
        # but let KeyboardInterrupt / SystemExit propagate
        pass
    # put into cache (failed lookups are cached too, as in the original)
    _cache_nearby[wk] = (words, scores)
    return words, scores
def compare(s1, s2, seg=True, ignore=False, stopwords=False):
@ -343,9 +344,9 @@ def compare(s1, s2, seg=True, ignore=False, stopwords=False):
assert len(s1) > 0 and len(s2) > 0, "The length of s1 and s2 should > 0."
return _similarity_distance(s1_words, s2_words, ignore)
def display(word):
def display(word, size = 10):
print("'%s'近义词:" % word)
o = nearby(word)
o = nearby(word, size)
assert len(o) == 2, "should contain 2 list"
if len(o[0]) == 0:
print(" out of vocabulary")