refine format

This commit is contained in:
Hai Liang Wang 2018-03-02 13:19:03 +08:00
parent 0e5794cfff
commit 4450ba836c

View File

@ -59,6 +59,13 @@ _size = 0
# Module-level slot for the word vectors; None here, rebound later in the
# file to the result of _load_w2v(...).
_vectors = None
# Module-level stopword collection; starts out as an empty set.
_stopwords = set()
'''
lambda fns
'''
# combine similarity scores
def _similarity_smooth(x, y, z):
    '''
    Combine similarity scores.

    Returns ``(x * y) + z``: ``x`` scaled by ``y`` with ``z`` added as an
    offset. Works for any operands that support ``*`` and ``+``.
    '''
    return (x * y) + z
def _sim_molecule(x):
    '''
    Sum ``x`` along axis 0.

    The original inline comment labels this value the numerator of the
    similarity computation.
    '''
    return np.sum(x, axis=0)
'''
nearby
'''
@ -92,11 +99,9 @@ def nearby(word):
except KeyError as e:
return [[], []]
'''
similarity
'''
# stopwords: path of the stopword list, data/stopwords.txt under `curdir`
_fin_stopwords_path = os.path.join(curdir, 'data', 'stopwords.txt')
def _load_stopwords(file_path):
@ -140,8 +145,6 @@ def _load_w2v(model_file=_f_model, binary=True):
# Print a progress message, then bind the result of _load_w2v (called with
# the `_f_model` path) to the module-level `_vectors`.
print(">> Synonyms on loading vectors ...")
_vectors = _load_w2v(model_file=_f_model)
def _sim_molecule(x):
    '''
    Sum ``x`` along axis 0.

    The original inline comment labels this value the numerator of the
    similarity computation.
    '''
    return np.sum(x, axis=0)
def _get_wv(sentence):
'''
get word2vec data by sentence
@ -176,7 +179,6 @@ def _get_wv(sentence):
vectors.append(r)
return vectors
def _unigram_overlap(sentence1, sentence2):
'''
compute unigram overlap
@ -222,10 +224,9 @@ def _levenshtein_distance(sentence1, sentence2):
def _nearby_levenshtein_distance(s1, s2):
'''
Use
Use words that are nearby in space (small vector distance) to optimize the edit-distance calculation
'''
s1_len = len(s1)
s2_len = len(s2)
s1_len, s2_len = len(s1), len(s2)
maxlen = max(s1_len, s2_len)
first, second = (s2, s1) if s1_len == maxlen else (s1, s2)
ft = set() # all related words with first sentence
@ -241,9 +242,6 @@ def _nearby_levenshtein_distance(s1, s2):
s = np.sum(scores) / maxlen
return s
# combine similarity scores
def _similarity_smooth(x, y, z):
    '''
    Combine similarity scores.

    Returns ``(x * y) + z``: ``x`` scaled by ``y`` with ``z`` added as an
    offset. Works for any operands that support ``*`` and ``+``.
    '''
    return (x * y) + z
def _similarity_distance(s1, s2):
'''
compute similarity with distance measurement
@ -282,7 +280,6 @@ def compare(s1, s2, seg=True):
s1 = s1.split()
s2 = s2.split()
assert len(s1) > 0 and len(s2) > 0, "The length of s1 and s2 should > 0."
return _similarity_distance(s1, s2)
def display(word):
@ -298,6 +295,5 @@ def main():
display("人脸")
display("NOT_EXIST")
if __name__ == '__main__':
main()