Add synonyms.describe() interface for summary info
This commit is contained in:
parent
95d91e0cb2
commit
a6b522343d
13
README.md
13
README.md
@ -63,6 +63,7 @@ python -c "import synonyms" # download word vectors file
|
||||
| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| _SYNONYMS_WORD2VEC_BIN_MODEL_ZH_CN_ | 使用 word2vec 训练的词向量文件,二进制格式。 |
|
||||
| _SYNONYMS_WORDSEG_DICT_ | 中文分词[**主字典**](https://github.com/fxsjy/jieba#%E5%BB%B6%E8%BF%9F%E5%8A%A0%E8%BD%BD%E6%9C%BA%E5%88%B6),格式和使用[参考](https://github.com/fxsjy/jieba#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8) |
|
||||
| _SYNONYMS_DEBUG_ | ["TRUE"\|"FALSE"], 是否输出调试日志,设置为 “TRUE” 输出,默认为 “FALSE” |
|
||||
|
||||
### synonyms#nearby(word [, size = 10])
|
||||
|
||||
@ -123,6 +124,18 @@ synonyms.nearby(人脸, 10) = (
|
||||
|
||||
`SIZE` 是打印词汇表的数量,默认 10。
|
||||
|
||||
### synonyms#describe()
|
||||
|
||||
打印当前包的描述信息:
|
||||
|
||||
```
|
||||
>>> synonyms.describe()
|
||||
Vocab size in vector model: 435729
|
||||
model_path: /Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz
|
||||
version: 3.18.0
|
||||
{'vocab_size': 435729, 'version': '3.18.0', 'model_path': '/Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz'}
|
||||
```
|
||||
|
||||
### synonyms#v(word)
|
||||
|
||||
获得一个词语的向量,该向量为 numpy 的 array,当该词语是未登录词时,抛出 KeyError 异常。
|
||||
|
@ -1 +1 @@
|
||||
synonyms>=3.17
|
||||
synonyms>=3.18
|
2
setup.py
2
setup.py
@ -12,7 +12,7 @@ https://github.com/chatopera/Synonyms
|
||||
|
||||
setup(
|
||||
name='synonyms',
|
||||
version='3.17.0',
|
||||
version='3.18.0',
|
||||
description='中文近义词:聊天机器人,智能问答工具包;Chinese Synonyms for Natural Language Processing and Understanding',
|
||||
long_description=LONGDOC,
|
||||
author='Hai Liang Wang, Hu Ying Xi',
|
||||
|
@ -20,7 +20,7 @@ from __future__ import division
|
||||
__copyright__ = "Copyright (c) (2017-2022) Chatopera Inc. All Rights Reserved"
|
||||
__author__ = "Hu Ying Xi<>, Hai Liang Wang<hai@chatopera.com>"
|
||||
__date__ = "2020-09-24"
|
||||
__version__ = "3.17.0"
|
||||
__version__ = "3.18.0"
|
||||
|
||||
import os
|
||||
import sys
|
||||
@ -372,6 +372,20 @@ def compare(s1, s2, seg=True, ignore=False, stopwords=False):
|
||||
assert len(s1) > 0 and len(s2) > 0, "The length of s1 and s2 should > 0."
|
||||
return _similarity_distance(s1_words, s2_words, ignore)
|
||||
|
||||
def describe():
    '''
    Print and return summary info of the loaded word vectors.

    Writes three lines to stdout (vocabulary size, model path and package
    version) and returns the same values so callers can use them
    programmatically.

    Returns:
        dict with keys "vocab_size", "version" and "model_path".
    '''
    # NOTE(review): assumes `_vectors.vocab` is dict-like, so len() on it
    # equals the number of keys -- confirm against the vector loader.
    vocab_size = len(_vectors.vocab)
    print("Vocab size in vector model: %d" % vocab_size)
    print("model_path: %s" % _f_model)
    print("version: %s" % __version__)
    return {
        "vocab_size": vocab_size,
        "version": __version__,
        "model_path": _f_model,
    }
|
||||
|
||||
def display(word, size = 10):
|
||||
print("'%s'近义词:" % word)
|
||||
o = nearby(word, size)
|
||||
|
Loading…
Reference in New Issue
Block a user