Add synonyms.describe() interface for summary info

2022-05-25 08:27:52 +08:00 · 2022-05-25 08:27:52 +08:00 · a6b522343d
commit a6b522343d
parent 95d91e0cb2
4 changed files with 30 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -63,6 +63,7 @@ python -c "import synonyms" # download word vectors file
 | ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | _SYNONYMS_WORD2VEC_BIN_MODEL_ZH_CN_ | 使用 word2vec 训练的词向量文件，二进制格式。                                                                                                                                                       |
 | _SYNONYMS_WORDSEG_DICT_             | 中文分词[**主字典**](https://github.com/fxsjy/jieba#%E5%BB%B6%E8%BF%9F%E5%8A%A0%E8%BD%BD%E6%9C%BA%E5%88%B6)，格式和使用[参考](https://github.com/fxsjy/jieba#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8) |
 | _SYNONYMS_DEBUG_                    | ["TRUE"\|"FALSE"], 是否输出调试日志，设置为 “TRUE” 输出，默认为 “FALSE”                                                                                                                            |
 ### synonyms#nearby(word [, size = 10])
@ -123,6 +124,18 @@ synonyms.nearby(人脸, 10) = (
 `SIZE` 是打印词汇表的数量，默认 10。
 ### synonyms#describe()
 打印当前包的描述信息：
 ```
 >>> synonyms.describe()
 Vocab size in vector model: 435729
 model_path: /Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz
 version: 3.18.0
 {'vocab_size': 435729, 'version': '3.18.0', 'model_path': '/Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz'}
 ```
 ### synonyms#v(word)
 获得一个词语的向量，该向量为 numpy 的 array，当该词语是未登录词时，抛出 KeyError 异常。
--- a/Requirements.txt
+++ b/Requirements.txt
@ -1 +1 @@
-synonyms>=3.17
+synonyms>=3.18
--- a/setup.py
+++ b/setup.py
@ -12,7 +12,7 @@ https://github.com/chatopera/Synonyms
 setup(
    name='synonyms',
-    version='3.17.0',
+    version='3.18.0',
    description='中文近义词：聊天机器人，智能问答工具包；Chinese Synonyms for Natural Language Processing and Understanding',
    long_description=LONGDOC,
    author='Hai Liang Wang, Hu Ying Xi',
--- a/synonyms/synonyms.py
+++ b/synonyms/synonyms.py
@ -20,7 +20,7 @@ from __future__ import division
 __copyright__ = "Copyright (c) (2017-2022) Chatopera Inc. All Rights Reserved"
 __author__ = "Hu Ying Xi<>, Hai Liang Wang<hai@chatopera.com>"
 __date__ = "2020-09-24"
-__version__ = "3.17.0"
+__version__ = "3.18.0"
 import os
 import sys
@ -372,6 +372,20 @@ def compare(s1, s2, seg=True, ignore=False, stopwords=False):
    assert len(s1) > 0 and len(s2) > 0, "The length of s1 and s2 should > 0."
    return _similarity_distance(s1_words, s2_words, ignore)
 def describe():
    '''
    Print and return summary info of the loaded word vectors.

    Writes three lines to stdout (vocabulary size, model path and package
    version) and returns the same values so callers can use them
    programmatically.

    Returns:
        dict with the keys "vocab_size", "version" and "model_path".
    '''
    vocab_size = len(_vectors.vocab.keys())
    print("Vocab size in vector model: %d" % vocab_size)
    print("model_path: %s" % _f_model)
    print("version: %s" % __version__)
    # A dict literal is already a fresh dict; wrapping it in dict() only
    # made a redundant copy.
    return {
        "vocab_size": vocab_size,
        "version": __version__,
        "model_path": _f_model,
    }
 def display(word, size = 10):
    print("'%s'近义词：" % word)
    o = nearby(word, size)