From a6b522343d6d2d9e16ad0e4e41f43b33dc1d9b16 Mon Sep 17 00:00:00 2001
From: Hai Liang Wang <hain@chatopera.com>
Date: Wed, 25 May 2022 08:27:52 +0800
Subject: [PATCH] Add synonyms.describe() interface for summary info

---
 README.md            | 13 +++++++++++++
 Requirements.txt     |  2 +-
 setup.py             |  2 +-
 synonyms/synonyms.py | 16 +++++++++++++++-
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index d5823ab..612c4b9 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,7 @@ python -c "import synonyms" # download word vectors file
 | ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | _SYNONYMS_WORD2VEC_BIN_MODEL_ZH_CN_ | 使用 word2vec 训练的词向量文件，二进制格式。                                                                                                                                                       |
 | _SYNONYMS_WORDSEG_DICT_             | 中文分词[**主字典**](https://github.com/fxsjy/jieba#%E5%BB%B6%E8%BF%9F%E5%8A%A0%E8%BD%BD%E6%9C%BA%E5%88%B6)，格式和使用[参考](https://github.com/fxsjy/jieba#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8) |
+| _SYNONYMS_DEBUG_                    | ["TRUE"\|"FALSE"], 是否输出调试日志，设置为 “TRUE” 输出，默认为 “FALSE”                                                                                                                            |
 
 ### synonyms#nearby(word [, size = 10])
 
@@ -123,6 +124,18 @@ synonyms.nearby(人脸, 10) = (
 
 `SIZE` 是打印词汇表的数量，默认 10。
 
+### synonyms#describe()
+
+打印当前包的描述信息：
+
+```
+>>> synonyms.describe()
+Vocab size in vector model: 435729
+model_path: /Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz
+version: 3.18.0
+{'vocab_size': 435729, 'version': '3.18.0', 'model_path': '/Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz'}
+```
+
 ### synonyms#v(word)
 
 获得一个词语的向量，该向量为 numpy 的 array，当该词语是未登录词时，抛出 KeyError 异常。
diff --git a/Requirements.txt b/Requirements.txt
index e71301c..4d63e6d 100644
--- a/Requirements.txt
+++ b/Requirements.txt
@@ -1 +1 @@
-synonyms>=3.17
\ No newline at end of file
+synonyms>=3.18
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 48becea..5c00067 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ https://github.com/chatopera/Synonyms
 
 setup(
     name='synonyms',
-    version='3.17.0',
+    version='3.18.0',
     description='中文近义词：聊天机器人，智能问答工具包；Chinese Synonyms for Natural Language Processing and Understanding',
     long_description=LONGDOC,
     author='Hai Liang Wang, Hu Ying Xi',
diff --git a/synonyms/synonyms.py b/synonyms/synonyms.py
index 74b99d5..0a4077c 100755
--- a/synonyms/synonyms.py
+++ b/synonyms/synonyms.py
@@ -20,7 +20,7 @@ from __future__ import division
 __copyright__ = "Copyright (c) (2017-2022) Chatopera Inc. All Rights Reserved"
 __author__ = "Hu Ying Xi<>, Hai Liang Wang<hai@chatopera.com>"
 __date__ = "2020-09-24"
-__version__ = "3.17.0"
+__version__ = "3.18.0"
 
 import os
 import sys
@@ -372,6 +372,20 @@ def compare(s1, s2, seg=True, ignore=False, stopwords=False):
     assert len(s1) > 0 and len(s2) > 0, "The length of s1 and s2 should > 0."
     return _similarity_distance(s1_words, s2_words, ignore)
 
+def describe():
+    '''
+    Print the vocab size, model path and package version to stdout, then return the same three values as a dict.
+    '''
+    vocab_size = len(_vectors.vocab.keys())  # number of entries in the loaded vector model's vocab
+    print("Vocab size in vector model: %d" % vocab_size)
+    print("model_path: %s" % _f_model)  # _f_model is defined elsewhere in this module
+    print("version: %s" % __version__)
+    return dict({  # keys: "vocab_size", "version", "model_path"
+        "vocab_size": vocab_size,
+        "version": __version__,
+        "model_path": _f_model
+    })
+
 def display(word, size = 10):
     print("'%s'近义词：" % word)
     o = nearby(word, size)