From a6b522343d6d2d9e16ad0e4e41f43b33dc1d9b16 Mon Sep 17 00:00:00 2001 From: Hai Liang Wang Date: Wed, 25 May 2022 08:27:52 +0800 Subject: [PATCH] Add synonyms.describe() interface for summary info --- README.md | 13 +++++++++++++ Requirements.txt | 2 +- setup.py | 2 +- synonyms/synonyms.py | 16 +++++++++++++++- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d5823ab..612c4b9 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ python -c "import synonyms" # download word vectors file | ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | _SYNONYMS_WORD2VEC_BIN_MODEL_ZH_CN_ | 使用 word2vec 训练的词向量文件,二进制格式。 | | _SYNONYMS_WORDSEG_DICT_ | 中文分词[**主字典**](https://github.com/fxsjy/jieba#%E5%BB%B6%E8%BF%9F%E5%8A%A0%E8%BD%BD%E6%9C%BA%E5%88%B6),格式和使用[参考](https://github.com/fxsjy/jieba#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8) | +| _SYNONYMS_DEBUG_ | ["TRUE"\|"FALSE"], 是否输出调试日志,设置为 “TRUE” 输出,默认为 “FALSE” | ### synonyms#nearby(word [, size = 10]) @@ -123,6 +124,18 @@ synonyms.nearby(人脸, 10) = ( `SIZE` 是打印词汇表的数量,默认 10。 +### synonyms#describe() + +打印当前包的描述信息: + +``` +>>> synonyms.describe() +Vocab size in vector model: 435729 +model_path: /Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz +version: 3.18.0 +{'vocab_size': 435729, 'version': '3.18.0', 'model_path': '/Users/hain/chatopera/Synonyms/synonyms/data/words.vector.gz'} +``` + ### synonyms#v(word) 获得一个词语的向量,该向量为 numpy 的 array,当该词语是未登录词时,抛出 KeyError 异常。 diff --git a/Requirements.txt b/Requirements.txt index e71301c..4d63e6d 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -1 +1 @@ -synonyms>=3.17 \ No newline at end of file +synonyms>=3.18 \ No newline at end of file diff --git a/setup.py b/setup.py index 48becea..5c00067 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ 
https://github.com/chatopera/Synonyms setup( name='synonyms', - version='3.17.0', + version='3.18.0', description='中文近义词:聊天机器人,智能问答工具包;Chinese Synonyms for Natural Language Processing and Understanding', long_description=LONGDOC, author='Hai Liang Wang, Hu Ying Xi', diff --git a/synonyms/synonyms.py b/synonyms/synonyms.py index 74b99d5..0a4077c 100755 --- a/synonyms/synonyms.py +++ b/synonyms/synonyms.py @@ -20,7 +20,7 @@ from __future__ import division __copyright__ = "Copyright (c) (2017-2022) Chatopera Inc. All Rights Reserved" __author__ = "Hu Ying Xi<>, Hai Liang Wang" __date__ = "2020-09-24" -__version__ = "3.17.0" +__version__ = "3.18.0" import os import sys @@ -372,6 +372,20 @@ def compare(s1, s2, seg=True, ignore=False, stopwords=False): assert len(s1) > 0 and len(s2) > 0, "The length of s1 and s2 should > 0." return _similarity_distance(s1_words, s2_words, ignore) +def describe(): + ''' + summary info of vectors + ''' + vocab_size = len(_vectors.vocab.keys()) + print("Vocab size in vector model: %d" % vocab_size) + print("model_path: %s" % _f_model) + print("version: %s" % __version__) + return dict({ + "vocab_size": vocab_size, + "version": __version__, + "model_path": _f_model + }) + def display(word, size = 10): print("'%s'近义词:" % word) o = nearby(word, size)