diff --git a/README.md b/README.md index ae048b6..42f3e7c 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![Stars](https://img.shields.io/github/stars/yongzhuo/Macropodus?style=social)](https://github.com/yongzhuo/Macropodus/stargazers) [![Forks](https://img.shields.io/github/forks/yongzhuo/Macropodus.svg?style=social)](https://github.com/yongzhuo/Macropodus/network/members) [![Join the chat at https://gitter.im/yongzhuo/Macropodus](https://badges.gitter.im/yongzhuo/Macropodus.svg)](https://gitter.im/yongzhuo/Macropodus?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) ->>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础,用大规模中文语料训练的自然语言处理工具包。将提供中文分词、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。 +>>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础,用大规模中文语料训练的自然语言处理工具包。将提供中文分词、词性标注、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。 ## 目录 @@ -222,9 +222,9 @@ print(sents) ``` ## 命名实体提取 - ner, albert+bilstm+crf网络架构, 最大支持126个字符; - 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以) - 需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model); + * ner, albert+bilstm+crf网络架构, 最大支持126个字符; + * 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以); + * 需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model); ```python3 import macropodus @@ -237,9 +237,9 @@ print(res_ners) ``` ## 词性标注 - pos tag, albert+bilstm+crf网络架构, 最大支持126个字符; - 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以) - 需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model); + * pos tag, albert+bilstm+crf网络架构, 最大支持126个字符; + * 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以); + * 需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model); ```python3 import macropodus @@ -253,7 +253,7 @@ print(res_postags) ## 常用小工具(tookit) - 工具包括科学计算器, 阿拉伯-中文数字转化 + 工具包括科学计算器, 中文繁体-简体转换, 阿拉伯-中文数字转换, 罗马数字-阿拉伯数字转换, 中文拼音 ```python3 import macropodus diff --git a/macropodus/data/model/ner_albert_people_1998/__init__.py b/macropodus/data/model/ner_albert_people_1998/__init__.py new file mode 100644 index 0000000..e099f67 --- /dev/null +++ b/macropodus/data/model/ner_albert_people_1998/__init__.py @@ -0,0 +1,5 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2019/12/21 23:06 +# @author : Mo +# @function: \ No newline at end of file diff --git a/macropodus/data/model/tag_albert_people_1998/__init__.py b/macropodus/data/model/tag_albert_people_1998/__init__.py new file mode 100644 index 0000000..e099f67 --- /dev/null +++ b/macropodus/data/model/tag_albert_people_1998/__init__.py @@ -0,0 +1,5 @@ +# !/usr/bin/python +# -*- coding: utf-8 -*- +# @time : 2019/12/21 23:06 +# @author : Mo +# @function: \ No newline at end of file diff --git a/macropodus/tookit/calculator_sihui/calcultor_function.py b/macropodus/tookit/calculator_sihui/calcultor_function.py index cbd54c8..48f9da9 100644 --- a/macropodus/tookit/calculator_sihui/calcultor_function.py +++ b/macropodus/tookit/calculator_sihui/calcultor_function.py @@ -14,7 +14,6 @@ import re logger = get_logger_root() - def rackets_replace(rackets_char, myformula): """ 将2(3换成2*(3, 3)4换成3)*4 diff --git a/macropodus/tookit/calculator_sihui/calcultor_number.py b/macropodus/tookit/calculator_sihui/calcultor_number.py index 0a46b85..1684df6 100644 --- a/macropodus/tookit/calculator_sihui/calcultor_number.py +++ b/macropodus/tookit/calculator_sihui/calcultor_number.py @@ -5,8 +5,8 @@ # @function :extract number from sentence of chinese or mix。提取数字,中文,或者混合中文-阿拉伯数字 -import regex as re -# import re +# import regex as re +import re # * 字符串预处理模块,为分析器TimeNormalizer提供相应的字符串预处理服务 @@ -82,12 +82,14 @@ class StringPreHandler: for m in match: target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1) - pattern = re.compile(u"(?<=(周|星期))[末天日]") + # pattern = re.compile(u"(?<=(周|星期))[末天日]") + pattern = re.compile(u"((?<=周)[末天日])|((?<=星期)[末天日])") match = pattern.finditer(target) for m in match: target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1) - pattern = re.compile(u"(?=0.19.1 -pandas>=0.23.4 -passlib>=1.7.1 -gensim>=3.7.1 -numpy>=1.16.2 -tqdm>=4.31.1 -keras-bert>=0.80.0 -keras-adaptive-softmax>=0.6.0 -regex diff --git a/setup.py b/setup.py index ab1c226..be5c230 100644 --- a/setup.py +++ b/setup.py @@ -35,10 +35,7 @@ setup(name=NAME, packages=find_packages(exclude=('test')), package_data={'macropodus': ['*.*', 'data/*', 'data/dict/*', 'data/embedding/*', 'data/embedding/word2vec/*', - 'data/model/*', 'data/model/ner_albert_people_1998/*', - 'data/model/tag_albert_people_1998/*'], - 'test': ['*.*', 'evaluate/*', 'evaluate/data/*', 'images/*', - 'style_data/*', 'version_and_enhance/*'] + 'data/model/*'] }, install_requires=install_requires, license=LICENSE, @@ -69,3 +66,8 @@ if __name__ == "__main__": # 方案二 # python setup.py bdist_wheel --universal # twine upload dist/* + +# +# conda remove -n py35 --all +# conda create -n py351 python=3.5 +