fix install (remove regex), calculator

This commit is contained in:
yongzhuo 2020-01-21 14:31:34 +08:00
parent 5c14e95dc1
commit 85470be266
9 changed files with 44 additions and 29 deletions

View File

@ -10,7 +10,7 @@
[![Stars](https://img.shields.io/github/stars/yongzhuo/Macropodus?style=social)](https://github.com/yongzhuo/Macropodus/stargazers) [![Stars](https://img.shields.io/github/stars/yongzhuo/Macropodus?style=social)](https://github.com/yongzhuo/Macropodus/stargazers)
[![Forks](https://img.shields.io/github/forks/yongzhuo/Macropodus.svg?style=social)](https://github.com/yongzhuo/Macropodus/network/members) [![Forks](https://img.shields.io/github/forks/yongzhuo/Macropodus.svg?style=social)](https://github.com/yongzhuo/Macropodus/network/members)
[![Join the chat at https://gitter.im/yongzhuo/Macropodus](https://badges.gitter.im/yongzhuo/Macropodus.svg)](https://gitter.im/yongzhuo/Macropodus?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Join the chat at https://gitter.im/yongzhuo/Macropodus](https://badges.gitter.im/yongzhuo/Macropodus.svg)](https://gitter.im/yongzhuo/Macropodus?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
>>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础用大规模中文语料训练的自然语言处理工具包。将提供中文分词、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。 >>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础用大规模中文语料训练的自然语言处理工具包。将提供中文分词、词性标注、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。
## 目录 ## 目录
@ -222,9 +222,9 @@ print(sents)
``` ```
## 命名实体提取 ## 命名实体提取
ner, albert+bilstm+crf网络架构, 最大支持126个字符; * ner, albert+bilstm+crf网络架构, 最大支持126个字符;
需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以) * 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以);
需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model); * 需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model);
```python3 ```python3
import macropodus import macropodus
@ -237,9 +237,9 @@ print(res_ners)
``` ```
## 词性标注 ## 词性标注
pos tag, albert+bilstm+crf网络架构, 最大支持126个字符; * pos tag, albert+bilstm+crf网络架构, 最大支持126个字符;
需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以) * 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以);
需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model); * 需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model);
```python3 ```python3
import macropodus import macropodus
@ -253,7 +253,7 @@ print(res_postags)
## 常用小工具(toolkit) ## 常用小工具(toolkit)
工具包括科学计算器, 阿拉伯-中文数字转化 工具包括科学计算器, 中文繁体-简体转换, 阿拉伯-中文数字转换, 罗马数字-阿拉伯数字转换, 中文拼音
```python3 ```python3
import macropodus import macropodus

View File

@ -0,0 +1,5 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2019/12/21 23:06
# @author : Mo
# @function:

View File

@ -0,0 +1,5 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2019/12/21 23:06
# @author : Mo
# @function:

View File

@ -14,7 +14,6 @@ import re
logger = get_logger_root() logger = get_logger_root()
def rackets_replace(rackets_char, myformula): def rackets_replace(rackets_char, myformula):
""" """
将2(3换成2*(3, 3)4换成3)*4 将2(3换成2*(3, 3)4换成3)*4

View File

@ -5,8 +5,8 @@
# @function :extract numbers from Chinese or mixed Chinese-Arabic sentences。提取数字,中文,或者混合中文-阿拉伯数字 # @function :extract numbers from Chinese or mixed Chinese-Arabic sentences。提取数字,中文,或者混合中文-阿拉伯数字
import regex as re # import regex as re
# import re import re
# * 字符串预处理模块为分析器TimeNormalizer提供相应的字符串预处理服务 # * 字符串预处理模块为分析器TimeNormalizer提供相应的字符串预处理服务
@ -82,12 +82,14 @@ class StringPreHandler:
for m in match: for m in match:
target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1) target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1)
pattern = re.compile(u"(?<=(周|星期))[末天日]") # pattern = re.compile(u"(?<=(周|星期))[末天日]")
pattern = re.compile(u"((?<=周)[末天日])|((?<=星期)[末天日])")
match = pattern.finditer(target) match = pattern.finditer(target)
for m in match: for m in match:
target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1) target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1)
pattern = re.compile(u"(?<!(周|星期))0?[0-9]?十[0-9]?") # pattern = re.compile(u"(?<!(周|星期))0?[0-9]?十[0-9]?")
pattern = re.compile(u"((?<!(周))0?[0-9]?十[0-9]?)|(?<!(星期))0?[0-9]?十[0-9]?")
match = pattern.finditer(target) match = pattern.finditer(target)
for m in match: for m in match:
group = m.group() group = m.group()
@ -204,7 +206,8 @@ def extract_number(sentence):
find_list.append(i.group()) find_list.append(i.group())
return find_list return find_list
if __name__ == '__main__': if __name__ == '__main__':
sen = "1000.一加1等于几" sen = "1000.一加1等于几,周末和星期天,星期一星期二"
res = extract_number(sen) res = extract_number(sen)
print(res) print(res)

View File

@ -199,7 +199,8 @@ class Chi2Num():
result_pos += text_end[i] result_pos += text_end[i]
# 拼接 # 拼接
self.result_last = float(self.result_start + result_pos) if result_pos.isdigit() else self.result_start self.result_last = float(self.result_start + result_pos) if result_pos.isdigit() else self.result_start
else:
self.result_last =self.compose_integer(text)
return self.result_last return self.result_last

View File

@ -5,4 +5,4 @@
# @function: version of Macropodus # @function: version of Macropodus
__version__ = "0.0.4" __version__ = "0.0.5"

View File

@ -1,10 +1,10 @@
scikit-learn==0.19.1
pandas==0.23.4
passlib==1.7.1
gensim==3.7.1
numpy==1.16.2
tqdm==4.31.1
keras-bert==0.80.0
keras-adaptive-softmax==0.6.0
networkx==2.4
# tensorflow-gpu==1.15.0, tensorflow==1.15.0 # tensorflow-gpu==1.15.0, tensorflow==1.15.0
scikit-learn>=0.19.1
pandas>=0.23.4
passlib>=1.7.1
gensim>=3.7.1
numpy>=1.16.2
tqdm>=4.31.1
keras-bert>=0.80.0
keras-adaptive-softmax>=0.6.0
regex

View File

@ -35,10 +35,7 @@ setup(name=NAME,
packages=find_packages(exclude=('test')), packages=find_packages(exclude=('test')),
package_data={'macropodus': ['*.*', 'data/*', 'data/dict/*', package_data={'macropodus': ['*.*', 'data/*', 'data/dict/*',
'data/embedding/*', 'data/embedding/word2vec/*', 'data/embedding/*', 'data/embedding/word2vec/*',
'data/model/*', 'data/model/ner_albert_people_1998/*', 'data/model/*']
'data/model/tag_albert_people_1998/*'],
'test': ['*.*', 'evaluate/*', 'evaluate/data/*', 'images/*',
'style_data/*', 'version_and_enhance/*']
}, },
install_requires=install_requires, install_requires=install_requires,
license=LICENSE, license=LICENSE,
@ -69,3 +66,8 @@ if __name__ == "__main__":
# 方案二 # 方案二
# python setup.py bdist_wheel --universal # python setup.py bdist_wheel --universal
# twine upload dist/* # twine upload dist/*
#
# conda remove -n py35 --all
# conda create -n py351 python=3.5