fix install(remove regex), calculator

This commit is contained in:
yongzhuo 2020-01-21 14:31:34 +08:00
parent 5c14e95dc1
commit 85470be266
9 changed files with 44 additions and 29 deletions

View File

@ -10,7 +10,7 @@
[![Stars](https://img.shields.io/github/stars/yongzhuo/Macropodus?style=social)](https://github.com/yongzhuo/Macropodus/stargazers)
[![Forks](https://img.shields.io/github/forks/yongzhuo/Macropodus.svg?style=social)](https://github.com/yongzhuo/Macropodus/network/members)
[![Join the chat at https://gitter.im/yongzhuo/Macropodus](https://badges.gitter.im/yongzhuo/Macropodus.svg)](https://gitter.im/yongzhuo/Macropodus?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
>>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础用大规模中文语料训练的自然语言处理工具包。将提供中文分词、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。
>>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础用大规模中文语料训练的自然语言处理工具包。将提供中文分词、词性标注、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。
## 目录
@ -222,9 +222,9 @@ print(sents)
```
## 命名实体提取
ner, albert+bilstm+crf网络架构, 最大支持126个字符;
需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以)
需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model);
* ner, albert+bilstm+crf网络架构, 最大支持126个字符;
* 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以);
* 需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model);
```python3
import macropodus
@ -237,9 +237,9 @@ print(res_ners)
```
## 词性标注
pos tag, albert+bilstm+crf网络架构, 最大支持126个字符;
需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以)
需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model);
* pos tag, albert+bilstm+crf网络架构, 最大支持126个字符;
* 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以);
* 需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model);
```python3
import macropodus
@ -253,7 +253,7 @@ print(res_postags)
## 常用小工具(toolkit)
工具包括科学计算器, 阿拉伯-中文数字转化
工具包括科学计算器, 中文繁体-简体转换, 阿拉伯-中文数字转换, 罗马数字-阿拉伯数字转换, 中文拼音
```python3
import macropodus

View File

@ -0,0 +1,5 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2019/12/21 23:06
# @author : Mo
# @function:

View File

@ -0,0 +1,5 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2019/12/21 23:06
# @author : Mo
# @function:

View File

@ -14,7 +14,6 @@ import re
logger = get_logger_root()
def rackets_replace(rackets_char, myformula):
"""
将2(3换成2*(3, 3)4换成3)*4

View File

@ -5,8 +5,8 @@
# @function :extract numbers from Chinese or mixed Chinese-Arabic sentences。提取数字,中文,或者混合中文-阿拉伯数字
import regex as re
# import re
# import regex as re
import re
# * 字符串预处理模块为分析器TimeNormalizer提供相应的字符串预处理服务
@ -82,12 +82,14 @@ class StringPreHandler:
for m in match:
target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1)
pattern = re.compile(u"(?<=(周|星期))[末天日]")
# pattern = re.compile(u"(?<=(周|星期))[末天日]")
pattern = re.compile(u"((?<=周)[末天日])|((?<=星期)[末天日])")
match = pattern.finditer(target)
for m in match:
target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1)
pattern = re.compile(u"(?<!(周|星期))0?[0-9]?十[0-9]?")
# pattern = re.compile(u"(?<!(周|星期))0?[0-9]?十[0-9]?")
pattern = re.compile(u"((?<!(周))0?[0-9]?十[0-9]?)|(?<!(星期))0?[0-9]?十[0-9]?")
match = pattern.finditer(target)
for m in match:
group = m.group()
@ -204,7 +206,8 @@ def extract_number(sentence):
find_list.append(i.group())
return find_list
if __name__ == '__main__':
sen = "1000.一加1等于几"
sen = "1000.一加1等于几,周末和星期天,星期一星期二"
res = extract_number(sen)
print(res)

View File

@ -199,7 +199,8 @@ class Chi2Num():
result_pos += text_end[i]
# 拼接
self.result_last = float(self.result_start + result_pos) if result_pos.isdigit() else self.result_start
else:
self.result_last =self.compose_integer(text)
return self.result_last

View File

@ -5,4 +5,4 @@
# @function: version of Macropodus
__version__ = "0.0.4"
__version__ = "0.0.5"

View File

@ -1,10 +1,10 @@
scikit-learn==0.19.1
pandas==0.23.4
passlib==1.7.1
gensim==3.7.1
numpy==1.16.2
tqdm==4.31.1
keras-bert==0.80.0
keras-adaptive-softmax==0.6.0
networkx==2.4
# tensorflow-gpu==1.15.0, tensorflow==1.15.0
scikit-learn>=0.19.1
pandas>=0.23.4
passlib>=1.7.1
gensim>=3.7.1
numpy>=1.16.2
tqdm>=4.31.1
keras-bert>=0.80.0
keras-adaptive-softmax>=0.6.0
regex

View File

@ -35,10 +35,7 @@ setup(name=NAME,
packages=find_packages(exclude=('test')),
package_data={'macropodus': ['*.*', 'data/*', 'data/dict/*',
'data/embedding/*', 'data/embedding/word2vec/*',
'data/model/*', 'data/model/ner_albert_people_1998/*',
'data/model/tag_albert_people_1998/*'],
'test': ['*.*', 'evaluate/*', 'evaluate/data/*', 'images/*',
'style_data/*', 'version_and_enhance/*']
'data/model/*']
},
install_requires=install_requires,
license=LICENSE,
@ -69,3 +66,8 @@ if __name__ == "__main__":
# 方案二
# python setup.py bdist_wheel --universal
# twine upload dist/*
#
# conda remove -n py35 --all
# conda create -n py351 python=3.5