fix install (remove regex), calculator
This commit is contained in:
parent
5c14e95dc1
commit
85470be266
16
README.md
16
README.md
@ -10,7 +10,7 @@
|
||||
[![Stars](https://img.shields.io/github/stars/yongzhuo/Macropodus?style=social)](https://github.com/yongzhuo/Macropodus/stargazers)
|
||||
[![Forks](https://img.shields.io/github/forks/yongzhuo/Macropodus.svg?style=social)](https://github.com/yongzhuo/Macropodus/network/members)
|
||||
[![Join the chat at https://gitter.im/yongzhuo/Macropodus](https://badges.gitter.im/yongzhuo/Macropodus.svg)](https://gitter.im/yongzhuo/Macropodus?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
>>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础,用大规模中文语料训练的自然语言处理工具包。将提供中文分词、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。
|
||||
>>> Macropodus是一个以Albert+BiLSTM+CRF网络架构为基础,用大规模中文语料训练的自然语言处理工具包。将提供中文分词、词性标注、命名实体识别、关键词抽取、文本摘要、新词发现、文本相似度、计算器、数字转换、拼音转换、繁简转换等常见NLP功能。
|
||||
|
||||
|
||||
## 目录
|
||||
@ -222,9 +222,9 @@ print(sents)
|
||||
```
|
||||
|
||||
## 命名实体提取
|
||||
ner, albert+bilstm+crf网络架构, 最大支持126个字符;
|
||||
需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以)
|
||||
需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model);
|
||||
* ner, albert+bilstm+crf网络架构, 最大支持126个字符;
|
||||
* 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以);
|
||||
* 需要下载模型(pip安装不默认下载, 将ner_albert_people_1998覆盖到安装目录macropodus/data/model);
|
||||
|
||||
```python3
|
||||
import macropodus
|
||||
@ -237,9 +237,9 @@ print(res_ners)
|
||||
```
|
||||
|
||||
## 词性标注
|
||||
pos tag, albert+bilstm+crf网络架构, 最大支持126个字符;
|
||||
需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以)
|
||||
需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model);
|
||||
* pos tag, albert+bilstm+crf网络架构, 最大支持126个字符;
|
||||
* 需要安装tensorflow==1.15.0(pip安装不默认下载, 1.15.0以下未实验, 1.13以上应该可以);
|
||||
* 需要下载模型(pip安装不默认下载, 将tag_albert_people_1998覆盖到安装目录macropodus/data/model);
|
||||
|
||||
```python3
|
||||
import macropodus
|
||||
@ -253,7 +253,7 @@ print(res_postags)
|
||||
|
||||
## 常用小工具(toolkit)
|
||||
|
||||
工具包括科学计算器, 阿拉伯-中文数字转化
|
||||
工具包括科学计算器, 中文繁体-简体转换, 阿拉伯-中文数字转换, 罗马数字-阿拉伯数字转换, 中文拼音转换
|
||||
```python3
|
||||
import macropodus
|
||||
|
||||
|
5
macropodus/data/model/ner_albert_people_1998/__init__.py
Normal file
5
macropodus/data/model/ner_albert_people_1998/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
# !/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @time : 2019/12/21 23:06
|
||||
# @author : Mo
|
||||
# @function:
|
5
macropodus/data/model/tag_albert_people_1998/__init__.py
Normal file
5
macropodus/data/model/tag_albert_people_1998/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
# !/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @time : 2019/12/21 23:06
|
||||
# @author : Mo
|
||||
# @function:
|
@ -14,7 +14,6 @@ import re
|
||||
logger = get_logger_root()
|
||||
|
||||
|
||||
|
||||
def rackets_replace(rackets_char, myformula):
|
||||
"""
|
||||
将2(3换成2*(3, 3)4换成3)*4
|
||||
|
@ -5,8 +5,8 @@
|
||||
# @function :extract numbers from a Chinese or mixed Chinese-Arabic sentence。提取数字,中文,或者混合中文-阿拉伯数字
|
||||
|
||||
|
||||
import regex as re
|
||||
# import re
|
||||
# import regex as re
|
||||
import re
|
||||
|
||||
|
||||
# * 字符串预处理模块,为分析器TimeNormalizer提供相应的字符串预处理服务
|
||||
@ -82,12 +82,14 @@ class StringPreHandler:
|
||||
for m in match:
|
||||
target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1)
|
||||
|
||||
pattern = re.compile(u"(?<=(周|星期))[末天日]")
|
||||
# pattern = re.compile(u"(?<=(周|星期))[末天日]")
|
||||
pattern = re.compile(u"((?<=周)[末天日])|((?<=星期)[末天日])")
|
||||
match = pattern.finditer(target)
|
||||
for m in match:
|
||||
target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1)
|
||||
|
||||
pattern = re.compile(u"(?<!(周|星期))0?[0-9]?十[0-9]?")
|
||||
# pattern = re.compile(u"(?<!(周|星期))0?[0-9]?十[0-9]?")
|
||||
pattern = re.compile(u"((?<!(周))0?[0-9]?十[0-9]?)|(?<!(星期))0?[0-9]?十[0-9]?")
|
||||
match = pattern.finditer(target)
|
||||
for m in match:
|
||||
group = m.group()
|
||||
@ -204,7 +206,8 @@ def extract_number(sentence):
|
||||
find_list.append(i.group())
|
||||
return find_list
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sen = "1000.一加1等于几"
|
||||
sen = "1000.一加1等于几,周末和星期天,星期一星期二"
|
||||
res = extract_number(sen)
|
||||
print(res)
|
||||
|
@ -199,7 +199,8 @@ class Chi2Num():
|
||||
result_pos += text_end[i]
|
||||
# 拼接
|
||||
self.result_last = float(self.result_start + result_pos) if result_pos.isdigit() else self.result_start
|
||||
|
||||
else:
|
||||
self.result_last =self.compose_integer(text)
|
||||
return self.result_last
|
||||
|
||||
|
||||
|
@ -5,4 +5,4 @@
|
||||
# @function: version of Macropodus
|
||||
|
||||
|
||||
__version__ = "0.0.4"
|
||||
__version__ = "0.0.5"
|
||||
|
@ -1,10 +1,10 @@
|
||||
scikit-learn==0.19.1
|
||||
pandas==0.23.4
|
||||
passlib==1.7.1
|
||||
gensim==3.7.1
|
||||
numpy==1.16.2
|
||||
tqdm==4.31.1
|
||||
keras-bert==0.80.0
|
||||
keras-adaptive-softmax==0.6.0
|
||||
networkx==2.4
|
||||
# tensorflow-gpu==1.15.0, tensorflow==1.15.0
|
||||
scikit-learn>=0.19.1
|
||||
pandas>=0.23.4
|
||||
passlib>=1.7.1
|
||||
gensim>=3.7.1
|
||||
numpy>=1.16.2
|
||||
tqdm>=4.31.1
|
||||
keras-bert>=0.80.0
|
||||
keras-adaptive-softmax>=0.6.0
|
||||
regex
|
||||
|
10
setup.py
10
setup.py
@ -35,10 +35,7 @@ setup(name=NAME,
|
||||
packages=find_packages(exclude=('test')),
|
||||
package_data={'macropodus': ['*.*', 'data/*', 'data/dict/*',
|
||||
'data/embedding/*', 'data/embedding/word2vec/*',
|
||||
'data/model/*', 'data/model/ner_albert_people_1998/*',
|
||||
'data/model/tag_albert_people_1998/*'],
|
||||
'test': ['*.*', 'evaluate/*', 'evaluate/data/*', 'images/*',
|
||||
'style_data/*', 'version_and_enhance/*']
|
||||
'data/model/*']
|
||||
},
|
||||
install_requires=install_requires,
|
||||
license=LICENSE,
|
||||
@ -69,3 +66,8 @@ if __name__ == "__main__":
|
||||
# 方案二
|
||||
# python setup.py bdist_wheel --universal
|
||||
# twine upload dist/*
|
||||
|
||||
#
|
||||
# conda remove -n py35 --all
|
||||
# conda create -n py351 python=3.5
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user