Add files via upload
This commit is contained in:
parent
dcc40f4e43
commit
05d13a1a2f
49
README.md
Normal file
49
README.md
Normal file
@ -0,0 +1,49 @@
|
||||
# Keras-TextClassification
|
||||
|
||||
|
||||
# keras_textclassification(代码主体,未完待续...)
|
||||
- FastText
|
||||
- TextCNN
|
||||
- charCNN
|
||||
- TextRNN
|
||||
- TextRCNN
|
||||
|
||||
|
||||
# run(运行, 以FastText为例)
|
||||
- 1. 进入keras_textclassification/m01_FastText目录,
|
||||
- 2. 训练: 运行 train.py, 例如: python train.py
|
||||
- 3. 预测: 运行 predict.py, 例如: python predict.py
|
||||
- 说明: 默认不带pre train的random embedding,训练和验证语料只有100条,完整语料移步下面data查看下载
|
||||
|
||||
|
||||
# keras_textclassification/data
|
||||
- 数据下载
|
||||
**github项目中只上传了部分数据**,完整数据请前往链接: https://pan.baidu.com/s/1I3vydhmFEQ9nuPG2fDou8Q 提取码: rket
|
||||
- baidu_qa_2019(百度qa问答语料,只取title作为分类样本,17个类,有一个是空'',已经压缩上传)
|
||||
- baike_qa_train.csv
|
||||
- baike_qa_valid.csv
|
||||
- embeddings
|
||||
- chinese_L-12_H-768_A-12(取谷歌预训练好点的模型,已经压缩上传)
|
||||
- model
|
||||
- 预训练模型存放地址
|
||||
|
||||
# 项目说明
|
||||
- 1. 构建了base基类(网络(graph)、向量嵌入(词、字、句子embedding)),后边的具体模型继承它们,代码简单
|
||||
- 2. conf存放项目数据、模型的地址, data存放数据和语料, etl为数据预处理模块,
|
||||
|
||||
|
||||
# 模型与论文paper标题与地址
|
||||
* FastText: [Bag of Tricks for Efficient Text Classification](https://arxiv.org/abs/1607.01759)
|
||||
* TextCNN: [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882)
|
||||
* charCNN: [Character-Aware Neural Language Models](https://arxiv.org/abs/1508.06615)
|
||||
* TextRNN: [Recurrent Neural Network for Text Classification with Multi-Task Learning](https://www.ijcai.org/Proceedings/16/Papers/408.pdf)
|
||||
* TextRCNN: [Recurrent Convolutional Neural Networks for Text Classification](http://www.nlpr.ia.ac.cn/cip/~liukang/liukangPageFile/Recurrent%20Convolutional%20Neural%20Networks%20for%20Text%20Classification.pdf)
|
||||
* DCNN: [A Convolutional Neural Network for Modelling Sentences](https://arxiv.org/abs/1404.2188)
|
||||
|
||||
|
||||
# 参考/感谢
|
||||
* 文本分类项目: [https://github.com/mosu027/TextClassification](https://github.com/mosu027/TextClassification)
|
||||
* 文本分类看山杯: [https://github.com/brightmart/text_classification](https://github.com/brightmart/text_classification)
|
||||
* Kashgari项目: [https://github.com/BrikerMan/Kashgari](https://github.com/BrikerMan/Kashgari)
|
||||
* 文本分类lpty: [https://github.com/lpty/classifier](https://github.com/lpty/classifier)
|
||||
* keras文本分类: [https://github.com/ShawnyXiao/TextClassification-Keras](https://github.com/ShawnyXiao/TextClassification-Keras)
|
5
__init__.py
Normal file
5
__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
# !/usr/bin/python
|
||||
# @time :2019/6/3 10:50
|
||||
# @author :Mo
|
||||
# @function :
|
11
requirements.txt
Normal file
11
requirements.txt
Normal file
@ -0,0 +1,11 @@
|
||||
gensim==3.7.1
|
||||
jieba==0.39
|
||||
numpy==1.16.2
|
||||
pandas==0.23.4
|
||||
scikit-learn==0.19.1
|
||||
tflearn==0.3.2
|
||||
tqdm==4.31.1
|
||||
passlib==1.7.1
|
||||
keras==2.2.4
|
||||
tensorflow-gpu==1.12.0
|
||||
keras-bert==0.41.0
|
60
test/Dimension_error.py
Normal file
60
test/Dimension_error.py
Normal file
@ -0,0 +1,60 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
# !/usr/bin/python
|
||||
# @time :2019/6/11 22:57
|
||||
# @author :Mo
|
||||
# @function :
|
||||
|
||||
from keras.layers import Conv2D, MaxPooling2D, Input, Concatenate
|
||||
from keras.models import Model
|
||||
import keras.backend as K
|
||||
|
||||
"""This is the "inception" module."""
|
||||
def incepm_v1(out_filters, input_shape) -> Model:
    """Build a three-branch "inception"-style block as a standalone Keras model.

    Args:
        out_filters: Number of filters used by every convolution in the block.
        input_shape: Shape of a single input sample (no batch axis); it is
            passed unchanged to ``Input``.

    Returns:
        A ``Model`` that maps the block input to the concatenation of the
        three branch outputs.
    """
    def _relu_conv(kernel_size, tensor):
        # All convolutions here share filter count, 'same' padding and ReLU.
        return Conv2D(out_filters, kernel_size, padding='same',
                      activation='relu')(tensor)

    block_input = Input(shape=input_shape)

    # Branch 1: 1x1 convolution, then 3x3 convolution.
    branch_3x3 = _relu_conv((3, 3), _relu_conv((1, 1), block_input))

    # Branch 2: 1x1 convolution, then 5x5 convolution.
    branch_5x5 = _relu_conv((5, 5), _relu_conv((1, 1), block_input))

    # Branch 3: 3x3 max pooling with stride 1, then 1x1 convolution.
    pooled = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(block_input)
    branch_pool = _relu_conv((1, 1), pooled)

    # NOTE(review): the branches are joined along axis 1. Assuming the default
    # channels_last data format, that is a spatial axis, not the channel
    # axis -- confirm this is intended.
    merged = Concatenate(axis=1)([branch_3x3, branch_5x5, branch_pool])

    return Model(inputs=block_input, outputs=merged)
|
||||
|
||||
"""This is then used in the following model"""
|
||||
def Unetish_model1(image_shape=(3000, 3000, 3)):
    """Build a truncated U-Net-like model, print its summary and return it.

    Args:
        image_shape: Shape of a single input image (no batch axis), passed
            unchanged to ``Input``.

    Returns:
        A ``Model`` from the image input to the second auxiliary output.
    """
    def _pointwise(filters, tensor):
        # 1x1 ReLU convolution with 'same' padding, used for every projection.
        return Conv2D(filters, (1, 1), padding='same',
                      activation='relu')(tensor)

    def _halve(tensor):
        # 2x2 max pooling with stride 2.
        return MaxPooling2D((2, 2), strides=(2, 2))(tensor)

    image = Input(shape=image_shape)

    # Stage 1 (the original note says "96X96"; the default image_shape here
    # is 3000x3000).
    stage1 = Conv2D(32, (3, 3), padding='same', activation='relu')(image)
    stage1_down = _halve(_pointwise(16, stage1))
    # Auxiliary stage-1 branch; it does not feed the output returned below.
    stage1_aux = _pointwise(16, stage1)

    # Stage 2 (the original note says "48x48").
    # The inception block is built as a sub-model sized from the static shape
    # of its input with the batch axis dropped. An earlier variant used
    # `conv1out.shape[1:]` instead of `K.int_shape(...)[1:]`.
    stage2 = incepm_v1(64, K.int_shape(stage1_down)[1:])(stage1_down)
    # Downsampled stage-2 output; it does not feed the output returned below.
    stage2_down = _halve(_pointwise(32, stage2))
    stage2_aux = _pointwise(32, stage2)

    # Further stages were removed from this snippet for brevity.
    model = Model(inputs=image, outputs=stage2_aux)
    model.summary()
    return model
|
||||
|
||||
# Side length mentioned in the layer notes; the call below does not use it.
IMAGE_SIZE = 96

# Build the model at import time with an explicit 3000x3000 RGB input shape.
Unet = Unetish_model1((3000, 3000, 3))
|
5
test/__init__.py
Normal file
5
test/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
# !/usr/bin/python
|
||||
# @time :2019/6/11 22:54
|
||||
# @author :Mo
|
||||
# @function :
|
Loading…
Reference in New Issue
Block a user