From 05d13a1a2f14d8bb6794bfa9fc738a057c774f52 Mon Sep 17 00:00:00 2001 From: yongzhuo <31341349+yongzhuo@users.noreply.github.com> Date: Thu, 13 Jun 2019 23:34:38 +0800 Subject: [PATCH] Add files via upload --- README.md | 49 +++++++++++++++++++++++++++++++++ __init__.py | 5 ++++ requirements.txt | 11 ++++++++ test/Dimension_error.py | 60 +++++++++++++++++++++++++++++++++++++++++ test/__init__.py | 5 ++++ 5 files changed, 130 insertions(+) create mode 100644 README.md create mode 100644 __init__.py create mode 100644 requirements.txt create mode 100644 test/Dimension_error.py create mode 100644 test/__init__.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a2830b --- /dev/null +++ b/README.md @@ -0,0 +1,49 @@ +# Keras-TextClassification + + +# keras_textclassification(代码主体,未完待续...) + - FastText + - TextCNN + - charCNN + - TextRNN + - TextRCNN + + +# run(运行, 以FastText为例) + - 1. 进入keras_textclassification/m01_FastText目录, + - 2. 训练: 运行 train.py, 例如: python train.py + - 3. 预测: 运行 predict.py, 例如: python predict.py + - 说明: 默认不带pre train的random embedding,训练和验证语料只有100条,完整语料移步下面data查看下载 + + +# keras_textclassification/data + - 数据下载 + ** github项目中只是上传部分数据,需要的前往链接: https://pan.baidu.com/s/1I3vydhmFEQ9nuPG2fDou8Q 提取码: rket + - baidu_qa_2019(百度qa问答语料,只取title作为分类样本,17个类,有一个是空'',已经压缩上传) + - baike_qa_train.csv + - baike_qa_valid.csv + - embeddings + - chinese_L-12_H-768_A-12(取谷歌预训练好点的模型,已经压缩上传) + - model + - 预训练模型存放地址 + +# 项目说明 + - 1. 构建了base基类(网络(graph)、向量嵌入(词、字、句子embedding)),后边的具体模型继承它们,代码简单 + - 2. 
conf存放项目数据、模型的地址, data存放数据和语料, etl为数据预处理模块, + + +# 模型与论文paper题与地址 +* FastText: [Bag of Tricks for Efficient Text Classification](https://arxiv.org/abs/1607.01759) +* TextCNN: [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882) +* charCNN: [Character-Aware Neural Language Models](https://arxiv.org/abs/1508.06615) +* TextRNN: [Recurrent Neural Network for Text Classification with Multi-Task Learning](https://www.ijcai.org/Proceedings/16/Papers/408.pdf) +* RCNN: [Recurrent Convolutional Neural Networks for Text Classification](http://www.nlpr.ia.ac.cn/cip/~liukang/liukangPageFile/Recurrent%20Convolutional%20Neural%20Networks%20for%20Text%20Classification.pdf) +* DCNN: [A Convolutional Neural Network for Modelling Sentences](https://arxiv.org/abs/1404.2188) + + +# 参考/感谢 +* 文本分类项目: [https://github.com/mosu027/TextClassification](https://github.com/mosu027/TextClassification) +* 文本分类看山杯: [https://github.com/brightmart/text_classification](https://github.com/brightmart/text_classification) +* Kashgari项目: [https://github.com/BrikerMan/Kashgari](https://github.com/BrikerMan/Kashgari) +* 文本分类lpty : [https://github.com/lpty/classifier](https://github.com/lpty/classifier) +* keras文本分类: [https://github.com/ShawnyXiao/TextClassification-Keras](https://github.com/ShawnyXiao/TextClassification-Keras) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..eb11d15 --- /dev/null +++ b/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: UTF-8 -*- +# !/usr/bin/python +# @time :2019/6/3 10:50 +# @author :Mo +# @function : \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4b3544a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +gensim==3.7.1 +jieba==0.39 +numpy==1.16.2 +pandas==0.23.4 +scikit-learn==0.19.1 +tflearn==0.3.2 +tqdm==4.31.1 +passlib==1.7.1 +keras==2.2.4 +tensorflow-gpu==1.12.0 +keras-bert==0.41.0 \ No newline at end of file diff --git a/test/Dimension_error.py 
b/test/Dimension_error.py
new file mode 100644
index 0000000..fa641d2
--- /dev/null
+++ b/test/Dimension_error.py
@@ -0,0 +1,60 @@
+# -*- coding: UTF-8 -*-
+# !/usr/bin/python
+# @time :2019/6/11 22:57
+# @author :Mo
+# @function :
+
+from keras.layers import Conv2D, MaxPooling2D, Input, Concatenate
+from keras.models import Model
+import keras.backend as K
+
+
+def incepm_v1(out_filters, input_shape) -> Model:
+    """Build an inception-style sub-model: three parallel towers, concatenated."""
+    # Join the towers on the channel axis, wherever the backend puts it.
+    channel_axis = -1 if K.image_data_format() == 'channels_last' else 1
+    input_img = Input(shape=input_shape)
+
+    tower_1 = Conv2D(out_filters, (1, 1), padding='same',
+                     activation='relu')(input_img)
+    tower_1 = Conv2D(out_filters, (3, 3), padding='same',
+                     activation='relu')(tower_1)
+
+    tower_2 = Conv2D(out_filters, (1, 1), padding='same',
+                     activation='relu')(input_img)
+    tower_2 = Conv2D(out_filters, (5, 5), padding='same',
+                     activation='relu')(tower_2)
+
+    tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_img)
+    tower_3 = Conv2D(out_filters, (1, 1), padding='same',
+                     activation='relu')(tower_3)
+
+    output = Concatenate(axis=channel_axis)([tower_1, tower_2, tower_3])
+    return Model(inputs=input_img, outputs=output)
+
+
+def Unetish_model1(image_shape=(3000, 3000, 3)):
+    """Build the U-Net-like model that uses ``incepm_v1`` as a nested layer."""
+    image = Input(shape=image_shape)
+
+    # First layer 96X96
+    conv1 = Conv2D(32, (3, 3), padding='same', activation='relu')(image)
+    conv1out = Conv2D(16, (1, 1), padding='same', activation='relu')(conv1)
+    conv1out = MaxPooling2D((2, 2), strides=(2, 2))(conv1out)
+    aux1out = Conv2D(16, (1, 1), padding='same', activation='relu')(conv1)
+
+    # Second layer 48x48
+    # Was: incepm_v1(64, conv1out.shape[1:]); K.int_shape returns a tuple of ints.
+    conv2 = incepm_v1(64, K.int_shape(conv1out)[1:])(conv1out)
+    conv2out = Conv2D(32, (1, 1), padding='same', activation='relu')(conv2)
+    conv2out = MaxPooling2D((2, 2), strides=(2, 2))(conv2out)
+    aux2out = Conv2D(32, (1, 1), padding='same', activation='relu')(conv2)
+
+    # ".... removed for sparsity"
+    model = Model(inputs=image, outputs=aux2out)
+    model.summary()
+    return model
+
+
+IMAGE_SIZE = 96
+Unet = Unetish_model1(image_shape=(3000, 3000, 3))
\ No newline at end of file
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..4b5ba51
--- /dev/null
+++ b/test/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: UTF-8 -*-
+# !/usr/bin/python
+# @time :2019/6/11 22:54
+# @author :Mo
+# @function :
\ No newline at end of file