Add files via upload

yongzhuo 2019-06-13 23:34:38 +08:00 committed by GitHub
parent dcc40f4e43
commit 05d13a1a2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 130 additions and 0 deletions

49
README.md Normal file

@@ -0,0 +1,49 @@
# Keras-TextClassification
# keras_textclassification core code, work in progress...
- FastText
- TextCNN
- charCNN
- TextRNN
- TextRCNN
# Run (with FastText as an example)
- 1. Enter the keras_textclassification/m01_FastText directory
- 2. Training: run train.py, e.g. python train.py
- 3. Prediction: run predict.py, e.g. python predict.py
- Note: by default random embeddings without pre-training are used, and the bundled training and validation corpora contain only 100 samples; for the full corpora, see the data section below for the download (a Python sketch of these run steps follows this list)
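
Below is a minimal sketch of the run steps above, driven from Python instead of a shell. The directory and script names come from the steps above; it assumes the working directory is the repository root and that `python` on the path points at an environment with the packages from requirements.txt.

```python
# Minimal sketch of the run steps above, driven from Python instead of a shell.
# Directory and script names follow the README; adjust if your checkout differs.
import subprocess

workdir = "keras_textclassification/m01_FastText"

# Step 2: train FastText (random embeddings, 100-sample demo corpus by default).
subprocess.run(["python", "train.py"], cwd=workdir, check=True)

# Step 3: predict with the model produced by training.
subprocess.run(["python", "predict.py"], cwd=workdir, check=True)
```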
# keras_textclassification/data
- Data download
  - Only part of the data is uploaded to the GitHub project; for the rest, use the link: https://pan.baidu.com/s/1I3vydhmFEQ9nuPG2fDou8Q extraction code: rket
- baidu_qa_2019 (Baidu QA corpus; only the title is used as the classification sample; 17 classes, one of which is the empty string ''; already compressed and uploaded)
- baike_qa_train.csv
- baike_qa_valid.csv
- embeddings
- chinese_L-12_H-768_A-12 (Google's pre-trained Chinese BERT model; already compressed and uploaded)
- model
- Storage location for pre-trained models
# Project description
- 1. Base classes are provided for the network (graph) and for vector embeddings (word, character and sentence embedding); the concrete models inherit from them, which keeps the code simple (see the sketch after this list)
- 2. conf stores the paths to the project's data and models, data holds the datasets and corpora, and etl is the data-preprocessing module
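
A rough sketch of this base-class layout is shown below. The class names, method names and hyper-parameter keys are illustrative assumptions, not the project's actual API.

```python
# Sketch of the base-class pattern described above.
# Class names and hyper-parameter keys are assumptions, not the project's real API.
from keras.layers import Dense, Embedding, GlobalAveragePooling1D, Input
from keras.models import Model


class BaseGraph(object):
    """Base network class; concrete models override create_model()."""
    def __init__(self, hyper_parameters):
        self.hyper_parameters = hyper_parameters
        self.model = None
        self.create_model(hyper_parameters)

    def create_model(self, hyper_parameters):
        raise NotImplementedError

    def fit(self, x_train, y_train, **kwargs):
        return self.model.fit(x_train, y_train, **kwargs)


class FastTextGraph(BaseGraph):
    """FastText-style classifier: embed tokens, average, then softmax."""
    def create_model(self, hp):
        x_in = Input(shape=(hp["len_max"],))
        x = Embedding(hp["vocab_size"], hp["embed_size"])(x_in)
        x = GlobalAveragePooling1D()(x)
        out = Dense(hp["num_classes"], activation="softmax")(x)
        self.model = Model(inputs=x_in, outputs=out)
        self.model.compile(optimizer="adam", loss="categorical_crossentropy",
                           metrics=["accuracy"])


# Example usage with illustrative hyper-parameters:
# graph = FastTextGraph({"len_max": 50, "vocab_size": 20000,
#                        "embed_size": 300, "num_classes": 17})
```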
# Models and papers (titles and links)
* FastText: [Bag of Tricks for Efficient Text Classification](https://arxiv.org/abs/1607.01759)
* TextCNN: [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882) (a minimal Keras sketch of this architecture follows this list)
* charCNN: [Character-Aware Neural Language Models](https://arxiv.org/abs/1508.06615)
* TextRNN: [Recurrent Neural Network for Text Classification with Multi-Task Learning](https://www.ijcai.org/Proceedings/16/Papers/408.pdf)
* RCNN: [Recurrent Convolutional Neural Networks for Text Classification](http://www.nlpr.ia.ac.cn/cip/~liukang/liukangPageFile/Recurrent%20Convolutional%20Neural%20Networks%20for%20Text%20Classification.pdf)
* DCNN: [A Convolutional Neural Network for Modelling Sentences](https://arxiv.org/abs/1404.2188)
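
For reference, here is a minimal Keras sketch of the TextCNN architecture from the paper above: parallel 1-D convolutions with several kernel widths over the embedded sequence, max-over-time pooling, concatenation, dropout and a softmax layer. The hyper-parameter values are illustrative, not the project's configuration.

```python
# Minimal TextCNN sketch (Kim 2014); hyper-parameter values are illustrative only.
from keras.layers import (Concatenate, Conv1D, Dense, Dropout, Embedding,
                          GlobalMaxPooling1D, Input)
from keras.models import Model


def text_cnn(len_max=50, vocab_size=20000, embed_size=300, num_classes=17):
    x_in = Input(shape=(len_max,))
    emb = Embedding(vocab_size, embed_size)(x_in)
    pooled = []
    for width in (3, 4, 5):  # one convolution branch per kernel width
        conv = Conv1D(128, width, activation='relu')(emb)
        pooled.append(GlobalMaxPooling1D()(conv))
    x = Concatenate()(pooled)
    x = Dropout(0.5)(x)
    out = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=x_in, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
```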
# References / Acknowledgements
* Text classification project: [https://github.com/mosu027/TextClassification](https://github.com/mosu027/TextClassification)
* Text classification (Kanshan Cup): [https://github.com/brightmart/text_classification](https://github.com/brightmart/text_classification)
* Kashgari project: [https://github.com/BrikerMan/Kashgari](https://github.com/BrikerMan/Kashgari)
* Text classification by lpty: [https://github.com/lpty/classifier](https://github.com/lpty/classifier)
* Keras text classification: [https://github.com/ShawnyXiao/TextClassification-Keras](https://github.com/ShawnyXiao/TextClassification-Keras)

5
__init__.py Normal file

@@ -0,0 +1,5 @@
# -*- coding: UTF-8 -*-
# !/usr/bin/python
# @time :2019/6/3 10:50
# @author :Mo
# @function :

11
requirements.txt Normal file

@@ -0,0 +1,11 @@
gensim==3.7.1
jieba==0.39
numpy==1.16.2
pandas==0.23.4
scikit-learn==0.19.1
tflearn==0.3.2
tqdm==4.31.1
passlib==1.7.1
keras==2.2.4
tensorflow-gpu==1.12.0
keras-bert==0.41.0

60
test/Dimension_error.py Normal file

@@ -0,0 +1,60 @@
# -*- coding: UTF-8 -*-
# !/usr/bin/python
# @time :2019/6/11 22:57
# @author :Mo
# @function :
from keras.layers import Conv2D, MaxPooling2D, Input, Concatenate
from keras.models import Model
import keras.backend as K
"""This is the "inception" module."""
def incepm_v1(out_filters, input_shape)->Model:
input_img = Input(shape=input_shape)
tower_1 = Conv2D(out_filters, (1, 1), padding='same',
activation='relu')(input_img)
tower_1 = Conv2D(out_filters, (3, 3), padding='same',
activation='relu')(tower_1)
tower_2 = Conv2D(out_filters, (1, 1), padding='same',
activation='relu')(input_img)
tower_2 = Conv2D(out_filters, (5, 5), padding='same',
activation='relu')(tower_2)
tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_img)
tower_3 = Conv2D(out_filters, (1, 1), padding='same',
activation='relu')(tower_3)
output = Concatenate(axis=1)([tower_1, tower_2, tower_3])
model = Model(inputs=input_img, outputs=output)
return model
"""This is then used in the following model"""
def Unetish_model1(image_shape=(3000, 3000, 3)):
image = Input(shape=image_shape)
#First layer 96X96
conv1 = Conv2D(32, (3,3),padding='same', activation = 'relu')(image)
conv1out = Conv2D(16, (1,1),padding = 'same', activation =
'relu')(conv1)
conv1out = MaxPooling2D((2,2), strides = (2,2))(conv1out)
aux1out = Conv2D(16, (1,1), padding = 'same', activation = 'relu')(conv1)
#Second layer 48x48
#conv2 = incepm_v1(64, conv1out.shape[1:])(conv1out)
conv2 = incepm_v1(64, K.int_shape(conv1out)[1:])(conv1out)
conv2out = Conv2D(32, (1,1), padding = 'same', activation =
'relu')(conv2)
conv2out = MaxPooling2D((2,2), strides = (2,2))(conv2out)
aux2out = Conv2D(32, (1,1), padding = 'same', activation =
'relu')(conv2)
#".... removed for sparsity"
model = Model(inputs =image, outputs = aux2out)
model.summary()
return model
IMAGE_SIZE = 96
Unet = Unetish_model1(image_shape=(3000, 3000, 3))
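
For context on the fix above, a minimal standalone check (assuming Keras 2.2.4 on the TensorFlow 1.x backend listed in requirements.txt): `tensor.shape` returns a TensorShape of Dimension objects, which `Input(shape=...)` cannot consume directly, while `K.int_shape` returns plain Python ints (with None for the batch axis).

```python
# Minimal check of the shape behaviour behind the fix above
# (assumes Keras 2.2.4 with the TensorFlow 1.x backend from requirements.txt).
from keras.layers import Conv2D, Input
import keras.backend as K

x = Input(shape=(96, 96, 3))
y = Conv2D(8, (3, 3), padding='same')(x)

print(y.shape)             # TensorShape of Dimension objects, e.g. (?, 96, 96, 8)
print(K.int_shape(y))      # (None, 96, 96, 8) -- plain Python ints
print(K.int_shape(y)[1:])  # (96, 96, 8) -- safe to pass to Input(shape=...)
```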

5
test/__init__.py Normal file

@@ -0,0 +1,5 @@
# -*- coding: UTF-8 -*-
# !/usr/bin/python
# @time :2019/6/11 22:54
# @author :Mo
# @function :