From dbfbe71602aee024f95098fe0e3daecd26a36131 Mon Sep 17 00:00:00 2001
From: yongzhuo <2714618994@qq.com>
Date: Sun, 13 Oct 2019 09:08:31 +0800
Subject: [PATCH] fix split of train,test

---
 README.md                                     |  2 +-
 .../fast_text}/__init__.py                    |  2 +-
 .../data_preprocess/data_split.py             | 46 +++++++++++++++++++
 keras_textclassification/m00_Bert/predict.py  |  2 +-
 keras_textclassification/m00_Bert/train.py    |  2 +-
 keras_textclassification/m00_Xlnet/predict.py |  2 +-
 keras_textclassification/m00_Xlnet/train.py   |  2 +-
 .../m01_FastText/predict.py                   |  2 +-
 .../m01_FastText/train.py                     |  2 +-
 .../m03_CharCNN/predict.py                    |  2 +-
 keras_textclassification/m03_CharCNN/train.py |  2 +-
 .../m03_CharCNN/train_zhang.py                |  2 +-
 .../m04_TextRNN/predict.py                    |  2 +-
 keras_textclassification/m04_TextRNN/train.py |  2 +-
 .../m05_TextRCNN/predict.py                   |  2 +-
 .../m05_TextRCNN/train.py                     |  2 +-
 .../m06_TextDCNN/predict.py                   |  2 +-
 .../m06_TextDCNN/train.py                     |  2 +-
 .../m07_TextDPCNN/predict.py                  |  2 +-
 .../m07_TextDPCNN/train.py                    |  2 +-
 .../m08_TextVDCNN/predict.py                  |  2 +-
 .../m08_TextVDCNN/train.py                    |  2 +-
 .../m09_TextCRNN/predict.py                   |  2 +-
 .../m09_TextCRNN/train.py                     |  2 +-
 .../m10_DeepMoji/predict.py                   |  2 +-
 .../m10_DeepMoji/train.py                     |  2 +-
 .../m11_SelfAttention/predict.py              |  2 +-
 .../m11_SelfAttention/train.py                |  2 +-
 keras_textclassification/m12_HAN/predict.py   |  2 +-
 keras_textclassification/m12_HAN/train.py     |  2 +-
 .../m13_CapsuleNet/predict.py                 |  2 +-
 .../m13_CapsuleNet/train.py                   |  2 +-
 .../m14_Transformer/predict.py                |  2 +-
 .../m14_Transformer/train.py                  |  2 +-
 test/multi_label_class/train_multi.py         |  2 +-
 test/tet_char_bert_embedding.py               |  2 +-
 test/tet_char_random_embedding.py             |  2 +-
 test/tet_char_word2vec_embedding.py           |  2 +-
 test/tet_char_xlnet_embedding.py              |  2 +-
 test/tet_word_random_embedding.py             |  2 +-
 test/tet_word_word2vec_embedding.py           |  2 +-
 41 files changed, 86 insertions(+), 40 deletions(-)
 rename keras_textclassification/data/{embeddings/chinese_L-12_H-768_A-12 => model/fast_text}/__init__.py (71%)
 create mode 100644 keras_textclassification/data_preprocess/data_split.py

diff --git a/README.md b/README.md
index 2b92fcf..c0a9ebe 100644
--- a/README.md
+++ b/README.md
@@ -199,7 +199,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/data/embeddings/chinese_L-12_H-768_A-12/__init__.py b/keras_textclassification/data/model/fast_text/__init__.py
similarity index 71%
rename from keras_textclassification/data/embeddings/chinese_L-12_H-768_A-12/__init__.py
rename to keras_textclassification/data/model/fast_text/__init__.py
index 2eb3ecb..360022a 100644
--- a/keras_textclassification/data/embeddings/chinese_L-12_H-768_A-12/__init__.py
+++ b/keras_textclassification/data/model/fast_text/__init__.py
@@ -1,5 +1,5 @@
 # -*- coding: UTF-8 -*-
 # !/usr/bin/python
-# @time     :2019/8/28 14:13
+# @time     :2019/10/13 9:00
 # @author   :Mo
 # @function :
\ No newline at end of file
diff --git a/keras_textclassification/data_preprocess/data_split.py b/keras_textclassification/data_preprocess/data_split.py
new file mode 100644
index 0000000..6824ab7
--- /dev/null
+++ b/keras_textclassification/data_preprocess/data_split.py
@@ -0,0 +1,46 @@
+# -*- coding: UTF-8 -*-
+# !/usr/bin/python
+# @time     :2019/10/13 8:07
+# @author   :Mo
+# @function :数据切分为训练集,验证集
+
+
+from sklearn.model_selection import StratifiedKFold
+import pandas as pd
+import numpy as np
+
+from keras_textclassification.data_preprocess.text_preprocess import txt_write
+
+
+def data_kfold(path_org_data, k_fold_split=10, path_save_dir=""):
+    """
+        Split a labelled corpus into one stratified train file and one validation file.
+    :param path_org_data: str, path of the source corpus, a utf-8 csv with a header row and label,ques columns
+    :param k_fold_split: int, number of folds; each class should have at least k_fold_split sentences
+    :param path_save_dir: str, directory that receives lq_train.csv and lq_valid.csv
+    :return: None, the two csv files are written through txt_write
+    """
+    import os  # local import; os.path.join adds the separator that plain "+" leaves out
+
+    label_ques = pd.read_csv(path_org_data, names=["label","ques"], usecols=["label","ques"])
+    # names= makes pandas read the header row as data, so the first row is dropped
+    quess = np.array(label_ques["ques"].values.tolist()[1:])
+    labels = np.array(label_ques["label"].values.tolist()[1:])
+
+    def to_lines(ys, xs):
+        # ASCII commas inside a field would break the two-column csv, so they become full-width commas
+        return [y.replace(",","，").strip() + "," + x.replace(",","，").strip() + "\n" for y, x in zip(ys, xs)]
+
+    # only the first fold is used: its test part becomes the validation set, the rest is train
+    train_index, dev_index = next(StratifiedKFold(n_splits=k_fold_split).split(quess, labels))
+    txt_write(["label,ques\n"] + to_lines(labels[train_index], quess[train_index]),
+              os.path.join(path_save_dir, "lq_train.csv"))
+    txt_write(["label,ques\n"] + to_lines(labels[dev_index], quess[dev_index]),
+              os.path.join(path_save_dir, "lq_valid.csv"))
+
+if __name__ == '__main__':
+    # Demo run: split the Baidu QA 2019 corpus and write both csv files next to it.
+    from keras_textclassification.conf.path_config import path_root
+    data_dir = path_root + "/data/baidu_qa_2019/"  # holds the source corpus and receives the output
+    k_fold_split = 10  # passed through to data_kfold instead of repeating the literal
+    data_kfold(path_org_data=data_dir + "baike_qa_train.csv", k_fold_split=k_fold_split, path_save_dir=data_dir)
\ No newline at end of file
diff --git a/keras_textclassification/m00_Bert/predict.py b/keras_textclassification/m00_Bert/predict.py
index 79e71da..e5e8317 100644
--- a/keras_textclassification/m00_Bert/predict.py
+++ b/keras_textclassification/m00_Bert/predict.py
@@ -129,7 +129,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 if __name__=="__main__":
 
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m00_Bert/train.py b/keras_textclassification/m00_Bert/train.py
index fb0dd73..e2ec4de 100644
--- a/keras_textclassification/m00_Bert/train.py
+++ b/keras_textclassification/m00_Bert/train.py
@@ -90,7 +90,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1) # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m00_Xlnet/predict.py b/keras_textclassification/m00_Xlnet/predict.py
index 8e8ea35..7a5fcc5 100644
--- a/keras_textclassification/m00_Xlnet/predict.py
+++ b/keras_textclassification/m00_Xlnet/predict.py
@@ -129,7 +129,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 if __name__=="__main__":
 
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m00_Xlnet/train.py b/keras_textclassification/m00_Xlnet/train.py
index 82a95f0..73779a0 100644
--- a/keras_textclassification/m00_Xlnet/train.py
+++ b/keras_textclassification/m00_Xlnet/train.py
@@ -97,7 +97,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1) # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m01_FastText/predict.py b/keras_textclassification/m01_FastText/predict.py
index 1c4bc98..0fcebcc 100644
--- a/keras_textclassification/m01_FastText/predict.py
+++ b/keras_textclassification/m01_FastText/predict.py
@@ -128,7 +128,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m01_FastText/train.py b/keras_textclassification/m01_FastText/train.py
index 2b1ca50..7c618a4 100644
--- a/keras_textclassification/m01_FastText/train.py
+++ b/keras_textclassification/m01_FastText/train.py
@@ -82,7 +82,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m03_CharCNN/predict.py b/keras_textclassification/m03_CharCNN/predict.py
index 17f066e..b1798bc 100644
--- a/keras_textclassification/m03_CharCNN/predict.py
+++ b/keras_textclassification/m03_CharCNN/predict.py
@@ -119,7 +119,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    # pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    # pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m03_CharCNN/train.py b/keras_textclassification/m03_CharCNN/train.py
index bcbffed..a145c50 100644
--- a/keras_textclassification/m03_CharCNN/train.py
+++ b/keras_textclassification/m03_CharCNN/train.py
@@ -98,7 +98,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1) # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m03_CharCNN/train_zhang.py b/keras_textclassification/m03_CharCNN/train_zhang.py
index 22a9221..86c857d 100644
--- a/keras_textclassification/m03_CharCNN/train_zhang.py
+++ b/keras_textclassification/m03_CharCNN/train_zhang.py
@@ -97,4 +97,4 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1) # sample条件下设为1,否则训练语料可能会很少
diff --git a/keras_textclassification/m04_TextRNN/predict.py b/keras_textclassification/m04_TextRNN/predict.py
index 6a9a380..ed23057 100644
--- a/keras_textclassification/m04_TextRNN/predict.py
+++ b/keras_textclassification/m04_TextRNN/predict.py
@@ -117,7 +117,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m04_TextRNN/train.py b/keras_textclassification/m04_TextRNN/train.py
index 060f812..1a747fb 100644
--- a/keras_textclassification/m04_TextRNN/train.py
+++ b/keras_textclassification/m04_TextRNN/train.py
@@ -84,7 +84,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m05_TextRCNN/predict.py b/keras_textclassification/m05_TextRCNN/predict.py
index 38a73d0..9f2c4cc 100644
--- a/keras_textclassification/m05_TextRCNN/predict.py
+++ b/keras_textclassification/m05_TextRCNN/predict.py
@@ -117,7 +117,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m05_TextRCNN/train.py b/keras_textclassification/m05_TextRCNN/train.py
index 8e2fcb3..521473b 100644
--- a/keras_textclassification/m05_TextRCNN/train.py
+++ b/keras_textclassification/m05_TextRCNN/train.py
@@ -93,7 +93,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1) # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m06_TextDCNN/predict.py b/keras_textclassification/m06_TextDCNN/predict.py
index 339ca3c..7c46a9d 100644
--- a/keras_textclassification/m06_TextDCNN/predict.py
+++ b/keras_textclassification/m06_TextDCNN/predict.py
@@ -117,7 +117,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m06_TextDCNN/train.py b/keras_textclassification/m06_TextDCNN/train.py
index 9504100..f90abdf 100644
--- a/keras_textclassification/m06_TextDCNN/train.py
+++ b/keras_textclassification/m06_TextDCNN/train.py
@@ -86,7 +86,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m07_TextDPCNN/predict.py b/keras_textclassification/m07_TextDPCNN/predict.py
index 9d6bdcf..5007eb7 100644
--- a/keras_textclassification/m07_TextDPCNN/predict.py
+++ b/keras_textclassification/m07_TextDPCNN/predict.py
@@ -117,7 +117,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m07_TextDPCNN/train.py b/keras_textclassification/m07_TextDPCNN/train.py
index 1bae2ac..9ff8664 100644
--- a/keras_textclassification/m07_TextDPCNN/train.py
+++ b/keras_textclassification/m07_TextDPCNN/train.py
@@ -90,7 +90,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m08_TextVDCNN/predict.py b/keras_textclassification/m08_TextVDCNN/predict.py
index 4e6f6e8..cbec24f 100644
--- a/keras_textclassification/m08_TextVDCNN/predict.py
+++ b/keras_textclassification/m08_TextVDCNN/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m08_TextVDCNN/train.py b/keras_textclassification/m08_TextVDCNN/train.py
index 8b593db..a8e0447 100644
--- a/keras_textclassification/m08_TextVDCNN/train.py
+++ b/keras_textclassification/m08_TextVDCNN/train.py
@@ -98,7 +98,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m09_TextCRNN/predict.py b/keras_textclassification/m09_TextCRNN/predict.py
index 18247a3..0bfd22a 100644
--- a/keras_textclassification/m09_TextCRNN/predict.py
+++ b/keras_textclassification/m09_TextCRNN/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m09_TextCRNN/train.py b/keras_textclassification/m09_TextCRNN/train.py
index 953e79b..3fd3d1f 100644
--- a/keras_textclassification/m09_TextCRNN/train.py
+++ b/keras_textclassification/m09_TextCRNN/train.py
@@ -93,7 +93,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.001) # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1) # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m10_DeepMoji/predict.py b/keras_textclassification/m10_DeepMoji/predict.py
index 0c1236c..2402984 100644
--- a/keras_textclassification/m10_DeepMoji/predict.py
+++ b/keras_textclassification/m10_DeepMoji/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m10_DeepMoji/train.py b/keras_textclassification/m10_DeepMoji/train.py
index c09e799..b1d968f 100644
--- a/keras_textclassification/m10_DeepMoji/train.py
+++ b/keras_textclassification/m10_DeepMoji/train.py
@@ -91,7 +91,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__ == "__main__":
-    train(rate=0.001)  # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1)  # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m11_SelfAttention/predict.py b/keras_textclassification/m11_SelfAttention/predict.py
index 12a65f5..60a1ee7 100644
--- a/keras_textclassification/m11_SelfAttention/predict.py
+++ b/keras_textclassification/m11_SelfAttention/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m11_SelfAttention/train.py b/keras_textclassification/m11_SelfAttention/train.py
index 535a2fd..f9d4f95 100644
--- a/keras_textclassification/m11_SelfAttention/train.py
+++ b/keras_textclassification/m11_SelfAttention/train.py
@@ -86,7 +86,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__ == "__main__":
-    train(rate=0.001)  # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1)  # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m12_HAN/predict.py b/keras_textclassification/m12_HAN/predict.py
index 1c4c218..97dae2a 100644
--- a/keras_textclassification/m12_HAN/predict.py
+++ b/keras_textclassification/m12_HAN/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m12_HAN/train.py b/keras_textclassification/m12_HAN/train.py
index 3bafad1..c6a5467 100644
--- a/keras_textclassification/m12_HAN/train.py
+++ b/keras_textclassification/m12_HAN/train.py
@@ -90,7 +90,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__ == "__main__":
-    train(rate=0.01)  # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1)  # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m13_CapsuleNet/predict.py b/keras_textclassification/m13_CapsuleNet/predict.py
index 51c2672..eed9a51 100644
--- a/keras_textclassification/m13_CapsuleNet/predict.py
+++ b/keras_textclassification/m13_CapsuleNet/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m13_CapsuleNet/train.py b/keras_textclassification/m13_CapsuleNet/train.py
index 2169575..d1039ee 100644
--- a/keras_textclassification/m13_CapsuleNet/train.py
+++ b/keras_textclassification/m13_CapsuleNet/train.py
@@ -90,7 +90,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__ == "__main__":
-    train(rate=0.01)  # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1)  # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/keras_textclassification/m14_Transformer/predict.py b/keras_textclassification/m14_Transformer/predict.py
index 1b43a58..2276e59 100644
--- a/keras_textclassification/m14_Transformer/predict.py
+++ b/keras_textclassification/m14_Transformer/predict.py
@@ -118,7 +118,7 @@ def pred_input(path_hyper_parameter=path_hyper_parameters):
 
 if __name__=="__main__":
     # 测试集预测
-    pred_tet(path_test=path_baidu_qa_2019_valid, rate=0.01) # sample条件下设为1,否则训练语料可能会很少
+    pred_tet(path_test=path_baidu_qa_2019_valid, rate=1) # sample条件下设为1,否则训练语料可能会很少
 
     # 可输入 input 预测
     pred_input()
diff --git a/keras_textclassification/m14_Transformer/train.py b/keras_textclassification/m14_Transformer/train.py
index d5a8992..c754bb1 100644
--- a/keras_textclassification/m14_Transformer/train.py
+++ b/keras_textclassification/m14_Transformer/train.py
@@ -95,7 +95,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__ == "__main__":
-    train(rate=0.01)  # sample条件下设为1,否则训练语料可能会很少
+    train(rate=1)  # sample条件下设为1,否则训练语料可能会很少
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/test/multi_label_class/train_multi.py b/test/multi_label_class/train_multi.py
index 6b7770e..0f8583e 100644
--- a/test/multi_label_class/train_multi.py
+++ b/test/multi_label_class/train_multi.py
@@ -84,7 +84,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/test/tet_char_bert_embedding.py b/test/tet_char_bert_embedding.py
index 9ca2faf..26e9f5d 100644
--- a/test/tet_char_bert_embedding.py
+++ b/test/tet_char_bert_embedding.py
@@ -82,7 +82,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/test/tet_char_random_embedding.py b/test/tet_char_random_embedding.py
index c1b5f25..d7f7173 100644
--- a/test/tet_char_random_embedding.py
+++ b/test/tet_char_random_embedding.py
@@ -81,7 +81,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/test/tet_char_word2vec_embedding.py b/test/tet_char_word2vec_embedding.py
index 4b73e8d..8f46f0d 100644
--- a/test/tet_char_word2vec_embedding.py
+++ b/test/tet_char_word2vec_embedding.py
@@ -82,7 +82,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/test/tet_char_xlnet_embedding.py b/test/tet_char_xlnet_embedding.py
index ccdb8a8..c691f6b 100644
--- a/test/tet_char_xlnet_embedding.py
+++ b/test/tet_char_xlnet_embedding.py
@@ -85,4 +85,4 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
diff --git a/test/tet_word_random_embedding.py b/test/tet_word_random_embedding.py
index 27d0d19..168ed59 100644
--- a/test/tet_word_random_embedding.py
+++ b/test/tet_word_random_embedding.py
@@ -81,7 +81,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
diff --git a/test/tet_word_word2vec_embedding.py b/test/tet_word_word2vec_embedding.py
index c498e7f..f025837 100644
--- a/test/tet_word_word2vec_embedding.py
+++ b/test/tet_word_word2vec_embedding.py
@@ -81,7 +81,7 @@ def train(hyper_parameters=None, rate=1.0):
 
 
 if __name__=="__main__":
-    train(rate=0.01)
+    train(rate=1)
     # 注意: 4G的1050Ti的GPU、win10下batch_size=32,len_max=20, gpu<=0.87, 应该就可以bert-fineture了。
     # 全量数据训练一轮(batch_size=32),就能达到80%准确率(验证集), 效果还是不错的
     # win10下出现过错误,gpu、len_max、batch_size配小一点就好:ailed to allocate 3.56G (3822520832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory