bert layers
parent 6de490e1e6
commit be89b4ab22
File diff suppressed because one or more lines are too long
@@ -21,7 +21,7 @@ path_save_model = 'model_webank_tdt/bert_avt_cnn.h5' # 'bert_bi_lstm_pair.h5'
 # text-cnn
 filters = [3, 4, 5]
-num_filters = 512
+num_filters = 300
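The diff for the model file itself is suppressed above, so for orientation here is a generic, hedged sketch of how a filters/num_filters pair like this is typically consumed by a text-CNN head on top of BERT token embeddings. The function name text_cnn_head and the exact layer choices are illustrative assumptions, not code from this repository:

# Sketch only: text_cnn_head and the layer choices are assumptions,
# not code from this commit.
from keras.layers import Conv1D, GlobalMaxPooling1D, Concatenate

filters = [3, 4, 5]  # kernel sizes, as in the config above
num_filters = 300    # channels per kernel size (512 before this commit)

def text_cnn_head(x):
    # x: (batch, seq_len, hidden) token embeddings from BERT
    pooled = [GlobalMaxPooling1D()(Conv1D(num_filters, k, activation='relu')(x))
              for k in filters]
    return Concatenate(axis=-1)(pooled)  # (batch, num_filters * len(filters))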
@@ -29,7 +29,7 @@ num_filters = 512
 gpu_memory_fraction = 0.3
 
 # by default, take the output of the second-to-last layer as the sentence vector
-layer_indexes = [-2]
+layer_indexes = [-1]
 
 # maximum sequence length; for single texts it is recommended to lower this value
 max_seq_len = 98
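To see what the switch from [-2] to [-1] selects, a minimal check, assuming the layer_dict built in the encoder code below (13 tap points at Keras layer indexes 7, 15, ..., 103):

# Assumes the 8-layers-per-block arithmetic from the encoder code in this commit.
layer_dict = [7 + 8 * i for i in range(13)]  # [7, 15, ..., 103]
print(layer_dict[-2], layer_dict[-1])        # 95 103: second-to-last vs. last block output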
@@ -42,27 +42,27 @@ class KerasBertVector():
         print(len(model.layers))
         # lay = model.layers
         # 104 layers in total, of which the first eight cover token, pos, embed, etc.;
-        # then every 4 layers (MultiHeadAttention, Dropout, Add, LayerNormalization)
-        # 24 blocks in total
+        # then every 8 layers (MultiHeadAttention, Dropout, Add, LayerNormalization) resnet
+        # 12 blocks in total
         layer_dict = [7]
         layer_0 = 7
         for i in range(12):
-            layer_0 = layer_0 + 4
+            layer_0 = layer_0 + 8
             layer_dict.append(layer_0)
         # output the model itself
         if len(layer_indexes) == 0:
             encoder_layer = model.output
         # for classification: if only one layer is given, take just that layer's weights; an invalid index falls back below
         elif len(layer_indexes) == 1:
-            if layer_indexes[0] in [i+1 for i in range(12)]:
+            if layer_indexes[0] in [i+1 for i in range(13)]:
                 encoder_layer = model.get_layer(index=layer_dict[layer_indexes[0]]).output
             else:
-                encoder_layer = model.get_layer(index=layer_dict[-2]).output
+                encoder_layer = model.get_layer(index=layer_dict[-1]).output
         # otherwise iterate over the requested layers, pull out every layer's weights and concatenate them; shape: 768 * number of layers
         else:
-            # layer_indexes must be [1,2,3,......12...24]
+            # layer_indexes must be [1,2,3,......13]
            # all_layers = [model.get_layer(index=lay).output if lay is not 1 else model.get_layer(index=lay).output[0] for lay in layer_indexes]
-            all_layers = [model.get_layer(index=layer_dict[lay-1]).output if lay in [i+1 for i in range(12)]
+            all_layers = [model.get_layer(index=layer_dict[lay-1]).output if lay in [i+1 for i in range(13)]
                           else model.get_layer(index=layer_dict[-1]).output  # if given incorrectly, default to the last layer's output
                           for lay in layer_indexes]
         print(layer_indexes)
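The index arithmetic above is the heart of this hunk; a standalone sketch of what it now produces:

# The embedding output sits at Keras layer index 7, and each of the 12
# transformer blocks adds 8 Keras layers, giving 13 tap points in total.
layer_dict = [7]
layer_0 = 7
for i in range(12):
    layer_0 = layer_0 + 8
    layer_dict.append(layer_0)

print(layer_dict)       # [7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103]
print(len(layer_dict))  # 13, so valid layer_indexes values are 1..13

Note that the single-layer branch looks up layer_dict[layer_indexes[0]] with the 1-based value directly, while the multi-layer branch uses layer_dict[lay-1]; an index of 13 would therefore overflow the 13-entry list in the single-layer branch.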
@@ -27,7 +27,7 @@ path_tag_li = 'models/bilstm/tag_l_i.pkl'
 gpu_memory_fraction = 0.32
 
 # for NER all layers are extracted, of course; the sentence vector defaults to the second-to-last layer's output
-layer_indexes = [i for i in range(13)] # [-2]
+layer_indexes = [i+1 for i in range(13)] # [-2]
 
 # maximum sequence length
 max_seq_len = 50
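A quick illustration of what this one-character fix changes; the values follow directly from the two comprehensions:

old = [i for i in range(13)]      # [0, 1, ..., 12]; 0 is not a valid tap point
new = [i + 1 for i in range(13)]  # [1, 2, ..., 13]; 1 is the embedding, 2..13 the blocks
print(old[:3], new[:3], new[-1])  # [0, 1, 2] [1, 2, 3] 13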
@@ -49,7 +49,7 @@ class KerasBertEmbedding():
         # 12 layers in total, plus the initial unprocessed layer at the very start (it can be thought of as the input)
         layer_dict = [7]
         layer_0 = 7
-        for i in range(13):
+        for i in range(12):
             layer_0 = layer_0 + 8
             layer_dict.append(layer_0)
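Why range(13) was wrong here: the list is already seeded with the embedding tap point, so 13 more iterations would yield 14 entries, the last of which points past the model's 104 layers. A self-contained check (the helper build is illustrative, not from the repo):

def build(n_blocks):
    layer_dict, layer_0 = [7], 7
    for _ in range(n_blocks):
        layer_0 += 8
        layer_dict.append(layer_0)
    return layer_dict

print(len(build(13)), build(13)[-1])  # 14 111: index 111 is out of range
print(len(build(12)), build(12)[-1])  # 13 103: matches the 104-layer model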
@@ -58,7 +58,7 @@ class KerasBertEmbedding():
             encoder_layer = model.output
         # for classification: if only one layer is given, take just that layer's weights; an invalid index falls back below
         elif len(layer_indexes) == 1:
-            if layer_indexes[0] in [i+1 for i in range(12)]:
+            if layer_indexes[0] in [i+1 for i in range(13)]:
                 encoder_layer = model.get_layer(index=layer_dict[layer_indexes[0]]).output
             else:
                 encoder_layer = model.get_layer(index=layer_dict[-1]).output
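For the multi-layer case mentioned in the vector code (shape 768 * number of layers), a hedged sketch of how the selected outputs can be merged. The hunks do not show the merge op itself, so Concatenate and the example indexes are assumptions:

# Assumes `model` is the loaded keras-bert model and `layer_dict` is as built above.
from keras.layers import Concatenate

layer_indexes = [1, 12, 13]  # hypothetical: embedding plus the last two blocks
all_layers = [model.get_layer(index=layer_dict[lay - 1]).output
              for lay in layer_indexes]
encoder_layer = Concatenate(axis=-1)(all_layers)  # (batch, seq_len, 768 * 3)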
@@ -21,6 +21,6 @@ vocab_file = model_dir + '/vocab.txt'
 # GPU memory usage fraction
 gpu_memory_fraction = 0.32
 # by default, take the output of the second-to-last layer as the sentence vector
-layer_indexes = [-2]
+layer_indexes = [-2] # valid values: 1, 2, 3, 4, 5, 6, 7..., 13, where 1 is the embedding layer
 # maximum sequence length
 max_seq_len = 32