nlp_xiaojiang/FeatureProject/xlnet/tet_xlnet_keras_sim.py
2019-11-12 19:59:53 +08:00

82 lines
2.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: UTF-8 -*-
# !/usr/bin/python
# @time :2019/5/7 20:27
# @author :Mo
# @function :test xlnet sentence encoding and the cosine similarity of two questions
def calculate_count(count=1000):
    """Measure the average time of a single xlnet sentence encoding.

    Encodes one fixed test sentence ``count`` times, then prints the vector
    returned by the last call followed by the average seconds per call.

    :param count: number of encode calls to time; must be at least 1.
        Defaults to 1000, the value the benchmark originally hard-coded.
    :raises ValueError: if ``count`` is smaller than 1.
    :return: None
    """
    if count < 1:
        # With zero iterations there is no vector to print and the average
        # would divide by zero, so reject the request up front.
        raise ValueError("count must be >= 1, got {!r}".format(count))

    # Function-local imports, matching how the rest of this file imports.
    from FeatureProject.xlnet.extract_keras_xlnet_feature import KerasXlnetVector
    import time

    xlnet_vector = KerasXlnetVector()
    print("xlnet start ok!")

    # perf_counter() is monotonic and high resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    time_start = time.perf_counter()
    for _ in range(count):
        vector = xlnet_vector.xlnet_encode(["yx你知道吗我很喜欢你呀在一起在一起在一起哈哈哈哈"])
    time_avg = (time.perf_counter() - time_start) / count

    print(vector)
    print(time_avg)
    # Averages recorded by the original author (seconds per call):
    #   0.12605296468734742  win10 gpu avg
    #   0.01629048466682434  linux cpu avg
def cosine_distance(v1, v2):
    """Return the cosine similarity of two vectors.

    Despite the historical name, the value is the cosine of the angle
    between the vectors (1 = same direction, 0 = orthogonal, -1 = opposite),
    not a distance.

    :param v1: first vector; a list, tuple or numpy array.
    :param v2: second vector, same length as ``v1``.
    :return: ``dot(v1, v2) / (|v1| * |v2|)``, or ``0.0`` when either vector
        has zero length (the cosine is undefined there).
    """
    import numpy as np

    v1 = np.asarray(v1)
    v2 = np.asarray(v2)
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    # Only a zero-norm vector makes the cosine undefined. A vector that
    # merely contains some zero components must still be compared normally.
    if norm_product == 0:
        return 0.0
    return np.dot(v1, v2) / norm_product


def scale_zoom(rate):
    """Rescale ``rate`` with the exponential formula ``(1 + e**-rate) / 2``.

    The original comment labelled this "sig" scaling. It is currently only
    referenced by disabled experiment code in ``sim_two_question``.

    :param rate: value convertible to ``float``.
    :return: the rescaled value.
    """
    import numpy as np

    return (1 + np.exp(-float(rate))) / 2


def scale_triangle(rate):
    """Rescale ``rate`` with the sine formula ``sin(rate * pi/2 - pi/2)``.

    It is currently only referenced by disabled experiment code in
    ``sim_two_question``.

    :param rate: numeric value.
    :return: the rescaled value as a ``float``.
    """
    import math

    return math.sin(rate * math.pi / 2 - math.pi / 2)


def sim_two_question():
    """Interactively print the cosine similarity of two typed questions.

    Repeatedly prompts for two questions on stdin, encodes each one with
    ``KerasXlnetVector.xlnet_encode`` and prints the cosine similarity of
    the first vector of each result. The loop ends quietly when stdin is
    exhausted (EOF) or the user presses Ctrl-C.

    :return: None
    """
    # Function-local import, matching how the rest of this file imports.
    from FeatureProject.xlnet.extract_keras_xlnet_feature import KerasXlnetVector

    xlnet_vector = KerasXlnetVector()
    print("xlnet start ok!")
    while True:
        try:
            print("input ques-1: ")
            ques_1 = input()
            print("input ques_2: ")
            ques_2 = input()
        except (EOFError, KeyboardInterrupt):
            # No more input: leave the prompt loop instead of crashing.
            break

        vector_1 = xlnet_vector.xlnet_encode([ques_1])
        vector_2 = xlnet_vector.xlnet_encode([ques_2])
        sim = cosine_distance(vector_1[0], vector_2[0])

        # Disabled post-processing experiments kept from the original file.
        # The sklearn variants need `from sklearn import preprocessing`.
        # sim_list = [sim, 0, 0.2, 0.4, 0.6, 0.8, 1.0]
        # sim = preprocessing.scale(sim_list)[0]
        # sim = preprocessing.MinMaxScaler(feature_range=(0, 1)).fit_transform(sim_list)[0]
        # sim_1 = preprocessing.normalize(sim_list, norm='l1')[0]
        # sim_2 = preprocessing.normalize(sim_list, norm='l2')[0]
        # sim = scale_zoom(sim)
        # sim = scale_triangle(sim)
        # print(sim_1)
        # print(sim_2)
        print(sim)
def _main():
    """Run the timing benchmark, then start the interactive similarity loop."""
    calculate_count()
    sim_two_question()


if __name__ == "__main__":
    _main()