add demo
parent befb46393c · commit 42ac6d5f7c
@@ -8,18 +8,25 @@ generate sentence embeddings for a given list of sentences.
 import sys
 
 sys.path.append('..')
-from similarities.similarity import Similarity
+from similarities.termsim import WordEmbeddingSimilarity
+from text2vec import Word2Vec
+from loguru import logger
+
+logger.remove()
+logger.add(sys.stderr, level="INFO")
 
 if __name__ == '__main__':
-    model = Similarity("shibing624/text2vec-base-chinese")  # Chinese sentence-embedding model (CoSENT)
+    wv_model = Word2Vec()
+    model = WordEmbeddingSimilarity(wv_model)
     # Embed a list of sentences
     sentences = ['如何更换花呗绑定银行卡',
                  '花呗更改绑定银行卡']
-    sentence_embeddings = model.encode(sentences)
+    sentences2 = ['如何更换 银行卡',
+                  '西方开花北方结果']
+    sentence_embeddings = model.get_vector(sentences)
    print(type(sentence_embeddings), sentence_embeddings.shape)
-    similarity_score = model.similarity_score([sentences[0]], [sentences[1]])
-    print(similarity_score)
+    similarity_score = model.similarity(sentences[0], sentences[1])
+    print(similarity_score.numpy())
+
+    similarity_score = model.similarity(sentences, sentences2)
+    print(similarity_score.numpy())
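What the new demo computes, in essence, is cosine similarity between sentence vectors built from word embeddings (text2vec's Word2Vec encoder works along these lines). A minimal sketch of the idea, assuming a word_vec(token) lookup; the helper names are hypothetical, not an API of this repo:

import numpy as np

def sentence_vector(tokens, word_vec, dim=200):
    # Average the word vectors of all in-vocabulary tokens;
    # fall back to a zero vector for fully out-of-vocabulary input.
    vecs = [word_vec(t) for t in tokens if word_vec(t) is not None]
    return np.mean(vecs, axis=0) if vecs else np.zeros(dim)

def cosine(u, v):
    # Cosine similarity, guarding against zero-norm vectors.
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / denom) if denom else 0.0

Note that model.similarity(sentences, sentences2) scores every sentence in the first list against every sentence in the second, so the final print shows a score matrix rather than a scalar.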
@@ -1,19 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@author:XuMing(xuming624@qq.com)
-@description:
-This basic example loads a pre-trained model from the web and uses it to
-generate sentence embeddings for a given list of sentences.
-"""
-
-import sys
-
-sys.path.append('..')
-from similarities import BertSimilarity
-
-model = BertSimilarity("shibing624/text2vec-base-chinese")  # Chinese sentence-embedding model (CoSENT)
-# Embed a list of sentences
-sentences = ['如何更换花呗绑定银行卡',
-             '花呗更改绑定银行卡']
-sentence_embeddings = model.encode(sentences)
-print(type(sentence_embeddings), sentence_embeddings.shape)
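For reference, model.encode(sentences) in this deleted demo returns a numpy array, one embedding row per sentence (the script prints its type and shape), so pairwise cosine scores can be recovered from it directly. A sketch; the function name is ours, not the library's:

import numpy as np

def cosine_matrix(emb):
    # L2-normalize each row, then one matrix product yields all pairwise cosine scores.
    normed = emb / np.linalg.norm(emb, axis=1, keepdims=True)
    return normed @ normed.T

# cosine_matrix(sentence_embeddings)[0, 1] would be the score for the two sentences above.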
@@ -1,60 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@author:XuMing(xuming624@qq.com)
-@description:
-"""
-import numpy as np
-import gensim
-from gensim.matutils import *
-from gensim import matutils
-from scipy.sparse import csc_matrix, csr_matrix
-
-vec_1 = [(2, 1), (3, 4), (4, 1), (5, 1), (1, 1), (7, 2)]
-vec_2 = [(1, 1), (3, 8), (4, 1)]
-result = matutils.jaccard(vec_2, vec_1)
-expected = 1 - 0.3
-print(result)
-
-# checking ndarray, csr_matrix as inputs
-vec_1 = np.array([[1, 3], [0, 4], [2, 3]])
-vec_2 = csr_matrix([[1, 4], [0, 2], [2, 2]])
-result = matutils.jaccard(vec_1, vec_2)
-expected = 1 - 0.388888888889
-print(result)
-
-# checking ndarray, list as inputs
-vec_1 = np.array([6, 1, 2, 3])
-vec_2 = [4, 3, 2, 5]
-result = matutils.jaccard(vec_1, vec_2)
-expected = 1 - 0.333333333333
-print(result)
-
-vec_1 = [[1, 3], [2, 4], [3, 3]]
-vec_2 = [[1, 6], [2, 2], [3, 2]]
-
-vec_1 = [[0, 1], [1, 4], [2, 6]]
-vec_2 = [[0, 1], [1, 2], [2, 3]]
-a = cossim(vec_1, vec_2)
-print(a)
-
-vec_1 = [[0, 1], [1, 1], [2, 1]]
-vec_2 = [[0, 1], [1, 2], [2, 3]]
-a = cossim(vec_1, vec_2)
-print(a)
-
-vec_1 = [[0, 2], [1, 4], [2, 6]]
-vec_2 = [[0, 1], [1, 2], [2, 3]]
-a = cossim(vec_1, vec_2)
-print(a)
-print("jaccard:", matutils.jaccard(vec_1, vec_2))
-
-vec_1 = np.array([2, 4, 6])
-vec_2 = np.array([1, 2, 3])
-
-# vec_1 = np.array([3,4,3])
-# vec_2 = np.array([6,2,2])
-#
-# vec_1 = np.array([[3],[4],[3]])
-# vec_2 = np.array([[6],[2],[2]])
-print("jaccard2:", matutils.jaccard(vec_1, vec_2))
-
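About the expected values in this deleted script: for bag-of-words input (lists of (id, weight) pairs), gensim's matutils.jaccard returns one minus the sum of minimum weights over shared ids divided by the total weight of both vectors; for dense ndarray or plain-list input it falls back to a set-based Jaccard over the values. Both readings match the expected values above; the closing jaccard2 case pairs parallel vectors ([2, 4, 6] vs [1, 2, 3]) whose cossim is exactly 1.0 but whose Jaccard distance is not 0, which is presumably what the script was probing. The first check works out by hand:

# Weighted Jaccard distance as gensim computes it for bag-of-words vectors
# (a re-derivation for illustration, not gensim's actual code).
def bow_jaccard_distance(vec1, vec2):
    union = sum(w for _, w in vec1) + sum(w for _, w in vec2)
    weights2 = dict(vec2)
    intersection = sum(min(w, weights2.get(i, 0.0)) for i, w in vec1)
    return 1.0 - intersection / union

vec_1 = [(2, 1), (3, 4), (4, 1), (5, 1), (1, 1), (7, 2)]
vec_2 = [(1, 1), (3, 8), (4, 1)]
print(bow_jaccard_distance(vec_2, vec_1))  # 0.7, i.e. 1 - 0.3 as expected above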
@@ -3,16 +3,16 @@
 @author:XuMing(xuming624@qq.com)
 @description: pip install gradio
 """
 
+from text2vec import Word2Vec
 import gradio as gr
-from similarities import BertSimilarity
+from similarities.termsim import WordEmbeddingSimilarity
 
 # Chinese sentence-embedding model (CoSENT)
-sim_model = BertSimilarity(model_name_or_path='shibing624/text2vec-base-chinese')
+wv_model = Word2Vec()
+sim_model = WordEmbeddingSimilarity(wv_model)
 
 
 def ai_text(sentence1, sentence2):
-    score = sim_model.similarity_score(sentence1, sentence2)
+    score = sim_model.similarity(sentence1, sentence2).numpy()[0][0]
    print("{} \t\t {} \t\t Score: {:.4f}".format(sentence1, sentence2, score))
 
    return score
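On the changed ai_text: similarity scores every left sentence against every right one, so for a single pair the result is a 1x1 matrix and .numpy()[0][0] extracts the scalar; that is why the old scalar-returning similarity_score call could be dropped. This reading is inferred from the matrix-shaped usage in the demo above; an equivalent, slightly more explicit form as a sketch:

def ai_text(sentence1, sentence2):
    # similarity() returns a score matrix; a single pair yields a 1x1 matrix.
    scores = sim_model.similarity(sentence1, sentence2)
    return float(scores.numpy()[0][0])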
@@ -33,7 +33,7 @@ if __name__ == '__main__':
         inputs=[input1, input2],
         outputs=[output_text],
         # theme="grass",
-        title="Chinese Text Matching Model shibing624/text2vec-base-chinese",
+        title="Chinese Text Matching Model",
         description="Copy or input Chinese text here. Submit and the machine will calculate the cosine score.",
         article="Link to <a href='https://github.com/shibing624/similarities' style='color:blue;' target='_blank'>Github REPO</a>",
         examples=examples
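This hunk only touches the gr.Interface(...) keyword arguments; for context, a minimal complete wiring of this demo looks roughly like the sketch below. The component definitions are assumptions (the real ones live outside this hunk); ai_text and the example pair come from this diff.

import gradio as gr

# Assumed component definitions; the actual ones are outside this hunk.
input1 = gr.Textbox(lines=2, label="Sentence 1")
input2 = gr.Textbox(lines=2, label="Sentence 2")
output_text = gr.Textbox(label="Score")
examples = [['如何更换花呗绑定银行卡', '花呗更改绑定银行卡']]

gr.Interface(
    fn=ai_text,
    inputs=[input1, input2],
    outputs=[output_text],
    title="Chinese Text Matching Model",
    description="Copy or input Chinese text here. Submit and the machine will calculate the cosine score.",
    examples=examples,
).launch()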
similarities/data/hownet.txt (new file, 66181 lines)
File diff suppressed because it is too large.
@@ -310,7 +310,7 @@ class HownetSimilarity(object):
     Computes hownet similarities between word embeddings and retrieves most
     similar terms for a given term.
     """
-    default_hownet_path = os.path.join(pwd_path, 'data', 'hownet.dat')
+    default_hownet_path = os.path.join(pwd_path, 'data', 'hownet.txt')
 
     def __init__(self, cilin_path: str = default_hownet_path, docs: List[str] = None):
         super().__init__()
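Two notes on this hunk: the parameter is still named cilin_path, apparently carried over from a Cilin-based sibling class, even though it points at the HowNet data file; and after this change the default resolves to the newly added data/hownet.txt. Given the visible signature, construction would look like the sketch below; the import path and the similarity call are assumptions based on the sibling classes in this diff, not confirmed by the hunk itself.

from similarities.termsim import HownetSimilarity  # assumed module path

model = HownetSimilarity()  # loads data/hownet.txt by default after this change
# Scoring a pair would go through the class's similarity API, e.g.:
# print(model.similarity('如何更换花呗绑定银行卡', '花呗更改绑定银行卡'))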