diff --git a/AugmentText/augment_translate/translate_web/translate_google.py b/AugmentText/augment_translate/translate_web/translate_google.py
index 3781b07..203cfc8 100644
--- a/AugmentText/augment_translate/translate_web/translate_google.py
+++ b/AugmentText/augment_translate/translate_web/translate_google.py
@@ -4,14 +4,23 @@
 # @author :Mo
 # @function :回译调用谷歌翻译,模拟google token访问
 
+
+# 适配linux
+import sys
+import os
+path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+sys.path.append(path_root)
+print(path_root)
+
+
 import logging as logger
 import urllib.parse as parse
 
 import execjs
 import requests
 
-from nlp_xiaojiang.conf.augment_constant import language_short_google
-from nlp_xiaojiang.utils.text_tools import judge_translate_english
+from conf.augment_constant import language_short_google
+from utils.text_tools import judge_translate_english
 
 
 class GoogleToken:
diff --git a/ChatBot/chatbot_search/chatbot_sentence_vec_by_char.py b/ChatBot/chatbot_search/chatbot_sentence_vec_by_char.py
index 80fd465..ec38dca 100644
--- a/ChatBot/chatbot_search/chatbot_sentence_vec_by_char.py
+++ b/ChatBot/chatbot_search/chatbot_sentence_vec_by_char.py
@@ -4,7 +4,14 @@
 # @author :Mo
 # @function :chatbot based search, encode sentence_vec by char
 
+
+# 适配linux
+import sys
 import os
+path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.append(path_root)
+print(path_root)
+
 import pickle
 
 import gensim
@@ -12,10 +19,10 @@
 import numpy as np
 from gensim import matutils
 from numpy import float32 as numpy_type
 
-from nlp_xiaojiang.conf.path_config import matrix_ques_part_path_char
-from nlp_xiaojiang.conf.path_config import projectdir, chicken_and_gossip_path
-from nlp_xiaojiang.conf.path_config import w2v_model_char_path
-from nlp_xiaojiang.utils.text_tools import txtRead, getChinese
+from conf.path_config import matrix_ques_part_path_char
+from conf.path_config import projectdir, chicken_and_gossip_path
+from conf.path_config import w2v_model_char_path
+from utils.text_tools import txtRead, getChinese
 
 def load_word2vec_model(path, bin=False, limit=None):
@@ -143,6 +150,21 @@ if __name__ == '__main__':
     # 标准问句矩阵初始化和预处理
     matrix_org_norm, matrix_org_index, top_vec = basic_questions_matrix_init(matrix_ques, top_vec=20)
 
+    ### 测试一个例子
+    ques_clean = getChinese("小姜机器人是谁呀")
+    char_list = [ques_char for ques_char in ques_clean]
+    sentence_vec = question_encoding(word2vec_model, char_list)
+    top_20_qid = calculate_text_similar(sentence_vec, matrix_org_norm, matrix_org_index, top_vec=top_vec)
+    try:
+        print("小姜机器人: " + syn_qa_dails[top_20_qid[0][0]].strip().split("\t")[1])
+        print([(syn_qa_dails[top_20_qid[i][0]].strip().split("\t")[0],
+                syn_qa_dails[top_20_qid[i][0]].strip().split("\t")[1]) for i in range(len(top_20_qid))])
+    except Exception as e:
+        # 有的字符可能打不出来
+        print(str(e))
+
+
+
     while True:
         print("你问: ")
         ques_ask = input()
diff --git a/ChatBot/chatbot_search/chatbot_sentence_vec_by_word.py b/ChatBot/chatbot_search/chatbot_sentence_vec_by_word.py
index 72b3dc7..8d40c63 100644
--- a/ChatBot/chatbot_search/chatbot_sentence_vec_by_word.py
+++ b/ChatBot/chatbot_search/chatbot_sentence_vec_by_word.py
@@ -5,7 +5,14 @@
 # @function :chatbot based search, encode sentence_vec by word
 
+# 适配linux
+import sys
 import os
+path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.append(path_root)
+print(path_root)
+
+
 import pickle
 
 import gensim
 
@@ -15,10 +22,10 @@
 import numpy as np
 from gensim import matutils
 from numpy import float32 as numpy_type
 
-from nlp_xiaojiang.conf.path_config import matrix_ques_part_path
-from nlp_xiaojiang.conf.path_config import projectdir, chicken_and_gossip_path
-from nlp_xiaojiang.conf.path_config import w2v_model_merge_short_path, w2v_model_wiki_word_path
-from nlp_xiaojiang.utils.text_tools import txtRead, getChinese
+from conf.path_config import matrix_ques_part_path
+from conf.path_config import projectdir, chicken_and_gossip_path
+from conf.path_config import w2v_model_merge_short_path, w2v_model_wiki_word_path
+from utils.text_tools import txtRead, getChinese
 
 def load_word2vec_model(path, bin=False, limit=None):
@@ -203,6 +210,18 @@ if __name__ == '__main__':
     # 标准问句矩阵初始化和预处理
     matrix_org_norm, matrix_org_index, top_vec = basic_questions_matrix_init(matrix_ques, top_vec=20)
 
+    ques_clean = getChinese("小姜机器人叫什么呀")
+    word_list, flag_list = word_flag_cut(ques_clean)
+    sentence_vec = basic_questions_encoding(word2vec_model, word_list, flag_list)
+    top_20_qid = calculate_text_similar(sentence_vec, matrix_org_norm, matrix_org_index, top_vec=top_vec)
+    try:
+        print("小姜机器人: " + syn_qa_dails[top_20_qid[0][0]].strip().split("\t")[1])
+        print([(syn_qa_dails[top_20_qid[i][0]].strip().split("\t")[0],
+                syn_qa_dails[top_20_qid[i][0]].strip().split("\t")[1]) for i in range(len(top_20_qid))])
+    except Exception as e:
+        # 有的字符可能打不出来
+        print(str(e))
+
     while True:
         print("你: ")
         ques_ask = input()