nlp_xiaojiang/conf/path_config.py

83 lines
4.0 KiB
Python
Raw Permalink Normal View History

2019-04-09 15:26:07 +08:00
# -*- coding: UTF-8 -*-
# !/usr/bin/python
# @time :2019/4/3 11:23
# @author :Mo
# @function :path
import pathlib
import sys
import os
# Project root: the directory two levels above this file, as an absolute str.
projectdir = os.fspath(
    pathlib.Path(os.path.abspath(__file__)).parent.parent
)
# Append the root to the import search path (presumably so sibling packages
# can be imported regardless of the working directory -- confirm with callers).
sys.path.append(projectdir)
# Echo the resolved root when this module is imported.
print(projectdir)
2019-04-18 17:10:22 +08:00
# Location of the stop-word list file under the project root.
stop_words_path = (
    projectdir + "/Data/common_words/stopwords.txt"
)
2019-04-09 15:26:07 +08:00
# --- corpus -----------------------------------------------------------------
chicken_and_gossip_path = (
    projectdir + "/Data/corpus/chicken_and_gossip.txt"
)

# --- word2vec (short merged vectors) ----------------------------------------
w2v_model_merge_short_path = (
    projectdir + "/Data/chinese_vector/w2v_model_merge_short.vec"
)

# --- tf-idf artefacts ---------------------------------------------------------
# NOTE(review): "td_idf" looks like a misspelling of "tf_idf"; the variable
# names and file names are kept verbatim because other modules and files on
# disk may depend on them.
td_idf_cut_path = (
    projectdir + "/Data/tf_idf/td_idf_cut.csv"
)
td_idf_cut_pinyin = (
    projectdir + "/Data/tf_idf/td_idf_cut_pinyin.csv"
)
td_idf_path_pinyin = (
    projectdir + "/Data/tf_idf/td_idf_cut_pinyin_dictionary_model.pkl"
)
td_idf_path = (
    projectdir + "/Data/tf_idf/td_idf_cut_dictionary_model.pkl"
)

# --- word level: wiki word vectors and sentence-vector file -----------------
w2v_model_wiki_word_path = (
    projectdir + "/Data/chinese_vector/w2v_model_wiki_word.vec"
)
matrix_ques_part_path = (
    projectdir + "/Data/sentence_vec_encode_word/1.txt"
)

# --- char level: wiki char vectors and sentence-vector file -----------------
w2v_model_char_path = (
    projectdir + "/Data/chinese_vector/w2v_model_wiki_char.vec"
)
matrix_ques_part_path_char = (
    projectdir + "/Data/sentence_vec_encode_char/1.txt"
)
# word2vec select
2019-04-10 10:03:30 +08:00
# Use the wiki word vectors when that file exists on disk at import time;
# otherwise fall back to the short merged vectors.
if os.path.exists(w2v_model_wiki_word_path):
    word2_vec_path = w2v_model_wiki_word_path
else:
    word2_vec_path = w2v_model_merge_short_path
2019-04-10 10:03:58 +08:00
# stanford_corenlp_full_path: you must download and configure stanford-corenlp-full-2018-10-05 yourself
2019-04-10 10:03:30 +08:00
# Directory of the Stanford CoreNLP distribution.
# The historical default is a machine-specific Windows drive path, so allow it
# to be overridden without editing this file: set the environment variable
# STANFORD_CORENLP_FULL_PATH to the local install directory. An unset or empty
# variable keeps the original default.
stanford_corenlp_full_path = (
    os.environ.get("STANFORD_CORENLP_FULL_PATH")
    or "Y:/segment/stanford-corenlp-full-2018-10-05"
)
2019-04-17 20:09:20 +08:00
# --- webank similarity corpus, char-level seq2seq data ----------------------
train_data_web_char_dir = (
    projectdir + "/AugmentText/augment_seq2seq/data_mid/char/"
)
train_data_web_ws_anti = (
    projectdir + "/AugmentText/augment_seq2seq/data_mid/char/train_data_web_ws_anti.pkl"
)
train_data_web_xy_anti = (
    projectdir + "/AugmentText/augment_seq2seq/data_mid/char/train_data_web_xy_anti.pkl"
)
model_ckpt_web_anti = (
    projectdir + "/AugmentText/augment_seq2seq/model_seq2seq_tp/seq2seq_char_webank/model_ckpt_char_webank.ckp"
)
# Parameter file and the raw webank similarity corpus.
path_params = projectdir + "/conf/params.json"
path_webank_sim = projectdir + "/Data/corpus/sim_webank.csv"

# --- webank similarity corpus, word-level seq2seq data ----------------------
train_data_web_word_dir = (
    projectdir + "/AugmentText/augment_seq2seq/data_mid/word/"
)
train_data_web_emb_anti = (
    projectdir + "/AugmentText/augment_seq2seq/data_mid/word/train_data_web_emb_anti.pkl"
)
# NOTE(review): the two paths below both end in "train_data_web_ws_anti.pkl",
# which does not match the "xyw" / "model_ckpt" variable names and looks like
# a copy-paste leftover. Kept verbatim because existing files on disk may
# already use these names -- confirm before renaming.
train_data_web_xyw_anti = (
    projectdir + "/AugmentText/augment_seq2seq/data_mid/word/train_data_web_ws_anti.pkl"
)
model_ckpt_web_anti_word = (
    projectdir + "/AugmentText/augment_seq2seq/model_seq2seq_tp/seq2seq_word_webank/train_data_web_ws_anti.pkl"
)
2019-04-22 17:26:22 +08:00
# --- generative chatbot, char-level seq2seq data ----------------------------
chatbot_data_cg_char_dir = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/data_mid/char/"
)
chatbot_data_cg_ws_anti = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/data_mid/char/train_data_web_ws_anti.pkl"
)
chatbot_data_cg_xy_anti = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/data_mid/char/train_data_web_xy_anti.pkl"
)
model_ckpt_cg_anti = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/model_seq2seq_tp/seq2seq_char_cg/model_ckpt_char_cg.ckp"
)

# --- generative chatbot, word-level seq2seq data ----------------------------
chatbot_data_cg_word_dir = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/data_mid/word/"
)
chatbot_data_cg_xyw_anti_word = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/data_mid/word/train_data_cg_word_xyw.pkl"
)
chatbot_data_cg_emb_anti_word = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/data_mid/word/train_data_cg_word_emb.pkl"
)
model_ckpt_cg_anti_word = (
    projectdir + "/ChatBot/chatbot_generate/seq2seq/model_seq2seq_tp/seq2seq_word_cg/model_ckpt_word_cg.ckp"
)
2019-05-12 09:53:22 +08:00
# Webank corpus for classification, split into train / dev / test CSV files.
path_webank_train = projectdir + "/Data/corpus/webank/train.csv"
path_webank_dev = projectdir + "/Data/corpus/webank/dev.csv"
path_webank_test = projectdir + "/Data/corpus/webank/test.csv"
2019-07-01 21:59:57 +08:00
# NER corpus (Chinese People's Daily): train / dev / test files, in that order.
path_ner_people_train, path_ner_people_dev, path_ner_people_test = (
    projectdir + "/Data/corpus/ner/people_daily/people.train",
    projectdir + "/Data/corpus/ner/people_daily/people.dev",
    projectdir + "/Data/corpus/ner/people_daily/people.test",
)