Add files via upload

This commit is contained in:
yongzhuo 2019-04-10 10:02:54 +08:00 committed by GitHub
parent 247e58f3ff
commit 3df53aabbb

View File

@ -0,0 +1,163 @@
# -*- coding: UTF-8 -*-
# !/usr/bin/python
# @time :2019/4/9 22:49
# @author :Mo
# @function :get main_part by stanfordcorenlp
from conf.path_config import stanford_corenlp_full_path
from stanfordcorenlp import StanfordCoreNLP
# The stanford-corenlp-full-2018-10-05 distribution must be downloaded beforehand; startup is slow.
nlp = StanfordCoreNLP(stanford_corenlp_full_path, lang='zh')
def stanford_parse(sentence):
    """Run the CoreNLP annotators needed for main-part extraction.

    :param sentence: str, raw input text
    :return: dict with the keys 'tokenize' (result of ``nlp.word_tokenize``),
        'dependence' (result of ``nlp.dependency_parse``) and 'parse'
        (result of ``nlp.parse``)
    """
    # Only these three annotations are returned. The POS-tag and NER requests
    # that used to be issued here were never read, so they are not made.
    tokenize = nlp.word_tokenize(sentence)
    syntax_tree = nlp.parse(sentence)
    dependence = nlp.dependency_parse(sentence)
    return {
        'tokenize': tokenize,
        'dependence': dependence,
        'parse': syntax_tree,
    }
def combine_nn(tokenize, dependence, target):
    """Prefix ``target`` with its first "nn" (noun-compound) modifier.

    :param tokenize: list of str, sentence tokens; the indices stored in
        ``dependence`` are 1-based positions into this list
    :param dependence: iterable of entries indexed as
        ``(relation, governor_index, dependent_index)``; a governor index
        of 0 has no token and is matched against the literal "root"
    :param target: str, subject or object
    :return: str, the modifier token concatenated in front of ``target``,
        or ``target`` unchanged when it is empty or has no "nn" modifier
    """
    if not target:
        return target
    for dependence_one in dependence:
        relation = dependence_one[0]
        governor = dependence_one[1]
        dependent = dependence_one[2]
        # The relation check must apply to every entry. Previously the
        # unparenthesised conditional expression dropped it whenever the
        # governor index was non-zero, so any dependent of ``target``
        # (e.g. its subject) was glued on.
        if relation != "nn":
            continue
        governor_gloss = tokenize[governor - 1] if governor != 0 else "root"
        if target == governor_gloss:
            # Only the first matching modifier is combined.
            return tokenize[dependent - 1] + target
    return target
def get_main_part_by_stanfordcorenlp(text):
    """Extract the main part (subject + predicate + object) of a sentence.

    :param text: str, input sentence
    :return: str, subject, predicate and object concatenated in that order;
        for a noun phrase, or when no ROOT relation is present, the
        concatenated last tokens of the NP lines of the parse tree
    """
    # Tokens, dependency entries and constituency tree from CoreNLP.
    result_dict = stanford_parse(text)
    tokenize = result_dict['tokenize']
    dependence = result_dict['dependence']
    syntax_tree = result_dict['parse']
    # NOTE(review): the stanfordcorenlp wrapper appears to return the tree as
    # one multi-line string. Indexing and iterating that string would look at
    # single characters, so it is split into lines first - confirm.
    if isinstance(syntax_tree, str):
        syntax_tree = syntax_tree.splitlines()

    # NOTE(review): in this copy of the file the three slots shared the same
    # empty-string dict key, which collapsed them into a single entry. The
    # subject/predicate/object assignment below is reconstructed from the
    # dependency labels and the original inline comments - confirm.
    subject = ""
    predicate = ""
    obj = ""

    if len(syntax_tree) >= 2:
        # Noun phrase, or no predicate at all.
        if "NP" in syntax_tree[1] or "ROOT" not in str(dependence):
            for syntax_tree_single in syntax_tree:
                if ("NP" in syntax_tree_single
                        and "(" in syntax_tree_single
                        and ")" in syntax_tree_single):
                    # Keep the last token of the line, stripped of quotes
                    # and closing brackets.
                    token_np = syntax_tree_single.split(" ")[-1]
                    token_np = token_np.replace("'", "").replace(")", "").strip()
                    subject += token_np
            return subject + predicate + obj

    for dependence_one in dependence:
        dep = dependence_one[0]
        # Dependent indices are 1-based positions into the token list.
        dep_dep_gloss = tokenize[dependence_one[2] - 1]
        if dep == "ROOT":
            # The ROOT dependent acts as the predicate.
            predicate = dep_dep_gloss
        elif dep == "cop":
            # Copular construction: the copula goes in front of the predicate.
            predicate = dep_dep_gloss + predicate
        elif dep in ("nsubjpass", "dobj", "attr"):
            obj = dep_dep_gloss
        elif dep in ("nsubj", "top"):
            subject = dep_dep_gloss

    # Attach a noun-compound modifier to the subject and the object.
    subject = combine_nn(tokenize, dependence, subject)
    obj = combine_nn(tokenize, dependence, obj)
    return subject + predicate + obj
if __name__ == "__main__":
sentence_list = [
"我打量了他一眼", ]
sentence_type = ["陈述句与否定句",
"你可以那到100分, 是吗?",
"我们是从广州走, 还是从成都走?",
"你是来帮助我们的, 还是来拆我们的台的?",
"这些人甘愿当走狗, 你说可恨不可恨?",
"给我喝水, 我渴!",
"走哇, 妈妈!",
for sen_one in sentence_list:
subject_object = get_main_part_by_stanfordcorenlp(sen_one)
print(sen_one + " " + subject_object)
while True:
print("请输入sentence ")
sen_test = input()
# syn_sentence_test = syn_by_syntactic_analys==(test_test)
syn_sentence_test = get_main_part_by_stanfordcorenlp(sen_test)
# Do not forget to close! The backend server will consume a lot of memory