完成知识查询

This commit is contained in:
liu huanyong 2018-10-05 17:20:22 +08:00
parent f690b80e2a
commit 69303312ed
11 changed files with 151 additions and 26 deletions

View File

@ -10,6 +10,17 @@ self-implement of disease centered Medical graph from zero to full and sever as
1) 基于垂直网站数据的医药知识图谱构建 1) 基于垂直网站数据的医药知识图谱构建
2) 基于医药知识图谱的自动问答 2) 基于医药知识图谱的自动问答
# 项目最终效果
话不多说,直接上图。以下两图是实际问答运行过程中的截图:
![image](https://github.com/liuhuanyong/QABasedOnMedicalKnowledgeGraph/blob/master/img/chat1.png)
![image](https://github.com/liuhuanyong/QABasedOnMedicalKnowledgeGraph/blob/master/img/chat2.png)
# 项目运行:
1、配置要求:要求配置neo4j数据库及相应的python依赖包。请记住neo4j数据库的用户名和密码,并修改相应文件。
2、知识图谱数据导入:python build_medicalgraph.py,导入的数据较多,估计需要几个小时。
3、启动问答:python chat_graph.py
# 一、医疗知识图谱构建 # 一、医疗知识图谱构建
# 1.1 业务驱动的知识图谱构建框架 # 1.1 业务驱动的知识图谱构建框架
![image](https://github.com/liuhuanyong/QABasedOnMedicalKnowledgeGraph/blob/master/img/kg_route.png) ![image](https://github.com/liuhuanyong/QABasedOnMedicalKnowledgeGraph/blob/master/img/kg_route.png)
@ -72,7 +83,6 @@ build_medicalgraph.py知识图谱入库脚本   
![image](https://github.com/liuhuanyong/QABasedOnMedicalKnowledgeGraph/blob/master/img/qa_route.png) ![image](https://github.com/liuhuanyong/QABasedOnMedicalKnowledgeGraph/blob/master/img/qa_route.png)
# 2.2 脚本结构 # 2.2 脚本结构
question_classifier.py问句类型分类脚本 question_classifier.py问句类型分类脚本
question_parser.py问句解析脚本 question_parser.py问句解析脚本

View File

@ -4,7 +4,7 @@
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io> # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
# Date: 18-10-5 # Date: 18-10-5
from py2neo import Graph,Node from py2neo import Graph
class AnswerSearcher: class AnswerSearcher:
def __init__(self): def __init__(self):
@ -13,17 +13,123 @@ class AnswerSearcher:
http_port=7474, http_port=7474,
user="lhy", user="lhy",
password="lhy123") password="lhy123")
self.num_limit = 20
def search_main(self, sqls):
    """Run the Cypher queries of every parsed question and collect the replies.

    Args:
        sqls: iterable of dicts, each carrying a ``'question_type'`` label and
            a ``'sql'`` list of Cypher query strings.

    Returns:
        A list with one formatted reply per entry for which
        ``answer_prettify`` produced a non-empty result.
    """
    replies = []
    for entry in sqls:
        kind = entry['question_type']
        records = []
        # Pool the rows of all queries that belong to the same question type.
        for cypher in entry['sql']:
            records.extend(self.g.run(cypher).data())
        reply = self.answer_prettify(kind, records)
        if reply:
            replies.append(reply)
    return replies
def answer_prettify(self, question_type, answers):
    """Turn raw query records into one reply sentence for ``question_type``.

    Args:
        question_type: label selecting the reply template
            (e.g. ``'disease_symptom'``).
        answers: list of record dicts keyed by the returned column names
            (``'m.name'``, ``'n.name'``, ``'r.name'``, ``'m.cause'``, ...).

    Returns:
        The formatted reply, or ``''`` when there are no records, the
        question type has no template, or no value is left to list.

    Listed values are de-duplicated in first-seen order, truncated to
    ``self.num_limit`` entries and joined with ``';'``.
    """
    if not answers:
        return ''

    limit = self.num_limit

    def unique_values(records, *keys):
        """Collect ``record[key]`` values, de-duplicated, in first-seen order."""
        seen = set()
        ordered = []
        for key in keys:
            for record in records:
                value = record[key]
                # NOTE(review): some properties may be stored as arrays in the
                # graph; each element is then treated as its own item -- confirm.
                candidates = value if isinstance(value, (list, tuple)) else [value]
                for candidate in candidates:
                    # A missing property comes back as None; nothing to show.
                    if candidate is None or candidate in seen:
                        continue
                    seen.add(candidate)
                    ordered.append(candidate)
        return ordered

    def listing(values):
        """Join at most ``limit`` values with the separator used for food lists."""
        return ';'.join(str(value) for value in values[:limit])

    if question_type == 'disease_acompany':
        # The related disease can sit on either end of the relation, so both
        # columns are read and the queried disease itself is dropped.
        subject = answers[0]['m.name']
        related = [name for name in unique_values(answers, 'n.name', 'm.name') if name != subject]
        if not related:
            return ''
        # Wording fixed: this type lists complications, not symptoms.
        return '{0}的并发症包括:{1}'.format(subject, listing(related))

    if question_type == 'disease_do_food':
        subject = answers[0]['m.name']
        foods = unique_values([row for row in answers if row['r.name'] == '宜吃'], 'n.name')
        recipes = unique_values([row for row in answers if row['r.name'] == '推荐食谱'], 'n.name')
        if not foods and not recipes:
            return ''
        return '{0}宜食的食物包括有:{1}\n推荐食谱包括有:{2}'.format(subject, listing(foods), listing(recipes))

    # question_type -> (column listing the values, column naming the subject, template)
    templates = {
        'disease_symptom': ('n.name', 'm.name', '{subject}的症状包括:{items}'),
        'symptom_disease': ('m.name', 'n.name', '症状{subject}可能染上的疾病有:{items}'),
        'disease_cause': ('m.cause', 'm.name', '{subject}可能的成因有:{items}'),
        'disease_prevent': ('m.prevent', 'm.name', '{subject}的预防措施包括:{items}'),
        'disease_lasttime': ('m.cure_lasttime', 'm.name', '{subject}治疗可能持续的周期为:{items}'),
        'disease_cureway': ('m.cure_way', 'm.name', '{subject}可以尝试如下治疗:{items}'),
        'disease_cureprob': ('m.cured_prob', 'm.name', '{subject}治愈的概率为(仅供参考):{items}'),
        'disease_easyget': ('m.easy_get', 'm.name', '{subject}的易感人群包括:{items}'),
        'disease_desc': ('m.desc', 'm.name', '{subject},熟悉一下:{items}'),
        'disease_not_food': ('n.name', 'm.name', '{subject}忌食的食物包括有:{items}'),
        'food_not_disease': ('m.name', 'n.name', '患有{items}的人最好不要吃{subject}'),
        'food_do_disease': ('m.name', 'n.name', '患有{items}的人建议多试试{subject}'),
        'disease_drug': ('n.name', 'm.name', '{subject}通常的使用的药品包括:{items}'),
        'drug_disease': ('m.name', 'n.name', '{subject}主治的疾病有{items},可以试试'),
        'disease_check': ('n.name', 'm.name', '{subject}通常可以通过以下方式检查出来:{items}'),
        'check_disease': ('m.name', 'n.name', '通常可以通过{subject}检查出来的疾病有{items}'),
    }

    spec = templates.get(question_type)
    if spec is None:
        # Unknown type: return the same empty string as the no-records case.
        return ''
    items_key, subject_key, template = spec
    items = unique_values(answers, items_key)
    if not items:
        return ''
    return template.format(subject=answers[0][subject_key], items=listing(items))
if __name__ == '__main__': if __name__ == '__main__':
searcher = AnswerSearch() searcher = AnswerSearcher()

View File

@ -4,13 +4,11 @@
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io> # Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
# Date: 18-10-4 # Date: 18-10-4
import os
from question_classifier import * from question_classifier import *
from question_parser import * from question_parser import *
from answer_search import * from answer_search import *
'''问答类'''
class ChatBotGraph: class ChatBotGraph:
def __init__(self): def __init__(self):
self.classifier = QuestionClassifier() self.classifier = QuestionClassifier()
@ -18,18 +16,21 @@ class ChatBotGraph:
self.searcher = AnswerSearcher() self.searcher = AnswerSearcher()
def chat_main(self, sent):
    """Answer one user sentence.

    The sentence is classified, the classification is turned into queries by
    the parser, and the searcher's replies are joined with newlines. The
    canned greeting is returned when classification yields nothing or the
    search produces no reply.
    """
    fallback = '您好我是小勇医药智能助理希望可以帮到您。如果没答上来可联系https://liuhuanyong.github.io/。祝您身体棒棒!'
    classified = self.classifier.classify(sent)
    if not classified:
        return fallback
    query_plan = self.parser.parser_main(classified)
    replies = self.searcher.search_main(query_plan)
    return '\n'.join(replies) if replies else fallback
if __name__ == '__main__':
    # Interactive console loop: read a question, print the bot's reply, repeat.
    handler = ChatBotGraph()
    while True:
        question = input('user:')
        answer = handler.chat_main(question)
        print('RoBot:', answer)

View File

@ -32,3 +32,7 @@
不可 不可
管住 管住
注意
小心

BIN
img/chat1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 563 KiB

BIN
img/chat2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 554 KiB

View File

@ -35,7 +35,7 @@ class QuestionClassifier:
self.wdtype_dict = self.build_wdtype_dict() self.wdtype_dict = self.build_wdtype_dict()
# 问句疑问词 # 问句疑问词
self.symptom_qwds = ['症状', '表征', '现象', '症候', '表现'] self.symptom_qwds = ['症状', '表征', '现象', '症候', '表现']
self.cause_qwds = ['原因', '为什么', '怎么会', '怎样才', '咋样才', '怎样会', '如何会', '为啥', '为何', '如何', '怎么才会', '会导致', '会造成'] self.cause_qwds = ['原因','成因', '为什么', '怎么会', '怎样才', '咋样才', '怎样会', '如何会', '为啥', '为何', '如何才会', '怎么才会', '会导致', '会造成']
self.acompany_qwds = ['并发症', '并发', '一起发生', '一并发生', '一起出现', '一并出现', '一同发生', '一同出现', '伴随发生', '伴随', '共现'] self.acompany_qwds = ['并发症', '并发', '一起发生', '一并发生', '一起出现', '一并出现', '一同发生', '一同出现', '伴随发生', '伴随', '共现']
self.food_qwds = ['饮食', '饮用', '', '', '伙食', '膳食', '', '' ,'忌口', '补品', '保健品', '食谱', '菜谱', '食用', '食物','补品'] self.food_qwds = ['饮食', '饮用', '', '', '伙食', '膳食', '', '' ,'忌口', '补品', '保健品', '食谱', '菜谱', '食用', '食物','补品']
self.drug_qwds = ['', '药品', '用药', '胶囊', '口服液', '炎片'] self.drug_qwds = ['', '药品', '用药', '胶囊', '口服液', '炎片']
@ -48,7 +48,7 @@ class QuestionClassifier:
self.cureway_qwds = ['怎么治疗', '如何医治', '怎么医治', '怎么治', '怎么医', '如何治', '医治方式', '疗法', '咋治', '怎么办', '咋办', '咋治'] self.cureway_qwds = ['怎么治疗', '如何医治', '怎么医治', '怎么治', '怎么医', '如何治', '医治方式', '疗法', '咋治', '怎么办', '咋办', '咋治']
self.cureprob_qwds = ['多大概率能治好', '多大几率能治好', '治好希望大么', '几率', '几成', '比例', '可能性', '能治', '可治', '可以治', '可以医'] self.cureprob_qwds = ['多大概率能治好', '多大几率能治好', '治好希望大么', '几率', '几成', '比例', '可能性', '能治', '可治', '可以治', '可以医']
self.easyget_qwds = ['易感人群', '容易感染', '易发人群', '什么人', '哪些人', '感染', '染上', '得上'] self.easyget_qwds = ['易感人群', '容易感染', '易发人群', '什么人', '哪些人', '感染', '染上', '得上']
self.check_qwds = ['检查', '检查项目'] self.check_qwds = ['检查', '检查项目', '查出', '检查', '测出', '试出']
self.belong_qwds = ['属于什么科', '属于', '什么科', '科室'] self.belong_qwds = ['属于什么科', '属于', '什么科', '科室']
self.cure_qwds = ['治疗什么', '治啥', '治疗啥', '医治啥', '治愈啥', '主治啥', '主治什么', '有什么用', '有何用', '用处', '用途', self.cure_qwds = ['治疗什么', '治啥', '治疗啥', '医治啥', '治愈啥', '主治啥', '主治什么', '有什么用', '有何用', '用处', '用途',
'有什么好处', '有什么益处', '有何益处', '用来', '用来做啥', '用来作甚', '需要', ''] '有什么好处', '有什么益处', '有何益处', '用来', '用来做啥', '用来作甚', '需要', '']

View File

@ -23,11 +23,10 @@ class QuestionPaser:
args = res_classify['args'] args = res_classify['args']
entity_dict = self.build_entitydict(args) entity_dict = self.build_entitydict(args)
question_types = res_classify['question_types'] question_types = res_classify['question_types']
sqls = [] sqls = []
for question_type in question_types: for question_type in question_types:
# print(question_type) sql_ = {}
sql_['question_type'] = question_type
sql = [] sql = []
if question_type == 'disease_symptom': if question_type == 'disease_symptom':
sql = self.sql_transfer(question_type, entity_dict.get('disease')) sql = self.sql_transfer(question_type, entity_dict.get('disease'))
@ -63,7 +62,7 @@ class QuestionPaser:
sql = self.sql_transfer(question_type, entity_dict.get('disease')) sql = self.sql_transfer(question_type, entity_dict.get('disease'))
elif question_type == 'check_disease': elif question_type == 'check_disease':
sql = self.sql_transfer(question_type, entity_dict.get('disease')) sql = self.sql_transfer(question_type, entity_dict.get('check'))
elif question_type == 'disease_prevent': elif question_type == 'disease_prevent':
sql = self.sql_transfer(question_type, entity_dict.get('disease')) sql = self.sql_transfer(question_type, entity_dict.get('disease'))
@ -84,11 +83,12 @@ class QuestionPaser:
sql = self.sql_transfer(question_type, entity_dict.get('disease')) sql = self.sql_transfer(question_type, entity_dict.get('disease'))
if sql: if sql:
sqls += sql sql_['sql'] = sql
sqls.append(sql_)
return sqls return sqls
'''针对不同的问题,分开进行处理''' '''针对不同的问题,分开进行处理'''
def sql_transfer(self, question_type, entities): def sql_transfer(self, question_type, entities):
if not entities: if not entities:
@ -112,6 +112,10 @@ class QuestionPaser:
elif question_type == 'disease_cureprob': elif question_type == 'disease_cureprob':
sql = ["MATCH (m:Disease) where m.name = '{0}' return m.name, m.cured_prob".format(i) for i in entities] sql = ["MATCH (m:Disease) where m.name = '{0}' return m.name, m.cured_prob".format(i) for i in entities]
# 查询疾病的治疗方式
elif question_type == 'disease_cureway':
sql = ["MATCH (m:Disease) where m.name = '{0}' return m.name, m.cure_way".format(i) for i in entities]
# 查询疾病的易发人群 # 查询疾病的易发人群
elif question_type == 'disease_easyget': elif question_type == 'disease_easyget':
sql = ["MATCH (m:Disease) where m.name = '{0}' return m.name, m.easy_get".format(i) for i in entities] sql = ["MATCH (m:Disease) where m.name = '{0}' return m.name, m.easy_get".format(i) for i in entities]