Add files via upload

This commit is contained in:
Tommy in Tongji 2022-06-26 10:40:08 +08:00 committed by GitHub
parent 96991cd644
commit 3f64288ea2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 610 additions and 0 deletions

View File

@ -0,0 +1,287 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 上一个代码的输出"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"sqls = [{'question_type': 'check_disease', 'sql': [\"MATCH (m:Disease)-[r:need_check]->(n:Check) where n.name = '血常规' return m.name, r.name, n.name\"]}]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from py2neo import Graph"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 连接图数据库"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"g = Graph(\"http://localhost:7474\", auth=(\"neo4j\", \"111695\"))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"num_limit = 20"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"final_answers = []"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'question_type': 'check_disease',\n",
" 'sql': [\"MATCH (m:Disease)-[r:need_check]->(n:Check) where n.name = '血常规' return m.name, r.name, n.name\"]}]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sqls"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"'''根据对应的qustion_type调用相应的回复模板'''\n",
"def answer_prettify(question_type, answers):\n",
" final_answer = []\n",
" if not answers:\n",
" return ''\n",
" if question_type == 'disease_symptom':\n",
" desc = [i['n.name'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}的症状包括:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'symptom_disease':\n",
" desc = [i['m.name'] for i in answers]\n",
" subject = answers[0]['n.name']\n",
" final_answer = '症状{0}可能染上的疾病有:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_cause':\n",
" desc = [i['m.cause'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}可能的成因有:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_prevent':\n",
" desc = [i['m.prevent'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}的预防措施包括:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_lasttime':\n",
" desc = [i['m.cure_lasttime'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}治疗可能持续的周期为:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_cureway':\n",
" desc = [';'.join(i['m.cure_way']) for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}可以尝试如下治疗:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_cureprob':\n",
" desc = [i['m.cured_prob'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}治愈的概率为(仅供参考):{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_easyget':\n",
" desc = [i['m.easy_get'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
"\n",
" final_answer = '{0}的易感人群包括:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_desc':\n",
" desc = [i['m.desc'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0},熟悉一下:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_acompany':\n",
" desc1 = [i['n.name'] for i in answers]\n",
" desc2 = [i['m.name'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" desc = [i for i in desc1 + desc2 if i != subject]\n",
" final_answer = '{0}的症状包括:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_not_food':\n",
" desc = [i['n.name'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}忌食的食物包括有:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_do_food':\n",
" do_desc = [i['n.name'] for i in answers if i['r.name'] == '宜吃']\n",
" recommand_desc = [i['n.name'] for i in answers if i['r.name'] == '推荐食谱']\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}宜食的食物包括有:{1}\\n推荐食谱包括有{2}'.format(subject, ';'.join(list(set(do_desc))[:num_limit]), ';'.join(list(set(recommand_desc))[:self.num_limit]))\n",
"\n",
" elif question_type == 'food_not_disease':\n",
" desc = [i['m.name'] for i in answers]\n",
" subject = answers[0]['n.name']\n",
" final_answer = '患有{0}的人最好不要吃{1}'.format(''.join(list(set(desc))[:num_limit]), subject)\n",
"\n",
" elif question_type == 'food_do_disease':\n",
" desc = [i['m.name'] for i in answers]\n",
" subject = answers[0]['n.name']\n",
" final_answer = '患有{0}的人建议多试试{1}'.format(''.join(list(set(desc))[:num_limit]), subject)\n",
"\n",
" elif question_type == 'disease_drug':\n",
" desc = [i['n.name'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}通常的使用的药品包括:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'drug_disease':\n",
" desc = [i['m.name'] for i in answers]\n",
" subject = answers[0]['n.name']\n",
" final_answer = '{0}主治的疾病有{1},可以试试'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'disease_check':\n",
" desc = [i['n.name'] for i in answers]\n",
" subject = answers[0]['m.name']\n",
" final_answer = '{0}通常可以通过以下方式检查出来:{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" elif question_type == 'check_disease':\n",
" desc = [i['m.name'] for i in answers]\n",
" subject = answers[0]['n.name']\n",
" final_answer = '通常可以通过{0}检查出来的疾病有{1}'.format(subject, ''.join(list(set(desc))[:num_limit]))\n",
"\n",
" return final_answer"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"for sql_ in sqls:\n",
" question_type = sql_['question_type']\n",
" queries = sql_['sql']\n",
" answers = []\n",
" \n",
" \n",
" for query in queries: # 运行每一条cypher查询语句\n",
" ress = g.run(query).data()\n",
" answers += ress\n",
" final_answer = answer_prettify(question_type, answers)\n",
" if final_answer:\n",
" final_answers.append(final_answer)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'m.name': '肺炎杆菌肺炎', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '大叶性肺炎', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '二硫化碳中毒', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '百日咳', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '放射性肺炎', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '苯中毒', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '大楼病综合征', 'r.name': '诊断检查', 'n.name': '血常规'},\n",
" {'m.name': '肺炎球菌肺炎', 'r.name': '诊断检查', 'n.name': '血常规'}]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ress"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'通常可以通过血常规检查出来的疾病有二硫化碳中毒;大楼病综合征;肺炎杆菌肺炎;大叶性肺炎;苯中毒;放射性肺炎;肺炎球菌肺炎;百日咳'"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_answer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,323 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 导入json文件"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import json\n",
"from tqdm import tqdm\n",
"df = pd.DataFrame()\n",
"data_path = 'QAMedicalKG/data/medical3.json'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## json转dataframe"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"21it [00:00, 194.84it/s]\n"
]
}
],
"source": [
"p_feature = ['category', 'cure_department', 'symptom','check', 'acompany','cure_way','recommand_drug' , 'common_drug', 'drug_detail', 'do_eat','not_eat', 'recommand_eat']\n",
"\n",
"for data in tqdm(open(data_path, encoding='utf8')):\n",
" temp_data = json.loads(data)\n",
" for each in p_feature:\n",
" try:\n",
" temp_data[each] = ','.join(temp_data[each])\n",
" except:\n",
" pass\n",
" \n",
" df = df.append(temp_data, ignore_index=True)\n",
"del df['_id']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 调整列名和顺序"
]
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {},
"outputs": [],
"source": [
"orders = ['name', 'desc', 'category', 'cure_department', 'cause', 'symptom', 'check', 'acompany', 'cost_money',\n",
" 'cure_lasttime', 'cure_way', 'cured_prob', 'easy_get',\n",
" 'get_prob', 'get_way', 'prevent',\n",
" 'recommand_drug' , 'common_drug', 'drug_detail', 'do_eat',\n",
" 'not_eat', 'recommand_eat', 'yibao_status']\n",
"df = df[orders]\n",
"orders_cn = ['疾病名称','疾病描述','疾病种类','科室','病因','症状','检查','并发症','花费','疗程','疗法','治愈率','易感人群','感染概率','感染途径','预防措施','推荐药物','常用药物','具体药物','可以吃','不可以吃','推荐吃','是否纳入医保']\n",
"df.columns = orders_cn"
]
},
{
"cell_type": "code",
"execution_count": 183,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>疾病名称</th>\n",
" <th>疾病描述</th>\n",
" <th>疾病种类</th>\n",
" <th>科室</th>\n",
" <th>病因</th>\n",
" <th>症状</th>\n",
" <th>检查</th>\n",
" <th>并发症</th>\n",
" <th>花费</th>\n",
" <th>疗程</th>\n",
" <th>...</th>\n",
" <th>感染概率</th>\n",
" <th>感染途径</th>\n",
" <th>预防措施</th>\n",
" <th>推荐药物</th>\n",
" <th>常用药物</th>\n",
" <th>具体药物</th>\n",
" <th>可以吃</th>\n",
" <th>不可以吃</th>\n",
" <th>推荐吃</th>\n",
" <th>是否纳入医保</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>肺泡蛋白质沉积症</td>\n",
" <td>肺泡蛋白质沉积症(简称PAP)又称Rosen-Castle-man-Liebow综合征是...</td>\n",
" <td>疾病百科,内科,呼吸内科</td>\n",
" <td>内科,呼吸内科</td>\n",
" <td>病因未明,推测与几方面因素有关:如大量粉尘吸入(铝,二氧化硅等),机体免疫功能下降(尤其婴幼...</td>\n",
" <td>紫绀,胸痛,呼吸困难,乏力,毓卓</td>\n",
" <td>胸部CT检查,肺活检,支气管镜检查</td>\n",
" <td>多重肺部感染</td>\n",
" <td>根据不同医院,收费标准不一致,省市三甲医院约( 8000——15000 元)</td>\n",
" <td>约3个月</td>\n",
" <td>...</td>\n",
" <td>0.00002%</td>\n",
" <td>无传染性</td>\n",
" <td>1、避免感染分支杆菌病卡氏肺囊肿肺炎巨细胞病毒等。\\n2、注意锻炼身体提高免疫力。</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>否</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>百日咳</td>\n",
" <td>百日咳(pertussiswhoopingcough)是由百日咳杆菌所致的急性呼吸道传染病...</td>\n",
" <td>疾病百科,儿科,小儿内科</td>\n",
" <td>儿科,小儿内科</td>\n",
" <td>(一)发病原因\\n病原菌是鲍特菌属(Bordetella)中的百日咳鲍特菌(B.pertus...</td>\n",
" <td>吸气时有蝉鸣音,痉挛性咳嗽,胸闷,肺阴虚,抽搐,低热,闫鹏辉,惊厥</td>\n",
" <td>耳、鼻、咽拭子细菌培养,周围血白细胞计数及分类检验,血常规,酶联免疫吸附试验,白细胞分类计数</td>\n",
" <td>肺不张</td>\n",
" <td>根据不同医院收费标准不一致市三甲医院约1000-4000元</td>\n",
" <td>1-2个月</td>\n",
" <td>...</td>\n",
" <td>0.5%</td>\n",
" <td>呼吸道传播</td>\n",
" <td>1、控制传染源在流行季节若有前驱症状应及早抗生素治疗。\\n2、切断传播途径由于百日咳杆...</td>\n",
" <td>琥乙红霉素片,琥乙红霉素颗粒,百咳静糖浆,穿心莲内酯片,红霉素肠溶片,环酯红霉素片</td>\n",
" <td>穿心莲内酯片,百咳静糖浆</td>\n",
" <td>惠普森穿心莲内酯片(穿心莲内酯片),北京同仁堂百咳静糖浆(百咳静糖浆),邦琪药业百咳静糖浆(...</td>\n",
" <td>南瓜子仁,圆白菜,樱桃番茄,小白菜</td>\n",
" <td>螃蟹,海蟹,海虾,海螺</td>\n",
" <td>清蒸鸡蛋羹,百合双耳鸡蛋羹,排骨汤,罗汉果雪耳鸡汤,小黄瓜凉拌面,黄瓜三丝汤,黄瓜拌兔丝,黄...</td>\n",
" <td>否</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
" 疾病名称 疾病描述 疾病种类 \\\n",
"0 肺泡蛋白质沉积症 肺泡蛋白质沉积症(简称PAP)又称Rosen-Castle-man-Liebow综合征是... 疾病百科,内科,呼吸内科 \n",
"1 百日咳 百日咳(pertussiswhoopingcough)是由百日咳杆菌所致的急性呼吸道传染病... 疾病百科,儿科,小儿内科 \n",
"\n",
" 科室 病因 \\\n",
"0 内科,呼吸内科 病因未明,推测与几方面因素有关:如大量粉尘吸入(铝,二氧化硅等),机体免疫功能下降(尤其婴幼... \n",
"1 儿科,小儿内科 (一)发病原因\\n病原菌是鲍特菌属(Bordetella)中的百日咳鲍特菌(B.pertus... \n",
"\n",
" 症状 \\\n",
"0 紫绀,胸痛,呼吸困难,乏力,毓卓 \n",
"1 吸气时有蝉鸣音,痉挛性咳嗽,胸闷,肺阴虚,抽搐,低热,闫鹏辉,惊厥 \n",
"\n",
" 检查 并发症 \\\n",
"0 胸部CT检查,肺活检,支气管镜检查 多重肺部感染 \n",
"1 耳、鼻、咽拭子细菌培养,周围血白细胞计数及分类检验,血常规,酶联免疫吸附试验,白细胞分类计数 肺不张 \n",
"\n",
" 花费 疗程 ... 感染概率 感染途径 \\\n",
"0 根据不同医院,收费标准不一致,省市三甲医院约( 8000——15000 元) 约3个月 ... 0.00002% 无传染性 \n",
"1 根据不同医院收费标准不一致市三甲医院约1000-4000元 1-2个月 ... 0.5% 呼吸道传播 \n",
"\n",
" 预防措施 \\\n",
"0 1、避免感染分支杆菌病卡氏肺囊肿肺炎巨细胞病毒等。\\n2、注意锻炼身体提高免疫力。 \n",
"1 1、控制传染源在流行季节若有前驱症状应及早抗生素治疗。\\n2、切断传播途径由于百日咳杆... \n",
"\n",
" 推荐药物 常用药物 \\\n",
"0 NaN \n",
"1 琥乙红霉素片,琥乙红霉素颗粒,百咳静糖浆,穿心莲内酯片,红霉素肠溶片,环酯红霉素片 穿心莲内酯片,百咳静糖浆 \n",
"\n",
" 具体药物 可以吃 \\\n",
"0 NaN \n",
"1 惠普森穿心莲内酯片(穿心莲内酯片),北京同仁堂百咳静糖浆(百咳静糖浆),邦琪药业百咳静糖浆(... 南瓜子仁,圆白菜,樱桃番茄,小白菜 \n",
"\n",
" 不可以吃 推荐吃 是否纳入医保 \n",
"0 NaN NaN 否 \n",
"1 螃蟹,海蟹,海虾,海螺 清蒸鸡蛋羹,百合双耳鸡蛋羹,排骨汤,罗汉果雪耳鸡汤,小黄瓜凉拌面,黄瓜三丝汤,黄瓜拌兔丝,黄... 否 \n",
"\n",
"[2 rows x 23 columns]"
]
},
"execution_count": 183,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"疾病名称 百日咳\n",
"疾病描述 百日咳(pertussiswhoopingcough)是由百日咳杆菌所致的急性呼吸道传染病...\n",
"疾病种类 疾病百科,儿科,小儿内科\n",
"科室 儿科,小儿内科\n",
"病因 (一)发病原因\\n病原菌是鲍特菌属(Bordetella)中的百日咳鲍特菌(B.pertus...\n",
"症状 吸气时有蝉鸣音,痉挛性咳嗽,胸闷,肺阴虚,抽搐,低热,闫鹏辉,惊厥\n",
"检查 耳、鼻、咽拭子细菌培养,周围血白细胞计数及分类检验,血常规,酶联免疫吸附试验,白细胞分类计数\n",
"并发症 肺不张\n",
"花费 根据不同医院收费标准不一致市三甲医院约1000-4000元\n",
"疗程 1-2个月\n",
"疗法 药物治疗,支持性治疗\n",
"治愈率 98%\n",
"易感人群 多见于小儿\n",
"感染概率 0.5%\n",
"感染途径 呼吸道传播\n",
"预防措施 1、控制传染源在流行季节若有前驱症状应及早抗生素治疗。\\n2、切断传播途径由于百日咳杆...\n",
"推荐药物 琥乙红霉素片,琥乙红霉素颗粒,百咳静糖浆,穿心莲内酯片,红霉素肠溶片,环酯红霉素片\n",
"常用药物 穿心莲内酯片,百咳静糖浆\n",
"具体药物 惠普森穿心莲内酯片(穿心莲内酯片),北京同仁堂百咳静糖浆(百咳静糖浆),邦琪药业百咳静糖浆(...\n",
"可以吃 南瓜子仁,圆白菜,樱桃番茄,小白菜\n",
"不可以吃 螃蟹,海蟹,海虾,海螺\n",
"推荐吃 清蒸鸡蛋羹,百合双耳鸡蛋羹,排骨汤,罗汉果雪耳鸡汤,小黄瓜凉拌面,黄瓜三丝汤,黄瓜拌兔丝,黄...\n",
"是否纳入医保 否\n",
"Name: 1, dtype: object"
]
},
"execution_count": 184,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 导出csv文件"
]
},
{
"cell_type": "code",
"execution_count": 185,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('medical_data.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}