QAonMilitaryKG/military_qa.py

593 lines
35 KiB
Python
Raw Permalink Normal View History

2019-05-02 00:44:38 +08:00
#!/usr/bin/env python3
# coding: utf-8
# File: militarygraph.py
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
# Date: 19-3-11
import os
import re
import json
import jieba
import jieba.posseg as pseg
import pymongo
class MilitaryGraph:
    """Template-based question answering over a MongoDB collection of weapons.

    A question is segmented with jieba after the domain vocabulary (countries,
    categories, attributes, comparison words, superlatives, weapon names) has
    been registered under custom part-of-speech flags.  The ordered list of
    flags found in the question (the "pattern") selects a query template,
    which is turned into one or more MongoDB queries.
    """

    # (parser_dict key, jieba flag) pairs, in the order the keys are emitted.
    _PARSER_KEYS = (
        ('n_attrs', 'n_attr'),
        ('n_times', 'n_time'),
        ('n_bigs', 'n_big'),
        ('n_smalls', 'n_small'),
        ('n_countries', 'n_country'),
        ('n_compares', 'n_compare'),
        ('n_mosts', 'n_most'),
        ('n_units', 'n_unit'),
        ('n_weapons', 'n_weapon'),
    )
    # Flags that take part in template matching.
    _PATTERN_FLAGS = ('n_attr', 'n_time', 'n_big', 'n_small', 'n_unit',
                      'n_country', 'n_compare', 'n_most', 'n_weapon')
    # Flags naming a weapon category (second-level / top-level).
    _CATEGORY_FLAGS = ('n_small', 'n_big')

    # Templates below are matched against ' '.join(pattern).
    # One weapon (optionally preceded by a country) followed by attributes.
    _SINGLE_ENTITY = re.compile(r'^(?:n_country )?n_weapon(?: n_attr)+$')
    # Several weapons followed by shared attributes.
    _MULTI_ENTITY = re.compile(r'^n_weapon(?: n_weapon)+(?: n_attr)+$')
    # Two or more "[country] weapon attr..." groups, each with its own attributes.
    _INTERLEAVED = re.compile(
        r'^(?:(?:n_country )?n_weapon(?: n_attr)+(?: |$)){2,}$')

    # Template cores; a category flag is expected at either end of the pattern.
    _SINGLE_COMPARE_BODIES = (
        ['n_attr', 'n_compare', 'n_unit'],
        ['n_attr', 'n_compare', 'n_time'],
        ['n_attr', 'n_time', 'n_compare'],
    )
    _DOUBLE_COMPARE_BODIES = (
        ['n_attr', 'n_compare', 'n_unit', 'n_compare', 'n_unit'],
        ['n_attr', 'n_compare', 'n_time', 'n_compare', 'n_time'],
        ['n_attr', 'n_time', 'n_compare', 'n_time', 'n_compare'],
    )
    _MOST_BODIES = (
        ['n_attr', 'n_most'],
    )

    def __init__(self):
        """Load the vocabularies, connect to MongoDB and register words in jieba."""
        # Directory of this module, resolved portably (not by splitting on '/').
        cur = os.path.dirname(os.path.abspath(__file__))
        self.datapath = os.path.join(cur, 'data/military.json')
        self.conn = pymongo.MongoClient()
        db_name = 'military_qa'
        col_name = 'data'
        self.col = self.conn[db_name][col_name]
        # Canonical attribute name -> surface forms seen in questions.
        # A surface form listed under several keys resolves to the LAST key
        # (see build_dict), so the order of entries matters.
        self.attributes = {
            '同型': ['同型'], '机高': ['机高'],
            '战斗全重': ['战斗全重'], '水下排水量': ['水下排水量'],
            '处理器': ['处理器'], '主炮': ['主炮'],
            '制导系统': ['制导系统'], '全重': ['全重'],
            '纬度': ['纬度'], '炮口初速': ['炮口初速'],
            '发射性能': ['发射性能'], '兵装': ['兵装'],
            '型号': ['型号'],
            '长度': ['长度', '全长', '多长'], '翼展': ['翼展', '翼长'],
            '全枪长': ['全枪长', '枪长'], '射程': ['射程'],
            '前型': ['前型'],
            '发射地点': ['发射地点', '发射地点'], '首飞时间': ['首飞时间', '首飞', '初次飞行', '首次飞行'],
            '发动机数量': ['发动机数量', '几个发动机', '多少个发动机', '发动机个数', '发动机数目', '发动机个', '发动机数'], '乘员': ['乘员'],
            '战斗射速': ['战斗射速'], '生产单位': ['生产单位', '产商', '制造商', '厂家', '制造机构'],
            '最大行程': ['最大行程', '最常距离'], '炮管长度': ['炮管长度', '炮管长', '炮管全长'],
            '气动布局': ['气动布局'], '武备': ['武备'],
            '武器装备': ['武器装备'], '引信': ['引信'],
            '参战情况': ['参战情况'],
            '动力装置': ['动力装置'], '飞行速度': ['飞行速度'],
            '服役时间': ['服役时间'], '新造时': ['新造时'],
            '活动范围': ['活动范围'], '弹匣容弹量': ['弹匣容弹量'],
            '编制': ['编制'], '高度': ['高度'],
            '制造厂': ['制造厂'], '口径': ['口径'],
            '鱼雷': ['鱼雷'], '经度': ['经度'],
            '研发时间': ['研发时间'], '简介': ['简介'],
            '首次轨道发射': ['首次轨道发射'],
            '挂载点': ['挂载点'], '刀锋宽度': ['刀锋宽度'],
            '续航距离': ['续航距离'], '枪械': ['枪械'],
            '最大速度': ['最大速度'], '运载火箭': ['运载火箭'],
            '生产年限': ['生产年限'], '全枪重': ['全枪重'],
            '空重': ['空重'], '水雷': ['水雷'],
            '枪炮': ['枪炮'], '水上排水量': ['水上排水量', '排水量'],
            '诞生时间': ['诞生时间'], '内置武器': ['内置武器'],
            '机长': ['机长'], '中心直径': ['中心直径', '直径'],
            '装药类型': ['装药类型'], '最大起飞重量': ['最大起飞重量', '起飞重量'],
            '有效射程': ['有效射程'], '现状': ['现状'],
            '研制时间': ['研制时间'], '舰舰导弹': ['舰舰导弹'],
            '下水时间': ['下水时间', '下水'], '机炮': ['机炮'],
            '弹长': ['弹长'], '退役时间': ['退役时间', '退役'],
            '最大射程': ['最大射程'], '改装时': ['改装时'],
            '刀重': ['刀重'], '自持力': ['自持力'],
            '产国': ['产国'], '航速': ['航速'],
            '制造商': ['制造商'], '型宽': ['型宽'],
            '弹重': ['弹重'], '刀长': ['刀长'],
            '舰长': ['舰长'], '研发厂商': ['研发厂商'],
            '旋翼直径': ['旋翼直径'], '导弹': ['导弹'],
            '满排吨位': ['满排吨位'], '底盘类型': ['底盘类型'],
            '刀锋长度': ['刀锋长度'], '弹径': ['弹径'],
            '全长': ['全长'], '竣工时': ['竣工时'],
            '发射日期': ['发射日期'], '宽度': ['宽度'],
            '总重': ['总重'], '建造时间': ['建造时间'],
            '射控装置': ['射控装置'], '图片': ['图片'],
            '轨道': ['轨道'], '改装前': ['改装前'],
            '发动机': ['发动机'], '最大航程': ['最大航程'],
            '研发单位': ['研发单位'], '大类': ['大类'],
            '关注度': ['关注度'], '最大飞行速度': ['最大飞行速度'],
            '火炮': ['火炮'], '战地机型': ['战地机型'],
            '防空兵器': ['防空兵器'], '潜航深度': ['潜航深度'],
            '轨道卫星': ['轨道卫星'], '尾翼装置': ['尾翼装置'],
            '乘员与载员': ['乘员与载员'], '名称': ['名称'],
            '引信装置': ['引信装置'], '次型': ['次型'],
            '车长': ['车长'], '武装': ['武装'], "航长": ['航长'],
            '反舰导弹': ['反舰导弹'],
            '满载排水量': ['满载排水量'], '装备': ['装备']}
        # Top-level categories.
        # NOTE(review): the '' surface form looks like a lost single-character
        # word; it is ignored by add_jieba — confirm against the upstream file.
        self.big_cates = {
            '火炮': ['火炮'], '飞行器': ['飞行器'],
            '舰船舰艇': ['舰船舰艇'], '坦克装甲车辆': ['坦克装甲车辆'],
            '太空装备': ['太空装备'], '爆炸物': ['爆炸物'],
            '导弹武器': ['导弹武器'], '枪械与单兵': ['枪械与单兵', '枪械', '', '单兵']}
        # Second-level categories; shared surface forms such as '导弹' or
        # '直升机' resolve to the last category that lists them.
        self.second_cates = {
            '榴弹发射器': ['榴弹发射器'], '炸弹': ['炸弹', '炸药'],
            '手榴弹': ['手榴弹'], '电子战机': ['电子战机'],
            '机枪': ['机枪'], '宇宙飞船': ['宇宙飞船', '飞船'],
            '加农炮': ['加农炮'], '救护车': ['救护车'],
            '攻击机': ['攻击机'], '非自动步枪': ['非自动步枪', '步枪'],
            '火箭弹': ['火箭弹'], '地雷': ['地雷'],
            '高射炮': ['高射炮'], '航天飞机': ['航天飞机'],
            '航天机构': ['航天机构', '航天局', '航天部门'], '舰舰导弹': ['舰舰导弹'],
            '通用飞机': ['通用飞机'], '岸舰导弹': ['岸舰导弹', '导弹'],
            '舰炮': ['舰炮'], '巡洋舰': ['巡洋舰'],
            '气垫艇/气垫船': ['气垫艇/气垫船', '气垫艇', '气垫船'], '装甲指挥车': ['装甲指挥车', '装甲车', '指挥车'],
            '无人机': ['无人机'], '氢弹': ['氢弹'],
            '坦克炮': ['坦克炮'], '干线': ['干线'],
            '原子弹': ['原子弹'], '冲锋枪': ['冲锋枪'],
            '导弹艇': ['导弹艇'], '水雷战舰艇': ['水雷战舰艇'],
            '侦察机': ['侦察机'], '试验机': ['试验机'],
            '舰地(潜地)导弹': ['舰地(潜地)导弹', '舰地导弹', '潜地导弹', '导弹'],
            '支线': ['支线'], '军事卫星': ['军事卫星'],
            '地空导弹': ['地空导弹'], '航空炮': ['航空炮'],
            '战列舰': ['战列舰'], '无后坐炮': ['无后坐炮'],
            '空地导弹': ['空地导弹'], '加农榴弹炮': ['加农榴弹炮'],
            '运输机': ['运输机'], '自行火炮': ['自行火炮'],
            '地地导弹': ['地地导弹'], '空舰导弹': ['空舰导弹'],
            '教练机': ['教练机'], '其他特种装甲车辆': ['其他特种装甲车辆'],
            '火箭筒': ['火箭筒'], '空间探测器': ['空间探测器', '探测器'],
            '预警机': ['预警机'], '航空母舰': ['航空母舰', '航母'],
            '迷彩服': ['迷彩服'], '弹炮结合系统': ['弹炮结合系统'],
            '科学卫星': ['科学卫星'], '空空导弹': ['空空导弹', '导弹'],
            '迫击炮': ['迫击炮'],
            '应用卫星': ['应用卫星', '卫星'], '保障辅助舰艇': ['保障辅助舰艇'],
            '刀具': ['刀具'], '霰弹枪': ['霰弹枪'],
            '自动步枪': ['自动步枪'], '手枪': ['手枪'],
            '反弹道导弹': ['反弹道导弹'], '两栖作战舰艇': ['两栖作战舰艇'],
            '特种坦克': ['特种坦克', '坦克'], '运输直升机': ['运输直升机', '直升机'],
            '巡逻舰/艇': ['巡逻舰/艇', '巡逻舰', '巡逻舰艇', '巡逻舰艇'], '加油机': ['加油机'],
            '反坦克炮': ['反坦克炮'],
            '越野车': ['越野车'], '步兵战车': ['步兵战车'],
            '战斗机': ['战斗机'], '护卫舰': ['护卫舰'],
            '工程抢修车': ['工程抢修车'], '反潜机': ['反潜机'],
            '常规潜艇': ['常规潜艇'], '装甲侦察车': ['装甲侦察车'],
            '舰空导弹': ['舰空导弹'], '运载火箭': ['运载火箭'],
            '中子弹': ['中子弹'], '飞艇': ['飞艇'],
            '航天基地': ['航天基地'], '鱼雷': ['鱼雷'],
            '轰炸机': ['轰炸机'], '技术试验卫星': ['技术试验卫星', '卫星'],
            '狙击枪': ['狙击枪'], '水雷': ['水雷'],
            '装甲车载炮': ['装甲车载炮'], '榴弹炮': ['榴弹炮'],
            '驱逐舰': ['驱逐舰'], '装甲运兵车': ['装甲运兵车'],
            '火箭炮': ['火箭炮'], '多用途直升机': ['多用途直升机', '直升机'],
            '核潜艇': ['核潜艇'], '武装直升机': ['武装直升机', '直升机'],
            '布/扫雷车': ['布/扫雷车', '扫雷车', '扫雷车'], '潜舰导弹': ['潜舰导弹', '导弹'],
            '主战坦克': ['主战坦克', '坦克']}
        self.weapons = self.load_weapons()
        self.weapon_dict = {i: i for i in self.weapons}
        # Producing countries; '俄罗斯' also appears under '苏/俄', which is
        # listed last and therefore wins in country_dict.
        self.countries = {
            '荷兰': ['荷兰'], '阿根廷': ['阿根廷'], '瑞士': ['瑞士'],
            '伊朗': ['伊朗'], '以色列': ['以色列'], '前南斯拉夫': ['前南斯拉夫'],
            '越南': ['越南'], '葡萄牙': ['葡萄牙'], '乌克兰': ['乌克兰'],
            '新西兰': ['新西兰'], '奥地利': ['奥地利'], '希腊': ['希腊'],
            '塞尔维亚': ['塞尔维亚'], '比利时': ['比利时'],
            '俄罗斯': ['俄罗斯'], '前捷克斯洛伐克': ['前捷克斯洛伐克'],
            '捷克': ['捷克'], '土耳其': ['土耳其'], '缅甸': ['缅甸'],
            '美国': ['美国'], '德国': ['德国'], '巴西': ['巴西'],
            '印度尼西亚': ['印度尼西亚'], '法国': ['法国'],
            '瑞典': ['瑞典'], '前苏联': ['前苏联'],
            '朝鲜': ['朝鲜'],
            '埃及': ['埃及'], '墨西哥': ['墨西哥'], '巴基斯坦': ['巴基斯坦'],
            '马来西亚': ['马来西亚'], '澳大利亚': ['澳大利亚'], '泰国': ['泰国'],
            '欧盟': ['欧盟'], '波兰': ['波兰'],
            '韩国': ['韩国'], '日本': ['日本'],
            '罗马尼亚': ['罗马尼亚'], '克罗地亚': ['克罗地亚'], '智利': ['智利'],
            '匈牙利': ['匈牙利'], '意大利': ['意大利'], '英国': ['英国'],
            '丹麦': ['丹麦'], '挪威': ['挪威'], '哈萨克斯坦': ['哈萨克斯坦'],
            '爱尔兰': ['爱尔兰'], '伊拉克': ['伊拉克'],
            '中国': ['中国', '中华人民共和国'], '印度': ['印度'],
            '保加利亚': ['保加利亚'], '斯洛伐克': ['斯洛伐克'],
            '西班牙': ['西班牙'], '秘鲁': ['秘鲁'],
            '阿联酋': ['阿联酋'], '卢森堡': ['卢森堡'],
            '巴拿马': ['巴拿马'], '新加坡': ['新加坡'],
            '波黑': ['波黑'], '南非': ['南非'],
            '苏/俄': ['苏/俄', '苏联', '俄罗斯'], '加拿大': ['加拿大'], '芬兰': ['芬兰']}
        # MongoDB comparison operator -> comparison words.
        self.compares = {
            '$gt': ['高于', '大于', '长于', '高过', '大过', '长过', '多于', '远于', '远过', '之后', '晚于', '后于'],
            '$lt': ['低于', '小于', '短于', '低过', '短过', '少于', '近于', '近过', '未达到', '没达到', '之前', '先于', '早于'],
            '$lte': ['不高于', '不大于', '不长于', '不高过', '不大过', '不长过', '不多于', '不远于', '不远过'],
            '$gte': ['不低于', '不小于', '不短于', '不低过', '不短过', '不少于', '不近于', '不近过', '达到'],
            '$eq': ['等于', '差不多'],
            '$ne': ['不等于', '不是']}
        self.counts = ['多少', '', '几多']
        # MongoDB sort direction -> superlative words (-1 descending, 1 ascending).
        self.mosts = {
            -1: ['最大', '最远', '最长', '最高', '最久', '最快', '最多', '最强'],
            1: ['最小', '最短', '最近', '最低', '最矮', '最慢', '最少', '最弱'],
        }
        # Unit word -> [multiplier applied by standard_unit, target unit label].
        # NOTE(review): the '' keys (and probably some '' labels) look like
        # single-character unit names lost from this copy of the file; they
        # collapse into one entry and are excluded from the unit regex below.
        # Restore them from the upstream data before relying on such units.
        self.unit_dict = {
            '海里': [1852, ''],
            '英里': [1610, ''],
            '/节': [1852, ''],
            'km/节': [1000, ''],
            '': [1000, '千克'],
            '-吨': [1000, '千克'],
            '公里': [1000, ''],
            '公里/节': [1000, ''],
            '公里/小时': [1000, ''],
            '海里节': [1852, ''],
            '海里,节': [1852, ''],
            '海里/节': [1852, ''],
            '海哩/节': [1852, ''],
            '海浬/节': [1852, ''],
            '毫米': [0.001, ''],
            '': [1852, ''],
            '节/海里': [1852, ''],
            '节海里': [1852, ''],
            '节行驶英里': [1852, ''],
            '节下海里': [1852, ''],
            '': [0.001, '千克'],
            '': [1852, ''],
            '里/节': [1852, ''],
            '': [1, ''],
            '千克': [1, ''],
            '千米': [1000, ''],
            '千米/节': [1000, ''],
            '千米/时': [1000, ''],
            '千米/小时': [1000, ''],
            '千米每小时': [1000, ''],
            '万海里/节': [18520000, ''],
            '英里,节': [1610, ''],
            '英里/节': [1610, ''],
            '余英里': [1610, ''],
            '约海里': [1852, ''],
            '最大海里': [1852, ''],
            '厘米': [0.01, ''],
            '分米': [0.1, ''],
            '': [1, ''],
            '': [1, '']}
        time_regex = '[0-9]{4}年[0-9]{0,4}月?[0-9]{0,4}日?'
        self.unit_pattern = self._build_unit_pattern(self.unit_dict)
        self.time_pattern = re.compile(time_regex)
        self.country_dict = self.build_dict(self.countries)
        self.big_dict = self.build_dict(self.big_cates)
        self.small_dict = self.build_dict(self.second_cates)
        self.attribute_dict = self.build_dict(self.attributes)
        self.compare_dict = self.build_dict(self.compares)
        self.most_dict = self.build_dict(self.mosts)
        # Registration order matters: a word present in several vocabularies
        # keeps the flag of the last registration.
        self.add_jieba(self.country_dict, 'n_country')
        self.add_jieba(self.big_dict, 'n_big')
        self.add_jieba(self.small_dict, 'n_small')
        self.add_jieba(self.attribute_dict, 'n_attr')
        self.add_jieba(self.compare_dict, 'n_compare')
        self.add_jieba(self.most_dict, 'n_most')
        self.add_jieba(self.weapons, 'n_weapon')

    @staticmethod
    def _build_unit_pattern(unit_words):
        """Compile the "<number><unit>" regex for the given unit words.

        Group 1 is the number (integer or decimal), group 2 the unit.  Longer
        units are tried first so that '公里/小时' wins over '公里'.  Empty words
        are dropped: an empty alternative would make every bare number match.
        """
        words = sorted((w for w in unit_words if w), key=len, reverse=True)
        if not words:
            return re.compile(r'(?!)')  # never matches
        alternatives = '|'.join(re.escape(w) for w in words)
        return re.compile(r'([0-9]+(?:\.[0-9]+)?)(%s)' % alternatives)

    def load_weapons(self):
        """Return the distinct weapon names ('名称') found in the JSON-lines data file."""
        names = set()
        with open(self.datapath, encoding='utf-8') as handle:
            for record in handle:
                if not record.strip():
                    continue  # tolerate blank lines
                names.add(json.loads(record)['名称'])
        return list(names)

    def build_dict(self, dict):
        """Invert a {category: [words]} mapping into {word: category}.

        When a word is listed under several categories the last one wins.
        The parameter name is kept for backward compatibility.
        """
        wd_dict = {}
        for cate, wds in dict.items():
            for wd in wds:
                wd_dict[wd] = cate
        return wd_dict

    def detect_entity(self, question):
        """Return (times, units): date expressions and "<number><unit>" strings in the question."""
        units = [num + unit for num, unit in self.unit_pattern.findall(question)]
        times = self.time_pattern.findall(question)
        return times, units

    def standard_year(self, sent):
        """Normalise a date expression to 'YYYYMMDD'.

        Missing month/day default to '01'.  Returns '' when no four-digit
        year is present.
        """
        sent = sent.replace(' ', '')
        year = re.search(r'[0-9]{4}', sent)
        if not year:
            return ''
        # Capture only the digits: the 月/日 suffix must not reach full_date,
        # which converts its argument with int().
        month = re.search(r'([0-9]{1,4})月', sent)
        day = re.search(r'([0-9]{1,4})日', sent)
        return (year.group(0)
                + self.full_date(month.group(1) if month else '')
                + self.full_date(day.group(1) if day else ''))

    def full_date(self, date):
        """Zero-pad a month/day string to two digits; an empty value becomes '01'."""
        if not date:
            return '01'
        return date.zfill(2)

    def check_num(self, sent):
        """Return the first number (integer or decimal) in *sent*, or '' if there is none."""
        found = re.search(r'\d+(?:\.\d+)?', str(sent))
        return found.group(0) if found else ''

    def standard_unit(self, unit_value):
        """Convert a "<number><unit>" string to a float in the base unit.

        Units missing from ``unit_dict`` use a multiplier of 1.

        Raises:
            ValueError: if *unit_value* contains no number.
        """
        text = str(unit_value)
        num = self.check_num(text)
        if not num:
            raise ValueError('no number found in %r' % (unit_value,))
        unit = text.replace(num, '', 1).strip()
        factor = self.unit_dict.get(unit, [1, 'default'])[0]
        return float(num) * factor

    def add_jieba(self, wds, tag):
        """Register every non-empty word of *wds* in jieba under the flag *tag*."""
        for wd in wds:
            if wd:
                jieba.add_word(wd, tag=tag, freq=300000)

    def question_parser(self, question):
        """Segment the question and collect the tagged words.

        Returns a dict with one list of words per flag (keys 'n_attrs',
        'n_times', ...) plus 'pattern', the ordered list of flags found.
        """
        times, units = self.detect_entity(question)
        # Dates and quantities are question-specific, so they are registered
        # just before segmentation.
        self.add_jieba(times, 'n_time')
        self.add_jieba(units, 'n_unit')
        tokens = [(tok.word, tok.flag) for tok in pseg.cut(question)]
        parser_dict = {}
        for key, flag in self._PARSER_KEYS:
            parser_dict[key] = [wd for wd, wd_flag in tokens if wd_flag == flag]
        parser_dict['pattern'] = [wd_flag for _, wd_flag in tokens
                                  if wd_flag in self._PATTERN_FLAGS]
        return parser_dict

    def _matches_with_category(self, pattern, bodies):
        """Tell whether *pattern* is one of *bodies* with a category flag at either end."""
        if len(pattern) < 2:
            return False
        if pattern[-1] in self._CATEGORY_FLAGS and pattern[:-1] in bodies:
            return True
        return pattern[0] in self._CATEGORY_FLAGS and pattern[1:] in bodies

    def _category_condition(self, parser_dict, pattern):
        """Return the filter for the category named in the question.

        Second-level categories are stored under '类型', top-level ones under '大类'.
        """
        if 'n_small' in pattern:
            return {'类型': self.small_dict.get(parser_dict.get('n_smalls')[0])}
        return {'大类': self.big_dict.get(parser_dict.get('n_bigs')[0])}

    def _compare_values(self, parser_dict, pattern):
        """Return the normalised operands (quantities or dates) of a comparison question."""
        if 'n_unit' in pattern:
            return [self.standard_unit(unit) for unit in parser_dict.get('n_units')]
        return [self.standard_year(year) for year in parser_dict.get('n_times')]

    def search_answer(self, parser_dict):
        """Map the parsed question onto a query template and run it.

        Args:
            parser_dict: output of ``question_parser``.

        Returns:
            The list produced by ``query_mongo``; it is empty when no template
            matches the pattern.
        """
        print('step1:问句解析 >>', parser_dict)
        pattern = parser_dict['pattern']
        print('step2:查询模板 >>', pattern)
        signature = ' '.join(pattern)
        search_data = []
        search_flag = 1  # 1: plain attribute lookup, 0: sorted (superlative) lookup

        if pattern in (['n_country', 'n_small'], ['n_small', 'n_country'],
                       ['n_country', 'n_big'], ['n_big', 'n_country']):
            # Weapons of a category produced by a country.
            condition = {'产国': self.country_dict.get(parser_dict.get('n_countries')[0])}
            condition.update(self._category_condition(parser_dict, pattern))
            search_data.append({'condition': condition, 'targets': ['名称']})

        elif pattern in (['n_country', 'n_weapon'], ['n_weapon']):
            # A bare weapon name: answer with its introduction.
            n_weapon = self.weapon_dict.get(parser_dict.get('n_weapons')[0])
            search_data.append({'condition': {'名称': n_weapon}, 'targets': ['简介']})

        elif self._SINGLE_ENTITY.match(signature):
            # One weapon, one or more attributes.
            n_weapon = self.weapon_dict.get(parser_dict.get('n_weapons')[0])
            targets = [self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs')]
            search_data.append({'condition': {'名称': n_weapon}, 'targets': targets})

        elif self._MULTI_ENTITY.match(signature):
            # Several weapons sharing the same attributes.
            n_weapons = [self.weapon_dict.get(weapon) for weapon in parser_dict.get('n_weapons')]
            targets = [self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs')]
            search_data.append({'condition': {'名称': {"$in": n_weapons}}, 'targets': targets})

        elif self._INTERLEAVED.match(signature):
            # "weapon attr... weapon attr...": each weapon owns the attributes
            # that follow it.  Walking the pattern keeps the split correct
            # even when country flags are present.
            n_weapons = [self.weapon_dict.get(weapon) for weapon in parser_dict.get('n_weapons')]
            attrs = iter(self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs'))
            groups = []
            for flag in pattern:
                if flag == 'n_weapon':
                    groups.append([])
                elif flag == 'n_attr':
                    groups[-1].append(next(attrs))
            for n_weapon, targets in zip(n_weapons, groups):
                search_data.append({'condition': {'名称': n_weapon}, 'targets': targets})

        elif self._matches_with_category(pattern, self._SINGLE_COMPARE_BODIES):
            # Category filtered by one comparison on an attribute.
            n_attr = self.attribute_dict.get(parser_dict.get('n_attrs')[0])
            n_compare = self.compare_dict.get(parser_dict.get('n_compares')[0])
            value = self._compare_values(parser_dict, pattern)[0]
            condition = {n_attr: {n_compare: value}}
            condition.update(self._category_condition(parser_dict, pattern))
            search_data.append({'condition': condition, 'targets': [n_attr]})

        elif self._matches_with_category(pattern, self._DOUBLE_COMPARE_BODIES):
            # Category filtered by a range (two comparisons) on an attribute.
            n_attr = self.attribute_dict.get(parser_dict.get('n_attrs')[0])
            n_compares = [self.compare_dict.get(compare) for compare in parser_dict.get('n_compares')]
            values = self._compare_values(parser_dict, pattern)
            condition = {n_attr: {n_compares[0]: values[0], n_compares[1]: values[1]}}
            condition.update(self._category_condition(parser_dict, pattern))
            search_data.append({'condition': condition, 'targets': [n_attr]})

        elif self._matches_with_category(pattern, self._MOST_BODIES):
            # Superlative: sort the category by the attribute and keep the first.
            search_flag = 0
            n_attr = self.attribute_dict.get(parser_dict.get('n_attrs')[0])
            n_most = self.most_dict.get(parser_dict.get('n_mosts')[0])
            condition = self._category_condition(parser_dict, pattern)
            condition['sort_key'] = {n_attr: n_most}
            search_data.append({'condition': condition, 'targets': ['名称', n_attr]})

        return self.query_mongo(search_flag, search_data)

    def query_mongo(self, search_flag, search_data):
        """Dispatch to the plain lookup (truthy flag) or the sorted lookup (falsy flag)."""
        if search_flag:
            return self.query_mongo_attr(search_data)
        return self.query_mongo_sort(search_data)

    def query_mongo_attr(self, search_data):
        """Run each query and format the requested attributes of every hit.

        Returns one list per matching record; attributes missing from a record
        are skipped, so a record may contribute an empty list.
        """
        result = []
        for search in search_data:
            targets = search['targets']
            for res in self.col.find(search['condition']):
                name = res.get('名称') or ''
                result.append([name + target + ':' + str(res.get(target, 'null'))
                               for target in targets
                               if res.get(target, 'null') != 'null'])
        return result

    def query_mongo_sort(self, search_data):
        """Run each query sorted by its 'sort_key' and format the first record."""
        result = []
        for search in search_data:
            # 'sort_key' is carried inside the condition but is not a filter.
            condition = {key: value for key, value in search['condition'].items()
                         if key != 'sort_key'}
            sort_condition = list(search['condition'].get('sort_key').items())
            targets = search['targets']
            for res in self.col.find(condition).sort(sort_condition).limit(1):
                name = res.get('名称') or ''
                result.append([name + target + ':' + str(res.get(target, 'null'))
                               for target in targets])
        return result

    def qa_main(self, question):
        """Answer one question and print the result."""
        parser_dict = self.question_parser(question)
        results = self.search_answer(parser_dict)
        # Both "no template matched" ([]) and "records without the requested
        # attributes" ([[]]) mean there is nothing to show.
        answers = [result for result in results if result]
        if not answers:
            print('小勇:对不起,目前暂时还无法回答此类问题...')
        else:
            print('小勇:共找到%s个答案, 下面是具体明细:' % len(answers))
            for result in answers:
                print(result)
if __name__ == '__main__':
    # Interactive loop: read a question, print the answer, repeat until the
    # input stream ends (Ctrl-D) or the user interrupts (Ctrl-C).
    handler = MilitaryGraph()
    while True:
        try:
            question = input("用户:").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not question:
            continue  # nothing to parse on an empty line
        handler.qa_main(question)