593 lines
35 KiB
Python
593 lines
35 KiB
Python
#!/usr/bin/env python3
|
||
# coding: utf-8
|
||
# File: militarygraph.py
|
||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||
# Date: 19-3-11
|
||
|
||
import os
|
||
import re
|
||
import json
|
||
import jieba
|
||
import jieba.posseg as pseg
|
||
import pymongo
|
||
|
||
class MilitaryGraph:
|
||
def __init__(self):
    """Set up data paths, the MongoDB collection, vocabularies and jieba tags.

    The constructor
      * resolves ``data/military.json`` relative to this file,
      * opens a default ``pymongo.MongoClient`` and selects
        ``military_qa.data``,
      * declares the synonym tables (attributes, categories, countries,
        comparison and superlative words, unit conversion factors),
      * compiles the regular expressions used to spot "number + unit" and
        date expressions in a question, and
      * registers every known word with jieba under a custom POS tag.
    """
    # os.path.dirname is portable; splitting on '/' breaks on Windows paths.
    cur = os.path.dirname(os.path.abspath(__file__))
    self.datapath = os.path.join(cur, 'data/military.json')
    self.conn = pymongo.MongoClient()
    db_name = 'military_qa'
    col_name = 'data'
    self.col = self.conn[db_name][col_name]

    # Canonical attribute name -> words that may denote it in a question.
    self.attributes ={'同型': ['同型'], '机高': ['机高'],
                       '战斗全重': ['战斗全重'], '水下排水量': ['水下排水量'],
                       '处理器': ['处理器'], '主炮': ['主炮'],
                       '制导系统': ['制导系统'], '全重': ['全重'],
                       '纬度': ['纬度'], '炮口初速': ['炮口初速'],
                       '发射性能': ['发射性能'], '兵装': ['兵装'],
                       '型号': ['型号'],
                       '长度': ['长度', '全长', '多长'], '翼展': ['翼展', '翼长'],
                       '全枪长': ['全枪长', '枪长'], '射程': ['射程'],
                       '前型': ['前型'],
                       '发射地点': ['发射地点', '发射地点'], '首飞时间': ['首飞时间', '首飞', '初次飞行', '首次飞行'],
                       '发动机数量': ['发动机数量', '几个发动机', '多少个发动机', '发动机个数', '发动机数目', '发动机个','发动机数'], '乘员': ['乘员'],
                       '战斗射速': ['战斗射速'], '生产单位': ['生产单位', '产商', '制造商', '厂家', '制造机构'],
                       '最大行程': ['最大行程', '最常距离'], '炮管长度': ['炮管长度', '炮管长', '炮管全长'],
                       '气动布局': ['气动布局'], '武备': ['武备'],
                       '武器装备': ['武器装备'], '引信': ['引信'],
                       '参战情况': ['参战情况'],
                       '动力装置': ['动力装置'], '飞行速度': ['飞行速度'],
                       '服役时间': ['服役时间'], '新造时': ['新造时'],
                       '活动范围': ['活动范围'], '弹匣容弹量': ['弹匣容弹量'],
                       '编制': ['编制'], '高度': ['高度'],
                       '制造厂': ['制造厂'], '口径': ['口径'],
                       '鱼雷': ['鱼雷'], '经度': ['经度'],
                       '研发时间': ['研发时间'], '简介': ['简介'],
                       '首次轨道发射': ['首次轨道发射'],
                       '挂载点': ['挂载点'], '刀锋宽度': ['刀锋宽度'],
                       '续航距离': ['续航距离'], '枪械': ['枪械'],
                       '最大速度': ['最大速度'], '运载火箭': ['运载火箭'],
                       '生产年限': ['生产年限'], '全枪重': ['全枪重'],
                       '空重': ['空重'], '水雷': ['水雷'],
                       '枪炮': ['枪炮'], '水上排水量': ['水上排水量', '排水量'],
                       '诞生时间': ['诞生时间'], '内置武器': ['内置武器'],
                       '机长': ['机长'], '中心直径': ['中心直径', '直径'],
                       '装药类型': ['装药类型'], '最大起飞重量': ['最大起飞重量', '起飞重量'],
                       '有效射程': ['有效射程'], '现状': ['现状'],
                       '研制时间': ['研制时间'], '舰舰导弹': ['舰舰导弹'],
                       '下水时间': ['下水时间', '下水'], '机炮': ['机炮'],
                       '弹长': ['弹长'], '退役时间': ['退役时间', '退役'],
                       '最大射程': ['最大射程'], '改装时': ['改装时'],
                       '刀重': ['刀重'], '自持力': ['自持力'],
                       '产国': ['产国'], '航速': ['航速'],
                       '制造商': ['制造商'], '型宽': ['型宽'],
                       '弹重': ['弹重'], '刀长': ['刀长'],
                       '舰长': ['舰长'], '研发厂商': ['研发厂商'],
                       '旋翼直径': ['旋翼直径'], '导弹': ['导弹'],
                       '满排吨位': ['满排吨位'], '底盘类型': ['底盘类型'],
                       '刀锋长度': ['刀锋长度'], '弹径': ['弹径'],
                       '全长': ['全长'], '竣工时': ['竣工时'],
                       '发射日期': ['发射日期'], '宽度': ['宽度'],
                       '总重': ['总重'], '建造时间': ['建造时间'],
                       '射控装置': ['射控装置'], '图片': ['图片'],
                       '轨道': ['轨道'], '改装前': ['改装前'],
                       '发动机': ['发动机'], '最大航程': ['最大航程'],
                       '研发单位': ['研发单位'], '大类': ['大类'],
                       '关注度': ['关注度'], '最大飞行速度': ['最大飞行速度'],
                       '火炮': ['火炮'], '战地机型': ['战地机型'],
                       '防空兵器': ['防空兵器'], '潜航深度': ['潜航深度'],
                       '轨道卫星': ['轨道卫星'], '尾翼装置': ['尾翼装置'],
                       '乘员与载员': ['乘员与载员'], '名称': ['名称'],
                       '引信装置': ['引信装置'], '次型': ['次型'],
                       '车长': ['车长'], '武装': ['武装'],"航长":['航长'],
                       '反舰导弹': ['反舰导弹'],
                       '满载排水量': ['满载排水量'], '装备': ['装备']}

    # Top-level weapon categories.
    self.big_cates ={'火炮': ['火炮'], '飞行器': ['飞行器'],
                      '舰船舰艇': ['舰船舰艇'], '坦克装甲车辆': ['坦克装甲车辆'],
                      '太空装备': ['太空装备'], '爆炸物': ['爆炸物'],
                      '导弹武器': ['导弹武器'], '枪械与单兵': ['枪械与单兵', '枪械', '枪', '单兵']}
    # Second-level weapon categories.
    self.second_cates = {'榴弹发射器': ['榴弹发射器'], '炸弹': ['炸弹', '炸药'],
                         '手榴弹': ['手榴弹'], '电子战机': ['电子战机'],
                         '机枪': ['机枪'], '宇宙飞船': ['宇宙飞船', '飞船'],
                         '加农炮': ['加农炮'], '救护车': ['救护车'],
                         '攻击机': ['攻击机'], '非自动步枪': ['非自动步枪', '步枪'],
                         '火箭弹': ['火箭弹'], '地雷': ['地雷'],
                         '高射炮': ['高射炮'], '航天飞机': ['航天飞机'],
                         '航天机构': ['航天机构', '航天局', '航天部门'], '舰舰导弹': ['舰舰导弹'],
                         '通用飞机': ['通用飞机'], '岸舰导弹': ['岸舰导弹', '导弹'],
                         '舰炮': ['舰炮'], '巡洋舰': ['巡洋舰'],
                         '气垫艇/气垫船': ['气垫艇/气垫船','气垫艇','气垫船'], '装甲指挥车': ['装甲指挥车', '装甲车', '指挥车'],
                         '无人机': ['无人机'], '氢弹': ['氢弹'],
                         '坦克炮': ['坦克炮'], '干线': ['干线'],
                         '原子弹': ['原子弹'], '冲锋枪': ['冲锋枪'],
                         '导弹艇': ['导弹艇'], '水雷战舰艇': ['水雷战舰艇'],
                         '侦察机': ['侦察机'], '试验机': ['试验机'],
                         '舰地(潜地)导弹': ['舰地(潜地)导弹','舰地导弹','潜地导弹', '导弹'],
                         '支线': ['支线'], '军事卫星': ['军事卫星'],
                         '地空导弹': ['地空导弹'], '航空炮': ['航空炮'],
                         '战列舰': ['战列舰'], '无后坐炮': ['无后坐炮'],
                         '空地导弹': ['空地导弹'], '加农榴弹炮': ['加农榴弹炮'],
                         '运输机': ['运输机'], '自行火炮': ['自行火炮'],
                         '地地导弹': ['地地导弹'], '空舰导弹': ['空舰导弹'],
                         '教练机': ['教练机'], '其他特种装甲车辆': ['其他特种装甲车辆'],
                         '火箭筒': ['火箭筒'], '空间探测器': ['空间探测器', '探测器'],
                         '预警机': ['预警机'], '航空母舰': ['航空母舰', '航母'],
                         '迷彩服': ['迷彩服'],'弹炮结合系统': ['弹炮结合系统'],
                         '科学卫星': ['科学卫星'], '空空导弹': ['空空导弹','导弹'],
                         '迫击炮': ['迫击炮'],
                         '应用卫星': ['应用卫星', '卫星'], '保障辅助舰艇': ['保障辅助舰艇'],
                         '刀具': ['刀具'], '霰弹枪': ['霰弹枪'],
                         '自动步枪': ['自动步枪'], '手枪': ['手枪'],
                         '反弹道导弹': ['反弹道导弹'], '两栖作战舰艇': ['两栖作战舰艇'],
                         '特种坦克': ['特种坦克', '坦克'], '运输直升机': ['运输直升机', '直升机'],
                         '巡逻舰/艇': ['巡逻舰/艇', '巡逻舰', '巡逻舰艇', '巡逻舰艇'], '加油机': ['加油机'],
                         '反坦克炮': ['反坦克炮'],
                         '越野车': ['越野车'], '步兵战车': ['步兵战车'],
                         '战斗机': ['战斗机'], '护卫舰': ['护卫舰'],
                         '工程抢修车': ['工程抢修车'],'反潜机': ['反潜机'],
                         '常规潜艇': ['常规潜艇'], '装甲侦察车': ['装甲侦察车'],
                         '舰空导弹': ['舰空导弹'], '运载火箭': ['运载火箭'],
                         '中子弹': ['中子弹'], '飞艇': ['飞艇'],
                         '航天基地': ['航天基地'], '鱼雷': ['鱼雷'],
                         '轰炸机': ['轰炸机'], '技术试验卫星': ['技术试验卫星', '卫星'],
                         '狙击枪': ['狙击枪'], '水雷': ['水雷'],
                         '装甲车载炮': ['装甲车载炮'], '榴弹炮': ['榴弹炮'],
                         '驱逐舰': ['驱逐舰'], '装甲运兵车': ['装甲运兵车'],
                         '火箭炮': ['火箭炮'], '多用途直升机': ['多用途直升机', '直升机'],
                         '核潜艇': ['核潜艇'], '武装直升机': ['武装直升机', '直升机'],
                         '布/扫雷车': ['布/扫雷车', '扫雷车', '扫雷车'], '潜舰导弹': ['潜舰导弹', '导弹'],
                         '主战坦克': ['主战坦克', '坦克']}
    # Weapon names are loaded from the JSON data file; each maps to itself.
    self.weapons = self.load_weapons()
    self.weapon_dict = {i:i for i in self.weapons}
    self.countries = {'荷兰': ['荷兰'], '阿根廷': ['阿根廷'], '瑞士': ['瑞士'],
                      '伊朗': ['伊朗'], '以色列': ['以色列'], '前南斯拉夫': ['前南斯拉夫'],
                      '越南': ['越南'], '葡萄牙': ['葡萄牙'], '乌克兰': ['乌克兰'],
                      '新西兰': ['新西兰'], '奥地利': ['奥地利'], '希腊': ['希腊'],
                      '塞尔维亚': ['塞尔维亚'], '比利时': ['比利时'],
                      '俄罗斯': ['俄罗斯'], '前捷克斯洛伐克': ['前捷克斯洛伐克'],
                      '捷克': ['捷克'], '土耳其': ['土耳其'], '缅甸': ['缅甸'],
                      '美国': ['美国'], '德国': ['德国'], '巴西': ['巴西'],
                      '印度尼西亚': ['印度尼西亚'], '法国': ['法国'],
                      '瑞典': ['瑞典'], '前苏联': ['前苏联'],
                      '朝鲜': ['朝鲜'],
                      '埃及': ['埃及'], '墨西哥': ['墨西哥'], '巴基斯坦': ['巴基斯坦'],
                      '马来西亚': ['马来西亚'], '澳大利亚': ['澳大利亚'], '泰国': ['泰国'],
                      '欧盟': ['欧盟'], '波兰': ['波兰'],
                      '韩国': ['韩国'], '日本': ['日本'],
                      '罗马尼亚': ['罗马尼亚'], '克罗地亚': ['克罗地亚'], '智利': ['智利'],
                      '匈牙利': ['匈牙利'], '意大利': ['意大利'], '英国': ['英国'],
                      '丹麦': ['丹麦'], '挪威': ['挪威'], '哈萨克斯坦': ['哈萨克斯坦'],
                      '爱尔兰': ['爱尔兰'], '伊拉克': ['伊拉克'],
                      '中国': ['中国','中华人民共和国'], '印度': ['印度'],
                      '保加利亚': ['保加利亚'], '斯洛伐克': ['斯洛伐克'],
                      '西班牙': ['西班牙'], '秘鲁': ['秘鲁'],
                      '阿联酋': ['阿联酋'], '卢森堡': ['卢森堡'],
                      '巴拿马': ['巴拿马'], '新加坡': ['新加坡'],
                      '波黑': ['波黑'], '南非': ['南非'],
                      '苏/俄': ['苏/俄', '苏联', '俄罗斯'], '加拿大': ['加拿大'], '芬兰': ['芬兰']}

    # MongoDB comparison operator -> words that express it.
    self.compares = {
        '$gt': ['高于','大于','长于','高过','大过','长过','多于', '远于', '远过', '之后', '晚于', '后于'],
        '$lt': ['低于', '小于', '短于', '低过', '短过', '少于', '近于', '近过', '未达到', '没达到', '之前', '先于', '早于'],
        '$lte': ['不高于','不大于','不长于','不高过','不大过','不长过','不多于', '不远于', '不远过'],
        '$gte': ['不低于', '不小于', '不短于', '不低过', '不短过', '不少于', '不近于', '不近过', '达到'],
        '$eq': ['等于', '差不多'],
        '$ne': ['不等于', '不是']}
    self.counts = ['多少', '几', '几多']
    # Sort direction (-1 descending, 1 ascending) -> superlative words.
    self.mosts = {
        -1:['最大', '最远', '最长', '最高', '最久', '最快', '最多', '最强'],
        1:['最小', '最短', '最近', '最低', '最矮', '最慢', '最少', '最弱'],
    }

    # Unit spelling -> [multiplier, normalised unit label].
    self.unit_dict = {
        '海里': [1852, '米'],
        '英里': [1610, '米'],
        '/节': [1852, '米'],
        'km/节': [1000, '米'],
        '吨': [1000, '千克'],
        '-吨': [1000, '千克'],
        '公里': [1000, '米'],
        '公里/节': [1000, '米'],
        '公里/小时': [1000, '米'],
        '海里节': [1852, '米'],
        '海里,节': [1852, '米'],
        '海里/节': [1852, '米'],
        '海哩/节': [1852, '米'],
        '海浬/节': [1852, '米'],
        '毫米': [0.001, '米'],
        '节': [1852, '米'],
        '节/海里': [1852, '米'],
        '节海里': [1852, '米'],
        '节行驶英里': [1852, '米'],
        '节下海里': [1852, '米'],
        '克': [0.001, '千克'],
        '里': [1852, '米'],
        '里/节': [1852, '米'],
        '米': [1, '米'],
        # Label corrected from '克': the other mass units normalise to '千克'.
        '千克': [1, '千克'],
        '千米': [1000, '米'],
        '千米/节': [1000, '米'],
        '千米/时': [1000, '米'],
        '千米/小时': [1000, '米'],
        '千米每小时': [1000, '米'],
        '万海里/节': [18520000, '米'],
        '英里,节': [1610, '米'],
        '英里/节': [1610, '米'],
        '余英里': [1610, '米'],
        '约海里': [1852, '米'],
        '最大海里': [1852, '米'],
        '厘米': [0.01, '米'],
        '分米': [0.1, '米'],
        '人': [1, '人'],
        '位': [1, '位']}

    # Longest spellings first so that e.g. '海里/节' wins over '海里'.
    unit_wds = sorted(self.unit_dict, key=len, reverse=True)
    # The number part accepts an integer or a decimal. The previous
    # '[0-9]+.?[0-9]+' required at least two digits (so '5吨' was missed)
    # and its unescaped '.' matched any character.
    unit_regex = r'([0-9]+(?:\.[0-9]+)?)(%s)+' % '|'.join(re.escape(wd) for wd in unit_wds)
    time_regex = '[0-9]{4}年[0-9]{0,4}月?[0-9]{0,4}日?'
    self.unit_pattern = re.compile(unit_regex)
    self.time_pattern = re.compile(time_regex)

    # Flatten "canonical -> synonyms" tables into "synonym -> canonical".
    self.country_dict = self.build_dict(self.countries)
    self.big_dict = self.build_dict(self.big_cates)
    self.small_dict = self.build_dict(self.second_cates)
    self.attribute_dict = self.build_dict(self.attributes)
    self.compare_dict = self.build_dict(self.compares)
    self.most_dict = self.build_dict(self.mosts)

    # Register every vocabulary word with jieba under its own tag. A word
    # present in several tables ends up with the tag registered last.
    self.add_jieba(self.country_dict, 'n_country')
    self.add_jieba(self.big_dict, 'n_big')
    self.add_jieba(self.small_dict, 'n_small')
    self.add_jieba(self.attribute_dict, 'n_attr')
    self.add_jieba(self.compare_dict, 'n_compare')
    self.add_jieba(self.most_dict, 'n_most')
    self.add_jieba(self.weapons, 'n_weapon')
|
||
|
||
'''加载武器实体'''
|
||
def load_weapons(self):
    """Return the distinct weapon names found in the JSON-lines data file.

    Each non-blank line of ``self.datapath`` is parsed as a JSON object and
    its ``'名称'`` value is collected.

    Returns:
        list: the unique names, in arbitrary order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record has no ``'名称'`` field.
    """
    weapons = set()
    # Explicit UTF-8 so the Chinese data loads regardless of the platform
    # default encoding; ``with`` guarantees the handle is closed.
    with open(self.datapath, encoding='utf-8') as data_file:
        for record in data_file:
            record = record.strip()
            if not record:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            weapons.add(json.loads(record)['名称'])
    return list(weapons)
|
||
|
||
'''构造映射字典'''
|
||
def build_dict(self, dict):
    """Invert a ``{category: [words]}`` table into ``{word: category}``.

    When the same word is listed under several categories, the category
    that appears last in iteration order wins.
    """
    return {
        word: category
        for category, words in dict.items()
        for word in words
    }
|
||
|
||
'''检测单位'''
|
||
def detect_entity(self, question):
    """Find date and "number + unit" expressions in a question.

    Returns:
        tuple: ``(times, units)`` where ``times`` holds the matches of
        ``self.time_pattern`` and ``units`` holds, for every match of
        ``self.unit_pattern``, the two captured groups joined together.
    """
    units = []
    for groups in self.unit_pattern.findall(question):
        if groups:
            units.append(groups[0] + groups[1])
    times = self.time_pattern.findall(question)
    return times, units
|
||
|
||
'''检查年份并统一时间'''
|
||
def standard_year(self, sent):
    """Normalise a Chinese date expression to a ``YYYYMMDD`` string.

    Spaces are removed first. If no four-digit ``年`` year is present an
    empty string is returned; a missing month or day is filled in through
    ``self.full_date``.
    """
    sent = sent.replace(' ', '')
    years = re.compile('[0-9]{4}年').findall(sent)
    if not years:
        return ''

    months = re.compile('[0-9]{1,4}月').findall(sent)
    days = re.compile('[0-9]{1,4}日').findall(sent)

    year_part = years[0].replace('年', '')
    month_part = months[0].replace('月', '') if months else ''
    day_part = days[0].replace('日', '') if days else ''

    return year_part + self.full_date(month_part) + self.full_date(day_part)
|
||
|
||
'''补全日期'''
|
||
def full_date(self, date):
    """Return a month/day string padded to two digits.

    An empty value becomes ``'01'``; a single digit gets a leading zero;
    anything longer is returned unchanged.
    """
    value = date if date else '01'
    needs_padding = int(value) < 10 and len(value) < 2
    return '0' + value if needs_padding else value
|
||
|
||
'检测是否有数字'
|
||
def check_num(self, sent):
    """Return the first number that appears in ``sent`` as a string.

    Decimals are returned whole (``'3.5公里'`` -> ``'3.5'``); the previous
    ``\\d+`` pattern stopped at the decimal point and returned ``'3'``.

    Args:
        sent: any value; it is converted with ``str()`` before searching.

    Raises:
        IndexError: if ``sent`` contains no digits (same exception type the
            original ``findall(...)[0]`` produced).
    """
    found = re.search(r'\d+(?:\.\d+)?', str(sent))
    if found is None:
        raise IndexError('no number found in %r' % (sent,))
    return found.group()
|
||
|
||
'''检查单位并统一数量'''
|
||
def standard_unit(self, unit_value):
    """Convert a "number + unit" string to a float in the normalised unit.

    The number is extracted with ``self.check_num``; what remains after
    removing it is looked up in ``self.unit_dict``. Unknown units use a
    multiplier of 1.
    """
    number_text = self.check_num(unit_value)
    unit_name = unit_value.replace(number_text, '')
    multiplier = self.unit_dict.get(unit_name, [1, 'default'])[0]
    return float(number_text) * multiplier
|
||
|
||
'''将实体标记和实体词加入到jieba当中'''
|
||
def add_jieba(self, wds, tag):
    """Register every word in ``wds`` with jieba under POS tag ``tag``.

    Each word is added with a frequency of 300000.
    """
    for word in wds:
        jieba.add_word(word, tag=tag, freq=300000)
|
||
|
||
'''问句解析'''
|
||
def question_parser(self, question):
    """Segment a question and group its words by custom tag.

    Dates and "number + unit" strings found by ``detect_entity`` are first
    registered with jieba as ``n_time`` / ``n_unit`` words, then the
    question is cut with ``pseg``.

    Returns:
        dict: one list of words per tag (``n_attrs``, ``n_times``, ...),
        ``pattern`` (the sequence of recognised tags in question order)
        and ``wds`` (all ``(word, flag)`` pairs).
    """
    times, units = self.detect_entity(question)
    self.add_jieba(times, 'n_time')
    self.add_jieba(units, 'n_unit')

    wds = [(token.word, token.flag) for token in pseg.cut(question)]

    # (result key, jieba tag) in the order the keys are inserted.
    buckets = (
        ('n_attrs', 'n_attr'),
        ('n_times', 'n_time'),
        ('n_bigs', 'n_big'),
        ('n_smalls', 'n_small'),
        ('n_countries', 'n_country'),
        ('n_compares', 'n_compare'),
        ('n_mosts', 'n_most'),
        ('n_units', 'n_unit'),
        ('n_weapons', 'n_weapon'),
    )
    parser_dict = {}
    for key, tag in buckets:
        parser_dict[key] = [word for word, flag in wds if flag == tag]

    known_tags = [tag for _, tag in buckets]
    parser_dict['pattern'] = [flag for _, flag in wds if flag in known_tags]
    parser_dict['wds'] = wds
    return parser_dict
|
||
|
||
'''答案搜索'''
|
||
def search_answer(self, parser_dict):
    """Translate a parsed question into MongoDB queries and run them.

    The tag sequence in ``parser_dict['pattern']`` selects one of the
    supported question shapes; each shape produces one or more
    ``{'condition': ..., 'targets': ...}`` entries that are handed to
    ``self.query_mongo``. Unrecognised shapes produce no entries.

    Args:
        parser_dict: the dictionary produced by ``question_parser``.

    Returns:
        Whatever ``self.query_mongo`` returns for the built queries.
    """
    print(parser_dict)
    pattern = parser_dict['pattern']
    print(pattern)
    search_data = []
    targets = ['名称']
    search_flag = 1

    def first(key, mapping):
        # Canonical form of the first word recorded under ``key``.
        return mapping.get(parser_dict.get(key)[0])

    def category_filter():
        # Second-level categories are matched on '类型', top-level on '大类'.
        if 'n_small' in pattern:
            return {'类型': first('n_smalls', self.small_dict)}
        return {'大类': first('n_bigs', self.big_dict)}

    def with_big(shapes):
        # Every comparison shape exists for n_small and, mirrored, for n_big.
        mirrored = [['n_big' if flag == 'n_small' else flag for flag in shape]
                    for shape in shapes]
        return shapes + mirrored

    # One weapon (optionally preceded by a country) followed by 1-5 attributes.
    single_entity = [head + ['n_weapon'] + ['n_attr'] * n_attr_count
                     for head in ([], ['n_country'])
                     for n_attr_count in range(1, 6)]

    # 2-5 weapons followed by 1-6 attributes. The original hand-written list
    # contained duplicated rows and accidentally skipped a few combinations;
    # the handler below is generic, so the full grid is accepted.
    multi_entity = [['n_weapon'] * n_weapon_count + ['n_attr'] * n_attr_count
                    for n_weapon_count in range(2, 6)
                    for n_attr_count in range(1, 7)]

    # Two weapons, each followed by its own attributes.
    interleaved = [
        ['n_weapon', 'n_attr', 'n_weapon', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_weapon', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_country', 'n_weapon', 'n_attr'],
        ['n_weapon', 'n_attr', 'n_attr', 'n_weapon', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_weapon', 'n_attr'],
        # was ' n_weapon' (leading space), which could never match
        ['n_country', 'n_weapon', 'n_attr', 'n_country', 'n_weapon', 'n_attr', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_weapon', 'n_attr', 'n_attr'],
        ['n_weapon', 'n_attr', 'n_attr', 'n_attr', 'n_country', 'n_weapon', 'n_attr', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_attr', 'n_country', 'n_weapon', 'n_attr', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_weapon', 'n_attr', 'n_attr', 'n_attr'],
        ['n_weapon', 'n_attr', 'n_attr', 'n_attr', 'n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_attr'],
        ['n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_attr', 'n_country', 'n_weapon', 'n_attr', 'n_attr', 'n_attr'],
    ]

    # Comparison with a single operator + operand.
    single_compare = with_big([
        ['n_attr', 'n_compare', 'n_unit', 'n_small'],
        ['n_small', 'n_attr', 'n_compare', 'n_unit'],
        ['n_attr', 'n_compare', 'n_time', 'n_small'],
        ['n_attr', 'n_time', 'n_compare', 'n_small'],
        ['n_small', 'n_attr', 'n_compare', 'n_time'],
        ['n_small', 'n_attr', 'n_time', 'n_compare'],
    ])

    # Comparison with two operator + operand pairs (a range).
    double_compare = with_big([
        ['n_attr', 'n_compare', 'n_unit', 'n_compare', 'n_unit', 'n_small'],
        ['n_small', 'n_attr', 'n_compare', 'n_unit', 'n_compare', 'n_unit'],
        ['n_attr', 'n_compare', 'n_time', 'n_compare', 'n_time', 'n_small'],
        ['n_attr', 'n_time', 'n_compare', 'n_time', 'n_compare', 'n_small'],
        ['n_small', 'n_attr', 'n_compare', 'n_time', 'n_compare', 'n_time'],
        ['n_small', 'n_attr', 'n_time', 'n_compare', 'n_time', 'n_compare'],
    ])

    # Superlative ("largest", "shortest", ...) of an attribute in a category.
    most_shapes = [
        ['n_small', 'n_attr', 'n_most'],
        ['n_attr', 'n_most', 'n_small'],
        ['n_big', 'n_attr', 'n_most'],
        ['n_attr', 'n_most', 'n_big'],
    ]

    if pattern in [['n_country', 'n_small'], ['n_small', 'n_country']]:
        country = first('n_countries', self.country_dict)
        n_small = first('n_smalls', self.small_dict)
        condition = {'产国': country, '类型': n_small}
        search_data.append({'condition': condition, 'targets': ['名称']})

    elif pattern in [['n_country', 'n_big'], ['n_big', 'n_country']]:
        country = first('n_countries', self.country_dict)
        n_big = first('n_bigs', self.big_dict)
        # Top-level categories are filtered on '大类' like in every other
        # branch of this method (this branch used '类型' by mistake).
        condition = {'产国': country, '大类': n_big}
        search_data.append({'condition': condition, 'targets': ['名称']})

    elif pattern in [['n_country', 'n_weapon'], ['n_weapon']]:
        # A bare weapon name: answer with its introduction text.
        condition = {'名称': first('n_weapons', self.weapon_dict)}
        search_data.append({'condition': condition, 'targets': ['简介']})

    elif pattern in single_entity:
        condition = {'名称': first('n_weapons', self.weapon_dict)}
        targets = [self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs')]
        search_data.append({'condition': condition, 'targets': targets})

    elif pattern in multi_entity:
        n_weapons = [self.weapon_dict.get(weapon) for weapon in parser_dict.get('n_weapons')]
        condition = {'名称': {"$in": n_weapons}}
        targets = [self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs')]
        search_data.append({'condition': condition, 'targets': targets})

    elif pattern in interleaved:
        weapon_positions = [indx for indx, name in enumerate(pattern) if name == 'n_weapon']
        n_weapons = [self.weapon_dict.get(weapon) for weapon in parser_dict.get('n_weapons')]
        attrs = [self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs')]
        # Attributes that occur before the second weapon belong to the first
        # weapon, the rest to the second. (The split used to be made at the
        # number of weapons, giving the first weapon both attributes in
        # "A attr B attr".)
        split = pattern[:weapon_positions[1]].count('n_attr')
        search_data.append({'condition': {'名称': n_weapons[0]}, 'targets': attrs[:split]})
        search_data.append({'condition': {'名称': n_weapons[1]}, 'targets': attrs[split:]})

    elif pattern in single_compare:
        n_attr = first('n_attrs', self.attribute_dict)
        n_compare = first('n_compares', self.compare_dict)
        if 'n_unit' in pattern:
            operand = self.standard_unit(parser_dict.get('n_units')[0])
        else:
            operand = self.standard_year(parser_dict.get('n_times')[0])
        condition = {n_attr: {n_compare: operand}}
        condition.update(category_filter())
        search_data.append({'condition': condition, 'targets': [n_attr]})

    elif pattern in double_compare:
        n_attr = first('n_attrs', self.attribute_dict)
        n_compares = [self.compare_dict.get(compare) for compare in parser_dict.get('n_compares')]
        if 'n_unit' in pattern:
            operands = [self.standard_unit(unit) for unit in parser_dict.get('n_units')]
        else:
            operands = [self.standard_year(year) for year in parser_dict.get('n_times')]
        condition = {n_attr: {n_compares[0]: operands[0], n_compares[1]: operands[1]}}
        condition.update(category_filter())
        search_data.append({'condition': condition, 'targets': [n_attr]})

    elif pattern in most_shapes:
        # Handled by the sorting query: 'sort_key' is stripped from the
        # filter there and used as the sort specification.
        search_flag = 0
        n_attr = first('n_attrs', self.attribute_dict)
        n_most = first('n_mosts', self.most_dict)
        condition = category_filter()
        condition['sort_key'] = {n_attr: n_most}
        targets.append(n_attr)
        search_data.append({'condition': condition, 'targets': targets})

    return self.query_mongo(search_flag, search_data)
|
||
|
||
'''查询mongo数据库'''
|
||
def query_mongo(self, search_flag, search_data):
    """Dispatch to the plain or the sorting MongoDB query.

    A truthy ``search_flag`` runs ``query_mongo_attr``; a falsy one runs
    ``query_mongo_sort``.
    """
    runner = self.query_mongo_attr if search_flag else self.query_mongo_sort
    return runner(search_data)
|
||
|
||
'''查询mongo数据库,正常'''
|
||
def query_mongo_attr(self, search_data):
    """Run each query and format the requested fields of every hit.

    For every matching document one list is produced, holding a
    ``"<name><field>:<value>"`` string per requested field; fields that
    are absent (or literally ``'null'``) are left out.
    """
    answers = []
    for query in search_data:
        condition = query['condition']
        fields = query['targets']
        for doc in self.col.find(condition):
            lines = []
            for field in fields:
                if doc.get(field, 'null') == 'null':
                    continue
                lines.append(doc.get('名称') + field + ':' + str(doc.get(field, 'null')))
            answers.append(lines)
    return answers
|
||
|
||
'''按照最值方法查找mongo数据库'''
|
||
def query_mongo_sort(self, search_data):
    """Run each query sorted by its ``sort_key`` and keep the top document.

    The ``'sort_key'`` entry of a condition is removed from the filter and
    used as the sort specification; only the first document is formatted,
    as one ``"<name><field>:<value>"`` string per requested field.
    """
    answers = []
    for query in search_data:
        full_condition = query['condition']
        filters = {}
        for key, value in full_condition.items():
            if key != 'sort_key':
                filters[key] = value
        ordering = list(full_condition.get('sort_key').items())
        fields = query['targets']
        cursor = self.col.find(filters).sort(ordering).limit(1)
        for doc in cursor:
            answers.append([
                doc.get('名称') + field + ':' + str(doc.get(field, 'null'))
                for field in fields
            ])
    return answers
|
||
|
||
'问答主函数'
|
||
def qa_main(self, question):
    """Answer one question and print the outcome.

    The question is parsed with ``question_parser`` and searched with
    ``search_answer``. Empty answer lists are discarded; when nothing is
    left an apology is printed, otherwise the number of answers followed
    by each answer.
    """
    parser_dict = self.question_parser(question)
    results = self.search_answer(parser_dict)

    # The previous test ``results == [[]]`` missed both ``[]`` (question
    # shape not recognised) and several empty hits such as ``[[], []]``.
    answers = [result for result in (results or []) if result]
    if not answers:
        print('sorry, do not know the answer yet...')
        return

    print('find %s result:' % len(answers))
    print('answer detail:')
    for result in answers:
        print(result)
|
||
|
||
if __name__ == '__main__':
    # Interactive loop: read a question, print its answer, repeat.
    handler = MilitaryGraph()
    while True:
        try:
            question = input("enter an question to parser:\n")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session without a traceback.
            break
        handler.qa_main(question)