添加自动问答模块

This commit is contained in:
liu huanyong 2018-11-11 20:52:33 +08:00
parent 222ade9217
commit 9f9cbf7570
4 changed files with 225 additions and 59 deletions

View File

@ -20,12 +20,24 @@
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="crime_classify.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="561">
<caret line="82" column="34" selection-start-line="82" selection-start-column="34" selection-end-line="82" selection-end-column="34" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="README.md" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]">
<state split_layout="SPLIT">
<first_editor relative-caret-position="719">
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" />
<first_editor relative-caret-position="458">
<caret line="115" column="14" selection-start-line="115" selection-start-column="14" selection-end-line="115" selection-end-column="14" />
<folding />
</first_editor>
<second_editor>
@ -35,25 +47,13 @@
</provider>
</entry>
</file>
<file leaf-file-name="crime_classify.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<file leaf-file-name="crime_qa.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/crime_qa.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="349">
<caret line="95" column="28" selection-start-line="95" selection-start-column="28" selection-end-line="95" selection-end-column="28" />
<state relative-caret-position="825">
<caret line="137" column="26" selection-start-line="137" selection-start-column="26" selection-end-line="137" selection-end-column="26" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="question_classify.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/question_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1268">
<caret line="149" column="29" selection-start-line="149" selection-start-column="29" selection-end-line="149" selection-end-column="29" />
<folding>
<element signature="e#150#159#0" expanded="true" />
<element signature="e#147#156#0" expanded="true" />
</folding>
</state>
</provider>
@ -73,8 +73,9 @@
<list>
<option value="$PROJECT_DIR$/question_classify_train.py" />
<option value="$PROJECT_DIR$/crime_qa_server.py" />
<option value="$PROJECT_DIR$/crime_classify.py" />
<option value="$PROJECT_DIR$/question_classify.py" />
<option value="$PROJECT_DIR$/crime_classify.py" />
<option value="$PROJECT_DIR$/crime_qa.py" />
<option value="$PROJECT_DIR$/README.md" />
</list>
</option>
@ -141,6 +142,20 @@
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="CrimeKgAssistant" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="CrimeKgAssistant" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="data" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="Scope" />
@ -162,7 +177,7 @@
<recent name="$PROJECT_DIR$/embedding" />
</key>
</component>
<component name="RunManager" selected="Python.question_classify">
<component name="RunManager" selected="Python.crime_qa">
<configuration default="false" name="question_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
@ -197,6 +212,23 @@
<option name="SHOW_COMMAND_LINE" value="false" />
<method />
</configuration>
<configuration default="false" name="crime_qa" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="CrimeKgAssistant" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/crime_qa.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<method />
</configuration>
<configuration default="true" type="BashConfigurationType" factoryName="Bash">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="INTERPRETER_PATH" value="/bin/bash" />
@ -342,14 +374,16 @@
<option name="USE_KEYWORD" value="false" />
<method />
</configuration>
<list size="2">
<list size="3">
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
<item index="2" class="java.lang.String" itemvalue="Python.crime_qa" />
</list>
<recent_temporary>
<list size="2">
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
<list size="3">
<item index="0" class="java.lang.String" itemvalue="Python.crime_qa" />
<item index="1" class="java.lang.String" itemvalue="Python.question_classify" />
<item index="2" class="java.lang.String" itemvalue="Python.crime_classify" />
</list>
</recent_temporary>
</component>
@ -375,7 +409,7 @@
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3598448" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.30940834" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
@ -420,7 +454,7 @@
<state relative-caret-position="484">
<caret line="22" column="33" selection-start-line="0" selection-start-column="0" selection-end-line="184" selection-end-column="0" />
<folding>
<element signature="e#146#155#0" expanded="true" />
<element signature="e#146#155#0" expanded="false" />
</folding>
</state>
</provider>
@ -433,26 +467,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/crime_qa.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="352">
<caret line="16" column="15" selection-start-line="16" selection-start-column="15" selection-end-line="16" selection-end-column="15" />
<folding>
<element signature="e#147#156#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="349">
<caret line="95" column="28" selection-start-line="95" selection-start-column="28" selection-end-line="95" selection-end-column="28" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dict/crime.txt">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="693">
@ -471,11 +485,31 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/crime_qa.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="825">
<caret line="137" column="26" selection-start-line="137" selection-start-column="26" selection-end-line="137" selection-end-column="26" />
<folding>
<element signature="e#147#156#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="561">
<caret line="82" column="34" selection-start-line="82" selection-start-column="34" selection-end-line="82" selection-end-column="34" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]">
<state split_layout="SPLIT">
<first_editor relative-caret-position="719">
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" />
<first_editor relative-caret-position="458">
<caret line="115" column="14" selection-start-line="115" selection-start-column="14" selection-end-line="115" selection-end-column="14" />
<folding />
</first_editor>
<second_editor>

View File

@ -105,9 +105,58 @@ Crime assistant including crime type prediction and crime consult service based
question_type: 婚姻家庭 0.9993444085121155
# 法务咨询自动问答
运行 `python crime_qa.py`
question:朋友欠钱不还咋办
answers: ['欠款金额是多少 ', '多少钱呢', '律师费诉讼费都非常少都很合理,一定要起诉。', '大概金额多少?', '需要看标的额和案情复杂程度,建议细致面谈']
*******************************************************
question:昨天把人家车刮了,要赔多少
answers: ['您好,建议协商处理,如果对方告了你们,就只能积极应诉了。', '您好,建议尽量协商处理,协商不成可起诉']
*******************************************************
question:最近丈夫经常家暴,我受不了了
answers: ['报警要求追究刑事责任。', '您好,建议起诉离婚并请求补偿。', '你好!可以起诉离婚,并主张精神损害赔偿。']
*******************************************************
question:毕业生拿了户口就跑路可以吗
answers: 您好,对于此类问题,您可以咨询公安部门
*******************************************************
question:孩子离家出走,怎么找回来
answers: ['孩子父母没有结婚,孩子母亲把孩子带走了?这样的话可以起诉要求抚养权的。毕竟母亲也是孩子的合法监护人,报警警察一般不受理。']
*******************************************************
question:村霸把我田地给占了,我要怎么起诉
answers: ['可以向上级主管部门投诉解决', '您好,您可以及时向土地管理部门投诉的!', '对方侵权,可以向法院起诉。', '你好,对方侵权,可以向法院起诉。', '你好,可起诉处理,一、当事人起诉,首先应提交起诉书,并按对方当事人人数提交相应份数的副本。当事人是公民的,应写明双方当事人的姓名、性别、年龄、籍贯、住址;当事人是单位的,应写明单位名称、地址、法定代表人或负责人姓名。起诉书正文应写明请求事项和起诉事实、理由,尾部须署名或盖公章。二、根据"谁主张谁举证"原则原告向法院起诉应提交下列材料1、原告主体资格的材料。如居民身份证、户口本、护照、港澳同胞回乡证、结婚证等证据的原件和复印件;企业单位作为原告的应提交营业执照、商业登记证明等材料的复印件。2、证明原告诉讼主张的证据。如合同、协议、债权文书(借条、欠条等)、收发货凭证、往来信函等。', '您好,起诉维权。', '您好,可以起诉解决。']
*******************************************************
question:售卖危违禁物品,有什么风险
answers: ['没什么']
*******************************************************
question:找不到女朋友啊..
answers: 您好,对于此类问题,您可以咨询公安部门
*******************************************************
question:我要离婚
answers: ['现在就可向法院起诉离婚。', '不需要分开两年起诉离婚。感情完全破裂就可以提起诉讼离婚。', '你可以直接起诉离婚', '直接起诉']
*******************************************************
question:醉驾,要坐牢吗
answers: ['要负刑事责任很可能坐牢', '由警方处理.,']
*******************************************************
question:你好,我向大学提出退学申请,大学拒绝,理由是家长不同意。我该怎么办?
answers: ['自己可决定的 ']
*******************************************************
question:请问在上班途中,出车祸我的责任偏大属于工伤吗?
answers: ['属于工伤']
*******************************************************
question:结婚时女方拿了彩礼就逃了能要回来吗
answers: ['可以要求退还彩礼。,']
*******************************************************
question:房产证上是不是一定要写夫妻双方姓名
answers: ['可以不填,即使一个人的名字,婚后买房是共同财产。', '不是必须的', '可以写一方名字,对方公证,证明该房产系你一人财产', '你好,不是必须']
*******************************************************
question:儿女不履行赡养义务是不是要判刑
answers: ['什么情况了?']
*******************************************************
question:和未成年人发生关系,需要坐牢吗
answers: ['女孩子在发生关系的时候是否满14周岁如果是且自愿就不是犯罪', '你好,如果是双方愿意的情况下是不犯法的。', '发生性关系时已满十四岁并且是自愿的依法律规定不构成强奸罪,不构成犯罪的。', '若是自愿,那就没什么可说了。', '双方愿意不犯法', '你好 如果是自愿的 不犯法 ', '自愿的就没事']
*******************************************************
question:撞死人逃跑要怎么处理
answers: ['等待警察处理。,']
# 罪刑知识查询

View File

@ -88,6 +88,8 @@ class CrimeClassify(object):
return label
def test():
handler = CrimeClassify()
while(1):

View File

@ -8,15 +8,20 @@ import os
import time
import json
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import pymongo
import numpy as np
import jieba.posseg as pseg
class CrimeQA:
def __init__(self):
    """Create the Elasticsearch client, load the word vectors and set thresholds.

    The embedding file is looked up at ``embedding/word_vec_300.bin`` inside
    the directory that contains this source file, and is loaded eagerly via
    ``self.load_embedding``.
    """
    self._index = "crime_data"
    self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
    self.doc_type = "crime"
    # Use os.path.dirname rather than splitting on a literal '/', so the
    # directory is resolved correctly whatever the platform's separator is.
    cur = os.path.dirname(os.path.abspath(__file__))
    self.embedding_path = os.path.join(cur, 'embedding', 'word_vec_300.bin')
    # NOTE: the attribute name `embdding_dict` (sic) is read by other methods
    # of this class, so the spelling is kept.
    self.embdding_dict = self.load_embedding(self.embedding_path)
    self.embedding_size = 300
    # Lower bounds used by search_main when filtering candidate answers:
    # min_score for the combined score, min_sim for the cosine similarity.
    self.min_score = 0.4
    self.min_sim = 0.8
'''根据question进行事件的匹配查询'''
def search_specific(self, value, key="question"):
@ -43,16 +48,92 @@ class CrimeQA:
answers.append(answer_dict)
return answers
'''加载词向量'''
def load_embedding(self, embedding_path):
    """Load word vectors from a space-separated text file.

    Every kept line has the form ``word v1 v2 ...``. Lines with fewer than
    300 space-separated fields are skipped.

    Args:
        embedding_path: path of the embedding text file.

    Returns:
        dict mapping each word to a numpy array built from the remaining
        fields of its line.
    """
    embedding_dict = {}
    count = 0
    # `with` guarantees the file handle is closed, also when a line fails
    # to parse; the original left the handle open.
    with open(embedding_path) as embedding_file:
        for line in embedding_file:
            fields = line.strip().split(' ')
            if len(fields) < 300:
                continue
            wd = fields[0]
            embedding_dict[wd] = np.array([float(i) for i in fields[1:]])
            count += 1
            # Progress report every 10000 loaded vectors.
            if count % 10000 == 0:
                print(count, 'loaded')
    print('loaded %s word embedding, finished' % count, )
    return embedding_dict
'''对文本进行分词处理'''
def seg_sent(self, s):
    """Segment `s` with jieba.posseg and return the words that are kept.

    A token is dropped when the first character of its part-of-speech flag
    is one of 'x', 'u', 'c', 'p', 'm' or 't'.
    """
    skipped_flag_prefixes = ['x', 'u', 'c', 'p', 'm', 't']
    kept_words = []
    for token in pseg.cut(s):
        if token.flag[0] in skipped_flag_prefixes:
            continue
        kept_words.append(token.word)
    return kept_words
'''基于wordvector通过lookup table的方式找到句子的wordvector的表示'''
def rep_sentencevector(self, sentence, flag='seg'):
    """Represent a sentence as the mean of its known word vectors.

    Args:
        sentence: the input text.
        flag: 'seg' means `sentence` is already segmented into
            space-separated words; any other value segments it with
            ``self.seg_sent`` first.

    Returns:
        numpy array of length ``self.embedding_size`` holding the average of
        the vectors of the words found in ``self.embdding_dict``. When no
        word is found the all-zero vector is returned.
    """
    if flag == 'seg':
        word_list = [i for i in sentence.split(' ') if i]
    else:
        word_list = self.seg_sent(sentence)
    known_vectors = [self.embdding_dict[wd] for wd in word_list
                     if wd in self.embdding_dict]
    embedding = np.zeros(self.embedding_size)
    # Without any in-vocabulary word the original divided by zero and
    # produced a vector of NaN; return the zero vector instead.
    if not known_vectors:
        return embedding
    for vector in known_vectors:
        embedding += vector
    return embedding / len(known_vectors)
'''计算问句与库中问句的相似度,对候选结果加以二次筛选'''
def similarity_cosine(self, vector1, vector2):
    """Return the cosine similarity of two numpy vectors.

    Args:
        vector1: first vector.
        vector2: second vector, same length as `vector1`.

    Returns:
        The cosine of the angle between the vectors, or 0 when it is
        undefined (either vector has zero norm or contains NaN).
    """
    dot_product = np.sum(vector1 * vector2)
    norm1 = np.sqrt(np.sum(vector1 ** 2))
    norm2 = np.sqrt(np.sum(vector2 ** 2))
    denominator = float(norm1 * norm2)
    # The original compared the result with the string 'nan', which is never
    # true for a float, so undefined similarities leaked out as NaN.
    if denominator == 0 or np.isnan(denominator):
        return 0
    similarity = dot_product / denominator
    if np.isnan(similarity):
        return 0
    return similarity
'''问答主函数'''
def search_main(self, question):
    """Answer `question` from the best-matching stored question.

    Candidates come from ``self.search_es(question)`` and are printed as
    retrieved. Each candidate is scored as the mean of its ``score`` divided
    by 100 and the cosine similarity between the sentence vectors of
    `question` and the candidate's ``sim_question``. Candidates whose
    similarity is below ``self.min_sim`` or whose combined score is below
    ``self.min_score`` are discarded.

    Args:
        question: the user's question text.

    Returns:
        The ``answers`` value of the highest-scoring remaining candidate
        (the earliest one on ties), or a fixed fallback message when no
        candidate remains.
    """
    candi_answers = self.search_es(question)
    for candi in candi_answers:
        print(candi)
    question_vector = self.rep_sentencevector(question, flag='noseg')
    best_index = None
    best_score = None
    for indx, candi in enumerate(candi_answers):
        score = candi['score'] / 100
        candi_vector = self.rep_sentencevector(candi['sim_question'], flag='noseg')
        sim = self.similarity_cosine(question_vector, candi_vector)
        # Written as `not (x >= limit)` instead of `x < limit` so that a NaN
        # similarity or score is rejected rather than silently accepted
        # (every ordering comparison with NaN is False).
        if not sim >= self.min_sim:
            continue
        final_score = (score + sim) / 2
        if not final_score >= self.min_score:
            continue
        # Strict '>' keeps the earliest candidate among equal scores.
        if best_score is None or final_score > best_score:
            best_index = indx
            best_score = final_score
    if best_index is None:
        return '您好,对于此类问题,您可以咨询公安部门'
    return candi_answers[best_index]['answers']
if __name__ == "__main__":
    # Interactive demo: run one fixed query first, then keep answering
    # questions typed at the prompt until the process is interrupted.
    qa = CrimeQA()
    qa.search_main('最近买了一把枪,会犯什么罪?')
    while True:
        user_question = input('question:')
        print('answers:', qa.search_main(user_question))