添加自动问答模块

This commit is contained in:
liu huanyong 2018-11-11 20:52:33 +08:00
parent 222ade9217
commit 9f9cbf7570
4 changed files with 225 additions and 59 deletions

View File

@ -20,12 +20,24 @@
</component> </component>
<component name="FileEditorManager"> <component name="FileEditorManager">
<leaf> <leaf>
<file leaf-file-name="crime_classify.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="561">
<caret line="82" column="34" selection-start-line="82" selection-start-column="34" selection-end-line="82" selection-end-column="34" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="README.md" pinned="false" current-in-tab="true"> <file leaf-file-name="README.md" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/README.md"> <entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]"> <provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]">
<state split_layout="SPLIT"> <state split_layout="SPLIT">
<first_editor relative-caret-position="719"> <first_editor relative-caret-position="458">
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" /> <caret line="115" column="14" selection-start-line="115" selection-start-column="14" selection-end-line="115" selection-end-column="14" />
<folding /> <folding />
</first_editor> </first_editor>
<second_editor> <second_editor>
@ -35,25 +47,13 @@
</provider> </provider>
</entry> </entry>
</file> </file>
<file leaf-file-name="crime_classify.py" pinned="false" current-in-tab="false"> <file leaf-file-name="crime_qa.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/crime_classify.py"> <entry file="file://$PROJECT_DIR$/crime_qa.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="349"> <state relative-caret-position="825">
<caret line="95" column="28" selection-start-line="95" selection-start-column="28" selection-end-line="95" selection-end-column="28" /> <caret line="137" column="26" selection-start-line="137" selection-start-column="26" selection-end-line="137" selection-end-column="26" />
<folding> <folding>
<element signature="e#150#159#0" expanded="true" /> <element signature="e#147#156#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="question_classify.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/question_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1268">
<caret line="149" column="29" selection-start-line="149" selection-start-column="29" selection-end-line="149" selection-end-column="29" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding> </folding>
</state> </state>
</provider> </provider>
@ -73,8 +73,9 @@
<list> <list>
<option value="$PROJECT_DIR$/question_classify_train.py" /> <option value="$PROJECT_DIR$/question_classify_train.py" />
<option value="$PROJECT_DIR$/crime_qa_server.py" /> <option value="$PROJECT_DIR$/crime_qa_server.py" />
<option value="$PROJECT_DIR$/crime_classify.py" />
<option value="$PROJECT_DIR$/question_classify.py" /> <option value="$PROJECT_DIR$/question_classify.py" />
<option value="$PROJECT_DIR$/crime_classify.py" />
<option value="$PROJECT_DIR$/crime_qa.py" />
<option value="$PROJECT_DIR$/README.md" /> <option value="$PROJECT_DIR$/README.md" />
</list> </list>
</option> </option>
@ -141,6 +142,20 @@
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT> </PATH_ELEMENT>
</PATH> </PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="CrimeKgAssistant" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="CrimeKgAssistant" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="data" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane> </subPane>
</pane> </pane>
<pane id="Scope" /> <pane id="Scope" />
@ -162,7 +177,7 @@
<recent name="$PROJECT_DIR$/embedding" /> <recent name="$PROJECT_DIR$/embedding" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Python.question_classify"> <component name="RunManager" selected="Python.crime_qa">
<configuration default="false" name="question_classify" type="PythonConfigurationType" factoryName="Python" temporary="true"> <configuration default="false" name="question_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
@ -197,6 +212,23 @@
<option name="SHOW_COMMAND_LINE" value="false" /> <option name="SHOW_COMMAND_LINE" value="false" />
<method /> <method />
</configuration> </configuration>
<configuration default="false" name="crime_qa" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="CrimeKgAssistant" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/crime_qa.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<method />
</configuration>
<configuration default="true" type="BashConfigurationType" factoryName="Bash"> <configuration default="true" type="BashConfigurationType" factoryName="Bash">
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="INTERPRETER_PATH" value="/bin/bash" /> <option name="INTERPRETER_PATH" value="/bin/bash" />
@ -342,14 +374,16 @@
<option name="USE_KEYWORD" value="false" /> <option name="USE_KEYWORD" value="false" />
<method /> <method />
</configuration> </configuration>
<list size="2"> <list size="3">
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" /> <item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" /> <item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
<item index="2" class="java.lang.String" itemvalue="Python.crime_qa" />
</list> </list>
<recent_temporary> <recent_temporary>
<list size="2"> <list size="3">
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" /> <item index="0" class="java.lang.String" itemvalue="Python.crime_qa" />
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" /> <item index="1" class="java.lang.String" itemvalue="Python.question_classify" />
<item index="2" class="java.lang.String" itemvalue="Python.crime_classify" />
</list> </list>
</recent_temporary> </recent_temporary>
</component> </component>
@ -375,7 +409,7 @@
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" /> <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" /> <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" /> <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3598448" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.30940834" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" /> <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" /> <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
@ -420,7 +454,7 @@
<state relative-caret-position="484"> <state relative-caret-position="484">
<caret line="22" column="33" selection-start-line="0" selection-start-column="0" selection-end-line="184" selection-end-column="0" /> <caret line="22" column="33" selection-start-line="0" selection-start-column="0" selection-end-line="184" selection-end-column="0" />
<folding> <folding>
<element signature="e#146#155#0" expanded="true" /> <element signature="e#146#155#0" expanded="false" />
</folding> </folding>
</state> </state>
</provider> </provider>
@ -433,26 +467,6 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/crime_qa.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="352">
<caret line="16" column="15" selection-start-line="16" selection-start-column="15" selection-end-line="16" selection-end-column="15" />
<folding>
<element signature="e#147#156#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="349">
<caret line="95" column="28" selection-start-line="95" selection-start-column="28" selection-end-line="95" selection-end-column="28" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dict/crime.txt"> <entry file="file://$PROJECT_DIR$/dict/crime.txt">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="693"> <state relative-caret-position="693">
@ -471,11 +485,31 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/crime_qa.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="825">
<caret line="137" column="26" selection-start-line="137" selection-start-column="26" selection-end-line="137" selection-end-column="26" />
<folding>
<element signature="e#147#156#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/crime_classify.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="561">
<caret line="82" column="34" selection-start-line="82" selection-start-column="34" selection-end-line="82" selection-end-column="34" />
<folding>
<element signature="e#150#159#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/README.md"> <entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]"> <provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]">
<state split_layout="SPLIT"> <state split_layout="SPLIT">
<first_editor relative-caret-position="719"> <first_editor relative-caret-position="458">
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" /> <caret line="115" column="14" selection-start-line="115" selection-start-column="14" selection-end-line="115" selection-end-column="14" />
<folding /> <folding />
</first_editor> </first_editor>
<second_editor> <second_editor>

View File

@ -105,9 +105,58 @@ Crime assistant including crime type prediction and crime consult service based
question_type: 婚姻家庭 0.9993444085121155 question_type: 婚姻家庭 0.9993444085121155
# 法务咨询自动问答 # 法务咨询自动问答
运行 python crime_qa.py
question:朋友欠钱不还咋办
answers: ['欠款金额是多少 ', '多少钱呢', '律师费诉讼费都非常少都很合理,一定要起诉。', '大概金额多少?', '需要看标的额和案情复杂程度,建议细致面谈']
*******************************************************
question:昨天把人家车刮了,要赔多少
answers: ['您好,建议协商处理,如果对方告了你们,就只能积极应诉了。', '您好,建议尽量协商处理,协商不成可起诉']
*******************************************************
question:最近丈夫经常家暴,我受不了了
answers: ['报警要求追究刑事责任。', '您好,建议起诉离婚并请求补偿。', '你好!可以起诉离婚,并主张精神损害赔偿。']
*******************************************************
question:毕业生拿了户口就跑路可以吗
answers: 您好,对于此类问题,您可以咨询公安部门
*******************************************************
question:孩子离家出走,怎么找回来
answers: ['孩子父母没有结婚,孩子母亲把孩子带走了?这样的话可以起诉要求抚养权的。毕竟母亲也是孩子的合法监护人,报警警察一般不受理。']
*******************************************************
question:村霸把我田地给占了,我要怎么起诉
answers: ['可以向上级主管部门投诉解决', '您好,您可以及时向土地管理部门投诉的!', '对方侵权,可以向法院起诉。', '你好,对方侵权,可以向法院起诉。', '你好,可起诉处理,一、当事人起诉,首先应提交起诉书,并按对方当事人人数提交相应份数的副本。当事人是公民的,应写明双方当事人的姓名、性别、年龄、籍贯、住址;当事人是单位的,应写明单位名称、地址、法定代表人或负责人姓名。起诉书正文应写明请求事项和起诉事实、理由,尾部须署名或盖公章。二、根据"谁主张谁举证"原则原告向法院起诉应提交下列材料1、原告主体资格的材料。如居民身份证、户口本、护照、港澳同胞回乡证、结婚证等证据的原件和复印件;企业单位作为原告的应提交营业执照、商业登记证明等材料的复印件。2、证明原告诉讼主张的证据。如合同、协议、债权文书(借条、欠条等)、收发货凭证、往来信函等。', '您好,起诉维权。', '您好,可以起诉解决。']
*******************************************************
question:售卖危违禁物品,有什么风险
answers: ['没什么']
*******************************************************
question:找不到女朋友啊..
answers: 您好,对于此类问题,您可以咨询公安部门
*******************************************************
question:我要离婚
answers: ['现在就可向法院起诉离婚。', '不需要分开两年起诉离婚。感情完全破裂就可以提起诉讼离婚。', '你可以直接起诉离婚', '直接起诉']
*******************************************************
question:醉驾,要坐牢吗
answers: ['要负刑事责任很可能坐牢', '由警方处理.,']
*******************************************************
question:你好,我向大学提出退学申请,大学拒绝,理由是家长不同意。我该怎么办?
answers: ['自己可决定的 ']
*******************************************************
question:请问在上班途中,出车祸我的责任偏大属于工伤吗?
answers: ['属于工伤']
*******************************************************
question:结婚时女方拿了彩礼就逃了能要回来吗
answers: ['可以要求退还彩礼。,']
*******************************************************
question:房产证上是不是一定要写夫妻双方姓名
answers: ['可以不填,即使一个人的名字,婚后买房是共同财产。', '不是必须的', '可以写一方名字,对方公证,证明该房产系你一人财产', '你好,不是必须']
*******************************************************
question:儿女不履行赡养义务是不是要判刑
answers: ['什么情况了?']
*******************************************************
question:和未成年人发生关系,需要坐牢吗
answers: ['女孩子在发生关系的时候是否满14周岁如果是且自愿就不是犯罪', '你好,如果是双方愿意的情况下是不犯法的。', '发生性关系时已满十四岁并且是自愿的依法律规定不构成强奸罪,不构成犯罪的。', '若是自愿,那就没什么可说了。', '双方愿意不犯法', '你好 如果是自愿的 不犯法 ', '自愿的就没事']
*******************************************************
question:撞死人逃跑要怎么处理
answers: ['等待警察处理。,']
# 罪刑知识查询 # 罪刑知识查询

View File

@ -88,6 +88,8 @@ class CrimeClassify(object):
return label return label
def test(): def test():
handler = CrimeClassify() handler = CrimeClassify()
while(1): while(1):

View File

@ -8,15 +8,20 @@ import os
import time import time
import json import json
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk import numpy as np
import pymongo import jieba.posseg as pseg
class CrimeQA: class CrimeQA:
def __init__(self):
    """Configure the search backend, load word vectors and set thresholds.

    Builds an Elasticsearch client object for 127.0.0.1:9200 and reads the
    whole embedding file next to this module into memory through
    ``load_embedding``.
    """
    self._index = "crime_data"
    self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
    self.doc_type = "crime"
    # Directory that contains this file.  os.path.dirname is used rather
    # than splitting the path on '/' so that the directory is still found
    # on platforms whose path separator is not '/'.
    cur = os.path.dirname(os.path.abspath(__file__))
    self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
    # The attribute really is spelled "embdding_dict"; the other methods
    # of this class read it under that name, so it is kept as is.
    self.embdding_dict = self.load_embedding(self.embedding_path)
    # Length of the zero vector that sentence vectors are accumulated into.
    self.embedding_size = 300
    # search_main drops a candidate whose combined score is below this.
    self.min_score = 0.4
    # search_main drops a candidate whose cosine similarity is below this.
    self.min_sim = 0.8
'''根据question进行事件的匹配查询''' '''根据question进行事件的匹配查询'''
def search_specific(self, value, key="question"): def search_specific(self, value, key="question"):
@ -43,16 +48,92 @@ class CrimeQA:
answers.append(answer_dict) answers.append(answer_dict)
return answers return answers
'''加载词向量'''
def load_embedding(self, embedding_path):
    """Read a space-separated word-vector text file into a dictionary.

    Every kept line has the form ``word v1 v2 ...``.  Lines with fewer than
    300 space-separated fields are skipped.  Progress is printed every
    10000 kept lines and once more at the end.

    Args:
        embedding_path: path of the embedding file to read.

    Returns:
        dict mapping each word to a numpy array built from the remaining
        fields of its line.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a kept line contains a field that is not a number.
    """
    embedding_dict = {}
    count = 0
    # "with" guarantees the handle is closed even when parsing fails.
    # NOTE(review): the file is assumed to be UTF-8 text - confirm against
    # the tool that produced embedding/word_vec_300.bin.
    with open(embedding_path, encoding='utf-8') as embedding_file:
        for line in embedding_file:
            fields = line.strip().split(' ')
            if len(fields) < 300:
                continue
            embedding_dict[fields[0]] = np.array([float(i) for i in fields[1:]])
            count += 1
            if count % 10000 == 0:
                print(count, 'loaded')
    print('loaded %s word embedding, finished'%count, )
    return embedding_dict
'''对文本进行分词处理'''
def seg_sent(self, s):
    """Segment ``s`` with ``pseg.cut`` and return the words that are kept.

    A token is dropped when the first character of its part-of-speech flag
    is one of 'x', 'u', 'c', 'p', 'm' or 't'.
    NOTE(review): these presumably correspond to punctuation and function
    word / numeral / time tags in jieba's tag set - confirm.

    Args:
        s: the text to segment.

    Returns:
        list of the remaining words, in their original order.
    """
    skipped_prefixes = ['x', 'u', 'c', 'p', 'm', 't']
    kept_words = []
    for token in pseg.cut(s):
        if token.flag[0] in skipped_prefixes:
            continue
        kept_words.append(token.word)
    return kept_words
'''基于wordvector通过lookup table的方式找到句子的wordvector的表示'''
def rep_sentencevector(self, sentence, flag='seg'):
    """Represent a sentence as the mean of its known word vectors.

    Args:
        sentence: the text to embed.
        flag: 'seg' means ``sentence`` is already segmented and its words
            are separated by single spaces; any other value makes the
            method segment the text itself with ``seg_sent``.

    Returns:
        numpy array of length ``self.embedding_size``: the average of the
        vectors of all words found in ``self.embdding_dict``.  When no word
        is found the zero vector is returned instead of dividing by zero,
        which used to produce an all-NaN vector.
    """
    if flag == 'seg':
        word_list = [wd for wd in sentence.split(' ') if wd]
    else:
        word_list = self.seg_sent(sentence)
    embedding = np.zeros(self.embedding_size)
    sent_len = 0
    for wd in word_list:
        vector = self.embdding_dict.get(wd)
        if vector is None:
            # Out-of-vocabulary words do not contribute to the average.
            continue
        embedding += vector
        sent_len += 1
    if sent_len == 0:
        return embedding
    return embedding / sent_len
'''计算问句与库中问句的相似度,对候选结果加以二次筛选'''
def similarity_cosine(self, vector1, vector2):
    """Return the cosine similarity of two numpy vectors.

    Args:
        vector1: first vector (numpy array).
        vector2: second vector, same length as ``vector1``.

    Returns:
        dot(vector1, vector2) / (|vector1| * |vector2|), or 0 when that
        value is undefined because a vector has zero length or contains
        NaN.  The previous check compared the result with the string 'nan',
        which is never equal to a float, so NaN was returned to the caller.
    """
    dot_product = np.sum(vector1 * vector2)
    norm_product = np.sqrt(np.sum(vector1 ** 2)) * np.sqrt(np.sum(vector2 ** 2))
    # Bail out before dividing: a zero or NaN denominator has no meaningful
    # cosine, and dividing would only emit a runtime warning and yield NaN.
    if norm_product == 0 or np.isnan(norm_product):
        return 0
    similarity = dot_product / float(norm_product)
    if np.isnan(similarity):
        return 0
    return similarity
'''问答主函数''' '''问答主函数'''
def search_main(self, question):
    """Pick the best answer for ``question`` among the search candidates.

    Candidates come from ``self.search_es``.  Each one is scored with the
    cosine similarity between the sentence vector of ``question`` and that
    of the candidate's ``'sim_question'``, then with the mean of that
    similarity and the candidate's ``'score'`` divided by 100.  A candidate
    is discarded when the similarity is below ``self.min_sim`` or the mean
    is below ``self.min_score``.

    Args:
        question: the user's question text.

    Returns:
        The ``'answers'`` value of the highest-scoring remaining candidate
        (the earliest one on ties), or a fixed fallback sentence when no
        candidate remains.
    """
    candi_answers = self.search_es(question)
    question_vector = self.rep_sentencevector(question, flag='noseg')
    best_indx = None
    best_score = None
    for indx, candi in enumerate(candi_answers):
        candi_question = candi['sim_question']
        score = candi['score'] / 100
        candi_vector = self.rep_sentencevector(candi_question, flag='noseg')
        sim = self.similarity_cosine(question_vector, candi_vector)
        # Written as "not >=" rather than "<" so that a NaN similarity is
        # rejected as well: every comparison with NaN is False, so a "<"
        # guard would let it through.
        if not sim >= self.min_sim:
            continue
        final_score = (score + sim) / 2
        if not final_score >= self.min_score:
            continue
        # Strict ">" keeps the earliest candidate when scores are equal.
        if best_score is None or final_score > best_score:
            best_indx = indx
            best_score = final_score
    if best_indx is None:
        return '您好,对于此类问题,您可以咨询公安部门'
    return candi_answers[best_indx]['answers']
if __name__ == "__main__":
    # Interactive console: build the QA engine once, then keep reading a
    # question from standard input and printing the selected answer.
    # The loop has no exit condition of its own.
    qa_engine = CrimeQA()
    while True:
        user_question = input('question:')
        print('answers:', qa_engine.search_main(user_question))