添加自动问答模块
This commit is contained in:
parent
222ade9217
commit
9f9cbf7570
@ -20,12 +20,24 @@
|
||||
</component>
|
||||
<component name="FileEditorManager">
|
||||
<leaf>
|
||||
<file leaf-file-name="crime_classify.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/crime_classify.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="561">
|
||||
<caret line="82" column="34" selection-start-line="82" selection-start-column="34" selection-end-line="82" selection-end-column="34" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="README.md" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/README.md">
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]">
|
||||
<state split_layout="SPLIT">
|
||||
<first_editor relative-caret-position="719">
|
||||
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" />
|
||||
<first_editor relative-caret-position="458">
|
||||
<caret line="115" column="14" selection-start-line="115" selection-start-column="14" selection-end-line="115" selection-end-column="14" />
|
||||
<folding />
|
||||
</first_editor>
|
||||
<second_editor>
|
||||
@ -35,25 +47,13 @@
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="crime_classify.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/crime_classify.py">
|
||||
<file leaf-file-name="crime_qa.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/crime_qa.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="349">
|
||||
<caret line="95" column="28" selection-start-line="95" selection-start-column="28" selection-end-line="95" selection-end-column="28" />
|
||||
<state relative-caret-position="825">
|
||||
<caret line="137" column="26" selection-start-line="137" selection-start-column="26" selection-end-line="137" selection-end-column="26" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="question_classify.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/question_classify.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1268">
|
||||
<caret line="149" column="29" selection-start-line="149" selection-start-column="29" selection-end-line="149" selection-end-column="29" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
<element signature="e#147#156#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
@ -73,8 +73,9 @@
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/question_classify_train.py" />
|
||||
<option value="$PROJECT_DIR$/crime_qa_server.py" />
|
||||
<option value="$PROJECT_DIR$/crime_classify.py" />
|
||||
<option value="$PROJECT_DIR$/question_classify.py" />
|
||||
<option value="$PROJECT_DIR$/crime_classify.py" />
|
||||
<option value="$PROJECT_DIR$/crime_qa.py" />
|
||||
<option value="$PROJECT_DIR$/README.md" />
|
||||
</list>
|
||||
</option>
|
||||
@ -141,6 +142,20 @@
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="CrimeKgAssistant" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="CrimeKgAssistant" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="data" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
@ -162,7 +177,7 @@
|
||||
<recent name="$PROJECT_DIR$/embedding" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.question_classify">
|
||||
<component name="RunManager" selected="Python.crime_qa">
|
||||
<configuration default="false" name="question_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
@ -197,6 +212,23 @@
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="false" name="crime_qa" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/crime_qa.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="BashConfigurationType" factoryName="Bash">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="INTERPRETER_PATH" value="/bin/bash" />
|
||||
@ -342,14 +374,16 @@
|
||||
<option name="USE_KEYWORD" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<list size="2">
|
||||
<list size="3">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
|
||||
<item index="2" class="java.lang.String" itemvalue="Python.crime_qa" />
|
||||
</list>
|
||||
<recent_temporary>
|
||||
<list size="2">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
|
||||
<list size="3">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.crime_qa" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.question_classify" />
|
||||
<item index="2" class="java.lang.String" itemvalue="Python.crime_classify" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
@ -375,7 +409,7 @@
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3598448" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.30940834" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
@ -420,7 +454,7 @@
|
||||
<state relative-caret-position="484">
|
||||
<caret line="22" column="33" selection-start-line="0" selection-start-column="0" selection-end-line="184" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#146#155#0" expanded="true" />
|
||||
<element signature="e#146#155#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
@ -433,26 +467,6 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_qa.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="352">
|
||||
<caret line="16" column="15" selection-start-line="16" selection-start-column="15" selection-end-line="16" selection-end-column="15" />
|
||||
<folding>
|
||||
<element signature="e#147#156#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_classify.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="349">
|
||||
<caret line="95" column="28" selection-start-line="95" selection-start-column="28" selection-end-line="95" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/dict/crime.txt">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="693">
|
||||
@ -471,11 +485,31 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_qa.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="825">
|
||||
<caret line="137" column="26" selection-start-line="137" selection-start-column="26" selection-end-line="137" selection-end-column="26" />
|
||||
<folding>
|
||||
<element signature="e#147#156#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_classify.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="561">
|
||||
<caret line="82" column="34" selection-start-line="82" selection-start-column="34" selection-end-line="82" selection-end-column="34" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/README.md">
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;MarkdownPreviewEditor]">
|
||||
<state split_layout="SPLIT">
|
||||
<first_editor relative-caret-position="719">
|
||||
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" />
|
||||
<first_editor relative-caret-position="458">
|
||||
<caret line="115" column="14" selection-start-line="115" selection-start-column="14" selection-end-line="115" selection-end-column="14" />
|
||||
<folding />
|
||||
</first_editor>
|
||||
<second_editor>
|
||||
|
53
README.md
53
README.md
@ -105,9 +105,58 @@ Crime assistant including crime type prediction and crime consult service based
|
||||
question_type: 婚姻家庭 0.9993444085121155
|
||||
|
||||
# 法务咨询自动问答
|
||||
运行 python crime_qa.py
|
||||
|
||||
|
||||
|
||||
question:朋友欠钱不还咋办
|
||||
answers: ['欠款金额是多少 ', '多少钱呢', '律师费诉讼费都非常少都很合理,一定要起诉。', '大概金额多少?', '需要看标的额和案情复杂程度,建议细致面谈']
|
||||
*******************************************************
|
||||
question:昨天把人家车刮了,要赔多少
|
||||
answers: ['您好,建议协商处理,如果对方告了你们,就只能积极应诉了。', '您好,建议尽量协商处理,协商不成可起诉']
|
||||
*******************************************************
|
||||
question:最近丈夫经常家暴,我受不了了
|
||||
answers: ['报警要求追究刑事责任。', '您好,建议起诉离婚并请求补偿。', '你好!可以起诉离婚,并主张精神损害赔偿。']
|
||||
*******************************************************
|
||||
question:毕业生拿了户口就跑路可以吗
|
||||
answers: 您好,对于此类问题,您可以咨询公安部门
|
||||
*******************************************************
|
||||
question:孩子离家出走,怎么找回来
|
||||
answers: ['孩子父母没有结婚,孩子母亲把孩子带走了?这样的话可以起诉要求抚养权的。毕竟母亲也是孩子的合法监护人,报警警察一般不受理。']
|
||||
*******************************************************
|
||||
question:村霸把我田地给占了,我要怎么起诉
|
||||
answers: ['可以向上级主管部门投诉解决', '您好,您可以及时向土地管理部门投诉的!', '对方侵权,可以向法院起诉。', '你好,对方侵权,可以向法院起诉。', '你好,可起诉处理,一、当事人起诉,首先应提交起诉书,并按对方当事人人数提交相应份数的副本。当事人是公民的,应写明双方当事人的姓名、性别、年龄、籍贯、住址;当事人是单位的,应写明单位名称、地址、法定代表人或负责人姓名。起诉书正文应写明请求事项和起诉事实、理由,尾部须署名或盖公章。二、根据"谁主张谁举证"原则,原告向法院起诉应提交下列材料:1、原告主体资格的材料。如居民身份证、户口本、护照、港澳同胞回乡证、结婚证等证据的原件和复印件;企业单位作为原告的应提交营业执照、商业登记证明等材料的复印件。2、证明原告诉讼主张的证据。如合同、协议、债权文书(借条、欠条等)、收发货凭证、往来信函等。', '您好,起诉维权。', '您好,可以起诉解决。']
|
||||
*******************************************************
|
||||
question:售卖危违禁物品,有什么风险
|
||||
answers: ['没什么']
|
||||
*******************************************************
|
||||
question:找不到女朋友啊..
|
||||
answers: 您好,对于此类问题,您可以咨询公安部门
|
||||
*******************************************************
|
||||
question:我要离婚
|
||||
answers: ['现在就可向法院起诉离婚。', '不需要分开两年起诉离婚。感情完全破裂就可以提起诉讼离婚。', '你可以直接起诉离婚', '直接起诉']
|
||||
*******************************************************
|
||||
question:醉驾,要坐牢吗
|
||||
answers: ['要负刑事责任很可能坐牢', '由警方处理.,']
|
||||
*******************************************************
|
||||
question:你好,我向大学提出退学申请,大学拒绝,理由是家长不同意。我该怎么办?
|
||||
answers: ['自己可决定的 ']
|
||||
*******************************************************
|
||||
question:请问在上班途中,出车祸我的责任偏大属于工伤吗?
|
||||
answers: ['属于工伤']
|
||||
*******************************************************
|
||||
question:结婚时女方拿了彩礼就逃了能要回来吗
|
||||
answers: ['可以要求退还彩礼。,']
|
||||
*******************************************************
|
||||
question:房产证上是不是一定要写夫妻双方姓名
|
||||
answers: ['可以不填,即使一个人的名字,婚后买房是共同财产。', '不是必须的', '可以写一方名字,对方公证,证明该房产系你一人财产', '你好,不是必须']
|
||||
*******************************************************
|
||||
question:儿女不履行赡养义务是不是要判刑
|
||||
answers: ['什么情况了?']
|
||||
*******************************************************
|
||||
question:和未成年人发生关系,需要坐牢吗
|
||||
answers: ['女孩子在发生关系的时候是否满14周岁,如果是且自愿就不是犯罪', '你好,如果是双方愿意的情况下是不犯法的。', '发生性关系时已满十四岁并且是自愿的依法律规定不构成强奸罪,不构成犯罪的。', '若是自愿,那就没什么可说了。', '双方愿意不犯法', '你好 如果是自愿的 不犯法 ', '自愿的就没事']
|
||||
*******************************************************
|
||||
question:撞死人逃跑要怎么处理
|
||||
answers: ['等待警察处理。,']
|
||||
|
||||
# 罪刑知识查询
|
||||
|
||||
|
@ -88,6 +88,8 @@ class CrimeClassify(object):
|
||||
return label
|
||||
|
||||
|
||||
|
||||
|
||||
def test():
|
||||
handler = CrimeClassify()
|
||||
while(1):
|
||||
|
97
crime_qa.py
97
crime_qa.py
@ -8,15 +8,20 @@ import os
|
||||
import time
|
||||
import json
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch.helpers import bulk
|
||||
import pymongo
|
||||
import numpy as np
|
||||
import jieba.posseg as pseg
|
||||
|
||||
class CrimeQA:
|
||||
def __init__(self):
    """Create the Elasticsearch client, load the word vectors and set thresholds.

    Construction performs I/O: it builds an Elasticsearch client for
    127.0.0.1:9200 and reads the embedding file located next to this module.
    """
    self._index = "crime_data"
    self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
    self.doc_type = "crime"

    # Resolve resources relative to this file. os.path.dirname handles any
    # path separator and a file sitting directly in the filesystem root,
    # which manual splitting on '/' does not.
    cur = os.path.dirname(os.path.abspath(__file__))
    self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
    # The attribute name (including its spelling) is kept as-is because
    # rep_sentencevector reads it under this name.
    self.embdding_dict = self.load_embedding(self.embedding_path)
    # Length of the zero vector that rep_sentencevector accumulates into.
    self.embedding_size = 300
    # Thresholds applied by search_main: minimum combined score and minimum
    # cosine similarity for a candidate to be accepted.
    self.min_score = 0.4
    self.min_sim = 0.8
|
||||
|
||||
'''根据question进行事件的匹配查询'''
|
||||
def search_specific(self, value, key="question"):
|
||||
@ -43,16 +48,92 @@ class CrimeQA:
|
||||
answers.append(answer_dict)
|
||||
return answers
|
||||
|
||||
|
||||
'''加载词向量'''
|
||||
def load_embedding(self, embedding_path):
    """Load word vectors from a space-separated text file.

    Each usable line holds a word followed by its vector components.
    Lines with fewer than 300 space-separated fields are skipped.
    Progress is printed every 10000 loaded words.

    Args:
        embedding_path: Path of the embedding file to read.

    Returns:
        dict mapping each word to a 1-D numpy array of floats.

    Raises:
        ValueError: If a component of an accepted line is not a number.
    """
    # NOTE(review): a word plus 300 components is 301 fields, so a line with
    # exactly 300 fields (299 components) also passes this check - confirm
    # whether the threshold is intended.
    min_fields = 300
    embedding_dict = {}
    count = 0
    # The context manager closes the file even if a line fails to parse;
    # the previous bare open() left the handle to the garbage collector.
    with open(embedding_path) as embedding_file:
        for line in embedding_file:
            fields = line.strip().split(' ')
            if len(fields) < min_fields:
                continue
            embedding_dict[fields[0]] = np.array([float(i) for i in fields[1:]])
            count += 1
            if count % 10000 == 0:
                print(count, 'loaded')
    print('loaded %s word embedding, finished' % count, )
    return embedding_dict
|
||||
|
||||
|
||||
'''对文本进行分词处理'''
|
||||
def seg_sent(self, s):
    """Segment text into words, filtering tokens by part-of-speech tag.

    A token is dropped when the first character of its tag is one of
    'x', 'u', 'c', 'p', 'm' or 't'.

    Args:
        s: Text to segment.

    Returns:
        list of the words that were kept, in their original order.
    """
    skipped_initials = ['x', 'u', 'c', 'p', 'm', 't']
    kept = []
    for token in pseg.cut(s):
        if token.flag[0] in skipped_initials:
            continue
        kept.append(token.word)
    return kept
|
||||
|
||||
'''基于wordvector,通过lookup table的方式找到句子的wordvector的表示'''
|
||||
def rep_sentencevector(self, sentence, flag='seg'):
    """Represent a sentence as the mean of its known word vectors.

    Args:
        sentence: The text to embed.
        flag: 'seg' means the sentence is already tokenised and its tokens
            are separated by single spaces; any other value segments the
            sentence with self.seg_sent first.

    Returns:
        1-D numpy array of length self.embedding_size holding the average
        vector of the tokens found in self.embdding_dict. When no token is
        found, a zero vector is returned.
    """
    if flag == 'seg':
        word_list = [i for i in sentence.split(' ') if i]
    else:
        word_list = self.seg_sent(sentence)

    embedding = np.zeros(self.embedding_size)
    sent_len = 0
    for wd in word_list:
        vector = self.embdding_dict.get(wd)
        if vector is None:
            # Out-of-vocabulary tokens do not contribute to the average.
            continue
        embedding += vector
        sent_len += 1

    # Dividing by zero here would yield a NaN vector (and a runtime
    # warning) that poisons every later similarity computation.
    if sent_len == 0:
        return embedding
    return embedding / sent_len
|
||||
|
||||
|
||||
'''计算问句与库中问句的相似度,对候选结果加以二次筛选'''
|
||||
def similarity_cosine(self, vector1, vector2):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        vector1: First vector (1-D numpy array).
        vector2: Second vector (1-D numpy array) of the same length.

    Returns:
        The cosine of the angle between the vectors, or 0 when it is
        undefined (either vector has zero norm or contains NaN).
    """
    norm_product = float(np.linalg.norm(vector1) * np.linalg.norm(vector2))
    # A zero norm would make the quotient 0/0. Comparing the result with the
    # string 'nan' never matches a float, so NaN must be tested explicitly.
    if norm_product == 0 or np.isnan(norm_product):
        return 0
    similarity = float(np.dot(vector1, vector2)) / norm_product
    if np.isnan(similarity):
        return 0
    return similarity
|
||||
|
||||
'''问答主函数'''
|
||||
def search_main(self, question):
    """Answer a question by re-ranking the candidates returned by search_es.

    Each candidate is scored as the mean of its retrieval score (divided by
    100) and the cosine similarity between the question vector and the
    vector of the candidate's 'sim_question'. Candidates whose similarity is
    below self.min_sim, or whose combined score is below self.min_score, are
    discarded.

    Args:
        question: The user's question text.

    Returns:
        The 'answers' entry of the best-scoring candidate, or a fixed
        fallback message string when no candidate passes the thresholds.
    """
    candi_answers = self.search_es(question)
    for candi in candi_answers:
        print(candi)

    question_vector = self.rep_sentencevector(question, flag='noseg')
    best_index = None
    best_score = None
    for indx, candi in enumerate(candi_answers):
        score = candi['score'] / 100
        candi_vector = self.rep_sentencevector(candi['sim_question'], flag='noseg')
        sim = self.similarity_cosine(question_vector, candi_vector)
        # Written as "not >=" rather than "<" so that a NaN similarity
        # (every comparison with NaN is False) is rejected instead of
        # slipping past the threshold.
        if not sim >= self.min_sim:
            continue
        final_score = (score + sim) / 2
        if not final_score >= self.min_score:
            continue
        # Strict ">" keeps the earliest candidate on ties, matching a
        # stable descending sort.
        if best_score is None or final_score > best_score:
            best_index = indx
            best_score = final_score

    if best_index is None:
        return '您好,对于此类问题,您可以咨询公安部门'
    return candi_answers[best_index]['answers']
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Interactive console: build the QA handler, run one fixed query whose
    # result is discarded, then answer questions typed by the user forever.
    handler = CrimeQA()
    question = '最近买了一把枪,会犯什么罪?'
    handler.search_main(question)
    while True:
        question = input('question:')
        final_answer = handler.search_main(question)
        print('answers:', final_answer)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user