创建法务智能项目

2018-11-11 15:49:36 +08:00 · 2018-11-11 15:49:36 +08:00 · 90b2429647
commit 90b2429647
18 changed files with 2355 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+embedding/*.bin
--- a/.idea/CrimeKgAssistant.iml
+++ b/.idea/CrimeKgAssistant.iml
@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.6.3 (~/anaconda3/envs/py3/bin/python)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
--- a/.idea/markdown-navigator/profiles_settings.xml
+++ b/.idea/markdown-navigator/profiles_settings.xml
@ -0,0 +1,3 @@
+<component name="MarkdownNavigator.ProfileManager">
+  <settings default="" pdf-export="" />
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="MarkdownProjectSettings">
+    <PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true" openRemoteLinks="true">
+      <PanelProvider>
+        <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" />
+      </PanelProvider>
+    </PreviewSettings>
+    <ParserSettings gitHubSyntaxChange="false">
+      <PegdownExtensions>
+        <option name="ABBREVIATIONS" value="false" />
+        <option name="ANCHORLINKS" value="true" />
+        <option name="ASIDE" value="false" />
+        <option name="ATXHEADERSPACE" value="true" />
+        <option name="AUTOLINKS" value="true" />
+        <option name="DEFINITIONS" value="false" />
+        <option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" />
+        <option name="FENCED_CODE_BLOCKS" value="true" />
+        <option name="FOOTNOTES" value="false" />
+        <option name="HARDWRAPS" value="false" />
+        <option name="HTML_DEEP_PARSER" value="false" />
+        <option name="INSERTED" value="false" />
+        <option name="QUOTES" value="false" />
+        <option name="RELAXEDHRULES" value="true" />
+        <option name="SMARTS" value="false" />
+        <option name="STRIKETHROUGH" value="true" />
+        <option name="SUBSCRIPT" value="false" />
+        <option name="SUPERSCRIPT" value="false" />
+        <option name="SUPPRESS_HTML_BLOCKS" value="false" />
+        <option name="SUPPRESS_INLINE_HTML" value="false" />
+        <option name="TABLES" value="true" />
+        <option name="TASKLISTITEMS" value="true" />
+        <option name="TOC" value="false" />
+        <option name="WIKILINKS" value="true" />
+      </PegdownExtensions>
+      <ParserOptions>
+        <option name="COMMONMARK_LISTS" value="true" />
+        <option name="DUMMY" value="false" />
+        <option name="EMOJI_SHORTCUTS" value="true" />
+        <option name="FLEXMARK_FRONT_MATTER" value="false" />
+        <option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" />
+        <option name="GFM_TABLE_RENDERING" value="true" />
+        <option name="GITBOOK_URL_ENCODING" value="false" />
+        <option name="GITHUB_EMOJI_URL" value="false" />
+        <option name="GITHUB_LISTS" value="false" />
+        <option name="GITHUB_WIKI_LINKS" value="true" />
+        <option name="JEKYLL_FRONT_MATTER" value="false" />
+        <option name="SIM_TOC_BLANK_LINE_SPACER" value="true" />
+      </ParserOptions>
+    </ParserSettings>
+    <HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true" embedImages="false" embedHttpImages="false">
+      <GeneratorProvider>
+        <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" />
+      </GeneratorProvider>
+      <headerTop />
+      <headerBottom />
+      <bodyTop />
+      <bodyBottom />
+    </HtmlSettings>
+    <CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true">
+      <StylesheetProvider>
+        <provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" />
+      </StylesheetProvider>
+      <ScriptProviders />
+      <cssText />
+    </CssSettings>
+    <HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" linkFormatType="HTTP_ABSOLUTE" />
+    <LinkMapSettings>
+      <textMaps />
+    </LinkMapSettings>
+  </component>
+  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
+    <OptionsSetting value="true" id="Add" />
+    <OptionsSetting value="true" id="Remove" />
+    <OptionsSetting value="true" id="Checkout" />
+    <OptionsSetting value="true" id="Update" />
+    <OptionsSetting value="true" id="Status" />
+    <OptionsSetting value="true" id="Edit" />
+    <ConfirmationsSetting value="0" id="Add" />
+    <ConfirmationsSetting value="0" id="Remove" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.3 (~/anaconda3/envs/py3/bin/python)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/CrimeKgAssistant.iml" filepath="$PROJECT_DIR$/.idea/CrimeKgAssistant.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@ -0,0 +1,421 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="8b1874b6-a1e9-45fd-a2c5-b0b8b2b7649b" name="Default" comment="" />
+    <ignored path="CrimeKgAssistant.iws" />
+    <ignored path=".idea/workspace.xml" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="TRACKING_ENABLED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="CreatePatchCommitExecutor">
+    <option name="PATCH_PATH" value="" />
+  </component>
+  <component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
+  <component name="FavoritesManager">
+    <favorites_list name="CrimeKgAssistant" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf />
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/question_classify_train.py" />
+        <option value="$PROJECT_DIR$/question_classify.py" />
+        <option value="$PROJECT_DIR$/crime_classify.py" />
+        <option value="$PROJECT_DIR$/crime_qa_server.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds">
+    <option name="x" value="631" />
+    <option name="y" value="106" />
+    <option name="width" value="1261" />
+    <option name="height" value="1001" />
+  </component>
+  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
+    <OptionsSetting value="true" id="Add" />
+    <OptionsSetting value="true" id="Remove" />
+    <OptionsSetting value="true" id="Checkout" />
+    <OptionsSetting value="true" id="Update" />
+    <OptionsSetting value="true" id="Status" />
+    <OptionsSetting value="true" id="Edit" />
+    <ConfirmationsSetting value="0" id="Add" />
+    <ConfirmationsSetting value="0" id="Remove" />
+  </component>
+  <component name="ProjectView">
+    <navigator currentView="ProjectPane" proportions="" version="1">
+      <flattenPackages />
+      <showMembers />
+      <showModules />
+      <showLibraryContents />
+      <hideEmptyPackages />
+      <abbreviatePackageNames />
+      <autoscrollToSource />
+      <autoscrollFromSource />
+      <sortByType />
+      <manualOrder />
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="ProjectPane">
+        <subPane>
+          <PATH>
+            <PATH_ELEMENT>
+              <option name="myItemId" value="CrimeKgAssistant" />
+              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
+            </PATH_ELEMENT>
+          </PATH>
+          <PATH>
+            <PATH_ELEMENT>
+              <option name="myItemId" value="CrimeKgAssistant" />
+              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
+            </PATH_ELEMENT>
+            <PATH_ELEMENT>
+              <option name="myItemId" value="CrimeKgAssistant" />
+              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
+            </PATH_ELEMENT>
+          </PATH>
+        </subPane>
+      </pane>
+      <pane id="Scope" />
+      <pane id="Scratches" />
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
+    <property name="settings.editor.splitter.proportion" value="0.2" />
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+  </component>
+  <component name="RecentsManager">
+    <key name="CopyFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$" />
+      <recent name="$PROJECT_DIR$/data" />
+    </key>
+    <key name="MoveFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$/dict" />
+      <recent name="$PROJECT_DIR$/embedding" />
+    </key>
+  </component>
+  <component name="RunManager" selected="Python.crime_classify">
+    <configuration default="false" name="question_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/question_classify.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <method />
+    </configuration>
+    <configuration default="false" name="crime_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/crime_classify.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <method />
+    </configuration>
+    <configuration default="true" type="BashConfigurationType" factoryName="Bash">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="INTERPRETER_PATH" value="/bin/bash" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="PARAMETERS" value="" />
+      <module name="" />
+      <envs />
+      <method />
+    </configuration>
+    <configuration default="true" type="PythonConfigurationType" factoryName="Python">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <method />
+    </configuration>
+    <configuration default="true" type="Tox" factoryName="Tox">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <method />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Attests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="CLASS_NAME" value="" />
+      <option name="METHOD_NAME" value="" />
+      <option name="FOLDER_NAME" value="" />
+      <option name="TEST_TYPE" value="TEST_SCRIPT" />
+      <option name="PATTERN" value="" />
+      <option name="USE_PATTERN" value="false" />
+      <method />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Doctests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="CLASS_NAME" value="" />
+      <option name="METHOD_NAME" value="" />
+      <option name="FOLDER_NAME" value="" />
+      <option name="TEST_TYPE" value="TEST_SCRIPT" />
+      <option name="PATTERN" value="" />
+      <option name="USE_PATTERN" value="false" />
+      <method />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Nosetests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="CLASS_NAME" value="" />
+      <option name="METHOD_NAME" value="" />
+      <option name="FOLDER_NAME" value="" />
+      <option name="TEST_TYPE" value="TEST_SCRIPT" />
+      <option name="PATTERN" value="" />
+      <option name="USE_PATTERN" value="false" />
+      <option name="PARAMS" value="" />
+      <option name="USE_PARAM" value="false" />
+      <method />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Unittests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="CLASS_NAME" value="" />
+      <option name="METHOD_NAME" value="" />
+      <option name="FOLDER_NAME" value="" />
+      <option name="TEST_TYPE" value="TEST_SCRIPT" />
+      <option name="PATTERN" value="" />
+      <option name="USE_PATTERN" value="false" />
+      <option name="PUREUNITTEST" value="true" />
+      <option name="PARAMS" value="" />
+      <option name="USE_PARAM" value="false" />
+      <method />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="py.test">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="CrimeKgAssistant" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="CLASS_NAME" value="" />
+      <option name="METHOD_NAME" value="" />
+      <option name="FOLDER_NAME" value="" />
+      <option name="TEST_TYPE" value="TEST_SCRIPT" />
+      <option name="PATTERN" value="" />
+      <option name="USE_PATTERN" value="false" />
+      <option name="testToRun" value="" />
+      <option name="keywords" value="" />
+      <option name="params" value="" />
+      <option name="USE_PARAM" value="false" />
+      <option name="USE_KEYWORD" value="false" />
+      <method />
+    </configuration>
+    <list size="2">
+      <item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
+      <item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
+    </list>
+    <recent_temporary>
+      <list size="2">
+        <item index="0" class="java.lang.String" itemvalue="Python.crime_classify" />
+        <item index="1" class="java.lang.String" itemvalue="Python.question_classify" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="ShelveChangesManager" show_recycled="false">
+    <option name="remove_strategy" value="false" />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="8b1874b6-a1e9-45fd-a2c5-b0b8b2b7649b" name="Default" comment="" />
+      <created>1541920128942</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1541920128942</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="631" y="106" width="1261" height="1001" extended-state="0" />
+    <editor active="true" />
+    <layout>
+      <window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24959612" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
+      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
+      <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
+      <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
+      <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
+      <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.6682832" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+      <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
+      <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
+      <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
+      <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
+      <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
+      <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
+      <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+      <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
+      <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+    </layout>
+  </component>
+  <component name="Vcs.Log.UiProperties">
+    <option name="RECENTLY_FILTERED_USER_GROUPS">
+      <collection />
+    </option>
+    <option name="RECENTLY_FILTERED_BRANCH_GROUPS">
+      <collection />
+    </option>
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager />
+    <watches-manager />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/question_classify_train.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="2241">
+          <caret line="160" column="31" selection-start-line="160" selection-start-column="31" selection-end-line="160" selection-end-column="31" />
+          <folding>
+            <element signature="e#149#158#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/question_classify.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="484">
+          <caret line="40" column="15" selection-start-line="40" selection-start-column="15" selection-end-line="40" selection-end-column="15" />
+          <folding>
+            <element signature="e#150#159#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/dict/crime.txt">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/crime_classify_train.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="484">
+          <caret line="22" column="33" selection-start-line="0" selection-start-column="0" selection-end-line="184" selection-end-column="0" />
+          <folding>
+            <element signature="e#146#155#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/crime_classify.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="275">
+          <caret line="72" column="13" selection-start-line="72" selection-start-column="13" selection-end-line="72" selection-end-column="13" />
+          <folding>
+            <element signature="e#150#159#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/build_qa_database.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/crime_qa.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="352">
+          <caret line="16" column="15" selection-start-line="16" selection-start-column="15" selection-end-line="16" selection-end-column="15" />
+          <folding>
+            <element signature="e#147#156#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+</project>
--- a/build_qa_database.py
+++ b/build_qa_database.py
@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# File: build_qa_database.py
+# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
+# Date: 18-10-10
+
+import os
+import time
+
+import json
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk
+import pymongo
+
+class ProcessIntoES:
+    def __init__(self):
+        self._index = "crime_data"
+        self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
+        self.doc_type = "crime"
+        cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])
+        self.music_file = os.path.join(cur, 'qa_corpus.json')
+
+    '''创建ES索引，确定分词类型'''
+    def create_mapping(self):
+        node_mappings = {
+            "mappings": {
+                self.doc_type: {    # type
+                    "properties": {
+                        "question": {    # field: 问题
+                            "type": "text",    # lxw NOTE: cannot be string
+                            "analyzer": "ik_max_word",
+                            "search_analyzer": "ik_smart",
+                            "index": "true"    # The index option controls whether field values are indexed.
+                        },
+                        "answers": {  # field: 问题
+                            "type": "text",  # lxw NOTE: cannot be string
+                            "analyzer": "ik_max_word",
+                            "search_analyzer": "ik_smart",
+                            "index": "true"  # The index option controls whether field values are indexed.
+                        },
+                    }
+                }
+            }
+        }
+        if not self.es.indices.exists(index=self._index):
+            self.es.indices.create(index=self._index, body=node_mappings)
+            print("Create {} mapping successfully.".format(self._index))
+        else:
+            print("index({}) already exists.".format(self._index))
+
+    '''批量插入数据'''
+    def insert_data_bulk(self, action_list):
+        success, _ = bulk(self.es, action_list, index=self._index, raise_on_error=True)
+        print("Performed {0} actions. _: {1}".format(success, _))
+
+
+'''初始化ES，将数据插入到ES数据库当中'''
+def init_ES():
+    pie = ProcessIntoES()
+    # 创建ES的index
+    pie.create_mapping()
+    start_time = time.time()
+    index = 0
+    count = 0
+    action_list = []
+    BULK_COUNT = 1000  # 每BULK_COUNT个句子一起插入到ES中
+
+    for line in open(pie.music_file):
+        if not line:
+            continue
+        item = json.loads(line)
+        index += 1
+        action = {
+            "_index": pie._index,
+            "_type": pie.doc_type,
+            "_source": {
+                "question": item['question'],
+                "answers": '\n'.join(item['answers']),
+            }
+        }
+        action_list.append(action)
+        if index > BULK_COUNT:
+            pie.insert_data_bulk(action_list=action_list)
+            index = 0
+            count += 1
+            print(count)
+            action_list = []
+        end_time = time.time()
+
+        print("Time Cost:{0}".format(end_time - start_time))
+
+
+if __name__ == "__main__":
+    # Load the corpus into Elasticsearch (currently disabled; uncomment to run).
+    # init_ES()
+    # Sample question for a query by title; nothing in the visible script uses it yet.
+    question = '我老公要起诉离婚 我不想离婚怎么办'
+
--- a/crime_classify.py
+++ b/crime_classify.py
@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# File: crime_classify.py.py
+# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
+# Date: 18-11-11
+
+
+import os
+import numpy as np
+import jieba.posseg as pseg
+from sklearn.externals import joblib
+
+class CrimeClassify(object):
+    def __init__(self):
+        """Load the crime label table and the word embeddings used for prediction."""
+        # Directory of this file. os.path.dirname is portable, whereas splitting
+        # the absolute path on '/' yields an empty prefix with '\\' separators.
+        cur = os.path.dirname(os.path.abspath(__file__))
+        crime_file = os.path.join(cur, 'dict/crime.txt')
+        # crime name -> integer id, and the reverse mapping for decoding predictions
+        self.label_dict = self.build_crime_dict(crime_file)
+        self.id_dict = {j:i for i,j in self.label_dict.items()}
+        self.embedding_size = 300
+        self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
+        self.embdding_dict = self.load_embedding(self.embedding_path)
+        # Anchor the model to this file like the other resources, so prediction
+        # does not depend on the current working directory.
+        self.model_path = os.path.join(cur, 'model/crime_predict.model')
+        return
+
+    def build_crime_dict(self, crimefile):
+        """Build the crime-name -> label-id mapping from a one-name-per-line file.
+
+        Ids are assigned in file order starting at 0; blank lines are ignored
+        and do not consume an id.
+
+        :param crimefile: path of the UTF-8 text file listing the crime names.
+        :return: dict mapping each stripped line to its integer id.
+        """
+        label_dict = {}
+        i = 0
+        # Explicit encoding (the names are Chinese) and a context manager so the
+        # handle is closed even if reading fails.
+        with open(crimefile, encoding='utf-8') as f:
+            for line in f:
+                crime = line.strip()
+                if not crime:
+                    continue
+                label_dict[crime] = i
+                i +=1
+        return label_dict
+
+    def load_embedding(self, embedding_path):
+        """Load word vectors from a space-separated text file.
+
+        Each kept line is ``word v1 v2 ...``. Lines with fewer than 300 fields
+        (for example a short header line) are skipped. Progress is printed
+        every 10000 vectors.
+
+        :param embedding_path: path of the embedding file; it is read as UTF-8 text.
+        :return: dict mapping word -> 1-D numpy array of floats.
+        """
+        embedding_dict = {}
+        count = 0
+        # Context manager closes the handle; explicit encoding keeps the Chinese
+        # vocabulary independent of the platform locale.
+        with open(embedding_path, encoding='utf-8') as f:
+            for line in f:
+                line = line.strip().split(' ')
+                if len(line) < 300:
+                    continue
+                wd = line[0]
+                vector = np.array([float(i) for i in line[1:]])
+                embedding_dict[wd] = vector
+                count += 1
+                if count%10000 == 0:
+                    print(count, 'loaded')
+        print('loaded %s word embedding, finished'%count, )
+        return embedding_dict
+
+    def seg_sent(self, s):
+        """Segment ``s`` with jieba's POS tagger and return the kept words.
+
+        Tokens whose POS flag starts with x, u, c, p, m or t are dropped.
+        """
+        skipped_prefixes = ['x', 'u', 'c', 'p', 'm', 't']
+        wds = []
+        for token in pseg.cut(s):
+            if token.flag[0] in skipped_prefixes:
+                continue
+            wds.append(token.word)
+        return wds
+
+    def rep_sentencevector(self, sentence, flag='seg'):
+        """Represent a sentence as the mean of its known word vectors.
+
+        :param sentence: text to embed.
+        :param flag: ``'seg'`` means ``sentence`` is already segmented with
+            spaces; any other value segments it with ``self.seg_sent`` first.
+        :return: 1-D array of length ``self.embedding_size``; all zeros when no
+            word of the sentence is in the embedding table.
+        """
+        if flag == 'seg':
+            word_list = [i for i in sentence.split(' ') if i]
+        else:
+            word_list = self.seg_sent(sentence)
+        embedding = np.zeros(self.embedding_size)
+        sent_len = 0
+        for wd in word_list:
+            if wd in self.embdding_dict:
+                embedding += self.embdding_dict[wd]
+                sent_len += 1
+        # An empty or fully out-of-vocabulary sentence used to divide 0/0 and
+        # return a NaN vector; return the zero vector instead.
+        if sent_len == 0:
+            return embedding
+        return embedding/sent_len
+
+    def label_onehot(self, label):
+        """Return a one-hot list (length = number of labels) with a 1 at ``int(label)``."""
+        width = len(self.label_dict)
+        position = int(label)
+        encoded = [0 for _ in range(width)]
+        encoded[position] = 1
+        return encoded
+
+    def predict(self, sent):
+        """Predict the crime name for a raw (unsegmented) sentence.
+
+        :param sent: input text; it is segmented internally.
+        :return: the crime name for the predicted id, or None if the id is unknown.
+        """
+        # Deserialising the model on every call is wasteful in the interactive
+        # loop; load it once and keep it on the instance.
+        model = getattr(self, '_model', None)
+        if model is None:
+            # NOTE: joblib.load unpickles the file - only load trusted models.
+            model = joblib.load(self.model_path)
+            self._model = model
+        represent_sent = self.rep_sentencevector(sent, flag='noseg')
+        # The classifier expects a 2-D batch, so reshape to a single row.
+        text_vector = np.array(represent_sent).reshape(1, -1)
+        res = model.predict(text_vector)[0]
+        label = self.id_dict.get(res)
+        return label
+
+
+def test():
+    """Interactive loop: read a sentence from stdin and print the predicted crime."""
+    handler = CrimeClassify()
+    while True:
+        sent = input('enter an sent to search:')
+        print(handler.predict(sent))
+
+if __name__ == '__main__':
+    # Script entry point: start the interactive prediction loop.
+    test()
--- a/crime_classify_train.py
+++ b/crime_classify_train.py
@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# File: crime_classify.py
+# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
+# Date: 18-11-10
+
+import os
+import numpy as np
+from sklearn.svm import SVC, LinearSVC
+import jieba.posseg as pseg
+from collections import Counter
+from sklearn.externals import joblib
+
+class CrimeClassify(object):
+    def __init__(self):
+        """Load the label table and word embeddings, and set the training/model paths."""
+        # Directory of this file. os.path.dirname is portable, whereas splitting
+        # the absolute path on '/' yields an empty prefix with '\\' separators.
+        cur = os.path.dirname(os.path.abspath(__file__))
+        crime_file = os.path.join(cur, 'crime.txt')
+        # crime name -> integer id, and the reverse mapping for decoding predictions
+        self.label_dict = self.build_crime_dict(crime_file)
+        self.id_dict = {j:i for i,j in self.label_dict.items()}
+        self.train_file = os.path.join(cur, 'crime_train_all.txt')
+        self.embedding_size = 300
+        self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
+        self.embdding_dict = self.load_embedding(self.embedding_path)
+        # Output location of the trained model; deliberately left relative to
+        # the working directory, as before.
+        self.model_path = 'crime_predict_svm_all.model'
+        return
+
+    def build_crime_dict(self, crimefile):
+        """Build the crime-name -> label-id mapping from a one-name-per-line file.
+
+        Ids are assigned in file order starting at 0; blank lines are ignored
+        and do not consume an id.
+
+        :param crimefile: path of the UTF-8 text file listing the crime names.
+        :return: dict mapping each stripped line to its integer id.
+        """
+        label_dict = {}
+        i = 0
+        # Explicit encoding (the names are Chinese) and a context manager so the
+        # handle is closed even if reading fails.
+        with open(crimefile, encoding='utf-8') as f:
+            for line in f:
+                crime = line.strip()
+                if not crime:
+                    continue
+                label_dict[crime] = i
+                i +=1
+        return label_dict
+
+    def load_embedding(self, embedding_path):
+        """Load word vectors from a space-separated text file.
+
+        Each kept line is ``word v1 v2 ...``. Lines with fewer than 300 fields
+        (for example a short header line) are skipped. Progress is printed
+        every 10000 vectors.
+
+        :param embedding_path: path of the embedding file; it is read as UTF-8 text.
+        :return: dict mapping word -> 1-D numpy array of floats.
+        """
+        embedding_dict = {}
+        count = 0
+        # Context manager closes the handle; explicit encoding keeps the Chinese
+        # vocabulary independent of the platform locale.
+        with open(embedding_path, encoding='utf-8') as f:
+            for line in f:
+                line = line.strip().split(' ')
+                if len(line) < 300:
+                    continue
+                wd = line[0]
+                vector = np.array([float(i) for i in line[1:]])
+                embedding_dict[wd] = vector
+                count += 1
+                if count%10000 == 0:
+                    print(count, 'loaded')
+        print('loaded %s word embedding, finished'%count, )
+        return embedding_dict
+
+    def seg_sent(self, s):
+        """Segment ``s`` with jieba's POS tagger and return the kept words.
+
+        Tokens whose POS flag starts with x, u, c, p, m or t are dropped.
+        """
+        skipped_prefixes = ['x', 'u', 'c', 'p', 'm', 't']
+        wds = []
+        for token in pseg.cut(s):
+            if token.flag[0] in skipped_prefixes:
+                continue
+            wds.append(token.word)
+        return wds
+
+    def rep_sentencevector(self, sentence, flag='seg'):
+        """Represent a sentence as the mean of its known word vectors.
+
+        :param sentence: text to embed.
+        :param flag: ``'seg'`` means ``sentence`` is already segmented with
+            spaces; any other value segments it with ``self.seg_sent`` first.
+        :return: 1-D array of length ``self.embedding_size``; all zeros when no
+            word of the sentence is in the embedding table.
+        """
+        if flag == 'seg':
+            word_list = [i for i in sentence.split(' ') if i]
+        else:
+            word_list = self.seg_sent(sentence)
+        embedding = np.zeros(self.embedding_size)
+        sent_len = 0
+        for wd in word_list:
+            if wd in self.embdding_dict:
+                embedding += self.embdding_dict[wd]
+                sent_len += 1
+        # An empty or fully out-of-vocabulary sentence used to divide 0/0 and
+        # put a NaN row into the training matrix; return the zero vector instead.
+        if sent_len == 0:
+            return embedding
+        return embedding/sent_len
+
+    def label_onehot(self, label):
+        """Return a one-hot list (length = number of labels) with a 1 at ``int(label)``."""
+        width = len(self.label_dict)
+        position = int(label)
+        encoded = [0 for _ in range(width)]
+        encoded[position] = 1
+        return encoded
+
+    def load_traindata(self):
+        """Read ``self.train_file`` and return the feature matrix and label vector.
+
+        Each usable line is ``label_id<TAB>segmented sentence``; lines without a
+        tab are skipped. Progress is printed every 10000 samples.
+
+        :return: ``(X, Y)`` as numpy arrays - sentence vectors and integer label ids.
+        :raises ValueError: if a label field is not an integer.
+        """
+        train_X = []
+        train_Y = []
+        count = 0
+        # Context manager closes the handle; explicit encoding for the Chinese text.
+        with open(self.train_file, encoding='utf-8') as f:
+            for line in f:
+                line = line.strip().split('\t')
+                if len(line) < 2:
+                    continue
+                count += 1
+                # if count > 1000:
+                #     break
+                sent = line[1]
+                label_id = int(line[0])
+                # The training sentences are already segmented, hence flag='seg'.
+                sent_vector = self.rep_sentencevector(sent, flag='seg')
+                train_X.append(sent_vector)
+                train_Y.append(label_id)
+                if count % 10000 == 0:
+                    print('loaded %s lines'%count)
+        return np.array(train_X), np.array(train_Y)
+
+    def train_classifer(self):
+        """Fit a LinearSVC on the training file, save it, and print the hit rate on that same data."""
+        x_train, y_train = self.load_traindata()
+        model = LinearSVC()
+        model.fit(x_train, y_train)
+        joblib.dump(model, self.model_path)
+        y_predict = model.predict(x_train)
+        total = len(y_predict)
+        # Count the samples whose prediction equals the true label.
+        right = sum(1 for truth, guess in zip(y_train, y_predict) if guess == truth)
+        print('precision:%s/%s=%s'%(right, total, right/total))
+
+    def predict(self, sent):
+        """Predict the crime name for a raw sentence with the model stored at ``self.model_path``."""
+        classifier = joblib.load(self.model_path)
+        sentence_repr = self.rep_sentencevector(sent, flag='noseg')
+        # Reshape to a single-row 2-D batch before handing it to the classifier.
+        features = np.array(sentence_repr).reshape(1, -1)
+        predicted_id = classifier.predict(features)[0]
+        return self.id_dict.get(predicted_id)
+
+
+    def check_precision(self):
+        """Load the saved model and print its hit rate on the data from ``load_traindata``."""
+        model = joblib.load(self.model_path)
+        x_train, y_train = self.load_traindata()
+        y_predict = model.predict(x_train)
+        total = len(y_predict)
+        # Count the samples whose prediction equals the true label.
+        right = sum(1 for truth, guess in zip(y_train, y_predict) if guess == truth)
+        print('precision:%s/%s=%s'%(right, total, right/total))
+        # Results recorded by the author:
+        # precision:170231 / 204231 = 0.83352184536138
+        # precision:2650780 / 2880306 = 0.9203119390786951
+
+
+def test():
+    """Interactive loop: read a sentence from stdin and print the predicted crime."""
+    handler = CrimeClassify()
+    # handler.train_classifer()
+    while True:
+        sent = input('enter an sent to search:')
+        print(handler.predict(sent))
+
+def build_data():
+    """Convert ``accu_train.txt`` into the ``label_id<TAB>sentence`` training file.
+
+    Reads the label table from ``crime.txt`` (one crime per line, ids in file
+    order) and the raw samples from ``accu_train.txt``, whose fields are
+    separated by ``###``: field 1 holds ``;``-separated crimes (the first one is
+    used as the label) and the last field holds the sentence. The result is
+    written to ``crime_train_all.txt``. All three paths are relative to the
+    current working directory.
+    """
+    label_dict = {}
+    i = 0
+    with open('crime.txt', encoding='utf-8') as crimes:
+        for line in crimes:
+            crime = line.strip()
+            if not crime:
+                continue
+            label_dict[crime] = i
+            i += 1
+
+    count = 0
+    skipped = 0
+    # Context managers close both files even if a line fails to process.
+    with open('accu_train.txt', encoding='utf-8') as src, \
+            open('crime_train_all.txt', 'w', encoding='utf-8') as f:
+        for line in src:
+            line = line.strip().split('###')
+            if len(line) < 3:
+                continue
+            crime = line[1].split(';')[0]
+            sent = line[-1]
+            label = label_dict.get(crime)
+            # A crime missing from the table used to be written as the literal
+            # label "None", which the loader cannot parse as an int.
+            if label is None:
+                skipped += 1
+                continue
+            f.write(str(label) + '\t' + sent + '\n')
+            count += 1
+            if count % 10000 == 0:
+                print(count)
+    print('wrote %s lines, skipped %s with unknown crime' % (count, skipped))
+
+
+if __name__ == '__main__':
+    # Default action: interactive prediction with the saved model.
+    test()
+    # Alternative entry points, enabled by hand:
+    #build_data()
+    #handler = CrimeClassify()
+    #handler.check_precision()
--- a/crime_qa.py
+++ b/crime_qa.py
@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# File: crime_qa_server.py
+# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
+# Date: 18-11-10
+
+import os
+import time
+import json
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk
+import pymongo
+
+class CrimeQA:
+    """Question answering by full-text matching against the ``crime_data`` ES index."""
+
+    def __init__(self):
+        # Index name, document type and client for the ES node at 127.0.0.1:9200.
+        self.doc_type = "crime"
+        self._index = "crime_data"
+        self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
+
+    def search_specific(self, value, key="question"):
+        """Run a ``match`` query for ``value`` on field ``key`` and return the raw hits (size=20)."""
+        match_clause = {key: value}
+        query_body = {"query": {"match": match_clause}}
+        searched = self.es.search(index=self._index, doc_type=self.doc_type, body=query_body, size=20)
+        return searched["hits"]["hits"]
+
+    def search_es(self, question):
+        """Return one dict per hit with its score, the matched question and the answer list."""
+        return [
+            {
+                'score': hit['_score'],
+                'sim_question': hit['_source']['question'],
+                # Answers are stored as one newline-joined string.
+                'answers': hit['_source']['answers'].split('\n'),
+            }
+            for hit in self.search_specific(question)
+        ]
+
+    def search_main(self, question):
+        """Entry point: print every candidate answer found for ``question``."""
+        for candi in self.search_es(question):
+            print(candi)
+
+
+
+if __name__ == "__main__":
+    # Demo: print the ES matches for one sample question.
+    handler = CrimeQA()
+    question = '最近买了一把枪,会犯什么罪?'
+    handler.search_main(question)
+
--- a/data/kg_crime.json
+++ b/data/kg_crime.json
--- a/data/qa_corpus.json.zip
+++ b/data/qa_corpus.json.zip
--- a/dict/crime.txt
+++ b/dict/crime.txt
@ -0,0 +1,202 @@
+妨害公务
+寻衅滋事
+盗窃、侮辱尸体
+危险物品肇事
+非法采矿
+组织、强迫、引诱、容留、介绍卖淫
+开设赌场
+聚众斗殴
+绑架
+非法持有毒品
+销售假冒注册商标的商品
+容留他人吸毒
+假冒注册商标
+交通肇事
+破坏电力设备
+组织卖淫
+合同诈骗
+走私武器、弹药
+抢劫
+非法处置查封、扣押、冻结的财产
+以危险方法危害公共安全
+过失投放危险物质
+非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物
+伪造、变造、买卖武装部队公文、证件、印章
+持有、使用假币
+重婚
+聚众冲击国家机关
+生产、销售伪劣农药、兽药、化肥、种子
+收买被拐卖的妇女、儿童
+聚众哄抢
+重大劳动安全事故
+侵占
+包庇毒品犯罪分子
+虚报注册资本
+违法发放贷款
+制造、贩卖、传播淫秽物品
+窝藏、包庇
+帮助毁灭、伪造证据
+放火
+强奸
+非法携带枪支、弹药、管制刀具、危险物品危及公共安全
+伪造、变造金融票证
+爆炸
+玩忽职守
+对非国家工作人员行贿
+伪造、倒卖伪造的有价票证
+私分国有资产
+非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品
+生产、销售假药
+挪用特定款物
+过失致人死亡
+走私国家禁止进出口的货物、物品
+非法制造、买卖、运输、储存危险物质
+洗钱
+骗取贷款、票据承兑、金融票证
+非法买卖制毒物品
+非法买卖、运输、携带、持有毒品原植物种子、幼苗
+生产、销售有毒、有害食品
+滥用职权
+招收公务员、学生徇私舞弊
+诬告陷害
+非法获取国家秘密
+非法行医
+非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品
+非法出售发票
+行贿
+高利转贷
+非法吸收公众存款
+传播淫秽物品
+非法进行节育手术
+盗伐林木
+聚众扰乱社会秩序
+走私、贩卖、运输、制造毒品
+滥伐林木
+赌博
+非法经营
+生产、销售不符合安全标准的食品
+提供侵入、非法控制计算机信息系统程序、工具
+倒卖文物
+窃取、收买、非法提供信用卡信息
+盗掘古文化遗址、古墓葬
+协助组织卖淫
+破坏广播电视设施、公用电信设施
+走私普通货物、物品
+逃税
+破坏监管秩序
+失火
+受贿
+组织、领导、参加黑社会性质组织
+票据诈骗
+非法制造、销售非法制造的注册商标标识
+侵犯著作权
+伪造、变造、买卖国家机关公文、证件、印章
+徇私舞弊不征、少征税款
+强迫劳动
+贷款诈骗
+劫持船只、汽车
+诈骗
+非法种植毒品原植物
+非法狩猎
+挪用资金
+非法收购、运输盗伐、滥伐的林木
+出售、购买、运输假币
+抢夺
+虐待被监管人
+窝藏、转移、收购、销售赃物
+破坏计算机信息系统
+制作、复制、出版、贩卖、传播淫秽物品牟利
+拒不支付劳动报酬
+盗窃、抢夺枪支、弹药、爆炸物
+强迫他人吸毒
+走私珍贵动物、珍贵动物制品
+虐待
+非法获取公民个人信息
+破坏交通设施
+非法转让、倒卖土地使用权
+非法捕捞水产品
+非法占用农用地
+非法制造、出售非法制造的发票
+非法持有、私藏枪支、弹药
+集资诈骗
+强迫卖淫
+伪造公司、企业、事业单位、人民团体印章
+利用影响力受贿
+编造、故意传播虚假恐怖信息
+介绍贿赂
+传播性病
+拐卖妇女、儿童
+倒卖车票、船票
+窝藏、转移、隐瞒毒品、毒赃
+徇私舞弊不移交刑事案件
+过失损坏广播电视设施、公用电信设施
+动植物检疫徇私舞弊
+破坏交通工具
+猥亵儿童
+挪用公款
+伪造货币
+冒充军人招摇撞骗
+非法采伐、毁坏国家重点保护植物
+故意毁坏财物
+非法拘禁
+招摇撞骗
+伪造、变造居民身份证
+徇私枉法
+非法生产、买卖警用装备
+掩饰、隐瞒犯罪所得、犯罪所得收益
+生产、销售伪劣产品
+破坏生产经营
+帮助犯罪分子逃避处罚
+贪污
+投放危险物质
+持有伪造的发票
+危险驾驶
+妨害作证
+非法猎捕、杀害珍贵、濒危野生动物
+重大责任事故
+诽谤
+虚开发票
+引诱、教唆、欺骗他人吸毒
+脱逃
+扰乱无线电通讯管理秩序
+保险诈骗
+非法生产、销售间谍专用器材
+非法组织卖血
+强迫交易
+串通投标
+破坏易燃易爆设备
+传授犯罪方法
+妨害信用卡管理
+拐骗儿童
+单位行贿
+打击报复证人
+拒不执行判决、裁定
+经济犯
+金融凭证诈骗
+虚开增值税专用发票、用于骗取出口退税、抵扣税款发票
+走私废物
+组织、领导传销活动
+单位受贿
+盗窃、抢夺枪支、弹药、爆炸物、危险物质
+过失以危险方法危害公共安全
+过失致人重伤
+引诱、容留、介绍卖淫
+遗弃
+走私
+信用卡诈骗
+对单位行贿
+故意杀人
+聚众扰乱公共场所秩序、交通秩序
+盗窃
+故意伤害
+非法侵入住宅
+强制猥亵、侮辱妇女
+伪证
+污染环境
+巨额财产来源不明
+非国家工作人员受贿
+侮辱
+隐匿、故意销毁会计凭证、会计帐簿、财务会计报告
+过失损坏武器装备、军事设施、军事通信
+敲诈勒索
+职务侵占
--- a/model/cnn_question_classify.h5
+++ b/model/cnn_question_classify.h5
--- a/model/crime_predict.model
+++ b/model/crime_predict.model
--- a/model/lstm_question_predict.h5
+++ b/model/lstm_question_predict.h5
--- a/question_classify.py
+++ b/question_classify.py
@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# File: question_classify.py
+# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
+# Date: 18-11-11
+
+
+import os
+import numpy as np
+import jieba.posseg as pseg
+from keras.models import Sequential, load_model
+from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dense, Dropout, LSTM, Bidirectional
+
+
+class QuestionClassify(object):
+    def __init__(self):
+        """Set up the label table, load the word embeddings and record the model paths."""
+        # class id -> question category name
+        self.label_dict = {
+            0: "婚姻家庭",
+            1: "劳动纠纷",
+            2: "交通事故",
+            3: "债权债务",
+            4: "刑事辩护",
+            5: "合同纠纷",
+            6: "房产纠纷",
+            7: "侵权",
+            8: "公司法",
+            9: "医疗纠纷",
+            10: "拆迁安置",
+            11: "行政诉讼",
+            12: "建设工程"
+        }
+        # Directory of this file. os.path.dirname is portable, whereas splitting
+        # the absolute path on '/' yields an empty prefix with '\\' separators.
+        cur = os.path.dirname(os.path.abspath(__file__))
+        self.max_length = 60
+        self.embedding_size = 300
+        self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
+        self.embdding_dict = self.load_embedding(self.embedding_path)
+        # Anchor the model files to this file like the embeddings, so prediction
+        # does not depend on the current working directory.
+        # NOTE(review): this commit adds model/lstm_question_predict.h5, not
+        # lstm_question_classify.h5 - confirm which LSTM file name is intended.
+        self.lstm_modelpath = os.path.join(cur, 'model/lstm_question_classify.h5')
+        self.cnn_modelpath = os.path.join(cur, 'model/cnn_question_classify.h5')
+        return
+
+    def load_embedding(self, embedding_path):
+        """Load word vectors from a space-separated text file.
+
+        Each kept line is ``word v1 v2 ...``. Lines with fewer than 300 fields
+        (for example a short header line) are skipped. Progress is printed
+        every 10000 vectors.
+
+        :param embedding_path: path of the embedding file; it is read as UTF-8 text.
+        :return: dict mapping word -> 1-D numpy array of floats.
+        """
+        embedding_dict = {}
+        count = 0
+        # Context manager closes the handle; explicit encoding keeps the Chinese
+        # vocabulary independent of the platform locale.
+        with open(embedding_path, encoding='utf-8') as f:
+            for line in f:
+                line = line.strip().split(' ')
+                if len(line) < 300:
+                    continue
+                wd = line[0]
+                vector = np.array([float(i) for i in line[1:]])
+                embedding_dict[wd] = vector
+                count += 1
+                if count % 10000 == 0:
+                    print(count, 'loaded')
+        print('loaded %s word embedding, finished' % count, )
+        return embedding_dict
+
+    def seg_sent(self, s):
+        """Segment ``s`` with jieba's POS tagger, dropping tokens whose flag starts with w or x."""
+        skipped_prefixes = ['w', 'x']
+        wds = []
+        for token in pseg.cut(s):
+            if token.flag[0] in skipped_prefixes:
+                continue
+            wds.append(token.word)
+        return wds
+
+    def rep_sentencevector(self, sentence):
+        """Turn a sentence into a ``(max_length, embedding_size)`` matrix of word vectors.
+
+        The sentence is segmented and truncated to ``max_length`` tokens; row
+        ``i`` holds the vector of token ``i`` when it is in the embedding table.
+        The matrix is then passed through ``modify_sentencevector``.
+        """
+        tokens = self.seg_sent(sentence)[:self.max_length]
+        matrix = np.zeros((self.max_length, self.embedding_size))
+        for row, token in enumerate(tokens):
+            if token not in self.embdding_dict:
+                continue
+            matrix[row] = self.embdding_dict.get(token)
+        return self.modify_sentencevector(matrix, len(tokens))
+
+    def modify_sentencevector(self, embedding_matrix, len_sent):
+        """Fill all-zero (out-of-vocabulary) rows with the mean of their neighbours.
+
+        For every row index below ``len_sent`` whose vector is all zeros, the
+        row is replaced in place by the mean of the rows in a window of two
+        positions on each side. Rows are processed in order, so rows filled
+        earlier contribute to later windows.
+
+        :param embedding_matrix: 2-D array, one row per token position.
+        :param len_sent: number of real tokens; rows from here on are padding.
+        :return: the same array object, modified in place.
+        """
+        context_window = 2
+        last = len(embedding_matrix) - 1
+        for indx, vec in enumerate(embedding_matrix):
+            left = max(indx - context_window, 0)
+            right = indx + context_window
+            if right > last:
+                # NOTE(review): this makes the slice end at -1, so the window
+                # stops before the last row instead of including it. Kept as is
+                # because changing it would alter the features the saved models
+                # were trained on - confirm before fixing.
+                right = -2
+            context = embedding_matrix[left:right + 1]
+            # ``not vec.any()`` detects an all-zero row for any embedding size;
+            # the old comparison against [0] * 300 only worked for 300 dimensions.
+            if indx < len_sent and not vec.any():
+                embedding_matrix[indx] = context.mean(axis=0)
+
+        return embedding_matrix
+
+    def label_onehot(self, label):
+        """Return a one-hot list (length = number of labels) with a 1 at ``int(label)``."""
+        width = len(self.label_dict)
+        position = int(label)
+        encoded = [0 for _ in range(width)]
+        encoded[position] = 1
+        return encoded
+
+
+    def build_cnn_model(self):
+        """Build and compile the 1-D CNN classifier over ``(max_length, embedding_size)`` inputs.
+
+        :return: the compiled Keras ``Sequential`` model (its summary is printed).
+        """
+        # One output unit per label instead of a hard-coded 13.
+        num_classes = len(self.label_dict)
+        model = Sequential()
+        model.add(Conv1D(64, 3, activation='relu', input_shape=(self.max_length, self.embedding_size)))
+        model.add(Conv1D(64, 3, activation='relu'))
+        model.add(MaxPooling1D(3))
+        model.add(Conv1D(128, 3, activation='relu'))
+        model.add(Conv1D(128, 3, activation='relu'))
+        model.add(GlobalAveragePooling1D())
+        model.add(Dropout(0.5))
+        # NOTE(review): sigmoid + binary_crossentropy scores each label
+        # independently, while label_onehot produces single-label targets and
+        # the LSTM uses softmax/categorical_crossentropy - confirm this is intended.
+        model.add(Dense(num_classes, activation='sigmoid'))
+        model.compile(loss='binary_crossentropy',
+                      optimizer='rmsprop',
+                      metrics=['accuracy'])
+        model.summary()
+        return model
+
+    def build_lstm_model(self):
+        """Build and compile the three-layer LSTM classifier over ``(max_length, embedding_size)`` inputs.
+
+        :return: the compiled Keras ``Sequential`` model.
+        """
+        # One output unit per label instead of a hard-coded 13.
+        num_classes = len(self.label_dict)
+        model = Sequential()
+        # The first two layers return the full sequence (32 units per step) so
+        # that the next LSTM can consume it.
+        model.add(LSTM(32, return_sequences=True, input_shape=(self.max_length, self.embedding_size)))
+        model.add(LSTM(32, return_sequences=True))
+        model.add(LSTM(32))  # return a single vector of dimension 32
+        model.add(Dense(num_classes, activation='softmax'))
+        model.compile(loss='categorical_crossentropy',
+                      optimizer='rmsprop',
+                      metrics=['accuracy'])
+
+        return model
+
+    def predict(self, sent):
+        """Classify a question with the saved CNN model.
+
+        :param sent: raw question text.
+        :return: ``(label, prob)`` - the category name with the highest score and that score.
+        """
+        # Loading the .h5 file on every call is wasteful in the interactive
+        # loop; load it once and keep it on the instance.
+        model = getattr(self, '_cnn_model', None)
+        if model is None:
+            model = load_model(self.cnn_modelpath)
+            self._cnn_model = model
+        # Wrap the sentence matrix in a batch of size one.
+        sentence_vector = np.array([self.rep_sentencevector(sent)])
+        res = model.predict(sentence_vector)[0].tolist()
+        prob = max(res)
+        label = self.label_dict.get(res.index(prob))
+        return label, prob
+
+if __name__ == '__main__':
+    # Interactive loop: classify each question typed on stdin.
+    handler = QuestionClassify()
+    while True:
+        sent = input('enter an sent to search:')
+        label, prob = handler.predict(sent)
+        print(label, prob)
--- a/question_classify_train.py
+++ b/question_classify_train.py
@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# File: question_classify.py
+# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
+# Date: 18-11-10
+
+import os
+import numpy as np
+import jieba.posseg as pseg
+from keras.models import Sequential, load_model
+from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dense, Dropout, LSTM, Bidirectional
+
+class QuestionClassify(object):
+    def __init__(self):
+        """Set up the label table, load the word embeddings and record data/model paths."""
+        # class id -> question category name
+        self.label_dict = {
+            0:"婚姻家庭",
+            1:"劳动纠纷",
+            2:"交通事故",
+            3:"债权债务",
+            4:"刑事辩护",
+            5:"合同纠纷",
+            6:"房产纠纷",
+            7:"侵权",
+            8:"公司法",
+            9:"医疗纠纷",
+            10:"拆迁安置",
+            11:"行政诉讼",
+            12:"建设工程"
+            }
+        # Directory of this file. os.path.dirname is portable, whereas splitting
+        # the absolute path on '/' yields an empty prefix with '\\' separators.
+        cur = os.path.dirname(os.path.abspath(__file__))
+        self.train_file = os.path.join(cur, 'question_train.txt')
+        self.max_length = 60
+        self.embedding_size = 300
+        # NOTE(review): the other scripts in this commit read
+        # 'embedding/word_vec_300.bin' - confirm this path is intended.
+        self.embedding_path = os.path.join(cur, 'word_vec_300.bin')
+        self.embdding_dict = self.load_embedding(self.embedding_path)
+        # Output locations of the trained models, relative to the working directory.
+        self.lstm_modelpath = 'model/lstm_question_classify.h5'
+        self.cnn_modelpath = 'model/cnn_question_classify.h5'
+        return
+
+    def load_embedding(self, embedding_path):
+        """Load word vectors from a space-separated text file.
+
+        Each kept line is ``word v1 v2 ...``. Lines with fewer than 300 fields
+        (for example a short header line) are skipped. Progress is printed
+        every 10000 vectors.
+
+        :param embedding_path: path of the embedding file; it is read as UTF-8 text.
+        :return: dict mapping word -> 1-D numpy array of floats.
+        """
+        embedding_dict = {}
+        count = 0
+        # Context manager closes the handle; explicit encoding keeps the Chinese
+        # vocabulary independent of the platform locale.
+        with open(embedding_path, encoding='utf-8') as f:
+            for line in f:
+                line = line.strip().split(' ')
+                if len(line) < 300:
+                    continue
+                wd = line[0]
+                vector = np.array([float(i) for i in line[1:]])
+                embedding_dict[wd] = vector
+                count += 1
+                if count%10000 == 0:
+                    print(count, 'loaded')
+        print('loaded %s word embedding, finished'%count, )
+        return embedding_dict
+
+    def seg_sent(self, s):
+        """Segment ``s`` with jieba's POS tagger, dropping tokens whose flag starts with w or x."""
+        skipped_prefixes = ['w', 'x']
+        wds = []
+        for token in pseg.cut(s):
+            if token.flag[0] in skipped_prefixes:
+                continue
+            wds.append(token.word)
+        return wds
+
+    def rep_sentencevector(self, sentence):
+        """Turn a sentence into a ``(max_length, embedding_size)`` matrix of word vectors.
+
+        The sentence is segmented and truncated to ``max_length`` tokens; row
+        ``i`` holds the vector of token ``i`` when it is in the embedding table.
+        The matrix is then passed through ``modify_sentencevector``.
+        """
+        tokens = self.seg_sent(sentence)[:self.max_length]
+        matrix = np.zeros((self.max_length, self.embedding_size))
+        for row, token in enumerate(tokens):
+            if token not in self.embdding_dict:
+                continue
+            matrix[row] = self.embdding_dict.get(token)
+        return self.modify_sentencevector(matrix, len(tokens))
+
+    def modify_sentencevector(self, embedding_matrix, len_sent):
+        """Fill all-zero (out-of-vocabulary) rows with the mean of their neighbours.
+
+        For every row index below ``len_sent`` whose vector is all zeros, the
+        row is replaced in place by the mean of the rows in a window of two
+        positions on each side. Rows are processed in order, so rows filled
+        earlier contribute to later windows.
+
+        :param embedding_matrix: 2-D array, one row per token position.
+        :param len_sent: number of real tokens; rows from here on are padding.
+        :return: the same array object, modified in place.
+        """
+        context_window = 2
+        last = len(embedding_matrix) - 1
+        for indx, vec in enumerate(embedding_matrix):
+            left = max(indx - context_window, 0)
+            right = indx + context_window
+            if right > last:
+                # NOTE(review): this makes the slice end at -1, so the window
+                # stops before the last row instead of including it. Kept as is
+                # so the features stay identical to those used for existing
+                # models - confirm before fixing.
+                right = -2
+            context = embedding_matrix[left:right+1]
+            # ``not vec.any()`` detects an all-zero row for any embedding size;
+            # the old comparison against [0]*300 only worked for 300 dimensions.
+            if indx < len_sent and not vec.any():
+                embedding_matrix[indx] = context.mean(axis=0)
+
+        return embedding_matrix
+
+    def label_onehot(self, label):
+        """Return a one-hot list (length = number of labels) with a 1 at ``int(label)``."""
+        width = len(self.label_dict)
+        position = int(label)
+        encoded = [0 for _ in range(width)]
+        encoded[position] = 1
+        return encoded
+
+    def load_traindata(self):
+        """Read ``self.train_file`` and return the sentence tensors and one-hot labels.
+
+        Each usable line is ``sentence<TAB>label_id``; lines without a tab are
+        skipped. Progress is printed every 10000 samples.
+
+        :return: ``(X, Y)`` as numpy arrays - one sentence matrix and one
+            one-hot label vector per sample.
+        """
+        train_X = []
+        train_Y = []
+        count = 0
+        # Context manager closes the handle; explicit encoding for the Chinese text.
+        with open(self.train_file, encoding='utf-8') as f:
+            for line in f:
+                line = line.strip().split('\t')
+                if len(line) < 2:
+                    continue
+                count += 1
+                sent = line[0]
+                label = line[1]
+                sent_vector = self.rep_sentencevector(sent)
+                label_vector = self.label_onehot(label)
+                train_X.append(sent_vector)
+                train_Y.append(label_vector)
+
+                if count % 10000 == 0:
+                    print('loaded %s lines'%count)
+
+        return np.array(train_X), np.array(train_Y)
+
+    def build_cnn_model(self):
+        """Build and compile the 1-D CNN classifier over ``(max_length, embedding_size)`` inputs.
+
+        :return: the compiled Keras ``Sequential`` model (its summary is printed).
+        """
+        # One output unit per label instead of a hard-coded 13.
+        num_classes = len(self.label_dict)
+        model = Sequential()
+        model.add(Conv1D(64, 3, activation='relu', input_shape=(self.max_length, self.embedding_size)))
+        model.add(Conv1D(64, 3, activation='relu'))
+        model.add(MaxPooling1D(3))
+        model.add(Conv1D(128, 3, activation='relu'))
+        model.add(Conv1D(128, 3, activation='relu'))
+        model.add(GlobalAveragePooling1D())
+        model.add(Dropout(0.5))
+        # NOTE(review): sigmoid + binary_crossentropy scores each label
+        # independently, while label_onehot produces single-label targets and
+        # the LSTM uses softmax/categorical_crossentropy - confirm this is intended.
+        model.add(Dense(num_classes, activation='sigmoid'))
+        model.compile(loss='binary_crossentropy',
+                      optimizer='rmsprop',
+                      metrics=['accuracy'])
+        model.summary()
+        return model
+
+    def build_lstm_model(self):
+        """Build and compile the three-layer LSTM classifier over ``(max_length, embedding_size)`` inputs.
+
+        :return: the compiled Keras ``Sequential`` model.
+        """
+        # One output unit per label instead of a hard-coded 13.
+        num_classes = len(self.label_dict)
+        model = Sequential()
+        model.add(LSTM(32, return_sequences=True, input_shape=(self.max_length, self.embedding_size)))  # returns a sequence of vectors of dimension 32
+        model.add(LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
+        model.add(LSTM(32))  # return a single vector of dimension 32
+        model.add(Dense(num_classes, activation='softmax'))
+        model.compile(loss='categorical_crossentropy',
+                      optimizer='rmsprop',
+                      metrics=['accuracy'])
+
+        return model
+
+    def train_cnn(self):
+        """Train the CNN model for 20 epochs and save it to ``self.cnn_modelpath``."""
+        X_train, Y_train, X_test, Y_test = self.split_trainset()
+        cnn = self.build_cnn_model()
+        held_out = (X_test, Y_test)
+        cnn.fit(X_train, Y_train, batch_size=100, epochs=20, validation_data=held_out)
+        cnn.save(self.cnn_modelpath)
+
+    def train_lstm(self):
+        """Train the LSTM model for 50 epochs and save it to ``self.lstm_modelpath``."""
+        X_train, Y_train, X_test, Y_test = self.split_trainset()
+        lstm = self.build_lstm_model()
+        held_out = (X_test, Y_test)
+        lstm.fit(X_train, Y_train, batch_size=100, epochs=50, validation_data=held_out)
+        lstm.save(self.lstm_modelpath)
+
+    def split_trainset(self):
+        """Split the loaded data into the first 80% (train) and the remaining 20% (test).
+
+        The split follows file order; no shuffling is done here.
+
+        :return: ``(X_train, Y_train, X_test, Y_test)``.
+        """
+        X, Y = self.load_traindata()
+        split_rate = 0.8
+        cut = int(len(X)*split_rate)
+        return X[:cut], Y[:cut], X[cut:], Y[cut:]
+
+
+if __name__ == '__main__':
+    # Train and save both models in turn; each call reloads the training data.
+    handler = QuestionClassify()
+    handler.train_cnn()
+    handler.train_lstm()