创建法务智能项目
This commit is contained in:
commit
90b2429647
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
embedding/*.bin
|
11
.idea/CrimeKgAssistant.iml
Normal file
11
.idea/CrimeKgAssistant.iml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.6.3 (~/anaconda3/envs/py3/bin/python)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||
</component>
|
||||
</module>
|
3
.idea/markdown-navigator/profiles_settings.xml
Normal file
3
.idea/markdown-navigator/profiles_settings.xml
Normal file
@ -0,0 +1,3 @@
|
||||
<component name="MarkdownNavigator.ProfileManager">
|
||||
<settings default="" pdf-export="" />
|
||||
</component>
|
83
.idea/misc.xml
Normal file
83
.idea/misc.xml
Normal file
@ -0,0 +1,83 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="MarkdownProjectSettings">
|
||||
<PreviewSettings splitEditorLayout="SPLIT" splitEditorPreview="PREVIEW" useGrayscaleRendering="false" zoomFactor="1.0" maxImageWidth="0" showGitHubPageIfSynced="false" allowBrowsingInPreview="false" synchronizePreviewPosition="true" highlightPreviewType="NONE" highlightFadeOut="5" highlightOnTyping="true" synchronizeSourcePosition="true" verticallyAlignSourceAndPreviewSyncPosition="true" showSearchHighlightsInPreview="false" showSelectionInPreview="true" openRemoteLinks="true">
|
||||
<PanelProvider>
|
||||
<provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.panel" providerName="Default - Swing" />
|
||||
</PanelProvider>
|
||||
</PreviewSettings>
|
||||
<ParserSettings gitHubSyntaxChange="false">
|
||||
<PegdownExtensions>
|
||||
<option name="ABBREVIATIONS" value="false" />
|
||||
<option name="ANCHORLINKS" value="true" />
|
||||
<option name="ASIDE" value="false" />
|
||||
<option name="ATXHEADERSPACE" value="true" />
|
||||
<option name="AUTOLINKS" value="true" />
|
||||
<option name="DEFINITIONS" value="false" />
|
||||
<option name="DEFINITION_BREAK_DOUBLE_BLANK_LINE" value="false" />
|
||||
<option name="FENCED_CODE_BLOCKS" value="true" />
|
||||
<option name="FOOTNOTES" value="false" />
|
||||
<option name="HARDWRAPS" value="false" />
|
||||
<option name="HTML_DEEP_PARSER" value="false" />
|
||||
<option name="INSERTED" value="false" />
|
||||
<option name="QUOTES" value="false" />
|
||||
<option name="RELAXEDHRULES" value="true" />
|
||||
<option name="SMARTS" value="false" />
|
||||
<option name="STRIKETHROUGH" value="true" />
|
||||
<option name="SUBSCRIPT" value="false" />
|
||||
<option name="SUPERSCRIPT" value="false" />
|
||||
<option name="SUPPRESS_HTML_BLOCKS" value="false" />
|
||||
<option name="SUPPRESS_INLINE_HTML" value="false" />
|
||||
<option name="TABLES" value="true" />
|
||||
<option name="TASKLISTITEMS" value="true" />
|
||||
<option name="TOC" value="false" />
|
||||
<option name="WIKILINKS" value="true" />
|
||||
</PegdownExtensions>
|
||||
<ParserOptions>
|
||||
<option name="COMMONMARK_LISTS" value="true" />
|
||||
<option name="DUMMY" value="false" />
|
||||
<option name="EMOJI_SHORTCUTS" value="true" />
|
||||
<option name="FLEXMARK_FRONT_MATTER" value="false" />
|
||||
<option name="GFM_LOOSE_BLANK_LINE_AFTER_ITEM_PARA" value="false" />
|
||||
<option name="GFM_TABLE_RENDERING" value="true" />
|
||||
<option name="GITBOOK_URL_ENCODING" value="false" />
|
||||
<option name="GITHUB_EMOJI_URL" value="false" />
|
||||
<option name="GITHUB_LISTS" value="false" />
|
||||
<option name="GITHUB_WIKI_LINKS" value="true" />
|
||||
<option name="JEKYLL_FRONT_MATTER" value="false" />
|
||||
<option name="SIM_TOC_BLANK_LINE_SPACER" value="true" />
|
||||
</ParserOptions>
|
||||
</ParserSettings>
|
||||
<HtmlSettings headerTopEnabled="false" headerBottomEnabled="false" bodyTopEnabled="false" bodyBottomEnabled="false" embedUrlContent="false" addPageHeader="true" embedImages="false" embedHttpImages="false">
|
||||
<GeneratorProvider>
|
||||
<provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.generator" providerName="Default Swing HTML Generator" />
|
||||
</GeneratorProvider>
|
||||
<headerTop />
|
||||
<headerBottom />
|
||||
<bodyTop />
|
||||
<bodyBottom />
|
||||
</HtmlSettings>
|
||||
<CssSettings previewScheme="UI_SCHEME" cssUri="" isCssUriEnabled="false" isCssTextEnabled="false" isDynamicPageWidth="true">
|
||||
<StylesheetProvider>
|
||||
<provider providerId="com.vladsch.idea.multimarkdown.editor.swing.html.css" providerName="Default Swing Stylesheet" />
|
||||
</StylesheetProvider>
|
||||
<ScriptProviders />
|
||||
<cssText />
|
||||
</CssSettings>
|
||||
<HtmlExportSettings updateOnSave="false" parentDir="$ProjectFileDir$" targetDir="$ProjectFileDir$" cssDir="" scriptDir="" plainHtml="false" imageDir="" copyLinkedImages="false" imageUniquifyType="0" targetExt="" useTargetExt="false" noCssNoScripts="false" linkToExportedHtml="true" exportOnSettingsChange="true" regenerateOnProjectOpen="false" linkFormatType="HTTP_ABSOLUTE" />
|
||||
<LinkMapSettings>
|
||||
<textMaps />
|
||||
</LinkMapSettings>
|
||||
</component>
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
||||
<OptionsSetting value="true" id="Add" />
|
||||
<OptionsSetting value="true" id="Remove" />
|
||||
<OptionsSetting value="true" id="Checkout" />
|
||||
<OptionsSetting value="true" id="Update" />
|
||||
<OptionsSetting value="true" id="Status" />
|
||||
<OptionsSetting value="true" id="Edit" />
|
||||
<ConfirmationsSetting value="0" id="Add" />
|
||||
<ConfirmationsSetting value="0" id="Remove" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.3 (~/anaconda3/envs/py3/bin/python)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/CrimeKgAssistant.iml" filepath="$PROJECT_DIR$/.idea/CrimeKgAssistant.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
421
.idea/workspace.xml
Normal file
421
.idea/workspace.xml
Normal file
@ -0,0 +1,421 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="8b1874b6-a1e9-45fd-a2c5-b0b8b2b7649b" name="Default" comment="" />
|
||||
<ignored path="CrimeKgAssistant.iws" />
|
||||
<ignored path=".idea/workspace.xml" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="TRACKING_ENABLED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="CreatePatchCommitExecutor">
|
||||
<option name="PATCH_PATH" value="" />
|
||||
</component>
|
||||
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
|
||||
<component name="FavoritesManager">
|
||||
<favorites_list name="CrimeKgAssistant" />
|
||||
</component>
|
||||
<component name="FileEditorManager">
|
||||
<leaf />
|
||||
</component>
|
||||
<component name="FileTemplateManagerImpl">
|
||||
<option name="RECENT_TEMPLATES">
|
||||
<list>
|
||||
<option value="Python Script" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/question_classify_train.py" />
|
||||
<option value="$PROJECT_DIR$/question_classify.py" />
|
||||
<option value="$PROJECT_DIR$/crime_classify.py" />
|
||||
<option value="$PROJECT_DIR$/crime_qa_server.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectFrameBounds">
|
||||
<option name="x" value="631" />
|
||||
<option name="y" value="106" />
|
||||
<option name="width" value="1261" />
|
||||
<option name="height" value="1001" />
|
||||
</component>
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
||||
<OptionsSetting value="true" id="Add" />
|
||||
<OptionsSetting value="true" id="Remove" />
|
||||
<OptionsSetting value="true" id="Checkout" />
|
||||
<OptionsSetting value="true" id="Update" />
|
||||
<OptionsSetting value="true" id="Status" />
|
||||
<OptionsSetting value="true" id="Edit" />
|
||||
<ConfirmationsSetting value="0" id="Add" />
|
||||
<ConfirmationsSetting value="0" id="Remove" />
|
||||
</component>
|
||||
<component name="ProjectView">
|
||||
<navigator currentView="ProjectPane" proportions="" version="1">
|
||||
<flattenPackages />
|
||||
<showMembers />
|
||||
<showModules />
|
||||
<showLibraryContents />
|
||||
<hideEmptyPackages />
|
||||
<abbreviatePackageNames />
|
||||
<autoscrollToSource />
|
||||
<autoscrollFromSource />
|
||||
<sortByType />
|
||||
<manualOrder />
|
||||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="CrimeKgAssistant" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="CrimeKgAssistant" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="CrimeKgAssistant" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
<pane id="Scratches" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
|
||||
<property name="settings.editor.splitter.proportion" value="0.2" />
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="CopyFile.RECENT_KEYS">
|
||||
<recent name="$PROJECT_DIR$" />
|
||||
<recent name="$PROJECT_DIR$/data" />
|
||||
</key>
|
||||
<key name="MoveFile.RECENT_KEYS">
|
||||
<recent name="$PROJECT_DIR$/dict" />
|
||||
<recent name="$PROJECT_DIR$/embedding" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.crime_classify">
|
||||
<configuration default="false" name="question_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/question_classify.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="false" name="crime_classify" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/crime_classify.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="BashConfigurationType" factoryName="Bash">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="INTERPRETER_PATH" value="/bin/bash" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<module name="" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="Tox" factoryName="Tox">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="Attests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="Doctests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="Nosetests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<option name="PARAMS" value="" />
|
||||
<option name="USE_PARAM" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="Unittests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<option name="PUREUNITTEST" value="true" />
|
||||
<option name="PARAMS" value="" />
|
||||
<option name="USE_PARAM" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="tests" factoryName="py.test">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="CrimeKgAssistant" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<option name="testToRun" value="" />
|
||||
<option name="keywords" value="" />
|
||||
<option name="params" value="" />
|
||||
<option name="USE_PARAM" value="false" />
|
||||
<option name="USE_KEYWORD" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<list size="2">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.question_classify" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.crime_classify" />
|
||||
</list>
|
||||
<recent_temporary>
|
||||
<list size="2">
|
||||
<item index="0" class="java.lang.String" itemvalue="Python.crime_classify" />
|
||||
<item index="1" class="java.lang.String" itemvalue="Python.question_classify" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
<component name="ShelveChangesManager" show_recycled="false">
|
||||
<option name="remove_strategy" value="false" />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="8b1874b6-a1e9-45fd-a2c5-b0b8b2b7649b" name="Default" comment="" />
|
||||
<created>1541920128942</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1541920128942</updated>
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="631" y="106" width="1261" height="1001" extended-state="0" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24959612" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.6682832" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
</layout>
|
||||
</component>
|
||||
<component name="Vcs.Log.UiProperties">
|
||||
<option name="RECENTLY_FILTERED_USER_GROUPS">
|
||||
<collection />
|
||||
</option>
|
||||
<option name="RECENTLY_FILTERED_BRANCH_GROUPS">
|
||||
<collection />
|
||||
</option>
|
||||
</component>
|
||||
<component name="VcsContentAnnotationSettings">
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
<component name="XDebuggerManager">
|
||||
<breakpoint-manager />
|
||||
<watches-manager />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/question_classify_train.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="2241">
|
||||
<caret line="160" column="31" selection-start-line="160" selection-start-column="31" selection-end-line="160" selection-end-column="31" />
|
||||
<folding>
|
||||
<element signature="e#149#158#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/question_classify.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="484">
|
||||
<caret line="40" column="15" selection-start-line="40" selection-start-column="15" selection-end-line="40" selection-end-column="15" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/dict/crime.txt">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_classify_train.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="484">
|
||||
<caret line="22" column="33" selection-start-line="0" selection-start-column="0" selection-end-line="184" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#146#155#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_classify.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="275">
|
||||
<caret line="72" column="13" selection-start-line="72" selection-start-column="13" selection-end-line="72" selection-end-column="13" />
|
||||
<folding>
|
||||
<element signature="e#150#159#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/build_qa_database.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/crime_qa.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="352">
|
||||
<caret line="16" column="15" selection-start-line="16" selection-start-column="15" selection-end-line="16" selection-end-column="15" />
|
||||
<folding>
|
||||
<element signature="e#147#156#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
98
build_qa_database.py
Normal file
98
build_qa_database.py
Normal file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: insert_es.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 18-10-10
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
import json
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch.helpers import bulk
|
||||
import pymongo
|
||||
|
||||
class ProcessIntoES:
|
||||
def __init__(self):
|
||||
self._index = "crime_data"
|
||||
self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
|
||||
self.doc_type = "crime"
|
||||
cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])
|
||||
self.music_file = os.path.join(cur, 'qa_corpus.json')
|
||||
|
||||
'''创建ES索引,确定分词类型'''
|
||||
def create_mapping(self):
|
||||
node_mappings = {
|
||||
"mappings": {
|
||||
self.doc_type: { # type
|
||||
"properties": {
|
||||
"question": { # field: 问题
|
||||
"type": "text", # lxw NOTE: cannot be string
|
||||
"analyzer": "ik_max_word",
|
||||
"search_analyzer": "ik_smart",
|
||||
"index": "true" # The index option controls whether field values are indexed.
|
||||
},
|
||||
"answers": { # field: 问题
|
||||
"type": "text", # lxw NOTE: cannot be string
|
||||
"analyzer": "ik_max_word",
|
||||
"search_analyzer": "ik_smart",
|
||||
"index": "true" # The index option controls whether field values are indexed.
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if not self.es.indices.exists(index=self._index):
|
||||
self.es.indices.create(index=self._index, body=node_mappings)
|
||||
print("Create {} mapping successfully.".format(self._index))
|
||||
else:
|
||||
print("index({}) already exists.".format(self._index))
|
||||
|
||||
'''批量插入数据'''
|
||||
def insert_data_bulk(self, action_list):
|
||||
success, _ = bulk(self.es, action_list, index=self._index, raise_on_error=True)
|
||||
print("Performed {0} actions. _: {1}".format(success, _))
|
||||
|
||||
|
||||
'''初始化ES,将数据插入到ES数据库当中'''
|
||||
def init_ES():
|
||||
pie = ProcessIntoES()
|
||||
# 创建ES的index
|
||||
pie.create_mapping()
|
||||
start_time = time.time()
|
||||
index = 0
|
||||
count = 0
|
||||
action_list = []
|
||||
BULK_COUNT = 1000 # 每BULK_COUNT个句子一起插入到ES中
|
||||
|
||||
for line in open(pie.music_file):
|
||||
if not line:
|
||||
continue
|
||||
item = json.loads(line)
|
||||
index += 1
|
||||
action = {
|
||||
"_index": pie._index,
|
||||
"_type": pie.doc_type,
|
||||
"_source": {
|
||||
"question": item['question'],
|
||||
"answers": '\n'.join(item['answers']),
|
||||
}
|
||||
}
|
||||
action_list.append(action)
|
||||
if index > BULK_COUNT:
|
||||
pie.insert_data_bulk(action_list=action_list)
|
||||
index = 0
|
||||
count += 1
|
||||
print(count)
|
||||
action_list = []
|
||||
end_time = time.time()
|
||||
|
||||
print("Time Cost:{0}".format(end_time - start_time))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 将数据库插入到elasticsearch当中
|
||||
# init_ES()
|
||||
# 按照标题进行查询
|
||||
question = '我老公要起诉离婚 我不想离婚怎么办'
|
||||
|
99
crime_classify.py
Normal file
99
crime_classify.py
Normal file
@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: crime_classify.py.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 18-11-11
|
||||
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import jieba.posseg as pseg
|
||||
from sklearn.externals import joblib
|
||||
|
||||
class CrimeClassify(object):
    """SVM-based crime-name classifier over averaged word embeddings.

    Sentences are represented as the mean of their words' 300-dim
    embeddings; the saved LinearSVC model maps vectors to crime-label ids.
    """

    def __init__(self):
        cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        crime_file = os.path.join(cur, 'dict/crime.txt')
        # crime name -> integer id, and the reverse mapping for decoding.
        self.label_dict = self.build_crime_dict(crime_file)
        self.id_dict = {j: i for i, j in self.label_dict.items()}
        self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
        self.embdding_dict = self.load_embedding(self.embedding_path)
        self.embedding_size = 300
        self.model_path = 'model/crime_predict.model'
        self._model = None  # lazily-loaded SVM, cached across predict() calls
        return

    def build_crime_dict(self, crimefile):
        """Read one crime name per line and map it to a sequential integer id."""
        label_dict = {}
        i = 0
        with open(crimefile) as f:  # was an unclosed open() iterator
            for line in f:
                crime = line.strip()
                if not crime:
                    continue
                label_dict[crime] = i
                i += 1
        return label_dict

    def load_embedding(self, embedding_path):
        """Load 'word v1 ... v300' text lines into a word -> np.ndarray dict."""
        embedding_dict = {}
        count = 0
        with open(embedding_path) as f:  # was an unclosed open() iterator
            for line in f:
                line = line.strip().split(' ')
                if len(line) < 300:
                    continue
                wd = line[0]
                vector = np.array([float(i) for i in line[1:]])
                embedding_dict[wd] = vector
                count += 1
                if count % 10000 == 0:
                    print(count, 'loaded')
        print('loaded %s word embedding, finished' % count, )
        return embedding_dict

    def seg_sent(self, s):
        """Segment s with jieba, dropping function-word POS classes."""
        wds = [i.word for i in pseg.cut(s) if i.flag[0] not in ['x', 'u', 'c', 'p', 'm', 't']]
        return wds

    def rep_sentencevector(self, sentence, flag='seg'):
        """Average the embeddings of the sentence's in-vocabulary words.

        flag='seg' means the sentence is already space-separated; any other
        value triggers jieba segmentation. Returns a zero vector when no
        word is in the embedding vocabulary — the original raised
        ZeroDivisionError in that case.
        """
        if flag == 'seg':
            word_list = [i for i in sentence.split(' ') if i]
        else:
            word_list = self.seg_sent(sentence)
        embedding = np.zeros(self.embedding_size)
        sent_len = 0
        for wd in word_list:
            if wd in self.embdding_dict:
                embedding += self.embdding_dict.get(wd)
                sent_len += 1
        if sent_len == 0:  # guard: all-OOV input must not divide by zero
            return embedding
        return embedding / sent_len

    def label_onehot(self, label):
        """One-hot encode an integer label over len(label_dict) classes."""
        one_hot = [0] * len(self.label_dict)
        one_hot[int(label)] = 1
        return one_hot

    def predict(self, sent):
        """Predict the crime label name for a raw (unsegmented) sentence."""
        if self._model is None:  # load the SVM once instead of on every call
            self._model = joblib.load(self.model_path)
        represent_sent = self.rep_sentencevector(sent, flag='noseg')
        text_vector = np.array(represent_sent).reshape(1, -1)
        res = self._model.predict(text_vector)[0]
        label = self.id_dict.get(res)
        return label
|
||||
|
||||
|
||||
def test():
    """Interactive loop: read a sentence, print its predicted crime label."""
    classifier = CrimeClassify()
    while True:
        query = input('enter an sent to search:')
        print(classifier.predict(query))
|
||||
|
||||
if __name__ == '__main__':
    # Launch the interactive prediction loop.
    test()
|
184
crime_classify_train.py
Normal file
184
crime_classify_train.py
Normal file
@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: crime_classify.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 18-11-10
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from sklearn.svm import SVC, LinearSVC
|
||||
import jieba.posseg as pseg
|
||||
from collections import Counter
|
||||
from sklearn.externals import joblib
|
||||
|
||||
class CrimeClassify(object):
    """Train and evaluate a LinearSVC crime classifier.

    Sentences are the mean of their words' 300-dim embeddings; labels are
    integer ids derived from crime.txt.
    """

    def __init__(self):
        cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        crime_file = os.path.join(cur, 'crime.txt')
        self.label_dict = self.build_crime_dict(crime_file)        # name -> id
        self.id_dict = {j: i for i, j in self.label_dict.items()}  # id -> name
        self.train_file = os.path.join(cur, 'crime_train_all.txt')
        self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
        self.embdding_dict = self.load_embedding(self.embedding_path)
        self.embedding_size = 300
        self.model_path = 'crime_predict_svm_all.model'
        return

    def build_crime_dict(self, crimefile):
        """Read one crime name per line and assign sequential integer ids."""
        label_dict = {}
        i = 0
        with open(crimefile) as f:  # was an unclosed open() iterator
            for line in f:
                crime = line.strip()
                if not crime:
                    continue
                label_dict[crime] = i
                i += 1
        return label_dict

    def load_embedding(self, embedding_path):
        """Load 'word v1 ... v300' text lines into a word -> np.ndarray dict."""
        embedding_dict = {}
        count = 0
        with open(embedding_path) as f:  # was an unclosed open() iterator
            for line in f:
                line = line.strip().split(' ')
                if len(line) < 300:
                    continue
                wd = line[0]
                vector = np.array([float(i) for i in line[1:]])
                embedding_dict[wd] = vector
                count += 1
                if count % 10000 == 0:
                    print(count, 'loaded')
        print('loaded %s word embedding, finished' % count, )
        return embedding_dict

    def seg_sent(self, s):
        """Segment s with jieba, dropping function-word POS classes."""
        wds = [i.word for i in pseg.cut(s) if i.flag[0] not in ['x', 'u', 'c', 'p', 'm', 't']]
        return wds

    def rep_sentencevector(self, sentence, flag='seg'):
        """Average embedding of the sentence's in-vocabulary words.

        flag='seg': the sentence is pre-segmented by spaces; otherwise
        jieba is used. Returns a zero vector when no word is in the
        vocabulary (previously raised ZeroDivisionError).
        """
        if flag == 'seg':
            word_list = [i for i in sentence.split(' ') if i]
        else:
            word_list = self.seg_sent(sentence)
        embedding = np.zeros(self.embedding_size)
        sent_len = 0
        for wd in word_list:
            if wd in self.embdding_dict:
                embedding += self.embdding_dict.get(wd)
                sent_len += 1
        if sent_len == 0:  # guard against division by zero on all-OOV input
            return embedding
        return embedding / sent_len

    def label_onehot(self, label):
        """One-hot encode an integer label over len(label_dict) classes."""
        one_hot = [0] * len(self.label_dict)
        one_hot[int(label)] = 1
        return one_hot

    def load_traindata(self):
        """Load (sentence-vector, label-id) pairs from '<id>\\t<segmented sent>' lines."""
        train_X = []
        train_Y = []
        count = 0
        with open(self.train_file) as f:  # was an unclosed open() iterator
            for line in f:
                line = line.strip().split('\t')  # original called .strip() twice
                if len(line) < 2:
                    continue
                count += 1
                sent = line[1]
                label_id = int(line[0])
                sent_vector = self.rep_sentencevector(sent, flag='seg')
                train_X.append(sent_vector)
                train_Y.append(label_id)
                if count % 10000 == 0:
                    print('loaded %s lines' % count)
        return np.array(train_X), np.array(train_Y)

    def train_classifer(self):
        """Fit LinearSVC on the full training set, save it, print train accuracy."""
        x_train, y_train = self.load_traindata()
        model = LinearSVC()
        model.fit(x_train, y_train)
        joblib.dump(model, self.model_path)
        y_predict = model.predict(x_train)
        total = len(y_predict)  # renamed from `all`, which shadowed the builtin
        right = 0
        for i in range(len(y_train)):
            if y_predict[i] == y_train[i]:
                right += 1
        print('precision:%s/%s=%s' % (right, total, right / total))

    def predict(self, sent):
        """Predict the crime name for a raw (unsegmented) sentence."""
        model = joblib.load(self.model_path)
        represent_sent = self.rep_sentencevector(sent, flag='noseg')
        text_vector = np.array(represent_sent).reshape(1, -1)
        res = model.predict(text_vector)[0]
        label = self.id_dict.get(res)
        return label

    def check_precision(self):
        """Reload the saved model and report accuracy over the training set."""
        model = joblib.load(self.model_path)
        x_train, y_train = self.load_traindata()
        y_predict = model.predict(x_train)
        total = len(y_predict)  # renamed from `all`, which shadowed the builtin
        right = 0
        for i in range(len(y_train)):
            if y_predict[i] == y_train[i]:
                right += 1
        print('precision:%s/%s=%s' % (right, total, right / total))
        # precision:170231 / 204231 = 0.83352184536138
        # precision:2650780 / 2880306 = 0.9203119390786951
|
||||
|
||||
|
||||
def test():
    """Interactive loop for manually spot-checking the trained model."""
    handler = CrimeClassify()
    # handler.train_classifer()  # uncomment to retrain before querying
    while True:
        sent = input('enter an sent to search:')
        print(handler.predict(sent))
|
||||
|
||||
def build_data():
    """Build crime_train_all.txt with one '<label_id>\\t<sentence>' line per record.

    Reads crime names from crime.txt to assign ids, then converts
    accu_train.txt ('###'-separated fields) into the training format.
    """
    label_dict = {}
    i = 0
    with open('crime.txt') as crimes:  # was an unclosed open() iterator
        for line in crimes:
            crime = line.strip()
            if not crime:
                continue
            label_dict[crime] = i
            i += 1

    count = 0
    # `with` guarantees both files are closed/flushed even on error; the
    # original never closed the input and only closed the output on success.
    with open('crime_train_all.txt', 'w+') as f, open('accu_train.txt') as src:
        for line in src:
            line = line.strip().split('###')
            if len(line) < 3:
                continue
            crime = line[1].split(';')[0]
            sent = line[-1]
            # NOTE(review): get() yields None for a crime missing from
            # crime.txt, which is written literally as "None" — confirm
            # every crime in accu_train.txt appears in crime.txt.
            label = label_dict.get(crime)
            f.write(str(label) + '\t' + sent + '\n')
            count += 1
            print(count)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Interactive prediction loop; the commented alternatives below are
    # one-off jobs (corpus building, accuracy check).
    test()
    #build_data()
    #handler = CrimeClassify()
    #handler.check_precision()
|
58
crime_qa.py
Normal file
58
crime_qa.py
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: crime_qa_server.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 18-11-10
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch.helpers import bulk
|
||||
import pymongo
|
||||
|
||||
class CrimeQA:
    """Retrieve similar questions and their answers from the crime ES index."""

    def __init__(self):
        self._index = "crime_data"
        self.es = Elasticsearch([{"host": "127.0.0.1", "port": 9200}])
        self.doc_type = "crime"

    def search_specific(self, value, key="question"):
        """Run an ES match query on `key` and return the raw hit list (top 20)."""
        query_body = {
            "query": {
                "match": {
                    key: value,
                }
            }
        }
        searched = self.es.search(index=self._index, doc_type=self.doc_type,
                                  body=query_body, size=20)
        return searched["hits"]["hits"]

    def search_es(self, question):
        """Convert ES hits into {score, sim_question, answers[]} dicts."""
        answers = []
        for hit in self.search_specific(question):
            answers.append({
                'score': hit['_score'],
                'sim_question': hit['_source']['question'],
                'answers': hit['_source']['answers'].split('\n'),
            })
        return answers

    def search_main(self, question):
        """Entry point: print every candidate answer for the question."""
        for candi in self.search_es(question):
            print(candi)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo query against the local Elasticsearch instance.
    handler = CrimeQA()
    question = '最近买了一把枪,会犯什么罪?'
    handler.search_main(question)
|
||||
|
856
data/kg_crime.json
Normal file
856
data/kg_crime.json
Normal file
File diff suppressed because one or more lines are too long
BIN
data/qa_corpus.json.zip
Normal file
BIN
data/qa_corpus.json.zip
Normal file
Binary file not shown.
202
dict/crime.txt
Normal file
202
dict/crime.txt
Normal file
@ -0,0 +1,202 @@
|
||||
妨害公务
|
||||
寻衅滋事
|
||||
盗窃、侮辱尸体
|
||||
危险物品肇事
|
||||
非法采矿
|
||||
组织、强迫、引诱、容留、介绍卖淫
|
||||
开设赌场
|
||||
聚众斗殴
|
||||
绑架
|
||||
非法持有毒品
|
||||
销售假冒注册商标的商品
|
||||
容留他人吸毒
|
||||
假冒注册商标
|
||||
交通肇事
|
||||
破坏电力设备
|
||||
组织卖淫
|
||||
合同诈骗
|
||||
走私武器、弹药
|
||||
抢劫
|
||||
非法处置查封、扣押、冻结的财产
|
||||
以危险方法危害公共安全
|
||||
过失投放危险物质
|
||||
非法制造、买卖、运输、邮寄、储存枪支、弹药、爆炸物
|
||||
伪造、变造、买卖武装部队公文、证件、印章
|
||||
持有、使用假币
|
||||
重婚
|
||||
聚众冲击国家机关
|
||||
生产、销售伪劣农药、兽药、化肥、种子
|
||||
收买被拐卖的妇女、儿童
|
||||
聚众哄抢
|
||||
重大劳动安全事故
|
||||
侵占
|
||||
包庇毒品犯罪分子
|
||||
虚报注册资本
|
||||
违法发放贷款
|
||||
制造、贩卖、传播淫秽物品
|
||||
窝藏、包庇
|
||||
帮助毁灭、伪造证据
|
||||
放火
|
||||
强奸
|
||||
非法携带枪支、弹药、管制刀具、危险物品危及公共安全
|
||||
伪造、变造金融票证
|
||||
爆炸
|
||||
玩忽职守
|
||||
对非国家工作人员行贿
|
||||
伪造、倒卖伪造的有价票证
|
||||
私分国有资产
|
||||
非法收购、运输、加工、出售国家重点保护植物、国家重点保护植物制品
|
||||
生产、销售假药
|
||||
挪用特定款物
|
||||
过失致人死亡
|
||||
走私国家禁止进出口的货物、物品
|
||||
非法制造、买卖、运输、储存危险物质
|
||||
洗钱
|
||||
骗取贷款、票据承兑、金融票证
|
||||
非法买卖制毒物品
|
||||
非法买卖、运输、携带、持有毒品原植物种子、幼苗
|
||||
生产、销售有毒、有害食品
|
||||
滥用职权
|
||||
招收公务员、学生徇私舞弊
|
||||
诬告陷害
|
||||
非法获取国家秘密
|
||||
非法行医
|
||||
非法收购、运输、出售珍贵、濒危野生动物、珍贵、濒危野生动物制品
|
||||
非法出售发票
|
||||
行贿
|
||||
高利转贷
|
||||
非法吸收公众存款
|
||||
传播淫秽物品
|
||||
非法进行节育手术
|
||||
盗伐林木
|
||||
聚众扰乱社会秩序
|
||||
走私、贩卖、运输、制造毒品
|
||||
滥伐林木
|
||||
赌博
|
||||
非法经营
|
||||
生产、销售不符合安全标准的食品
|
||||
提供侵入、非法控制计算机信息系统程序、工具
|
||||
倒卖文物
|
||||
窃取、收买、非法提供信用卡信息
|
||||
盗掘古文化遗址、古墓葬
|
||||
协助组织卖淫
|
||||
破坏广播电视设施、公用电信设施
|
||||
走私普通货物、物品
|
||||
逃税
|
||||
破坏监管秩序
|
||||
失火
|
||||
受贿
|
||||
组织、领导、参加黑社会性质组织
|
||||
票据诈骗
|
||||
非法制造、销售非法制造的注册商标标识
|
||||
侵犯著作权
|
||||
伪造、变造、买卖国家机关公文、证件、印章
|
||||
徇私舞弊不征、少征税款
|
||||
强迫劳动
|
||||
贷款诈骗
|
||||
劫持船只、汽车
|
||||
诈骗
|
||||
非法种植毒品原植物
|
||||
非法狩猎
|
||||
挪用资金
|
||||
非法收购、运输盗伐、滥伐的林木
|
||||
出售、购买、运输假币
|
||||
抢夺
|
||||
虐待被监管人
|
||||
窝藏、转移、收购、销售赃物
|
||||
破坏计算机信息系统
|
||||
制作、复制、出版、贩卖、传播淫秽物品牟利
|
||||
拒不支付劳动报酬
|
||||
盗窃、抢夺枪支、弹药、爆炸物
|
||||
强迫他人吸毒
|
||||
走私珍贵动物、珍贵动物制品
|
||||
虐待
|
||||
非法获取公民个人信息
|
||||
破坏交通设施
|
||||
非法转让、倒卖土地使用权
|
||||
非法捕捞水产品
|
||||
非法占用农用地
|
||||
非法制造、出售非法制造的发票
|
||||
非法持有、私藏枪支、弹药
|
||||
集资诈骗
|
||||
强迫卖淫
|
||||
伪造公司、企业、事业单位、人民团体印章
|
||||
利用影响力受贿
|
||||
编造、故意传播虚假恐怖信息
|
||||
介绍贿赂
|
||||
传播性病
|
||||
拐卖妇女、儿童
|
||||
倒卖车票、船票
|
||||
窝藏、转移、隐瞒毒品、毒赃
|
||||
徇私舞弊不移交刑事案件
|
||||
过失损坏广播电视设施、公用电信设施
|
||||
动植物检疫徇私舞弊
|
||||
破坏交通工具
|
||||
猥亵儿童
|
||||
挪用公款
|
||||
伪造货币
|
||||
冒充军人招摇撞骗
|
||||
非法采伐、毁坏国家重点保护植物
|
||||
故意毁坏财物
|
||||
非法拘禁
|
||||
招摇撞骗
|
||||
伪造、变造居民身份证
|
||||
徇私枉法
|
||||
非法生产、买卖警用装备
|
||||
掩饰、隐瞒犯罪所得、犯罪所得收益
|
||||
生产、销售伪劣产品
|
||||
破坏生产经营
|
||||
帮助犯罪分子逃避处罚
|
||||
贪污
|
||||
投放危险物质
|
||||
持有伪造的发票
|
||||
危险驾驶
|
||||
妨害作证
|
||||
非法猎捕、杀害珍贵、濒危野生动物
|
||||
重大责任事故
|
||||
诽谤
|
||||
虚开发票
|
||||
引诱、教唆、欺骗他人吸毒
|
||||
脱逃
|
||||
扰乱无线电通讯管理秩序
|
||||
保险诈骗
|
||||
非法生产、销售间谍专用器材
|
||||
非法组织卖血
|
||||
强迫交易
|
||||
串通投标
|
||||
破坏易燃易爆设备
|
||||
传授犯罪方法
|
||||
妨害信用卡管理
|
||||
拐骗儿童
|
||||
单位行贿
|
||||
打击报复证人
|
||||
拒不执行判决、裁定
|
||||
经济犯
|
||||
金融凭证诈骗
|
||||
虚开增值税专用发票、用于骗取出口退税、抵扣税款发票
|
||||
走私废物
|
||||
组织、领导传销活动
|
||||
单位受贿
|
||||
盗窃、抢夺枪支、弹药、爆炸物、危险物质
|
||||
过失以危险方法危害公共安全
|
||||
过失致人重伤
|
||||
引诱、容留、介绍卖淫
|
||||
遗弃
|
||||
走私
|
||||
信用卡诈骗
|
||||
对单位行贿
|
||||
故意杀人
|
||||
聚众扰乱公共场所秩序、交通秩序
|
||||
盗窃
|
||||
故意伤害
|
||||
非法侵入住宅
|
||||
强制猥亵、侮辱妇女
|
||||
伪证
|
||||
污染环境
|
||||
巨额财产来源不明
|
||||
非国家工作人员受贿
|
||||
侮辱
|
||||
隐匿、故意销毁会计凭证、会计帐簿、财务会计报告
|
||||
过失损坏武器装备、军事设施、军事通信
|
||||
敲诈勒索
|
||||
职务侵占
|
BIN
model/cnn_question_classify.h5
Normal file
BIN
model/cnn_question_classify.h5
Normal file
Binary file not shown.
BIN
model/crime_predict.model
Normal file
BIN
model/crime_predict.model
Normal file
Binary file not shown.
BIN
model/lstm_question_predict.h5
Normal file
BIN
model/lstm_question_predict.h5
Normal file
Binary file not shown.
150
question_classify.py
Normal file
150
question_classify.py
Normal file
@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: question_classify.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 18-11-11
|
||||
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import jieba.posseg as pseg
|
||||
from keras.models import Sequential, load_model
|
||||
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dense, Dropout, LSTM, Bidirectional
|
||||
|
||||
|
||||
class QuestionClassify(object):
    """Classify a legal question into one of 13 domain categories.

    Sentences become (max_length, embedding_size) matrices of word vectors;
    a saved CNN model maps them to category probabilities.
    """

    def __init__(self):
        # id -> category name, used to decode model outputs.
        self.label_dict = {
            0: "婚姻家庭",
            1: "劳动纠纷",
            2: "交通事故",
            3: "债权债务",
            4: "刑事辩护",
            5: "合同纠纷",
            6: "房产纠纷",
            7: "侵权",
            8: "公司法",
            9: "医疗纠纷",
            10: "拆迁安置",
            11: "行政诉讼",
            12: "建设工程"
        }
        cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        self.embedding_path = os.path.join(cur, 'embedding/word_vec_300.bin')
        self.embdding_dict = self.load_embedding(self.embedding_path)
        self.max_length = 60       # sentences truncated/zero-padded to 60 tokens
        self.embedding_size = 300
        self.lstm_modelpath = 'model/lstm_question_classify.h5'
        self.cnn_modelpath = 'model/cnn_question_classify.h5'
        self._cnn_model = None     # lazily loaded, cached across predict() calls
        return

    def load_embedding(self, embedding_path):
        """Load 'word v1 ... v300' text lines into a word -> np.ndarray dict."""
        embedding_dict = {}
        count = 0
        with open(embedding_path) as f:  # was an unclosed open() iterator
            for line in f:
                line = line.strip().split(' ')
                if len(line) < 300:
                    continue
                wd = line[0]
                vector = np.array([float(i) for i in line[1:]])
                embedding_dict[wd] = vector
                count += 1
                if count % 10000 == 0:
                    print(count, 'loaded')
        print('loaded %s word embedding, finished' % count, )
        return embedding_dict

    def seg_sent(self, s):
        """Segment s with jieba, dropping punctuation/non-word POS ('w', 'x')."""
        wds = [i.word for i in pseg.cut(s) if i.flag[0] not in ['w', 'x']]
        return wds

    def rep_sentencevector(self, sentence):
        """Build a (max_length, embedding_size) matrix of word vectors."""
        word_list = self.seg_sent(sentence)[:self.max_length]
        embedding_matrix = np.zeros((self.max_length, self.embedding_size))
        for index, wd in enumerate(word_list):
            if wd in self.embdding_dict:
                embedding_matrix[index] = self.embdding_dict.get(wd)
        len_sent = len(word_list)
        embedding_matrix = self.modify_sentencevector(embedding_matrix, len_sent)

        return embedding_matrix

    def modify_sentencevector(self, embedding_matrix, len_sent):
        """Replace all-zero rows (OOV words) with the mean of a small context window."""
        context_window = 2
        for indx, vec in enumerate(embedding_matrix):
            left = indx - context_window
            right = indx + context_window
            if left < 0:
                left = 0
            if right > len(embedding_matrix) - 1:
                # NOTE(review): right=-2 makes the slice below left:-1, i.e.
                # the window runs to the second-to-last row — looks odd but
                # is preserved as-is; confirm it is intended.
                right = -2
            context = embedding_matrix[left:right + 1]
            if vec.tolist() == [0] * 300 and indx < len_sent:
                context_vector = context.mean(axis=0)
                embedding_matrix[indx] = context_vector

        return embedding_matrix

    def label_onehot(self, label):
        """One-hot encode an integer label over len(label_dict) classes."""
        one_hot = [0] * len(self.label_dict)
        one_hot[int(label)] = 1
        return one_hot

    def build_cnn_model(self):
        """Stacked Conv1D classifier; sigmoid output with binary_crossentropy."""
        model = Sequential()
        model.add(Conv1D(64, 3, activation='relu', input_shape=(self.max_length, self.embedding_size)))
        model.add(Conv1D(64, 3, activation='relu'))
        model.add(MaxPooling1D(3))
        model.add(Conv1D(128, 3, activation='relu'))
        model.add(Conv1D(128, 3, activation='relu'))
        model.add(GlobalAveragePooling1D())
        model.add(Dropout(0.5))
        model.add(Dense(13, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
        model.summary()
        return model

    def build_lstm_model(self):
        """Three stacked LSTM layers; softmax output with categorical_crossentropy."""
        model = Sequential()
        model.add(LSTM(32, return_sequences=True, input_shape=(
            self.max_length, self.embedding_size)))  # returns a sequence of vectors of dimension 32
        model.add(LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
        model.add(LSTM(32))  # return a single vector of dimension 32
        model.add(Dense(13, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        return model

    def predict(self, sent):
        """Return (category name, probability) for a raw question string."""
        if self._cnn_model is None:  # load the Keras model once, not per call
            self._cnn_model = load_model(self.cnn_modelpath)
        sentence_vector = np.array([self.rep_sentencevector(sent)])
        res = self._cnn_model.predict(sentence_vector)[0].tolist()
        prob = max(res)
        label = self.label_dict.get(res.index(prob))
        return label, prob
|
||||
|
||||
if __name__ == '__main__':
    # Interactive classification loop: read a question, print its category.
    handler = QuestionClassify()
    while (1):
        sent = input('enter an sent to search:')
        label, prob = handler.predict(sent)
        print(label, prob)
|
181
question_classify_train.py
Normal file
181
question_classify_train.py
Normal file
@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: question_classify.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 18-11-10
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import jieba.posseg as pseg
|
||||
from keras.models import Sequential, load_model
|
||||
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dense, Dropout, LSTM, Bidirectional
|
||||
|
||||
class QuestionClassify(object):
    # Trains CNN and LSTM classifiers over 13 legal question categories.
    # Training pipeline is left byte-identical; issues are flagged inline.
    def __init__(self):
        # Fixed id -> category-name mapping used to build one-hot targets.
        self.label_dict = {
            0:"婚姻家庭",
            1:"劳动纠纷",
            2:"交通事故",
            3:"债权债务",
            4:"刑事辩护",
            5:"合同纠纷",
            6:"房产纠纷",
            7:"侵权",
            8:"公司法",
            9:"医疗纠纷",
            10:"拆迁安置",
            11:"行政诉讼",
            12:"建设工程"
        }
        cur = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        self.train_file = os.path.join(cur, 'question_train.txt')
        # NOTE(review): path differs from question_classify.py, which loads
        # 'embedding/word_vec_300.bin' — confirm which location is correct.
        self.embedding_path = os.path.join(cur, 'word_vec_300.bin')
        self.embdding_dict = self.load_embedding(self.embedding_path)
        self.max_length = 60  # sentences truncated/zero-padded to 60 tokens
        self.embedding_size = 300
        self.lstm_modelpath = 'model/lstm_question_classify.h5'
        self.cnn_modelpath = 'model/cnn_question_classify.h5'
        return

    '''加载词向量'''
    def load_embedding(self, embedding_path):
        """Load 'word v1 ... v300' text lines into a word -> np.ndarray dict."""
        embedding_dict = {}
        count = 0
        # NOTE(review): the open() iterator is never closed explicitly.
        for line in open(embedding_path):
            line = line.strip().split(' ')
            if len(line) < 300:
                continue
            wd = line[0]
            vector = np.array([float(i) for i in line[1:]])
            embedding_dict[wd] = vector
            count += 1
            if count%10000 == 0:
                print(count, 'loaded')
        print('loaded %s word embedding, finished'%count, )
        return embedding_dict

    '''对文本进行分词处理'''
    def seg_sent(self, s):
        """Segment s with jieba, dropping punctuation/non-word POS ('w', 'x')."""
        wds = [i.word for i in pseg.cut(s) if i.flag[0] not in ['w', 'x']]
        return wds

    '''基于wordvector,通过lookup table的方式找到句子的wordvector的表示'''
    def rep_sentencevector(self, sentence):
        """Build a (max_length, embedding_size) matrix of word vectors for sentence."""
        word_list = self.seg_sent(sentence)[:self.max_length]
        embedding_matrix = np.zeros((self.max_length, self.embedding_size))
        for index, wd in enumerate(word_list):
            if wd in self.embdding_dict:
                embedding_matrix[index] = self.embdding_dict.get(wd)
            else:
                continue
        len_sent = len(word_list)
        embedding_matrix = self.modify_sentencevector(embedding_matrix, len_sent)

        return embedding_matrix

    '''对于OOV词,通过左右词的词向量作平均,作为词向量表示'''
    def modify_sentencevector(self, embedding_matrix, len_sent):
        """Replace all-zero rows (OOV words) with the mean of a context window."""
        context_window = 2
        for indx, vec in enumerate(embedding_matrix):
            left = indx-context_window
            right = indx+context_window
            if left < 0:
                left = 0
            if right > len(embedding_matrix)-1:
                # NOTE(review): right=-2 makes the slice below left:-1, i.e.
                # the window runs to the second-to-last row — confirm intended.
                right = -2
            context = embedding_matrix[left:right+1]
            if vec.tolist() == [0]*300 and indx < len_sent:
                context_vector = context.mean(axis=0)
                embedding_matrix[indx] = context_vector

        return embedding_matrix

    '''对数据进行onehot映射操作'''
    def label_onehot(self, label):
        """One-hot encode an integer label over len(label_dict) classes."""
        one_hot = [0]*len(self.label_dict)
        one_hot[int(label)] = 1
        return one_hot

    '''加载数据集'''
    def load_traindata(self):
        """Load (sentence-matrix, one-hot label) pairs from '<sent>\\t<label>' lines."""
        train_X = []
        train_Y = []
        count = 0
        # NOTE(review): file handle never closed; .strip() below is called twice.
        for line in open(self.train_file):

            line = line.strip().strip().split('\t')
            if len(line) < 2:
                continue
            count += 1
            sent = line[0]
            label = line[1]
            sent_vector = self.rep_sentencevector(sent)
            label_vector = self.label_onehot(label)
            train_X.append(sent_vector)
            train_Y.append(label_vector)

            if count % 10000 == 0:
                print('loaded %s lines'%count)

        return np.array(train_X), np.array(train_Y)

    '''构造CNN网络模型'''
    def build_cnn_model(self):
        """Stacked Conv1D classifier; sigmoid output with binary_crossentropy."""
        model = Sequential()
        model.add(Conv1D(64, 3, activation='relu', input_shape=(self.max_length, self.embedding_size)))
        model.add(Conv1D(64, 3, activation='relu'))
        model.add(MaxPooling1D(3))
        model.add(Conv1D(128, 3, activation='relu'))
        model.add(Conv1D(128, 3, activation='relu'))
        model.add(GlobalAveragePooling1D())
        model.add(Dropout(0.5))
        model.add(Dense(13, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
        model.summary()
        return model

    '''构造LSTM网络'''
    def build_lstm_model(self):
        """Three stacked LSTM layers; softmax output with categorical_crossentropy."""
        model = Sequential()
        model.add(LSTM(32, return_sequences=True, input_shape=(self.max_length, self.embedding_size)))  # returns a sequence of vectors of dimension 32
        model.add(LSTM(32, return_sequences=True))  # returns a sequence of vectors of dimension 32
        model.add(LSTM(32))  # return a single vector of dimension 32
        model.add(Dense(13, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        return model

    '''训练CNN模型'''
    def train_cnn(self):
        """Train the CNN on the 80/20 split and save weights to cnn_modelpath."""
        X_train, Y_train, X_test, Y_test = self.split_trainset()
        model = self.build_cnn_model()
        model.fit(X_train, Y_train, batch_size=100, epochs=20, validation_data=(X_test, Y_test))
        model.save(self.cnn_modelpath)

    '''训练CNN模型'''
    def train_lstm(self):
        # Trains the LSTM model (the bare string above mislabels it as CNN).
        """Train the LSTM (50 epochs) and save weights to lstm_modelpath."""
        X_train, Y_train, X_test, Y_test = self.split_trainset()
        model = self.build_lstm_model()
        model.fit(X_train, Y_train, batch_size=100, epochs=50, validation_data=(X_test, Y_test))
        model.save(self.lstm_modelpath)

    '''划分数据集,按一定比例划分训练集和测试集'''
    def split_trainset(self):
        """Split loaded data 80/20 into train/test sets (no shuffling)."""
        X, Y = self.load_traindata()
        split_rate = 0.8
        indx = int(len(X)*split_rate)
        X_train = X[:indx]
        Y_train = Y[:indx]
        X_test = X[indx:]
        Y_test = Y[indx:]
        return X_train, Y_train, X_test, Y_test
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Train both models; each saves its weights to its configured model path.
    handler = QuestionClassify()
    handler.train_cnn()
    handler.train_lstm()
|
Loading…
Reference in New Issue
Block a user