create new project
This commit is contained in:
commit
46e8b676e6
3
.idea/markdown-navigator/profiles_settings.xml
Normal file
3
.idea/markdown-navigator/profiles_settings.xml
Normal file
@ -0,0 +1,3 @@
|
||||
<component name="MarkdownNavigator.ProfileManager">
|
||||
<settings default="" pdf-export="" />
|
||||
</component>
|
11
.idea/military_graph.iml
Normal file
11
.idea/military_graph.iml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.5 (nlp)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||
</component>
|
||||
</module>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5 (nlp)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/military_graph.iml" filepath="$PROJECT_DIR$/.idea/military_graph.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
427
.idea/workspace.xml
Normal file
427
.idea/workspace.xml
Normal file
@ -0,0 +1,427 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="7d73eeac-bcfb-4439-a5bc-734bf782761e" name="Default" comment="" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="CreatePatchCommitExecutor">
|
||||
<option name="PATCH_PATH" value="" />
|
||||
</component>
|
||||
<component name="FUSProjectUsageTrigger">
|
||||
<session id="-1972844595">
|
||||
<usages-collector id="statistics.lifecycle.project">
|
||||
<counts>
|
||||
<entry key="project.open.time.2" value="1" />
|
||||
<entry key="project.opened" value="1" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.edit">
|
||||
<counts>
|
||||
<entry key="py" value="14481" />
|
||||
<entry key="txt" value="14" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.edit">
|
||||
<counts>
|
||||
<entry key="PLAIN_TEXT" value="14" />
|
||||
<entry key="Python" value="14481" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.open">
|
||||
<counts>
|
||||
<entry key="py" value="2" />
|
||||
<entry key="txt" value="1" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.open">
|
||||
<counts>
|
||||
<entry key="PLAIN_TEXT" value="1" />
|
||||
<entry key="Python" value="2" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
</session>
|
||||
</component>
|
||||
<component name="FavoritesManager">
|
||||
<favorites_list name="military_graph" />
|
||||
</component>
|
||||
<component name="FileEditorManager">
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/collect_data.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-1087">
|
||||
<caret line="91" column="8" selection-start-line="91" selection-start-column="8" selection-end-line="91" selection-end-column="8" />
|
||||
<folding>
|
||||
<element signature="e#16#25#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/militarygraph.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="2758">
|
||||
<caret line="567" column="48" lean-forward="true" selection-start-line="567" selection-start-column="48" selection-end-line="567" selection-end-column="48" />
|
||||
<folding>
|
||||
<element signature="e#144#153#0" expanded="true" />
|
||||
<marker date="1556728777011" expanded="true" signature="13604:13634" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="14921:14926" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="15186:15279" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="29015:29043" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="29132:29246" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="30060:30088" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/unit.txt">
|
||||
<provider selected="true" editor-type-id="LargeFileEditor">
|
||||
<state relative-caret-position="-3353">
|
||||
<caret line="114" column="1" lean-forward="true" selection-start-line="114" selection-start-column="1" selection-end-line="114" selection-end-column="1" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/insert_data.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="79">
|
||||
<caret line="105" column="13" selection-start-line="105" selection-start-column="13" selection-end-line="105" selection-end-column="13" />
|
||||
<folding>
|
||||
<element signature="e#0#9#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</component>
|
||||
<component name="FileTemplateManagerImpl">
|
||||
<option name="RECENT_TEMPLATES">
|
||||
<list>
|
||||
<option value="Python Script" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="FindInProjectRecents">
|
||||
<findStrings>
|
||||
<find>class="pic"</find>
|
||||
<find>机长</find>
|
||||
<find>歼-15</find>
|
||||
<find>类型</find>
|
||||
<find>航空母舰</find>
|
||||
<find>坦克</find>
|
||||
<find>直升机</find>
|
||||
<find>速度</find>
|
||||
<find>$</find>
|
||||
<find>最大航程</find>
|
||||
<find>n_mos</find>
|
||||
<find>pattern</find>
|
||||
<find>数量</find>
|
||||
<find>print</find>
|
||||
</findStrings>
|
||||
</component>
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/data.txt" />
|
||||
<option value="$PROJECT_DIR$/drug_graph.py" />
|
||||
<option value="$PROJECT_DIR$/co_drug_graph.txt" />
|
||||
<option value="$PROJECT_DIR$/data.py" />
|
||||
<option value="$PROJECT_DIR$/insert_data.py" />
|
||||
<option value="$PROJECT_DIR$/collect_data.py" />
|
||||
<option value="$PROJECT_DIR$/militarygraph.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectFrameBounds" fullScreen="true">
|
||||
<option name="x" value="2" />
|
||||
<option name="y" value="23" />
|
||||
<option name="width" value="1680" />
|
||||
<option name="height" value="971" />
|
||||
</component>
|
||||
<component name="ProjectView">
|
||||
<navigator proportions="" version="1">
|
||||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<expand>
|
||||
<path>
|
||||
<item name="military_graph" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="military_graph" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
</expand>
|
||||
<select />
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
|
||||
</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="MoveFile.RECENT_KEYS">
|
||||
<recent name="$PROJECT_DIR$/data" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunDashboard">
|
||||
<option name="ruleStates">
|
||||
<list>
|
||||
<RuleState>
|
||||
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
|
||||
</RuleState>
|
||||
<RuleState>
|
||||
<option name="name" value="StatusDashboardGroupingRule" />
|
||||
</RuleState>
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.militarygraph">
|
||||
<configuration default="true" type="tests" factoryName="Attests">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="military_graph" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="CLASS_NAME" value="" />
|
||||
<option name="METHOD_NAME" value="" />
|
||||
<option name="FOLDER_NAME" value="" />
|
||||
<option name="TEST_TYPE" value="TEST_SCRIPT" />
|
||||
<option name="PATTERN" value="" />
|
||||
<option name="USE_PATTERN" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<module name="" />
|
||||
<option name="SCRIPT_NAME" value="" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="data" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="military_graph" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/collect_data.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="drug_graph" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="military_graph" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/drug_graph.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="insert_data" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="military_graph" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/insert_data.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="militarygraph" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="military_graph" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/militarygraph.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<list>
|
||||
<item itemvalue="Python.data" />
|
||||
<item itemvalue="Python.drug_graph" />
|
||||
<item itemvalue="Python.militarygraph" />
|
||||
<item itemvalue="Python.insert_data" />
|
||||
</list>
|
||||
<recent_temporary>
|
||||
<list>
|
||||
<item itemvalue="Python.militarygraph" />
|
||||
<item itemvalue="Python.insert_data" />
|
||||
<item itemvalue="Python.data" />
|
||||
<item itemvalue="Python.drug_graph" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
<component name="SvnConfiguration">
|
||||
<configuration />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="7d73eeac-bcfb-4439-a5bc-734bf782761e" name="Default" comment="" />
|
||||
<created>1552034938625</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1552034938625</updated>
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="0" y="0" width="1680" height="1050" extended-state="0" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.0596745" />
|
||||
<window_info id="Structure" order="1" weight="0.25" />
|
||||
<window_info id="Favorites" order="2" side_tool="true" />
|
||||
<window_info anchor="bottom" id="Message" order="0" />
|
||||
<window_info anchor="bottom" id="Find" order="1" />
|
||||
<window_info anchor="bottom" id="Run" order="2" weight="0.6765306" />
|
||||
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
||||
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
||||
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
||||
<window_info anchor="bottom" id="TODO" order="6" />
|
||||
<window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
|
||||
<window_info anchor="bottom" id="Terminal" order="8" />
|
||||
<window_info anchor="bottom" id="Python Console" order="9" />
|
||||
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" />
|
||||
<window_info anchor="right" id="Commander" order="0" weight="0.4" />
|
||||
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
||||
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
||||
<window_info anchor="right" id="Data View" order="3" />
|
||||
</layout>
|
||||
</component>
|
||||
<component name="Vcs.Log.UiProperties">
|
||||
<option name="RECENTLY_FILTERED_USER_GROUPS">
|
||||
<collection />
|
||||
</option>
|
||||
<option name="RECENTLY_FILTERED_BRANCH_GROUPS">
|
||||
<collection />
|
||||
</option>
|
||||
</component>
|
||||
<component name="VcsContentAnnotationSettings">
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/data.txt" />
|
||||
<entry file="file://$PROJECT_DIR$/drug_graph.py" />
|
||||
<entry file="file://$PROJECT_DIR$/attributes_all.txt" />
|
||||
<entry file="file://$PROJECT_DIR$/co_drug_graph.txt" />
|
||||
<entry file="file://$PROJECT_DIR$/unit.txt">
|
||||
<provider selected="true" editor-type-id="LargeFileEditor">
|
||||
<state relative-caret-position="-3353">
|
||||
<caret line="114" column="1" lean-forward="true" selection-start-line="114" selection-start-column="1" selection-end-line="114" selection-end-column="1" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/collect_data.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-1087">
|
||||
<caret line="91" column="8" selection-start-line="91" selection-start-column="8" selection-end-line="91" selection-end-column="8" />
|
||||
<folding>
|
||||
<element signature="e#16#25#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/insert_data.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="79">
|
||||
<caret line="105" column="13" selection-start-line="105" selection-start-column="13" selection-end-line="105" selection-end-column="13" />
|
||||
<folding>
|
||||
<element signature="e#0#9#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/militarygraph.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="2758">
|
||||
<caret line="567" column="48" lean-forward="true" selection-start-line="567" selection-start-column="48" selection-end-line="567" selection-end-column="48" />
|
||||
<folding>
|
||||
<element signature="e#144#153#0" expanded="true" />
|
||||
<marker date="1556728777011" expanded="true" signature="13604:13634" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="14921:14926" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="15186:15279" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="29015:29043" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="29132:29246" ph="..." />
|
||||
<marker date="1556728777011" expanded="true" signature="30060:30088" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
172
collect_data.py
Normal file
172
collect_data.py
Normal file
@ -0,0 +1,172 @@
|
||||
# coding: utf-8
|
||||
import os
|
||||
from urllib import request
|
||||
from lxml import etree
|
||||
import gzip
|
||||
import pymongo
|
||||
import datetime
|
||||
|
||||
class NewspaperSpider:
    """Crawler for the weapon catalogue pages under weapon.huanqiu.com.

    ``spider_main`` walks every top-level category, its sub-categories and
    their paginated listings, and stores one record per weapon page in the
    ``military.knowledge_base`` MongoDB collection. ``modify_data`` is a
    separate pass that re-parses raw pages stored in ``military.kb`` into
    ``military.graph_data``.
    """

    def __init__(self):
        # Site category slug -> Chinese label stored under the '大类' key.
        self.term_dict = {
            'aircraft': "飞行器",
            'warship': "舰船舰艇",
            'guns': "枪械与单兵",
            'tank': "坦克装甲车辆",
            'artillery': "火炮",
            'missile': "导弹武器",
            'spaceship': "太空装备",
            'explosive': "爆炸物",
        }
        # No arguments: pymongo's default connection settings are used.
        self.conn = pymongo.MongoClient()

    @staticmethod
    def _first(values, default=''):
        """Return the first element of an XPath result list, or ``default``."""
        return values[0] if values else default

    @staticmethod
    def _split_title(raw_title):
        """Strip spaces from a page title and split it on ``'_'``.

        NOTE(review): as written, the '-', '(' and ')' replacements map each
        character to itself; presumably full-width variants were intended
        on the left-hand side. Kept unchanged -- confirm against the site.
        """
        cleaned = raw_title.replace(' ', '').replace('-', '-').replace('(', '(').replace(')', ')')
        return cleaned.split('_')

    @staticmethod
    def _parse_attrs(attrs):
        """Turn ``"key:value"`` strings into a dict of cleaned attributes.

        Entries without a ':' are ignored, as are keys that start with '('
        or are longer than six characters after removing spaces and tabs.
        Only the text between the first and second ':' is used as the value;
        tabs, newlines and commas are stripped from it.
        """
        parsed = {}
        for attr in attrs:
            parts = attr.split(':')
            if len(parts) < 2:
                continue
            key = parts[0].replace('(', '(').replace(' ', '').replace('\t', '')
            if key.startswith('(') or len(key) > 6:
                continue
            parsed[key] = parts[1].replace('\t', '').replace('\n', '').replace(',', '')
        return parsed

    def get_html(self, url):
        """Download ``url`` and return the body decoded as UTF-8 text.

        The body is gunzipped only when the response says it is gzip-encoded
        (or carries the gzip magic bytes), so an uncompressed reply no longer
        raises. The response object is closed deterministically.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            # Only advertise what this method can actually decode.
            'Accept-Encoding': 'gzip',
            'Accept-Language': 'en-US,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Cookie': 'Hm_lvt_1fc983b4c305d209e7e05d96e713939f=1552034977; Hm_lpvt_1fc983b4c305d209e7e05d96e713939f=1552036141',
            'Host': 'weapon.huanqiu.com'
        }
        req = request.Request(url, headers=headers)
        with request.urlopen(req) as resp:
            page = resp.read()
            content_encoding = (resp.headers.get('Content-Encoding') or '').lower()
        if 'gzip' in content_encoding or page[:2] == b'\x1f\x8b':
            page = gzip.decompress(page)
        return page.decode('utf-8')

    def get_urllist(self, url):
        """Return the de-duplicated absolute detail-page URLs found on a listing page."""
        selector = etree.HTML(self.get_html(url))
        hrefs = selector.xpath('//li/span[@class="pic"]/a/@href')
        return list(set('http://weapon.huanqiu.com' + href for href in hrefs))

    def html_parser(self, url):
        """Fetch a detail page and return ``[html, title, attr_text, ...]``.

        The attribute texts are the string values of every
        ``div.dataInfo > ul > li`` node. A page without a ``<title>`` yields
        an empty title instead of raising ``IndexError``.
        """
        html = self.get_html(url)
        selector = etree.HTML(html)
        title = self._first(selector.xpath('//title/text()'))
        contents = [html, title]
        for node in selector.xpath('//div[@class="dataInfo"]/ul/li'):
            contents.append(node.xpath('string(.)'))
        return contents

    def modify_data(self):
        """Re-parse raw pages stored in ``military.kb`` into ``military.graph_data``.

        Each source document is expected to carry a ``contents`` list shaped
        like the return value of ``html_parser``.
        """
        for item in self.conn['military']['kb'].find():
            body = item['contents']
            title = self._split_title(body[1])
            data = {
                '名称': title[0],
                # Titles without a '_' separator have no category part.
                '类别': title[1] if len(title) > 1 else '',
            }
            selector = etree.HTML(body[0])
            data['产国'] = self._first(selector.xpath('//span[@class="country"]/b/a/text()'))
            data.update(self._parse_attrs(body[2:]))
            # Collection.insert is deprecated/removed in PyMongo; use insert_one.
            self.conn['military']['graph_data'].insert_one(data)

    def spider_main(self):
        """Main crawl entry point: visit every category, sub-category and page."""
        big_cates = ['aircraft', 'warship',
                     'guns', 'tank',
                     'artillery', 'missile',
                     'spaceship', 'explosive'
                     ]
        for big_cate in big_cates:
            big_url = 'http://weapon.huanqiu.com/weaponlist/%s' % big_cate
            selector = etree.HTML(self.get_html(big_url))
            spans = selector.xpath('//span[@class="list"]')
            if not spans:
                # Layout changed or empty category: skip rather than crash the crawl.
                print('no sub-category list found for %s' % big_url)
                continue
            span = spans[0]
            second_urls = ['http://weapon.huanqiu.com' + href for href in span.xpath('./a/@href')]
            second_cates = span.xpath('./a/text()')
            # Sub-category label -> listing URL (a repeated label keeps its last URL).
            second_dict = dict(zip(second_cates, second_urls))
            for second_cate, second_url in second_dict.items():
                max_pages = self.get_maxpage(second_url)
                for page in range(1, max_pages + 1):
                    url = second_url + '_0_0_%s' % page
                    for seed in self.get_urllist(url):
                        self.get_info(seed, big_cate, second_cate)

    def get_info(self, url, big_cate, second_cate):
        """Parse one detail page and store it in ``military.knowledge_base``.

        ``big_cate`` is translated through ``term_dict`` into the '大类' field;
        ``second_cate`` is stored verbatim under '类型'.
        """
        content = self.html_parser(url)
        data = self.extract_data(content)
        data['大类'] = self.term_dict.get(big_cate)
        data['类型'] = second_cate
        print(data)
        self.conn['military']['knowledge_base'].insert_one(data)

    def extract_data(self, content):
        """Build a record dict from the list returned by ``html_parser``.

        The record holds the name ('名称'), country ('产国'), image URL ('图片'),
        introduction text ('简介') and every attribute accepted by
        ``_parse_attrs``. Missing country or image nodes become ''.
        """
        title = self._split_title(content[1])
        data = {'名称': title[0]}
        selector = etree.HTML(content[0])
        data['产国'] = self._first(selector.xpath('//span[@class="country"]/b/a/text()'))
        data['图片'] = self._first(selector.xpath('//div[@class="maxPic"]/img/@src'))
        intro = ''.join(selector.xpath('//div[@class="module"]/p/text()'))
        data['简介'] = intro.replace('\xa0', '').replace('\u3000', '').replace('\t', '')
        data.update(self._parse_attrs(content[2:]))
        return data

    def get_maxpage(self, url):
        """Return the number of listing pages for ``url`` (at least 1).

        The count is read from the second-to-last link text inside
        ``div.pages`` (presumably the last link is a "next page" control --
        confirm against the site). Falls back to 1 when the pager is missing,
        too short, or not numeric.
        """
        selector = etree.HTML(self.get_html(url))
        page_links = selector.xpath('//div[@class="pages"]/a/text()')
        try:
            return int(page_links[-2])
        except (IndexError, ValueError):
            return 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run the full crawl.
    NewspaperSpider().spider_main()
|
5800
data/military.json
Normal file
5800
data/military.json
Normal file
File diff suppressed because it is too large
Load Diff
137
insert_data.py
Normal file
137
insert_data.py
Normal file
@ -0,0 +1,137 @@
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
|
||||
import pymongo
|
||||
class InsertData:
    """Load records from ``data/military.json`` into MongoDB.

    The file is read line by line, one JSON object per line. Short values
    that look like "<number><unit>" are converted to a number in a base unit
    (with the unit stored under ``<key>_单位``), and short values containing
    a "YYYY年" date are rewritten as ``YYYYMMDD`` (with the original text
    kept under ``<key>_详细``). Records go to ``military_qa.data``.
    """

    # Unit text -> [multiplication factor, normalized unit].
    UNIT_DICT = {
        '海里': [1852, '米'],
        '英里': [1610, '米'],
        '/节': [1852, '米'],
        'km/节': [1000, '米'],
        '吨': [1000, '千克'],
        '-吨': [1000, '千克'],
        '公里': [1000, '米'],
        '公里/节': [1000, '米'],
        '公里/小时': [1000, '米'],
        '海里节': [1852, '米'],
        '海里,节': [1852, '米'],
        '海里/节': [1852, '米'],
        '海哩/节': [1852, '米'],
        '海浬/节': [1852, '米'],
        '毫米': [0.001, '米'],
        '节': [1852, '米'],
        '节/海里': [1852, '米'],
        '节海里': [1852, '米'],
        '节行驶英里': [1852, '米'],
        '节下海里': [1852, '米'],
        '克': [0.001, '千克'],
        '里': [1852, '米'],
        '里/节': [1852, '米'],
        '米': [1, '米'],
        # Factor 1 keeps the value in kilograms, so the unit label must be
        # '千克' (it was '克'), matching the '克' and '吨' entries.
        '千克': [1, '千克'],
        '千米': [1000, '米'],
        '千米/节': [1000, '米'],
        '千米/时': [1000, '米'],
        '千米/小时': [1000, '米'],
        '千米每小时': [1000, '米'],
        '万海里/节': [18520000, '米'],
        '英里,节': [1610, '米'],
        '英里/节': [1610, '米'],
        '余英里': [1610, '米'],
        '约海里': [1852, '米'],
        '最大海里': [1852, '米'],
        '人': [1, '人'],
        '位': [1, '位'],
    }

    # Keys that are never rewritten.
    _SKIP_KEYS = ('简介', '_id')
    # A value must end with one of these characters to be treated as a quantity.
    _UNIT_ENDINGS = ('米', '里', '克', '吨', '时', '节')

    def __init__(self):
        # Resolve the data file relative to this script, independent of the
        # platform's path separator.
        cur = os.path.dirname(os.path.abspath(__file__))
        self.datapath = os.path.join(cur, 'data/military.json')
        self.conn = pymongo.MongoClient()
        self.db = self.conn['military_qa']
        self.collection = self.db['data']
        self.unit_dict = dict(self.UNIT_DICT)

    def _normalize_record(self, data):
        """Return a copy of ``data`` with quantities and dates normalized.

        Non-string values and the keys in ``_SKIP_KEYS`` are left untouched.
        A quantity that cannot be parsed, or whose unit is not in
        ``unit_dict``, keeps its original text.
        """
        data_new = data.copy()
        for key, value in data.items():
            if key in self._SKIP_KEYS or not isinstance(value, str):
                continue
            if self.check_num(value) and value.endswith(self._UNIT_ENDINGS) and len(value) < 11:
                # Everything that is not a digit, '.', or a space is the unit text.
                unit_text = ''.join(ch for ch in value if ch not in '0123456789.').replace(' ', '')
                try:
                    num = float(value.replace(unit_text, ''))
                except ValueError:
                    print('skip unit conversion for %s: %r' % (key, value))
                    continue
                unit_info = self.unit_dict.get(unit_text)
                if unit_info is None:
                    print('unknown unit for %s: %r' % (key, value))
                    continue
                factor, unit = unit_info
                data_new[key + '_单位'] = unit
                data_new[key] = num * factor
            elif len(value) <= 15:
                date = self.check_year(value)
                if date:
                    data_new[key + '_详细'] = value
                    data_new[key] = date
        return data_new

    def insert_main(self):
        """Read the JSON-lines file, normalize each record and insert it."""
        count = 0
        # Explicit encoding: the data contains Chinese text and must not
        # depend on the platform default. ``with`` closes the file.
        with open(self.datapath, encoding='utf-8') as source:
            for record in source:
                if not record.strip():
                    continue  # tolerate blank lines
                data = {k: v for k, v in json.loads(record).items() if k != '_id'}
                data_new = self._normalize_record(data)
                print(data_new)
                # Collection.insert is deprecated/removed in PyMongo; use insert_one.
                self.collection.insert_one(data_new)
                count += 1
        print('finished insert into database with %s records!' % count)

    def check_num(self, sent):
        """Return every run of digits found in ``str(sent)`` (empty list if none)."""
        return re.findall(r'\d+', str(sent))

    def check_year(self, sent):
        """Extract a date from ``sent`` and return it as ``YYYYMMDD``.

        A four-digit year followed by '年' is required; otherwise '' is
        returned. Month ('月') and day ('日') default to '01' when absent.
        Only the first match of each pattern is used.
        """
        sent = sent.replace(' ', '')
        years = re.findall('[0-9]{4}年', sent)
        if not years:
            return ''
        months = re.findall('[0-9]{1,4}月', sent)
        days = re.findall('[0-9]{1,4}日', sent)
        year = years[0].replace('年', '')
        month = months[0].replace('月', '') if months else ''
        day = days[0].replace('日', '') if days else ''
        return year + self.full_date(month) + self.full_date(day)

    def full_date(self, date):
        """Pad a month/day string to two digits; empty input becomes '01'."""
        if not date:
            date = '01'
        if int(date) < 10 and len(date) < 2:
            date = '0' + date
        return date
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: load data/military.json into MongoDB.
    InsertData().insert_main()
|
593
military_qa.py
Normal file
593
military_qa.py
Normal file
@ -0,0 +1,593 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
# File: military_qa.py
|
||||
# Author: lhy<lhy_in_blcu@126.com,https://huangyong.github.io>
|
||||
# Date: 19-3-11
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import jieba
|
||||
import jieba.posseg as pseg
|
||||
import pymongo
|
||||
|
||||
class MilitaryGraph:
|
||||
def __init__(self):
    """Set up the data path, MongoDB collection, vocabularies and jieba tags.

    The constructor:
      * resolves ``data/military.json`` next to this file,
      * opens the ``military_qa.data`` collection on a default MongoClient,
      * defines the synonym tables (attributes, categories, countries,
        comparison operators, superlatives, units),
      * compiles the unit / time detection regexes,
      * flattens every synonym table into a word -> canonical-name dict, and
      * registers all words with jieba under custom POS tags.
    """
    # os.path.dirname is portable; splitting on '/' is not.
    cur = os.path.dirname(os.path.abspath(__file__))
    self.datapath = os.path.join(cur, 'data/military.json')
    self.conn = pymongo.MongoClient()
    db_name = 'military_qa'
    col_name = 'data'
    self.col = self.conn[db_name][col_name]

    # canonical attribute name -> words that refer to it in a question
    self.attributes ={'同型': ['同型'], '机高': ['机高'],
            '战斗全重': ['战斗全重'], '水下排水量': ['水下排水量'],
            '处理器': ['处理器'], '主炮': ['主炮'],
            '制导系统': ['制导系统'], '全重': ['全重'],
            '纬度': ['纬度'], '炮口初速': ['炮口初速'],
            '发射性能': ['发射性能'], '兵装': ['兵装'],
            '型号': ['型号'],
            '长度': ['长度', '全长', '多长'], '翼展': ['翼展', '翼长'],
            '全枪长': ['全枪长', '枪长'], '射程': ['射程'],
            '前型': ['前型'],
            '发射地点': ['发射地点', '发射地点'], '首飞时间': ['首飞时间', '首飞', '初次飞行', '首次飞行'],
            '发动机数量': ['发动机数量', '几个发动机', '多少个发动机', '发动机个数', '发动机数目', '发动机个','发动机数'], '乘员': ['乘员'],
            '战斗射速': ['战斗射速'], '生产单位': ['生产单位', '产商', '制造商', '厂家', '制造机构'],
            '最大行程': ['最大行程', '最常距离'], '炮管长度': ['炮管长度', '炮管长', '炮管全长'],
            '气动布局': ['气动布局'], '武备': ['武备'],
            '武器装备': ['武器装备'], '引信': ['引信'],
            '参战情况': ['参战情况'],
            '动力装置': ['动力装置'], '飞行速度': ['飞行速度'],
            '服役时间': ['服役时间'], '新造时': ['新造时'],
            '活动范围': ['活动范围'], '弹匣容弹量': ['弹匣容弹量'],
            '编制': ['编制'], '高度': ['高度'],
            '制造厂': ['制造厂'], '口径': ['口径'],
            '鱼雷': ['鱼雷'], '经度': ['经度'],
            '研发时间': ['研发时间'], '简介': ['简介'],
            '首次轨道发射': ['首次轨道发射'],
            '挂载点': ['挂载点'], '刀锋宽度': ['刀锋宽度'],
            '续航距离': ['续航距离'], '枪械': ['枪械'],
            '最大速度': ['最大速度'], '运载火箭': ['运载火箭'],
            '生产年限': ['生产年限'], '全枪重': ['全枪重'],
            '空重': ['空重'], '水雷': ['水雷'],
            '枪炮': ['枪炮'], '水上排水量': ['水上排水量', '排水量'],
            '诞生时间': ['诞生时间'], '内置武器': ['内置武器'],
            '机长': ['机长'], '中心直径': ['中心直径', '直径'],
            '装药类型': ['装药类型'], '最大起飞重量': ['最大起飞重量', '起飞重量'],
            '有效射程': ['有效射程'], '现状': ['现状'],
            '研制时间': ['研制时间'], '舰舰导弹': ['舰舰导弹'],
            '下水时间': ['下水时间', '下水'], '机炮': ['机炮'],
            '弹长': ['弹长'], '退役时间': ['退役时间', '退役'],
            '最大射程': ['最大射程'], '改装时': ['改装时'],
            '刀重': ['刀重'], '自持力': ['自持力'],
            '产国': ['产国'], '航速': ['航速'],
            '制造商': ['制造商'], '型宽': ['型宽'],
            '弹重': ['弹重'], '刀长': ['刀长'],
            '舰长': ['舰长'], '研发厂商': ['研发厂商'],
            '旋翼直径': ['旋翼直径'], '导弹': ['导弹'],
            '满排吨位': ['满排吨位'], '底盘类型': ['底盘类型'],
            '刀锋长度': ['刀锋长度'], '弹径': ['弹径'],
            '全长': ['全长'], '竣工时': ['竣工时'],
            '发射日期': ['发射日期'], '宽度': ['宽度'],
            '总重': ['总重'], '建造时间': ['建造时间'],
            '射控装置': ['射控装置'], '图片': ['图片'],
            '轨道': ['轨道'], '改装前': ['改装前'],
            '发动机': ['发动机'], '最大航程': ['最大航程'],
            '研发单位': ['研发单位'], '大类': ['大类'],
            '关注度': ['关注度'], '最大飞行速度': ['最大飞行速度'],
            '火炮': ['火炮'], '战地机型': ['战地机型'],
            '防空兵器': ['防空兵器'], '潜航深度': ['潜航深度'],
            '轨道卫星': ['轨道卫星'], '尾翼装置': ['尾翼装置'],
            '乘员与载员': ['乘员与载员'], '名称': ['名称'],
            '引信装置': ['引信装置'], '次型': ['次型'],
            '车长': ['车长'], '武装': ['武装'],"航长":['航长'],
            '反舰导弹': ['反舰导弹'],
            '满载排水量': ['满载排水量'], '装备': ['装备']}

    # top-level weapon categories
    self.big_cates ={'火炮': ['火炮'], '飞行器': ['飞行器'],
            '舰船舰艇': ['舰船舰艇'], '坦克装甲车辆': ['坦克装甲车辆'],
            '太空装备': ['太空装备'], '爆炸物': ['爆炸物'],
            '导弹武器': ['导弹武器'], '枪械与单兵': ['枪械与单兵', '枪械', '枪', '单兵']}
    # second-level weapon categories
    self.second_cates = {'榴弹发射器': ['榴弹发射器'], '炸弹': ['炸弹', '炸药'],
            '手榴弹': ['手榴弹'], '电子战机': ['电子战机'],
            '机枪': ['机枪'], '宇宙飞船': ['宇宙飞船', '飞船'],
            '加农炮': ['加农炮'], '救护车': ['救护车'],
            '攻击机': ['攻击机'], '非自动步枪': ['非自动步枪', '步枪'],
            '火箭弹': ['火箭弹'], '地雷': ['地雷'],
            '高射炮': ['高射炮'], '航天飞机': ['航天飞机'],
            '航天机构': ['航天机构', '航天局', '航天部门'], '舰舰导弹': ['舰舰导弹'],
            '通用飞机': ['通用飞机'], '岸舰导弹': ['岸舰导弹', '导弹'],
            '舰炮': ['舰炮'], '巡洋舰': ['巡洋舰'],
            '气垫艇/气垫船': ['气垫艇/气垫船','气垫艇','气垫船'], '装甲指挥车': ['装甲指挥车', '装甲车', '指挥车'],
            '无人机': ['无人机'], '氢弹': ['氢弹'],
            '坦克炮': ['坦克炮'], '干线': ['干线'],
            '原子弹': ['原子弹'], '冲锋枪': ['冲锋枪'],
            '导弹艇': ['导弹艇'], '水雷战舰艇': ['水雷战舰艇'],
            '侦察机': ['侦察机'], '试验机': ['试验机'],
            '舰地(潜地)导弹': ['舰地(潜地)导弹','舰地导弹','潜地导弹', '导弹'],
            '支线': ['支线'], '军事卫星': ['军事卫星'],
            '地空导弹': ['地空导弹'], '航空炮': ['航空炮'],
            '战列舰': ['战列舰'], '无后坐炮': ['无后坐炮'],
            '空地导弹': ['空地导弹'], '加农榴弹炮': ['加农榴弹炮'],
            '运输机': ['运输机'], '自行火炮': ['自行火炮'],
            '地地导弹': ['地地导弹'], '空舰导弹': ['空舰导弹'],
            '教练机': ['教练机'], '其他特种装甲车辆': ['其他特种装甲车辆'],
            '火箭筒': ['火箭筒'], '空间探测器': ['空间探测器', '探测器'],
            '预警机': ['预警机'], '航空母舰': ['航空母舰', '航母'],
            '迷彩服': ['迷彩服'],'弹炮结合系统': ['弹炮结合系统'],
            '科学卫星': ['科学卫星'], '空空导弹': ['空空导弹','导弹'],
            '迫击炮': ['迫击炮'],
            '应用卫星': ['应用卫星', '卫星'], '保障辅助舰艇': ['保障辅助舰艇'],
            '刀具': ['刀具'], '霰弹枪': ['霰弹枪'],
            '自动步枪': ['自动步枪'], '手枪': ['手枪'],
            '反弹道导弹': ['反弹道导弹'], '两栖作战舰艇': ['两栖作战舰艇'],
            '特种坦克': ['特种坦克', '坦克'], '运输直升机': ['运输直升机', '直升机'],
            '巡逻舰/艇': ['巡逻舰/艇', '巡逻舰', '巡逻舰艇', '巡逻舰艇'], '加油机': ['加油机'],
            '反坦克炮': ['反坦克炮'],
            '越野车': ['越野车'], '步兵战车': ['步兵战车'],
            '战斗机': ['战斗机'], '护卫舰': ['护卫舰'],
            '工程抢修车': ['工程抢修车'],'反潜机': ['反潜机'],
            '常规潜艇': ['常规潜艇'], '装甲侦察车': ['装甲侦察车'],
            '舰空导弹': ['舰空导弹'], '运载火箭': ['运载火箭'],
            '中子弹': ['中子弹'], '飞艇': ['飞艇'],
            '航天基地': ['航天基地'], '鱼雷': ['鱼雷'],
            '轰炸机': ['轰炸机'], '技术试验卫星': ['技术试验卫星', '卫星'],
            '狙击枪': ['狙击枪'], '水雷': ['水雷'],
            '装甲车载炮': ['装甲车载炮'], '榴弹炮': ['榴弹炮'],
            '驱逐舰': ['驱逐舰'], '装甲运兵车': ['装甲运兵车'],
            '火箭炮': ['火箭炮'], '多用途直升机': ['多用途直升机', '直升机'],
            '核潜艇': ['核潜艇'], '武装直升机': ['武装直升机', '直升机'],
            '布/扫雷车': ['布/扫雷车', '扫雷车', '扫雷车'], '潜舰导弹': ['潜舰导弹', '导弹'],
            '主战坦克': ['主战坦克', '坦克']}
    # weapon names come from the JSON dump; each name maps to itself
    self.weapons = self.load_weapons()
    self.weapon_dict = {i:i for i in self.weapons}
    self.countries = {'荷兰': ['荷兰'], '阿根廷': ['阿根廷'], '瑞士': ['瑞士'],
            '伊朗': ['伊朗'], '以色列': ['以色列'], '前南斯拉夫': ['前南斯拉夫'],
            '越南': ['越南'], '葡萄牙': ['葡萄牙'], '乌克兰': ['乌克兰'],
            '新西兰': ['新西兰'], '奥地利': ['奥地利'], '希腊': ['希腊'],
            '塞尔维亚': ['塞尔维亚'], '比利时': ['比利时'],
            '俄罗斯': ['俄罗斯'], '前捷克斯洛伐克': ['前捷克斯洛伐克'],
            '捷克': ['捷克'], '土耳其': ['土耳其'], '缅甸': ['缅甸'],
            '美国': ['美国'], '德国': ['德国'], '巴西': ['巴西'],
            '印度尼西亚': ['印度尼西亚'], '法国': ['法国'],
            '瑞典': ['瑞典'], '前苏联': ['前苏联'],
            '朝鲜': ['朝鲜'],
            '埃及': ['埃及'], '墨西哥': ['墨西哥'], '巴基斯坦': ['巴基斯坦'],
            '马来西亚': ['马来西亚'], '澳大利亚': ['澳大利亚'], '泰国': ['泰国'],
            '欧盟': ['欧盟'], '波兰': ['波兰'],
            '韩国': ['韩国'], '日本': ['日本'],
            '罗马尼亚': ['罗马尼亚'], '克罗地亚': ['克罗地亚'], '智利': ['智利'],
            '匈牙利': ['匈牙利'], '意大利': ['意大利'], '英国': ['英国'],
            '丹麦': ['丹麦'], '挪威': ['挪威'], '哈萨克斯坦': ['哈萨克斯坦'],
            '爱尔兰': ['爱尔兰'], '伊拉克': ['伊拉克'],
            '中国': ['中国','中华人民共和国'], '印度': ['印度'],
            '保加利亚': ['保加利亚'], '斯洛伐克': ['斯洛伐克'],
            '西班牙': ['西班牙'], '秘鲁': ['秘鲁'],
            '阿联酋': ['阿联酋'], '卢森堡': ['卢森堡'],
            '巴拿马': ['巴拿马'], '新加坡': ['新加坡'],
            '波黑': ['波黑'], '南非': ['南非'],
            '苏/俄': ['苏/俄', '苏联', '俄罗斯'], '加拿大': ['加拿大'], '芬兰': ['芬兰']}

    # MongoDB comparison operator -> words that express it
    self.compares = {
            '$gt': ['高于','大于','长于','高过','大过','长过','多于', '远于', '远过', '之后', '晚于', '后于'],
            '$lt': ['低于', '小于', '短于', '低过', '短过', '少于', '近于', '近过', '未达到', '没达到', '之前', '先于', '早于'],
            '$lte': ['不高于','不大于','不长于','不高过','不大过','不长过','不多于', '不远于', '不远过'],
            '$gte': ['不低于', '不小于', '不短于', '不低过', '不短过', '不少于', '不近于', '不近过', '达到'],
            '$eq': ['等于', '差不多'],
            '$ne': ['不等于', '不是']}
    self.counts = ['多少', '几', '几多']
    # sort direction (-1 descending, 1 ascending) -> superlative words
    self.mosts = {
            -1:['最大', '最远', '最长', '最高', '最久', '最快', '最多', '最强'],
            1:['最小', '最短', '最近', '最低', '最矮', '最慢', '最少', '最弱'],
            }

    # unit word -> [multiplier, base-unit label]; only the multiplier is
    # read by standard_unit.
    self.unit_dict = {
            '海里': [1852, '米'],
            '英里': [1610, '米'],
            '/节': [1852, '米'],
            'km/节': [1000, '米'],
            '吨': [1000, '千克'],
            '-吨': [1000, '千克'],
            '公里': [1000, '米'],
            '公里/节': [1000, '米'],
            '公里/小时': [1000, '米'],
            '海里节': [1852, '米'],
            '海里,节': [1852, '米'],
            '海里/节': [1852, '米'],
            '海哩/节': [1852, '米'],
            '海浬/节': [1852, '米'],
            '毫米': [0.001, '米'],
            '节': [1852, '米'],
            '节/海里': [1852, '米'],
            '节海里': [1852, '米'],
            '节行驶英里': [1852, '米'],
            '节下海里': [1852, '米'],
            '克': [0.001, '千克'],
            '里': [1852, '米'],
            '里/节': [1852, '米'],
            '米': [1, '米'],
            # label fixed from '克': the multiplier 1 means the base unit is 千克,
            # consistent with the '吨' and '克' entries
            '千克': [1, '千克'],
            '千米': [1000, '米'],
            '千米/节': [1000, '米'],
            '千米/时': [1000, '米'],
            '千米/小时': [1000, '米'],
            '千米每小时': [1000, '米'],
            '万海里/节': [18520000, '米'],
            '英里,节': [1610, '米'],
            '英里/节': [1610, '米'],
            '余英里': [1610, '米'],
            '约海里': [1852, '米'],
            '最大海里': [1852, '米'],
            '厘米': [0.01, '米'],
            '分米': [0.1, '米'],
            '人': [1, '人'],
            '位': [1, '位']}

    # Longest unit words first so that e.g. '公里/小时' wins over '公里'.
    unit_wds = sorted(self.unit_dict, key=len, reverse=True)
    # Number part: integer with an optional decimal fraction. The previous
    # pattern used an unescaped '.' and required at least two digits, so a
    # quantity such as '5吨' was never detected. The unit group is matched
    # once so findall returns the text that actually occurs in the question.
    unit_regex = r'([0-9]+(?:\.[0-9]+)?)(%s)' % '|'.join(re.escape(wd) for wd in unit_wds)
    time_regex = '[0-9]{4}年[0-9]{0,4}月?[0-9]{0,4}日?'
    self.unit_pattern = re.compile(unit_regex)
    self.time_pattern = re.compile(time_regex)

    # word -> canonical name lookups
    self.country_dict = self.build_dict(self.countries)
    self.big_dict = self.build_dict(self.big_cates)
    self.small_dict = self.build_dict(self.second_cates)
    self.attribute_dict = self.build_dict(self.attributes)
    self.compare_dict = self.build_dict(self.compares)
    self.most_dict = self.build_dict(self.mosts)

    # register every known word with jieba under its custom tag
    self.add_jieba(self.country_dict, 'n_country')
    self.add_jieba(self.big_dict, 'n_big')
    self.add_jieba(self.small_dict, 'n_small')
    self.add_jieba(self.attribute_dict, 'n_attr')
    self.add_jieba(self.compare_dict, 'n_compare')
    self.add_jieba(self.most_dict, 'n_most')
    self.add_jieba(self.weapons, 'n_weapon')
|
||||
|
||||
'''加载武器实体'''
|
||||
def load_weapons(self):
    """Load the distinct weapon names from the JSON-lines file at ``self.datapath``.

    Each non-blank line is parsed as a JSON object and its '名称' value is
    collected.

    Returns:
        A list of unique weapon names (order is not defined).

    Raises:
        KeyError: if a record has no '名称' field.
        ValueError: if a non-blank line is not valid JSON.
    """
    weapons = set()
    # 'with' closes the handle; explicit UTF-8 keeps the Chinese text
    # readable regardless of the platform's default encoding.
    with open(self.datapath, encoding='utf-8') as f:
        for record in f:
            record = record.strip()
            if not record:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            weapons.add(json.loads(record)['名称'])
    return list(weapons)
|
||||
|
||||
'''构造映射字典'''
|
||||
def build_dict(self, dict):
    """Invert a ``{category: [words]}`` table into ``{word: category}``.

    When a word is listed under several categories, the category visited
    last while iterating the input wins.
    """
    return {word: cate for cate, words in dict.items() for word in words}
|
||||
|
||||
'''检测单位'''
|
||||
def detect_entity(self, question):
    """Find time expressions and number+unit quantities in ``question``.

    Returns:
        ``(times, units)`` where ``times`` are the matches of
        ``self.time_pattern`` and ``units`` are the first two captured
        groups of each ``self.unit_pattern`` match joined together.
    """
    units = []
    for hit in self.unit_pattern.findall(question):
        if hit:
            units.append(hit[0] + hit[1])
    times = self.time_pattern.findall(question)
    return times, units
|
||||
|
||||
'''检查年份并统一时间'''
|
||||
def standard_year(self, sent):
    """Convert a Chinese date expression to a ``YYYYMMDD`` string.

    Spaces are dropped before matching. If no four-digit year followed by
    '年' is present the result is ''. Month and day are optional; each is
    completed by ``self.full_date``.
    """
    stripped = sent.replace(' ', '')
    year_hits = re.compile('[0-9]{4}年').findall(stripped)
    if not year_hits:
        return ''

    pieces = [year_hits[0].replace('年', '')]
    # month first, then day - same order in which the parts are concatenated
    for regex, marker in (('[0-9]{1,4}月', '月'), ('[0-9]{1,4}日', '日')):
        hits = re.compile(regex).findall(stripped)
        fragment = hits[0].replace(marker, '') if hits else ''
        pieces.append(self.full_date(fragment))
    return ''.join(pieces)
|
||||
|
||||
'''补全日期'''
|
||||
def full_date(self, date):
    """Pad a month/day fragment to two characters.

    '' -> '01', a single digit -> zero-prefixed, everything else unchanged.
    """
    fragment = date or '01'
    if int(fragment) < 10 and len(fragment) < 2:
        return '0' + fragment
    return fragment
|
||||
|
||||
'检测是否有数字'
|
||||
def check_num(self, sent):
    """Return the first run of consecutive digits in ``sent``.

    Args:
        sent: Any value; it is converted with ``str()`` before matching.

    Returns:
        The first digit run as a string.

    Raises:
        IndexError: if ``sent`` contains no digits.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence
    # and triggers a warning on newer Python versions.
    res = re.findall(r'\d+', str(sent))
    return res[0]
|
||||
|
||||
'''检查单位并统一数量'''
|
||||
def standard_unit(self, unit_value):
    """Convert a quantity string such as '3.5吨' to a number in base units.

    The first number (with optional decimal fraction) is extracted, the
    remaining text is looked up in ``self.unit_dict`` and the number is
    multiplied by that unit's factor. Unknown units use a factor of 1.

    Args:
        unit_value: Text made of a number and a unit word.

    Returns:
        The scaled value as a float.

    Raises:
        ValueError: if ``unit_value`` contains no number.
    """
    # Match the decimal fraction too: taking only the leading digit run
    # turned '3.5吨' into num='3' / unit='.5吨', i.e. an unknown unit.
    matched = re.search(r'\d+(?:\.\d+)?', unit_value)
    if matched is None:
        raise ValueError('no numeric value found in %r' % (unit_value,))
    num = matched.group()
    unit = unit_value.replace(num, '')
    unit_info = self.unit_dict.get(unit, [1, 'default'])
    plus = unit_info[0]
    return float(num) * plus
|
||||
|
||||
'''将实体标记和实体词加入到jieba当中'''
|
||||
def add_jieba(self, wds, tag):
    """Register every word in ``wds`` with jieba under the POS tag ``tag``.

    Each word is added with a frequency of 300000.
    """
    for word in wds:
        jieba.add_word(word, tag=tag, freq=300000)
|
||||
|
||||
'''问句解析'''
|
||||
def question_parser(self, question):
    """Segment ``question`` and bucket its tokens by custom entity tag.

    Time and unit expressions detected in the question are first added to
    jieba so that they come out of segmentation as single tokens.

    Returns:
        A dict holding one word list per entity kind ('n_attrs',
        'n_times', ...), 'pattern' (the sequence of entity tags in
        question order) and 'wds' (all ``(word, flag)`` pairs).
    """
    times, units = self.detect_entity(question)
    self.add_jieba(times, 'n_time')
    self.add_jieba(units, 'n_unit')

    tagged = [(token.word, token.flag) for token in pseg.cut(question)]

    # (result key, jieba tag) in the order the keys are inserted
    slots = (
        ('n_attrs', 'n_attr'),
        ('n_times', 'n_time'),
        ('n_bigs', 'n_big'),
        ('n_smalls', 'n_small'),
        ('n_countries', 'n_country'),
        ('n_compares', 'n_compare'),
        ('n_mosts', 'n_most'),
        ('n_units', 'n_unit'),
        ('n_weapons', 'n_weapon'),
    )
    parser_dict = {}
    for key, tag in slots:
        parser_dict[key] = [word for word, flag in tagged if flag == tag]

    entity_tags = ['n_attr', 'n_time', 'n_big', 'n_small', 'n_unit', 'n_country', 'n_compare', 'n_most', 'n_weapon']
    parser_dict['pattern'] = [flag for _, flag in tagged if flag in entity_tags]
    parser_dict['wds'] = tagged
    return parser_dict
|
||||
|
||||
'''答案搜索'''
|
||||
def search_answer(self, parser_dict):
    """Turn a parsed question into MongoDB queries and run them.

    The tag sequence in ``parser_dict['pattern']`` is encoded as one letter
    per tag (c=country, w=weapon, a=attribute, s=small category,
    b=big category, u=unit, t=time, p=compare, m=most) and matched against
    the supported question shapes:

      * country + category (either order)      -> names of matching weapons
      * [country] weapon                       -> the weapon's '简介'
      * [country] weapon + 1-5 attributes      -> those attributes
      * 2-5 weapons + 1-6 attributes           -> attributes of every weapon
      * [country] weapon attrs [country] weapon attrs
                                               -> each weapon with its own attributes
      * attribute + comparison (+ second comparison) + category
      * attribute + superlative + category     -> sorted query, first hit

    Changes relative to the hand-enumerated pattern lists this replaces:
      * big-category filters use the '大类' field everywhere (the
        country + big-category branch used '类型');
      * in the interleaved shape the attributes are split at the position
        of the second weapon instead of by the number of weapons;
      * the regular shapes also cover combinations missing from the old
        lists (including one entry that contained the typo ' n_weapon').

    Args:
        parser_dict: Result of ``question_parser``.

    Returns:
        Whatever ``self.query_mongo`` returns for the built queries; an
        unrecognised shape produces no queries.
    """
    print(parser_dict)
    pattern = parser_dict['pattern']
    print(pattern)

    codes = {'n_country': 'c', 'n_weapon': 'w', 'n_attr': 'a',
             'n_small': 's', 'n_big': 'b', 'n_unit': 'u',
             'n_time': 't', 'n_compare': 'p', 'n_most': 'm'}
    sig = ''.join(codes.get(flag, '?') for flag in pattern)

    search_data = []
    search_flag = 1

    def first(key, mapping):
        # canonical name of the first word collected under ``key``
        return mapping.get(parser_dict.get(key)[0])

    def category_filter():
        # small categories live in '类型', big categories in '大类'
        if 's' in sig:
            return {'类型': first('n_smalls', self.small_dict)}
        return {'大类': first('n_bigs', self.big_dict)}

    def attr_names():
        return [self.attribute_dict.get(attr) for attr in parser_dict.get('n_attrs')]

    def weapon_names():
        return [self.weapon_dict.get(weapon) for weapon in parser_dict.get('n_weapons')]

    def operands():
        # comparison operands: standardised quantities or standardised dates
        if 'u' in sig:
            return [self.standard_unit(unit) for unit in parser_dict.get('n_units')]
        return [self.standard_year(year) for year in parser_dict.get('n_times')]

    if sig in ('cs', 'sc', 'cb', 'bc'):
        condition = {'产国': first('n_countries', self.country_dict)}
        condition.update(category_filter())
        search_data.append({'condition': condition, 'targets': ['名称']})

    elif sig in ('cw', 'w'):
        condition = {'名称': first('n_weapons', self.weapon_dict)}
        search_data.append({'condition': condition, 'targets': ['简介']})

    # single entity, several attributes
    elif re.fullmatch('c?wa{1,5}', sig):
        condition = {'名称': first('n_weapons', self.weapon_dict)}
        search_data.append({'condition': condition, 'targets': attr_names()})

    # several entities, several attributes
    elif re.fullmatch('w{2,5}a{1,6}', sig):
        condition = {'名称': {"$in": weapon_names()}}
        search_data.append({'condition': condition, 'targets': attr_names()})

    # entity and its attributes, twice
    elif re.fullmatch('c?wa{1,3}c?wa{1,3}', sig):
        weapons = weapon_names()
        attrs = attr_names()
        second_weapon_at = pattern.index('n_weapon', pattern.index('n_weapon') + 1)
        # attributes that appear before the second weapon belong to the first
        split = pattern[:second_weapon_at].count('n_attr')
        search_data.append({'condition': {'名称': weapons[0]}, 'targets': attrs[:split]})
        search_data.append({'condition': {'名称': weapons[1]}, 'targets': attrs[split:]})

    # comparison with one operator + operand
    elif re.fullmatch('a(pu|pt|tp)[sb]|[sb]a(pu|pt|tp)', sig):
        n_attr = first('n_attrs', self.attribute_dict)
        n_compare = first('n_compares', self.compare_dict)
        condition = {n_attr: {n_compare: operands()[0]}}
        condition.update(category_filter())
        search_data.append({'condition': condition, 'targets': [n_attr]})

    # comparison with two operator + operand pairs
    elif re.fullmatch('a(pupu|ptpt|tptp)[sb]|[sb]a(pupu|ptpt|tptp)', sig):
        n_attr = first('n_attrs', self.attribute_dict)
        n_compares = [self.compare_dict.get(compare) for compare in parser_dict.get('n_compares')]
        values = operands()
        condition = {n_attr: {n_compares[0]: values[0], n_compares[1]: values[1]}}
        condition.update(category_filter())
        search_data.append({'condition': condition, 'targets': [n_attr]})

    # attribute extreme value
    elif sig in ('sam', 'ams', 'bam', 'amb'):
        search_flag = 0
        n_attr = first('n_attrs', self.attribute_dict)
        n_most = first('n_mosts', self.most_dict)
        condition = category_filter()
        condition['sort_key'] = {n_attr: n_most}
        search_data.append({'condition': condition, 'targets': ['名称', n_attr]})

    return self.query_mongo(search_flag, search_data)
|
||||
|
||||
'''查询mongo数据库'''
|
||||
def query_mongo(self, search_flag, search_data):
    """Dispatch the prepared queries to the matching query routine.

    A truthy ``search_flag`` selects the plain attribute lookup; a falsy
    one selects the sorted (extreme value) lookup.
    """
    runner = self.query_mongo_attr if search_flag else self.query_mongo_sort
    return runner(search_data)
|
||||
|
||||
'''查询mongo数据库,正常'''
|
||||
def query_mongo_attr(self, search_data):
    """Run each query with ``self.col.find`` and format the requested fields.

    For every matching record one list is appended to the result; it holds
    a '<name><field>:<value>' string for each target field whose value is
    not the string 'null' (missing fields count as 'null'), so it may be
    empty.
    """
    answers = []
    for query in search_data:
        condition = query['condition']
        targets = query['targets']
        for record in self.col.find(condition):
            lines = []
            for target in targets:
                if record.get(target, 'null') != 'null':
                    lines.append(record.get('名称') + target + ':' + str(record.get(target, 'null')))
            answers.append(lines)
    return answers
|
||||
|
||||
'''按照最值方法查找mongo数据库'''
|
||||
def query_mongo_sort(self, search_data):
    """Run each query sorted by its 'sort_key' and keep only the first hit.

    The 'sort_key' entry of a query's condition is removed from the filter
    and passed to the cursor's ``sort`` as a list of ``(field, direction)``
    pairs. The single returned record is formatted as
    '<name><field>:<value>' for every target field.
    """
    answers = []
    for query in search_data:
        raw_condition = query['condition']
        condition = {field: value for field, value in raw_condition.items() if field != 'sort_key'}
        sort_condition = list(raw_condition.get('sort_key').items())
        targets = query['targets']
        cursor = self.col.find(condition).sort(sort_condition).limit(1)
        for record in cursor:
            answers.append([record.get('名称') + target + ':' + str(record.get(target, 'null'))
                            for target in targets])
    return answers
|
||||
|
||||
'问答主函数'
|
||||
def qa_main(self, question):
    """Answer one question and print the outcome.

    The question is parsed, the queries are run, and the non-empty result
    rows are printed. When nothing usable comes back - no rows at all, or
    only rows without any requested field - the "do not know" message is
    printed instead.
    """
    parser_dict = self.question_parser(question)
    results = self.search_answer(parser_dict)
    # Keep only rows that carry at least one answer string. Previously only
    # the exact value [[]] was recognised as "no answer", so an empty list
    # printed "find 0 result:" and several empty rows were counted as hits.
    answers = [result for result in (results or []) if result]
    if not answers:
        print('sorry, do not know the answer yet...')
    else:
        print('find %s result:'% len(answers))
        print('answer detail:')
        for result in answers:
            print(result)
|
||||
|
||||
if __name__ == '__main__':
    # Interactive loop: read a question from stdin and print its answer, forever.
    qa_bot = MilitaryGraph()
    while True:
        qa_bot.qa_main(input("enter an question to parser:\n"))
|
Loading…
Reference in New Issue
Block a user