创建项目
This commit is contained in:
commit
77fa8935d5
11
.idea/AbstractKnowledgeGraph.iml
Normal file
11
.idea/AbstractKnowledgeGraph.iml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="TestRunnerService">
|
||||||
|
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||||
|
</component>
|
||||||
|
</module>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/AbstractKnowledgeGraph.iml" filepath="$PROJECT_DIR$/.idea/AbstractKnowledgeGraph.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
43
.idea/workspace.xml
Normal file
43
.idea/workspace.xml
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectFrameBounds" extendedState="1">
|
||||||
|
<option name="y" value="23" />
|
||||||
|
<option name="width" value="1680" />
|
||||||
|
<option name="height" value="971" />
|
||||||
|
</component>
|
||||||
|
<component name="PropertiesComponent">
|
||||||
|
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||||
|
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
|
||||||
|
</component>
|
||||||
|
<component name="RunManager">
|
||||||
|
<configuration name="search_concept" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||||
|
<module name="AbstractKnowledgeGraph" />
|
||||||
|
<option name="INTERPRETER_OPTIONS" value="" />
|
||||||
|
<option name="PARENT_ENVS" value="true" />
|
||||||
|
<envs>
|
||||||
|
<env name="PYTHONUNBUFFERED" value="1" />
|
||||||
|
</envs>
|
||||||
|
<option name="SDK_HOME" value="" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||||
|
<option name="IS_MODULE_SDK" value="true" />
|
||||||
|
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||||
|
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||||
|
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/search_concept.py" />
|
||||||
|
<option name="PARAMETERS" value="" />
|
||||||
|
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||||
|
<option name="EMULATE_TERMINAL" value="false" />
|
||||||
|
<option name="MODULE_MODE" value="false" />
|
||||||
|
<option name="REDIRECT_INPUT" value="false" />
|
||||||
|
<option name="INPUT_FILE" value="" />
|
||||||
|
<method v="2" />
|
||||||
|
</configuration>
|
||||||
|
<recent_temporary>
|
||||||
|
<list>
|
||||||
|
<item itemvalue="Python.search_concept" />
|
||||||
|
</list>
|
||||||
|
</recent_temporary>
|
||||||
|
</component>
|
||||||
|
<component name="VcsContentAnnotationSettings">
|
||||||
|
<option name="myLimit" value="2678400000" />
|
||||||
|
</component>
|
||||||
|
</project>
|
93160
dict/concept_total.txt
Normal file
93160
dict/concept_total.txt
Normal file
File diff suppressed because it is too large
Load Diff
1423
dict/hiearchy.txt
Normal file
1423
dict/hiearchy.txt
Normal file
File diff suppressed because it is too large
Load Diff
82
search_concept.py
Normal file
82
search_concept.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import os
|
||||||
|
import networkx as nx
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('TkAgg')
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
class ConceptNet:
    """Build and query concept paths derived from two dictionary files.

    The hierarchy file supplies directed edges between concepts; the
    concept file maps each word to one or more concepts.  Both files are
    expected in a ``dict`` directory next to this module.
    """

    def __init__(self):
        """Resolve the dictionary file paths relative to this module."""
        # os.path.dirname works with any path separator, unlike splitting
        # the absolute path on '/'.
        cur = os.path.dirname(os.path.abspath(__file__))
        self.hiearchy_file = os.path.join(cur, "dict/hiearchy.txt")
        self.concept_file = os.path.join(cur, "dict/concept_total.txt")

    def load_concept_edges(self):
        """Load the concept edges from the hierarchy file.

        Each usable line holds at least two fields separated by a single
        space; only the text after the last ``|`` of each field is kept.
        Lines with fewer than two fields are skipped.

        Returns:
            A list of ``(second_field, first_field)`` tuples, i.e. every
            edge points from the second column to the first column.
        """
        edges = []
        # NOTE(review): assumes the dictionary files are UTF-8 encoded --
        # confirm against the actual data files.
        with open(self.hiearchy_file, encoding="utf-8") as handle:
            for line in handle:
                fields = line.strip().split(' ')
                if len(fields) < 2:
                    continue
                from_ = fields[0].split('|')[-1]
                to_ = fields[1].split('|')[-1]
                edges.append((to_, from_))
        return edges

    def build_graph(self, edges):
        """Build a networkx directed graph from an iterable of edges.

        Args:
            edges: iterable of ``(source, target)`` pairs.

        Returns:
            An ``nx.DiGraph`` containing those edges.
        """
        G = nx.DiGraph()
        G.add_edges_from(edges)
        return G

    def build_basic_concept(self):
        """Build the base concept dictionary from the hierarchy graph.

        For every node, the shortest paths to all reachable nodes are
        computed and the one with the most nodes is kept.  On ties the
        first such path in networkx's iteration order wins.

        Returns:
            A dict mapping each node to a list of nodes (a path that
            starts at that node).
        """
        concept_dict = {}
        print("loading concept edges")
        edges = self.load_concept_edges()
        print("build grpah")
        graph = self.build_graph(edges)
        for source, path_dict in nx.all_pairs_shortest_path(graph):
            if not path_dict:
                continue
            # max() returns the first maximal element, matching a stable
            # descending sort followed by taking element 0.
            concept_dict[source] = max(path_dict.values(), key=len)
        return concept_dict

    def build_all_concepts(self):
        """Collect a concept path entry for every word in the concept file.

        Each line is split on tabs: the first field is the word, the last
        field is a comma-separated list of concepts (only the text after
        the last ``|`` of each concept is used).  Blank lines are skipped.

        Returns:
            A dict keyed by word.  If the word itself is a node of the
            hierarchy graph the value is its single path (a flat list);
            otherwise it is a list of paths, one per concept, each
            prefixed with the word.
        """
        all_dict = {}
        concept_dict = self.build_basic_concept()
        print('building all concepts')
        with open(self.concept_file, encoding="utf-8") as handle:
            for line in handle:
                fields = line.strip().split('\t')
                wd = fields[0]
                if not wd:
                    # Blank line: would otherwise register an entry for ''.
                    continue
                concepts = [c.split('|')[-1] for c in fields[-1].split(',')]
                concept_path = concept_dict.get(wd)
                if not concept_path:
                    # Unknown concepts fall back to a one-element path.
                    concept_path = [[wd] + concept_dict.get(c, [c]) for c in concepts]
                all_dict[wd] = concept_path
        return all_dict

    def search_hiearchy(self):
        """Run the interactive lookup loop.

        Builds the full dictionary (printing the elapsed build time in
        seconds), then repeatedly prompts for a word and prints the word
        with its stored path(s), or an empty string when it is unknown.
        The loop ends cleanly on end-of-input or Ctrl-C.
        """
        import time
        start_time = time.time()
        all_dict = self.build_all_concepts()
        print(time.time() - start_time)
        while True:
            try:
                wd = input('enter an wd to search:').strip()
            except (EOFError, KeyboardInterrupt):
                # Leave the prompt loop without a traceback.
                break
            path = all_dict.get(wd, '')
            print(wd, path)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: start the interactive concept lookup.
    ConceptNet().search_hiearchy()
|
Loading…
Reference in New Issue
Block a user