diff --git a/README.md b/README.md index e701a4c..025a175 100644 --- a/README.md +++ b/README.md @@ -37,12 +37,64 @@ ChinesePersonRelationGraph, person relationship extraction based on nlp methods. # 人物关系基础知识库 -# 刘备人物关系网 +1,收集人名词典 +2,基于人名词典,采集搜狗人物关系图谱数据库 +1# 刘备人物关系网 ![image](https://github.com/liuhuanyong/ChinesePersonRelationGraph/blob/master/image/rel_graph1.png) -# 韩寒人物关系网 +2# 韩寒人物关系网 ![image](https://github.com/liuhuanyong/ChinesePersonRelationGraph/blob/master/image/rel_graph2.png) +3,人物关系数据库规模 + +|项目|数量| +|:--:|:--:| +|人物|11024| +|关系对|35995| +|关系类型|1144| + +4,人物关系60% + +关系类型 频次 频率 累加频率 +搭档 4692 0.1303478164240471 0.1303478164240471 +好友 3771 0.10476164018224247 0.23510945660628957 +队友 1758 0.04883875986220691 0.2839482164684965 +朋友 1681 0.04669963329258806 0.3306478497610846 +丈夫 1431 0.03975441715746194 0.3704022669185465 +妻子 1198 0.03328147571952439 0.4036837426380709 +师傅 986 0.02739193243693744 0.4310756750750083 +儿子 972 0.027003000333370376 0.4580786754083787 +母亲 922 0.02561395710634515 0.4836926325147239 +同学 698 0.01939104344927214 0.5030836759639961 +弟弟 678 0.01883542615846205 0.5219191021224581 +女儿 609 0.01691854650516724 0.5388376486276253 +前女友 594 0.016501833537059675 0.555339482164685 +哥哥 580 0.01611290143349261 0.5714523835981776 +合作 573 0.01591843538170908 0.5873708189798867 +前男友 573 0.01591843538170908 0.6032892543615959 + +# 回标语料构建 +目录地址:EventMonitor +运行方式:cd EventMonitor , scrapy crawl eventspider + + + + +# + + + + + + + + + + + + + + diff --git a/collect_person_rel.py b/collect_person_rel.py index c204a50..896947b 100644 --- a/collect_person_rel.py +++ b/collect_person_rel.py @@ -83,7 +83,7 @@ class PersonSpider: history_names = [i.strip() for i in open('history_person_names.txt') if len(i.strip()) > 1] star_names = [i.strip() for i in open('star_person_names.txt') if len(i.strip()) > 1] name_dict = { - # 'star': star_names, + 'star': star_names, 'history': history_names, } for label, names in name_dict.items(): @@ -194,20 +194,6 @@ class PersonSpider: if __name__ == '__main__': handler = PersonSpider() - # handler.spider('张学友') - # while 1: - # query = input('entere an person to spider:') - # res = handler.spider(query) - # print(res) - - # handler.update_data() - # iteration = 10 - # while(iteration): - # # handler.read_persons() - # handler.update_data() - # print(iteration) - # iteration -= 1 - # handler.read_persons() - handler.modify_data() + handler.spider_main()