增加倒排字典

This commit is contained in:
lzjqsdd 2016-04-25 23:58:51 +08:00
parent cbb2cc26b3
commit d94fb654dd

26
ml/InverseIndex.py Normal file
View File

@ -0,0 +1,26 @@
#encoding=utf-8
import jieba
import json
import sys
# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module is reloaded to get it back.  Switching the default codec to UTF-8
# lets implicit str/unicode conversions (e.g. printing Chinese tokens to a
# pipe) succeed instead of failing with the ASCII codec.
reload(sys)
sys.setdefaultencoding('utf-8')

# Inverted index: maps each token produced by jieba to the list of 1-based
# line numbers of the input file whose 'title' field yielded that token.
# A token occurring several times in one title is recorded once per occurrence.
worddict = dict()

# Number of lines processed; it is also the posting id of the current line.
# Initialised here so it is still defined (as 0) when the input is empty.
count = 0

# The input is read as one JSON object per line, each with a 'title' key.
# 'with' guarantees the handle is closed even if a line fails to parse.
with open('../news_spider/title.json') as title_file:
    for count, line in enumerate(title_file, 1):
        data = json.loads(line)
        # cut_all=True asks jieba for full-mode segmentation, which emits
        # every dictionary word found in the title (overlaps included).
        for w in jieba.cut(data['title'], cut_all=True):
            worddict.setdefault(w, []).append(count)

# Dump every indexed token, one per line.  A single parenthesised argument
# prints identically under the Python 2 print statement and also parses as
# a function call on Python 3.
for i in worddict:
    print(i)