add parseWord
This commit is contained in:
parent
db6bd39356
commit
ec9165e2ce
19
ml/div.py
19
ml/div.py
@ -1,5 +1,18 @@
|
||||
#encoding=utf-8
|
||||
import json

import jieba
|
||||
# Demo run at import time: segment a sample sentence with cut_all=True and
# print each resulting token on its own line.
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
for word in seg_list:
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(word)
|
||||
|
||||
|
||||
|
||||
class DivideWord:
    """Segment the titles stored in a JSON-lines file using jieba."""

    def __init__(self):
        # The class keeps no instance state.
        pass

    def parse(self, path='../news_spider/title.json'):
        """Print the jieba segmentation of every title found in *path*.

        The file is read line by line; each non-blank line is decoded with
        ``json.loads`` and its ``'title'`` value is segmented with
        ``jieba.cut(..., cut_all=True)``. The resulting list of tokens is
        printed, one list per input line.

        Args:
            path: Location of the input file. Defaults to the path the
                original implementation hard-coded.

        Raises:
            IOError / OSError: If the file cannot be opened.
            ValueError: If a non-blank line is not valid JSON.
            KeyError: If a decoded JSON object has no ``'title'`` key.
        """
        # The context manager closes the handle even when a line fails to parse.
        with open(path) as title_file:
            for line in title_file:
                # A blank line (e.g. a trailing newline at the end of the
                # file) is not valid JSON; skip it instead of crashing.
                if not line.strip():
                    continue
                data = json.loads(line)
                seg_list = list(jieba.cut(data['title'], cut_all=True))
                # Single-argument call form works on Python 2 and Python 3.
                print(seg_list)
|
||||
|
Loading…
Reference in New Issue
Block a user