add parseWord

2016-04-24 23:56:08 +08:00 · 2016-04-24 23:56:08 +08:00 · ec9165e2ce
commit ec9165e2ce
parent db6bd39356
1 changed file with 16 additions and 3 deletions
--- a/ml/div.py
+++ b/ml/div.py
@ -1,5 +1,18 @@
 #encoding=utf-8
+import json
+
 import jieba
-seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
-for l in seg_list:
-	print l
+
+
+
+class DivideWord:
+	"""Segment titles read from a JSON-lines file into words using jieba."""
+
+	def __init__(self):
+		# No instance state is needed.
+		pass
+
+	def parse(self, path='../news_spider/title.json'):
+		"""Print the jieba segmentation of every title stored in *path*.
+
+		The file is read line by line; each non-blank line must be a JSON
+		object with a ``title`` key.  Every title is cut with
+		``cut_all=True`` and the resulting list of words is printed, one
+		list per input line.
+
+		Args:
+			path: File to read.  Defaults to the location that used to be
+				hard-coded; a relative path is resolved against the
+				current working directory.
+
+		Raises:
+			IOError: If *path* cannot be opened.
+			ValueError: If a non-blank line is not valid JSON.
+			KeyError: If a decoded object has no ``title`` key.
+		"""
+		# The context manager closes the file even if a line fails to parse.
+		with open(path) as title_file:
+			for line in title_file:
+				if not line.strip():
+					# Tolerate blank lines such as a trailing newline.
+					continue
+				data = json.loads(line)
+				seg_list = list(jieba.cut(data['title'], cut_all=True))
+				# Parenthesised so the statement is valid on Python 2 and 3.
+				print(seg_list)