diff --git a/ml/div.py b/ml/div.py
new file mode 100644
index 0000000..1ddb179
--- /dev/null
+++ b/ml/div.py
@@ -0,0 +1,8 @@
+#encoding=utf-8
+# Demo: segment a sample Chinese sentence with jieba and print each token.
+import jieba
+
+# cut_all=True asks jieba for "full mode" segmentation (see the jieba README).
+seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
+for word in seg_list:
+    print word
diff --git a/news_spider/news2db.py b/news_spider/news2db.py
new file mode 100644
index 0000000..6c7e672
--- /dev/null
+++ b/news_spider/news2db.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Load scraped news records from title.json into the news table of news.db.
+
+Each line of title.json is parsed as one JSON object with the keys
+'title', 'time' and 'url'.
+"""
+import json
+import sqlite3
+import sys
+# Python 2 only: make utf-8 the implicit str/unicode codec so that printing
+# non-ASCII text also works when stdout has no encoding (e.g. when piped).
+reload(sys)
+sys.setdefaultencoding('utf-8')
+
+# Bound parameters let sqlite3 do the quoting, so a title containing a quote
+# can neither break the statement nor inject SQL.
+INSERT_SQL = "insert into news(title,time,url) values (?,?,?)"
+
+conn = sqlite3.connect('news.db')
+try:
+    # "with conn" commits once if every row loads and rolls back otherwise.
+    with conn, open('title.json') as source:
+        for line in source:
+            if not line.strip():
+                continue  # tolerate blank lines, e.g. a trailing newline
+            data = json.loads(line)
+            row = tuple(unicode(data[key]) for key in ('title', 'time', 'url'))
+            print 'insert:', row[0], row[1], row[2]
+            conn.execute(INSERT_SQL, row)
+finally:
+    conn.close()