From db6bd393561fabf6996b0438a1f8da3f9b5d0839 Mon Sep 17 00:00:00 2001
From: lzjqsdd
Date: Sun, 24 Apr 2016 00:17:26 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=95=B0=E6=8D=AE=E5=BA=93?=
 =?UTF-8?q?=E5=86=99=E5=85=A5=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ml/div.py              |  5 +++++
 news_spider/news2db.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 ml/div.py
 create mode 100644 news_spider/news2db.py

diff --git a/ml/div.py b/ml/div.py
new file mode 100644
index 0000000..1ddb179
--- /dev/null
+++ b/ml/div.py
@@ -0,0 +1,5 @@
+#encoding=utf-8
+import jieba
+seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
+for l in seg_list:
+    print l
diff --git a/news_spider/news2db.py b/news_spider/news2db.py
new file mode 100644
index 0000000..6c7e672
--- /dev/null
+++ b/news_spider/news2db.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Load news records from title.json into the news table of news.db.
+
+title.json holds one JSON object per line; the 'title', 'time' and 'url'
+fields of each object are inserted as one row.
+"""
+import json
+import sqlite3
+import sys
+
+# Python 2 only: switch the implicit str/unicode codec from ascii to utf-8,
+# which printing non-ASCII text to a pipe or file relies on.
+reload(sys)
+sys.setdefaultencoding('utf-8')
+
+# Placeholders let sqlite3 do the quoting, so a quote character inside a
+# title or url can neither break the statement nor inject SQL.
+INSERT_SQL = "insert into news(title,time,url) values (?,?,?)"
+
+with open('title.json') as json_file:
+    conn = sqlite3.connect('news.db')
+    try:
+        for line in json_file:
+            if not line.strip():
+                # Tolerate blank lines instead of failing in json.loads.
+                continue
+            data = json.loads(line)
+            # Bind every field as text, whatever its JSON type.
+            row = tuple(unicode(data[key]) for key in ('title', 'time', 'url'))
+            print u', '.join(row)
+            conn.execute(INSERT_SQL, row)
+            # Commit per record so earlier rows survive a later failure.
+            conn.commit()
+    finally:
+        # Release the database handle even if a record fails.
+        conn.close()