开始添加检索模块

2016-04-24 00:16:16 +08:00 · 2016-04-24 00:16:16 +08:00 · 3541ef0e7e
commit 3541ef0e7e
parent d7a3e28f59
8 changed files with 22 additions and 16 deletions
--- a/news_spider/news.db
+++ b/news_spider/news.db
--- a/news_spider/news_spider/spiders/NetEase.py
+++ b/news_spider/news_spider/spiders/NetEase.py
@@ -12,14 +12,16 @@ class NetEaseSpider(scrapy.Spider):
 	allowed_domains=['news.163.com']

 	base_url = 'http://snapshot.news.163.com/wgethtml/http+!!news.163.com!'
-#	year = ['2016','2015']
-#	month = ['12','11','10','09','08','07','06','05','04','03','02','01']
+	year = ['2016','2015']
+	month = ['12','11','10','09','08','07','06','05','04','03','02','01']
+	day = ['31','30','29','28','27','26','25','24','23','22','21',
+	   '20','19','18','17','16','15','14','13','12','11','10',
+		   '09','08','07','06','05','04','03','02','01']
+#	year = ['2016']
+#	month = ['03']
 #	day = ['31','30','29','28','27','26','25','24','23','22','21',
 #		   '20','19','18','17','16','15','14','13','12','11','10',
 #		   '09','08','07','06','05','04','03','02','01']
-	day = ['31']
-	year = ['2016']
-	month = ['03']

 	def parse(self,response):
 		for y in self.year:
--- a/news_spider/news_spider/spiders/NetEase.pyc
+++ b/news_spider/news_spider/spiders/NetEase.pyc
--- a/news_spider/news_spider/spiders/Tencent.py
+++ b/news_spider/news_spider/spiders/Tencent.py
@@ -11,17 +11,17 @@ class TencentSpider(scrapy.Spider):
 	name='tencent'
 	allowed_domains=['news.qq.com']

-#	base_url = 'http://news.qq.com/b/history/index'
-#	year = ['2016','2015','2014']
-#	month = ['12','11','10','09','08','07','06','05','04','03','02','01']
-#	day = ['31','30','29','28','27','26','25','24','23','22','21',
-#		   '20','19','18','17','16','15','14','13','12','11','10',
-#		   '09','08','07','06','05','04','03','02','01']
+	base_url = 'http://news.qq.com/b/history/index'
+	year = ['2016','2015','2014']
+	month = ['12','11','10','09','08','07','06','05','04','03','02','01']
+	day = ['31','30','29','28','27','26','25','24','23','22','21',
+		   '20','19','18','17','16','15','14','13','12','11','10',
+		   '09','08','07','06','05','04','03','02','01']
 	tp = ['am','pm']

-	day = ['31']
-	year = ['2016']
-	month = ['03']
+#	day = ['31']
+#	year = ['2016']
+#	month = ['03']

 	def parse(self,response):
 		for y in self.year:
--- a/news_spider/news_spider/spiders/Tencent.pyc
+++ b/news_spider/news_spider/spiders/Tencent.pyc
--- a/news_spider/news_spider/spiders/TouTiaoSpider.py
+++ b/news_spider/news_spider/spiders/TouTiaoSpider.py
@@ -12,7 +12,7 @@ class TouTiaoSpider(scrapy.Spider):
 	]
 	base_class_url = 'http://toutiao.com/articles_news_society'
 	base_url = 'http://toutiao.com'
-	maxpage = 10;#允许爬的最大的页数
+	maxpage = 501;#允许爬的最大的页数
 	category = ['articles_news_society','articles_news_entertainment',
 	'articles_movie','articles_news_tech','articles_digital',
 	'articels_news_sports','articles_news_finance','articles_news_military',
--- a/news_spider/news_spider/spiders/TouTiaoSpider.pyc
+++ b/news_spider/news_spider/spiders/TouTiaoSpider.pyc
--- a/news_spider/show.py
+++ b/news_spider/show.py
@@ -12,7 +12,11 @@ while 1:
 		break
 	data = json.loads(line)
 	c+=1
-	print data['time'],data['title'],data['url']
+	if sys.argv[2] == '1':
+		print c,"-->",data['time'],data['title'],data['url'],data['content']
+	else:
+		print c,"-->",data['time'],data['title'],data['url']
+		

 #data = json.load(file)
 #c = 0