开始添加检索模块
This commit is contained in:
parent
d7a3e28f59
commit
3541ef0e7e
Binary file not shown.
@ -12,14 +12,16 @@ class NetEaseSpider(scrapy.Spider):
|
||||
allowed_domains=['news.163.com']
|
||||
|
||||
base_url = 'http://snapshot.news.163.com/wgethtml/http+!!news.163.com!'
|
||||
# year = ['2016','2015']
|
||||
# month = ['12','11','10','09','08','07','06','05','04','03','02','01']
|
||||
year = ['2016','2015']
|
||||
month = ['12','11','10','09','08','07','06','05','04','03','02','01']
|
||||
day = ['31','30','29','28','27','26','25','24','23','22','21',
|
||||
'20','19','18','17','16','15','14','13','12','11','10',
|
||||
'09','08','07','06','05','04','03','02','01']
|
||||
# year = ['2016']
|
||||
# month = ['03']
|
||||
# day = ['31','30','29','28','27','26','25','24','23','22','21',
|
||||
# '20','19','18','17','16','15','14','13','12','11','10',
|
||||
# '09','08','07','06','05','04','03','02','01']
|
||||
day = ['31']
|
||||
year = ['2016']
|
||||
month = ['03']
|
||||
|
||||
def parse(self,response):
|
||||
for y in self.year:
|
||||
|
Binary file not shown.
@ -11,17 +11,17 @@ class TencentSpider(scrapy.Spider):
|
||||
name='tencent'
|
||||
allowed_domains=['news.qq.com']
|
||||
|
||||
# base_url = 'http://news.qq.com/b/history/index'
|
||||
# year = ['2016','2015','2014']
|
||||
# month = ['12','11','10','09','08','07','06','05','04','03','02','01']
|
||||
# day = ['31','30','29','28','27','26','25','24','23','22','21',
|
||||
# '20','19','18','17','16','15','14','13','12','11','10',
|
||||
# '09','08','07','06','05','04','03','02','01']
|
||||
base_url = 'http://news.qq.com/b/history/index'
|
||||
year = ['2016','2015','2014']
|
||||
month = ['12','11','10','09','08','07','06','05','04','03','02','01']
|
||||
day = ['31','30','29','28','27','26','25','24','23','22','21',
|
||||
'20','19','18','17','16','15','14','13','12','11','10',
|
||||
'09','08','07','06','05','04','03','02','01']
|
||||
tp = ['am','pm']
|
||||
|
||||
day = ['31']
|
||||
year = ['2016']
|
||||
month = ['03']
|
||||
# day = ['31']
|
||||
# year = ['2016']
|
||||
# month = ['03']
|
||||
|
||||
def parse(self,response):
|
||||
for y in self.year:
|
||||
|
Binary file not shown.
@ -12,7 +12,7 @@ class TouTiaoSpider(scrapy.Spider):
|
||||
]
|
||||
base_class_url = 'http://toutiao.com/articles_news_society'
|
||||
base_url = 'http://toutiao.com'
|
||||
maxpage = 10;#允许爬的最大的页数
|
||||
maxpage = 501;#允许爬的最大的页数
|
||||
category = ['articles_news_society','articles_news_entertainment',
|
||||
'articles_movie','articles_news_tech','articles_digital',
|
||||
'articels_news_sports','articles_news_finance','articles_news_military',
|
||||
|
Binary file not shown.
@ -12,7 +12,11 @@ while 1:
|
||||
break
|
||||
data = json.loads(line)
|
||||
c+=1
|
||||
print data['time'],data['title'],data['url']
|
||||
if sys.argv[2] == '1':
|
||||
print c,"-->",data['time'],data['title'],data['url'],data['content']
|
||||
else:
|
||||
print c,"-->",data['time'],data['title'],data['url']
|
||||
|
||||
|
||||
#data = json.load(file)
|
||||
#c = 0
|
||||
|
Loading…
Reference in New Issue
Block a user