commit
58c576edda
2
.gitignore
vendored
Normal file → Executable file
2
.gitignore
vendored
Normal file → Executable file
@ -5,3 +5,5 @@ data/title.json
|
||||
data/cutnews
|
||||
data/orinews
|
||||
data/inversedata
|
||||
**/*.pyc
|
||||
tools/news.db
|
||||
|
0
data/show.png
Normal file → Executable file
0
data/show.png
Normal file → Executable file
Before Width: | Height: | Size: 239 KiB After Width: | Height: | Size: 239 KiB |
0
data/stopword.txt
Normal file → Executable file
0
data/stopword.txt
Normal file → Executable file
BIN
ml/Cut.pyc
BIN
ml/Cut.pyc
Binary file not shown.
0
ml/InverseIndex.py
Normal file → Executable file
0
ml/InverseIndex.py
Normal file → Executable file
Binary file not shown.
0
ml/Search.py
Normal file → Executable file
0
ml/Search.py
Normal file → Executable file
BIN
ml/Search.pyc
BIN
ml/Search.pyc
Binary file not shown.
0
ml/__init__.py
Normal file → Executable file
0
ml/__init__.py
Normal file → Executable file
BIN
ml/__init__.pyc
BIN
ml/__init__.pyc
Binary file not shown.
0
news_spider/news_spider/__init__.py
Normal file → Executable file
0
news_spider/news_spider/__init__.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/commands/__init__.py
Normal file → Executable file
0
news_spider/news_spider/commands/__init__.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/commands/crawlall.py
Normal file → Executable file
0
news_spider/news_spider/commands/crawlall.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/items.py
Normal file → Executable file
0
news_spider/news_spider/items.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/pipelines.py
Normal file → Executable file
0
news_spider/news_spider/pipelines.py
Normal file → Executable file
Binary file not shown.
27
news_spider/news_spider/rotateuseragent.py
Normal file → Executable file
27
news_spider/news_spider/rotateuseragent.py
Normal file → Executable file
@ -1,6 +1,6 @@
|
||||
# -*-coding:utf-8-*-
|
||||
|
||||
from scrapy import log
|
||||
import logging
|
||||
|
||||
"""避免被ban策略之一:使用useragent池。
|
||||
|
||||
@ -8,7 +8,26 @@ from scrapy import log
|
||||
"""
|
||||
|
||||
import random
|
||||
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
|
||||
from scrapy import signals
|
||||
|
||||
class UserAgentMiddleware(object):
|
||||
"""This middleware allows spiders to override the user_agent"""
|
||||
|
||||
def __init__(self, user_agent='Scrapy'):
|
||||
self.user_agent = user_agent
|
||||
|
||||
@classmethod
|
||||
def from_crawler(cls, crawler):
|
||||
o = cls(crawler.settings['USER_AGENT'])
|
||||
crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
|
||||
return o
|
||||
|
||||
def spider_opened(self, spider):
|
||||
self.user_agent = getattr(spider, 'user_agent', self.user_agent)
|
||||
|
||||
def process_request(self, request, spider):
|
||||
if self.user_agent:
|
||||
request.headers.setdefault(b'User-Agent', self.user_agent)
|
||||
|
||||
class RotateUserAgentMiddleware(UserAgentMiddleware):
|
||||
|
||||
@ -19,8 +38,8 @@ class RotateUserAgentMiddleware(UserAgentMiddleware):
|
||||
ua = random.choice(self.user_agent_list)
|
||||
if ua:
|
||||
#显示当前使用的useragent
|
||||
print "********Current UserAgent:%s************",ua
|
||||
log.msg('Current UserAgent:'+ua,log.INFO)
|
||||
print "********Current UserAgent:%s************" % ua
|
||||
logging.info('Current UserAgent:'+ua)
|
||||
request.headers.setdefault('User-Agent', ua)
|
||||
|
||||
#the default user_agent_list composes chrome,I E,firefox,Mozilla,opera,netscape
|
||||
|
Binary file not shown.
0
news_spider/news_spider/settings.py
Normal file → Executable file
0
news_spider/news_spider/settings.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/setup.py
Normal file → Executable file
0
news_spider/news_spider/setup.py
Normal file → Executable file
0
news_spider/news_spider/spiders/NetEase.py
Normal file → Executable file
0
news_spider/news_spider/spiders/NetEase.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/spiders/Tencent.py
Normal file → Executable file
0
news_spider/news_spider/spiders/Tencent.py
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/spiders/TouTiaoSpider.py
Normal file → Executable file
0
news_spider/news_spider/spiders/TouTiaoSpider.py
Normal file → Executable file
0
news_spider/news_spider/spiders/TouTiaoSpider.py.old
Normal file → Executable file
0
news_spider/news_spider/spiders/TouTiaoSpider.py.old
Normal file → Executable file
Binary file not shown.
0
news_spider/news_spider/spiders/__init__.py
Normal file → Executable file
0
news_spider/news_spider/spiders/__init__.py
Normal file → Executable file
Binary file not shown.
0
news_spider/scrapy.cfg
Normal file → Executable file
0
news_spider/scrapy.cfg
Normal file → Executable file
1
requirements.txt
Executable file
1
requirements.txt
Executable file
@ -0,0 +1 @@
|
||||
Scrapy==1.7.3
|
0
test/test_tool.py
Normal file → Executable file
0
test/test_tool.py
Normal file → Executable file
0
testdata/data/cutnews/0.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/0.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/1.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/1.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/10.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/10.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/11.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/11.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/12.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/12.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/13.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/13.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/14.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/14.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/2.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/2.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/3.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/3.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/4.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/4.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/5.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/5.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/6.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/6.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/7.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/7.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/8.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/8.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/9.txt
vendored
Normal file → Executable file
0
testdata/data/cutnews/9.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/0.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/0.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/1.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/1.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/10.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/10.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/100.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/100.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/101.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/101.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/102.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/102.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/103.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/103.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/104.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/104.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/105.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/105.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/106.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/106.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/107.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/107.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/108.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/108.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/109.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/109.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/11.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/11.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/110.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/110.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/111.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/111.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/112.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/112.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/113.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/113.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/114.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/114.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/115.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/115.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/116.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/116.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/117.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/117.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/118.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/118.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/119.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/119.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/12.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/12.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/120.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/120.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/121.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/121.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/122.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/122.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/123.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/123.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/124.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/124.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/125.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/125.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/126.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/126.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/127.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/127.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/128.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/128.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/129.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/129.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/13.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/13.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/130.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/130.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/131.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/131.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/132.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/132.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/133.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/133.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/134.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/134.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/135.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/135.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/136.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/136.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/137.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/137.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/138.txt
vendored
Normal file → Executable file
0
testdata/data/inversedata/138.txt
vendored
Normal file → Executable file
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user