Merge pull request #7 from JIMhackKING/master

优化代码,提高代码可移植性
This commit is contained in:
lzjqsdd 2020-02-25 17:34:25 +08:00 committed by GitHub
commit 58c576edda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
227 changed files with 56 additions and 6 deletions

2
.gitignore vendored Normal file → Executable file
View File

@ -5,3 +5,5 @@ data/title.json
data/cutnews
data/orinews
data/inversedata
**/*.pyc
tools/news.db

0
Frame.md Normal file → Executable file
View File

0
README.md Normal file → Executable file
View File

0
data/show.png Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 239 KiB

After

Width:  |  Height:  |  Size: 239 KiB

0
data/stopword.txt Normal file → Executable file
View File

0
ml/Cut.py Normal file → Executable file
View File

Binary file not shown.

0
ml/InverseIndex.py Normal file → Executable file
View File

Binary file not shown.

0
ml/Search.py Normal file → Executable file
View File

Binary file not shown.

0
ml/__init__.py Normal file → Executable file
View File

Binary file not shown.

0
news_spider/news_spider/__init__.py Normal file → Executable file
View File

Binary file not shown.

0
news_spider/news_spider/commands/__init__.py Normal file → Executable file
View File

0
news_spider/news_spider/commands/crawlall.py Normal file → Executable file
View File

0
news_spider/news_spider/items.py Normal file → Executable file
View File

Binary file not shown.

0
news_spider/news_spider/pipelines.py Normal file → Executable file
View File

27
news_spider/news_spider/rotateuseragent.py Normal file → Executable file
View File

@ -1,6 +1,6 @@
# -*-coding:utf-8-*-
from scrapy import log
import logging
"""避免被ban策略之一使用useragent池。
@ -8,7 +8,26 @@ from scrapy import log
"""
import random
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
from scrapy import signals
class UserAgentMiddleware(object):
"""This middleware allows spiders to override the user_agent"""
def __init__(self, user_agent='Scrapy'):
self.user_agent = user_agent
@classmethod
def from_crawler(cls, crawler):
o = cls(crawler.settings['USER_AGENT'])
crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
return o
def spider_opened(self, spider):
self.user_agent = getattr(spider, 'user_agent', self.user_agent)
def process_request(self, request, spider):
if self.user_agent:
request.headers.setdefault(b'User-Agent', self.user_agent)
class RotateUserAgentMiddleware(UserAgentMiddleware):
@ -19,8 +38,8 @@ class RotateUserAgentMiddleware(UserAgentMiddleware):
ua = random.choice(self.user_agent_list)
if ua:
#显示当前使用的useragent
print "********Current UserAgent:%s************",ua
log.msg('Current UserAgent:'+ua,log.INFO)
print "********Current UserAgent:%s************" % ua
logging.info('Current UserAgent:'+ua)
request.headers.setdefault('User-Agent', ua)
#the default user_agent_list composes chrome,I E,firefox,Mozilla,opera,netscape

0
news_spider/news_spider/settings.py Normal file → Executable file
View File

Binary file not shown.

0
news_spider/news_spider/setup.py Normal file → Executable file
View File

0
news_spider/news_spider/spiders/NetEase.py Normal file → Executable file
View File

0
news_spider/news_spider/spiders/Tencent.py Normal file → Executable file
View File

0
news_spider/news_spider/spiders/TouTiaoSpider.py Normal file → Executable file
View File

0
news_spider/news_spider/spiders/TouTiaoSpider.py.old Normal file → Executable file
View File

0
news_spider/news_spider/spiders/__init__.py Normal file → Executable file
View File

0
news_spider/scrapy.cfg Normal file → Executable file
View File

1
requirements.txt Executable file
View File

@ -0,0 +1 @@
Scrapy==1.7.3

0
test/test_tool.py Normal file → Executable file
View File

0
testdata/data/cutnews/0.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/1.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/10.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/11.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/12.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/13.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/14.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/2.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/3.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/4.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/5.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/6.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/7.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/8.txt vendored Normal file → Executable file
View File

0
testdata/data/cutnews/9.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/0.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/1.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/10.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/100.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/101.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/102.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/103.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/104.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/105.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/106.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/107.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/108.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/109.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/11.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/110.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/111.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/112.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/113.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/114.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/115.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/116.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/117.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/118.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/119.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/12.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/120.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/121.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/122.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/123.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/124.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/125.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/126.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/127.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/128.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/129.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/13.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/130.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/131.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/132.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/133.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/134.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/135.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/136.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/137.txt vendored Normal file → Executable file
View File

0
testdata/data/inversedata/138.txt vendored Normal file → Executable file
View File

Some files were not shown because too many files have changed in this diff Show More