增加今日头条简版一个内容板块的抓取,网站限制最多500页

This commit is contained in:
lzjqsdd 2016-04-19 10:49:43 +08:00
parent 387eddfa2f
commit a37d701063
31 changed files with 646 additions and 0 deletions

BIN
news_spider/news.db Normal file

Binary file not shown.

View File

Binary file not shown.

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class NewsSpiderItem(scrapy.Item):
    """Container for one scraped news article.

    Every key a spider assigns must be declared here with ``scrapy.Field()``;
    ``scrapy.Item`` raises ``KeyError`` on assignment to an undeclared key.
    """

    # Headline text of the article.
    title = scrapy.Field()
    # Publication time, stored exactly as the spider scraped it.
    time = scrapy.Field()
    # Article body text.
    content = scrapy.Field()
    # Short summary; the JSON-API toutiao spider assigns item['abstract'],
    # which failed with KeyError while this field was undeclared.
    abstract = scrapy.Field()

Binary file not shown.

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import codecs
import json
class NewsSpiderPipeline(object):
    """Pass-through pipeline: every item is handed on untouched."""

    def process_item(self, item, spider):
        """Return ``item`` exactly as received; ``spider`` is not consulted."""
        unchanged = item
        return unchanged
class TouTiaoPipeline(object):
    """Write every item to ``toutiao.json`` as one JSON object per line.

    The file is opened (and truncated) when the pipeline is created and is
    closed by ``close_spider`` so buffered lines are flushed to disk.
    """

    def __init__(self):
        # Binary mode: process_item() writes already-encoded bytes.
        self.file = open('toutiao.json', 'wb')

    def process_item(self, item, spider):
        """Serialise ``item`` as a single JSON line and return it unchanged.

        ``spider`` is accepted for the pipeline interface and is not used.
        """
        line = json.dumps(dict(item)) + '\n'
        # json.dumps escapes non-ASCII characters by default, so the text is
        # pure ASCII; encoding it explicitly keeps the write valid for a
        # handle opened in binary mode on both Python 2 and Python 3.
        self.file.write(line.encode('utf-8'))
        return item

    def close_spider(self, spider):
        """Close the output file; safe to call more than once."""
        if not self.file.closed:
            self.file.close()

Binary file not shown.

View File

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# Scrapy settings for news_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
# Core project settings: the spider package (used both for lookup and for
# newly generated spiders) and the bot name.
NEWSPIDER_MODULE = 'news_spider.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]
BOT_NAME = 'news_spider'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'news_spider (+http://www.yourdomain.com)'
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS=32
# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY=3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN=16
#CONCURRENT_REQUESTS_PER_IP=16
# Disable cookies (enabled by default)
#COOKIES_ENABLED=False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED=False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'news_spider.middlewares.MyCustomSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'news_spider.middlewares.MyCustomDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'news_spider.pipelines.TouTiaoPipeline': 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
# NOTE: AutoThrottle will honour the standard settings for concurrency and delay
#AUTOTHROTTLE_ENABLED=True
# The initial download delay
#AUTOTHROTTLE_START_DELAY=5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY=60
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG=False
# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED=True
#HTTPCACHE_EXPIRATION_SECS=0
#HTTPCACHE_DIR='httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES=[]
#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'

Binary file not shown.

View File

@ -0,0 +1,43 @@
import scrapy
from news_spider.items import NewsSpiderItem
import json
import time
class TouTiaoSpider(scrapy.Spider):
    """Crawl the toutiao.com society listing and scrape each linked article.

    ``parse`` handles listing pages: it requests every article link found and
    then the next listing page (``/p2``, ``/p3``, ...) up to ``max_page``.
    ``parseNews`` turns an article page into a ``NewsSpiderItem``.
    """

    name = 'toutiao'
    allowed_domains = ["toutiao.com"]
    start_urls = [
        'http://toutiao.com/articles_news_society'
    ]
    base_class_url = 'http://toutiao.com/articles_news_society'
    base_url = 'http://toutiao.com'
    # Number of the listing page most recently requested.
    page = 1
    # Last listing page to request; previously a literal 30 inside parse().
    max_page = 30

    def parse(self, response):
        """Yield a request per article link, then one for the next listing page."""
        print(self.page)
        for url in response.xpath("//div[@class='info']//a/@href").extract():
            # Each href is appended to the site root to form the article URL.
            yield scrapy.Request(self.base_url + url, self.parseNews)
        self.page += 1
        if self.page <= self.max_page:
            # No callback is passed, so the response comes back to parse().
            yield scrapy.Request(self.base_class_url + '/p' + str(self.page))

    def parseNews(self, response):
        """Yield one item (title, time, content) per article container found.

        An article whose title or time cannot be extracted is skipped with a
        warning instead of raising ``IndexError``.
        """
        for article in response.xpath("//div[@id='pagelet-article']"):
            # NOTE(review): the expressions below start with "//", so they
            # are evaluated against the whole document rather than relative
            # to ``article``. Left unchanged to keep matching the same nodes;
            # confirm against the page structure before making them relative.
            titles = article.xpath(
                "//div[@class='article-header']/h1/text()").extract()
            times = article.xpath(
                "//div[@id='pagelet-article']//span[@class='time']/text()").extract()
            if not titles or not times:
                self.logger.warning(
                    'title or time missing, skipping %s', response.url)
                continue
            paragraphs = article.xpath(
                "//div[@class='article-content']//p/text()").extract()
            item = NewsSpiderItem()
            item['title'] = titles[0]
            item['time'] = times[0]
            # Always set, so every item has the same keys; an article with no
            # paragraphs gets an empty string.
            item['content'] = ''.join(paragraphs)
            yield item

    def printC(self, text):
        """Debug helper: print each string in ``text`` encoded as UTF-8."""
        for t in text:
            print(t.encode('utf-8'))

View File

@ -0,0 +1,33 @@
#coding:cp936
import scrapy
from news_spider.items import NewsSpiderItem
import json
import time
class TouTiaoSpider(scrapy.Spider):
    """Crawl the toutiao.com recent-articles JSON API page by page.

    ``parse`` reads one API page, requests every article it lists and then
    the following API page, using the ``next.max_behot_time`` value from the
    response as the cursor. ``parseNews`` completes and emits the item.
    """

    name = 'toutiao'
    # allowed_domains = ["toutiao.com"]
    # Both timestamps are computed once, when the class body is executed.
    maxbetime = int(time.time())
    aliastime = int(time.time() * 1000)
    baseurl = 'http://toutiao.com/api/article/recent/?source=2&count=20&category=__all__&offset=0'
    # baseurl = 'http://toutiao.com/api/article/recent/?source=2&count=100&category=news_hot&utm_source=toutiao&offset=0'
    start_urls = [baseurl + '&max_behot_time=' + str(maxbetime) + '&_=' + str(aliastime)]

    def parse(self, response):
        """Yield a request per listed article, then one for the next API page.

        Entries without a ``url`` are skipped, and pagination stops when the
        response carries no ``next.max_behot_time`` cursor.
        """
        print(response.url)
        data = json.loads(response.body)
        for news in data.get('data') or []:
            news_url = news.get('url')
            if not news_url:
                continue
            item = NewsSpiderItem()
            item['title'] = news.get('title')
            item['time'] = news.get('datetime')
            # scrapy.Item raises KeyError for undeclared fields, which used to
            # abort this callback on the first entry; only store the abstract
            # when the item class declares that field.
            if 'abstract' in item.fields:
                item['abstract'] = news.get('abstract')
            # Carry the partially filled item to parseNews via request meta.
            yield scrapy.Request(news_url, callback=self.parseNews,
                                 meta={'item': item})
        cursor = (data.get('next') or {}).get('max_behot_time')
        if cursor is None:
            # No cursor returned: nothing further to page through.
            return
        self.maxbetime = cursor
        nexturl = self.baseurl + '&max_behot_time=' + str(self.maxbetime)
        yield scrapy.Request(nexturl, callback=self.parse)

    def parseNews(self, response):
        """Print the article paragraphs and yield the item started in parse().

        The paragraphs are joined into ``item['content']``. Nothing is
        yielded when the request carried no item in its meta.
        """
        paragraphs = response.xpath("//div[@class='article-content']/p/text()").extract()
        print(paragraphs)
        item = response.meta.get('item')
        if item is not None:
            item['content'] = ''.join(paragraphs)
            yield item

Binary file not shown.

View File

@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

Binary file not shown.

11
news_spider/scrapy.cfg Normal file
View File

@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html
[settings]
default = news_spider.settings
[deploy]
#url = http://localhost:6800/
project = news_spider

249
tutorial/item.json Normal file
View File

@ -0,0 +1,249 @@
[{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Resources"], "link": ["/Computers/Programming/Resources/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["eff-bot's Daily Python URL"], "link": ["http://www.pythonware.com/daily/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Contains links to assorted resources from the Python universe, compiled by PythonWare.\r\n \r\n ", "\r\n "]},
{"title": ["O'Reilly Python Center"], "link": ["http://oreilly.com/python/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Features Python books, resources, news and articles.\r\n \r\n ", "\r\n "]},
{"title": ["Python Developer's Guide"], "link": ["https://www.python.org/dev/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Resources for reporting bugs, accessing the Python source tree with CVS and taking part in the development of Python.\r\n \r\n ", "\r\n "]},
{"title": ["Social Bug"], "link": ["http://win32com.goermezer.de/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Scripts, examples and news about Python programming for the Windows platform.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Languages: Python: Resources"], "link": ["/Computers/Programming/Languages/Python/Resources/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Modules"], "link": ["/Computers/Programming/Languages/Python/Modules/Web/"], "desc": ["\r\n ", "@\u00a0", "\r\n "]},
{"title": ["Templating Libraries"], "link": ["/Computers/Programming/Languages/Python/Web/Templating_Libraries/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Web Frameworks"], "link": ["/Computers/Programming/Languages/Python/Web/Web_Frameworks/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: FAQs, Help, and Tutorials"], "link": ["/Computers/Programming/FAQs%2C_Help%2C_and_Tutorials/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Computers: Programming: Languages: Python: Articles and Reviews"], "link": ["/Computers/Programming/Languages/Python/Articles_and_Reviews/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Deutsch"], "link": ["/World/Deutsch/Computer/Programmieren/Sprachen/Python/Anleitungen%2C_Hilfen_und_FAQs"], "desc": ["\r\n \t", "\r\n ", "\r\n "]},
{"title": ["Python Documentation Index"], "link": ["https://www.python.org/doc/"], "desc": ["\r\n \r\n ", " \r\n\t\t\t\r\n - Official tutorial and references, including library/module usage, Macintosh libraries, language syntax, extending/embedding, and the Python/C API. Also links to off-site beginners' tutorials, HOWTOs, and many special interest topics.\r\n \r\n ", "\r\n "]},
{"title": ["About.com: Python"], "link": ["http://python.about.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Includes articles on simple script solutions, database management, web development, network protocols, and other advanced topics.\r\n \r\n ", "\r\n "]},
{"title": ["A Beginner's Python Tutorial"], "link": ["http://www.sthurlow.com/python/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Python tutorial originally written for the modding community of Firaxis' Civilization. Useful to beginners with little or no programming knowledge.\r\n \r\n ", "\r\n "]},
{"title": ["Cameron Laird on Python and the Web"], "link": ["http://phaseit.net/claird/comp.lang.python/web_python.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Discusses both client-side and server-side Web programming.\r\n \r\n ", "\r\n "]},
{"title": ["Code Like a Pythonista: Idiomatic Python"], "link": ["http://python.net/~goodger/projects/pycon/2007/idiomatic/handout.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A tutorial that teaches common Python programming idioms used by experienced programmers, but may not be obvious to newcomers.\r\n \r\n ", "\r\n "]},
{"title": ["A Course in Python/CGI"], "link": ["http://www.upriss.org.uk/python/PythonCourse.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - This site contains materials from a 13 week course for learning Python and CGI. Suited for self-study.\r\n \r\n ", "\r\n "]},
{"title": ["The Django Book"], "link": ["http://www.djangobook.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Free (Only E-Book version) Django Web Framework E-Book by APress. The best book for learning Django. Requires good understanding of Python.\r\n \r\n ", "\r\n "]},
{"title": ["Five Minutes to a Python CGI"], "link": ["http://www.ddj.com/184412536"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By David Mertz. Brief introduction to writing CGI programs, Python CGI module, formatting output, error logging/debugging.\r\n \r\n ", "\r\n "]},
{"title": ["Instant Hacking"], "link": ["http://hetland.org/writing/instant-hacking.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Magnus Lie Hetland. Minimal, concise, general introduction to programming, via Python, moves very quickly. English, Italian, Polish, Japanese, Serbian, Korean.\r\n \r\n ", "\r\n "]},
{"title": ["Instant Python"], "link": ["http://hetland.org/writing/instant-python.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Magnus Lie Hetland. Minimal introduction for experienced programmers. Treats basics, OO programming, Jedi mind trick. English, French, Italian, Portuguese, Spanish; German, Norwegian; Lithuanian, Polish, Russian; Japanese, Korean.\r\n \r\n ", "\r\n "]},
{"title": ["One Day of IDLE Toying"], "link": ["https://hkn.eecs.berkeley.edu/~dyoo/python/idle_intro/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Danny Yoo. Visual guide to using IDLE, a simple interactive shell for Python programming. Each step of this tutorial has screenshots for absolute beginners. English, Dutch, German; Greek, French, Italian, Portuguese; Indonesian.\r\n \r\n ", "\r\n "]},
{"title": ["Python and UML"], "link": ["http://www.objectsbydesign.com/projects/python_uml.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Can the UML help Python developers? How about CP4E?\r\n \r\n ", "\r\n "]},
{"title": ["Python CGI"], "link": ["http://www.cs.virginia.edu/~lab2q/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An Interactive Instruction.\r\n \r\n ", "\r\n "]},
{"title": ["Python Database Programming"], "link": ["https://wiki.python.org/moin/DatabaseProgramming/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Programs can use minimal code to access, display, and update a database. This guide provides \"links to relevant Python modules, documentation, and projects\" concerning databases, as well as adding persistence to Python objects.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Personal Pages"], "link": ["/Computers/Programming/Personal_Pages/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Altis, Kevin"], "link": ["http://radio-weblogs.com/0102677/categories/python/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - News and information related to Python and its promotion as \"most powerful language you can still read.\"\r\n \r\n ", "\r\n "]},
{"title": ["Anand Pillai - Random bytes on technology and open source"], "link": ["http://randombytes.blogspot.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Python programmer's weblog.\r\n \r\n ", "\r\n "]},
{"title": ["Blended Technologies"], "link": ["http://www.blendedtechnologies.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Programming weblog for a small, newly formed, and curious company; covers mostly Python.\r\n \r\n ", "\r\n "]},
{"title": ["Gruet, Richard"], "link": ["http://rgruet.free.fr/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Python Language Quick Reference versions 1.5 to 2.3. OmniORBpy utilities and other Python stuff.\r\n \r\n ", "\r\n "]},
{"title": ["Hinsen, Konrad"], "link": ["http://starship.python.net/crew/hinsen/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Python for science. In particular computational chemistry.\r\n \r\n ", "\r\n "]},
{"title": ["Python for Beginners"], "link": ["https://wiki.python.org/moin/BeginnersGuide"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - New to programming? This page lists some places that can get you started quickly.\r\n \r\n ", "\r\n "]},
{"title": ["Python Programming for Beginners"], "link": ["http://www.linuxjournal.com/article/3946"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Shows how to write programs that use command-line options, read and write to pipes, access environment variables, handle interrupts, read from and write to files, create temporary files, write to system logs.\r\n \r\n ", "\r\n "]},
{"title": ["Python Programming Tutorial"], "link": ["http://www.dickbaldwin.com/tocpyth.htm"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Richard G. Baldwin. Separate lessons teach programming basics. Free online, fee download files.\r\n \r\n ", "\r\n "]},
{"title": ["Python Quick Reference"], "link": ["http://www.brunningonline.net/simon/python/PQR.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Simon Brunning. Thorough 'cheat sheet'; brief reminders for nearly each language aspect: syntax nuances; built-in features, statements, modules; basic, advanced types, operations; lexical entities, common development tools. For several Python versions; HTML, Zip, Windows Help, text.\r\n \r\n ", "\r\n "]},
{"title": ["Python Recipes"], "link": ["http://code.activestate.com/recipes/langs/python/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Collaborative website built by ActiveState and O'Reilly, hosts user contributions; collection of recipes.\r\n \r\n ", "\r\n "]},
{"title": ["Python SIGs"], "link": ["https://www.python.org/community/sigs/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Archives of current and past listserv discussions on a LOT of interesting topics. GREAT for researching problems.\r\n \r\n ", "\r\n "]},
{"title": ["Python Tutorial"], "link": ["http://www.mtdev.com/2002/08/python-tutorial"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Introduction to Python, where to find it, how to install, and create a very simple script.\r\n \r\n ", "\r\n "]},
{"title": ["A Quick Tour of Python"], "link": ["http://stsdas.stsci.edu/pyraf/python_quick_tour.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Short overview of some basics, for users with some programming knowledge.\r\n \r\n ", "\r\n "]},
{"title": ["TCC Publications: Programming Languages"], "link": ["http://infohost.nmt.edu/tcc/help/pubs/lang.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Python language quick reference (28 pp), Tkinter reference (84 pp), Python Imaging Library PIL quick reference (6 pp). Postscript and PDF formats available.\r\n \r\n ", "\r\n "]},
{"title": ["Tutorialized.com - Free Python tutorials"], "link": ["http://www.tutorialized.com/tutorials/Python/1"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A collection of Python tutorials from resources around the Internet.\r\n \r\n ", "\r\n "]},
{"title": ["Wikibooks Programming Python"], "link": ["http://en.wikibooks.org/wiki/Programming:Python"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A tutorial for python on Wikibooks. It is editable by anyone.\r\n \r\n ", "\r\n "]},
{"title": ["Writing CGI Programs in Python"], "link": ["http://www.devshed.com/c/a/Python/Writing-CGI-Programs-in-Python/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Introduces using Python for CGI applications. Has database access examples. Several parts. [Developer Shed]\r\n \r\n ", "\r\n "]},
{"title": ["Kuchling, A.M."], "link": ["http://www.amk.ca/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Lots of Python code and information, several quotation collections, and a Robertson Davies page.\r\n \r\n ", "\r\n "]},
{"title": ["Learning Python"], "link": ["http://www.learningpython.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A weblog tracking one man's journey into Python.\r\n \r\n ", "\r\n "]},
{"title": ["Lundh, Fredrik"], "link": ["http://effbot.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Personal guide to libraries, modules and extensions. Includes a weblog with Python news and personal ramblings.\r\n \r\n ", "\r\n "]},
{"title": ["Norlanders, Oscar"], "link": ["http://www.codeape.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Blog about programming. TeX bibliography tweak. Resume. Links.\r\n \r\n ", "\r\n "]},
{"title": ["van Rossum, Guido"], "link": ["https://www.python.org/~guido/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Personal home page of the creator of the Python programming language.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Languages: Python: Resources"], "link": ["/Computers/Programming/Languages/Python/Resources/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["BayPIGgies"], "link": ["http://www.baypiggies.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The Silicon Valley-San Francisco Bay Area Python Users Group.\r\n \r\n ", "\r\n "]},
{"title": ["Chicago Python User Group"], "link": ["http://www.chipy.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Meetings, book reviews and mailing list.\r\n \r\n ", "\r\n "]},
{"title": ["Clepy"], "link": ["http://groups.google.com/group/clepy/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A group of Python enthusiasts living in the Cleveland, Ohio, area.\r\n \r\n ", "\r\n "]},
{"title": ["Dallas Ft. Worth Pythoneers"], "link": ["http://www.dfwpython.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Offers mailing list, a source repository and member profiles.\r\n \r\n ", "\r\n "]},
{"title": ["Fredericksburg ZPUG"], "link": ["http://www.zope.org/Members/poster/zpug_info"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Fredericksburg Zope and Python Users Group.\r\n \r\n ", "\r\n "]},
{"title": ["FRPythoneers"], "link": ["https://wiki.python.org/moin/FrontRangePythoneers"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Python user group in the Fort Collins area that meets in Broomfield, CO.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Computer Science: Conferences"], "link": ["/Computers/Computer_Science/Conferences/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Computers: Programming: Conferences"], "link": ["/Computers/Programming/Conferences/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Computers: Programming: Languages: Perl: Conferences"], "link": ["/Computers/Programming/Languages/Perl/Conferences/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Computers: Programming: Languages: Ruby: Conferences"], "link": ["/Computers/Programming/Languages/Ruby/Conferences/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["The GrimPypers"], "link": ["http://tech.groups.yahoo.com/group/grimpypers/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A group of Python users, programmers and enthusiasts based in Grimstad, Norway.\r\n \r\n ", "\r\n "]},
{"title": ["Michipug"], "link": ["http://groups.google.com/group/michipug"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Michigan (Detroit/Ann Arbor) Python group.\r\n \r\n ", "\r\n "]},
{"title": ["Omaha Python Users Group"], "link": ["http://www.omahapython.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Python users group in the Omaha, NE Metro area.\r\n \r\n ", "\r\n "]},
{"title": ["OPAG"], "link": ["http://opag.ca/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Ottawa Python Authors Group. A group devoted to learning, using and providing resources for Python. OPAG also serves as a general gathering place for Python programmers from the Ottawa region and beyond.\r\n \r\n ", "\r\n "]},
{"title": ["PyAtl"], "link": ["http://www.meetup.com/python-atlanta/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Atlanta, Georgia Python User Group.\r\n \r\n ", "\r\n "]},
{"title": ["PyGTA"], "link": ["http://www.engcorp.com/pygta"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The Toronto Python/Zope User Group meets regularly to explore the expanding world of Python and Zope.\r\n \r\n ", "\r\n "]},
{"title": ["SeaPig"], "link": ["http://www.seapig.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The Seattle Python Interest Group.\r\n \r\n ", "\r\n "]},
{"title": ["TriZPUG"], "link": ["http://trizpug.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - he Triangle Zope/Python User Group in the Raleigh-Durham-Chapel Hill area of North Carolina, USA.\r\n \r\n ", "\r\n "]},
{"title": ["Utah Python User Group"], "link": ["http://groups.google.com/group/utahpython"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Offers meeting information, mailing list and links to members' blogs.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Zope"], "link": ["/Computers/Software/Internet/Servers/Application/Zope/"], "desc": ["\r\n ", "@\u00a0", "\r\n "]},
{"title": ["Computers: Companies: Software Development"], "link": ["/Computers/Companies/Software_Development/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["bridgekeeper"], "link": ["http://www.crazy-compilers.com/bridgekeeper/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Perl to Python source code conversion service.\r\n \r\n ", "\r\n "]},
{"title": ["decompyle"], "link": ["http://www.crazy-compilers.com/decompyle/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A service to decompile Python bytecode to readable Python source files.\r\n \r\n ", "\r\n "]},
{"title": ["Pythonware"], "link": ["http://www.pythonware.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Developers of the PythonWorks IDE, and useful modules such as the Python Imaging Library and XML-RPC implementation.\r\n \r\n ", "\r\n "]},
{"title": ["ReportLab"], "link": ["http://www.reportlab.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Authors of PDFgen, for generating professionally formatted printable documents on the fly.\r\n \r\n ", "\r\n "]},
{"title": ["Xellsoft"], "link": ["http://www.xellsoft.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Specialize in Python programming technology and object oriented databases, develop custom software and offer consulting in the area of internet, data communication, technical and scientific programming.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Directories"], "link": ["/Computers/Programming/Languages/Python/Modules/Directories/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Cryptography"], "link": ["/Computers/Programming/Languages/Python/Modules/Cryptography/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Databases and Persistence"], "link": ["/Computers/Programming/Languages/Python/Modules/Databases_and_Persistence/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Distributed Computing"], "link": ["/Computers/Programming/Languages/Python/Modules/Distributed_Computing/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Graphics"], "link": ["/Computers/Programming/Languages/Python/Modules/Graphics/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["GUI"], "link": ["/Computers/Programming/Languages/Python/Modules/GUI/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Math and Calculations"], "link": ["/Computers/Programming/Languages/Python/Modules/Math_and_Calculations/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Network"], "link": ["/Computers/Programming/Languages/Python/Modules/Network/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Scientific"], "link": ["/Computers/Programming/Languages/Python/Modules/Scientific/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Text Processing"], "link": ["/Computers/Programming/Languages/Python/Modules/Text_Processing/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Web"], "link": ["/Computers/Programming/Languages/Python/Modules/Web/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["XML"], "link": ["/Computers/Programming/Languages/Python/Modules/XML/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Languages: Python: Resources"], "link": ["/Computers/Programming/Languages/Python/Resources/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Computers: Programming: Languages: Ruby: Books"], "link": ["/Computers/Programming/Languages/Ruby/Books/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Deutsch"], "link": ["/World/Deutsch/Computer/Programmieren/Sprachen/Python/B%C3%BCcher"], "desc": ["\r\n \t", "\r\n ", "\r\n "]},
{"title": ["\u0420\u0443\u0441\u0441\u043a\u0438\u0439"], "link": ["/World/Russian/%D0%9A%D0%BE%D0%BC%D0%BF%D1%8C%D1%8E%D1%82%D0%B5%D1%80%D1%8B/%D0%9F%D1%80%D0%BE%D0%B3%D1%80%D0%B0%D0%BC%D0%BC%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5/%D0%AF%D0%B7%D1%8B%D0%BA%D0%B8/Python/%D0%9A%D0%BD%D0%B8%D0%B3%D0%B8"], "desc": ["\r\n \t", "\r\n ", "\r\n "]},
{"title": ["Core Python Programming"], "link": ["http://www.pearsonhighered.com/educator/academic/product/0,,0130260363,00%2Ben-USS_01DBC.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Wesley J. Chun; Prentice Hall PTR, 2001, ISBN 0130260363. For experienced developers to improve extant skills; professional level examples. Starts by introducing syntax, objects, error handling, functions, classes, built-ins. [Prentice Hall]\r\n \r\n ", "\r\n "]},
{"title": ["Data Structures and Algorithms with Object-Oriented Design Patterns in Python"], "link": ["http://www.brpreiss.com/books/opus7/html/book.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The primary goal of this book is to promote object-oriented design using Python and to illustrate the use of the emerging object-oriented design patterns.\r\nA secondary goal of the book is to present mathematical tools just in time. Analysis techniques and proofs are presented as needed and in the proper context.\r\n \r\n ", "\r\n "]},
{"title": ["Dive Into Python 3"], "link": ["http://www.diveintopython.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Mark Pilgrim, Guide to Python 3 and its differences from Python 2. Each chapter starts with a real code sample and explains it fully. Has a comprehensive appendix of all the syntactic and semantic changes in Python 3\r\n\r\n\r\n \r\n ", "\r\n "]},
{"title": ["Data Formats"], "link": ["/Computers/Programming/Languages/Python/Modules/Data_Formats/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Email"], "link": ["/Computers/Programming/Languages/Python/Modules/Email/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Platform Specific"], "link": ["/Computers/Programming/Languages/Python/Modules/Platform_Specific/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Terminal IO"], "link": ["/Computers/Programming/Languages/Python/Modules/Terminal_IO/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Libraries"], "link": ["/Computers/Programming/Libraries/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["config"], "link": ["http://www.red-dove.com/python_config.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Module allows implement a hierarchical configuration scheme for Python applications.\r\n \r\n ", "\r\n "]},
{"title": ["ExpectPy"], "link": ["http://expectpy.sourceforge.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An extension for a Python-feel to the Expect library. [Open Source, LGPL]\r\n \r\n ", "\r\n "]},
{"title": ["Grouch"], "link": ["https://www.mems-exchange.org/software/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Web development related open source software packages developed by MEMS and Nanotechnology Exchange. [Open Source, Python license]\r\n \r\n ", "\r\n "]},
{"title": ["log4p"], "link": ["http://www.red-dove.com/python_logging.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The Logging Toolkit For Python, modeled after the Java toolkit log4j. [Open Source, IBM Public License]\r\n \r\n ", "\r\n "]},
{"title": ["mxBase package extensions for Python"], "link": ["http://www.egenix.com/products/python/mxBase/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The eGenix.com mx Extensions for Python are a collection of professional quality Python software tools which enhance Python's usability in many important areas such as ODBC database connectivity, fast text processing, date/time processing and web site programming.\r\n \r\n ", "\r\n "]},
{"title": ["Orange"], "link": ["http://www.ailab.si/orange"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A free, component-based, public domain data mining software, which includes a range of preprocessing, modelling and data exploration techniques. [Open Source, GPL]\r\n \r\n ", "\r\n "]},
{"title": ["Parallel Python"], "link": ["http://www.parallelpython.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Provides an open source module to allow parallel execution of code on systems with multiple processors and on clusters. Features list, documentation, download, code samples, and a discussion forum.\r\n \r\n ", "\r\n "]},
{"title": ["Sisyphus"], "link": ["http://www.livinglogic.de/Python/sisyphus/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A module that simplifies running Python stuff as cron jobs. [Open Source, Python license]\r\n \r\n ", "\r\n "]},
{"title": ["The Snack Sound Toolkit"], "link": ["http://www.speech.kth.se/snack/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Module for basic sound handling (sound card and disk I/O); includes primitives for sound visualization, e.g. waveforms and spectrograms. [Open Source, GPL]\r\n \r\n ", "\r\n "]},
{"title": ["txObject ATK"], "link": ["http://txobject.sourceforge.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Contains the following parts: Object Library, IO/Timers, Threads, Inter-Process/Distributed Communication. [Open Source, GPL]\r\n \r\n ", "\r\n "]},
{"title": ["Foundations of Python Network Programming"], "link": ["http://rhodesmill.org/brandon/2011/foundations-of-python-network-programming/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - This book covers a wide range of topics. From raw TCP and UDP to encryption with TSL, and then to HTTP, SMTP, POP, IMAP, and ssh. It gives you a good understanding of each field and how to do everything on the network with Python.\r\n \r\n ", "\r\n "]},
{"title": ["Free Python books"], "link": ["http://www.techbooksforfree.com/perlpython.shtml"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Free Python books and tutorials.\r\n \r\n ", "\r\n "]},
{"title": ["FreeTechBooks: Python Scripting Language"], "link": ["http://www.freetechbooks.com/python-f6.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Annotated list of free online books on Python scripting language. Topics range from beginner to advanced.\r\n \r\n ", "\r\n "]},
{"title": ["How to Think Like a Computer Scientist: Learning with Python"], "link": ["http://greenteapress.com/thinkpython/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Allen B. Downey, Jeffrey Elkner, Chris Meyers; Green Tea Press, 2002, ISBN 0971677506. Teaches general principles of programming, via Python as subject language. Thorough, in-depth approach to many basic and intermediate programming topics. Full text online and downloads: HTML, PDF, PS, LaTeX. [Free, Green Tea Press]\r\n \r\n ", "\r\n "]},
{"title": ["An Introduction to Python"], "link": ["http://www.network-theory.co.uk/python/intro/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161769. Printed edition of official tutorial, for v2.x, from Python.org. [Network Theory, online]\r\n \r\n ", "\r\n "]},
{"title": ["Learn to Program Using Python"], "link": ["http://www.freenetpages.co.uk/hp/alan.gauld/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Book by Alan Gauld with full text online. Introduction for those learning programming basics: terminology, concepts, methods to write code. Assumes no prior knowledge but basic computer skills.\r\n \r\n ", "\r\n "]},
{"title": ["Making Use of Python"], "link": ["http://www.wiley.com/WileyCDA/WileyTitle/productCd-0471219754.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Rashi Gupta; John Wiley and Sons, 2002, ISBN 0471219754. Covers language basics, use for CGI scripting, GUI development, network programming; shows why it is one of more sophisticated of popular scripting languages. [Wiley]\r\n \r\n ", "\r\n "]},
{"title": ["Practical Python"], "link": ["http://hetland.org/writing/practical-python/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Magnus Lie Hetland; Apress LP, 2002, ISBN 1590590066. Readable guide to ideas most vital to new users, from basics common to high level languages, to more specific aspects, to a series of 10 ever more complex programs. [Apress]\r\n \r\n ", "\r\n "]},
{"title": ["Pro Python System Administration"], "link": ["http://sysadminpy.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Rytis Sileika, ISBN13: 978-1-4302-2605-5, Uses real-world system administration examples like manage devices with SNMP and SOAP, build a distributed monitoring system, manage web applications and parse complex log files, monitor and manage MySQL databases.\r\n \r\n ", "\r\n "]},
{"title": ["Programming in Python 3 (Second Edition)"], "link": ["http://www.qtrac.eu/py3book.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Complete Introduction to the Python 3.\r\n \r\n ", "\r\n "]},
{"title": ["Python 2.1 Bible"], "link": ["http://www.wiley.com/WileyCDA/WileyTitle/productCd-0764548077.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Dave Brueck, Stephen Tanner; John Wiley and Sons, 2001, ISBN 0764548077. Full coverage, clear explanations, hands-on examples, full language reference; shows step by step how to use components, assemble them, form full-featured programs. [John Wiley and Sons]\r\n \r\n ", "\r\n "]},
{"title": ["Python 3 Object Oriented Programming"], "link": ["https://www.packtpub.com/python-3-object-oriented-programming/book"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A step-by-step tutorial for OOP in Python 3, including discussion and examples of abstraction, encapsulation, information hiding, and raise, handle, define, and manipulate exceptions.\r\n \r\n ", "\r\n "]},
{"title": ["Python Language Reference Manual"], "link": ["http://www.network-theory.co.uk/python/language/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161785. Printed edition of official language reference, for v2.x, from Python.org, describes syntax, built-in datatypes. [Network Theory, online]\r\n \r\n ", "\r\n "]},
{"title": ["Python Programming Patterns"], "link": ["http://www.pearsonhighered.com/educator/academic/product/0,,0130409561,00%2Ben-USS_01DBC.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Thomas W. Christopher; Prentice Hall PTR, 2002, ISBN 0130409561. Shows how to write large programs, introduces powerful design patterns that deliver high levels of robustness, scalability, reuse.\r\n \r\n ", "\r\n "]},
{"title": ["Python Programming with the Java Class Libraries: A Tutorial for Building Web and Enterprise Applications with Jython"], "link": ["http://www.informit.com/store/product.aspx?isbn=0201616165&redir=1"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Richard Hightower; Addison-Wesley, 2002, 0201616165. Begins with Python basics, many exercises, interactive sessions. Shows programming novices concepts and practical methods. Shows programming experts Python's abilities and ways to interface with Java APIs. [publisher website]\r\n \r\n ", "\r\n "]},
{"title": ["Python: Visual QuickStart Guide"], "link": ["http://www.pearsonhighered.com/educator/academic/product/0,,0201748843,00%2Ben-USS_01DBC.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Chris Fehily; Peachpit Press, 2002, ISBN 0201748843. Task-based, step-by-step visual reference guide, many screen shots, for courses in digital graphics; Web design, scripting, development; multimedia, page layout, office tools, operating systems. [Prentice Hall]\r\n \r\n ", "\r\n "]},
{"title": ["Sams Teach Yourself Python in 24 Hours"], "link": ["http://www.informit.com/store/product.aspx?isbn=0672317354"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Ivan Van Laningham; Sams Publishing, 2000, ISBN 0672317354. Split into 24 hands-on, 1 hour lessons; steps needed to learn topic: syntax, language features, OO design and programming, GUIs (Tkinter), system administration, CGI. [Sams Publishing]\r\n \r\n ", "\r\n "]},
{"title": ["Text Processing in Python"], "link": ["http://gnosis.cx/TPiP/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]\r\n \r\n ", "\r\n "]},
{"title": ["XML Processing with Python"], "link": ["http://www.informit.com/store/product.aspx?isbn=0130211192"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["CPython"], "link": ["https://www.python.org/download/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - The original implementation of Python, written in C.\r\n \r\n ", "\r\n "]},
{"title": ["IronPython"], "link": ["http://www.codeplex.com/wikipage?ProjectName=IronPython"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A fast Python implementation for .NET and Mono.\r\n \r\n ", "\r\n "]},
{"title": ["Jython"], "link": ["http://sourceforge.net/projects/jython/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A 100 % Pure Java implementation of the Python language.\r\n \r\n ", "\r\n "]},
{"title": ["Python for .NET"], "link": ["http://pythonnet.sourceforge.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Package gives nearly seamless integration with .NET Common Language Runtime: CLR. Strong scripting tool, allows scripting or building full programs in Python, using .NET services and components coded in any language targeting CLR: Managed C++, C#, VB, JScript.\r\n \r\n ", "\r\n "]},
{"title": ["Stackless Python"], "link": ["http://www.stackless.com/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An experimental implementation that supports continuations, generators, microthreads, and coroutines.\r\n \r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Languages: Python: Resources"], "link": ["/Computers/Programming/Languages/Python/Resources/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Charming Python"], "link": ["http://gnosis.cx/publish/tech_index_cp.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Column by David Mertz, Gnosis Software, Inc. Over 20 articles on Python programming, many topics.\r\n \r\n ", "\r\n "]},
{"title": ["Developing GNOME Application with Python"], "link": ["http://www.linuxfocus.org/English/July2000/article160.shtml"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Set of articles written for beginners in Gnome programming and GNU/Linux.\r\n \r\n ", "\r\n "]},
{"title": ["An Interview with Guido van Rossum"], "link": ["http://oreilly.com/pub/a/oreilly/frank/rossum_1099.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Python creator discusses features and future of the language. O'Reilly Media.\r\n \r\n ", "\r\n "]},
{"title": ["Mark Lutz's Python Advocacy Page"], "link": ["http://www.rmi.net/~lutz/advocacy.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Links to several old Python articles and reviews.\r\n \r\n ", "\r\n "]},
{"title": ["Python Enhancement Proposals (PEPs)"], "link": ["http://www.python.org/dev/peps/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Design documents providing information to the Python community, or describing a new feature for Python.\r\n \r\n ", "\r\n "]},
{"title": ["Python Programming Language"], "link": ["http://en.wikipedia.org/wiki/Python_programming_language"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Growing article, with links to many related topics. [Wikipedia]\r\n \r\n ", "\r\n "]},
{"title": ["Python Squeezes the Web"], "link": ["http://www.linuxplanet.com/linuxplanet/tutorials/1132/1/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A practical example of using Python to automate the crawl of web pages.\r\n \r\n ", "\r\n "]},
{"title": ["SOA and Web Services"], "link": ["http://www.ibm.com/developerworks/views/webservices/libraryview.jsp?search_by=python+web+services+developer"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Several articles related to the development of web services with Python, written by Mike Olson and Uche Ogbuji, 2001-2004. IBM developerWorks.\r\n \r\n ", "\r\n "]},
{"title": ["Interview of Guido van Rossum"], "link": ["http://linuxfr.org/2003/03/12/11491.html"], "desc": ["\r\n \r\n ", " \r\n\t\t\t\r\n - Interview of the BDFL by the visitors of a French Linux news portal (the English version is included).\r\n (March 12, 2003)\r\n ", "\r\n "]},
{"title": ["A Conversation with Guido van Rossum"], "link": ["http://www.artima.com/intv/guido.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Python creator talks with Bill Venners about topics relevant to Python and its community; 6 part series, Artima Software.\r\n (February 17, 2003)\r\n ", "\r\n "]},
{"title": ["Proper XML Output in Python"], "link": ["http://www.xml.com/pub/a/2002/11/13/py-xml.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Uche Ogbuji explores the intricacies of creating proper XML output in Python, including character set and encoding issues.\r\n (November 13, 2002)\r\n ", "\r\n "]},
{"title": ["Python Persistence Management"], "link": ["http://www.ibm.com/developerworks/linux/library/l-pypers.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Using serialization to store Python objects.\r\n (November 01, 2002)\r\n ", "\r\n "]},
{"title": ["A Tour of 4Suite"], "link": ["http://www.xml.com/pub/a/2002/10/16/py-xml.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - In this installment of Python and XML, Uche Ogbuji provides a tour of the core XML processing facilities of 4Suite.\r\n (October 16, 2002)\r\n ", "\r\n "]},
{"title": ["Make Python Run as Fast as C with Psyco"], "link": ["http://www.ibm.com/developerworks/linux/library/l-psyco.html"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Using psyco, the python specializing compiler.\r\n (October 01, 2002)\r\n ", "\r\n "]},
{"title": ["Simplified Exception Identification in Python"], "link": ["http://www.linuxjournal.com/article/5821"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Using functions and exception pattern dictionaries to simplify error recovery.\r\n (July 28, 2002)\r\n ", "\r\n "]},
{"title": ["Pick Up Some Python with This Script Walk-through"], "link": ["http://www.techrepublic.com/article/pick-up-some-python-with-this-script-walk-through/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An example conversion of a Perl script to Python.\r\n (June 18, 2002)\r\n ", "\r\n "]},
{"title": ["Space Shuttle Engineers Use Python to Streamline Mission Design"], "link": ["http://www.techrepublic.com/article/space-shuttle-engineers-use-python-to-streamline-mission-design/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Description of a practical use of Python by NASA, with quotes of Robin Friedrich, USA's Senior Project Engineer.\r\n (June 17, 2002)\r\n ", "\r\n "]},
{"title": ["Python: Yes, You Should Be Using It!"], "link": ["http://www.linux-mag.com/id/1025/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Alex Martelli gives an overview of the possibilities offered by Python, along with samples allowing the newcomer to experience Python code. [Free registration required to view this article.]\r\n (April 01, 2002)\r\n ", "\r\n "]},
{"title": ["Using Mix-ins with Python"], "link": ["http://www.linuxjournal.com/article/4540"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An explanation of the mix-in programming style as applied in Python.\r\n (April 01, 2001)\r\n ", "\r\n "]},
{"title": ["Embedding Python in Multi-Threaded C/C++ Applications"], "link": ["http://www.linuxjournal.com/article/3641"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A Linux Magazine article featuring an overview of the C/C++ API, explaining how to extend and embed Python in a multi-thread context.\r\n (May 01, 2000)\r\n ", "\r\n "]},
{"title": ["Why Python?"], "link": ["http://www.linuxjournal.com/article/3882"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Article by Eric Raymond where he explains reasons that led him to switch from Perl to Python.\r\n (May 01, 2000)\r\n ", "\r\n "]},
{"title": ["Linux Journal Python Interview"], "link": ["http://www.linuxjournal.com/article/5028"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An Interview with Guido van Rossum. A conversation with the creator of Python about an effort to teach Python to non-computer science students.\r\n (September 28, 1999)\r\n ", "\r\n "]},
{"title": ["Top"], "link": ["/"], "desc": ["\r\n\r\n "]},
{"title": ["Computers"], "link": ["/Computers/"], "desc": []},
{"title": ["Programming"], "link": ["/Computers/Programming/"], "desc": []},
{"title": ["Languages"], "link": ["/Computers/Programming/Languages/"], "desc": []},
{"title": ["Python"], "link": ["/Computers/Programming/Languages/Python/"], "desc": []},
{"title": [], "link": [], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Deployment"], "link": ["/Computers/Programming/Languages/Python/Development_Tools/Deployment/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Documentation Tools"], "link": ["/Computers/Programming/Languages/Python/Development_Tools/Documentation_Tools/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Foreign Language Interfaces"], "link": ["/Computers/Programming/Languages/Python/Development_Tools/Foreign_Language_Interfaces/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["GUI Builders"], "link": ["/Computers/Programming/Languages/Python/Development_Tools/GUI_Builders/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Integrated Development Environments"], "link": ["/Computers/Programming/Languages/Python/Development_Tools/Integrated_Development_Environments/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Performance and Testing"], "link": ["/Computers/Programming/Languages/Python/Development_Tools/Performance_and_Testing/"], "desc": ["\r\n ", "\u00a0", "\r\n "]},
{"title": ["Computers: Programming: Development Tools"], "link": ["/Computers/Programming/Development_Tools/"], "desc": ["\r\n ", " \r\n ", "\r\n "]},
{"title": ["Hap Python Remote Debugger"], "link": ["http://hapdebugger.sourceforge.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A remote debugger and IDE that can also be used for local debugging.\r\n \r\n ", "\r\n "]},
{"title": ["IPython"], "link": ["http://ipython.scipy.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An enhanced interactive Python shell with many features for object introspection, system shell access, and its own special command system for adding functionality when working interactively. [Open Source, LGPL]\r\n \r\n ", "\r\n "]},
{"title": ["PyCrust - The Flakiest Python Shell"], "link": ["http://sourceforge.net/projects/pycrust/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An interactive, graphical Python shell written in Python using wxPython.\r\n \r\n ", "\r\n "]},
{"title": ["Pymerase"], "link": ["http://pymerase.sourceforge.net/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A tool intended to generate a python object model, relational database, and an object-relational model connecting the two using UML or XML. However, it has been extended to also output GUI widgets and can be easily extended to output whatever else you might like. [Open source, MIT License]\r\n \r\n ", "\r\n "]},
{"title": ["PyReverse"], "link": ["http://www.logilab.org/2560/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - A set of tools for reverse engineering Python code, including creation of files readable by ArgoUML. Project information, downloads, and bug-track details. [Open Source, GPL]\r\n \r\n ", "\r\n "]},
{"title": ["Tixapps Applications Framework"], "link": ["http://tix.sourceforge.net/Tixapps/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - Tixapps applications are a suite of development applications that run with Tk/Tcl or Python, using the Tix widget set. [Open Source, BSD-like]\r\n \r\n ", "\r\n "]},
{"title": ["Winpdb"], "link": ["http://winpdb.org/"], "desc": ["\r\n\t\r\n ", " \r\n\t\t\t\r\n - An advanced python debugger, with support for smart breakpoints, multiple threads, namespace modification, embedded debugging and encrypted communication.\r\n \r\n ", "\r\n "]}]

11
tutorial/scrapy.cfg Normal file
View File

@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html
[settings]
default = tutorial.settings
[deploy]
#url = http://localhost:6800/
project = tutorial

View File

Binary file not shown.

View File

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class TutorialItem(scrapy.Item):
    """Empty item: declares no fields of its own.

    Add fields as class attributes, for example ``name = scrapy.Field()``.
    """
class DmozItem(scrapy.Item):
    """Item holding one scraped directory entry.

    Declares three fields: ``title``, ``link`` and ``desc``.
    """

    title = scrapy.Field()
    link = scrapy.Field()
    desc = scrapy.Field()

BIN
tutorial/tutorial/items.pyc Normal file

Binary file not shown.

View File

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
class TutorialPipeline(object):
    """Pass-through item pipeline: every item is handed back unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` exactly as received; ``spider`` is not used."""
        return item

View File

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# Scrapy settings for tutorial project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
# Name of the bot implemented by this Scrapy project.
BOT_NAME = "tutorial"
# Module in which the `genspider` command creates new spiders.
NEWSPIDER_MODULE = "tutorial.spiders"
# Modules Scrapy searches for spiders; the same single package as above.
SPIDER_MODULES = [NEWSPIDER_MODULE]
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'tutorial (+http://www.yourdomain.com)'
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS=32
# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY=3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN=16
#CONCURRENT_REQUESTS_PER_IP=16
# Disable cookies (enabled by default)
#COOKIES_ENABLED=False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED=False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'tutorial.middlewares.MyCustomSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'tutorial.middlewares.MyCustomDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'tutorial.pipelines.SomePipeline': 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
# NOTE: AutoThrottle will honour the standard settings for concurrency and delay
#AUTOTHROTTLE_ENABLED=True
# The initial download delay
#AUTOTHROTTLE_START_DELAY=5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY=60
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG=False
# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED=True
#HTTPCACHE_EXPIRATION_SECS=0
#HTTPCACHE_DIR='httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES=[]
#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,24 @@
import scrapy
from tutorial.items import DmozItem
class DmozSpider(scrapy.Spider):
    """Spider that starts at the dmoz Python directory page.

    ``parse`` follows the sub-directory links found on the start page and
    ``parse_dir_contents`` turns the list entries of each followed page
    into ``DmozItem`` objects.
    """

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python",
    ]

    def parse(self, response):
        """Yield one request per sub-directory link on the start page.

        Each request is handled by ``parse_dir_contents``.
        """
        for href in response.css("ul.directory.dir-col > li > a::attr('href')"):
            # Links on the page can be site-relative; resolve them against
            # the URL of the response before requesting them.
            url = response.urljoin(href.extract())
            # Call form of print: same output on Python 2, valid on Python 3.
            print(url)
            yield scrapy.Request(url, callback=self.parse_dir_contents)

    def parse_dir_contents(self, response):
        """Yield a ``DmozItem`` for every ``<li>`` nested inside a ``<ul>``.

        Every field stores the list returned by ``extract()``, so a field
        may be empty or hold several strings.
        """
        for sel in response.xpath('//ul/li'):
            item = DmozItem()
            item['title'] = sel.xpath('a/text()').extract()
            item['link'] = sel.xpath('a/@href').extract()
            item['desc'] = sel.xpath('text()').extract()
            yield item

Binary file not shown.

View File

@ -0,0 +1,24 @@
import scrapy
from tutorial.items import DmozItem
class DmozSpider(scrapy.Spider):
    """Spider that starts at the dmoz Python directory page.

    ``parse`` follows the sub-directory links found on the start page and
    ``parse_dir_contents`` turns the list entries of each followed page
    into ``DmozItem`` objects.
    """

    name = "dmoz"
    # Must match the host in start_urls; the previous value "domz.org" was a
    # typo, which makes the followed requests count as off-site.
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python",
    ]

    def parse(self, response):
        """Yield one request per sub-directory link on the start page.

        Each request is handled by ``parse_dir_contents``.
        """
        hrefs = response.css("ul.directory.dir-col > li > a::attr('href')").extract()
        for href in hrefs:
            # Call form of print: same output on Python 2, valid on Python 3.
            print(href)
            # Response.urljoin takes only the (possibly relative) URL and
            # resolves it against response.url itself.
            url = response.urljoin(href)
            yield scrapy.Request(url, callback=self.parse_dir_contents)

    def parse_dir_contents(self, response):
        """Yield a ``DmozItem`` for every ``<li>`` nested inside a ``<ul>``.

        Every field stores the list returned by ``extract()``, so a field
        may be empty or hold several strings.
        """
        for sel in response.xpath('//ul/li'):
            item = DmozItem()
            item['title'] = sel.xpath('a/text()').extract()
            item['link'] = sel.xpath('a/@href').extract()
            item['desc'] = sel.xpath('text()').extract()
            yield item

View File

@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

Binary file not shown.