[comments.py] Added new source_url column
This commit is contained in:
parent
462cb0eff1
commit
3d32ab6054
Binary file not shown.
@@ -507,11 +507,11 @@ class CommentsItem(scrapy.Item):
|
||||
likes = scrapy.Field(
|
||||
output_processor=reactions_strip
|
||||
)
|
||||
ahah = scrapy.Field()
|
||||
love = scrapy.Field()
|
||||
wow = scrapy.Field()
|
||||
sigh = scrapy.Field()
|
||||
grrr = scrapy.Field()
|
||||
share = scrapy.Field() # num of shares
|
||||
source_url = scrapy.Field()
|
||||
url = scrapy.Field()
|
||||
shared_from = scrapy.Field()
|
||||
#ahah = scrapy.Field()
|
||||
#love = scrapy.Field()
|
||||
#wow = scrapy.Field()
|
||||
#sigh = scrapy.Field()
|
||||
#grrr = scrapy.Field()
|
||||
#share = scrapy.Field() # num of shares
|
||||
|
Binary file not shown.
@@ -12,7 +12,7 @@ class CommentsSpider(FacebookSpider):
|
||||
name = "comments"
|
||||
custom_settings = {
|
||||
'FEED_EXPORT_FIELDS': ['source','reply_to','date','reactions','text', \
|
||||
'url'],
|
||||
'source_url','url'],
|
||||
'DUPEFILTER_CLASS' : 'scrapy.dupefilters.BaseDupeFilter',
|
||||
'CONCURRENT_REQUESTS':1,
|
||||
}
|
||||
@@ -53,6 +53,7 @@ class CommentsSpider(FacebookSpider):
|
||||
new = ItemLoader(item=CommentsItem(),selector=reply)
|
||||
new.context['lang'] = self.lang
|
||||
new.add_xpath('source','.//h3/a/text()')
|
||||
new.add_xpath('source_url','.//h3/a/@href')
|
||||
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
||||
new.add_xpath('date','.//abbr/text()')
|
||||
new.add_xpath('reactions','.//a[contains(@href,"reaction/profile")]//text()')
|
||||
@@ -92,7 +93,8 @@ class CommentsSpider(FacebookSpider):
|
||||
for root in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)!=1 and contains("0123456789", substring(@id,1,1))]'):
|
||||
new = ItemLoader(item=CommentsItem(),selector=root)
|
||||
new.context['lang'] = self.lang
|
||||
new.add_xpath('source', './/h3/a/text()')
|
||||
new.add_xpath('source','.//h3/a/text()')
|
||||
new.add_xpath('source_url','.//h3/a/@href')
|
||||
new.add_value('reply_to','ROOT')
|
||||
new.add_xpath('text','.//div[1]//text()')
|
||||
new.add_xpath('date','.//abbr/text()')
|
||||
@@ -103,7 +105,8 @@ class CommentsSpider(FacebookSpider):
|
||||
for reply in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
|
||||
new = ItemLoader(item=CommentsItem(),selector=reply)
|
||||
new.context['lang'] = self.lang
|
||||
new.add_xpath('source', './/h3/a/text()')
|
||||
new.add_xpath('source','.//h3/a/text()')
|
||||
new.add_xpath('source_url','.//h3/a/@href')
|
||||
new.add_value('reply_to',response.meta['reply_to'])
|
||||
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
||||
new.add_xpath('date','.//abbr/text()')
|
||||
@@ -137,7 +140,8 @@ class CommentsSpider(FacebookSpider):
|
||||
for reply in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
|
||||
new = ItemLoader(item=CommentsItem(),selector=reply)
|
||||
new.context['lang'] = self.lang
|
||||
new.add_xpath('source', './/h3/a/text()')
|
||||
new.add_xpath('source','.//h3/a/text()')
|
||||
new.add_xpath('source_url','.//h3/a/@href')
|
||||
new.add_value('reply_to',response.meta['reply_to'])
|
||||
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
||||
new.add_xpath('date','.//abbr/text()')
|
||||
|
Loading…
Reference in New Issue
Block a user