[comments.py] Added new source_url column
This commit is contained in:
parent
462cb0eff1
commit
3d32ab6054
Binary file not shown.
@@ -507,11 +507,11 @@ class CommentsItem(scrapy.Item):
|
|||||||
likes = scrapy.Field(
|
likes = scrapy.Field(
|
||||||
output_processor=reactions_strip
|
output_processor=reactions_strip
|
||||||
)
|
)
|
||||||
ahah = scrapy.Field()
|
source_url = scrapy.Field()
|
||||||
love = scrapy.Field()
|
|
||||||
wow = scrapy.Field()
|
|
||||||
sigh = scrapy.Field()
|
|
||||||
grrr = scrapy.Field()
|
|
||||||
share = scrapy.Field() # num of shares
|
|
||||||
url = scrapy.Field()
|
url = scrapy.Field()
|
||||||
shared_from = scrapy.Field()
|
#ahah = scrapy.Field()
|
||||||
|
#love = scrapy.Field()
|
||||||
|
#wow = scrapy.Field()
|
||||||
|
#sigh = scrapy.Field()
|
||||||
|
#grrr = scrapy.Field()
|
||||||
|
#share = scrapy.Field() # num of shares
|
||||||
|
Binary file not shown.
@@ -12,7 +12,7 @@ class CommentsSpider(FacebookSpider):
|
|||||||
name = "comments"
|
name = "comments"
|
||||||
custom_settings = {
|
custom_settings = {
|
||||||
'FEED_EXPORT_FIELDS': ['source','reply_to','date','reactions','text', \
|
'FEED_EXPORT_FIELDS': ['source','reply_to','date','reactions','text', \
|
||||||
'url'],
|
'source_url','url'],
|
||||||
'DUPEFILTER_CLASS' : 'scrapy.dupefilters.BaseDupeFilter',
|
'DUPEFILTER_CLASS' : 'scrapy.dupefilters.BaseDupeFilter',
|
||||||
'CONCURRENT_REQUESTS':1,
|
'CONCURRENT_REQUESTS':1,
|
||||||
}
|
}
|
||||||
@@ -53,6 +53,7 @@ class CommentsSpider(FacebookSpider):
|
|||||||
new = ItemLoader(item=CommentsItem(),selector=reply)
|
new = ItemLoader(item=CommentsItem(),selector=reply)
|
||||||
new.context['lang'] = self.lang
|
new.context['lang'] = self.lang
|
||||||
new.add_xpath('source','.//h3/a/text()')
|
new.add_xpath('source','.//h3/a/text()')
|
||||||
|
new.add_xpath('source_url','.//h3/a/@href')
|
||||||
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
||||||
new.add_xpath('date','.//abbr/text()')
|
new.add_xpath('date','.//abbr/text()')
|
||||||
new.add_xpath('reactions','.//a[contains(@href,"reaction/profile")]//text()')
|
new.add_xpath('reactions','.//a[contains(@href,"reaction/profile")]//text()')
|
||||||
@@ -92,7 +93,8 @@ class CommentsSpider(FacebookSpider):
|
|||||||
for root in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)!=1 and contains("0123456789", substring(@id,1,1))]'):
|
for root in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)!=1 and contains("0123456789", substring(@id,1,1))]'):
|
||||||
new = ItemLoader(item=CommentsItem(),selector=root)
|
new = ItemLoader(item=CommentsItem(),selector=root)
|
||||||
new.context['lang'] = self.lang
|
new.context['lang'] = self.lang
|
||||||
new.add_xpath('source', './/h3/a/text()')
|
new.add_xpath('source','.//h3/a/text()')
|
||||||
|
new.add_xpath('source_url','.//h3/a/@href')
|
||||||
new.add_value('reply_to','ROOT')
|
new.add_value('reply_to','ROOT')
|
||||||
new.add_xpath('text','.//div[1]//text()')
|
new.add_xpath('text','.//div[1]//text()')
|
||||||
new.add_xpath('date','.//abbr/text()')
|
new.add_xpath('date','.//abbr/text()')
|
||||||
@@ -103,7 +105,8 @@ class CommentsSpider(FacebookSpider):
|
|||||||
for reply in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
|
for reply in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
|
||||||
new = ItemLoader(item=CommentsItem(),selector=reply)
|
new = ItemLoader(item=CommentsItem(),selector=reply)
|
||||||
new.context['lang'] = self.lang
|
new.context['lang'] = self.lang
|
||||||
new.add_xpath('source', './/h3/a/text()')
|
new.add_xpath('source','.//h3/a/text()')
|
||||||
|
new.add_xpath('source_url','.//h3/a/@href')
|
||||||
new.add_value('reply_to',response.meta['reply_to'])
|
new.add_value('reply_to',response.meta['reply_to'])
|
||||||
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
||||||
new.add_xpath('date','.//abbr/text()')
|
new.add_xpath('date','.//abbr/text()')
|
||||||
@@ -137,7 +140,8 @@ class CommentsSpider(FacebookSpider):
|
|||||||
for reply in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
|
for reply in response.xpath('//div[contains(@id,"root")]/div/div/div[count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
|
||||||
new = ItemLoader(item=CommentsItem(),selector=reply)
|
new = ItemLoader(item=CommentsItem(),selector=reply)
|
||||||
new.context['lang'] = self.lang
|
new.context['lang'] = self.lang
|
||||||
new.add_xpath('source', './/h3/a/text()')
|
new.add_xpath('source','.//h3/a/text()')
|
||||||
|
new.add_xpath('source_url','.//h3/a/@href')
|
||||||
new.add_value('reply_to',response.meta['reply_to'])
|
new.add_value('reply_to',response.meta['reply_to'])
|
||||||
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
new.add_xpath('text','.//div[h3]/div[1]//text()')
|
||||||
new.add_xpath('date','.//abbr/text()')
|
new.add_xpath('date','.//abbr/text()')
|
||||||
|
Loading…
Reference in New Issue
Block a user