added post_id column
This commit is contained in:
parent
8baa108aab
commit
4a379f3af4
Binary file not shown.
@ -483,6 +483,12 @@ def parse_date2(date):
|
|||||||
|
|
||||||
return str(datetime.fromtimestamp(flat_d['publish_time']) - timedelta(hours=5))
|
return str(datetime.fromtimestamp(flat_d['publish_time']) - timedelta(hours=5))
|
||||||
|
|
||||||
|
def id_strip(post_id):
    """Extract the top-level post id from a post's ``data-ft`` attribute.

    Used as the output processor of the ``post_id`` item field, so it is
    called with the list of values collected for that field (the raw
    ``data-ft`` attribute strings selected by the spider).

    Args:
        post_id: Sequence of JSON strings; only the first one is decoded.

    Returns:
        The value stored under ``'top_level_post_id'`` in the decoded JSON
        object, or ``None`` when no value was collected or the key is
        absent.

    Raises:
        ValueError: If the first value is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Kept function-local, as in the original, so the block does not depend
    # on a module-level import that is not visible from here.
    import json

    # Nothing collected for this field: report "no id" instead of letting an
    # IndexError abort the whole item.
    if not post_id:
        return None

    features = json.loads(post_id[0])  # nested dict of features

    # Tolerate payloads without the key (or that are not JSON objects)
    # rather than raising KeyError/TypeError for the entire item.
    if not isinstance(features, dict):
        return None
    return features.get('top_level_post_id')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class FbcrawlItem(scrapy.Item):
|
class FbcrawlItem(scrapy.Item):
|
||||||
@ -511,6 +517,9 @@ class FbcrawlItem(scrapy.Item):
|
|||||||
url = scrapy.Field(
|
url = scrapy.Field(
|
||||||
output_processor=url_strip
|
output_processor=url_strip
|
||||||
)
|
)
|
||||||
|
post_id = scrapy.Field(
|
||||||
|
output_processor=id_strip
|
||||||
|
)
|
||||||
shared_from = scrapy.Field()
|
shared_from = scrapy.Field()
|
||||||
|
|
||||||
class CommentsItem(scrapy.Item):
|
class CommentsItem(scrapy.Item):
|
||||||
|
Binary file not shown.
@ -15,7 +15,7 @@ class FacebookSpider(scrapy.Spider):
|
|||||||
custom_settings = {
|
custom_settings = {
|
||||||
'FEED_EXPORT_FIELDS': ['source','shared_from','date','text', \
|
'FEED_EXPORT_FIELDS': ['source','shared_from','date','text', \
|
||||||
'reactions','likes','ahah','love','wow', \
|
'reactions','likes','ahah','love','wow', \
|
||||||
'sigh','grrr','comments','url']
|
'sigh','grrr','comments','post_id','url']
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -151,6 +151,7 @@ class FacebookSpider(scrapy.Spider):
|
|||||||
self.logger.info('Parsing post n = {}'.format(abs(self.count)))
|
self.logger.info('Parsing post n = {}'.format(abs(self.count)))
|
||||||
new.add_xpath('comments', './div[2]/div[2]/a[1]/text()')
|
new.add_xpath('comments', './div[2]/div[2]/a[1]/text()')
|
||||||
new.add_xpath('date','./@data-ft')
|
new.add_xpath('date','./@data-ft')
|
||||||
|
new.add_xpath('post_id','./@data-ft')
|
||||||
new.add_xpath('url', ".//a[contains(@href,'footer')]/@href")
|
new.add_xpath('url', ".//a[contains(@href,'footer')]/@href")
|
||||||
|
|
||||||
#page_url #new.add_value('url',response.url)
|
#page_url #new.add_value('url',response.url)
|
||||||
|
Loading…
Reference in New Issue
Block a user