final
This commit is contained in:
parent
888ebeab70
commit
31c30c7b52
1
.~lock.exploit.csv#
Normal file
1
.~lock.exploit.csv#
Normal file
@ -0,0 +1 @@
|
||||
,rugantio,alice,26.08.2018 14:07,file:///home/rugantio/.config/libreoffice/4;
|
Binary file not shown.
Binary file not shown.
@ -70,18 +70,18 @@ def comments_strip(string):
|
||||
return string[0].rstrip(" commenti")
|
||||
|
||||
def reactions_strip(string):
|
||||
friends = 1 + string[0].count(',')
|
||||
string = string[0].split()[::-1]
|
||||
if len(string) == 1:
|
||||
string = string[0]
|
||||
while string.rfind('.') != -1:
|
||||
string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
|
||||
return string
|
||||
string = string[0].split()
|
||||
string = string[::-1][0]
|
||||
|
||||
|
||||
string = string[0]
|
||||
while string.rfind('.') != -1:
|
||||
string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
|
||||
|
||||
return int(string) + 1
|
||||
return int(string) + friends
|
||||
|
||||
class FbcrawlItem(scrapy.Item):
|
||||
# define the fields for your item here like:
|
||||
@ -119,5 +119,4 @@ class FbcrawlItem(scrapy.Item):
|
||||
sigh = scrapy.Field()
|
||||
grrr = scrapy.Field()
|
||||
share = scrapy.Field() # num of shares
|
||||
num_id = scrapy.Field() # progressive int associated to the entry in the final table, not present in the webpage
|
||||
url = scrapy.Field()
|
||||
|
Binary file not shown.
@ -77,7 +77,6 @@ class FacebookSpider(scrapy.Spider):
|
||||
callback=self.parse_page,
|
||||
)
|
||||
|
||||
|
||||
def parse_page(self, response):
|
||||
for post in response.xpath("//div[contains(@data-ft,'top_level_post_id')]"): #select all posts
|
||||
self.logger.info('Parsing post %s', post)
|
||||
@ -102,7 +101,7 @@ class FacebookSpider(scrapy.Spider):
|
||||
|
||||
def parse_post(self,response):
|
||||
new = ItemLoader(item=FbcrawlItem(),response=response,parent=response.meta['item'])
|
||||
new.add_xpath('source', '//span/strong/a/text() | //div/a/strong/text() | //td/div/h3/strong/a/text()')
|
||||
new.add_xpath('source', "//td/div/h3/strong/a/text() | //span/strong/a/text() | //div/div/div/a[contains(@href,'post_id')]/strong/text()")
|
||||
new.add_xpath('date', '//div/div/abbr/text()')
|
||||
new.add_xpath('text','//div[@data-ft]//p//text()')
|
||||
new.add_xpath('reactions',"//a[contains(@href,'reaction/profile')]/div/div/text()")
|
||||
|
Loading…
Reference in New Issue
Block a user