final
This commit is contained in:
parent
888ebeab70
commit
31c30c7b52
1
.~lock.exploit.csv#
Normal file
1
.~lock.exploit.csv#
Normal file
@ -0,0 +1 @@
|
|||||||
|
,rugantio,alice,26.08.2018 14:07,file:///home/rugantio/.config/libreoffice/4;
|
Binary file not shown.
Binary file not shown.
@ -70,18 +70,18 @@ def comments_strip(string):
|
|||||||
return string[0].rstrip(" commenti")
|
return string[0].rstrip(" commenti")
|
||||||
|
|
||||||
def reactions_strip(string):
|
def reactions_strip(string):
|
||||||
|
friends = 1 + string[0].count(',')
|
||||||
|
string = string[0].split()[::-1]
|
||||||
if len(string) == 1:
|
if len(string) == 1:
|
||||||
string = string[0]
|
string = string[0]
|
||||||
while string.rfind('.') != -1:
|
while string.rfind('.') != -1:
|
||||||
string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
|
string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
|
||||||
return string
|
return string
|
||||||
string = string[0].split()
|
|
||||||
string = string[::-1][0]
|
|
||||||
|
|
||||||
|
string = string[0]
|
||||||
while string.rfind('.') != -1:
|
while string.rfind('.') != -1:
|
||||||
string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
|
string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
|
||||||
|
return int(string) + friends
|
||||||
return int(string) + 1
|
|
||||||
|
|
||||||
class FbcrawlItem(scrapy.Item):
|
class FbcrawlItem(scrapy.Item):
|
||||||
# define the fields for your item here like:
|
# define the fields for your item here like:
|
||||||
@ -119,5 +119,4 @@ class FbcrawlItem(scrapy.Item):
|
|||||||
sigh = scrapy.Field()
|
sigh = scrapy.Field()
|
||||||
grrr = scrapy.Field()
|
grrr = scrapy.Field()
|
||||||
share = scrapy.Field() # num of shares
|
share = scrapy.Field() # num of shares
|
||||||
num_id = scrapy.Field() # progressive int associated to the entry in the final table, not present in the webpage
|
|
||||||
url = scrapy.Field()
|
url = scrapy.Field()
|
||||||
|
Binary file not shown.
@ -77,7 +77,6 @@ class FacebookSpider(scrapy.Spider):
|
|||||||
callback=self.parse_page,
|
callback=self.parse_page,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def parse_page(self, response):
|
def parse_page(self, response):
|
||||||
for post in response.xpath("//div[contains(@data-ft,'top_level_post_id')]"): #select all posts
|
for post in response.xpath("//div[contains(@data-ft,'top_level_post_id')]"): #select all posts
|
||||||
self.logger.info('Parsing post %s', post)
|
self.logger.info('Parsing post %s', post)
|
||||||
@ -102,7 +101,7 @@ class FacebookSpider(scrapy.Spider):
|
|||||||
|
|
||||||
def parse_post(self,response):
|
def parse_post(self,response):
|
||||||
new = ItemLoader(item=FbcrawlItem(),response=response,parent=response.meta['item'])
|
new = ItemLoader(item=FbcrawlItem(),response=response,parent=response.meta['item'])
|
||||||
new.add_xpath('source', '//span/strong/a/text() | //div/a/strong/text() | //td/div/h3/strong/a/text()')
|
new.add_xpath('source', "//td/div/h3/strong/a/text() | //span/strong/a/text() | //div/div/div/a[contains(@href,'post_id')]/strong/text()")
|
||||||
new.add_xpath('date', '//div/div/abbr/text()')
|
new.add_xpath('date', '//div/div/abbr/text()')
|
||||||
new.add_xpath('text','//div[@data-ft]//p//text()')
|
new.add_xpath('text','//div[@data-ft]//p//text()')
|
||||||
new.add_xpath('reactions',"//a[contains(@href,'reaction/profile')]/div/div/text()")
|
new.add_xpath('reactions',"//a[contains(@href,'reaction/profile')]/div/div/text()")
|
||||||
|
Loading…
Reference in New Issue
Block a user