final

2018-08-26 14:08:36 +02:00 · 2018-08-26 14:08:36 +02:00 · 31c30c7b52
commit 31c30c7b52
parent 888ebeab70
6 changed files with 7 additions and 8 deletions
--- a/.~lock.exploit.csv#
+++ b/.~lock.exploit.csv#
@@ -0,0 +1 @@
 ,rugantio,alice,26.08.2018 14:07,file:///home/rugantio/.config/libreoffice/4;
--- a/fbcrawl/pycache/items.cpython-37.pyc
+++ b/fbcrawl/pycache/items.cpython-37.pyc
--- a/fbcrawl/pycache/settings.cpython-37.pyc
+++ b/fbcrawl/pycache/settings.cpython-37.pyc
--- a/fbcrawl/items.py
+++ b/fbcrawl/items.py
@@ -70,18 +70,18 @@ def comments_strip(string):
    return string[0].rstrip(" commenti")
 def reactions_strip(string):
    friends = 1 + string[0].count(',')
    string = string[0].split()[::-1]
    if len(string) == 1:
        string = string[0]
        while string.rfind('.') != -1:
            string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
        return string
    string = string[0].split()
    string = string[::-1][0]
    string = string[0]
    while string.rfind('.') != -1:
        string = string[0:string.rfind('.')] + string[string.rfind('.')+1:]
-
+    return int(string) + friends
    return int(string) + 1
 class FbcrawlItem(scrapy.Item):
    # define the fields for your item here like:
@@ -119,5 +119,4 @@ class FbcrawlItem(scrapy.Item):
    sigh = scrapy.Field()                      
    grrr = scrapy.Field()                      
    share = scrapy.Field()                      # num of shares
    num_id = scrapy.Field()                     # progressive int associated to the entry in the final table, not present in the webpage
    url = scrapy.Field()
--- a/fbcrawl/spiders/pycache/fbcrawl.cpython-37.pyc
+++ b/fbcrawl/spiders/pycache/fbcrawl.cpython-37.pyc
--- a/fbcrawl/spiders/fbcrawl.py
+++ b/fbcrawl/spiders/fbcrawl.py
@@ -77,7 +77,6 @@ class FacebookSpider(scrapy.Spider):
            callback=self.parse_page,
        )
    def parse_page(self, response):        
        for post in response.xpath("//div[contains(@data-ft,'top_level_post_id')]"): #select all posts
            self.logger.info('Parsing post %s', post)
@@ -102,7 +101,7 @@ class FacebookSpider(scrapy.Spider):
    def parse_post(self,response):
        new = ItemLoader(item=FbcrawlItem(),response=response,parent=response.meta['item'])            
-        new.add_xpath('source', '//span/strong/a/text() | //div/a/strong/text() | //td/div/h3/strong/a/text()')
+        new.add_xpath('source', "//td/div/h3/strong/a/text() | //span/strong/a/text() | //div/div/div/a[contains(@href,'post_id')]/strong/text()")
        new.add_xpath('date', '//div/div/abbr/text()')
        new.add_xpath('text','//div[@data-ft]//p//text()')
        new.add_xpath('reactions',"//a[contains(@href,'reaction/profile')]/div/div/text()")
		`@@ -0,0 +1 @@`
							`,rugantio,alice,26.08.2018 14:07,file:///home/rugantio/.config/libreoffice/4;`