diff --git a/fbcrawl/items.py b/fbcrawl/items.py
index 4f48bae..ecafbd9 100644
--- a/fbcrawl/items.py
+++ b/fbcrawl/items.py
@@ -413,35 +413,38 @@ def url_strip(url):
     #catching '&id=' is enough to identify the post
     i = fullurl.find('&id=')
     if i != -1:
-        j = fullurl[:i+4] + fullurl[i+4:].split('&')[0]
-        return j
-    else:
-        return fullurl
+        return fullurl[:i+4] + fullurl[i+4:].split('&')[0]
+    else: #catch photos
+        i = fullurl.find('/photos/')
+        if i != -1:
+            return fullurl[:i+8] + fullurl[i+8:].split('/?')[0]
+        else: #catch albums
+            i = fullurl.find('/albums/')
+            if i != -1:
+                return fullurl[:i+8] + fullurl[i+8:].split('/?')[0]
+            else:
+                return fullurl
+
 class FbcrawlItem(scrapy.Item):
-    source = scrapy.Field(
-        output_processor=TakeFirst()
-    ) # page that published the post
-
+    source = scrapy.Field(
+       output_processor=TakeFirst()
+    )
     date = scrapy.Field( # when was the post published
-        input_processor=TakeFirst(),
-        output_processor=parse_date
+       input_processor=TakeFirst(),
+       output_processor=parse_date
     )
-
     text = scrapy.Field(
-        output_processor=Join(separator=u'')
+       output_processor=Join(separator=u'')
     ) # full text of the post
-
     comments = scrapy.Field(
-        output_processor=comments_strip
+       output_processor=comments_strip
     )
-
     reactions = scrapy.Field(
-        output_processor=reactions_strip
+       output_processor=reactions_strip
     ) # num of reactions
-
     likes = scrapy.Field(
-        output_processor=reactions_strip
+       output_processor=reactions_strip
     )
     ahah = scrapy.Field()
     love = scrapy.Field()
@@ -451,4 +454,5 @@ class FbcrawlItem(scrapy.Item):
     share = scrapy.Field() # num of shares
     url = scrapy.Field(
         output_processor=url_strip
-    )
+    )
+    shared_from = scrapy.Field()
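The rewritten url_strip above replaces the temporary variable with early returns and adds the /photos/ and /albums/ branches. As a quick sanity check for review, here is the same logic as a standalone function, run on hypothetical mbasic-style links (it assumes fullurl is the already-normalized URL that items.py builds earlier in the function):

```python
# Standalone sketch of the new url_strip branches (a review aid, not part of
# the commit). The example links below are hypothetical mbasic-style URLs.
def url_strip(fullurl):
    i = fullurl.find('&id=')
    if i != -1:  # plain post: keep '&id=' plus the id value, drop the rest
        return fullurl[:i+4] + fullurl[i+4:].split('&')[0]
    i = fullurl.find('/photos/')
    if i != -1:  # photo permalink: cut the '/?type=...' query tail
        return fullurl[:i+8] + fullurl[i+8:].split('/?')[0]
    i = fullurl.find('/albums/')
    if i != -1:  # album permalink: same treatment as photos
        return fullurl[:i+8] + fullurl[i+8:].split('/?')[0]
    return fullurl

print(url_strip('https://mbasic.facebook.com/story.php?story_fbid=1&id=42&refid=17'))
# -> https://mbasic.facebook.com/story.php?story_fbid=1&id=42
print(url_strip('https://mbasic.facebook.com/page/photos/a.1/2/?type=3&source=48'))
# -> https://mbasic.facebook.com/page/photos/a.1/2
```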
diff --git a/fbcrawl/settings.py b/fbcrawl/settings.py
index ee82e25..0d6f667 100644
--- a/fbcrawl/settings.py
+++ b/fbcrawl/settings.py
@@ -14,7 +14,6 @@ BOT_NAME = 'fbcrawl'
 
 SPIDER_MODULES = ['fbcrawl.spiders']
 NEWSPIDER_MODULE = 'fbcrawl.spiders'
-
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
 
@@ -22,7 +21,7 @@ USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTM
 ROBOTSTXT_OBEY = False
 
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
+CONCURRENT_REQUESTS = 1
 
 # Configure a delay for requests for the same website (default: 0)
 # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
@@ -88,7 +87,7 @@ ROBOTSTXT_OBEY = False
 #HTTPCACHE_DIR = 'httpcache'
 #HTTPCACHE_IGNORE_HTTP_CODES = []
 #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
-FEED_EXPORT_FIELDS = ["source", "date", "text", "reactions","likes","ahah","love","wow","sigh","grrr","comments","url"] # specifies the order of the column to export as CSV
+#FEED_EXPORT_FIELDS = ["source", "date", "text", "reactions","likes","ahah","love","wow","sigh","grrr","comments","url"] # specifies the order of the columns to export as CSV
 FEED_EXPORT_ENCODING = 'utf-8'
 DUPEFILTER_DEBUG = True
 LOG_LEVEL = 'INFO'
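FEED_EXPORT_FIELDS is commented out here because the export-field order now lives in the fb spider's custom_settings (see fbcrawl.py below). A minimal sketch of why that works: in Scrapy, spider-level custom_settings take precedence over project-level settings.py values (the spider below is hypothetical, for illustration only):

```python
import scrapy

class ExampleSpider(scrapy.Spider):
    name = 'example'  # hypothetical spider, not part of this commit
    # takes precedence over any FEED_EXPORT_FIELDS set in settings.py,
    # because spider settings have higher priority than project settings
    custom_settings = {
        'FEED_EXPORT_FIELDS': ['source', 'shared_from', 'date', 'text'],
    }

    def parse(self, response):
        pass
```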
diff --git a/fbcrawl/spiders/comments.py b/fbcrawl/spiders/comments.py
index 1ec1239..c9da10b 100644
--- a/fbcrawl/spiders/comments.py
+++ b/fbcrawl/spiders/comments.py
@@ -4,7 +4,6 @@
 from scrapy.loader import ItemLoader
 from scrapy.http import FormRequest
 from fbcrawl.items import FbcrawlItem
-
 class FacebookSpider(scrapy.Spider):
     """
     Parse FB comments, given a page (needs credentials)
@@ -78,22 +77,27 @@ class FacebookSpider(scrapy.Spider):
         )
 
     def parse_page(self, response):
-        for post in response.xpath('//div[count(@class)=1 and count(@id)=1 and contains("0123456789", substring(@id,1,1))]'): #select all posts
-            new = ItemLoader(item=FbcrawlItem(),selector=post)
-            new.add_xpath('source', "./div/h3/a/text()")
-            new.add_xpath('text',"//div/div/span[not(contains(text(),' · '))]/text() | ./div/div/text()")
-            yield new.load_item()
-
-        rispostina = response.xpath('//div/a[contains(text(),"rispost")]/@href')
-
-        for i in range(len(rispostina)):
-            risp = response.urljoin(rispostina[i].extract())
+        #select the replies on the page; "Altro" is the Italian-interface
+        #label of the "More" link that expands a reply thread
+        for risposta in response.xpath('./div[string-length(@class) = 5 and count(@id)=1 and contains("0123456789", substring(@id,1,1))]'):
+#            resp = ItemLoader(item=FbcrawlItem(),selector=risposta)
+            rispostina = risposta.xpath('./a[@href and text()="Altro"]/@href')
+            risp = response.urljoin(rispostina[0].extract())
             yield scrapy.Request(risp, callback=self.parse_rispostina)
-
-        next_page = response.xpath("//div[contains(@id,'see_next')]/a/@href")
-        if len(next_page) > 0:
-            next_page = response.urljoin(next_page[0].extract())
-            yield scrapy.Request(next_page, callback=self.parse_page)
+
+#        for i in range(len(rispostina)):
+#            risp = response.urljoin(rispostina[i].extract())
+#
+#        for post in response.xpath('//div[string-length(@class) = 2 and count(@id)=1 and contains("0123456789", substring(@id,1,1))]'): #select all posts
+#            new = ItemLoader(item=FbcrawlItem(),selector=post)
+#            new.add_xpath('source', "./div/h3/a/text()")
+#            new.add_xpath('text',"./div[1]/div[1]/text()")
+#            yield new.load_item()
+#
+#        next_page = response.xpath("//div[contains(@id,'see_next')]/a/@href")
+#        if len(next_page) > 0:
+#            next_page = response.urljoin(next_page[0].extract())
+#            yield scrapy.Request(next_page, callback=self.parse_page)
 
     def parse_rispostina(self,response):
         for daje in response.xpath("//div[contains(@id,'root')]/div/div/div"): #select all replies
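parse_page now expands reply threads by following the link whose text is "Altro", the label of the "More" link on the Italian interface, so this spider currently works only with lang="it". A possible generalization, sketched under the assumption of a label table (only the 'it' entry is confirmed by the code above; the other labels are unverified placeholders):

```python
# Hypothetical sketch: parametrize the reply-expansion link text instead of
# hard-coding the Italian "Altro". Only the 'it' label comes from the code
# above; the other entries are unverified placeholders.
MORE_LABELS = {'it': 'Altro', 'en': 'More', 'es': 'Ver más',
               'fr': 'Plus', 'pt': 'Mais'}

def reply_links(selector, lang='it'):
    # parsel/Scrapy selectors accept XPath variables as keyword arguments
    return selector.xpath('./a[@href and text()=$label]/@href',
                          label=MORE_LABELS[lang])
```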
diff --git a/fbcrawl/spiders/fbcrawl.py b/fbcrawl/spiders/fbcrawl.py
index c2ba592..10e256f 100644
--- a/fbcrawl/spiders/fbcrawl.py
+++ b/fbcrawl/spiders/fbcrawl.py
@@ -1,30 +1,39 @@
 import scrapy
+import logging
 from scrapy.loader import ItemLoader
 from scrapy.http import FormRequest
 from fbcrawl.items import FbcrawlItem
-from scrapy.exceptions import CloseSpider
-
 class FacebookSpider(scrapy.Spider):
     """
     Parse FB pages (needs credentials)
     """
     name = "fb"
+    custom_settings = {
+        'FEED_EXPORT_FIELDS': ['source','shared_from','date','text',
+                               'reactions','likes','ahah','love','wow',
+                               'sigh','grrr','comments','url']
+    }
 
-    def __init__(self, email='', password='', page='', year=2018, lang='_', **kwargs):
-        super(FacebookSpider, self).__init__(**kwargs)
+    def __init__(self,email='',password='',page='',year=2018,lang='_',*args,**kwargs):
+        #silence the noisy middleware logger; set LOG_LEVEL=DEBUG in settings.py for more logs
+        logger = logging.getLogger('scrapy.middleware')
+        logger.setLevel(logging.WARNING)
+        super().__init__(*args,**kwargs)
 
         #email & pass need to be passed as attributes!
         if not email or not password:
-            raise ValueError("You need to provide valid email and password!")
+            raise AttributeError('You need to provide a valid email and password:\n'
+                                 'scrapy crawl fb -a email="EMAIL" -a password="PASSWORD"')
         else:
             self.email = email
             self.password = password
 
         #page name parsing (added support for full urls)
         if not page:
-            raise ValueError("You need to provide a valid page name to crawl!")
+            raise AttributeError('You need to provide a valid page name to crawl:\n'
+                                 'scrapy crawl fb -a page="PAGENAME"')
         elif page.find('https://www.facebook.com/') != -1:
             self.page = page[25:]
         elif page.find('https://mbasic.facebook.com/') != -1:
@@ -35,22 +44,27 @@ class FacebookSpider(scrapy.Spider):
             self.page = page
 
         #parse year
-        assert int(year) <= 2019 and int(year) >= 2015, 'Year must be a number 2015 <= year <= 2019'
+        assert int(year) <= 2019 and int(year) >= 2006, 'Year must be a number 2006 <= year <= 2019'
         self.year = int(year) #arguments are passed as strings
-
+
+        #parse lang; if not provided (but supported) it will be guessed in parse_home
         if lang=='_':
-            self.logger.info('Language attribute not provided, I will try to guess it')
-            self.logger.info('Currently supported languages are: "en", "es", "fr", "it", "pt"')
+            self.logger.info('Language attribute not provided, I will try to guess it from the fb interface')
+            self.logger.info('To specify it, add the lang parameter: scrapy crawl fb -a lang="LANGUAGE"')
+            self.logger.info('Current choices for "LANGUAGE" are: "en", "es", "fr", "it", "pt"')
             self.lang=lang
-        elif lang == 'en' or lang == 'es' or lang == 'fr' or lang == 'it' or lang == 'pt':
-            self.lang = lang
+        elif lang.lower() in ('en', 'es', 'fr', 'it', 'pt'):
+            self.lang = lang.lower()
         else:
             self.logger.info('Lang "{}" not currently supported'.format(lang))
             self.logger.info('Currently supported languages are: "en", "es", "fr", "it", "pt"')
             self.logger.info('Change your Facebook interface language and try again')
-            raise CloseSpider('Language provided not currently supported')
+            raise AttributeError('Language provided not currently supported')
+
+        #current year, the starting flag for the parse_page recursion
+        self.k = 2019
+        #post counter, used to assign a decreasing priority to requests
+        self.count = 0
+
+        self.start_urls = ['https://mbasic.facebook.com']
 
     def parse(self, response):
@@ -73,29 +87,39 @@ class FacebookSpider(scrapy.Spider):
         '''
         #handle 'save-device' redirection
         if response.xpath("//div/a[contains(@href,'save-device')]"):
+            self.logger.info('Got stuck in the "save-device" checkpoint')
+            self.logger.info('I will now try to redirect to the correct page')
             return FormRequest.from_response(
                 response,
                 formdata={'name_action_selected': 'dont_save'},
-                callback=self.parse_home)
+                callback=self.parse_home
+            )
 
         #set language interface
         if self.lang == '_':
             if response.xpath("//input[@placeholder='Search Facebook']"):
+                self.logger.info('Language recognized: lang="en"')
                 self.lang = 'en'
-            elif response.xpath("//input[@value='Buscar']"):
+            elif response.xpath("//input[@placeholder='Buscar en Facebook']"):
+                self.logger.info('Language recognized: lang="es"')
                 self.lang = 'es'
-            elif response.xpath("//input[@value='Rechercher']"):
+            elif response.xpath("//input[@placeholder='Rechercher sur Facebook']"):
+                self.logger.info('Language recognized: lang="fr"')
                 self.lang = 'fr'
-            elif response.xpath("//input[@value='Cerca']"):
+            elif response.xpath("//input[@placeholder='Cerca su Facebook']"):
+                self.logger.info('Language recognized: lang="it"')
                 self.lang = 'it'
-            elif response.xpath("//input[@value='Pesquisar']"):
+            elif response.xpath("//input[@placeholder='Pesquisa no Facebook']"):
+                self.logger.info('Language recognized: lang="pt"')
                 self.lang = 'pt'
             else:
-                raise CloseSpider('Language not recognized')
-
+                raise AttributeError('Language not recognized\n'
+                                     'Change your Facebook interface language '
+                                     'and try again')
+
+        #navigate to the provided page
         href = response.urljoin(self.page)
-        self.logger.info('Parsing facebook page %s', href)
+        self.logger.info('Scraping Facebook page {}'.format(href))
         return scrapy.Request(url=href,callback=self.parse_page)
 
     def parse_page(self, response):
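parse_home now recognizes the interface language from the search box's placeholder instead of the submit-button label, which is less ambiguous because the placeholder is a full, language-specific phrase. For review, the same detection restated as a lookup table (placeholder strings copied from the hunk above; a sketch, not part of the commit):

```python
# Sketch: placeholder-based language detection as a lookup table.
PLACEHOLDERS = {
    'en': 'Search Facebook',
    'es': 'Buscar en Facebook',
    'fr': 'Rechercher sur Facebook',
    'it': 'Cerca su Facebook',
    'pt': 'Pesquisa no Facebook',
}

def guess_lang(response):
    # returns the first language whose search-box placeholder is present
    for lang, placeholder in PLACEHOLDERS.items():
        if response.xpath('//input[@placeholder=$p]', p=placeholder):
            return lang
    raise AttributeError('Language not recognized')
```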
@@ -106,6 +130,7 @@ class FacebookSpider(scrapy.Spider):
         #select all posts
         for post in response.xpath("//div[contains(@data-ft,'top_level_post_id')]"):
             new = ItemLoader(item=FbcrawlItem(),selector=post)
+            self.logger.info('Parsing post n = {}'.format(abs(self.count)+1))
             new.add_xpath('comments', "./div[2]/div[2]/a[1]/text()")
             new.add_xpath('url', ".//a[contains(@href,'footer')]/@href")
             new.add_xpath('reactions',".//a[contains(@aria-label,'reactions')]/text()")
@@ -113,54 +138,53 @@ class FacebookSpider(scrapy.Spider):
             #page_url # new.add_value('url',response.url)
             #returns full post-link in a list
             post = post.xpath(".//a[contains(@href,'footer')]/@href").extract()
-            temp_post = response.urljoin(post[0])
-            yield scrapy.Request(temp_post, self.parse_post, meta={'item':new})
+            temp_post = response.urljoin(post[0])
+            self.count -= 1 #decreasing priority preserves the on-page order of posts
+            yield scrapy.Request(temp_post, self.parse_post, priority=self.count, meta={'item':new})
 
         #load following page
-        next_page = response.xpath("//div[2]/a[contains(@href,'timestart=') and not(contains(text(),'ent')) and not(contains(text(),number()))]/@href").extract()
-        if len(next_page) == 0:
-            if response.meta['flag'] == 4 and self.year <= 2015:
-                self.logger.info('2014 reached, flag = 5')
-                next_page = response.xpath("//div/a[contains(@href,'time') and contains(text(),'2015')]/@href").extract()
-                self.logger.info('next_page = {}'.format(next_page[0]))
-                new_page = response.urljoin(next_page[0])
-                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':5})
-            elif response.meta['flag'] == 3 and self.year <= 2015:
-                self.logger.info('2015 reached, flag = 4')
-                next_page = response.xpath("//div/a[contains(@href,'time') and contains(text(),'2015')]/@href").extract()
-                self.logger.info('next_page = {}'.format(next_page[0]))
-                new_page = response.urljoin(next_page[0])
-                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':4})
-            elif response.meta['flag'] == 2 and self.year <= 2016:
-                self.logger.info('2016 reached, flag = 3')
-                next_page = response.xpath("//div/a[contains(@href,'time') and contains(text(),'2016')]/@href").extract()
-                self.logger.info('next_page = {}'.format(next_page[0]))
-                new_page = response.urljoin(next_page[0])
-                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':3})
-            elif response.meta['flag'] == 1 and self.year <= 2017:
-                self.logger.info('2017 reached, flag = 2')
-                next_page = response.xpath("//div/a[contains(@href,'time') and contains(text(),'2017')]/@href").extract()
-                self.logger.info('next_page = {}'.format(next_page[0]))
-                new_page = response.urljoin(next_page[0])
-                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':2})
-            elif response.meta['flag'] == 0 and self.year <= 2018:
-                self.logger.info('2018 reached, flag = 1')
-                next_page = response.xpath("//div/a[contains(@href,'time') and contains(text(),'2018')]/@href").extract()
-                self.logger.info('next_page = {}'.format(next_page[0]))
-                new_page = response.urljoin(next_page[0])
-                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':1})
+        #try to click on "more"; if there is no such link, look for the
+        #year link matching the current flag and click that one instead
+        new_page = response.xpath("//div[2]/a[contains(@href,'timestart=') and not(contains(text(),'ent')) and not(contains(text(),number()))]/@href").extract()
+        if not new_page:
+            if response.meta['flag'] == self.k and self.year <= self.k:
+                self.logger.info('There are no more posts, clicking on year = {}'.format(self.k))
+                xpath = "//div/a[contains(@href,'time') and contains(text(),'" + str(self.k) + "')]/@href"
+                new_page = response.xpath(xpath).extract()
+                if new_page:
+                    new_page = response.urljoin(new_page[0])
+                    self.k -= 1
+                    self.logger.info('Everything OK, new flag: {}'.format(self.k))
+                    yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':self.k})
+                else:
+                    #sometimes a year is skipped on the page; keep counting
+                    #down, but never past the target year, to avoid looping forever
+                    while not new_page and self.k > self.year:
+                        self.logger.info('XPath not found for year {}'.format(self.k-1))
+                        self.k -= 1
+                        self.logger.info('Trying with previous year, flag={}'.format(self.k))
+                        xpath = "//div/a[contains(@href,'time') and contains(text(),'" + str(self.k) + "')]/@href"
+                        new_page = response.xpath(xpath).extract()
+                    if new_page:
+                        self.logger.info('New page found with flag {}'.format(self.k))
+                        new_page = response.urljoin(new_page[0])
+                        self.k -= 1
+                        self.logger.info('Now going with flag {}'.format(self.k))
+                        yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':self.k})
         else:
-            new_page = response.urljoin(next_page[0])
+            new_page = response.urljoin(new_page[0])
             if 'flag' in response.meta:
+                self.logger.info('Page scraped, clicking on "more"! flag = {}'.format(response.meta['flag']))
                 yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':response.meta['flag']})
             else:
-                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':0})
+                self.logger.info('First page scraped, clicking on "more"! Flag not set, using default flag = {}'.format(self.k))
+                yield scrapy.Request(new_page, callback=self.parse_page, meta={'flag':self.k})
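The five hard-coded year branches collapse into a single countdown on self.k: when the "more" link is missing, the spider clicks the year link matching the current flag and decrements it, skipping years that are absent from the page. The control flow, isolated as a pure function for review (available_years is a stand-in for the year links actually present on a page):

```python
# Standalone sketch of the year-countdown pagination (not Scrapy code).
def next_year_link(k, target_year, available_years):
    while k >= target_year:
        if k in available_years:
            return k   # click this year's link; the caller resumes with k-1
        k -= 1         # this year is skipped on the page, try the previous one
    return None        # went past the target year: stop paginating

print(next_year_link(2019, 2016, {2018, 2016}))  # -> 2018 (2019 is skipped)
print(next_year_link(2015, 2016, {2018, 2016}))  # -> None (past the target)
```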
 
     def parse_post(self,response):
         new = ItemLoader(item=FbcrawlItem(),response=response,parent=response.meta['item'])
         new.add_xpath('source', "//td/div/h3/strong/a/text() | //span/strong/a/text() | //div/div/div/a[contains(@href,'post_id')]/strong/text()")
-        new.add_xpath('date', '//div/div/abbr/text()')
+        #original page of the post, when the post is a share
+        new.add_xpath('shared_from','//div[contains(@data-ft,"top_level_post_id") and contains(@data-ft,\'"isShare":1\')]/div/div[3]//strong/a/text()')
+        new.add_xpath('date','//div/div/abbr/text()')
         new.add_xpath('text','//div[@data-ft]//p//text() | //div[@data-ft]/div[@class]/div[@class]/text()')
+        new.add_xpath('reactions',"//a[contains(@href,'reaction/profile')]/div/div/text()")
 
         reactions = response.xpath("//div[contains(@id,'sentence')]/a[contains(@href,'reaction/profile')]/@href")
         reactions = response.urljoin(reactions[0].extract())
@@ -175,4 +199,4 @@ class FacebookSpider(scrapy.Spider):
         new.add_xpath('wow',"//a[contains(@href,'reaction_type=3')]/span/text()")
         new.add_xpath('sigh',"//a[contains(@href,'reaction_type=7')]/span/text()")
         new.add_xpath('grrr',"//a[contains(@href,'reaction_type=8')]/span/text()")
-        yield new.load_item()
+        yield new.load_item()
\ No newline at end of file
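parse_post resolves the per-reaction counts from the reaction/profile page via one XPath per reaction_type code. For reference, the codes visible in this hunk gathered in one place (the codes for likes, ahah, and love are set elsewhere and do not appear in this diff, so they are deliberately omitted):

```python
# Sketch: reaction_type codes taken from the XPaths in the hunk above.
SHOWN_REACTION_CODES = {'wow': 3, 'sigh': 7, 'grrr': 8}

def add_reaction_fields(loader):
    # loader is the scrapy.loader.ItemLoader built in parse_post
    for field, code in SHOWN_REACTION_CODES.items():
        loader.add_xpath(
            field,
            "//a[contains(@href,'reaction_type={}')]/span/text()".format(code))
```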