diff --git a/wikiextractor/WikiExtractor.py b/wikiextractor/WikiExtractor.py
index 99a39ad..feab143 100755
--- a/wikiextractor/WikiExtractor.py
+++ b/wikiextractor/WikiExtractor.py
@@ -79,7 +79,6 @@ knownNamespaces = set(['Template'])
 # The namespace used for template definitions
 # It is the name associated with namespace key=10 in the siteinfo header.
 templateNamespace = ''
-templatePrefix = ''
 
 ##
 # The namespace used for module definitions
@@ -196,8 +195,7 @@ def load_templates(file, output_file=None):
     Load templates from :param file:.
     :param output_file: file where to save templates and modules.
     """
-    global templateNamespace, templatePrefix
-    templatePrefix = templateNamespace + ':'
+    global templateNamespace
     global moduleNamespace, modulePrefix
     modulePrefix = moduleNamespace + ':'
     articles = 0
@@ -220,6 +218,13 @@ def load_templates(file, output_file=None):
             page = []
         elif tag == 'title':
             title = m.group(3)
+            if not output_file and not templateNamespace:  # do not know it yet
+                # we reconstruct it from the first title
+                colon = title.find(':')
+                if colon > 1:
+                    templateNamespace = title[:colon]
+                    Extractor.templatePrefix = title[:colon + 1]
+                # FIXME: should reconstruct also moduleNamespace
         elif tag == 'text':
             inText = True
             line = line[m.start(3):m.end(3)]
@@ -233,18 +238,11 @@ def load_templates(file, output_file=None):
         elif inText:
             page.append(line)
         elif tag == '/page':
-            if not output_file and not templateNamespace:  # do not know it yet
-                # we reconstruct it from the first title
-                colon = title.find(':')
-                if colon > 1:
-                    templateNamespace = title[:colon]
-                    templatePrefix = title[:colon + 1]
-            # FIXME: should reconstruct also moduleNamespace
-            if title.startswith(templatePrefix):
+            if title.startswith(Extractor.templatePrefix):
                 define_template(title, page)
                 templates += 1
             # save templates and modules to file
-            if output_file and (title.startswith(templatePrefix) or
+            if output_file and (title.startswith(Extractor.templatePrefix) or
                                 title.startswith(modulePrefix)):
                 output.write('<page>\n')
                 output.write('   <title>%s</title>\n' % title)
@@ -279,6 +277,63 @@ def decode_open(filename, mode='rt', encoding='utf-8'):
         return open(filename, mode, encoding=encoding)
 
 
+def collect_pages(text):
+    """
+    :param text: the text of a Wikipedia file dump.
+ """ + # we collect individual lines, since str.join() is significantly faster + # than concatenation + page = [] + id = '' + revid = '' + last_id = '' + inText = False + redirect = False + for line in text: + if '<' not in line: # faster than doing re.search() + if inText: + page.append(line) + continue + m = tagRE.search(line) + if not m: + continue + tag = m.group(2) + if tag == 'page': + page = [] + redirect = False + elif tag == 'id' and not id: + id = m.group(3) + elif tag == 'id' and id: # + revid = m.group(3) + elif tag == 'title': + title = m.group(3) + elif tag == 'redirect': + redirect = True + elif tag == 'text': + inText = True + line = line[m.start(3):m.end(3)] + page.append(line) + if m.lastindex == 4: # open-close + inText = False + elif tag == '/text': + if m.group(1): + page.append(m.group(1)) + inText = False + elif inText: + page.append(line) + elif tag == '/page': + colon = title.find(':') + if (colon < 0 or (title[:colon] in acceptedNamespaces) and id != last_id and + not redirect and not title.startswith(templateNamespace)): + yield (id, revid, title, page) + last_id = id + id = '' + revid = '' + page = [] + inText = False + redirect = False + + def process_dump(input_file, template_file, out_file, file_size, file_compress, process_count, html_safe): """ @@ -290,7 +345,7 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress, :param process_count: number of extraction processes to spawn. """ global knownNamespaces - global templateNamespace, templatePrefix + global templateNamespace global moduleNamespace, modulePrefix urlbase = '' # This is obtained from @@ -313,7 +368,7 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress, knownNamespaces.add(m.group(3)) if re.search('key="10"', line): templateNamespace = m.group(3) - templatePrefix = templateNamespace + ':' + Extractor.templatePrefix = templateNamespace + ':' elif re.search('key="828"', line): moduleNamespace = m.group(3) modulePrefix = moduleNamespace + ':' @@ -383,56 +438,12 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress, # we collect individual lines, since str.join() is significantly faster # than concatenation - page = [] - id = '' - revid = '' - last_id = '' + ordinal = 0 # page count - inText = False - redirect = False - for line in input: - if '<' not in line: # faster than doing re.search() - if inText: - page.append(line) - continue - m = tagRE.search(line) - if not m: - continue - tag = m.group(2) - if tag == 'page': - page = [] - redirect = False - elif tag == 'id' and not id: - id = m.group(3) - elif tag == 'id' and id: # - revid = m.group(3) - elif tag == 'title': - title = m.group(3) - elif tag == 'redirect': - redirect = True - elif tag == 'text': - inText = True - line = line[m.start(3):m.end(3)] - page.append(line) - if m.lastindex == 4: # open-close - inText = False - elif tag == '/text': - if m.group(1): - page.append(m.group(1)) - inText = False - elif inText: - page.append(line) - elif tag == '/page': - colon = title.find(':') - if (colon < 0 or (title[:colon] in acceptedNamespaces) and id != last_id and - not redirect and not title.startswith(templateNamespace)): - job = (id, revid, urlbase, title, page, ordinal) - jobs_queue.put(job) # goes to any available extract_process - last_id = id - ordinal += 1 - id = '' - revid = '' - page = [] + for id, revid, title, page in collect_pages(input): + job = (id, revid, urlbase, title, page, ordinal) + jobs_queue.put(job) # goes to any available extract_process 
+        ordinal += 1
 
     input.close()
 
@@ -467,7 +478,7 @@ def extract_process(jobs_queue, output_queue, html_safe):
     :html_safe: whether to convert entities in text to HTML.
     """
     while True:
-        job = jobs_queue.get()  # job is (id, revid, urlbase, title, page, ordinal)
+        job = jobs_queue.get()  # job is (id, revid, urlbase, title, page)
         if job:
             out = StringIO()  # memory buffer
             Extractor(*job[:-1]).extract(out, html_safe)  # (id, urlbase, title, page)
@@ -479,7 +490,8 @@ def extract_process(jobs_queue, output_queue, html_safe):
 
 
 def reduce_process(output_queue, output):
-    """Pull finished article text, write series of files (or stdout)
+    """
+    Pull finished article text, write series of files (or stdout)
     :param output_queue: text to be output.
     :param output: file object where to print.
     """
@@ -515,7 +527,7 @@ minFileSize = 200 * 1024
 
 def main():
-    global urlbase, acceptedNamespaces
+    global acceptedNamespaces
     global expand_templates, templateCache
 
     parser = argparse.ArgumentParser(prog=os.path.basename(sys.argv[0]),
@@ -609,24 +621,10 @@ def main():
             with open(args.templates) as file:
                 load_templates(file)
 
-        with open(input_file) as file:
-            page = file.read()
-            ids = re.findall(r'<id>(\d*?)</id>', page)
-            id = ids[0] if ids else ''
-            revid = ids[1] if len(ids) > 1 else ''
-            m = re.search(r'<title>(.*?)</title>', page)
-            if m:
-                title = m.group(1)
-            else:
-                logging.error('Missing title element')
-                return
-            m = re.search(r'<base>(.*?)</base>', page)
-            if m:
-                base = m.group(1)
-                urlbase = base[:base.rfind("/")]
-            else:
-                urlbase = ''
-        Extractor(id, revid, urlbase, title, [page]).extract(sys.stdout)
+        urlbase = ''
+        with open(input_file) as input:
+            for id, revid, title, page in collect_pages(input):
+                Extractor(id, revid, urlbase, title, page).extract(sys.stdout)
         return
 
     output_path = args.output
diff --git a/wikiextractor/extract.py b/wikiextractor/extract.py
index 2180dc4..5a3bd5a 100644
--- a/wikiextractor/extract.py
+++ b/wikiextractor/extract.py
@@ -199,7 +199,12 @@ def compact(text, mark_headers=False):
 
     for line in text.split('\n'):
         if not line:
+            if len(listLevel):  # implies Extractor.HtmlFormatting
+                for c in reversed(listLevel):
+                    page.append(listClose[c])
+                listLevel = ''
             continue
+
         # Handle section titles
         m = section.match(line)
         if m:
@@ -227,36 +232,35 @@ def compact(text, mark_headers=False):
             page.append(title)
         # handle indents
         elif line[0] == ':':
-            # page.append(line.lstrip(':*#;'))
-            continue
+            page.append(line.lstrip(':'))
         # handle lists
-        elif line[0] in '*#;:':
+        # @see https://www.mediawiki.org/wiki/Help:Formatting
+        elif line[0] in '*#;':
             if Extractor.HtmlFormatting:
-                i = 0
-                for c, n in zip_longest(listLevel, line, fillvalue=''):
-                    if not n or n not in '*#;:':
-                        if c:
-                            page.append(listClose[c])
-                            listLevel = listLevel[:-1]
-                            continue
-                        else:
-                            break
-                    # n != ''
-                    if c != n and (not c or (c not in ';:' and n not in ';:')):
-                        if c:
-                            # close level
-                            page.append(listClose[c])
-                            listLevel = listLevel[:-1]
-                        listLevel += n
-                        page.append(listOpen[n])
-                    i += 1
-                n = line[i - 1]  # last list char
-                line = line[i:].strip()
-                if line:  # FIXME: n is '"'
-                    page.append(listItem[n] % line)
+                # close extra levels
+                l = 0
+                for c in listLevel:
+                    if l < len(line) and c != line[l]:
+                        for extra in reversed(listLevel[l:]):
+                            page.append(listClose[extra])
+                        listLevel = listLevel[:l]
+                        break
+                    l += 1
+                if l < len(line) and line[l] in '*#;:':
+                    # add new level (only one, no jumps)
+                    # FIXME: handle jumping levels
+                    type = line[l]
+                    page.append(listOpen[type])
+                    listLevel += type
+                    line = line[l+1:].strip()
+                else:
+                    # continue on same level
+                    type = line[l-1]
+                    line = line[l:].strip()
+                page.append(listItem[type] % line)
             else:
                 continue
-        elif len(listLevel):
+        elif len(listLevel):  # implies Extractor.HtmlFormatting
             for c in reversed(listLevel):
                 page.append(listClose[c])
             listLevel = []
@@ -786,6 +790,114 @@ spaces = re.compile(r' {2,}')
 
 # Matches dots
 dots = re.compile(r'\.{4,}')
 
+# ======================================================================
+
+class Template(list):
+    """
+    A Template is a list of TemplateText or TemplateArgs
+    """
+
+    @classmethod
+    def parse(cls, body):
+        tpl = Template()
+        # we must handle nesting, such as:
+        #   {{{1|{{PAGENAME}}}
+        #   {{{italics|{{{italic|}}}
+        #   {{#if:{{{{{#if:{{{nominee|}}}|nominee|candidate}}|}}}|
+        #
+        start = 0
+        for s, e in findMatchingBraces(body, 3):
+            tpl.append(TemplateText(body[start:s]))
+            tpl.append(TemplateArg(body[s+3:e-3]))
+            start = e
+        tpl.append(TemplateText(body[start:]))  # leftover
+        return tpl
+
+    def subst(self, params, extractor, depth=0):
+        # We perform parameter substitutions recursively.
+        # We also limit the maximum number of iterations to avoid too long or
+        # even endless loops (in case of malformed input).
+
+        # :see: http://meta.wikimedia.org/wiki/Help:Expansion#Distinction_between_variables.2C_parser_functions.2C_and_templates
+        #
+        # Parameter values are assigned to parameters in two (?) passes.
+        # Therefore a parameter name in a template can depend on the value of
+        # another parameter of the same template, regardless of the order in
+        # which they are specified in the template call, for example, using
+        # Template:ppp containing "{{{{{{p}}}}}}", {{ppp|p=q|q=r}} and even
+        # {{ppp|q=r|p=q}} gives r, but using Template:tvvv containing
+        # "{{{{{{{{{p}}}}}}}}}", {{tvvv|p=q|q=r|r=s}} gives s.
+
+        #logging.debug('subst tpl (%d, %d) %s', len(extractor.frame), depth, self)
+
+        if depth > extractor.maxParameterRecursionLevels:
+            extractor.recursion_exceeded_3_errs += 1
+            return ''
+
+        return ''.join([tpl.subst(params, extractor, depth) for tpl in self])
+
+    def __str__(self):
+        return ''.join([str(x) for x in self])
+
+
+class TemplateText(str):
+    """Fixed text of template"""
+
+    def subst(self, params, extractor, depth):
+        return self
+
+
+class TemplateArg():
+    """
+    Parameter to a template.
+    Has a name and a default value, both of which are Templates.
+    """
+    def __init__(self, parameter):
+        """
+        :param parameter: the parts of a tplarg.
+        """
+        # the parameter name itself might contain templates, e.g.:
+        #   appointe{{#if:{{{appointer14|}}}|r|d}}14|
+        #   4|{{{{{subst|}}}CURRENTYEAR}}
 
+        # any parts in a tplarg after the first (the parameter default) are
+        # ignored, and an equals sign in the first part is treated as plain text.
+        #logging.debug('TemplateArg %s', parameter)
+
+        parts = splitParts(parameter)
+        self.name = Template.parse(parts[0])
+        if len(parts) > 1:
+            # This parameter has a default value
+            self.default = Template.parse(parts[1])
+        else:
+            self.default = None
+
+    def __str__(self):
+        if self.default:
+            return '{{{%s|%s}}}' % (self.name, self.default)
+        else:
+            return '{{{%s}}}' % self.name
+
+    def subst(self, params, extractor, depth):
+        """
+        Substitute value for this argument from dict :param params:
+        Use :param extractor: to evaluate expressions for name and default.
+        Limit substitution to the maximum :param depth:.
+        """
+        # the parameter name itself might contain templates, e.g.:
+        #   appointe{{#if:{{{appointer14|}}}|r|d}}14|
+        paramName = self.name.subst(params, extractor, depth+1)
+        paramName = extractor.expandTemplates(paramName)
+        res = ''
+        if paramName in params:
+            res = params[paramName]  # use parameter value specified in template invocation
+        elif self.default:  # use the default value
+            defaultValue = self.default.subst(params, extractor, depth+1)
+            res = extractor.expandTemplates(defaultValue)
+        #logging.debug('subst arg %d %s -> %s' % (depth, paramName, res))
+        return res
+
 
 # ======================================================================
 
 substWords = 'subst:|safesubst:'
@@ -811,6 +923,10 @@ class Extractor():
     # Whether to produce json instead of the default output format.
     toJson = False
 
+    ##
+    # Obtained from TemplateNamespace
+    templatePrefix = ''
+
     def __init__(self, id, revid, urlbase, title, page):
         """
         :param page: a list of lines.
@@ -827,12 +943,13 @@ class Extractor():
         self.recursion_exceeded_3_errs = 0  # parameter recursion
         self.template_title_errs = 0
 
-    def clean_text(self, text, mark_headers=False, expand_templates=False,
+    def clean_text(self, text, mark_headers=False, expand_templates=True,
                    html_safe=True):
         """
         :param mark_headers: True to distinguish headers from paragraphs
          e.g. "## Section 1"
         """
+        self.magicWords['namespace'] = self.title[:max(0, self.title.find(":"))]
         self.magicWords['pagename'] = self.title
         self.magicWords['fullpagename'] = self.title
         self.magicWords['currentyear'] = time.strftime('%Y')
@@ -978,7 +1095,11 @@ class Extractor():
         # The '=' might occurr within an HTML attribute:
         #   "<ref name=value"
         # but we stop at first.
-        m = re.match(' *([^=]*?) *=(.*)', param, re.DOTALL)
+
+        # The '=' might occur within quotes:
+        #   ''''cénicas'''
+
+        m = re.match(" *([^=']*?) *=(.*)", param, re.DOTALL)
         if m:
             # This is a named parameter.  This case also handles parameter
             # assignments like "2=xxx", where the number of an unnamed
@@ -1273,7 +1394,7 @@ def findMatchingBraces(text, ldelim=0):
     if ldelim:  # 2-3
         reOpen = re.compile('[{]{%d,}' % ldelim)  # at least ldelim
-        reNext = re.compile('[{]{2,}|}{2,}')  # at least 2
+        reNext = re.compile('[{]{2,}|}{2,}')  # at least 2 open or close braces
     else:
         reOpen = re.compile('{{2,}|\[{2,}')
         reNext = re.compile('{{2,}|}{2,}|\[{2,}|]{2,}')  # at least 2
@@ -1439,7 +1560,7 @@ def fullyQualifiedTemplateTitle(templateTitle):
     # space]], but having in the system a redirect page with an empty title
     # causes numerous problems, so we'll live happier without it.
     if templateTitle:
-        return templatePrefix + ucfirst(templateTitle)
+        return Extractor.templatePrefix + ucfirst(templateTitle)
     else:
         return ''  # caller may log as error
@@ -1489,7 +1610,7 @@ def sharp_expr(expr):
         expr = re.sub('mod', '%', expr)
         expr = re.sub('\bdiv\b', '/', expr)
         expr = re.sub('\bround\b', '|ROUND|', expr)
-        return unicode(eval(expr))
+        return str(eval(expr))
     except:
         return ''
@@ -1675,7 +1796,7 @@ def callParserFunction(functionName, args, frame):
 reNoinclude = re.compile(r'<noinclude>(?:.*?)</noinclude>', re.DOTALL)
 reIncludeonly = re.compile(r'<includeonly>|</includeonly>', re.DOTALL)
 
-# These are built before spawning processes, hence thay are shared.
+# These are built before spawning processes, hence they are shared.
 templates = {}
 redirects = {}
 # cache of parser templates
diff --git a/wikiextractor/extractPage.py b/wikiextractor/extractPage.py
index 1e40410..83b6758 100755
--- a/wikiextractor/extractPage.py
+++ b/wikiextractor/extractPage.py
@@ -46,7 +46,8 @@ tagRE = re.compile(r'(.*?)<(/?\w+)[^>]*>(?:([^<]*)(<.*?>)?)?')
 
 def process_data(input_file, id, templates=False):
     """
     :param input_file: name of the wikipedia dump file.
-    :param id: article id
+    :param id: article id.
+    :param templates: whether article is a template.
     """
 
     if input_file.lower().endswith(".bz2"):
@@ -105,9 +106,9 @@ def main():
     parser.add_argument("--id", default="1",
                         help="article number")
     parser.add_argument("--template", action="store_true",
-                        help="template number")
+                        help="whether article is a template")
     parser.add_argument("-v", "--version", action="version",
-                        version='%(prog)s ' + version,
+                        version='%(prog)s ' + __version__,
                         help="print program version")
 
     args = parser.parse_args()
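
The core refactoring above extracts the page-scanning loop of process_dump()
into the collect_pages() generator, so the same scanner now feeds both the
multi-process pipeline and the new single-document path in main(). A minimal
usage sketch, assuming the module-level names from this diff (decode_open,
collect_pages) and a hypothetical dump path:

    from wikiextractor.WikiExtractor import decode_open, collect_pages

    with decode_open('dump.xml.bz2') as input:   # path is hypothetical
        for id, revid, title, page in collect_pages(input):
            # page is a list of raw wikitext lines; join lazily, since
            # str.join() is faster than repeated concatenation
            print(id, revid, title, len(''.join(page)))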
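
The rewritten list handling in compact() treats listLevel as a stack of the
currently open list markers ('*', '#', ';', ':'): levels that no longer match
the line's prefix are closed, and at most one new level is opened per line
(see the FIXME about jumping levels). A standalone sketch of that stack
discipline, with assumed tag tables (the real listOpen, listClose and
listItem mappings are defined elsewhere in extract.py):

    listOpen = {'*': '<ul>', '#': '<ol>', ';': '<dl>', ':': '<dl>'}
    listClose = {'*': '</ul>', '#': '</ol>', ';': '</dl>', ':': '</dl>'}
    listItem = {'*': '<li>%s</li>', '#': '<li>%s</li>',
                ';': '<dt>%s</dt>', ':': '<dd>%s</dd>'}

    def lists_to_html(lines):
        page, level = [], ''
        for line in lines:
            # keep the longest common prefix of open levels and markers
            l = 0
            while l < len(level) and l < len(line) and level[l] == line[l]:
                l += 1
            for c in reversed(level[l:]):        # close mismatched levels
                page.append(listClose[c])
            level = level[:l]
            if l < len(line) and line[l] in '*#;:':  # open one new level
                level += line[l]
                page.append(listOpen[line[l]])
                l += 1
            if level:
                page.append(listItem[level[-1]] % line[l:].strip())
        for c in reversed(level):                # close what remains open
            page.append(listClose[c])
        return '\n'.join(page)

    print(lists_to_html(['* a', '*# b', '* c']))
    # <ul> <li>a</li> <ol> <li>b</li> </ol> <li>c</li> </ul>, one tag per line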
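
The new Template/TemplateArg classes implement MediaWiki's tplarg rule:
{{{name|default}}} takes the value supplied by the caller when present,
otherwise its default, and both the name and the default may themselves
require expansion. A simplified, non-recursive sketch of just the fallback
rule (the real classes also parse nesting via findMatchingBraces and guard
recursion depth):

    import re

    tplarg = re.compile(r'{{{([^{}|]*)(?:\|([^{}]*))?}}}')

    def subst_params(body, params):
        def repl(m):
            name, default = m.group(1), m.group(2) or ''
            return params.get(name, default)
        return tplarg.sub(repl, body)

    print(subst_params('Hello {{{name|world}}}!', {}))                # Hello world!
    print(subst_params('Hello {{{name|world}}}!', {'name': 'Wiki'}))  # Hello Wiki!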
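
The parameter-parsing regex change (the hunk at -978 above) excludes
apostrophes from the name part, so an '=' hidden inside wiki quote markup no
longer turns an unnamed parameter into a bogus named one. A small
demonstration with a hypothetical parameter value:

    import re

    old = re.compile(' *([^=]*?) *=(.*)', re.DOTALL)
    new = re.compile(" *([^=']*?) *=(.*)", re.DOTALL)

    param = "'''word''' = 1"               # hypothetical parameter text
    print(old.match(param).group(1))       # '''word''' -- parsed as a name
    print(new.match(param))                # None -- stays an unnamed param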