See ChangeLog.

2015-05-06 16:08:27 +02:00 · 2015-05-06 16:08:27 +02:00 · d5cca5da43
commit d5cca5da43
parent b44b750056
2 changed files with 11 additions and 6 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
 2015-05-06  Giuseppe Attardi  <attardi@di.unipi.it>
 	* WikiExtractor.py (main): fixed arg.namespaces.
 	(compact): use fillvalue=' ' in izip_longest.
 2015-04-26  Giuseppe Attardi  <attardi@di.unipi.it>
 	* WikiExtractor.py (clean): use re.U when matching \W or chinese
--- a/WikiExtractor.py
+++ b/WikiExtractor.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 #
 # =============================================================================
-#  Version: 2.32 (Apr 26, 2015)
+#  Version: 2.33 (May 6, 2015)
 #  Author: Giuseppe Attardi (attardi@di.unipi.it), University of Pisa
 #
 #  Contributors:
@@ -60,7 +60,7 @@ import Queue, threading, multiprocessing
 #===========================================================================
 # Program version
-version = '2.32'
+version = '2.33'
 ### PARAMS ####################################################################
@@ -1761,7 +1761,7 @@ def makeInternalLink(title, label):
        if colon2 > 1 and title[colon+1:colon2] not in acceptedNamespaces:
            return ''
    if Extractor.keepLinks:
-        return '<a href="%s">%s</a>' % (urllib.quote(title.encode('utf-8')), anchor)
+        return '<a href="%s">%s</a>' % (urllib.quote(title.encode('utf-8')), label)
    else:
        return label
@@ -2009,7 +2009,7 @@ def compact(text):
        elif line[0] in '*#;:':
            if Extractor.toHTML:
                i = 0
-                for c,n in izip_longest(listLevel, line):
+                for c,n in izip_longest(listLevel, line, fillvalue=' '):
                    if n not in '*#;:':
                        if c:
                            page.append(listClose[c])
@@ -2022,7 +2022,7 @@ def compact(text):
                            # close level
                            page.append(listClose[c])
                            listLevel = listLevel[:-1]
-                        listLevel = listLevel + n
+                        listLevel += n
                        page.append(listOpen[n])
                    i += 1
                n = line[i-1]
@@ -2406,7 +2406,7 @@ def main():
        return
    if args.namespaces:
-        acceptedNamespaces = set(args.ns.split(','))
+        acceptedNamespaces = set(args.namespaces.split(','))
    FORMAT = '%(levelname)s: %(message)s'
    logging.basicConfig(format=FORMAT)