See ChangeLog.
This commit is contained in:
parent
b44b750056
commit
d5cca5da43
@ -1,3 +1,8 @@
|
|||||||
|
2015-05-06 Giuseppe Attardi <attardi@di.unipi.it>
|
||||||
|
|
||||||
|
* WikiExtractor.py (main): fixed args.namespaces.
|
||||||
|
(compact): use fillvalue=' ' in izip_longest.
|
||||||
|
|
||||||
2015-04-26 Giuseppe Attardi <attardi@di.unipi.it>
|
2015-04-26 Giuseppe Attardi <attardi@di.unipi.it>
|
||||||
|
|
||||||
* WikiExtractor.py (clean): use re.U when matching \W or Chinese
|
* WikiExtractor.py (clean): use re.U when matching \W or Chinese
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Version: 2.32 (Apr 26, 2015)
|
# Version: 2.33 (May 6, 2015)
|
||||||
# Author: Giuseppe Attardi (attardi@di.unipi.it), University of Pisa
|
# Author: Giuseppe Attardi (attardi@di.unipi.it), University of Pisa
|
||||||
#
|
#
|
||||||
# Contributors:
|
# Contributors:
|
||||||
@ -60,7 +60,7 @@ import Queue, threading, multiprocessing
|
|||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
||||||
# Program version
|
# Program version
|
||||||
version = '2.32'
|
version = '2.33'
|
||||||
|
|
||||||
### PARAMS ####################################################################
|
### PARAMS ####################################################################
|
||||||
|
|
||||||
@ -1761,7 +1761,7 @@ def makeInternalLink(title, label):
|
|||||||
if colon2 > 1 and title[colon+1:colon2] not in acceptedNamespaces:
|
if colon2 > 1 and title[colon+1:colon2] not in acceptedNamespaces:
|
||||||
return ''
|
return ''
|
||||||
if Extractor.keepLinks:
|
if Extractor.keepLinks:
|
||||||
return '<a href="%s">%s</a>' % (urllib.quote(title.encode('utf-8')), anchor)
|
return '<a href="%s">%s</a>' % (urllib.quote(title.encode('utf-8')), label)
|
||||||
else:
|
else:
|
||||||
return label
|
return label
|
||||||
|
|
||||||
@ -2009,7 +2009,7 @@ def compact(text):
|
|||||||
elif line[0] in '*#;:':
|
elif line[0] in '*#;:':
|
||||||
if Extractor.toHTML:
|
if Extractor.toHTML:
|
||||||
i = 0
|
i = 0
|
||||||
for c,n in izip_longest(listLevel, line):
|
for c,n in izip_longest(listLevel, line, fillvalue=' '):
|
||||||
if n not in '*#;:':
|
if n not in '*#;:':
|
||||||
if c:
|
if c:
|
||||||
page.append(listClose[c])
|
page.append(listClose[c])
|
||||||
@ -2022,7 +2022,7 @@ def compact(text):
|
|||||||
# close level
|
# close level
|
||||||
page.append(listClose[c])
|
page.append(listClose[c])
|
||||||
listLevel = listLevel[:-1]
|
listLevel = listLevel[:-1]
|
||||||
listLevel = listLevel + n
|
listLevel += n
|
||||||
page.append(listOpen[n])
|
page.append(listOpen[n])
|
||||||
i += 1
|
i += 1
|
||||||
n = line[i-1]
|
n = line[i-1]
|
||||||
@ -2406,7 +2406,7 @@ def main():
|
|||||||
return
|
return
|
||||||
|
|
||||||
if args.namespaces:
|
if args.namespaces:
|
||||||
acceptedNamespaces = set(args.ns.split(','))
|
acceptedNamespaces = set(args.namespaces.split(','))
|
||||||
|
|
||||||
FORMAT = '%(levelname)s: %(message)s'
|
FORMAT = '%(levelname)s: %(message)s'
|
||||||
logging.basicConfig(format=FORMAT)
|
logging.basicConfig(format=FORMAT)
|
||||||
|
Loading…
Reference in New Issue
Block a user