From cc6c0775463e9b814f6f69339b2398e88d64ef94 Mon Sep 17 00:00:00 2001 From: Giuseppe Attardi Date: Sat, 11 Apr 2015 03:43:32 +0200 Subject: [PATCH] fix to loop in parameter expansion. --- ChangeLog | 11 ++++++++ WikiExtractor.py | 69 +++++++++++++++++++++++++++++------------------- 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8c5c1fe..286b22c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2015-04-11 Giuseppe Attardi + + * WikiExtractor.py (substParameter): added parameter param_depth + to control depth of parameter expansion, separately from depth, + used for template expansion. + +2015-04-10 Giuseppe Attardi + + * WikiExtractor.py (callParserFunction): return '' also in case of + failure. + 2015-04-09 Giuseppe Attardi * WikiExtractor.py (expandTemplates): replaced frame parameter with diff --git a/WikiExtractor.py b/WikiExtractor.py index b5a44e7..a002833 100755 --- a/WikiExtractor.py +++ b/WikiExtractor.py @@ -298,6 +298,12 @@ def expandTemplates(text, depth=0): # template = "{{" parts "}}" + if depth > maxTemplateRecursionLevels: + logging.warn('Max template recursion exceeded!') + return '' + + logging.debug(' ' + str(depth)) return text # leftover res += text[cur:] @@ -380,20 +387,22 @@ def splitParameters(paramsList, sep='|'): return parameters -def templateParams(parameters, frame): +def templateParams(parameters, depth): """ Build a dictionary with positional or name key to expanded parameters. :param parameters: the parts[1:] of a template, i.e. all except the title. + :param depth: recursion depth. 
""" templateParams = {} if not parameters: return templateParams + #logging.debug(' ' + str(depth) + ' ' + ret) return ret else: logging.warn('Reached max template recursion: ' + str(maxTemplateRecursionLevels)) + logging.debug(' INVOCATION> ' + str(depth) + ' ' + template) return template else: # The page being included could not be identified return "" -def substParameter(parameter, templateParams, depth): +def substParameter(parameter, templateParams, depth, param_depth=0): """ :param parameter: the parts of a tplarg. :param templateParams: dict of name-values template parameters. """ + #logging.debug('substParameter ' + str(depth) + ' ' + parameter) # the parameter name itself might contain templates, e.g.: # appointe{{#if:{{{appointer14|}}}|r|d}}14| @@ -831,7 +845,7 @@ def substParameter(parameter, templateParams, depth): for s,e in findMatchingBraces(parameter, '(?