Corrected the logic to avoid redirect pages
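The original condition lacked parentheses: because `and` binds tighter than `or`, every title without a colon (colon < 0) was accepted before the duplicate-id, redirect and template-namespace checks were applied.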
This commit is contained in:
parent
881f3e4252
commit
b4eac55da6
@@ -421,7 +421,7 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress,
|
||||
page.append(line)
|
||||
elif tag == '/page':
|
||||
colon = title.find(':')
|
||||
if (colon < 0 or (title[:colon] in acceptedNamespaces) and id != last_id and
|
||||
if (colon < 0 or (title[:colon] in acceptedNamespaces)) and (id != last_id and
|
||||
not redirect and not title.startswith(templateNamespace)):
|
||||
job = (id, revid, urlbase, title, page, ordinal)
|
||||
jobs_queue.put(job) # goes to any available extract_process
|
||||
|
Loading…
Reference in New Issue
Block a user