Corrected the logic to avoid redirect pages

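The original condition accepted every title in which no colon was found (colon < 0) without applying the remaining checks: because `and` binds more tightly than `or`, the duplicate-id, redirect and template-namespace tests were only evaluated for titles with an accepted namespace prefix. The added parentheses group the two namespace alternatives so that those tests now apply to every title.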
This commit is contained in:
Kapil Kukreja 2021-06-25 14:35:41 +05:30 committed by GitHub
parent 881f3e4252
commit b4eac55da6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -421,7 +421,7 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress,
page.append(line)
elif tag == '/page':
colon = title.find(':')
if (colon < 0 or (title[:colon] in acceptedNamespaces) and id != last_id and
if (colon < 0 or (title[:colon] in acceptedNamespaces)) and (id != last_id and
not redirect and not title.startswith(templateNamespace)):
job = (id, revid, urlbase, title, page, ordinal)
jobs_queue.put(job) # goes to any available extract_process