Fix crash on entry without namespace attribute.
The crash occurs while processing enwiki-20170508-cirrussearch-content.json.gz, on the entry with id AVQXnGH_62ewIKYZMTMP, which has no 'namespace' key.
This commit is contained in:
parent
2a5e6aebc0
commit
24db54b2c8
@@ -169,7 +169,7 @@ def process_dump(input_file, out_file, file_size, file_compress):
 content = json.loads(input.readline())
 type = index['index']['_type']
 id = index['index']['_id']
-if type == 'page' and content['namespace'] == 0:
+if type == 'page' and content.get('namespace') == 0:
 title = content['title']
 text = content['text']
 # drop references:
Loading…
Reference in New Issue
Block a user