Fix crash on entries without a namespace attribute.

The crash occurs while processing enwiki-20170508-cirrussearch-content.json.gz,
on the entry with id AVQXnGH_62ewIKYZMTMP.
This commit is contained in:
denin 2017-05-23 15:24:10 +03:00
parent 2a5e6aebc0
commit 24db54b2c8

View File

@@ -169,7 +169,7 @@ def process_dump(input_file, out_file, file_size, file_compress):
content = json.loads(input.readline())
type = index['index']['_type']
id = index['index']['_id']
if type == 'page' and content['namespace'] == 0:
if type == 'page' and content.get('namespace') == 0:
title = content['title']
text = content['text']
# drop references: