Force 'utf-8' encoding without relying on platform-dependent default
On Windows the platform default encoding is 'cp1252', so reading the input raises a UnicodeDecodeError. Fixes #89, #144, #165.
This commit is contained in:
parent
3162bb6c3c
commit
ff9a70cd6d
@ -2865,10 +2865,23 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress,
|
||||
:param process_count: number of extraction processes to spawn.
|
||||
"""
|
||||
|
||||
def hook_compressed_encoded(encoding):
    """Build a ``fileinput`` open hook that decodes its input with *encoding*.

    The returned hook picks the opener from the file extension: ``.gz``
    files go through ``gzip.open``, ``.bz2`` files through ``bz2.open``,
    and anything else through the built-in ``open``.  In every case the
    explicit *encoding* is passed on, so decoding never depends on the
    platform default.

    :param encoding: name of the text encoding to decode with, e.g. 'utf-8'.
    :return: a callable ``hook(filename, mode)`` suitable for the
        ``openhook`` argument of ``fileinput.FileInput``.
    """
    def hook(filename, mode):
        # fileinput calls the hook with a bare 'r'.  For gzip.open and
        # bz2.open that means *binary* mode, which rejects the `encoding`
        # argument with a ValueError.  Ask for text mode explicitly unless
        # the caller already chose 't' or 'b'.
        if 't' not in mode and 'b' not in mode:
            mode += 't'

        ext = os.path.splitext(filename)[1]
        if ext == '.gz':
            import gzip
            return gzip.open(filename, mode, encoding=encoding)
        if ext == '.bz2':
            import bz2
            return bz2.open(filename, mode, encoding=encoding)
        return open(filename, mode, encoding=encoding)

    return hook
|
||||
|
||||
if input_file == '-':
|
||||
input = sys.stdin
|
||||
else:
|
||||
input = fileinput.FileInput(input_file, openhook=fileinput.hook_compressed)
|
||||
input = fileinput.FileInput(input_file, openhook=hook_compressed_encoded('utf-8'))
|
||||
|
||||
# collect siteinfo
|
||||
for line in input:
|
||||
|
Loading…
Reference in New Issue
Block a user