Read files in chunks instead when hashing (#416)
This commit is contained in:
parent
6e34493bb1
commit
da8c64ec51
@ -22,9 +22,15 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def _calc_hash(existing_file: Path):
|
def _calc_hash(existing_file: Path):
|
||||||
|
CHUNK_SIZE = 1024 * 1024
|
||||||
|
md5_hash = hashlib.md5()
|
||||||
with open(existing_file, 'rb') as file:
|
with open(existing_file, 'rb') as file:
|
||||||
file_hash = hashlib.md5(file.read()).hexdigest()
|
chunk = file.read(CHUNK_SIZE)
|
||||||
return existing_file, file_hash
|
while chunk:
|
||||||
|
md5_hash.update(chunk)
|
||||||
|
chunk = file.read(CHUNK_SIZE)
|
||||||
|
file_hash = md5_hash.hexdigest()
|
||||||
|
return existing_file, file_hash
|
||||||
|
|
||||||
|
|
||||||
class RedditDownloader(RedditConnector):
|
class RedditDownloader(RedditConnector):
|
||||||
|
Loading…
Reference in New Issue
Block a user