Read files in chunks instead when hashing (#416)

This commit is contained in:
Ali Parlakci 2021-05-21 21:41:57 +03:00 committed by Serene
parent 6e34493bb1
commit da8c64ec51

View File

@ -22,9 +22,15 @@ logger = logging.getLogger(__name__)
def _calc_hash(existing_file: Path):
    """Compute the MD5 hex digest of a file, reading it in chunks.

    Args:
        existing_file: Path of the file to hash; it is opened in binary mode.

    Returns:
        A ``(existing_file, file_hash)`` tuple, where ``file_hash`` is the
        hexadecimal MD5 digest of the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    CHUNK_SIZE = 1024 * 1024  # bytes requested per read (1 MiB)
    md5_hash = hashlib.md5()
    with open(existing_file, 'rb') as file:
        # Feed the digest one chunk at a time so the whole file is never
        # held in memory at once; read() returns b'' at end of file, which
        # ends the loop.
        chunk = file.read(CHUNK_SIZE)
        while chunk:
            md5_hash.update(chunk)
            chunk = file.read(CHUNK_SIZE)
    file_hash = md5_hash.hexdigest()
    return existing_file, file_hash
class RedditDownloader(RedditConnector): class RedditDownloader(RedditConnector):