Adding tarfile member sanitization to extractall()

This commit is contained in:
TrellixVulnTeam 2022-10-29 20:06:10 +00:00
parent 4a5dfa5be1
commit d23940a68e
2 changed files with 43 additions and 2 deletions

View File

@ -90,7 +90,29 @@ class bAbI():
with urlopen(req) as files:
with tarfile.open(fileobj=files, mode="r|gz") as tar:
tar.extractall(path=DEFAULT_DATA_FOLDER)
import os
def is_within_directory(directory, target):
    """Return True when *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (so ``..`` segments are collapsed) and then compared component by
    component via ``os.path.commonpath``.  A character-wise prefix test is
    not sufficient: it would accept ``/data_evil`` as being inside ``/data``.

    Args:
        directory: The directory that is supposed to contain *target*.
        target: The path to check.

    Returns:
        bool: True if *target* is inside (or equal to) *directory*.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # commonpath refuses paths that cannot share a root (for example
        # different Windows drives); such a target is certainly outside.
        return False
    return common == abs_directory


def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path*, rejecting members that escape *path*.

    Members are validated lazily, one at a time, as ``extractall`` consumes
    them.  This keeps the function usable with archives opened in a stream
    mode such as ``"r|gz"``, which can only be read forwards: listing every
    member first and extracting afterwards would require seeking backwards.
    As a consequence, members that precede an offending entry may already
    have been written when the exception is raised.

    NOTE(review): only member names are checked; symlink/hardlink targets
    are not inspected here.

    Args:
        tar: An open ``tarfile.TarFile``.
        path: Destination directory.
        members: Optional iterable of ``TarInfo`` objects to extract;
            defaults to every member of *tar*.
        numeric_owner: Forwarded to ``TarFile.extractall``.

    Raises:
        Exception: If a member's name resolves outside *path*.
    """
    def checked_members():
        # Iterating the TarFile itself reads headers on demand.
        candidates = tar if members is None else members
        for member in candidates:
            member_path = os.path.join(path, member.name)
            if not is_within_directory(path, member_path):
                raise Exception("Attempted Path Traversal in Tar File")
            yield member

    tar.extractall(path, members=checked_members(), numeric_owner=numeric_owner)
safe_extract(tar, path=DEFAULT_DATA_FOLDER)
data_dir = data_dir / folder_name

View File

@ -125,7 +125,26 @@ class ReadingComprehension():
print("### Extract CNN data")
with urlopen(req) as files:
with tarfile.open(fileobj=files, mode="r|gz") as tar:
tar.extractall(path=DEFAULT_DATA_FOLDER)
def is_within_directory(directory, target):
    """Return True when *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (so ``..`` segments are collapsed) and then compared component by
    component via ``os.path.commonpath``.  A character-wise prefix test is
    not sufficient: it would accept ``/data_evil`` as being inside ``/data``.

    Args:
        directory: The directory that is supposed to contain *target*.
        target: The path to check.

    Returns:
        bool: True if *target* is inside (or equal to) *directory*.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # commonpath refuses paths that cannot share a root (for example
        # different Windows drives); such a target is certainly outside.
        return False
    return common == abs_directory


def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path*, rejecting members that escape *path*.

    Members are validated lazily, one at a time, as ``extractall`` consumes
    them.  This keeps the function usable with archives opened in a stream
    mode such as ``"r|gz"``, which can only be read forwards: listing every
    member first and extracting afterwards would require seeking backwards.
    As a consequence, members that precede an offending entry may already
    have been written when the exception is raised.

    NOTE(review): only member names are checked; symlink/hardlink targets
    are not inspected here.

    Args:
        tar: An open ``tarfile.TarFile``.
        path: Destination directory.
        members: Optional iterable of ``TarInfo`` objects to extract;
            defaults to every member of *tar*.
        numeric_owner: Forwarded to ``TarFile.extractall``.

    Raises:
        Exception: If a member's name resolves outside *path*.
    """
    def checked_members():
        # Iterating the TarFile itself reads headers on demand.
        candidates = tar if members is None else members
        for member in candidates:
            member_path = os.path.join(path, member.name)
            if not is_within_directory(path, member_path):
                raise Exception("Attempted Path Traversal in Tar File")
            yield member

    tar.extractall(path, members=checked_members(), numeric_owner=numeric_owner)
safe_extract(tar, path=DEFAULT_DATA_FOLDER)
print("### CNN data complete")
data_dir = data_dir / folder_name