Skip to content

Commit

Permalink
Adding tarfile member sanitization to extractall() (#372)
Browse files Browse the repository at this point in the history
  • Loading branch information
TrellixVulnTeam authored Nov 24, 2022
1 parent 9770d30 commit b835743
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 2 deletions.
21 changes: 20 additions & 1 deletion examples/pytorch/bert/bert-quantization-sparsity/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d
logger.info("extracting archive file {} to temp dir {}".format(
resolved_archive_file, tempdir))
with tarfile.open(resolved_archive_file, 'r:gz') as archive:
archive.extractall(tempdir)
def is_within_directory(directory, target):
    """Return True when *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (which collapses ``..`` components) before being compared. The check is
    purely lexical: symbolic links on disk are not resolved.

    Args:
        directory: Directory that is expected to contain ``target``.
        target: Path to test.

    Returns:
        bool: True if ``target`` is inside (or equal to) ``directory``.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    # commonpath() compares whole path components. The previous
    # commonprefix() call compared character by character, so a sibling such
    # as "/tmp/foobar" was wrongly accepted as being inside "/tmp/foo".
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share a root (e.g. different drives
        # on Windows); such a target is certainly not inside the directory.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting members that escape it.

    Every member of the archive is validated (not only those listed in
    ``members``) before anything is written to disk.

    Args:
        tar: Open archive object providing ``getmembers()`` and
            ``extractall()`` (the caller passes the result of
            ``tarfile.open``).
        path: Destination directory.
        members: Optional subset of members, forwarded to ``extractall``.
        numeric_owner: Forwarded to ``extractall``.

    Raises:
        Exception: If a member's name, or the target of a symbolic/hard
            link member, points outside ``path``.

    NOTE(review): the validation is lexical. A chain of links that each look
    harmless on their own can still redirect a later member; on Python
    versions that provide tarfile extraction filters, consider additionally
    passing ``filter="data"`` to ``extractall``.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link whose target is outside ``path`` lets a later member be
        # written through it, so link targets must be validated as well.
        if member.issym():
            # Symlink targets are relative to the directory holding the link.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are names relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(archive, tempdir)
serialization_dir = tempdir
# Load config
config_file = os.path.join(serialization_dir, CONFIG_NAME)
Expand Down
24 changes: 23 additions & 1 deletion examples/pytorch/nemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,29 @@ def unpack_nemo_ckpt(
for tar_mode in ["r:", "r:gz"]:
try:
with tarfile.open(nemo_archive_path, mode=tar_mode) as tar_file:
tar_file.extractall(path=out_dir_path)

import os

def is_within_directory(directory, target):
    """Tell whether *target* is located at or below *directory*.

    The two paths are converted with ``os.path.abspath`` (absolute form,
    ``..`` collapsed) and compared component-wise. Symbolic links on disk
    are not resolved; this is a lexical check only.

    Args:
        directory: Directory expected to contain ``target``.
        target: Path being checked.

    Returns:
        bool: True if ``target`` equals ``directory`` or is nested in it.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    # Use commonpath(), which works on whole components. commonprefix() is a
    # plain string-prefix test and accepted e.g. "/out_evil" for "/out".
    try:
        shared = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # No common root is possible (e.g. different Windows drives), so the
        # target cannot be inside the directory.
        return False

    return shared == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Validate every archive member, then extract *tar* into *path*.

    All members returned by ``tar.getmembers()`` are checked before any file
    is written, regardless of the ``members`` subset requested.

    Args:
        tar: Open archive object providing ``getmembers()`` and
            ``extractall()`` (the caller passes the result of
            ``tarfile.open``).
        path: Destination directory.
        members: Optional subset of members, forwarded to ``extractall``.
        numeric_owner: Forwarded to ``extractall``.

    Raises:
        Exception: If a member name, or the target of a symbolic/hard link
            member, resolves outside ``path``. A plain ``Exception`` is kept
            so the caller's ``except tarfile.ReadError`` does not swallow it.

    NOTE(review): the validation is lexical. Chained links can still
    redirect later members; where the running Python provides tarfile
    extraction filters, consider also passing ``filter="data"``.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # Links need a second check: a link pointing outside ``path`` would
        # let a subsequent member be written through it.
        if member.issym():
            # Symlink targets are relative to the directory holding the link.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are names relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(tar_file, path=out_dir_path)
return out_dir_path
except tarfile.ReadError:
pass
Expand Down

0 comments on commit b835743

Please sign in to comment.