diff --git a/ML/50-mlps/01-keras-basic/hasy_tools.py b/ML/50-mlps/01-keras-basic/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/01-keras-basic/hasy_tools.py
+++ b/ML/50-mlps/01-keras-basic/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/02-keras-validation-curve/hasy_tools.py b/ML/50-mlps/02-keras-validation-curve/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/02-keras-validation-curve/hasy_tools.py
+++ b/ML/50-mlps/02-keras-validation-curve/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/03-autokeras/hasy_tools.py b/ML/50-mlps/03-autokeras/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/03-autokeras/hasy_tools.py
+++ b/ML/50-mlps/03-autokeras/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/04-keras-1000-epochs/hasy_tools.py b/ML/50-mlps/04-keras-1000-epochs/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/04-keras-1000-epochs/hasy_tools.py
+++ b/ML/50-mlps/04-keras-1000-epochs/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/07-autokeras/hasy_tools.py b/ML/50-mlps/07-autokeras/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/07-autokeras/hasy_tools.py
+++ b/ML/50-mlps/07-autokeras/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/08-keras-cnn/hasy_tools.py b/ML/50-mlps/08-keras-cnn/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/08-keras-cnn/hasy_tools.py
+++ b/ML/50-mlps/08-keras-cnn/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/11-keras-cnn-dropout/hasy_tools.py b/ML/50-mlps/11-keras-cnn-dropout/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/11-keras-cnn-dropout/hasy_tools.py
+++ b/ML/50-mlps/11-keras-cnn-dropout/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/12-keras-cnn-half/hasy_tools.py b/ML/50-mlps/12-keras-cnn-half/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/12-keras-cnn-half/hasy_tools.py
+++ b/ML/50-mlps/12-keras-cnn-half/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/13-keras-cnn-dropout/hasy_tools.py b/ML/50-mlps/13-keras-cnn-dropout/hasy_tools.py
index e8a9e42..cfb14b6 100644
--- a/ML/50-mlps/13-keras-cnn-dropout/hasy_tools.py
+++ b/ML/50-mlps/13-keras-cnn-dropout/hasy_tools.py
@@ -539,7 +539,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/15-keras-cnn-higher-dropout/hasy_tools.py b/ML/50-mlps/15-keras-cnn-higher-dropout/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/15-keras-cnn-higher-dropout/hasy_tools.py
+++ b/ML/50-mlps/15-keras-cnn-higher-dropout/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/16-keras-cnn-higher-dropout-l2/hasy_tools.py b/ML/50-mlps/16-keras-cnn-higher-dropout-l2/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/16-keras-cnn-higher-dropout-l2/hasy_tools.py
+++ b/ML/50-mlps/16-keras-cnn-higher-dropout-l2/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/17-keras-cnn-both-dropout/hasy_tools.py b/ML/50-mlps/17-keras-cnn-both-dropout/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/17-keras-cnn-both-dropout/hasy_tools.py
+++ b/ML/50-mlps/17-keras-cnn-both-dropout/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/50-mlps/18-keras-cnn-both-dropout-75%/hasy_tools.py b/ML/50-mlps/18-keras-cnn-both-dropout-75%/hasy_tools.py
index 7f7947f..ce0889e 100644
--- a/ML/50-mlps/18-keras-cnn-both-dropout-75%/hasy_tools.py
+++ b/ML/50-mlps/18-keras-cnn-both-dropout-75%/hasy_tools.py
@@ -533,7 +533,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/ML/hasy/hasy_tools.py b/ML/hasy/hasy_tools.py
index 065acbc..58f4ae4 100644
--- a/ML/hasy/hasy_tools.py
+++ b/ML/hasy/hasy_tools.py
@@ -269,7 +269,26 @@ def _maybe_extract(tarfile_path, work_directory):
     hasy_tools_path = os.path.join(work_directory, "hasy_tools.py")
     if not os.path.isfile(hasy_tools_path):
         with tarfile.open(tarfile_path, "r:bz2") as tar:
-            tar.extractall(path=work_directory)
+            def is_within_directory(directory, target):
+                """Return True if target is directory or a path below it."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole components: "out_evil" is not inside "out".
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar into path; reject members escaping path."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+                # NOTE: only member names are checked, not link targets.
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path=work_directory)
 
 
 def _get_data(dataset_path):
diff --git a/PyPI/package_analysis.py b/PyPI/package_analysis.py
index 59c3787..848403c 100755
--- a/PyPI/package_analysis.py
+++ b/PyPI/package_analysis.py
@@ -198,7 +198,26 @@ def download(package_url: str) -> Tuple[List[str], Optional[str]]:
     if is_tarfile:
         try:
             with tarfile.open(target) as tar:
-                tar.extractall(target[:-file_ending_len])
+                def is_within_directory(directory, target):
+                    """Return True if target is directory or a path below it."""
+                    abs_directory = os.path.abspath(directory)
+                    abs_target = os.path.abspath(target)
+                    # Compare whole components: "out_evil" is not inside "out".
+                    prefix = os.path.commonpath([abs_directory, abs_target])
+
+                    return prefix == abs_directory
+
+                def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                    """Extract tar into path; reject members escaping path."""
+                    for member in tar.getmembers():
+                        member_path = os.path.join(path, member.name)
+                        if not is_within_directory(path, member_path):
+                            raise Exception("Attempted Path Traversal in Tar File")
+                    # NOTE: only member names are checked, not link targets.
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+                safe_extract(tar, target[:-file_ending_len])
         except Exception:
             print(f"Something is wrong with the tar file of {pkg_name}")
             # Something is wrong with the tar file