Skip to content

Commit 1847a35

Browse files
committed
SafeTarfile: Update relative symlinks instead of dropping
1 parent 2e6a7ae commit 1847a35

File tree

1 file changed

+65
-15
lines changed

1 file changed

+65
-15
lines changed

unblob/handlers/archive/_safe_tarfile.py

Lines changed: 65 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,73 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
7676

7777
# prevent traversal attempts through links
7878
if tarinfo.islnk() or tarinfo.issym():
79-
if Path(tarinfo.linkname).is_absolute():
80-
self.record_problem(
81-
tarinfo,
82-
"Absolute path as link target.",
83-
"Converted to extraction relative path.",
84-
)
85-
tarinfo.linkname = f"./{tarinfo.linkname}"
86-
if not is_safe_path(
87-
basedir=extract_root,
88-
path=extract_root / tarinfo.linkname,
89-
):
90-
self.record_problem(
91-
tarinfo,
79+
link_target = Path(tarinfo.linkname)
80+
81+
# Check if the link is absolute and make it relative to extract_root
82+
if link_target.is_absolute():
83+
# Strip leading '/' to make the path relative
84+
rel_target = link_target.relative_to("/")
85+
86+
if Path(tarinfo.linkname).is_absolute():
87+
self.record_problem(
88+
tarinfo,
89+
"Absolute path as link target.",
90+
"Converted to extraction relative path.",
91+
)
92+
else:
93+
# Directly use the relative link target. If it points to an unsafe path, we'll
94+
# check and fix below
95+
rel_target = link_target
96+
97+
# The symlink will point to our relative target (may be updated below if unsafe)
98+
tarinfo.linkname = rel_target
99+
100+
# Resolve the link target to an absolute path
101+
resolved_path = (extract_root / tarinfo.name).parent / rel_target
102+
103+
# If the resolved path points outside of extract_root, we need to fix it!
104+
if not is_safe_path(extract_root, resolved_path):
105+
logger.warning(
92106
"Traversal attempt through link path.",
93-
"Skipped.",
107+
src=tarinfo.name,
108+
dest=tarinfo.linkname,
109+
basedir=extract_root,
110+
resovled_path=resolved_path,
94111
)
95-
return
112+
113+
for drop_count in range(len(str(rel_target).split("/"))):
114+
new_path = (
115+
(extract_root / tarinfo.name).parent
116+
/ Path("/".join(["placeholder"] * drop_count))
117+
/ rel_target
118+
)
119+
resolved_path = new_path.resolve()
120+
if str(resolved_path).startswith(str(extract_root)):
121+
break
122+
else:
123+
# We didn't hit the break, we couldn't resolve the path safely
124+
self.record_problem(
125+
tarinfo,
126+
"Traversal attempt through link path.",
127+
"Skipped.",
128+
)
129+
return
130+
131+
# Double check that it's safe now
132+
if not is_safe_path(extract_root, resolved_path):
133+
self.record_problem(
134+
tarinfo,
135+
"Traversal attempt through link path.",
136+
"Skipped.",
137+
)
138+
return
139+
140+
# Prepend placeholder directories before rel_target to get a valid path
141+
# within extract_root. This is the relative version of resolved_path.
142+
rel_target = Path("/".join(["placeholder"] * drop_count)) / rel_target
143+
tarinfo.linkname = rel_target
144+
145+
logger.debug("Creating symlink", points_to=resolved_path, name=tarinfo.name)
96146

97147
target_path = extract_root / tarinfo.name
98148
# directories are special: we can not set their metadata now + they might also be already existing

0 commit comments

Comments
 (0)