@@ -76,23 +76,73 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
76
76
77
77
# prevent traversal attempts through links
78
78
if tarinfo .islnk () or tarinfo .issym ():
79
- if Path (tarinfo .linkname ).is_absolute ():
80
- self .record_problem (
81
- tarinfo ,
82
- "Absolute path as link target." ,
83
- "Converted to extraction relative path." ,
84
- )
85
- tarinfo .linkname = f"./{ tarinfo .linkname } "
86
- if not is_safe_path (
87
- basedir = extract_root ,
88
- path = extract_root / tarinfo .linkname ,
89
- ):
90
- self .record_problem (
91
- tarinfo ,
79
+ link_target = Path (tarinfo .linkname )
80
+
81
+ # Check if the link is absolute and make it relative to extract_root
82
+ if link_target .is_absolute ():
83
+ # Strip leading '/' to make the path relative
84
+ rel_target = link_target .relative_to ("/" )
85
+
86
+ if Path (tarinfo .linkname ).is_absolute ():
87
+ self .record_problem (
88
+ tarinfo ,
89
+ "Absolute path as link target." ,
90
+ "Converted to extraction relative path." ,
91
+ )
92
+ else :
93
+ # Directly use the relative link target. If it points to an unsafe path, we'll
94
+ # check and fix below
95
+ rel_target = link_target
96
+
97
+ # The symlink will point to our relative target (may be updated below if unsafe)
98
+ tarinfo .linkname = rel_target
99
+
100
+ # Resolve the link target to an absolute path
101
+ resolved_path = (extract_root / tarinfo .name ).parent / rel_target
102
+
103
+ # If the resolved path points outside of extract_root, we need to fix it!
104
+ if not is_safe_path (extract_root , resolved_path ):
105
+ logger .warning (
92
106
"Traversal attempt through link path." ,
93
- "Skipped." ,
107
+ src = tarinfo .name ,
108
+ dest = tarinfo .linkname ,
109
+ basedir = extract_root ,
110
+ resovled_path = resolved_path ,
94
111
)
95
- return
112
+
113
+ for drop_count in range (len (str (rel_target ).split ("/" ))):
114
+ new_path = (
115
+ (extract_root / tarinfo .name ).parent
116
+ / Path ("/" .join (["placeholder" ] * drop_count ))
117
+ / rel_target
118
+ )
119
+ resolved_path = new_path .resolve ()
120
+ if str (resolved_path ).startswith (str (extract_root )):
121
+ break
122
+ else :
123
+ # We didn't hit the break, we couldn't resolve the path safely
124
+ self .record_problem (
125
+ tarinfo ,
126
+ "Traversal attempt through link path." ,
127
+ "Skipped." ,
128
+ )
129
+ return
130
+
131
+ # Double check that it's safe now
132
+ if not is_safe_path (extract_root , resolved_path ):
133
+ self .record_problem (
134
+ tarinfo ,
135
+ "Traversal attempt through link path." ,
136
+ "Skipped." ,
137
+ )
138
+ return
139
+
140
+ # Prepend placeholder directories before rel_target to get a valid path
141
+ # within extract_root. This is the relative version of resolved_path.
142
+ rel_target = Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
143
+ tarinfo .linkname = rel_target
144
+
145
+ logger .debug ("Creating symlink" , points_to = resolved_path , name = tarinfo .name )
96
146
97
147
target_path = extract_root / tarinfo .name
98
148
# directories are special: we can not set their metadata now + they might also be already existing
0 commit comments