Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions fsspec/implementations/tests/test_zip.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import collections.abc
import os.path
import zipfile
from pathlib import Path
from shutil import make_archive

Expand Down Expand Up @@ -161,6 +162,17 @@ def zip_file_fixture(tmp_path):
return Path(make_archive(zip_file, "zip", data_dir))


@pytest.fixture(name="zip_file2")
def zip_file_fixture2(tmp_path):
    """Create a small zip archive with nested members and return its path.

    The archive is written to ``tmp_path / "zip_file2.zip"`` and receives two
    empty members, ``a/b/c`` and ``a/b/d/e``.
    """
    archive_path = tmp_path / "zip_file2.zip"

    with zipfile.ZipFile(archive_path, "w") as archive:
        for member in ("a/b/c", "a/b/d/e"):
            archive.writestr(member, "")

    return archive_path


def _assert_all_except_context_dependent_variables(result, expected_result):
for path in expected_result:
assert result[path]
Expand Down Expand Up @@ -480,3 +492,43 @@ def test_find_returns_expected_result_recursion_depth_set(zip_file):
]

assert result == expected_result


@pytest.mark.parametrize(
    "args,expected_result",
    [
        pytest.param(
            ("a/b", 1),
            ["a/b/c"],
            id="find-maxdepth-correct-depth",
        ),
        pytest.param(
            ("a/b", None, True),
            ["a/b", "a/b/c", "a/b/d", "a/b/d/e"],
            id="find-withdirs-should-not-include-parents",
        ),
        pytest.param(
            ("a/b", 1, True),
            ["a/b", "a/b/c", "a/b/d"],
            id="find-withdirs-maxdepth",
        ),
        pytest.param(
            ("/a//b///", 1, True),
            ["a/b", "a/b/c", "a/b/d"],
            id="find-ill-formed-path",
        ),
        pytest.param(
            ("\\a\\\\b\\", 1, True),
            ["a/b", "a/b/c", "a/b/d"],
            id="find-ill-formed-path-windows",
        ),
        pytest.param(
            (Path("\\a\\\\b\\"), 1, True),
            ["a/b", "a/b/c", "a/b/d"],
            id="find-using-pathobj",
        ),
    ],
)
def test_find_generic(zip_file2, args, expected_result):
    """Check ``ZipFileSystem.find`` against the ``zip_file2`` archive.

    ``args`` is unpacked positionally into ``find``; the returned value must
    equal ``expected_result`` exactly.
    """
    file_system = ZipFileSystem(zip_file2)
    found = file_system.find(*args)
    assert found == expected_result
26 changes: 15 additions & 11 deletions fsspec/implementations/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,17 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

def to_parts(_path: str):
    """Split a path into its non-empty components.

    Backslashes are treated as forward slashes, and empty segments (from
    leading, trailing or repeated separators) are dropped.
    """
    normalized = _path.replace("\\", "/")
    return [segment for segment in normalized.split("/") if segment]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"\\" is a single backslash because of escaping - is that the intent?

Copy link
Contributor Author

@TLCFEM TLCFEM Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the backslash needs to be escaped ('\\'); otherwise, using a raw string should be equivalent.


if not isinstance(path, str):
path = str(path)

# Remove the leading slash, as the zip file paths are always
# given without a leading slash
path = path.lstrip("/")
path_parts = list(filter(lambda s: bool(s), path.split("/")))

def _matching_starts(file_path):
file_parts = filter(lambda s: bool(s), file_path.split("/"))
return all(a == b for a, b in zip(path_parts, file_parts))
path_parts = to_parts(path)
path_depth = len(path_parts)

self._get_dirs()

Expand All @@ -157,21 +160,22 @@ def _matching_starts(file_path):
return result if detail else [path]

for file_path, file_info in self.dir_cache.items():
if not (path == "" or _matching_starts(file_path)):
if len(file_parts := to_parts(file_path)) < path_depth or any(
a != b for a, b in zip(path_parts, file_parts)
):
# skip parent folders and mismatching paths
continue

if file_info["type"] == "directory":
if withdirs:
if file_path not in result:
result[file_path.strip("/")] = file_info
if withdirs and file_path not in result:
result[file_path.strip("/")] = file_info
continue

if file_path not in result:
result[file_path] = file_info if detail else None

if maxdepth:
path_depth = path.count("/")
result = {
k: v for k, v in result.items() if k.count("/") - path_depth < maxdepth
k: v for k, v in result.items() if k.count("/") < maxdepth + path_depth
}
return result if detail else sorted(result)
Loading