Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions urlpath/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Python version compatibility helpers."""

from __future__ import annotations

__all__ = ("IS_PY312_PLUS",)

import sys

# Python 3.12+ introduced significant pathlib changes
IS_PY312_PLUS = sys.version_info >= (3, 12)
13 changes: 7 additions & 6 deletions urlpath/_flavour.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@
__all__ = ("_URLFlavour",)

import posixpath
import sys
from typing import TYPE_CHECKING

from ._compat import IS_PY312_PLUS
from ._utils import _url_splitroot

# Python 3.12+ removed _PosixFlavour class, replaced with module-based approach
if sys.version_info >= (3, 12):
_PosixFlavour = None # noqa: F811
else:
if not TYPE_CHECKING and not IS_PY312_PLUS:
from pathlib import _PosixFlavour
else:
Comment on lines +14 to +16
Copy link

Copilot AI Oct 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The conditional import logic is complex and could be simplified. Consider using a try/except ImportError pattern which would be more readable and handle the import availability directly.

Suggested change
if not TYPE_CHECKING and not IS_PY312_PLUS:
from pathlib import _PosixFlavour
else:
try:
from pathlib import _PosixFlavour
except ImportError:

Copilot uses AI. Check for mistakes.
_PosixFlavour = object


# Python 3.12+ compatibility: create flavour class or simple object
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Python 3.12+: _flavour is a module, we create a simple object with required attributes
class _URLFlavour:
r"""Custom pathlib flavour for parsing URLs as filesystem paths (Python 3.12+).
Expand Down Expand Up @@ -114,7 +115,7 @@ def normcase(self, path: str) -> str:

else:
# Python 3.9-3.11: Inherit from _PosixFlavour class
class _URLFlavour(_PosixFlavour):
class _URLFlavour(_PosixFlavour): # type: ignore[no-redef]
r"""Custom pathlib flavour for parsing URLs as filesystem paths.

Extends PosixFlavour to treat URLs as paths by:
Expand Down
72 changes: 34 additions & 38 deletions urlpath/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
import os
import posixpath
import re
import sys
import urllib.parse
from pathlib import PurePath
from typing import Any
from unittest.mock import patch

import requests

from ._compat import IS_PY312_PLUS
from ._flavour import _URLFlavour
from ._utils import FrozenMultiDict, cached_property, netlocjoin
from ._utils import FrozenMultiDict, cached_property, cleanup_escapes, netlocjoin

try:
import jmespath
Expand Down Expand Up @@ -73,7 +73,7 @@ def __new__(cls, *args: Any) -> URL:
Returns:
New URL instance
"""
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Python 3.12: Canonicalize for stricter PurePath validation
# Note: This happens BEFORE _parse_args, so it's not redundant
canonicalized_args = tuple(cls._canonicalize_arg(a) for a in args)
Expand All @@ -95,17 +95,17 @@ def __init__(self, *args: Any) -> None:
Args:
*args: URL components (need to be canonicalized again for Python 3.12)
"""
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Python 3.12: Must canonicalize args again (__init__ gets original args)
canonicalized_args = tuple(self._canonicalize_arg(a) for a in args)
if len(canonicalized_args) > 1:
combined = type(self)._combine_args(canonicalized_args) # type: ignore[attr-defined]
combined = type(self)._combine_args(canonicalized_args)
super().__init__(*combined)
else:
super().__init__(*canonicalized_args)
# else: Python < 3.12 doesn't call parent __init__ (it's object.__init__)

if sys.version_info >= (3, 12):
if IS_PY312_PLUS:

@classmethod
def _combine_args(cls, canonicalized_args: tuple[str, ...]) -> tuple[str, ...]:
Expand Down Expand Up @@ -184,10 +184,10 @@ def _parse_path(cls, path: str) -> tuple[str, str, list[str]]:
return drv, root, tail_parts

# Python 3.12 compatibility: _parts was replaced with _tail_cached
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:

@property
def _parts(self) -> list[str]: # type: ignore[misc]
def _parts(self) -> list[str]:
"""Compatibility property for Python 3.12+ with manual caching.

In Python 3.12, pathlib uses _tail_cached instead of _parts. This property
Expand All @@ -202,7 +202,7 @@ def _parts(self) -> list[str]: # type: ignore[misc]
"""
# Check if we have a cached value
if hasattr(self, "_parts_cache"):
return self._parts_cache # type: ignore[return-value]
return self._parts_cache

self._ensure_parts_loaded()
# In Python 3.12, the structure is: _raw_paths contains input,
Expand All @@ -219,14 +219,14 @@ def _parts(self) -> list[str]: # type: ignore[misc]

# Clean up \x00 escape in last part (used to escape / in query/fragment/trailing)
if parts:
parts[-1] = parts[-1].replace("\\x00", "/")
parts[-1] = cleanup_escapes(parts[-1])

# Cache the result for future access
object.__setattr__(self, "_parts_cache", parts)
return parts

@_parts.setter
def _parts(self, value: list[str]) -> None: # type: ignore[misc]
def _parts(self, value: list[str]) -> None:
"""Compatibility setter for Python 3.12+.

Converts _parts list back to _tail_cached tuple. Clears the cache
Expand Down Expand Up @@ -259,11 +259,7 @@ def _from_parts(cls, args: Any) -> URL:
Returns:
New URL instance
"""
if sys.version_info >= (3, 12):
# Python 3.12 removed _from_parts, use direct construction
ret = cls(*args)
else:
ret = super()._from_parts(args)
ret = cls(*args) if IS_PY312_PLUS else super()._from_parts(args)
ret._init()
return ret

Expand All @@ -284,7 +280,7 @@ def _from_parsed_parts(cls, drv: str, root: str, parts: list[str]) -> URL:
"""
# Python 3.12 changed _from_parsed_parts from classmethod to instance method
# Signature changed from (drv, root, parts) to (self, drv, root, tail)
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# In Python 3.12, we need to create an instance first and set _raw_paths
self = object.__new__(cls)
# Reconstruct the path string for _raw_paths
Expand Down Expand Up @@ -378,12 +374,12 @@ def _bootstrap_legacy_parts(self) -> None:

def _ensure_parts_loaded(self) -> None:
"""Ensure internal path parts are available across Python versions."""
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
if hasattr(self, "_load_parts"):
try:
_ = self._tail_cached # type: ignore[attr-defined]
_ = self._tail_cached
except AttributeError:
self._load_parts() # type: ignore[attr-defined]
self._load_parts()
else:
self._bootstrap_legacy_parts()

Expand All @@ -397,7 +393,7 @@ def _init(self) -> None:

if self._parts:
# trick to escape '/' in query and fragment and trailing
self._parts[-1] = self._parts[-1].replace("\\x00", "/")
self._parts[-1] = cleanup_escapes(self._parts[-1])

def _make_child(self, args: Any) -> URL:
# replace by parts that have no query and have no fragment
Expand Down Expand Up @@ -450,7 +446,7 @@ def joinpath(self, *pathsegments: Any) -> URL:
>>> str(url / '/absolute')
'http://example.com/absolute'
"""
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Python 3.12: Manually implement join logic
# First, canonicalize all segments (handles webob.Request, etc.)
canonicalized_segments = tuple(self._canonicalize_arg(seg) for seg in pathsegments)
Expand Down Expand Up @@ -501,9 +497,9 @@ def joinpath(self, *pathsegments: Any) -> URL:
else:
return super().joinpath(*pathsegments)

if sys.version_info >= (3, 12):
if IS_PY312_PLUS:

def __truediv__(self, key: Any) -> URL: # type: ignore[override]
def __truediv__(self, key: Any) -> URL:
"""Ensure the / operator reuses joinpath on Python 3.12+."""
return self.joinpath(key)

Expand Down Expand Up @@ -690,8 +686,8 @@ def _name_parts(self) -> tuple[str, str, str]:
"""
full_name = super().name
# In Python 3.12, super().name may have \x00 escape, clean it up
if sys.version_info >= (3, 12):
full_name = full_name.replace("\\x00", "/")
if IS_PY312_PLUS:
full_name = cleanup_escapes(full_name)

# Fragment takes priority - everything after # is fragment
fragment_idx = full_name.find("#")
Expand Down Expand Up @@ -1236,7 +1232,7 @@ def __init__(self, *args: Any, root: Any = None) -> None:
# The root argument is already handled in __new__
# In Python < 3.12, PurePath.__init__ does nothing, so we can't pass args
# In Python 3.12, we need to canonicalize and pass args (without root kwarg)
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Must canonicalize args (__init__ receives original args)
canonicalized_args = tuple(self._canonicalize_arg(a) for a in args)
super().__init__(*canonicalized_args)
Expand All @@ -1250,7 +1246,7 @@ def _from_parts(cls, args: Any) -> URL:
a dynamic subclass and calls _from_parts again, causing infinite recursion.
Instead, we use object.__new__ directly.
"""
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Create instance using object.__new__ to bypass __new__
self = object.__new__(cls)
# Set _raw_paths which is required for _load_parts
Expand Down Expand Up @@ -1309,7 +1305,7 @@ def joinpath(self, *pathsegments: Any) -> JailedURL:
>>> str(jail / '../../escape') # Prevented by _init
'http://example.com/app/'
"""
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
chroot = self._chroot
assert chroot is not None # Always set by __new__

Expand Down Expand Up @@ -1337,7 +1333,7 @@ def joinpath(self, *pathsegments: Any) -> JailedURL:
)
)
joined = type(self)._combine_args(
(chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :]) # type: ignore[attr-defined]
(chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :])
)
return type(self)(*joined)

Expand All @@ -1351,24 +1347,24 @@ def joinpath(self, *pathsegments: Any) -> JailedURL:
"",
)
)
joined = type(self)._combine_args((clean_url_str, *canonicalized_segments)) # type: ignore[attr-defined]
joined = type(self)._combine_args((clean_url_str, *canonicalized_segments))
return type(self)(*joined)
else:
# Python < 3.12: use _make_child which handles jailed logic
result: JailedURL = super().joinpath(*pathsegments) # type: ignore[assignment]
return result
result = super().joinpath(*pathsegments)
return result # type: ignore[return-value]
Comment on lines +1354 to +1355
Copy link

Copilot AI Oct 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intermediate variable assignment is unnecessary. The code can be simplified to 'return super().joinpath(*pathsegments) # type: ignore[return-value]'.

Suggested change
result = super().joinpath(*pathsegments)
return result # type: ignore[return-value]
return super().joinpath(*pathsegments) # type: ignore[return-value]

Copilot uses AI. Check for mistakes.

def _init(self) -> None:
# Python 3.12+: Must call _load_parts() to initialize _drv, _root, _parts
if sys.version_info >= (3, 12) and hasattr(self, "_load_parts"):
self._load_parts() # type: ignore[attr-defined]
if IS_PY312_PLUS and hasattr(self, "_load_parts"):
self._load_parts()

chroot = self._chroot
assert chroot is not None # Always set by __new__

if self._parts[: len(chroot.parts)] != list(chroot.parts): # type: ignore[has-type]
if self._parts[: len(chroot.parts)] != list(chroot.parts):
self._drv, self._root, self._parts = chroot._drv, chroot._root, chroot._parts[:]
if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
object.__setattr__(self, "_raw_paths", [str(chroot)])
if hasattr(self, "_parts_cache"):
object.__delattr__(self, "_parts_cache")
Expand Down Expand Up @@ -1398,7 +1394,7 @@ def resolve(self) -> URL:
chroot = self._chroot
assert chroot is not None # Always set by __new__

if sys.version_info >= (3, 12):
if IS_PY312_PLUS:
# Python 3.12: _parts is a property computed from _drv, _root, _tail_cached
# The resolve logic for jailed URLs needs _parts to look like:
# ["http://example.com/app/", "path", "to", "content", "..", "file"]
Expand Down
37 changes: 30 additions & 7 deletions urlpath/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@

from __future__ import annotations

__all__ = ("FrozenDict", "FrozenMultiDict", "MultiDictMixin", "cached_property", "netlocjoin", "_url_splitroot")
__all__ = (
"FrozenDict",
"FrozenMultiDict",
"MultiDictMixin",
"cached_property",
"netlocjoin",
"_url_splitroot",
"cleanup_escapes",
)

import functools
import re
Expand Down Expand Up @@ -79,18 +87,18 @@ def get_one(
key: Any,
default: Any = None,
predicate: Callable[[Any], bool] | None = None,
type: Callable[[Any], Any] | None = None,
type_: Callable[[Any], Any] | None = None,
) -> Any:
"""Get the first value for a key that matches the predicate.

Args:
key: The dictionary key to look up
default: Value to return if key not found or no value matches predicate
predicate: Optional callable to filter values (e.g., from inspect.getmembers)
type: Optional callable to transform the returned value
type_: Optional callable to transform the returned value

Returns:
The first matching value, optionally transformed by type callable,
The first matching value, optionally transformed by type_ callable,
or default if no match found.
"""
try:
Expand All @@ -100,7 +108,7 @@ def get_one(
else:
for value in values:
if not predicate or predicate(value):
return value if not type else type(value)
return value if not type_ else type_(value)

return default

Expand Down Expand Up @@ -214,7 +222,22 @@ def _url_splitroot(part: str, sep: str = "/") -> tuple[str, str, str]:

drive = urllib.parse.urlunsplit((scheme, netloc, "", "", ""))
match = re.match(f"^({re.escape(sep)}*)(.*)$", path)
assert match is not None
assert match is not None # we're sure it's always valid for this regex
root, path = match.groups()

return drive, root, path


def cleanup_escapes(text: str) -> str:
r"""Clean up escape sequences used for URL component separation.

Replaces the internal escape character (\x00) used to protect
forward slashes in query and fragment components back to regular
forward slashes.

Args:
text: String potentially containing \x00 escape sequences

Returns:
String with \x00 replaced by /
"""
return text.replace("\\x00", "/")