From a3962569c67e228cd94a40f533e2a2235d8b8854 Mon Sep 17 00:00:00 2001 From: Brandon Schabell Date: Mon, 13 Oct 2025 00:03:33 -0500 Subject: [PATCH] Add python 3.13 and 3.14 support --- .github/copilot-instructions.md | 6 +- .github/workflows/test.yml | 2 +- README.md | 2 +- pyproject.toml | 3 + urlpath/__init__.py | 218 +++++++++++++++++++++++++------- uv.lock | 2 + 6 files changed, 181 insertions(+), 52 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index eb66c1d..77c429a 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -59,7 +59,7 @@ make test # Run unit tests only make test-unit -# Run README tests only +# Run README tests only make test-doctest # Or use uv directly @@ -86,7 +86,7 @@ make help - **Build System**: `uv` with `hatchling` backend for modern Python packaging ### CI Configuration -GitHub Actions tests against Python 3.9-3.10 using `uv sync` and matrix strategy. Both unit tests and README doctests must pass. +GitHub Actions tests against Python 3.9-3.13 using `uv sync` and matrix strategy. Both unit tests and README doctests must pass. ## Code Conventions @@ -109,4 +109,4 @@ GitHub Actions tests against Python 3.9-3.10 using `uv sync` and matrix strategy - `urlpath/__init__.py`: Single-file module with all classes - `tests/test_url.py`: Comprehensive pytest test suite - `README.md`: Extensive examples with automated pytest validation -- `conftest.py`: pytest configuration for test discovery and path setup \ No newline at end of file +- `conftest.py`: pytest configuration for test discovery and path setup diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bd753de..dc5faf6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - name: Check out repository code uses: actions/checkout@v5 diff --git a/README.md b/README.md index 518de5a..32349ef 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ URLPath provides URL manipulator class that extends [`pathlib.PurePath`](https:/ ## Dependencies -* Python 3.9, 3.10 +* Python 3.9–3.14 * [Requests](http://docs.python-requests.org/) * [JMESPath](https://pypi.org/project/jmespath/) (Optional) * [WebOb](http://webob.org/) (Optional) diff --git a/pyproject.toml b/pyproject.toml index 2b2ead8..1029afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,15 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", ] requires-python = ">=3.9" dependencies = [ "requests", + "charset-normalizer>=2,<4", ] [dependency-groups] diff --git a/urlpath/__init__.py b/urlpath/__init__.py index 02b498e..22b90da 100644 --- a/urlpath/__init__.py +++ b/urlpath/__init__.py @@ -5,6 +5,7 @@ __all__ = ("URL",) import collections.abc +import contextlib import functools import os import posixpath @@ -417,6 +418,9 @@ def __new__(cls, *args: Any) -> URL: # Python 3.12: Canonicalize for stricter PurePath validation # Note: This happens BEFORE _parse_args, so it's not redundant canonicalized_args = tuple(cls._canonicalize_arg(a) for a in args) + if len(canonicalized_args) > 1: + combined = cls._combine_args(canonicalized_args) + return super().__new__(cls, *combined) return super().__new__(cls, *canonicalized_args) else: # Python < 3.12: No early validation, canonicalization happens in _parse_args @@ -435,9 +439,91 @@ def __init__(self, *args: Any) -> None: if sys.version_info >= (3, 12): # Python 3.12: Must canonicalize args again (__init__ gets original args) canonicalized_args = tuple(self._canonicalize_arg(a) for a in args) - super().__init__(*canonicalized_args) + if len(canonicalized_args) > 1: + combined = type(self)._combine_args(canonicalized_args) # type: ignore[attr-defined] + super().__init__(*combined) + else: + super().__init__(*canonicalized_args) # else: Python < 3.12 doesn't call parent __init__ (it's object.__init__) + if sys.version_info >= (3, 12): + + @classmethod + def _combine_args(cls, canonicalized_args: tuple[str, ...]) -> tuple[str, ...]: + """Combine raw constructor arguments to emulate legacy joining semantics.""" + if not canonicalized_args: + return canonicalized_args + + current = canonicalized_args[0] + for seg in canonicalized_args[1:]: + parsed_current = urllib.parse.urlsplit(current) + parsed_segment = urllib.parse.urlsplit(seg) + + if parsed_segment.scheme: + current = urllib.parse.urlunsplit(parsed_segment) + continue + + if seg.startswith("/"): + current = urllib.parse.urlunsplit( + ( + parsed_current.scheme, + parsed_current.netloc, + parsed_segment.path or seg, + parsed_segment.query, + parsed_segment.fragment, + ) + ) + continue + + base_path = parsed_current.path or ("/" if parsed_current.netloc else "") + joined_path = posixpath.join(base_path, seg) + if joined_path == ".": + joined_path = "" + else: + parts = joined_path.split("/") + if "." in parts: + joined_path = "/".join(part for part in parts if part != ".") + current = urllib.parse.urlunsplit( + ( + parsed_current.scheme, + parsed_current.netloc, + joined_path, + "", + "", + ) + ) + + return (current,) + + @classmethod + def _parse_path(cls, path: str) -> tuple[str, str, list[str]]: + r"""Parse a URL path into drive, root, and tail components. + + Python 3.13 switched pathlib to the new PurePath implementation that + delegates parsing to ``os.path``. That behaviour breaks our URL + handling, so we hook into the new extension point and reuse the URL + flavour logic that previously powered ``_parse_parts``. + + Args: + path: Raw path string produced from ``_raw_paths``. + + Returns: + Tuple of ``(drive, root, tail_parts)`` where the tail preserves + escaped ``"/"`` characters via ``"\x00"`` markers exactly like + the historical implementation. + """ + if not path: + return "", "", [] + + drv, root, tail = cls._flavour.splitroot(path) + + if not tail: + tail_parts: list[str] = [] + else: + tail_parts = [part for part in tail.split(cls._flavour.sep) if part] + + return drv, root, tail_parts + # Python 3.12 compatibility: _parts was replaced with _tail_cached if sys.version_info >= (3, 12): @@ -495,11 +581,12 @@ def _parts(self, value: list[str]) -> None: # type: ignore[misc] object.__delattr__(self, "_parts_cache") # When setting _parts, we need to update _tail_cached - if value and (self._drv or self._root): - # First element contains drive+root, rest is tail - object.__setattr__(self, "_tail_cached", tuple(value[1:])) - else: - object.__setattr__(self, "_tail_cached", tuple(value)) + tail_parts = list(value[1:]) if value and (self._drv or self._root) else list(value) + + object.__setattr__(self, "_tail_cached", tail_parts) + tail_attr = getattr(type(self), "_tail", None) + if not isinstance(tail_attr, property): + object.__setattr__(self, "_tail", tail_parts) @classmethod def _from_parts(cls, args: Any) -> URL: @@ -602,22 +689,44 @@ def _canonicalize_arg(cls, a: Any) -> str: # Fall back to string conversion for other objects (including URL instances) return str(a) - def _ensure_parts_loaded(self) -> None: - """Ensure internal path parts are loaded (Python 3.12+ compatibility). - - In Python 3.12, pathlib uses lazy loading. This method checks if - _tail_cached is loaded and calls _load_parts() if needed. + def _bootstrap_legacy_parts(self) -> None: + """Populate pathlib 3.11-style attributes when they are missing. - Note: We check _tail_cached instead of _parts to avoid recursion since - _parts is a property that calls this method. + Python 3.13 no longer materialises ``_drv``/``_root``/``_parts`` eagerly, + but the rest of this module still expects them to be present. We rebuild + those attributes from ``_raw_paths`` so existing logic keeps working. """ - if sys.version_info >= (3, 12) and hasattr(self, "_load_parts"): - # In Python 3.12+, _drv/_root/_tail_cached are lazy-loaded - # Check if _tail_cached exists (not _parts to avoid recursion) - try: - _ = self._tail_cached # type: ignore[attr-defined] - except AttributeError: - self._load_parts() # type: ignore[attr-defined] + if hasattr(self, "_drv"): + return + + raw_paths = getattr(self, "_raw_paths", None) + if not raw_paths: + return + + raw_path = raw_paths[0] + drv, root, tail = self._flavour.splitroot(raw_path) + + parts: list[str] = [] + if drv or root: + parts.append(drv + root) + + if tail: + parts.extend(tail.split(self._flavour.sep)) + + object.__setattr__(self, "_drv", drv) + object.__setattr__(self, "_root", root) + object.__setattr__(self, "_parts", parts) + + def _ensure_parts_loaded(self) -> None: + """Ensure internal path parts are available across Python versions.""" + if sys.version_info >= (3, 12): + if hasattr(self, "_load_parts"): + try: + _ = self._tail_cached # type: ignore[attr-defined] + except AttributeError: + self._load_parts() # type: ignore[attr-defined] + else: + self._bootstrap_legacy_parts() def _init(self) -> None: r"""Initialize URL-specific attributes after construction. @@ -625,9 +734,7 @@ def _init(self) -> None: Loads parts (Python 3.12+) and cleans up escape sequences in the last path component (converting \x00 back to /). """ - # Python 3.12+: Must call _load_parts() to initialize _drv, _root, _parts - if sys.version_info >= (3, 12) and hasattr(self, "_load_parts"): - self._load_parts() # type: ignore[attr-defined] + self._ensure_parts_loaded() if self._parts: # trick to escape '/' in query and fragment and trailing @@ -710,22 +817,37 @@ def joinpath(self, *pathsegments: Any) -> URL: ) ) - # No absolute URLs/paths, do normal joining - # Strip query/fragment from self first + # No absolute URLs/paths, do manual joining to match legacy pathlib + base_path = self.path + if not base_path and self.netloc: + base_path = "/" + + joined_path = base_path + for seg_str in canonicalized_segments: + if not seg_str: + continue + joined_path = posixpath.join(joined_path, seg_str) + clean_url_str = urllib.parse.urlunsplit( ( self.scheme, self.netloc, - self.path, - "", # no query - "", # no fragment + joined_path, + "", # drop query for child joins + "", # drop fragment for child joins ) ) - # Create new URL by joining paths (use canonicalized segments) - return type(self)(clean_url_str, *canonicalized_segments) + + return type(self)(clean_url_str) else: return super().joinpath(*pathsegments) + if sys.version_info >= (3, 12): + + def __truediv__(self, key: Any) -> URL: # type: ignore[override] + """Ensure the / operator reuses joinpath on Python 3.12+.""" + return self.joinpath(key) + @cached_property def __str__(self) -> str: """Return string representation of the URL.""" @@ -862,8 +984,6 @@ def hostname(self) -> str | None: Returns: The decoded hostname, or None if not present. """ - import contextlib - result = super().hostname if result is not None: with contextlib.suppress(UnicodeEncodeError): @@ -887,7 +1007,8 @@ def path(self) -> str: begin = 1 if self._drv or self._root else 0 # Decode parts before encoding to avoid double-encoding - parts = [urllib.parse.unquote(i) for i in self._parts[begin:-1]] + [self.name] + decoded_name = urllib.parse.unquote(self.name) + parts = [urllib.parse.unquote(i) for i in self._parts[begin:-1]] + [decoded_name] return ( self._root @@ -1022,7 +1143,8 @@ def with_suffix(self, suffix: str) -> URL: Returns: A new URL instance with the modified suffix. """ - return super().with_suffix(urllib.parse.quote(suffix, safe=".")) + quoted_suffix = urllib.parse.quote(suffix, safe=".") + return super().with_suffix(quoted_suffix) def with_components( self, @@ -1551,12 +1673,14 @@ def joinpath(self, *pathsegments: Any) -> JailedURL: chroot.scheme, chroot.netloc, chroot.path, - "", # no query - "", # no fragment + "", + "", ) ) - # Join the absolute path (with / stripped) to chroot - return type(self)(chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :]) + joined = type(self)._combine_args( + (chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :]) # type: ignore[attr-defined] + ) + return type(self)(*joined) # No absolute paths, do normal joining clean_url_str = urllib.parse.urlunsplit( @@ -1564,11 +1688,12 @@ def joinpath(self, *pathsegments: Any) -> JailedURL: self.scheme, self.netloc, self.path, - "", # no query - "", # no fragment + "", + "", ) ) - return type(self)(clean_url_str, *canonicalized_segments) + joined = type(self)._combine_args((clean_url_str, *canonicalized_segments)) # type: ignore[attr-defined] + return type(self)(*joined) else: # Python < 3.12: use _make_child which handles jailed logic result: JailedURL = super().joinpath(*pathsegments) # type: ignore[assignment] @@ -1584,18 +1709,17 @@ def _init(self) -> None: if self._parts[: len(chroot.parts)] != list(chroot.parts): # type: ignore[has-type] self._drv, self._root, self._parts = chroot._drv, chroot._root, chroot._parts[:] - # Python 3.12: Also update _raw_paths to reflect the corrected path if sys.version_info >= (3, 12): - # Use the string representation of chroot as the new path object.__setattr__(self, "_raw_paths", [str(chroot)]) - # Clear _parts_cache since we updated _parts if hasattr(self, "_parts_cache"): object.__delattr__(self, "_parts_cache") - # Clear other cached properties that depend on the path if hasattr(self, "_str"): object.__delattr__(self, "_str") - if hasattr(self, "_tail_cached"): - object.__setattr__(self, "_tail_cached", tuple(chroot._parts)) + tail_parts = list(chroot._parts[1:]) if len(chroot._parts) > 1 else [] + object.__setattr__(self, "_tail_cached", tail_parts) + tail_attr = getattr(type(self), "_tail", None) + if not isinstance(tail_attr, property): + object.__setattr__(self, "_tail", tail_parts) super()._init() diff --git a/uv.lock b/uv.lock index bb50704..0f67b60 100644 --- a/uv.lock +++ b/uv.lock @@ -584,6 +584,7 @@ name = "urlpath" version = "1.2.0" source = { editable = "." } dependencies = [ + { name = "charset-normalizer" }, { name = "requests" }, ] @@ -605,6 +606,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "charset-normalizer", specifier = ">=2,<4" }, { name = "jmespath", marker = "extra == 'json'" }, { name = "requests" }, ]