From a3962569c67e228cd94a40f533e2a2235d8b8854 Mon Sep 17 00:00:00 2001
From: Brandon Schabell <brandonschabell@gmail.com>
Date: Mon, 13 Oct 2025 00:03:33 -0500
Subject: [PATCH] Add Python 3.13 and 3.14 support

---
 .github/copilot-instructions.md |   6 +-
 .github/workflows/test.yml      |   2 +-
 README.md                       |   2 +-
 pyproject.toml                  |   3 +
 urlpath/__init__.py             | 218 +++++++++++++++++++++++++-------
 uv.lock                         |   2 +
 6 files changed, 181 insertions(+), 52 deletions(-)

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index eb66c1d..77c429a 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -59,7 +59,7 @@ make test
 # Run unit tests only
 make test-unit
 
-# Run README tests only  
+# Run README tests only
 make test-doctest
 
 # Or use uv directly
@@ -86,7 +86,7 @@ make help
 - **Build System**: `uv` with `hatchling` backend for modern Python packaging
 
 ### CI Configuration
-GitHub Actions tests against Python 3.9-3.10 using `uv sync` and matrix strategy. Both unit tests and README doctests must pass.
+GitHub Actions tests against Python 3.9-3.14 using `uv sync` and matrix strategy. Both unit tests and README doctests must pass.
 
 ## Code Conventions
 
@@ -109,4 +109,4 @@ GitHub Actions tests against Python 3.9-3.10 using `uv sync` and matrix strategy
 - `urlpath/__init__.py`: Single-file module with all classes
 - `tests/test_url.py`: Comprehensive pytest test suite
 - `README.md`: Extensive examples with automated pytest validation
-- `conftest.py`: pytest configuration for test discovery and path setup
\ No newline at end of file
+- `conftest.py`: pytest configuration for test discovery and path setup
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bd753de..dc5faf6 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
     steps:
       - name: Check out repository code
         uses: actions/checkout@v5
diff --git a/README.md b/README.md
index 518de5a..32349ef 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ URLPath provides URL manipulator class that extends [`pathlib.PurePath`](https:/
 
 ## Dependencies
 
-* Python 3.9, 3.10
+* Python 3.9–3.14
 * [Requests](http://docs.python-requests.org/)
 * [JMESPath](https://pypi.org/project/jmespath/) (Optional)
 * [WebOb](http://webob.org/) (Optional)
diff --git a/pyproject.toml b/pyproject.toml
index 2b2ead8..1029afc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,12 +23,15 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Topic :: Internet :: WWW/HTTP",
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 requires-python = ">=3.9"
 dependencies = [
     "requests",
+    "charset-normalizer>=2,<4",
 ]
 
 [dependency-groups]
diff --git a/urlpath/__init__.py b/urlpath/__init__.py
index 02b498e..22b90da 100644
--- a/urlpath/__init__.py
+++ b/urlpath/__init__.py
@@ -5,6 +5,7 @@
 __all__ = ("URL",)
 
 import collections.abc
+import contextlib
 import functools
 import os
 import posixpath
@@ -417,6 +418,9 @@ def __new__(cls, *args: Any) -> URL:
             # Python 3.12: Canonicalize for stricter PurePath validation
             # Note: This happens BEFORE _parse_args, so it's not redundant
             canonicalized_args = tuple(cls._canonicalize_arg(a) for a in args)
+            if len(canonicalized_args) > 1:
+                combined = cls._combine_args(canonicalized_args)
+                return super().__new__(cls, *combined)
             return super().__new__(cls, *canonicalized_args)
         else:
             # Python < 3.12: No early validation, canonicalization happens in _parse_args
@@ -435,9 +439,91 @@ def __init__(self, *args: Any) -> None:
         if sys.version_info >= (3, 12):
             # Python 3.12: Must canonicalize args again (__init__ gets original args)
             canonicalized_args = tuple(self._canonicalize_arg(a) for a in args)
-            super().__init__(*canonicalized_args)
+            if len(canonicalized_args) > 1:
+                combined = type(self)._combine_args(canonicalized_args)  # type: ignore[attr-defined]
+                super().__init__(*combined)
+            else:
+                super().__init__(*canonicalized_args)
         # else: Python < 3.12 doesn't call parent __init__ (it's object.__init__)
 
+    if sys.version_info >= (3, 12):
+
+        @classmethod
+        def _combine_args(cls, canonicalized_args: tuple[str, ...]) -> tuple[str, ...]:
+            """Combine raw constructor arguments to emulate legacy joining semantics."""
+            if not canonicalized_args:
+                return canonicalized_args
+
+            current = canonicalized_args[0]
+            for seg in canonicalized_args[1:]:
+                parsed_current = urllib.parse.urlsplit(current)
+                parsed_segment = urllib.parse.urlsplit(seg)
+
+                if parsed_segment.scheme:
+                    current = urllib.parse.urlunsplit(parsed_segment)
+                    continue
+
+                if seg.startswith("/"):
+                    current = urllib.parse.urlunsplit(
+                        (
+                            parsed_current.scheme,
+                            parsed_current.netloc,
+                            parsed_segment.path or seg,
+                            parsed_segment.query,
+                            parsed_segment.fragment,
+                        )
+                    )
+                    continue
+
+                base_path = parsed_current.path or ("/" if parsed_current.netloc else "")
+                joined_path = posixpath.join(base_path, seg)
+                if joined_path == ".":
+                    joined_path = ""
+                else:
+                    parts = joined_path.split("/")
+                    if "." in parts:
+                        joined_path = "/".join(part for part in parts if part != ".")
+                current = urllib.parse.urlunsplit(
+                    (
+                        parsed_current.scheme,
+                        parsed_current.netloc,
+                        joined_path,
+                        "",
+                        "",
+                    )
+                )
+
+            return (current,)
+
+        @classmethod
+        def _parse_path(cls, path: str) -> tuple[str, str, list[str]]:
+            r"""Parse a URL path into drive, root, and tail components.
+
+            Python 3.13 switched pathlib to the new PurePath implementation that
+            delegates parsing to ``os.path``. That behaviour breaks our URL
+            handling, so we hook into the new extension point and reuse the URL
+            flavour logic that previously powered ``_parse_parts``.
+
+            Args:
+                path: Raw path string produced from ``_raw_paths``.
+
+            Returns:
+                Tuple of ``(drive, root, tail_parts)`` where the tail preserves
+                escaped ``"/"`` characters via ``"\x00"`` markers exactly like
+                the historical implementation.
+            """
+            if not path:
+                return "", "", []
+
+            drv, root, tail = cls._flavour.splitroot(path)
+
+            if not tail:
+                tail_parts: list[str] = []
+            else:
+                tail_parts = [part for part in tail.split(cls._flavour.sep) if part]
+
+            return drv, root, tail_parts
+
     # Python 3.12 compatibility: _parts was replaced with _tail_cached
     if sys.version_info >= (3, 12):
 
@@ -495,11 +581,12 @@ def _parts(self, value: list[str]) -> None:  # type: ignore[misc]
                 object.__delattr__(self, "_parts_cache")
 
             # When setting _parts, we need to update _tail_cached
-            if value and (self._drv or self._root):
-                # First element contains drive+root, rest is tail
-                object.__setattr__(self, "_tail_cached", tuple(value[1:]))
-            else:
-                object.__setattr__(self, "_tail_cached", tuple(value))
+            tail_parts = list(value[1:]) if value and (self._drv or self._root) else list(value)
+
+            object.__setattr__(self, "_tail_cached", tail_parts)
+            tail_attr = getattr(type(self), "_tail", None)
+            if not isinstance(tail_attr, property):
+                object.__setattr__(self, "_tail", tail_parts)
 
     @classmethod
     def _from_parts(cls, args: Any) -> URL:
@@ -602,22 +689,44 @@ def _canonicalize_arg(cls, a: Any) -> str:
         # Fall back to string conversion for other objects (including URL instances)
         return str(a)
 
-    def _ensure_parts_loaded(self) -> None:
-        """Ensure internal path parts are loaded (Python 3.12+ compatibility).
-
-        In Python 3.12, pathlib uses lazy loading. This method checks if
-        _tail_cached is loaded and calls _load_parts() if needed.
+    def _bootstrap_legacy_parts(self) -> None:
+        """Populate pathlib 3.11-style attributes when they are missing.
 
-        Note: We check _tail_cached instead of _parts to avoid recursion since
-        _parts is a property that calls this method.
+        Python 3.13 no longer materialises ``_drv``/``_root``/``_parts`` eagerly,
+        but the rest of this module still expects them to be present. We rebuild
+        those attributes from ``_raw_paths`` so existing logic keeps working.
         """
-        if sys.version_info >= (3, 12) and hasattr(self, "_load_parts"):
-            # In Python 3.12+, _drv/_root/_tail_cached are lazy-loaded
-            # Check if _tail_cached exists (not _parts to avoid recursion)
-            try:
-                _ = self._tail_cached  # type: ignore[attr-defined]
-            except AttributeError:
-                self._load_parts()  # type: ignore[attr-defined]
+        if hasattr(self, "_drv"):
+            return
+
+        raw_paths = getattr(self, "_raw_paths", None)
+        if not raw_paths:
+            return
+
+        raw_path = raw_paths[0]
+        drv, root, tail = self._flavour.splitroot(raw_path)
+
+        parts: list[str] = []
+        if drv or root:
+            parts.append(drv + root)
+
+        if tail:
+            parts.extend(tail.split(self._flavour.sep))
+
+        object.__setattr__(self, "_drv", drv)
+        object.__setattr__(self, "_root", root)
+        object.__setattr__(self, "_parts", parts)
+
+    def _ensure_parts_loaded(self) -> None:
+        """Ensure internal path parts are available across Python versions."""
+        if sys.version_info >= (3, 12):
+            if hasattr(self, "_load_parts"):
+                try:
+                    _ = self._tail_cached  # type: ignore[attr-defined]
+                except AttributeError:
+                    self._load_parts()  # type: ignore[attr-defined]
+            else:
+                self._bootstrap_legacy_parts()
 
     def _init(self) -> None:
         r"""Initialize URL-specific attributes after construction.
@@ -625,9 +734,7 @@ def _init(self) -> None:
         Loads parts (Python 3.12+) and cleans up escape sequences in the
         last path component (converting \x00 back to /).
         """
-        # Python 3.12+: Must call _load_parts() to initialize _drv, _root, _parts
-        if sys.version_info >= (3, 12) and hasattr(self, "_load_parts"):
-            self._load_parts()  # type: ignore[attr-defined]
+        self._ensure_parts_loaded()
 
         if self._parts:
             # trick to escape '/' in query and fragment and trailing
@@ -710,22 +817,37 @@ def joinpath(self, *pathsegments: Any) -> URL:
                         )
                     )
 
-            # No absolute URLs/paths, do normal joining
-            # Strip query/fragment from self first
+            # No absolute URLs/paths, do manual joining to match legacy pathlib
+            base_path = self.path
+            if not base_path and self.netloc:
+                base_path = "/"
+
+            joined_path = base_path
+            for seg_str in canonicalized_segments:
+                if not seg_str:
+                    continue
+                joined_path = posixpath.join(joined_path, seg_str)
+
             clean_url_str = urllib.parse.urlunsplit(
                 (
                     self.scheme,
                     self.netloc,
-                    self.path,
-                    "",  # no query
-                    "",  # no fragment
+                    joined_path,
+                    "",  # drop query for child joins
+                    "",  # drop fragment for child joins
                 )
             )
-            # Create new URL by joining paths (use canonicalized segments)
-            return type(self)(clean_url_str, *canonicalized_segments)
+
+            return type(self)(clean_url_str)
         else:
             return super().joinpath(*pathsegments)
 
+    if sys.version_info >= (3, 12):
+
+        def __truediv__(self, key: Any) -> URL:  # type: ignore[override]
+            """Ensure the / operator reuses joinpath on Python 3.12+."""
+            return self.joinpath(key)
+
     @cached_property
     def __str__(self) -> str:
         """Return string representation of the URL."""
@@ -862,8 +984,6 @@ def hostname(self) -> str | None:
         Returns:
             The decoded hostname, or None if not present.
         """
-        import contextlib
-
         result = super().hostname
         if result is not None:
             with contextlib.suppress(UnicodeEncodeError):
@@ -887,7 +1007,8 @@ def path(self) -> str:
         begin = 1 if self._drv or self._root else 0
 
         # Decode parts before encoding to avoid double-encoding
-        parts = [urllib.parse.unquote(i) for i in self._parts[begin:-1]] + [self.name]
+        decoded_name = urllib.parse.unquote(self.name)
+        parts = [urllib.parse.unquote(i) for i in self._parts[begin:-1]] + [decoded_name]
 
         return (
             self._root
@@ -1022,7 +1143,8 @@ def with_suffix(self, suffix: str) -> URL:
         Returns:
             A new URL instance with the modified suffix.
         """
-        return super().with_suffix(urllib.parse.quote(suffix, safe="."))
+        quoted_suffix = urllib.parse.quote(suffix, safe=".")
+        return super().with_suffix(quoted_suffix)
 
     def with_components(
         self,
@@ -1551,12 +1673,14 @@ def joinpath(self, *pathsegments: Any) -> JailedURL:
                             chroot.scheme,
                             chroot.netloc,
                             chroot.path,
-                            "",  # no query
-                            "",  # no fragment
+                            "",
+                            "",
                         )
                     )
-                    # Join the absolute path (with / stripped) to chroot
-                    return type(self)(chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :])
+                    joined = type(self)._combine_args(
+                        (chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :])  # type: ignore[attr-defined]
+                    )
+                    return type(self)(*joined)
 
             # No absolute paths, do normal joining
             clean_url_str = urllib.parse.urlunsplit(
@@ -1564,11 +1688,12 @@ def joinpath(self, *pathsegments: Any) -> JailedURL:
                     self.scheme,
                     self.netloc,
                     self.path,
-                    "",  # no query
-                    "",  # no fragment
+                    "",
+                    "",
                 )
             )
-            return type(self)(clean_url_str, *canonicalized_segments)
+            joined = type(self)._combine_args((clean_url_str, *canonicalized_segments))  # type: ignore[attr-defined]
+            return type(self)(*joined)
         else:
             # Python < 3.12: use _make_child which handles jailed logic
             result: JailedURL = super().joinpath(*pathsegments)  # type: ignore[assignment]
@@ -1584,18 +1709,17 @@ def _init(self) -> None:
 
         if self._parts[: len(chroot.parts)] != list(chroot.parts):  # type: ignore[has-type]
             self._drv, self._root, self._parts = chroot._drv, chroot._root, chroot._parts[:]
-            # Python 3.12: Also update _raw_paths to reflect the corrected path
             if sys.version_info >= (3, 12):
-                # Use the string representation of chroot as the new path
                 object.__setattr__(self, "_raw_paths", [str(chroot)])
-                # Clear _parts_cache since we updated _parts
                 if hasattr(self, "_parts_cache"):
                     object.__delattr__(self, "_parts_cache")
-                # Clear other cached properties that depend on the path
                 if hasattr(self, "_str"):
                     object.__delattr__(self, "_str")
-                if hasattr(self, "_tail_cached"):
-                    object.__setattr__(self, "_tail_cached", tuple(chroot._parts))
+                tail_parts = list(chroot._parts[1:]) if len(chroot._parts) > 1 else []
+                object.__setattr__(self, "_tail_cached", tail_parts)
+                tail_attr = getattr(type(self), "_tail", None)
+                if not isinstance(tail_attr, property):
+                    object.__setattr__(self, "_tail", tail_parts)
 
         super()._init()
 
diff --git a/uv.lock b/uv.lock
index bb50704..0f67b60 100644
--- a/uv.lock
+++ b/uv.lock
@@ -584,6 +584,7 @@ name = "urlpath"
 version = "1.2.0"
 source = { editable = "." }
 dependencies = [
+    { name = "charset-normalizer" },
     { name = "requests" },
 ]
 
@@ -605,6 +606,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "charset-normalizer", specifier = ">=2,<4" },
     { name = "jmespath", marker = "extra == 'json'" },
     { name = "requests" },
 ]