diff --git a/CHANGELOG.md b/CHANGELOG.md index 47ed0c9..b8cea8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated GitHub Actions workflows to use modern actions and UV package manager - Modernized code formatting (improved consistency and readability) - Centralized all package metadata in `pyproject.toml` (removed from module docstring) +=- **Updated all docstrings from reStructuredText to Google/NumPy style** (Args/Returns/Raises format) ### Added - `.python-version` file for Python version management @@ -41,6 +42,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CHANGELOG.md file (this file) - `.github/copilot-instructions.md` for AI-assisted development - `uv.lock` for reproducible dependency resolution +- **`from __future__ import annotations`** for modern type annotation support in Python 3.9+ +- **Complete type annotations** for all 56+ functions, methods, and properties +- **Comprehensive docstrings** for all public classes, functions, methods, and properties +- **Pydocstyle linting** (Ruff "D" rules) to enforce documentation consistency +- **Type annotations for all test functions** (`-> None` return types) +- **Docstrings for magic methods** (`__str__`, `__bytes__`, etc.) ### Removed - `setup.py` (replaced by `pyproject.toml`) diff --git a/pyproject.toml b/pyproject.toml index 4827999..1e4e203 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,11 +84,20 @@ select = [ "C4", # flake8-comprehensions "PIE", # flake8-pie "SIM", # flake8-simplify + "D", # pydocstyle (docstring linting) ] ignore = [ "E501", # line too long (handled by formatter) + "D203", # one-blank-line-before-class (conflicts with D211) + "D213", # multi-line-summary-second-line (conflicts with D212) ] +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["D"] # Don't require docstrings in tests + +[tool.ruff.lint.pydocstyle] +convention = "google" # Use Google-style docstrings + [tool.ruff.lint.isort] known-first-party = ["urlpath"] @@ -101,9 +110,9 @@ skip-magic-trailing-comma = false python_version = "3.9" warn_return_any = false warn_unused_configs = true -disallow_untyped_defs = false -disallow_incomplete_defs = false -check_untyped_defs = false +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true disallow_untyped_decorators = false no_implicit_optional = true warn_redundant_casts = true diff --git a/tests/test_url.py b/tests/test_url.py index 13e172f..784afd2 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -9,7 +9,7 @@ from urlpath import URL, JailedURL -def test_simple(): +def test_simple() -> None: original = "http://www.example.com/path/to/file.ext?query#fragment" url = URL(original) @@ -32,7 +32,7 @@ def test_simple(): assert url.fragment == "fragment" -def test_netloc_mixin(): +def test_netloc_mixin() -> None: url = URL("https://username:password@secure.example.com:1234/secure/path?query#fragment") assert url.drive == "https://username:password@secure.example.com:1234" @@ -44,7 +44,7 @@ def test_netloc_mixin(): assert url.port == 1234 -def test_join(): +def test_join() -> None: url = URL("http://www.example.com/path/to/file.ext?query#fragment") assert str(url / "https://secure.example.com/path") == "https://secure.example.com/path" @@ -52,13 +52,13 @@ def test_join(): assert str(url.with_name("other_file")) == "http://www.example.com/path/to/other_file" -def test_path(): +def test_path() -> None: url = URL("http://www.example.com/path/to/file.ext?query#fragment") assert url.path == "/path/to/file.ext" -def test_with(): +def test_with() -> None: url = URL("http://www.example.com/path/to/file.exe?query?fragment") assert str(url.with_scheme("https")) == "https://www.example.com/path/to/file.exe?query?fragment" @@ -78,7 +78,7 @@ def test_with(): ) -def test_query(): +def test_query() -> None: query = "field1=value1&field1=value2&field2=hello,%20world%26python" url = URL("http://www.example.com/form?" + query) @@ -101,7 +101,7 @@ def test_query(): assert url.form.get("field4") == ("1", "2", "3") -def test_add_query(): +def test_add_query() -> None: query = "field1=value1&field1=value2&field2=hello,%20world%26python" url = URL("http://www.example.com/form?" + query) @@ -132,13 +132,13 @@ def test_add_query(): assert url.add_query({}).query == query -def test_query_field_order(): +def test_query_field_order() -> None: url = URL("http://example.com/").with_query(field1="field1", field2="field2", field3="field3") assert str(url) == "http://example.com/?field1=field1&field2=field2&field3=field3" -def test_fragment(): +def test_fragment() -> None: url = URL("http://www.example.com/path/to/file.ext?query#fragment") assert url.fragment == "fragment" @@ -149,12 +149,12 @@ def test_fragment(): assert url.fragment == "new fragment" -def test_resolve(): +def test_resolve() -> None: url = URL("http://www.example.com//./../path/./..//./file/") assert str(url.resolve()) == "http://www.example.com/file" -def test_trailing_sep(): +def test_trailing_sep() -> None: original = "http://www.example.com/path/with/trailing/sep/" url = URL(original) @@ -169,7 +169,7 @@ def test_trailing_sep(): @pytest.mark.skipif(webob is None, reason="webob not installed") -def test_webob(): +def test_webob() -> None: base_url = "http://www.example.com" url = URL(webob.Request.blank("/webob/request", base_url=base_url)) @@ -179,7 +179,7 @@ def test_webob(): @pytest.mark.skipif(webob is None, reason="webob not installed") -def test_webob_jail(): +def test_webob_jail() -> None: request = webob.Request.blank("/path/to/filename.ext", {"SCRIPT_NAME": "/app/root"}) assert request.application_url == "http://localhost/app/root" @@ -191,7 +191,7 @@ def test_webob_jail(): assert str(url) == "http://localhost/app/root/path/to/filename.ext" -def test_jail(): +def test_jail() -> None: root = "http://www.example.com/app/" current = "http://www.example.com/app/path/to/content" url = URL(root).jailed / current @@ -208,13 +208,13 @@ def test_jail(): assert str(url / "http://www.example.com/app/path") == "http://www.example.com/app/path" -def test_init_with_empty_string(): +def test_init_with_empty_string() -> None: url = URL("") assert str(url) == "" -def test_encoding(): +def test_encoding() -> None: assert URL("http://www.xn--alliancefranaise-npb.nu/").hostname == "www.alliancefran\xe7aise.nu" assert ( str(URL("http://localhost/").with_hostinfo("www.alliancefran\xe7aise.nu")) @@ -265,7 +265,7 @@ def test_encoding(): assert str(URL("http://example.com/file").with_suffix(".///")) == "http://example.com/file.%2F%2F%2F" -def test_idempotent(): +def test_idempotent() -> None: url = URL( "http://\u65e5\u672c\u8a9e\u306e.\u30c9\u30e1\u30a4\u30f3.jp/" "path/to/\u30d5\u30a1\u30a4\u30eb.ext?\u30af\u30a8\u30ea" @@ -277,11 +277,11 @@ def test_idempotent(): ) -def test_embed(): +def test_embed() -> None: url = URL("http://example.com/").with_fragment(URL("/param1/param2").with_query(f1=1, f2=2)) assert str(url) == "http://example.com/#/param1/param2?f1=1&f2=2" -def test_pchar(): +def test_pchar() -> None: url = URL("s3://mybucket") / "some_folder/123_2017-10-30T18:43:11.csv.gz" assert str(url) == "s3://mybucket/some_folder/123_2017-10-30T18:43:11.csv.gz" diff --git a/urlpath/__init__.py b/urlpath/__init__.py index 96545ba..df00b11 100644 --- a/urlpath/__init__.py +++ b/urlpath/__init__.py @@ -1,4 +1,6 @@ -"""Object-oriented URL from `urllib.parse` and `pathlib`""" +"""Object-oriented URL from `urllib.parse` and `pathlib`.""" + +from __future__ import annotations __all__ = ("URL",) @@ -6,7 +8,9 @@ import functools import re import urllib.parse +from collections.abc import Iterator from pathlib import PurePath, _PosixFlavour +from typing import Any, Callable, TypeVar from unittest.mock import patch import requests @@ -24,26 +28,46 @@ missing = object() +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") + + # http://stackoverflow.com/a/2704866/3622941 -class FrozenDict(collections.abc.Mapping): - """Immutable dict object.""" +class FrozenDict(collections.abc.Mapping[_KT, _VT]): + """Immutable dictionary with hashability. + + An immutable mapping type that can be hashed and used as a dictionary key + or set member. Uses XOR-based hashing for O(n) performance. + + This implementation provides: + - Immutability: Cannot be modified after creation + - Hashability: Can be used as dict keys or in sets + - Memory efficiency: Uses __slots__ to reduce memory overhead + + Examples: + >>> fd = FrozenDict({'a': 1, 'b': 2}) + >>> fd['a'] + 1 + >>> hash(fd) # Can be hashed + >>> fd['a'] = 3 # Raises error - immutable + """ __slots__ = ("_d", "_hash") - def __init__(self, *args, **kwargs): - self._d = dict(*args, **kwargs) - self._hash = None + def __init__(self, *args: Any, **kwargs: Any) -> None: + self._d: dict[_KT, _VT] = dict(*args, **kwargs) + self._hash: int | None = None - def __iter__(self): + def __iter__(self) -> Iterator[_KT]: return iter(self._d) - def __len__(self): + def __len__(self) -> int: return len(self._d) - def __getitem__(self, key): + def __getitem__(self, key: _KT) -> _VT: return self._d[key] - def __hash__(self): + def __hash__(self) -> int: # It would have been simpler and maybe more obvious to # use hash(tuple(sorted(self._d.items()))) from this discussion # so far, but this solution is O(n). I don't know what kind of @@ -55,7 +79,7 @@ def __hash__(self): self._hash ^= hash(pair) return self._hash - def __repr__(self): + def __repr__(self) -> str: return "<{} {{{}}}>".format( self.__class__.__name__, ", ".join("{!r}: {!r}".format(*i) for i in sorted(self._d.items())), @@ -63,10 +87,32 @@ def __repr__(self): class MultiDictMixin: - def get_one(self, key, default=None, predicate=None, type=None): - # `predicate` comes from `inspect.getmembers`. + """Mixin that adds get_one() method for multi-value dictionaries. + + Useful for dictionaries where values are sequences (like URL query parameters). + """ + + def get_one( + self, + key: Any, + default: Any = None, + predicate: Callable[[Any], bool] | None = None, + type: Callable[[Any], Any] | None = None, + ) -> Any: + """Get the first value for a key that matches the predicate. + + Args: + key: The dictionary key to look up + default: Value to return if key not found or no value matches predicate + predicate: Optional callable to filter values (e.g., from inspect.getmembers) + type: Optional callable to transform the returned value + + Returns: + The first matching value, optionally transformed by type callable, + or default if no match found. + """ try: - values = self[key] + values = self[key] # type: ignore[index] except LookupError: pass else: @@ -77,15 +123,36 @@ def get_one(self, key, default=None, predicate=None, type=None): return default -class FrozenMultiDict(MultiDictMixin, FrozenDict): - pass +class FrozenMultiDict(MultiDictMixin, FrozenDict[str, tuple[str, ...]]): + """Immutable multi-value dictionary for URL query parameters. + + Combines FrozenDict's immutability and hashing with MultiDictMixin's + get_one() method for handling multiple values per key. + """ + + +_F = TypeVar("_F", bound=Callable[..., Any]) + + +def cached_property(getter: _F) -> _F: + """Cached property decorator that doesn't require __hash__. + A lightweight alternative to functools.lru_cache that stores the + computed value in the instance's __dict__ without requiring the + instance to be hashable. -def cached_property(getter): - """Limited version of `functools.lru_cache`. But `__hash__` is not required.""" + This decorator can be stacked with @property for compatibility with + PurePath's property-based API. + + Args: + getter: The property getter function to cache + + Returns: + A wrapper function that caches the result of the first call + """ @functools.wraps(getter) - def helper(self): + def helper(self: Any) -> Any: key = "_cached_property_" + getter.__name__ if key in self.__dict__: @@ -94,18 +161,29 @@ def helper(self): result = self.__dict__[key] = getter(self) return result - return helper + return helper # type: ignore[return-value] + +def netlocjoin( + username: str | None, + password: str | None, + hostname: str | None, + port: int | None, +) -> str: + """Build a network location string from components. -def netlocjoin(username, password, hostname, port): - """Helper function for building netloc string. + Constructs a netloc in the format 'username:password@hostname:port', + omitting components that are None and properly percent-encoding + username and password. - :param str username: username string or `None` - :param str password: password string or `None` - :param str hostname: hostname string or `None` - :param int port: port number or `None` - :return: netloc string - :rtype: str + Args: + username: Username string (will be percent-encoded) or None + password: Password string (will be percent-encoded) or None + hostname: Hostname string or None + port: Port number or None + + Returns: + Formatted netloc string (e.g., 'user:pass@host:8080'). """ result = "" @@ -128,10 +206,30 @@ def netlocjoin(username, password, hostname, port): class _URLFlavour(_PosixFlavour): + r"""Custom pathlib flavour for parsing URLs as filesystem paths. + + Extends PosixFlavour to treat URLs as paths by: + - Using scheme+netloc as the drive component + - Parsing URL components (scheme, netloc, path, query, fragment) + - Escaping '/' characters in query and fragment with \\x00 + """ + has_drv = True # drive is scheme + netloc is_supported = True # supported in all platform - def splitroot(self, part, sep=_PosixFlavour.sep): + def splitroot(self, part: str, sep: str = _PosixFlavour.sep) -> tuple[str, str, str]: + """Split a URL into drive (scheme+netloc), root, and path components. + + Args: + part: URL string to split + sep: Path separator (must be '/') + + Returns: + Tuple of (drive, root, path) where: + - drive is 'scheme://netloc' + - root is the leading '/' if present + - path is the remainder with query/fragment escaped + """ assert sep == self.sep assert "\\x00" not in part @@ -143,34 +241,59 @@ def splitroot(self, part, sep=_PosixFlavour.sep): path = urllib.parse.urlunsplit(("", "", path, query.replace("/", "\\x00"), fragment.replace("/", "\\x00"))) drive = urllib.parse.urlunsplit((scheme, netloc, "", "", "")) - root, path = re.match(f"^({re.escape(sep)}*)(.*)$", path).groups() + match = re.match(f"^({re.escape(sep)}*)(.*)$", path) + assert match is not None + root, path = match.groups() return drive, root, path class URL(urllib.parse._NetlocResultMixinStr, PurePath): + """Object-oriented URL manipulation extending pathlib.PurePath. + + URL combines the power of pathlib's path operations with URL component + manipulation. It provides: + + - Pathlib-style operations: joining paths with /, parent, name, suffix, etc. + - URL components: scheme, netloc, username, password, hostname, port + - Query string handling: form, form_fields, with_query(), add_query() + - HTTP methods: get(), post(), put(), patch(), delete(), head(), options() + - Immutability: all modifications return new URL instances + + Examples: + >>> url = URL('https://user:pass@example.com:8080/path/to/file.txt?key=value#section') + >>> url.scheme + 'https' + >>> url.hostname + 'example.com' + >>> str(url / 'other.txt') + 'https://user:pass@example.com:8080/path/to/other.txt?key=value#section' + >>> str(url.with_query(foo='bar')) + 'https://user:pass@example.com:8080/path/to/file.txt?foo=bar#section' + """ + _flavour = _URLFlavour() - _parse_qsl_args: dict[str, bool] = {} - _urlencode_args: dict[str, bool] = {"doseq": True} + _parse_qsl_args: dict[str, Any] = {} + _urlencode_args: dict[str, Any] = {"doseq": True} @classmethod - def _from_parts(cls, args): + def _from_parts(cls, args: Any) -> URL: ret = super()._from_parts(args) ret._init() return ret @classmethod - def _from_parsed_parts(cls, drv, root, parts): + def _from_parsed_parts(cls, drv: str, root: str, parts: list[str]) -> URL: ret = super()._from_parsed_parts(drv, root, parts) ret._init() return ret @classmethod - def _parse_args(cls, args): + def _parse_args(cls, args: Any) -> Any: return super()._parse_args(cls._canonicalize_arg(a) for a in args) @classmethod - def _canonicalize_arg(cls, a): + def _canonicalize_arg(cls, a: Any) -> str: if isinstance(a, urllib.parse.SplitResult): return urllib.parse.urlunsplit(a) @@ -182,37 +305,49 @@ def _canonicalize_arg(cls, a): return a - def _init(self): + def _init(self) -> None: if self._parts: # trick to escape '/' in query and fragment and trailing self._parts[-1] = self._parts[-1].replace("\\x00", "/") - def _make_child(self, args): + def _make_child(self, args: Any) -> URL: # replace by parts that have no query and have no fragment with patch.object(self, "_parts", list(self.parts)): return super()._make_child(args) @cached_property - def __str__(self): + def __str__(self) -> str: + """Return string representation of the URL.""" # NOTE: PurePath.__str__ returns '.' if path is empty. return urllib.parse.urlunsplit(self.components) @cached_property - def __bytes__(self): + def __bytes__(self) -> bytes: + """Return UTF-8 encoded bytes representation of the URL.""" return str(self).encode("utf-8") # TODO: sort self.query in __hash__ @cached_property - def as_uri(self): - """Return URI.""" + def as_uri(self) -> str: + """Return the URL as a URI string. + + Returns: + The complete URI representation of the URL. + """ return str(self) @property @cached_property - def parts(self): - """An object providing sequence-like access to the - components in the filesystem path.""" + def parts(self) -> tuple[str, ...]: + """Path components as a tuple, similar to pathlib.PurePath.parts. + + Components are decoded from percent-encoding. The first element + is the URL root (scheme + netloc + '/') if present. + + Returns: + Tuple of decoded path components. + """ if self._drv or self._root: return tuple([self._parts[0]] + [urllib.parse.unquote(i) for i in self._parts[1:-1]] + [self.name]) else: @@ -220,44 +355,66 @@ def parts(self): @property @cached_property - def components(self): - """Url components, `(scheme, netloc, path, query, fragment)`.""" + def components(self) -> tuple[str, str, str, str, str]: + """All URL components as a tuple. + + Returns: + Tuple of (scheme, netloc, path, query, fragment). + """ return self.scheme, self.netloc, self.path, self.query, self.fragment _cparts = components @property @cached_property - def scheme(self): - """The scheme of url.""" + def scheme(self) -> str: + """URL scheme (e.g., 'http', 'https', 'ftp'). + + Returns: + The scheme component of the URL. + """ return urllib.parse.urlsplit(self._drv).scheme @property @cached_property - def netloc(self): - """The scheme of url.""" + def netloc(self) -> str: + """Network location (combined username, password, hostname, and port). + + Returns: + The netloc component in the format 'user:pass@host:port'. + """ return netlocjoin(self.username, self.password, self.hostname, self.port) @property @cached_property - def _userinfo(self): + def _userinfo(self) -> tuple[str | None, str | None]: return urllib.parse.urlsplit(self._drv)._userinfo @property @cached_property - def _hostinfo(self): + def _hostinfo(self) -> tuple[str | None, int | None]: return urllib.parse.urlsplit(self._drv)._hostinfo @property @cached_property - def hostinfo(self): - """The hostinfo of url. "hostinfo" is hostname and port.""" + def hostinfo(self) -> str: + """Hostname and port combined (excluding username and password). + + Returns: + The hostinfo in the format 'host:port'. + """ return netlocjoin(None, None, self.hostname, self.port) @property @cached_property - def username(self): - """The username of url.""" + def username(self) -> str | None: + """Username from the URL's authentication section. + + Automatically decodes percent-encoded usernames. + + Returns: + The decoded username, or None if not present. + """ # NOTE: username and password can be encoded by percent-encoding. # http://%75%73%65%72:%70%61%73%73%77%64@httpbin.org/basic-auth/user/passwd result = super().username @@ -267,8 +424,14 @@ def username(self): @property @cached_property - def password(self): - """The password of url.""" + def password(self) -> str | None: + """Password from the URL's authentication section. + + Automatically decodes percent-encoded passwords. + + Returns: + The decoded password, or None if not present. + """ result = super().password if result is not None: result = urllib.parse.unquote(result) @@ -276,8 +439,14 @@ def password(self): @property @cached_property - def hostname(self): - """The hostname of url.""" + def hostname(self) -> str | None: + """Hostname from the URL. + + Automatically decodes internationalized domain names (IDN) from punycode. + + Returns: + The decoded hostname, or None if not present. + """ import contextlib result = super().hostname @@ -288,9 +457,14 @@ def hostname(self): @property @cached_property - def path(self): - """The path of url, it's with trailing sep.""" + def path(self) -> str: + """URL path component, including trailing separator if present. + + Properly encodes path characters according to RFC 3986. + Returns: + The percent-encoded path string with trailing separator preserved. + """ # https://tools.ietf.org/html/rfc3986#appendix-A safe_pchars = "-._~!$&'()*+,;=:@" @@ -306,65 +480,130 @@ def path(self): @property @cached_property - def name(self): - """The final path component, if any.""" + def name(self) -> str: + """Final path component (filename), decoded and without query/fragment. + + Returns: + The decoded filename or last path segment. + """ return urllib.parse.unquote(urllib.parse.urlsplit(super().name).path.rstrip(self._flavour.sep)) @property @cached_property - def query(self): - """The query of url.""" + def query(self) -> str: + """Query string component of the URL. + + Returns: + The raw query string (without the leading '?'). + """ return urllib.parse.urlsplit(super().name).query @property @cached_property - def fragment(self): - """The fragment of url.""" + def fragment(self) -> str: + """Fragment identifier component of the URL. + + Returns: + The fragment string (without the leading '#'). + """ return urllib.parse.urlsplit(super().name).fragment @property @cached_property - def trailing_sep(self): - """The trailing separator of url.""" - return re.search("(" + re.escape(self._flavour.sep) + "*)$", urllib.parse.urlsplit(super().name).path).group(0) + def trailing_sep(self) -> str: + """Trailing separator characters from the path. + + Returns: + The trailing '/' characters, or empty string if none. + """ + match = re.search("(" + re.escape(self._flavour.sep) + "*)$", urllib.parse.urlsplit(super().name).path) + assert match is not None + return match.group(0) @property @cached_property - def form_fields(self): - """The query parsed by `urllib.parse.parse_qsl` of url.""" + def form_fields(self) -> tuple[tuple[str, str], ...]: + """Query string parsed as a tuple of (key, value) pairs. + + Uses urllib.parse.parse_qsl for parsing, preserving order and duplicates. + + Returns: + Tuple of (name, value) tuples from the query string. + """ return tuple(urllib.parse.parse_qsl(self.query, **self._parse_qsl_args)) @property @cached_property - def form(self): - """The query parsed by `urllib.parse.parse_qs` of url.""" + def form(self) -> FrozenMultiDict: + """Query string parsed as an immutable multi-value dictionary. + + Keys with multiple values are stored as tuples. Useful for accessing + query parameters by name. + + Returns: + FrozenMultiDict mapping parameter names to tuples of values. + """ return FrozenMultiDict( {k: tuple(v) for k, v in urllib.parse.parse_qs(self.query, **self._parse_qsl_args).items()} ) - def with_name(self, name): - """Return a new url with the file name changed.""" + def with_name(self, name: str) -> URL: + """Return a new URL with the filename changed. + + Args: + name: The new filename (automatically percent-encoded) + + Returns: + A new URL instance with the modified filename. + """ return super().with_name(urllib.parse.quote(name, safe="")) - def with_suffix(self, suffix): - """Return a new url with the file suffix changed (or added, if none).""" + def with_suffix(self, suffix: str) -> URL: + """Return a new URL with the file suffix changed or added. + + Args: + suffix: The new suffix including the dot (e.g., '.txt') + + Returns: + A new URL instance with the modified suffix. + """ return super().with_suffix(urllib.parse.quote(suffix, safe=".")) def with_components( self, *, - scheme=missing, - netloc=missing, - username=missing, - password=missing, - hostname=missing, - port=missing, - path=missing, - name=missing, - query=missing, - fragment=missing, - ): - """Return a new url with components changed.""" + scheme: Any = missing, + netloc: Any = missing, + username: Any = missing, + password: Any = missing, + hostname: Any = missing, + port: Any = missing, + path: Any = missing, + name: Any = missing, + query: Any = missing, + fragment: Any = missing, + ) -> URL: + """Return a new URL with specified components changed. + + All arguments are keyword-only. Omitted arguments retain their current values. + You can specify either netloc OR (username, password, hostname, port), not both. + You can specify either path OR name, not both. + + Args: + scheme: New scheme (e.g., 'https') + netloc: New network location as a string + username: New username (mutually exclusive with netloc) + password: New password (mutually exclusive with netloc) + hostname: New hostname (mutually exclusive with netloc) + port: New port number (mutually exclusive with netloc) + path: New path (mutually exclusive with name) + name: New filename (mutually exclusive with path) + query: New query string (str, dict, or list of tuples) + fragment: New fragment identifier + + Returns: + A new URL instance with the specified components modified. + """ if scheme is missing: scheme = self.scheme elif scheme is not None and not isinstance(scheme, str): @@ -433,29 +672,75 @@ def with_components( return self.__class__(urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))) - def with_scheme(self, scheme): - """Return a new url with the scheme changed.""" + def with_scheme(self, scheme: Any) -> URL: + """Return a new URL with the scheme changed. + + Args: + scheme: New scheme (e.g., 'https', 'ftp') + + Returns: + A new URL instance with the modified scheme. + """ return self.with_components(scheme=scheme) - def with_netloc(self, netloc): - """Return a new url with the netloc changed.""" + def with_netloc(self, netloc: Any) -> URL: + """Return a new URL with the network location changed. + + Args: + netloc: New netloc in format 'user:pass@host:port' + + Returns: + A new URL instance with the modified netloc. + """ return self.with_components(netloc=netloc) - def with_userinfo(self, username, password): - """Return a new url with the userinfo changed.""" + def with_userinfo(self, username: Any, password: Any) -> URL: + """Return a new URL with username and password changed. + + Args: + username: New username + password: New password + + Returns: + A new URL instance with modified credentials. + """ return self.with_components(username=username, password=password) - def with_hostinfo(self, hostname, port=None): - """Return a new url with the hostinfo changed.""" + def with_hostinfo(self, hostname: Any, port: int | None = None) -> URL: + """Return a new URL with hostname and port changed. + + Args: + hostname: New hostname + port: New port number (optional) + + Returns: + A new URL instance with modified host information. + """ return self.with_components(hostname=hostname, port=port) - def with_query(self, query=None, **kwargs): - """Return a new url with the query changed.""" + def with_query(self, query: Any = None, **kwargs: Any) -> URL: + """Return a new URL with the query string replaced. + + Args: + query: New query as dict, list of tuples, or string + **kwargs: Alternative way to specify query as keyword arguments + + Returns: + A new URL instance with the modified query string. + """ assert not (query and kwargs) return self.with_components(query=query or kwargs) - def add_query(self, query=None, **kwargs): - """Return a new url with the query ammended.""" + def add_query(self, query: Any = None, **kwargs: Any) -> URL: + """Return a new URL with query parameters appended to existing query. + + Args: + query: Additional query as dict, list of tuples, or string + **kwargs: Alternative way to specify additional query parameters + + Returns: + A new URL instance with query parameters added. + """ assert not (query and kwargs) query = query or kwargs if not query: @@ -475,13 +760,24 @@ def add_query(self, query=None, **kwargs): return self.with_components(query=new) return self.with_components() - def with_fragment(self, fragment): - """Return a new url with the fragment changed.""" + def with_fragment(self, fragment: Any) -> URL: + """Return a new URL with the fragment identifier changed. + + Args: + fragment: New fragment identifier (without the '#') + + Returns: + A new URL instance with the modified fragment. + """ return self.with_components(fragment=fragment) - def resolve(self): - """Resolve relative path of the path.""" - path = [] + def resolve(self) -> URL: + """Resolve relative path components ('.' and '..'). + + Returns: + A new URL with normalized path (no relative components). + """ + path: list[str] = [] for part in self.parts[1:] if self._drv or self._root else self.parts: if part == "." or part == "": @@ -495,98 +791,116 @@ def resolve(self): if self._root: path.insert(0, self._root.rstrip(self._flavour.sep)) - path = self._flavour.join(path) - return self.__class__(urllib.parse.urlunsplit((self.scheme, self.netloc, path, self.query, self.fragment))) + path_str = self._flavour.join(path) + return self.__class__(urllib.parse.urlunsplit((self.scheme, self.netloc, path_str, self.query, self.fragment))) @property - def jailed(self): + def jailed(self) -> JailedURL: + """Create a JailedURL with this URL as both the current and root URL.""" return JailedURL(self, root=self) - def get(self, params=None, **kwargs): - r"""Sends a GET request. + def get(self, params: Any = None, **kwargs: Any) -> requests.Response: + """Send a GET request to this URL. - :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + params: Dictionary or bytes to send in the query string + **kwargs: Additional arguments passed to requests.get() + Returns: + requests.Response object from the GET request. + """ url = str(self) response = requests.get(url, params, **kwargs) return response - def options(self, **kwargs): - r"""Sends a OPTIONS request. + def options(self, **kwargs: Any) -> requests.Response: + """Send an OPTIONS request to this URL. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + **kwargs: Additional arguments passed to requests.options() + Returns: + requests.Response object from the OPTIONS request. + """ url = str(self) return requests.options(url, **kwargs) - def head(self, **kwargs): - r"""Sends a HEAD request. + def head(self, **kwargs: Any) -> requests.Response: + """Send a HEAD request to this URL. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + **kwargs: Additional arguments passed to requests.head() + Returns: + requests.Response object from the HEAD request. + """ url = str(self) return requests.options(url, **kwargs) - def post(self, data=None, json=None, **kwargs): - r"""Sends a POST request. + def post(self, data: Any = None, json: Any = None, **kwargs: Any) -> requests.Response: + """Send a POST request to this URL. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. - :param json: (optional) json data to send in the body of the :class:`Request`. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + data: Dictionary, bytes, or file-like object to send in the request body + json: JSON data to send in the request body + **kwargs: Additional arguments passed to requests.post() + Returns: + requests.Response object from the POST request. + """ url = str(self) return requests.post(url, data=data, json=json, **kwargs) - def put(self, data=None, **kwargs): - r"""Sends a PUT request. + def put(self, data: Any = None, **kwargs: Any) -> requests.Response: + """Send a PUT request to this URL. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + data: Dictionary, bytes, or file-like object to send in the request body + **kwargs: Additional arguments passed to requests.put() + Returns: + requests.Response object from the PUT request. + """ url = str(self) return requests.put(url, data=data, **kwargs) - def patch(self, data=None, **kwargs): - r"""Sends a PATCH request. + def patch(self, data: Any = None, **kwargs: Any) -> requests.Response: + """Send a PATCH request to this URL. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + data: Dictionary, bytes, or file-like object to send in the request body + **kwargs: Additional arguments passed to requests.patch() + Returns: + requests.Response object from the PATCH request. + """ url = str(self) return requests.patch(url, data=data, **kwargs) - def delete(self, **kwargs): - r"""Sends a DELETE request. + def delete(self, **kwargs: Any) -> requests.Response: + """Send a DELETE request to this URL. - :param \*\*kwargs: Optional arguments that ``request`` takes. - :return: :class:`Response ` object - :rtype: requests.Response - """ + Args: + **kwargs: Additional arguments passed to requests.delete() + Returns: + requests.Response object from the DELETE request. + """ url = str(self) return requests.delete(url, **kwargs) - def get_text(self, name="", query="", pattern="", overwrite=False): - """Runs a url with a specific query, amending query if necessary, and returns the resulting text""" + def get_text(self, name: str = "", query: Any = "", pattern: Any = "", overwrite: bool = False) -> Any: + """Execute a GET request and return text response, optionally filtered. + + Args: + name: Path segment to append before making request + query: Query parameters to add or replace + pattern: Regex pattern (str or compiled) to filter response lines + overwrite: If True, replace query; if False, amend existing query + + Returns: + Response text as string, or list of matching lines if pattern provided. + """ q = query if overwrite else self.add_query(query).query if query else self.query url = self.joinpath(name) if name else self res = url.with_query(q).get() @@ -602,9 +916,21 @@ def get_text(self, name="", query="", pattern="", overwrite=False): return res - def get_json(self, name="", query="", keys="", overwrite=False): - """Runs a url with a specific query, amending query if necessary, and returns the result after applying a - transformer""" + def get_json(self, name: str = "", query: Any = "", keys: Any = "", overwrite: bool = False) -> Any: + """Execute a GET request and return JSON response, optionally filtered with JMESPath. + + Args: + name: Path segment to append before making request + query: Query parameters to add or replace + keys: JMESPath expression (str or compiled) to extract data from JSON + overwrite: If True, replace query; if False, amend existing query + + Returns: + Parsed JSON response, or JMESPath-filtered result if keys provided. + + Raises: + ImportError: If keys is provided but jmespath is not installed. + """ q = query if overwrite else self.add_query(query).query if query else self.query url = self.joinpath(name) if name else self res = url.with_query(q).get() @@ -622,12 +948,31 @@ def get_json(self, name="", query="", keys="", overwrite=False): class JailedURL(URL): - _chroot = None + """URL that is restricted to stay within a root URL path (sandboxed). + + JailedURL ensures all path operations stay within the specified root, + preventing navigation outside the jail via '..' or absolute paths. + Useful for security-sensitive applications or URL templating. + + Examples: + >>> root = URL('http://example.com/app/') + >>> jail = JailedURL('http://example.com/app/content', root=root) + >>> str(jail / '../../escape') # Stays within /app/ + 'http://example.com/app/' + >>> str(jail / '/absolute') # Absolute paths relative to root + 'http://example.com/app/absolute' + + Attributes: + _chroot: The root URL that constrains all operations + """ + + _chroot: URL | None = None # Dynamically set by __new__, will be URL when methods run - def __new__(cls, *args, root=None): + def __new__(cls, *args: Any, root: Any = None) -> JailedURL: if root is not None: root = URL(root) elif cls._chroot is not None: + # This is reachable when __new__ is called on dynamically created subclasses root = cls._chroot elif webob and len(args) >= 1 and isinstance(args[0], webob.Request): root = URL(args[0].application_url) @@ -641,9 +986,10 @@ def __new__(cls, *args, root=None): return type(cls.__name__, (cls,), {"_chroot": root})._from_parts(args) - def _make_child(self, args): + def _make_child(self, args: Any) -> URL: drv, root, parts = self._parse_args(args) chroot = self._chroot + assert chroot is not None # Always set by __new__ if drv: # check in _init @@ -657,16 +1003,18 @@ def _make_child(self, args): return self._from_parsed_parts(drv, root, parts) - def _init(self): + def _init(self) -> None: chroot = self._chroot + assert chroot is not None # Always set by __new__ - if self._parts[: len(chroot.parts)] != list(chroot.parts): + if self._parts[: len(chroot.parts)] != list(chroot.parts): # type: ignore[has-type] self._drv, self._root, self._parts = chroot._drv, chroot._root, chroot._parts[:] super()._init() - def resolve(self): + def resolve(self) -> URL: chroot = self._chroot + assert chroot is not None # Always set by __new__ with ( patch.object(self, "_root", chroot.path), @@ -675,5 +1023,6 @@ def resolve(self): return super().resolve() @property - def chroot(self): + def chroot(self) -> URL: + assert self._chroot is not None # Always set by __new__ return self._chroot