Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,25 @@ def test_percent_encoding_spaces() -> None:
# Test that actual spaces get encoded properly
url_with_spaces = URL("https://somepath.com/test") / "Test path" / "my test file.txt"
assert str(url_with_spaces) == "https://somepath.com/test/Test%20path/my%20test%20file.txt"


def test_colon_in_filename() -> None:
    """A ':' inside a path segment must not be parsed as a scheme separator."""
    # Regression for the reported bug: URL('http://www.example.com/abc:def.html')
    # came back truncated as 'http://www.example.com/def.html'.
    reported = URL("http://www.example.com/abc:def.html")
    assert str(reported) == "http://www.example.com/abc:def.html"
    assert reported.name == "abc:def.html"
    assert reported.path == "/abc:def.html"

    # The colon may sit mid-name, under a nested path, or at either end of the stem;
    # in every case the URL has to round-trip unchanged.
    for raw in (
        "http://www.example.com/file:name.txt",
        "http://www.example.com/path/to/file:v2.html",
        "http://www.example.com/:colon.txt",
        "http://www.example.com/colon:.txt",
    ):
        assert str(URL(raw)) == raw

    # Query and fragment must still be split off when the name carries a colon.
    decorated = URL("http://www.example.com/abc:def.html?key=value#frag")
    assert decorated.name == "abc:def.html"
    assert decorated.query == "key=value"
    assert decorated.fragment == "frag"
    assert str(decorated) == "http://www.example.com/abc:def.html?key=value#frag"
43 changes: 39 additions & 4 deletions urlpath/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,41 @@ def path(self) -> str:
+ self.trailing_sep
)

@property
@cached_property
def _name_parts(self) -> tuple[str, str, str]:
    """Split ``super().name`` into its path, query and fragment pieces.

    ``urlsplit`` is deliberately not used here: it treats colons as scheme
    separators, which breaks filenames like 'abc:def.html'.

    The first '#' starts the fragment. Within the text before it, the first
    '?' starts the query. Whatever is left is the path.

    Returns:
        Tuple of (path, query, fragment) strings; query and fragment are
        empty strings when their separator is absent.
    """
    # str.partition splits on the first occurrence and yields "" for the
    # tail when the separator is missing, so no index bookkeeping is needed.
    remainder, _, fragment = super().name.partition("#")
    path, _, query = remainder.partition("?")
    return path, query, fragment

@property
@cached_property
def name(self) -> str:
Expand All @@ -487,7 +522,7 @@ def name(self) -> str:
Returns:
The decoded filename or last path segment.
"""
return urllib.parse.unquote(urllib.parse.urlsplit(super().name).path.rstrip(self._flavour.sep))
return urllib.parse.unquote(self._name_parts[0].rstrip(self._flavour.sep))

@property
@cached_property
Expand All @@ -497,7 +532,7 @@ def query(self) -> str:
Returns:
The raw query string (without the leading '?').
"""
return urllib.parse.urlsplit(super().name).query
return self._name_parts[1]

@property
@cached_property
Expand All @@ -507,7 +542,7 @@ def fragment(self) -> str:
Returns:
The fragment string (without the leading '#').
"""
return urllib.parse.urlsplit(super().name).fragment
return self._name_parts[2]

@property
@cached_property
Expand All @@ -517,7 +552,7 @@ def trailing_sep(self) -> str:
Returns:
The trailing '/' characters, or empty string if none.
"""
match = re.search("(" + re.escape(self._flavour.sep) + "*)$", urllib.parse.urlsplit(super().name).path)
match = re.search("(" + re.escape(self._flavour.sep) + "*)$", self._name_parts[0])
assert match is not None
return match.group(0)

Expand Down