diff --git a/CHANGELOG.md b/CHANGELOG.md index 903f345..9c420ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.1.2] - 2025-01-27 +### Fixed +- When matching URIs against allow/disallow rules, the library previously used only the path part of the URI. It now matches against the path, query and fragment. + ## [1.1.1] - 2022-11-08 ### Fixed - The `Parser` now also trims hidden whitespace characters that aren't covered by PHP's `trim()` function by default. Such characters at the beginning of a line can cause parsing to fail, because it's important that user-agent and rule lines actually start with the corresponding keywords. diff --git a/LICENSE b/LICENSE index 81fc620..4b0c56e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2024 Christian Olear +Copyright (c) 2025 Christian Olear Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/src/RulePattern.php b/src/RulePattern.php index 26e63a6..4a0bf03 100644 --- a/src/RulePattern.php +++ b/src/RulePattern.php @@ -25,15 +25,11 @@ public function pattern(): string */ public function matches(string|Url $uri): bool { - $path = $uri instanceof Url ? $uri->path() : Url::parse($uri)->path(); + $pathQueryFragment = $uri instanceof Url ? 
$uri->relative() : Url::parse($uri)->relative(); - if (!is_string($path)) { - return false; - } - - $path = Encoding::decodePercentEncodedAsciiCharactersInPath($path); + $pathQueryFragment = Encoding::decodePercentEncodedAsciiCharactersInPath($pathQueryFragment); - return preg_match($this->preparedRegexPattern(), $path) === 1; + return preg_match($this->preparedRegexPattern(), $pathQueryFragment) === 1; } private function preparedRegexPattern(): string diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 42105b7..b9deed4 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -357,6 +357,22 @@ public function test_parse_sitemap_lines(): void ], $robotsTxt->sitemaps()); } + public function test_it_uses_not_only_the_path_but_also_the_query_when_matching(): void + { + $robotsTxtContent = <<parse($robotsTxtContent); + + $this->assertFalse($robotsTxt->isAllowed('/?foo', 'MyBot')); + + $this->assertFalse($robotsTxt->isAllowed('/?foo=bar', 'MyBot')); + + $this->assertTrue($robotsTxt->isAllowed('/yo?foo=bar', 'MyBot')); + } + /** * @param string[] $expected * @param RulePattern[] $actual