From 648d8c605b21c54fbcd395354181ed1b044cc664 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 16:42:54 +0100 Subject: [PATCH 1/3] Update coding style for upcoming PHP-CS-Fixer changes Once we bump minimum PHP version, we will get newer PHP-CS-Fixer, which will try to apply this cleanups. Also manually tweak anonymous functions so that they are cleanly formatted once we switch to `fn` syntax. --- .php-cs-fixer.php | 4 +++ src/Readability.php | 61 ++++++++++++++++++++++++--------------- tests/ReadabilityTest.php | 2 +- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php index 19cd18d..5f09a0c 100644 --- a/.php-cs-fixer.php +++ b/.php-cs-fixer.php @@ -26,6 +26,10 @@ 'strict_comparison' => true, 'strict_param' => true, 'concat_space' => ['spacing' => 'one'], + // Pulled in by @Symfony:risky but we still support PHP 7.4 + 'modernize_strpos' => false, + // Pulled in by @Symfony, we cannot add property types until we bump PHP to ≥ 7.4 + 'no_null_property_initialization' => false, ]) ->setFinder($finder) ; diff --git a/src/Readability.php b/src/Readability.php index f41d6c0..7de921e 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -142,7 +142,7 @@ class Readability implements LoggerAwareInterface * @param string $parser Which parser to use for turning raw HTML into a DOMDocument * @param bool $useTidy Use tidy */ - public function __construct(string $html, string $url = null, string $parser = 'libxml', bool $useTidy = true) + public function __construct(string $html, ?string $url = null, string $parser = 'libxml', bool $useTidy = true) { $this->url = $url; $this->html = $html; @@ -739,7 +739,7 @@ public function flagIsActive(int $flag): bool */ public function addFlag(int $flag): void { - $this->flags = $this->flags | $flag; + $this->flags |= $flag; } /** @@ -747,7 +747,7 @@ public function addFlag(int $flag): void */ public function removeFlag(int $flag): void { - $this->flags = $this->flags & ~$flag; + $this->flags &= ~$flag; } /** @@ -893,11 +893,9 @@ protected function initializeNode(\DOMElement $node): void * Using a variety of metrics (content score, classname, element types), find the content that is * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. * - * @param \DOMElement $page - * * @return \DOMElement|false */ - protected function grabArticle(\DOMElement $page = null) + protected function grabArticle(?\DOMElement $page = null) { if (!$page) { $page = $this->dom; @@ -933,9 +931,9 @@ protected function grabArticle(\DOMElement $page = null) // Remove unlikely candidates $unlikelyMatchString = $node->getAttribute('class') . ' ' . $node->getAttribute('id') . ' ' . $node->getAttribute('style'); - if (mb_strlen($unlikelyMatchString) > 3 && // don't process "empty" strings - preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) && - !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) + if (mb_strlen($unlikelyMatchString) > 3 // don't process "empty" strings + && preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) + && !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) ) { $this->logger->debug('Removing unlikely candidate (using conf) ' . $node->getNodePath() . ' by "' . $unlikelyMatchString . '"'); $node->parentNode->removeChild($node); @@ -1120,9 +1118,13 @@ protected function grabArticle(\DOMElement $page = null) } } - $topCandidates = array_filter($topCandidates, function ($v, $idx) { - return 0 === $idx || null !== $v; - }, \ARRAY_FILTER_USE_BOTH); + $topCandidates = array_filter( + $topCandidates, + function ($v, $idx) { + return 0 === $idx || null !== $v; + }, + \ARRAY_FILTER_USE_BOTH + ); $topCandidate = $topCandidates[0]; /* @@ -1442,7 +1444,7 @@ private function loadHtml(): void libxml_use_internal_errors(false); } - $this->dom->registerNodeClass(\DOMElement::class, \Readability\JSLikeHTMLElement::class); + $this->dom->registerNodeClass(\DOMElement::class, JSLikeHTMLElement::class); } private function getAncestors(\DOMElement $node, int $maxDepth = 0): array @@ -1464,9 +1466,19 @@ private function isPhrasingContent($node): bool { return \XML_TEXT_NODE === $node->nodeType || \in_array(strtoupper($node->nodeName), $this->phrasingElements, true) - || (\in_array(strtoupper($node->nodeName), ['A', 'DEL', 'INS'], true) && !\in_array(false, array_map(function ($c) { - return $this->isPhrasingContent($c); - }, iterator_to_array($node->childNodes)), true)); + || ( + \in_array(strtoupper($node->nodeName), ['A', 'DEL', 'INS'], true) + && !\in_array( + false, + array_map( + function ($c) { + return $this->isPhrasingContent($c); + }, + iterator_to_array($node->childNodes) + ), + true + ) + ); } private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool @@ -1475,10 +1487,12 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool return false; } - $a = array_filter(iterator_to_array($node->childNodes), function ($childNode) { - return $childNode instanceof \DOMText && - preg_match($this->regexps['hasContent'], $this->getInnerText($childNode)); - }); + $a = array_filter( + iterator_to_array($node->childNodes), + function ($childNode) { + return $childNode instanceof \DOMText && preg_match($this->regexps['hasContent'], $this->getInnerText($childNode)); + } + ); return 0 === \count($a); } @@ -1491,9 +1505,10 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool */ private function isNodeVisible(\DOMElement $node): bool { - return !($node->hasAttribute('style') - && preg_match($this->regexps['isNotVisible'], $node->getAttribute('style')) + return !( + $node->hasAttribute('style') + && preg_match($this->regexps['isNotVisible'], $node->getAttribute('style')) ) - && !$node->hasAttribute('hidden'); + && !$node->hasAttribute('hidden'); } } diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php index 1c3d5c2..3fb9dc0 100644 --- a/tests/ReadabilityTest.php +++ b/tests/ReadabilityTest.php @@ -550,7 +550,7 @@ public function testVisibleNode(string $content, bool $shouldBeVisible): void } } - private function getReadability(string $html, string $url = null, string $parser = 'libxml', bool $useTidy = true): Readability + private function getReadability(string $html, ?string $url = null, string $parser = 'libxml', bool $useTidy = true): Readability { $readability = new Readability($html, $url, $parser, $useTidy); From e792644fe82ba63e08a1ab01f5db318623aab9f4 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 16:11:23 +0100 Subject: [PATCH 2/3] Drop PHP < 7.4 support This will allow us to use flexible heredocs in test, as well as typed properties and other goodies. https://www.php.net/releases/7_3_0.php https://www.php.net/releases/7_4_0.php --- .github/workflows/coding-standards.yml | 2 +- .github/workflows/continuous-integration.yml | 6 ++---- composer.json | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/coding-standards.yml b/.github/workflows/coding-standards.yml index 24feed1..de7c8aa 100644 --- a/.github/workflows/coding-standards.yml +++ b/.github/workflows/coding-standards.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: php: - - "7.2" + - "7.4" steps: - name: "Checkout" diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 2687e8f..3e2562c 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -19,8 +19,6 @@ jobs: strategy: matrix: php: - - "7.2" - - "7.3" - "7.4" - "8.0" - "8.1" @@ -65,7 +63,7 @@ jobs: strategy: matrix: php: - - "7.4" + - "8.0" steps: - name: "Checkout" @@ -116,7 +114,7 @@ jobs: strategy: matrix: php: - - "7.2" + - "7.4" steps: - name: "Checkout" diff --git a/composer.json b/composer.json index 254c04d..dd26256 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,7 @@ "role": "Developer (original JS version)" }], "require": { - "php": ">=7.2.0", + "php": ">=7.4.0", "ext-mbstring": "*", "psr/log": "^1.0.1 || ^2.0 || ^3.0", "masterminds/html5": "^2.7" From 89d3b74259d2d680948d692429a2531f7e060340 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 16 Mar 2024 17:01:08 +0100 Subject: [PATCH 3/3] Rectorize to PHP 7.4 Switches to short anonymous function syntax. --- rector.php | 4 ++-- src/Readability.php | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/rector.php b/rector.php index 379e368..8c99642 100644 --- a/rector.php +++ b/rector.php @@ -23,9 +23,9 @@ // Define what rule sets will be applied $rectorConfig->sets([ - LevelSetList::UP_TO_PHP_72, + LevelSetList::UP_TO_PHP_74, ]); // is your PHP version different from the one your refactor to? - $rectorConfig->phpVersion(PhpVersion::PHP_72); + $rectorConfig->phpVersion(PhpVersion::PHP_74); }; diff --git a/src/Readability.php b/src/Readability.php index 7de921e..836a333 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1120,9 +1120,7 @@ protected function grabArticle(?\DOMElement $page = null) $topCandidates = array_filter( $topCandidates, - function ($v, $idx) { - return 0 === $idx || null !== $v; - }, + fn ($v, $idx) => 0 === $idx || null !== $v, \ARRAY_FILTER_USE_BOTH ); $topCandidate = $topCandidates[0]; @@ -1471,9 +1469,7 @@ private function isPhrasingContent($node): bool && !\in_array( false, array_map( - function ($c) { - return $this->isPhrasingContent($c); - }, + fn ($c) => $this->isPhrasingContent($c), iterator_to_array($node->childNodes) ), true @@ -1489,9 +1485,7 @@ private function hasSingleTagInsideElement(\DOMElement $node, string $tag): bool $a = array_filter( iterator_to_array($node->childNodes), - function ($childNode) { - return $childNode instanceof \DOMText && preg_match($this->regexps['hasContent'], $this->getInnerText($childNode)); - } + fn ($childNode) => $childNode instanceof \DOMText && preg_match($this->regexps['hasContent'], $this->getInnerText($childNode)) ); return 0 === \count($a);