Skip to content

Commit 123cf13

Browse files
committed
[PHP 8.4][Intl] Add grapheme_str_split
Add a polyfill for the `grapheme_str_split` function added in PHP 8.4. Requires PHP 7.3, because the polyfill is based on the `\X` regex escape, which only works properly on PCRE2, and PCRE2 [only comes with PHP 7.3+](https://php.watch/versions/7.3/pcre2). Further, there are some cases in which the polyfill cannot correctly split complex characters (such as two consecutive country flag Emojis). This is now fixed upstream in PCRE2 (see https://github.com/PCRE2Project/pcre2/issues/410). However, the fix will likely only make it to PHP 8.4. References: - [RFC: Grapheme cluster for `str_split` function: `grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split) - [PHP.Watch: PHP 8.4: New `grapheme_str_split` function](https://php.watch/versions/8.4/grapheme_str_split)
1 parent e85ab80 commit 123cf13

File tree

12 files changed

+160
-0
lines changed

12 files changed

+160
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Polyfills are provided for:
6868
- the `Date*Exception/Error` classes introduced in PHP 8.3;
6969
- the `SQLite3Exception` class introduced in PHP 8.3;
7070
- the `mb_ucfirst` and `mb_lcfirst` functions introduced in PHP 8.4;
71+
- the `grapheme_str_split` function introduced in PHP 8.4 (requires PHP >= 7.3);
7172

7273
It is strongly recommended to upgrade your PHP version and/or install the missing
7374
extensions whenever possible. This polyfill should be used only when there is no

src/Intl/Grapheme/Grapheme.php

+33
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* - grapheme_strrpos - Find position (in grapheme units) of last occurrence of a string
2727
* - grapheme_strstr - Returns part of haystack string from the first occurrence of needle to the end of haystack
2828
* - grapheme_substr - Return part of a string
29+
* - grapheme_str_split - Splits a string into an array of individual or chunks of graphemes.
2930
*
3031
* @author Nicolas Grekas <[email protected]>
3132
*
@@ -191,6 +192,38 @@ public static function grapheme_strstr($s, $needle, $beforeNeedle = false)
191192
return mb_strstr($s, $needle, $beforeNeedle, 'UTF-8');
192193
}
193194

195+
/**
 * Splits a string into grapheme clusters, optionally grouped into chunks.
 *
 * Grapheme clusters are located with the PCRE `\X` escape in UTF-8 mode.
 *
 * @param string $s   The string to split
 * @param int    $len Number of grapheme clusters per chunk, from 1 to 1073741823
 *
 * @return array|false The list of chunks, an empty array for an empty string, or false
 *                     when no cluster could be matched (and, before PHP 8, when $len
 *                     is out of range)
 *
 * @throws \ValueError On PHP 8+ when $len is out of range
 */
public static function grapheme_str_split($s, $len = 1)
{
    // The error message promises "greater than 0", so 0 must be rejected here too;
    // otherwise it would be handed to array_chunk() as a chunk size of 0.
    if ($len < 1 || $len > 1073741823) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $s) {
        return [];
    }

    preg_match_all('/\X/u', $s, $matches);

    // Nothing matched (or the match list is missing altogether): report failure.
    if (empty($matches[0])) {
        return false;
    }

    if (1 === $len) {
        return $matches[0];
    }

    // Group the clusters $len at a time and glue each group back into one string.
    return array_map(
        static function (array $chunk) {
            return implode('', $chunk);
        },
        array_chunk($matches[0], $len)
    );
}
226+
194227
private static function grapheme_position($s, $needle, $offset, $mode)
195228
{
196229
$needle = (string) $needle;

src/Intl/Grapheme/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ This component provides a partial, native PHP implementation of the
2121
- [`grapheme_strstr`](https://php.net/grapheme_strstr): Returns part of haystack string from
2222
the first occurrence of needle to the end of haystack
2323
- [`grapheme_substr`](https://php.net/grapheme_substr): Return part of a string
24+
- [`grapheme_str_split`](https://php.net/grapheme_str_split): Splits a string into an array of individual or chunks of graphemes.
2425

2526
More information can be found in the
2627
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).

src/Intl/Grapheme/bootstrap.php

+4
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,7 @@ function grapheme_strstr($haystack, $needle, $beforeNeedle = false) { return p\G
5656
if (!function_exists('grapheme_substr')) {
5757
function grapheme_substr($string, $offset, $length = null) { return p\Grapheme::grapheme_substr($string, $offset, $length); }
5858
}
59+
60+
// Pull in the additional bootstrap file only when running on PHP 7.3 or newer.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}

src/Intl/Grapheme/bootstrap73.php

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Polyfill\Php84 as p;
13+
14+
// Delegate to this component's own Grapheme class rather than to
// Symfony\Polyfill\Php84\Php84, which belongs to a different component and is
// not guaranteed to be available when only this one is loaded.
if (!function_exists('grapheme_str_split') && function_exists('grapheme_substr')) {
    function grapheme_str_split(string $string, int $length = 1)
    {
        return \Symfony\Polyfill\Intl\Grapheme\Grapheme::grapheme_str_split($string, $length);
    }
}
17+

src/Intl/Grapheme/bootstrap80.php

+3
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,6 @@ function grapheme_strstr(?string $haystack, ?string $needle, ?bool $beforeNeedle
4848
if (!function_exists('grapheme_substr')) {
4949
function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false { return p\Grapheme::grapheme_substr((string) $string, (int) $offset, $length); }
5050
}
51+
// Define the global function only when nothing else has provided it yet.
if (!function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1): array|false
    {
        return p\Grapheme::grapheme_str_split($string, $length);
    }
}

src/Php84/Php84.php

+29
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,33 @@ public static function mb_lcfirst(string $string, ?string $encoding = null): str
6363

6464
return $firstChar . mb_substr($string, 1, null, $encoding);
6565
}
66+
67+
/**
 * Splits a string into grapheme clusters, optionally grouped into chunks.
 *
 * Grapheme clusters are located with the PCRE `\X` escape in UTF-8 mode.
 *
 * @param string $string The string to split
 * @param int    $length Number of grapheme clusters per chunk, from 1 to 1073741823
 *
 * @return array|false The list of chunks, an empty array for an empty string, or false
 *                     when no cluster could be matched
 *
 * @throws \ValueError When $length is out of range
 */
public static function grapheme_str_split(string $string, int $length)
{
    // The error message promises "greater than 0", so 0 must be rejected here too;
    // otherwise it would be handed to array_chunk() as a chunk size of 0.
    if ($length < 1 || $length > 1073741823) {
        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $string) {
        return [];
    }

    preg_match_all('/\X/u', $string, $matches);

    // Nothing matched (or the match list is missing altogether): report failure.
    if (empty($matches[0])) {
        return false;
    }

    if (1 === $length) {
        return $matches[0];
    }

    // Group the clusters $length at a time and glue each group back into one string.
    return array_map(
        static function (array $chunk) {
            return implode('', $chunk);
        },
        array_chunk($matches[0], $length)
    );
}
6695
}

src/Php84/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Symfony Polyfill / Php84
44
This component provides features added to PHP 8.4 core:
55

66
- [`mb_ucfirst` and `mb_lcfirst`](https://wiki.php.net/rfc/mb_ucfirst)
7+
- [`grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split)
78

89
More information can be found in the
910
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).

src/Php84/bootstrap.php

+4
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,7 @@ function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84:
2323
if (!function_exists('mb_lcfirst')) {
2424
function mb_lcfirst($string, ?string $encoding = null): string { return p\Php84::mb_lcfirst($string, $encoding); }
2525
}
26+
27+
// Pull in the additional bootstrap file only when running on PHP 7.3 or newer.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}

src/Php84/bootstrap73.php

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Polyfill\Php84 as p;
13+
14+
// Stop here when the runtime is PHP 8.4 or newer.
if (80400 <= \PHP_VERSION_ID) {
    return;
}

// Define the function only if it is still missing and grapheme_substr() is available.
if (function_exists('grapheme_substr') && !function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1)
    {
        return p\Php84::grapheme_str_split($string, $length);
    }
}
21+

tests/Intl/Grapheme/GraphemeTest.php

+26
Original file line numberDiff line numberDiff line change
@@ -207,4 +207,30 @@ public function testGraphemeStrstr()
207207
$this->assertSame('국어', grapheme_strstr('한국어', ''));
208208
$this->assertSame('ÉJÀ', grapheme_stristr('DÉJÀ', 'é'));
209209
}
210+
211+
/**
212+
* @dataProvider graphemeStrSplitDataProvider
213+
* @requires PHP 7.3
214+
*/
215+
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    // Split first, then compare strictly against the row's expected chunks.
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}
218+
219+
/**
 * Supplies [input string, chunk length, expected chunks] rows for testGraphemeStrSplit().
 */
public static function graphemeStrSplitDataProvider(): array
{
    $return = [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ["土", "下", "座", "🙇‍♀", "を"]],
    ];

    // https://github.com/PCRE2Project/pcre2/issues/410
    // Major and minor are compared separately so that 10.44+ (and any later major)
    // qualifies. The defined() guard keeps the provider loadable where the PCRE
    // version constants do not exist.
    if (\defined('PCRE_VERSION_MAJOR') && (PCRE_VERSION_MAJOR > 10 || (10 === PCRE_VERSION_MAJOR && PCRE_VERSION_MINOR >= 44))) {
        // NOTE(review): this row is identical to the unconditional one above; it was
        // presumably meant to be an input that only splits correctly on fixed PCRE2
        // builds - confirm the intended case.
        $return[] = ['土下座🙇‍♀を', 1, ["土", "下", "座", "🙇‍♀", "を"]];
    }

    return $return;
}
210236
}

tests/Php84/Php84Test.php

+20
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,24 @@ public static function lcFirstDataProvider(): array {
6868
["ß", "ß"],
6969
];
7070
}
71+
72+
/**
73+
* @dataProvider graphemeStrSplitDataProvider
74+
* @requires PHP 7.3
75+
*/
76+
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    // Split first, then compare strictly against the row's expected chunks.
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}
79+
80+
/**
 * Supplies [input string, chunk length, expected chunks] rows for testGraphemeStrSplit().
 */
public static function graphemeStrSplitDataProvider(): array
{
    return [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ["土", "下", "座", "🙇‍♀", "を"]],
        // Disabled case, kept for reference alongside the PCRE2 issue it points to.
        // ['👭🏻👰🏿‍♂️', 2, ['👭🏻', '👰🏿‍♂️']], // https://github.com/PCRE2Project/pcre2/issues/410
    ];
}
7191
}

0 commit comments

Comments
 (0)