Skip to content

Fix multibyte character corruption in post summaries #1995

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits
Jul 24, 2025
4 changes: 4 additions & 0 deletions .github/changelog/1995-from-description
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: fixed

Fix multibyte character corruption in post summaries, preventing Greek and other non-ASCII text from being garbled during text processing.
10 changes: 5 additions & 5 deletions includes/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ function generate_post_summary( $post, $length = 500 ) {
* @param string $excerpt_more The excerpt more.
*/
$excerpt_more = \apply_filters( 'activitypub_excerpt_more', '[…]' );
$length = $length - strlen( $excerpt_more );
$length = $length - \mb_strlen( $excerpt_more, 'UTF-8' );

$content = \sanitize_post_field( 'post_excerpt', $post->post_excerpt, $post->ID );

Expand All @@ -1223,13 +1223,13 @@ function generate_post_summary( $post, $length = 500 ) {
}

$content = \strip_shortcodes( $content );
$content = \html_entity_decode( $content );
$content = \wp_strip_all_tags( $content );
$content = \html_entity_decode( $content, ENT_QUOTES, 'UTF-8' );
$content = \trim( $content );
$content = \preg_replace( '/\R+/m', "\n\n", $content );
$content = \preg_replace( '/[\r\t]/', '', $content );
$content = \preg_replace( '/\R+/mu', "\n\n", $content );
$content = \preg_replace( '/[\r\t]/u', '', $content );

if ( $length && \strlen( $content ) > $length ) {
if ( $length && \mb_strlen( $content, 'UTF-8' ) > $length ) {
$content = \wordwrap( $content, $length, '</activitypub-summary>' );
$content = \explode( '</activitypub-summary>', $content, 2 );
$content = $content[0] . ' ' . $excerpt_more;
Expand Down
8 changes: 4 additions & 4 deletions includes/transformer/class-post.php
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ protected function get_image() {

$alt = \get_post_meta( $id, '_wp_attachment_image_alt', true );
if ( $alt ) {
$image['name'] = \wp_strip_all_tags( \html_entity_decode( $alt ) );
$image['name'] = \html_entity_decode( \wp_strip_all_tags( $alt ), ENT_QUOTES, 'UTF-8' );
}

return $image;
Expand Down Expand Up @@ -264,7 +264,7 @@ protected function get_icon() {

$alt = \get_post_meta( $id, '_wp_attachment_image_alt', true );
if ( $alt ) {
$image['name'] = \wp_strip_all_tags( \html_entity_decode( $alt ) );
$image['name'] = \html_entity_decode( \wp_strip_all_tags( $alt ), ENT_QUOTES, 'UTF-8' );
}

return $image;
Expand Down Expand Up @@ -1013,11 +1013,11 @@ public function wp_attachment_to_activity_attachment( $media ) {
);

if ( ! empty( $media['alt'] ) ) {
$image['name'] = \wp_strip_all_tags( \html_entity_decode( $media['alt'] ) );
$image['name'] = \html_entity_decode( \wp_strip_all_tags( $media['alt'] ), ENT_QUOTES, 'UTF-8' );
} else {
$alt = \get_post_meta( $id, '_wp_attachment_image_alt', true );
if ( $alt ) {
$image['name'] = \wp_strip_all_tags( \html_entity_decode( $alt ) );
$image['name'] = \html_entity_decode( \wp_strip_all_tags( $alt ), ENT_QUOTES, 'UTF-8' );
}
}

Expand Down
7 changes: 7 additions & 0 deletions tests/includes/class-test-functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,13 @@ public function get_post_summary_data() {
),
'<p>Hello World</p>' . PHP_EOL,
),
array(
'Greek Excerpt',
array(
'post_excerpt' => 'Τι μπορεί να σου συμβεί σε μια βόλτα για να αγοράσεις μια βαλίτσα για τα ταξίδια σου; Όλα είναι πιθανά αν έχεις ανοιχτές τις "κεραίες" σου!',
),
'<p>Τι μπορεί να σου συμβεί σε μια βόλτα για να αγοράσεις μια βαλίτσα για τα ταξίδια σου; Όλα είναι πιθανά αν έχεις ανοιχτές τις &#8220;κεραίες&#8221; σου!</p>' . PHP_EOL,
),
array(
'Content',
array(
Expand Down
2 changes: 1 addition & 1 deletion tests/includes/class-test-shortcodes.php
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public function test_excerpt() {
$content = do_shortcode( '[ap_excerpt length="25"]' );
wp_reset_postdata();

$this->assertEquals( "<p>Lorem ipsum dolor […]</p>\n", $content );
$this->assertEquals( "<p>Lorem ipsum dolor sit […]</p>\n", $content );
}

/**
Expand Down
Loading