@@ -10,8 +10,10 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
16+ #include <limits.h> // IWYU pragma: keep
1517#include <math.h>
1618#include <stdarg.h>
1719#include <stdint.h>
@@ -224,6 +226,185 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
224226 return i ;
225227}
226228
229+ #ifdef HAVE_LIBNCURSESW
230+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
231+ char tempBuf [MB_LEN_MAX ];
232+
233+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
234+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
235+ assert (len != (size_t )-1 );
236+ assert (len > 0 );
237+
238+ ps -> pos += len ;
239+ }
240+ #else
241+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
242+ if (ps -> buf ) {
243+ ((char * )ps -> buf )[ps -> pos ] = (char )c ;
244+ }
245+ ps -> pos += 1 ;
246+ }
247+ #endif
248+
249+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
250+ assert (src || maxLen == 0 );
251+
252+ size_t pos = 0 ;
253+ bool wasReplaced = false;
254+
255+ #ifdef HAVE_LIBNCURSESW
256+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
257+ wchar_t ch ;
258+
259+ mbstate_t decState ;
260+ memset (& decState , 0 , sizeof (decState ));
261+ mbstate_t newState ;
262+ #else
263+ const char replacementChar = '?' ;
264+ char ch ;
265+ #endif
266+
267+ do {
268+ size_t len = 0 ;
269+ bool shouldReplace = false;
270+ ch = 0 ;
271+
272+ if (pos < maxLen ) {
273+ // Read the next character from the byte sequence
274+ #ifdef HAVE_LIBNCURSESW
275+ memcpy (& newState , & decState , sizeof (newState ));
276+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
277+
278+ switch (len ) {
279+
280+ case (size_t )-2 :
281+ errno = EILSEQ ;
282+ shouldReplace = true;
283+ len = maxLen - pos ;
284+ break ;
285+
286+ case (size_t )-1 :
287+ shouldReplace = true;
288+ len = 1 ;
289+ break ;
290+
291+ case 0 :
292+ assert (ch == 0 );
293+ len = 1 ;
294+ // Fallthrough
295+
296+ default :
297+ memcpy (& decState , & newState , sizeof (decState ));
298+ }
299+ #else
300+ len = 1 ;
301+ ch = src [pos ];
302+ #endif
303+ }
304+
305+ pos += len ;
306+
307+ // Filter unprintable characters
308+ if (!shouldReplace && ch != 0 ) {
309+ #ifdef HAVE_LIBNCURSESW
310+ shouldReplace = !iswprint (ch );
311+ #else
312+ shouldReplace = !isprint ((unsigned char )ch );
313+ #endif
314+ }
315+
316+ if (shouldReplace ) {
317+ ch = replacementChar ;
318+ if (wasReplaced ) {
319+ continue ;
320+ }
321+ }
322+ wasReplaced = shouldReplace ;
323+
324+ encodeWChar (ps , ch );
325+ } while (ch != 0 );
326+ }
327+
328+ char * String_makePrintable (const char * str , size_t maxLen ) {
329+ WCharEncoderState encState ;
330+
331+ memset (& encState , 0 , sizeof (encState ));
332+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
333+ size_t bufSize = encState .pos ;
334+ assert (bufSize > 0 );
335+
336+ memset (& encState , 0 , sizeof (encState ));
337+ char * buf = xMalloc (bufSize );
338+ encState .buf = buf ;
339+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
340+ assert (encState .pos == bufSize );
341+ encState .buf = NULL ;
342+
343+ return buf ;
344+ }
345+
#ifndef HAVE_STRNLEN
/*
 * Fallback used when HAVE_STRNLEN is not defined.
 * Returns the number of bytes preceding the first NUL in str, but never more
 * than maxLen. No byte at index maxLen or beyond is examined.
 */
static size_t strnlen(const char* str, size_t maxLen) {
   size_t count = 0;
   while (count < maxLen && str[count] != '\0') {
      count++;
   }
   return count;
}
#endif
353+
/*
 * Measures how many terminal columns the string at *str occupies, looking at
 * no more than maxLen bytes and stopping before the width would exceed
 * maxWidth. A negative maxWidth means no width limit.
 *
 * On return *str may have been advanced; (*str - original *str) is the byte
 * length of the measured substring. The return value is its width in columns.
 *
 * *str may be NULL only if maxLen is 0.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
   assert(*str || maxLen == 0);

   const int widthLimit = (maxWidth < 0) ? INT_MAX : maxWidth;
   const char* const begin = *str;

#ifdef HAVE_LIBNCURSESW
   mbstate_t decState;
   memset(&decState, 0, sizeof(decState));

   int columns = 0;
   size_t offset = 0;
   while (offset < maxLen) {
      wchar_t wc;
      const size_t nbytes = mbrtowc(&wc, begin + offset, maxLen - offset, &decState);

      // Invalid input is unexpected here; in builds without assertions it
      // ends the measurement like a terminator or a truncated sequence does.
      assert(nbytes != (size_t)-1);
      if (nbytes == 0 || nbytes == (size_t)-2 || nbytes == (size_t)-1) {
         break;
      }

      offset += nbytes;

      assert(wc != L'\0');
      const int charWidth = wcwidth(wc);

      // Characters without a defined width are unexpected as well; stop at them.
      assert(charWidth >= 0);
      if (charWidth < 0) {
         break;
      }

      // Stop before the character that would not fit anymore.
      if (charWidth > widthLimit - columns) {
         break;
      }

      if (charWidth > 0 || CRT_utf8) {
         // (*str - begin) will represent the length of the substring bounded
         // by the width limit.

         // In Unicode, combining characters are always placed after the base
         // character. Some legacy 8-bit encodings instead place combining
         // characters before the base character. In the latter case a
         // zero-width character is only committed together with the
         // character that follows it.
         *str = begin + offset;
      }
      columns += charWidth;
   }

   return columns;
#else
   // Without wide-character support every byte counts as one column.
   const size_t byteLimit = MINIMUM((unsigned int)widthLimit, maxLen);
   const size_t length = strnlen(begin, byteLimit);
   *str = begin + length;
   return (int)length;
#endif
}
407+
227408int xAsprintf (char * * strp , const char * fmt , ...) {
228409 va_list vl ;
229410 va_start (vl , fmt );
0 commit comments