@@ -10,8 +10,10 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
16+ #include <limits.h> // IWYU pragma: keep
1517#include <math.h>
1618#include <stdarg.h>
1719#include <stdint.h>
@@ -235,6 +237,246 @@ size_t strnlen(const char* str, size_t maxLen) {
235237}
236238#endif
237239
240+ #ifdef HAVE_LIBNCURSESW
241+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
242+ assert (!ps -> buf || ps -> pos < ps -> size );
243+
244+ char tempBuf [MB_LEN_MAX ];
245+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
246+
247+ // It is unnecessarily expensive to fix the output string if the caller
248+ // gives an incorrect buffer size. This function would not support any
249+ // truncation of the output string.
250+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
251+ assert (len > 0 );
252+ if (len == (size_t )-1 ) {
253+ assert (len != (size_t )-1 );
254+ fail ();
255+ }
256+ if (ps -> buf && len > ps -> size - ps -> pos ) {
257+ assert (!ps -> buf || len <= ps -> size - ps -> pos );
258+ fail ();
259+ }
260+
261+ ps -> pos += len ;
262+ }
263+ #else
264+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char * buf = ps -> buf ;
268+ if (buf ) {
269+ buf [ps -> pos ] = (char )c ;
270+ }
271+
272+ ps -> pos += 1 ;
273+ }
274+ #endif
275+
276+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
277+ assert (src || maxLen == 0 );
278+
279+ size_t pos = 0 ;
280+ bool wasReplaced = false;
281+
282+ #ifdef HAVE_LIBNCURSESW
283+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
284+ wchar_t ch ;
285+
286+ mbstate_t decState ;
287+ memset (& decState , 0 , sizeof (decState ));
288+ #else
289+ const char replacementChar = '?' ;
290+ char ch ;
291+ #endif
292+
293+ do {
294+ size_t len = 0 ;
295+ bool shouldReplace = false;
296+ ch = 0 ;
297+
298+ if (pos < maxLen ) {
299+ // Read the next character from the byte sequence
300+ #ifdef HAVE_LIBNCURSESW
301+ mbstate_t newState ;
302+ memcpy (& newState , & decState , sizeof (newState ));
303+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
304+
305+ assert (len != 0 || ch == 0 );
306+ switch (len ) {
307+ case (size_t )-2 :
308+ errno = EILSEQ ;
309+ shouldReplace = true;
310+ len = maxLen - pos ;
311+ break ;
312+
313+ case (size_t )-1 :
314+ shouldReplace = true;
315+ len = 1 ;
316+ break ;
317+
318+ default :
319+ memcpy (& decState , & newState , sizeof (decState ));
320+ }
321+ #else
322+ len = 1 ;
323+ ch = src [pos ];
324+ #endif
325+ }
326+
327+ pos += len ;
328+
329+ // Filter unprintable characters
330+ if (!shouldReplace && ch != 0 ) {
331+ #ifdef HAVE_LIBNCURSESW
332+ shouldReplace = !iswprint (ch );
333+ #else
334+ shouldReplace = !isprint ((unsigned char )ch );
335+ #endif
336+ }
337+
338+ if (shouldReplace ) {
339+ ch = replacementChar ;
340+ if (wasReplaced ) {
341+ continue ;
342+ }
343+ }
344+ wasReplaced = shouldReplace ;
345+
346+ encodeWChar (ps , ch );
347+ } while (ch != 0 );
348+ }
349+
350+ char * String_makePrintable (const char * str , size_t maxLen ) {
351+ WCharEncoderState encState ;
352+
353+ memset (& encState , 0 , sizeof (encState ));
354+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
355+ size_t size = encState .pos ;
356+ assert (size > 0 );
357+
358+ memset (& encState , 0 , sizeof (encState ));
359+ char * buf = xMalloc (size );
360+ encState .size = size ;
361+ encState .buf = buf ;
362+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
363+ assert (encState .pos == size );
364+
365+ return buf ;
366+ }
367+
368+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
369+ if (!ps -> str || ps -> maxLen == 0 ) {
370+ return false;
371+ }
372+
373+ // If the previous call of this function encounters an invalid sequence,
374+ // do not continue (because the "mbState" object for mbrtowc() is
375+ // undefined). The caller is supposed to reset the state.
376+ #ifdef HAVE_LIBNCURSESW
377+ bool isStateDefined = ps -> ch != WEOF ;
378+ #else
379+ bool isStateDefined = ps -> ch != EOF ;
380+ #endif
381+ if (!isStateDefined ) {
382+ return false;
383+ }
384+
385+ #ifdef HAVE_LIBNCURSESW
386+ wchar_t wc ;
387+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
388+ switch (len ) {
389+ case (size_t )-1 :
390+ // Invalid sequence
391+ ps -> ch = WEOF ;
392+ return false;
393+
394+ case (size_t )-2 :
395+ // Incomplete sequence
396+ ps -> str += ps -> maxLen ;
397+ ps -> maxLen = 0 ;
398+ return false;
399+
400+ case 0 :
401+ assert (wc == 0 );
402+
403+ ps -> str = NULL ;
404+ ps -> maxLen = 0 ;
405+ ps -> ch = wc ;
406+ return true;
407+
408+ default :
409+ ps -> str += len ;
410+ ps -> maxLen -= len ;
411+ ps -> ch = wc ;
412+ }
413+ return true;
414+ #else
415+ ps -> ch = * ps -> str ;
416+ if (ps -> ch == 0 ) {
417+ ps -> str = NULL ;
418+ ps -> maxLen = 0 ;
419+ } else {
420+ ps -> str ++ ;
421+ ps -> maxLen -- ;
422+ }
423+ return true;
424+ #endif
425+ }
426+
/*
 * Measures how many terminal columns the beginning of "*str" occupies.
 *
 * str      - in: start of the string; out: advanced past the characters
 *            that were counted.
 * maxLen   - maximum number of bytes to examine.
 * maxWidth - maximum number of columns to count; a negative value means
 *            INT_MAX.
 *
 * Returns the accumulated width, which never exceeds "maxWidth". Scanning
 * stops at a null character, at the byte limit, at a character with
 * negative width, or at the first character that would exceed the limit.
 * Without wide-character support every byte counts as one column.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
   assert(*str || maxLen == 0);

   if (maxWidth < 0) {
      maxWidth = INT_MAX;
   }

#ifdef HAVE_LIBNCURSESW
   MBStringDecoderState decoder;
   memset(&decoder, 0, sizeof(decoder));
   decoder.str = *str;
   decoder.maxLen = maxLen;

   int columns = 0;

   while (String_decodeNextWChar(&decoder) && decoder.ch != 0) {
      const int charWidth = wcwidth((wchar_t)decoder.ch);
      if (charWidth < 0) {
         assert(charWidth >= 0);
         break;
      }

      if (charWidth > maxWidth - columns) {
         break;
      }

      columns += charWidth;

      // A zero-width character only moves the end of the substring when the
      // working encoding is UTF-8. In Unicode, combining characters always
      // follow their base character, whereas some legacy 8-bit encodings
      // put them in front of it; there the character is picked up together
      // with the next character that has a width.
      if (charWidth > 0 || CRT_utf8) {
         // (*str - start) is the length of the substring that fits within
         // the width limit.
         *str = decoder.str;
      }
   }

   assert(decoder.ch != WEOF);
   return columns;
#else
   const size_t limit = MINIMUM((unsigned int)maxWidth, maxLen);
   const size_t byteCount = strnlen(*str, limit);
   *str += byteCount;
   return (int)byteCount;
#endif
}
479+
238480int xAsprintf (char * * strp , const char * fmt , ...) {
239481 va_list vl ;
240482 va_start (vl , fmt );
0 commit comments