library: correct that 'escape_str_utf8' guy's behavior

Thanks to Konstantin for discovering two problems in the issue referenced below. That 15+ year-old logic went overboard while wrestling with a UTF-8 string. Henceforth, we will not treat the byte 0x9B as special, and we will also handle a 'combining acute accent' (a zero-width character) correctly.

Reference(s):
https://gitlab.com/procps-ng/procps/-/issues/176

Signed-off-by: Jim Warner <james.warner@comcast.net>
2020-06-17 08:58:50 -05:00
parent 38537f79c7
commit 356e9a0672
1 changed files with 9 additions and 29 deletions
--- a/proc/escape.c
+++ b/proc/escape.c
@@ -87,36 +87,16 @@ static int escape_str_utf8(char *restrict dst, const char *restrict src, int buf
      my_bytes++;

    } else {
-      /* multibyte - printable */
+      /* multibyte - maybe, kinda "printable" */
      int wlen = wcwidth(wc);
-
-      if (wlen<=0) {
-	// invisible multibyte -- we don't ignore it, because some terminal
-	// interpret it wrong and more safe is replace it with '?'
-	*(dst++) = '?';
-	src+=len;
-	my_cells++;
-	my_bytes++;
-      } else {
-        // multibyte - printable
-        // Got space?
-        if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break;
-        // 0x9b is control byte for some terminals
-        if (memchr(src, 0x9B, len)) {
-	  // unsafe multibyte
-	  *(dst++) = '?';
-	  src+=len;
-	  my_cells++;
-	  my_bytes++;
-        } else {
-	  // safe multibyte
-       	  memcpy(dst, src, len);
-	  my_cells += wlen;
-	  dst += len;
-	  my_bytes += len;
-          src += len;
-        }
-      }
+      // Got space?
+      if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break;
+      // safe multibyte
+      memcpy(dst, src, len);
+      dst += len;
+      src += len;
+      my_bytes += len;
+      if (wlen > 0) my_cells += wlen;
    }
    //fprintf(stdout, "cells: %d\n", my_cells);
  }