library: correct that 'escape_str_utf8' guy's behavior
Thanks to Konstantin for discovering 2 problems in the issue referenced below. That 15+ year old logic went a little overboard while wrestling with a UTF-8 string. Henceforth, we will not treat the byte 0x9B as special. And we will also handle a 'combining acute accent' correctly. Reference(s): https://gitlab.com/procps-ng/procps/-/issues/176 Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
parent
38537f79c7
commit
356e9a0672
@ -87,36 +87,16 @@ static int escape_str_utf8(char *restrict dst, const char *restrict src, int buf
|
||||
my_bytes++;
|
||||
|
||||
} else {
|
||||
/* multibyte - printable */
|
||||
/* multibyte - maybe, kinda "printable" */
|
||||
int wlen = wcwidth(wc);
|
||||
|
||||
if (wlen<=0) {
|
||||
// invisible multibyte -- we don't ignore it, because some terminals
|
||||
// interpret it wrongly, so it is safer to replace it with '?'
|
||||
*(dst++) = '?';
|
||||
src+=len;
|
||||
my_cells++;
|
||||
my_bytes++;
|
||||
} else {
|
||||
// multibyte - printable
|
||||
// Got space?
|
||||
if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break;
|
||||
// 0x9b is control byte for some terminals
|
||||
if (memchr(src, 0x9B, len)) {
|
||||
// unsafe multibyte
|
||||
*(dst++) = '?';
|
||||
src+=len;
|
||||
my_cells++;
|
||||
my_bytes++;
|
||||
} else {
|
||||
// safe multibyte
|
||||
memcpy(dst, src, len);
|
||||
my_cells += wlen;
|
||||
dst += len;
|
||||
my_bytes += len;
|
||||
src += len;
|
||||
}
|
||||
}
|
||||
// Got space?
|
||||
if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break;
|
||||
// safe multibyte
|
||||
memcpy(dst, src, len);
|
||||
dst += len;
|
||||
src += len;
|
||||
my_bytes += len;
|
||||
if (wlen > 0) my_cells += wlen;
|
||||
}
|
||||
//fprintf(stdout, "cells: %d\n", my_cells);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user