library: refactor 'escape' logic for newlib (2nd time)
Much of what was represented in the commit message for the reference shown below was revisited in this patch. It also means that the assertion in the last paragraph of that message will now only be true with LANG unset. [ and forget all the bullshit about not altering any ] [ kernel supplied data. sometimes we must, to avoid a ] [ corrupt display due to a string we cannot decode. ] And while this commit still avoids the overhead of the 'mbrtowc', 'wcwidth', 'isprint', & 'iswprint' functions, we achieve all the benefits with simple table lookups. Plus such benefits are extended to additional strings. For example, both PIDS_EXE and PIDS_CMD fields are now also subject to being 'escaped'. If a program name did contain multibyte characters, potential truncation may corrupt it when it's squeezed into a 15/63 byte array. Now, all future users of this new library only need to deal with the disparities between string and printable lengths. Such strings themselves are always printable. [ the ps program now contains some unnecessary costs ] [ with the duplicated former 'escape' functions. But ] [ we retain that copied escape.c code for posterity. ] [ besides, in a one-shot guy it's of little concern. ] Note: Proper display of some multibyte strings was not possible at the linux console. It would seem a concept of zero length chars (like a 'combining acute accent') is not recognized. Thus the display becomes corrupted. But if utf8 decoding is disabled (via LANG=), then all callers will now see '?', restoring correct alignment. Reference(s): . Dec 2020, newlib 'escape' logic refactored commit a221b9084ae979e6fd073a83e7fbc46c44551f35 Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
parent
a221b9084a
commit
649e45482f
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* escape.c - printing handling
|
* escape.c - printing handling
|
||||||
* Copyright 1998-2002 by Albert Cahalan
|
* Copyright 1998-2002 by Albert Cahalan
|
||||||
|
* Copyright 2020 Jim Warner <james.warner@comcast.net>
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
@ -17,6 +18,7 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <langinfo.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@ -31,24 +33,83 @@
|
|||||||
if ((bytes) >= INT_MAX) return 0; \
|
if ((bytes) >= INT_MAX) return 0; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
// UTF_tab: indexed by the FIRST byte of a character, yields the number of
// bytes that character occupies in UTF-8, or -1 when that byte can never
// begin a sequence (continuation bytes 0x80-0xBF, plus 0xC0, 0xC1 and
// everything from 0xF5 up).
//
// The element type is explicitly 'signed char': the signedness of plain
// 'char' is implementation-defined, and where it is unsigned the -1 markers
// would read back as 255 and the "< 0" validity test would never trigger.
// The table is read-only, hence 'const'.
static const signed char UTF_tab[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 - 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 - 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 - 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 - 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 - 0x7F
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0x80 - 0x8F
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0x90 - 0x9F
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0xA0 - 0xAF
   -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0xB0 - 0xBF
   -1,-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0 - 0xCF, 0xC2 = begins 2
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0 - 0xDF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0 - 0xEF, 0xE0 = begins 3
    4, 4, 4, 4, 4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 0xF0 - 0xFF, 0xF0 = begins 4
};                                                  // ( 0xF5 & beyond invalid )
|
||||||
|
|
||||||
int escape_str (unsigned char *dst, const unsigned char *src, int bufsize) {
|
// ESC_tab: per-byte substitution table, 16 entries per row.
//   '|'   keep the byte unchanged
//   other replace the byte with this character
// Entry 0 ('@') belongs to the NUL byte. The adjacent string literals are
// concatenated, so the array also carries the literal's trailing NUL.
static const unsigned char ESC_tab[] = {
   "@..............."   // 0x00 - 0x0F
   "................"   // 0x10 - 0x1F
   "||||||||||||||||"   // 0x20 - 0x2F
   "||||||||||||||||"   // 0x30 - 0x3F
   "||||||||||||||||"   // 0x40 - 0x4F
   "||||||||||||||||"   // 0x50 - 0x5F
   "||||||||||||||||"   // 0x60 - 0x6F
   "|||||||||||||||."   // 0x70 - 0x7F
   "????????????????"   // 0x80 - 0x8F
   "????????????????"   // 0x90 - 0x9F
   "????????????????"   // 0xA0 - 0xAF
   "????????????????"   // 0xB0 - 0xBF
   "????????????????"   // 0xC0 - 0xCF
   "????????????????"   // 0xD0 - 0xDF
   "????????????????"   // 0xE0 - 0xEF
   "????????????????"   // 0xF0 - 0xFF
};
|
||||||
|
|
||||||
|
static inline void esc_all (unsigned char *str) {
|
||||||
|
unsigned char c;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
// if bad locale/corrupt str, replace non-printing stuff
|
||||||
|
for (i = 0; str[i] != '\0'; i++)
|
||||||
|
if ((c = ESC_tab[str[i]]) != '|')
|
||||||
|
str[i] = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void esc_ctl (unsigned char *str, int len) {
|
||||||
int i, n;
|
int i, n;
|
||||||
|
|
||||||
SECURE_ESCAPE_ARGS(dst, bufsize);
|
for (i = 0; i < len; ) {
|
||||||
|
// even with a proper locale, strings might be corrupt
|
||||||
|
if ((n = UTF_tab[str[i]]) < 0 || i + n > len) {
|
||||||
|
esc_all(&str[i]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// and eliminate those non-printing control characters
|
||||||
|
if (str[i] < 0x20 || str[i] == 0x7f)
|
||||||
|
str[i] = '?';
|
||||||
|
i += n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Copy 'src' into 'dst' and make the copy safe to print.
//
//  dst      destination buffer, always NUL-terminated on a normal return
//  src      NUL-terminated source string
//  bufsize  size of 'dst' in bytes, including room for the NUL
//
// Returns the number of bytes stored in 'dst' (excluding the NUL), which is
// at most bufsize-1. Returns 0 if snprintf fails; SECURE_ESCAPE_ARGS may
// also return 0 early when it rejects the arguments.
//
// The character encoding is queried once, on the first call, and cached:
// when it is not UTF-8 every byte is filtered through esc_all(); otherwise
// esc_ctl() keeps well-formed multibyte sequences and replaces only control
// characters (falling back to esc_all() on a corrupt string).
int escape_str (unsigned char *dst, const unsigned char *src, int bufsize) {
   static int utf_sw = 0;      // 0 = not yet known, 1 = UTF-8, -1 = anything else
   int n;

   if (utf_sw == 0) {
      char *enc = nl_langinfo(CODESET);
      utf_sw = enc && strcasecmp(enc, "UTF-8") == 0 ? 1 : -1;
   }
   SECURE_ESCAPE_ARGS(dst, bufsize);

   // snprintf works on plain 'char' strings; cast explicitly instead of
   // relying on an implicit unsigned char * -> char * conversion
   n = snprintf((char *)dst, bufsize, "%s", (const char *)src);
   if (n < 0) {
      *dst = '\0';
      return 0;
   }
   // on truncation snprintf reports the length it wanted, not what it stored
   if (n >= bufsize) n = bufsize-1;

   if (utf_sw < 0)
      esc_all(dst);
   else
      esc_ctl(dst, n);

   return n;
}
|
||||||
|
|
||||||
@ -57,27 +118,24 @@ int escape_command (unsigned char *outbuf, const proc_t *pp, int bytes, unsigned
|
|||||||
int overhead = 0;
|
int overhead = 0;
|
||||||
int end = 0;
|
int end = 0;
|
||||||
|
|
||||||
if(flags & ESC_BRACKETS){
|
if (flags & ESC_BRACKETS)
|
||||||
overhead += 2;
|
overhead += 2;
|
||||||
}
|
if (flags & ESC_DEFUNCT) {
|
||||||
if(flags & ESC_DEFUNCT){
|
if (pp->state == 'Z') overhead += 10; // chars in " <defunct>"
|
||||||
if(pp->state=='Z') overhead += 10; // chars in " <defunct>"
|
|
||||||
else flags &= ~ESC_DEFUNCT;
|
else flags &= ~ESC_DEFUNCT;
|
||||||
}
|
}
|
||||||
if(overhead + 1 >= bytes){ // if no room for even one byte of the command name
|
if (overhead + 1 >= bytes) {
|
||||||
|
// if no room for even one byte of the command name
|
||||||
outbuf[0] = '\0';
|
outbuf[0] = '\0';
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if(flags & ESC_BRACKETS){
|
if (flags & ESC_BRACKETS)
|
||||||
outbuf[end++] = '[';
|
outbuf[end++] = '[';
|
||||||
}
|
|
||||||
end += escape_str(outbuf+end, pp->cmd, bytes-overhead);
|
end += escape_str(outbuf+end, pp->cmd, bytes-overhead);
|
||||||
|
// we want "[foo] <defunct>", not "[foo <defunct>]"
|
||||||
// Hmmm, do we want "[foo] <defunct>" or "[foo <defunct>]"?
|
if (flags & ESC_BRACKETS)
|
||||||
if(flags & ESC_BRACKETS){
|
|
||||||
outbuf[end++] = ']';
|
outbuf[end++] = ']';
|
||||||
}
|
if (flags & ESC_DEFUNCT) {
|
||||||
if(flags & ESC_DEFUNCT){
|
|
||||||
memcpy(outbuf+end, " <defunct>", 10);
|
memcpy(outbuf+end, " <defunct>", 10);
|
||||||
end += 10;
|
end += 10;
|
||||||
}
|
}
|
||||||
|
@ -250,9 +250,9 @@ ENTER(0x220);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
case_Name:
|
case_Name:
|
||||||
{ char buf[64];
|
{ char buf[64], raw[64];
|
||||||
unsigned u = 0;
|
unsigned u = 0;
|
||||||
while(u < sizeof(buf) - 1u){
|
while(u < sizeof(raw) - 1u){
|
||||||
int c = *S++;
|
int c = *S++;
|
||||||
if(c=='\n') break;
|
if(c=='\n') break;
|
||||||
if(c=='\0') break; // should never happen
|
if(c=='\0') break; // should never happen
|
||||||
@ -262,14 +262,16 @@ ENTER(0x220);
|
|||||||
if(!c) break; // should never happen
|
if(!c) break; // should never happen
|
||||||
if(c=='n') c='\n'; // else we assume it is '\\'
|
if(c=='n') c='\n'; // else we assume it is '\\'
|
||||||
}
|
}
|
||||||
buf[u++] = c;
|
raw[u++] = c;
|
||||||
}
|
}
|
||||||
buf[u] = '\0';
|
raw[u] = '\0';
|
||||||
#ifdef FALSE_THREADS
|
#ifdef FALSE_THREADS
|
||||||
if (!IS_THREAD(P)) {
|
if (!IS_THREAD(P)) {
|
||||||
#endif
|
#endif
|
||||||
if (!P->cmd && !(P->cmd = strdup(buf)))
|
if (!P->cmd) {
|
||||||
return 1;
|
escape_str(buf, raw, sizeof(buf));
|
||||||
|
if (!(P->cmd = strdup(buf))) return 1;
|
||||||
|
}
|
||||||
#ifdef FALSE_THREADS
|
#ifdef FALSE_THREADS
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -554,6 +556,7 @@ static int sd2proc (proc_t *restrict p) {
|
|||||||
// Reads /proc/*/stat files, being careful not to trip over processes with
|
// Reads /proc/*/stat files, being careful not to trip over processes with
|
||||||
// names like ":-) 1 2 3 4 5 6".
|
// names like ":-) 1 2 3 4 5 6".
|
||||||
static int stat2proc (const char* S, proc_t *restrict P) {
|
static int stat2proc (const char* S, proc_t *restrict P) {
|
||||||
|
char buf[64], raw[64];
|
||||||
size_t num;
|
size_t num;
|
||||||
char* tmp;
|
char* tmp;
|
||||||
|
|
||||||
@ -570,12 +573,16 @@ ENTER(0x160);
|
|||||||
S++;
|
S++;
|
||||||
tmp = strrchr(S, ')');
|
tmp = strrchr(S, ')');
|
||||||
if (!tmp || !tmp[1]) return 0;
|
if (!tmp || !tmp[1]) return 0;
|
||||||
num = tmp - S;
|
|
||||||
#ifdef FALSE_THREADS
|
#ifdef FALSE_THREADS
|
||||||
if (!IS_THREAD(P)) {
|
if (!IS_THREAD(P)) {
|
||||||
#endif
|
#endif
|
||||||
if (!P->cmd && !(P->cmd = strndup(S, num)))
|
if (!P->cmd) {
|
||||||
return 1;
|
num = tmp - S;
|
||||||
|
memcpy(raw, S, num);
|
||||||
|
raw[num] = '\0';
|
||||||
|
escape_str(buf, raw, sizeof(buf));
|
||||||
|
if (!(P->cmd = strdup(buf))) return 1;
|
||||||
|
}
|
||||||
#ifdef FALSE_THREADS
|
#ifdef FALSE_THREADS
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -978,9 +985,10 @@ static char *readlink_exe (const char *path){
|
|||||||
int in;
|
int in;
|
||||||
|
|
||||||
snprintf(buf, sizeof(buf), "%s/exe", path);
|
snprintf(buf, sizeof(buf), "%s/exe", path);
|
||||||
in = (int)readlink(buf, dst_buffer, MAX_BUFSZ-1);
|
in = (int)readlink(buf, src_buffer, MAX_BUFSZ-1);
|
||||||
if (in > 0) {
|
if (in > 0) {
|
||||||
dst_buffer[in] = '\0';
|
src_buffer[in] = '\0';
|
||||||
|
escape_str(dst_buffer, src_buffer, MAX_BUFSZ);
|
||||||
return strdup(dst_buffer);
|
return strdup(dst_buffer);
|
||||||
}
|
}
|
||||||
return strdup("-");
|
return strdup("-");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user