top: adapt utf8 logic to support extra wide characters
Back when top was refactored to support UTF-8 encoding, it was acknowledged that languages like zh_CN were not supported. That was because a single 'character' might require more than a single 'column' when it's printed.

Well, I've now figured out how to accommodate languages like that. My adaptation is represented in this patch.

[ and just in case someone wishes to avoid the extra runtime costs, a #define OFF_XTRAWIDE is included. ]

Along the way, I've cleaned up some miscellaneous code supporting the 'Inspect' feature so that the rightmost screen column is always used rather than being left blank.

[ interestingly, my xterm & urxvt terminal emulators are able to split extra wide characters, then print 1/2 of such graphics in the last column. the gnome terminal emulator does not duplicate such behavior, but prints 1 extra character in the same width window. ]

Reference(s):
. Sep, 2017 - original utf8 support
commit 9773c56add6446d418c0677f306c8771356f0c01

Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
parent
6f2e66969a
commit
264790d80d
57
top/top.c
57
top/top.c
@ -37,6 +37,7 @@
|
|||||||
#include <termios.h>
|
#include <termios.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
@ -504,6 +505,24 @@ static char UTF8_tab[] = {
|
|||||||
}; // ( 0xF5 & beyond invalid )
|
}; // ( 0xF5 & beyond invalid )
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Accommodate any potential differences between some multibyte
 * character sequence and the screen columns needed to print it.
 *
 *    p - address of the first byte of a multibyte sequence
 *    n - number of bytes making up that sequence
 *
 * Returns the count of screen columns, which is never less than 1.
 * A sequence that cannot be converted under the current locale, or
 * one that wcwidth() reports as non-printable, counts as 1 column.
 * When OFF_XTRAWIDE is defined every sequence counts as 1 column. */
static inline int utf8_cols (const unsigned char *p, int n) {
#ifndef OFF_XTRAWIDE
   wchar_t wc;
   int wlen;

   // a failed conversion never stores into 'wc', so it must not be
   // handed to wcwidth ( a result of 0 means the null character ) ...
   if (n < 1 || mbtowc(&wc, (const char *)p, (size_t)n) < 1) {
      // ... and a failure leaves the conversion state unspecified
      (void)mbtowc(NULL, NULL, 0);
      return 1;
   }
   if ((wlen = wcwidth(wc)) < 1) wlen = 1;
   return wlen;
#else
   (void)p; (void)n;
   return 1;
#endif
} // end: utf8_cols
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determine difference between total bytes versus printable
|
* Determine difference between total bytes versus printable
|
||||||
* characters in that passed, potentially multi-byte, string */
|
* characters in that passed, potentially multi-byte, string */
|
||||||
@ -514,8 +533,8 @@ static int utf8_delta (const char *str) {
|
|||||||
while (*p) {
|
while (*p) {
|
||||||
// -1 represents a decoding error, pretend it's untranslated ...
|
// -1 represents a decoding error, pretend it's untranslated ...
|
||||||
if (0 > (clen = UTF8_tab[*p])) return 0;
|
if (0 > (clen = UTF8_tab[*p])) return 0;
|
||||||
|
cnum += utf8_cols(p, clen);
|
||||||
p += clen;
|
p += clen;
|
||||||
++cnum;
|
|
||||||
}
|
}
|
||||||
return (int)((const char *)p - str) - cnum;
|
return (int)((const char *)p - str) - cnum;
|
||||||
} // end: utf8_delta
|
} // end: utf8_delta
|
||||||
@ -532,8 +551,8 @@ static int utf8_embody (const char *str, int width) {
|
|||||||
while (*p) {
|
while (*p) {
|
||||||
// -1 represents a decoding error, pretend it's untranslated ...
|
// -1 represents a decoding error, pretend it's untranslated ...
|
||||||
if (0 > (clen = UTF8_tab[*p])) return width;
|
if (0 > (clen = UTF8_tab[*p])) return width;
|
||||||
|
if (width < (cnum += utf8_cols(p, clen))) break;
|
||||||
p += clen;
|
p += clen;
|
||||||
if (++cnum >= width) break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (int)((const char *)p - str);
|
return (int)((const char *)p - str);
|
||||||
@ -2636,15 +2655,15 @@ static void insp_find_str (int ch, int *col, int *row) {
|
|||||||
* while visible search matches display with capclr_hdr for emphasis.
|
* while visible search matches display with capclr_hdr for emphasis.
|
||||||
* ( we hide ugly plumbing in macros to concentrate on the algorithm ) */
|
* ( we hide ugly plumbing in macros to concentrate on the algorithm ) */
|
||||||
static void insp_mkrow_raw (int col, int row) {
|
static void insp_mkrow_raw (int col, int row) {
|
||||||
#define maxSZ ( Screen_cols - (to + 1) )
|
#define maxSZ ( Screen_cols - to )
|
||||||
#define capNO { if (hicap) { putp(Caps_off); hicap = 0; } }
|
#define capNO { if (hicap) { putp(Caps_off); hicap = 0; } }
|
||||||
#define mkFND { PUTT("%s%.*s%s", Curwin->capclr_hdr, maxSZ, Insp_sel->fstr, Caps_off); \
|
#define mkFND { PUTT("%s%.*s%s", Curwin->capclr_hdr, maxSZ, Insp_sel->fstr, Caps_off); \
|
||||||
fr += Insp_sel->flen -1; to += Insp_sel->flen; hicap = 0; }
|
fr += Insp_sel->flen -1; to += Insp_sel->flen; hicap = 0; }
|
||||||
#ifndef INSP_JUSTNOT
|
#ifndef INSP_JUSTNOT
|
||||||
#define mkCTL { int x = maxSZ; const char *p = fmtmk("^%c", uch + '@'); \
|
#define mkCTL { const char *p = fmtmk("^%c", uch + '@'); \
|
||||||
PUTT("%s%.*s", (!hicap) ? Curwin->capclr_msg : "", x, p); to += 2; hicap = 1; }
|
PUTT("%s%.*s", (!hicap) ? Curwin->capclr_msg : "", maxSZ, p); to += 2; hicap = 1; }
|
||||||
#define mkUNP { int x = maxSZ; const char *p = fmtmk("<%02X>", uch); \
|
#define mkUNP { const char *p = fmtmk("<%02X>", uch); \
|
||||||
PUTT("%s%.*s", (!hicap) ? Curwin->capclr_msg : "", x, p); to += 4; hicap = 1; }
|
PUTT("%s%.*s", (!hicap) ? Curwin->capclr_msg : "", maxSZ, p); to += 4; hicap = 1; }
|
||||||
#else
|
#else
|
||||||
#define mkCTL { if ((to += 2) <= Screen_cols) \
|
#define mkCTL { if ((to += 2) <= Screen_cols) \
|
||||||
PUTT("%s^%c", (!hicap) ? Curwin->capclr_msg : "", uch + '@'); hicap = 1; }
|
PUTT("%s^%c", (!hicap) ? Curwin->capclr_msg : "", uch + '@'); hicap = 1; }
|
||||||
@ -2653,7 +2672,7 @@ static void insp_mkrow_raw (int col, int row) {
|
|||||||
#endif
|
#endif
|
||||||
#define mkSTD { capNO; if (++to <= Screen_cols) { static char _str[2]; \
|
#define mkSTD { capNO; if (++to <= Screen_cols) { static char _str[2]; \
|
||||||
_str[0] = uch; putp(_str); } }
|
_str[0] = uch; putp(_str); } }
|
||||||
char tline[SCREENMAX];
|
unsigned char tline[SCREENMAX];
|
||||||
int fr, to, ofs;
|
int fr, to, ofs;
|
||||||
int hicap = 0;
|
int hicap = 0;
|
||||||
|
|
||||||
@ -2661,7 +2680,7 @@ static void insp_mkrow_raw (int col, int row) {
|
|||||||
memcpy(tline, Insp_p[row] + col, sizeof(tline));
|
memcpy(tline, Insp_p[row] + col, sizeof(tline));
|
||||||
else tline[0] = '\n';
|
else tline[0] = '\n';
|
||||||
|
|
||||||
for (fr = 0, to = 0, ofs = 0; to < Screen_cols -1; fr++) {
|
for (fr = 0, to = 0, ofs = 0; to < Screen_cols; fr++) {
|
||||||
if (!ofs)
|
if (!ofs)
|
||||||
ofs = insp_find_ofs(col + fr, row);
|
ofs = insp_find_ofs(col + fr, row);
|
||||||
if (col + fr < ofs) {
|
if (col + fr < ofs) {
|
||||||
@ -2694,20 +2713,20 @@ static void insp_mkrow_raw (int col, int row) {
|
|||||||
* characters will then be displayed in two positions like '^A'.
|
* characters will then be displayed in two positions like '^A'.
|
||||||
* ( assuming they can even get past those 'gettext' utilities ) */
|
* ( assuming they can even get past those 'gettext' utilities ) */
|
||||||
static void insp_mkrow_utf8 (int col, int row) {
|
static void insp_mkrow_utf8 (int col, int row) {
|
||||||
#define maxSZ ( Screen_cols - (to + 1) )
|
#define maxSZ ( Screen_cols - to )
|
||||||
#define mkFND { PUTT("%s%.*s%s", Curwin->capclr_hdr, maxSZ, Insp_sel->fstr, Caps_off); \
|
#define mkFND { PUTT("%s%.*s%s", Curwin->capclr_hdr, maxSZ, Insp_sel->fstr, Caps_off); \
|
||||||
fr += Insp_sel->flen; to += Insp_sel->flen; }
|
fr += Insp_sel->flen; to += Insp_sel->flen; }
|
||||||
#ifndef INSP_JUSTNOT
|
#ifndef INSP_JUSTNOT
|
||||||
#define mkCTL { int x = maxSZ; const char *p = fmtmk("^%c", uch + '@'); \
|
#define mkCTL { const char *p = fmtmk("^%c", uch + '@'); \
|
||||||
PUTT("%s%.*s%s", Curwin->capclr_msg, x, p, Caps_off); to += 2; }
|
PUTT("%s%.*s%s", Curwin->capclr_msg, maxSZ, p, Caps_off); to += 2; }
|
||||||
#else
|
#else
|
||||||
#define mkCTL { if ((to += 2) <= Screen_cols) \
|
#define mkCTL { if ((to += 2) <= Screen_cols) \
|
||||||
PUTT("%s^%c%s", Curwin->capclr_msg, uch + '@', Caps_off); }
|
PUTT("%s^%c%s", Curwin->capclr_msg, uch + '@', Caps_off); }
|
||||||
#endif
|
#endif
|
||||||
#define mkNUL { buf1[0] = ' '; doPUT(buf1) }
|
#define mkNUL { buf1[0] = ' '; doPUT(buf1) }
|
||||||
#define doPUT(buf) if (++to <= Screen_cols) putp(buf);
|
#define doPUT(buf) if ((to += cno) <= Screen_cols) putp(buf);
|
||||||
static char buf1[2], buf2[3], buf3[4], buf4[5];
|
static char buf1[2], buf2[3], buf3[4], buf4[5];
|
||||||
char tline[BIGBUFSIZ];
|
unsigned char tline[BIGBUFSIZ];
|
||||||
int fr, to, ofs;
|
int fr, to, ofs;
|
||||||
|
|
||||||
col = utf8_proper_col(Insp_p[row], col, 1);
|
col = utf8_proper_col(Insp_p[row], col, 1);
|
||||||
@ -2715,15 +2734,17 @@ static void insp_mkrow_utf8 (int col, int row) {
|
|||||||
memcpy(tline, Insp_p[row] + col, sizeof(tline));
|
memcpy(tline, Insp_p[row] + col, sizeof(tline));
|
||||||
else tline[0] = '\n';
|
else tline[0] = '\n';
|
||||||
|
|
||||||
for (fr = 0, to = 0, ofs = 0; to < Screen_cols -1; ) {
|
for (fr = 0, to = 0, ofs = 0; to < Screen_cols; ) {
|
||||||
if (!ofs)
|
if (!ofs)
|
||||||
ofs = insp_find_ofs(col + fr, row);
|
ofs = insp_find_ofs(col + fr, row);
|
||||||
if (col + fr < ofs) {
|
if (col + fr < ofs) {
|
||||||
unsigned char uch = tline[fr++];
|
unsigned char uch = tline[fr];
|
||||||
switch (UTF8_tab[(int)uch]) {
|
int bno = UTF8_tab[uch];
|
||||||
|
int cno = utf8_cols(&tline[fr++], bno);
|
||||||
|
switch (bno) {
|
||||||
case 1:
|
case 1:
|
||||||
if (uch == '\n') break;
|
if (uch == '\n') break;
|
||||||
else if (uch < 32) mkCTL
|
if (uch < 32) mkCTL
|
||||||
else if (uch == 127) mkNUL
|
else if (uch == 127) mkNUL
|
||||||
else { buf1[0] = uch; doPUT(buf1) }
|
else { buf1[0] = uch; doPUT(buf1) }
|
||||||
break;
|
break;
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
//#define OFF_SCROLLBK /* disable tty emulators scrollback buffer */
|
//#define OFF_SCROLLBK /* disable tty emulators scrollback buffer */
|
||||||
//#define OFF_STDERROR /* disable our stderr buffering (redirect) */
|
//#define OFF_STDERROR /* disable our stderr buffering (redirect) */
|
||||||
//#define OFF_STDIOLBF /* disable our own stdout _IOFBF override */
|
//#define OFF_STDIOLBF /* disable our own stdout _IOFBF override */
|
||||||
|
//#define OFF_XTRAWIDE /* disable our extra wide multi-byte logic */
|
||||||
//#define PRETENDNOCAP /* use a terminal without essential caps */
|
//#define PRETENDNOCAP /* use a terminal without essential caps */
|
||||||
//#define QUICK_GRAPHS /* use fast algorithm, accept +2% distort */
|
//#define QUICK_GRAPHS /* use fast algorithm, accept +2% distort */
|
||||||
//#define RCFILE_NOERR /* rcfile errs silently default, vs. fatal */
|
//#define RCFILE_NOERR /* rcfile errs silently default, vs. fatal */
|
||||||
@ -553,6 +554,7 @@ typedef struct WIN_t {
|
|||||||
//atic void sig_resize (int dont_care_sig);
|
//atic void sig_resize (int dont_care_sig);
|
||||||
/*------ Special UTF-8 Multi-Byte support ------------------------------*/
|
/*------ Special UTF-8 Multi-Byte support ------------------------------*/
|
||||||
/*atic char UTF8_tab[] = { ... } */
|
/*atic char UTF8_tab[] = { ... } */
|
||||||
|
//atic inline int utf8_cols (const unsigned char *p, int n);
|
||||||
//atic int utf8_delta (const char *str);
|
//atic int utf8_delta (const char *str);
|
||||||
//atic int utf8_embody (const char *str, int width);
|
//atic int utf8_embody (const char *str, int width);
|
||||||
//atic const char *utf8_justify (const char *str, int width, int justr);
|
//atic const char *utf8_justify (const char *str, int width, int justr);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user