Virtual ISO: Convert filenames from UTF-8 to UCS-2 on Joliet, and some optimizations

This commit is contained in:
RichardG867
2022-03-28 21:12:53 -03:00
parent 8d5d7800f9
commit 936e74adb6

View File

@@ -150,85 +150,108 @@ viso_pwrite(const void *ptr, uint64_t offset, size_t size, size_t count, FILE *s
return ret; return ret;
} }
/**
 * Decode a NUL-terminated UTF-8 string into wide characters.
 *
 * @param dest     Output buffer with room for buf_size wide characters.
 * @param src      NUL-terminated UTF-8 input.
 * @param buf_size Capacity of dest, in wide characters, including the terminator.
 * @return Number of wide characters stored, not counting the terminator.
 *
 * The output is always NUL-terminated when buf_size > 0; input that does not fit
 * is truncated. Decoding is lenient: a multi-byte sequence that is cut short ends
 * at the first byte that is not a continuation byte (this includes the string
 * terminator, so the decoder never reads past the end of src), and the bits
 * gathered so far are emitted. Code points that cannot be represented in a
 * wchar_t are replaced with '_'.
 */
static size_t
viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
{
    /* Work on unsigned bytes so the lead/continuation tests never see sign-extended values. */
    const unsigned char *s = (const unsigned char *) src;
    wchar_t             *p = dest;

    if (buf_size <= 0)
        return 0;

    /* One slot is always held back for the terminator. */
    while ((--buf_size > 0) && *s) {
        uint32_t cp = *s++;

        if (cp & 0x80) {
            /* Count the expected continuation bytes from the lead byte's high bits (at most 3). */
            int next = 0;
            for (uint32_t mask = 0x40; (cp & mask) && (next < 3); mask >>= 1)
                next++;

            /* Keep the payload bits of the lead byte, then append 6 bits per continuation byte. */
            cp &= 0x3f >> next;
            while ((next-- > 0) && ((*s & 0xc0) == 0x80))
                cp = (cp << 6) | (*s++ & 0x3f);
        }

        *p++ = (cp > (uint32_t) WCHAR_MAX) ? L'_' : (wchar_t) cp;
    }

    *p = 0;
    return (size_t) (p - dest);
}

/*
 * Generate a function which copies a string into a fixed-size field, replacing
 * characters that are not allowed in the requested character set with '_' and
 * filling the remainder of the field with spaces.
 *
 *   n   - name of the generated function
 *   dt  - element type of the destination buffer
 *   st  - element type of the source string
 *   cnv - conversion applied to every stored element (may be left empty)
 *
 * The generated function writes exactly buf_size elements to dest and does not
 * add a terminator; a source longer than buf_size is truncated. The charset
 * comparisons rely on the ordering VISO_CHARSET_D < VISO_CHARSET_A <
 * VISO_CHARSET_FN, with any value above VISO_CHARSET_FN accepting everything
 * that fits in 16 bits.
 */
#define VISO_WRITE_STR_FUNC(n, dt, st, cnv) \
    static void \
    n(dt *dest, const st *src, int buf_size, int charset) \
    { \
        st c; \
        while (buf_size-- > 0) { \
            c = *src++; \
            switch (c) { \
                case 0x00: \
                    /* Terminator, apply space padding to this slot and all remaining ones. */ \
                    while (buf_size-- >= 0) \
                        *dest++ = cnv(' '); \
                    return; \
 \
                case 'A' ... 'Z': \
                case '0' ... '9': \
                case '_': \
                    /* Valid on all sets. */ \
                    break; \
 \
                case 'a' ... 'z': \
                    /* Convert to uppercase on D and A. */ \
                    if (charset <= VISO_CHARSET_A) \
                        c -= 'a' - 'A'; \
                    break; \
 \
                case ' ': \
                case '!': \
                case '"': \
                case '%': \
                case '&': \
                case '(': \
                case ')': \
                case '+': \
                case ',': \
                case '-': \
                case '.': \
                case '<': \
                case '=': \
                case '>': \
                    /* Valid for A and filenames but not for D. */ \
                    if (charset < VISO_CHARSET_A) \
                        c = '_'; \
                    break; \
 \
                case '*': \
                case '/': \
                case ':': \
                case ';': \
                case '?': \
                case '\'': \
                    /* Valid for A but not for filenames or D. */ \
                    if ((charset < VISO_CHARSET_A) || (charset == VISO_CHARSET_FN)) \
                        c = '_'; \
                    break; \
 \
                case 0x01 ... 0x1f: \
                    /* Not valid for A, D or filenames. */ \
                    if (charset <= VISO_CHARSET_FN) \
                        c = '_'; \
                    break; \
 \
                default: \
                    /* Not valid for A or D, but valid for filenames. */ \
                    if ((charset < VISO_CHARSET_FN) || (c > 0xffff)) \
                        c = '_'; \
                    break; \
            } \
            *dest++ = cnv(c); \
        } \
    }
/* Instantiate the string writers exactly once each:
   viso_write_string reads char input and stores uint8_t output unchanged;
   viso_write_wstring reads wchar_t input and stores uint16_t output passed through
   cpu_to_be16 (presumably a host-to-big-endian conversion - confirm against its definition). */
VISO_WRITE_STR_FUNC(viso_write_string, uint8_t, char, )
VISO_WRITE_STR_FUNC(viso_write_wstring, uint16_t, wchar_t, cpu_to_be16)
@@ -250,16 +273,18 @@ viso_get_short_filename(viso_entry_t *dir, char *dest, const char *src)
/* Copy name. */ /* Copy name. */
int name_copy_len = MIN(8, name_len); int name_copy_len = MIN(8, name_len);
viso_write_string((uint8_t *) dest, src, name_copy_len, VISO_CHARSET_D); viso_write_string((uint8_t *) dest, src, name_copy_len, VISO_CHARSET_D);
dest[name_copy_len] = 0; dest[name_copy_len] = '\0';
/* Copy extension to temporary buffer. */ /* Copy extension to temporary buffer. */
char ext[5] = { 0 }; char ext[5] = { 0 };
int force_tail = (name_len > 8) || (ext_len == 1); int force_tail = (name_len > 8) || (ext_len == 1);
if (ext_len > 1) { if (ext_len > 1) {
ext[0] = '.'; ext[0] = '.';
if (ext_len > 4) if (ext_len > 4) {
ext_len = 4;
force_tail = 1; force_tail = 1;
viso_write_string((uint8_t *) &ext[1], &ext_pos[1], MIN(ext_len, 4) - 1, VISO_CHARSET_D); }
viso_write_string((uint8_t *) &ext[1], &ext_pos[1], ext_len - 1, VISO_CHARSET_D);
} }
/* Check if this filename is unique, and add a tail if required, while also adding the extension. */ /* Check if this filename is unique, and add a tail if required, while also adding the extension. */
@@ -739,7 +764,7 @@ viso_init(const char *dirname, int *error)
wtemp = realloc(wtemp, wtemp_len * sizeof(wchar_t)); wtemp = realloc(wtemp, wtemp_len * sizeof(wchar_t));
} }
max_len = (sizeof(last_entry->name_joliet) / sizeof(last_entry->name_joliet[0])) - 1; max_len = (sizeof(last_entry->name_joliet) / sizeof(last_entry->name_joliet[0])) - 1;
len = mbstowcs(wtemp, readdir_entry->d_name, wtemp_len - 1); len = viso_convert_utf8(wtemp, readdir_entry->d_name, wtemp_len);
if (len > max_len) { if (len > max_len) {
/* Relocate extension if this is a file whose name exceeds the maximum length. */ /* Relocate extension if this is a file whose name exceeds the maximum length. */
if (!S_ISDIR(last_entry->stats.st_mode)) { if (!S_ISDIR(last_entry->stats.st_mode)) {