From 8f919d9367e6303c5c19834c32389bde8974efa1 Mon Sep 17 00:00:00 2001 From: RichardG867 Date: Fri, 1 Apr 2022 20:11:56 -0300 Subject: [PATCH] Virtual ISO: Move UTF-16 encoding to UTF-8 decoding --- src/cdrom/cdrom_image_viso.c | 42 ++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/cdrom/cdrom_image_viso.c b/src/cdrom/cdrom_image_viso.c index 27fcb1f33..3333e860d 100644 --- a/src/cdrom/cdrom_image_viso.c +++ b/src/cdrom/cdrom_image_viso.c @@ -153,12 +153,14 @@ viso_pwrite(const void *ptr, uint64_t offset, size_t size, size_t count, FILE *s } static size_t -viso_convert_utf8(wchar_t *dest, const char *src, int buf_size) +viso_convert_utf8(wchar_t *dest, const char *src, ssize_t buf_size) { - wchar_t c, *p = dest; - int next; + uint32_t c; + wchar_t *p = dest; + size_t next; while (buf_size-- > 0) { + /* Interpret source codepoint. */ c = *src; if (!c) { /* Terminator. */ @@ -178,6 +180,23 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size) /* Pass through sub-UTF-8 codepoints. */ src++; } + + /* Convert codepoints >= U+10000 to UTF-16 surrogate pairs. + This has to be done here because wchar_t on some platforms + (Windows) is not wide enough to store such high codepoints. */ + if (c >= 0x10000) { + if ((c <= 0x10ffff) && (buf_size-- > 0)) { + /* Encode surrogate pair. */ + c -= 0x10000; + *p++ = 0xd800 | (c >> 10); + c = 0xdc00 | (c & 0x3ff); + } else { + /* Codepoint overflow or no room for a pair. */ + c = '_'; + } + } + + /* Write destination codepoint. */ *p++ = c; } @@ -190,6 +209,7 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size) { \ st c; \ while (buf_size-- > 0) { \ + /* Interpret source codepoint. */ \ c = *src++; \ switch (c) { \ case 0x00: \ @@ -249,22 +269,12 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size) \ default: \ /* Not valid for D or A, but valid for filenames. */ \ - if ((charset < VISO_CHARSET_FN) || (c > 0x10ffff)) { \ + if ((charset < VISO_CHARSET_FN) || (c > 0xffff)) \ c = '_'; \ - } else if (c >= 0x10000) { \ - /* Outside 16-bit UCS-2 space, but within 20-bit UTF-16. */ \ - if (buf_size-- > 0) { \ - /* Encode UTF-16 surrogate pair. */ \ - c -= 0x10000; \ - *dest++ = cnv(0xd800 | (c >> 10)); \ - c = 0xdc00 | (c & 0x3ff); \ - } else { \ - /* No room for an UTF-16 pair. */ \ - c = '_'; \ - } \ - } \ break; \ } \ + \ + /* Write destination codepoint with conversion function applied. */ \ *dest++ = cnv(c); \ } \ }