From 8f919d9367e6303c5c19834c32389bde8974efa1 Mon Sep 17 00:00:00 2001
From: RichardG867 <richardg867@gmail.com>
Date: Fri, 1 Apr 2022 20:11:56 -0300
Subject: [PATCH] Virtual ISO: Move UTF-16 encoding to UTF-8 decoding

---
 src/cdrom/cdrom_image_viso.c | 42 ++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/cdrom/cdrom_image_viso.c b/src/cdrom/cdrom_image_viso.c
index 27fcb1f33..3333e860d 100644
--- a/src/cdrom/cdrom_image_viso.c
+++ b/src/cdrom/cdrom_image_viso.c
@@ -153,12 +153,14 @@ viso_pwrite(const void *ptr, uint64_t offset, size_t size, size_t count, FILE *s
 }
 
 static size_t
-viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
+viso_convert_utf8(wchar_t *dest, const char *src, ssize_t buf_size)
 {
-    wchar_t c, *p = dest;
-    int     next;
+    uint32_t c;
+    wchar_t *p = dest;
+    size_t   next;
 
     while (buf_size-- > 0) {
+        /* Interpret source codepoint. */
         c = *src;
         if (!c) {
             /* Terminator. */
@@ -178,6 +180,23 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
             /* Pass through sub-UTF-8 codepoints. */
             src++;
         }
+
+        /* Convert codepoints >= U+10000 to UTF-16 surrogate pairs.
+           This has to be done here because wchar_t on some platforms
+           (Windows) is not wide enough to store such high codepoints. */
+        if (c >= 0x10000) {
+            if ((c <= 0x10ffff) && (buf_size-- > 0)) {
+                /* Encode surrogate pair. */
+                c -= 0x10000;
+                *p++ = 0xd800 | (c >> 10);
+                c    = 0xdc00 | (c & 0x3ff);
+            } else {
+                /* Codepoint overflow or no room for a pair. */
+                c = '_';
+            }
+        }
+
+        /* Write destination codepoint. */
         *p++ = c;
     }
 
@@ -190,6 +209,7 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
     {                                                                               \
         st c;                                                                       \
         while (buf_size-- > 0) {                                                    \
+            /* Interpret source codepoint. */                                       \
             c = *src++;                                                             \
             switch (c) {                                                            \
                 case 0x00:                                                          \
@@ -249,22 +269,12 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
                                                                                     \
                 default:                                                            \
                     /* Not valid for D or A, but valid for filenames. */            \
-                    if ((charset < VISO_CHARSET_FN) || (c > 0x10ffff)) {            \
+                    if ((charset < VISO_CHARSET_FN) || (c > 0xffff))                \
                         c = '_';                                                    \
-                    } else if (c >= 0x10000) {                                      \
-                        /* Outside 16-bit UCS-2 space, but within 20-bit UTF-16. */ \
-                        if (buf_size-- > 0) {                                       \
-                            /* Encode UTF-16 surrogate pair. */                     \
-                            c -= 0x10000;                                           \
-                            *dest++ = cnv(0xd800 | (c >> 10));                      \
-                            c       = 0xdc00 | (c & 0x3ff);                         \
-                        } else {                                                    \
-                            /* No room for an UTF-16 pair. */                       \
-                            c = '_';                                                \
-                        }                                                           \
-                    }                                                               \
                     break;                                                          \
             }                                                                       \
+                                                                                    \
+            /* Write destination codepoint with conversion function applied. */     \
             *dest++ = cnv(c);                                                       \
         }                                                                           \
     }