From 1dfb6fd111482173f43f44941545ffe240a434ed Mon Sep 17 00:00:00 2001
From: rilysh <nightquick@proton.me>
Date: Sat, 24 Feb 2024 23:19:32 +0530
Subject: [PATCH] bswap.h: fix GCC requirements for bswap* builtins

1. __builtin_bswap{32,64} were added in GCC 4.3, and __builtin_bswap16
was added in GCC 4.8, however, currently, the GCC requirements in
bswap.h file has >= 10. This requirement of GCC version is false for
bswap* but true for __has_builtin() (as it first was added in GCC 10.1).
As bswap* builtins were added before GCC 10, the preprocessor check will
always going to be true for bswap but will be false if GCC version is
< 10 as __has_builtin() won't be present. Since the byteswap function,
on x86-64, can boil down to a single bswap instruction, this optimization
may left behind (unless GCC do some pattern matching). To avoid this,
just use the compiler macros (for GCC: __GNUC__, clang: __GNUC__ or
__clang__) and if the compiler is neither GCC or Clang, fall-back to
native implementation.

2. Remove the useless casts (uint{16,32,64}_t) from the constants. These
constants already has their own suffix, and casting to a different type
will just get ignored as the return value already gets casts to it's
appropriate type.

3. Previously, Clang couldn't able to use __builtin_bswap* (even if it was
newer) as LLVM define __GNUC__ macro to a specific constant (usually lower
than GCC's (__GNUC__) and on my system it's 4) which is indeed < 10. The
first comment also fixes this issue.

Link: <https://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Other-Builtins.html>
Link: <https://gcc.gnu.org/onlinedocs/gcc-4.8.0/gcc/Other-Builtins.html>
Link: <https://libc-alpha.sourceware.narkive.com/PfaB4BGP/patch-byteswap-h-fix-gcc-ver-test-for-builtin-bswap32>
---
 src/include/86box/bswap.h | 94 ++++++++++++---------------------------
 1 file changed, 28 insertions(+), 66 deletions(-)

diff --git a/src/include/86box/bswap.h b/src/include/86box/bswap.h
index ac758b20a..78183d137 100644
--- a/src/include/86box/bswap.h
+++ b/src/include/86box/bswap.h
@@ -40,91 +40,55 @@
 
 #include <stdint.h>
 
-#ifdef HAVE_BYTESWAP_H
-#    include <byteswap.h>
-#else
-# define bswap_16(x) \
-    ( \
-        ((uint16_t)( \
-        (((uint16_t)(x) & (uint16_t)0x00ffU) << 8) |  \
-        (((uint16_t)(x) & (uint16_t)0xff00U) >> 8) )) \
-    )
+#define bswap_16(x)					\
+	((uint16_t)((((x) & 0x00ffu) << 8) |		\
+		    (((x) & 0xff00u) >> 8)))
 
-# define bswap_32(x) \
-    ( \
-        ((uint32_t)( \
-        (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |  \
-        (((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |  \
-        (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >>  8) |  \
-        (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24) )) \
-    )
+#define bswap_32(x)					\
+	((uint32_t)((((x) & 0x000000fful) << 24) |	\
+		    (((x) & 0x0000ff00ul) <<  8) |	\
+		    (((x) & 0x00ff0000ul) >>  8) |	\
+		    (((x) & 0xff000000ul) >> 24)))
 
-# define bswap_64(x) \
-    ( \
-        ((uint64_t)( \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) <<  8) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >>  8) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56) )) \
-    )
-#endif    /*HAVE_BYTESWAP_H*/
+# define bswap_64(x)							\
+        ((uint64_t)((((x) & 0x00000000000000ffull) << 56) |		\
+		    (((x) & 0x000000000000ff00ull) << 40) |		\
+		    (((x) & 0x0000000000ff0000ull) << 24) |		\
+		    (((x) & 0x00000000ff000000ull) <<  8) |		\
+		    (((x) & 0x000000ff00000000ull) >>  8) |		\
+		    (((x) & 0x0000ff0000000000ull) >> 24) |		\
+		    (((x) & 0x00ff000000000000ull) >> 40) |		\
+		    (((x) & 0xff00000000000000ull) >> 56)))
 
-#if  __GNUC__ >= 10
-#if defined __has_builtin && __has_builtin(__builtin_bswap16)
-#define bswap16(x) __builtin_bswap16(x)
-#else
-static __inline uint16_t bswap16(uint16_t x)
-{
-    return bswap_16(x);
-}
-#    endif
-#else
 static __inline uint16_t
 bswap16(uint16_t x)
 {
+#if defined (__GNUC__) || defined (__clang__)
+    return __builtin_bswap16(x);
+#else
     return bswap_16(x);
-}
 #endif
+}
 
-#if __GNUC__ >= 10
-#    if defined __has_builtin && __has_builtin(__builtin_bswap32)
-#        define bswap32(x) __builtin_bswap32(x)
-#    else
 static __inline uint32_t
 bswap32(uint32_t x)
 {
-    return bswap_32(x);
-}
-#    endif
+#if defined (__GNUC__) || defined (__clang__)
+    return __builtin_bswap32(x);
 #else
-static __inline uint32_t
-bswap32(uint32_t x)
-{
     return bswap_32(x);
-}
 #endif
+}
 
-#if __GNUC__ >= 10
-#    if defined __has_builtin && __has_builtin(__builtin_bswap64)
-#        define bswap64(x) __builtin_bswap64(x)
-#    else
 static __inline uint64_t
 bswap64(uint64_t x)
 {
-    return bswap_64(x);
-}
-#    endif
+#if defined (__GNUC__) || defined (__clang__)
+    return __builtin_bswap64(x);
 #else
-static __inline uint64_t
-bswap64(uint64_t x)
-{
-    return bswap_64(x);
-}
+    return bswap_16(x);
 #endif
+}
 
 static __inline void
 bswap16s(uint16_t *s)
@@ -198,12 +162,10 @@ CPU_CONVERT(le, 64, uint64_t)
 /* unaligned versions (optimized for frequent unaligned accesses)*/
 
 #if defined(__i386__) || defined(__powerpc__)
-
 #    define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
 #    define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
 #    define le16_to_cpupu(p)    le16_to_cpup(p)
 #    define le32_to_cpupu(p)    le32_to_cpup(p)
-
 #    define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
 #    define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)