bswap.h: fix GCC requirements for bswap* builtins

1. __builtin_bswap{32,64} were added in GCC 4.3, and __builtin_bswap16 was added in GCC 4.8, however, currently, the GCC requirements in bswap.h file has >= 10. This requirement of GCC version is false for bswap* but true for __has_builtin() (as it first was added in GCC 10.1). As bswap* builtins were added before GCC 10, the preprocessor check will always going to be true for bswap but will be false if GCC version is < 10 as __has_builtin() won't be present. Since the byteswap function, on x86-64, can boil down to a single bswap instruction, this optimization may left behind (unless GCC do some pattern matching). To avoid this, just use the compiler macros (for GCC: __GNUC__, clang: __GNUC__ or __clang__) and if the compiler is neither GCC or Clang, fall-back to native implementation. 2. Remove the useless casts (uint{16,32,64}_t) from the constants. These constants already has their own suffix, and casting to a different type will just get ignored as the return value already gets casts to it's appropriate type. 3. Previously, Clang couldn't able to use __builtin_bswap* (even if it was newer) as LLVM define __GNUC__ macro to a specific constant (usually lower than GCC's (__GNUC__) and on my system it's 4) which is indeed < 10. The first comment also fixes this issue. Link: <https://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Other-Builtins.html> Link: <https://gcc.gnu.org/onlinedocs/gcc-4.8.0/gcc/Other-Builtins.html> Link: <https://libc-alpha.sourceware.narkive.com/PfaB4BGP/patch-byteswap-h-fix-gcc-ver-test-for-builtin-bswap32>
2024-02-24 23:19:32 +05:30
parent 6b435088f9
commit 1dfb6fd111
1 changed files with 28 additions and 66 deletions
--- a/src/include/86box/bswap.h
+++ b/src/include/86box/bswap.h
@@ -40,91 +40,55 @@

 #include <stdint.h>

-#ifdef HAVE_BYTESWAP_H
-#    include <byteswap.h>
-#else
-# define bswap_16(x) \
-    ( \
-        ((uint16_t)( \
-        (((uint16_t)(x) & (uint16_t)0x00ffU) << 8) |  \
-        (((uint16_t)(x) & (uint16_t)0xff00U) >> 8) )) \
-    )
+#define bswap_16(x)					\
+	((uint16_t)((((x) & 0x00ffu) << 8) |		\
+		    (((x) & 0xff00u) >> 8)))

-# define bswap_32(x) \
-    ( \
-        ((uint32_t)( \
-        (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |  \
-        (((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |  \
-        (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >>  8) |  \
-        (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24) )) \
-    )
+#define bswap_32(x)					\
+	((uint32_t)((((x) & 0x000000fful) << 24) |	\
+		    (((x) & 0x0000ff00ul) <<  8) |	\
+		    (((x) & 0x00ff0000ul) >>  8) |	\
+		    (((x) & 0xff000000ul) >> 24)))

-# define bswap_64(x) \
-    ( \
-        ((uint64_t)( \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) <<  8) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >>  8) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) |  \
-        (uint64_t)(((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56) )) \
-    )
-#endif    /*HAVE_BYTESWAP_H*/
+# define bswap_64(x)							\
+        ((uint64_t)((((x) & 0x00000000000000ffull) << 56) |		\
+		    (((x) & 0x000000000000ff00ull) << 40) |		\
+		    (((x) & 0x0000000000ff0000ull) << 24) |		\
+		    (((x) & 0x00000000ff000000ull) <<  8) |		\
+		    (((x) & 0x000000ff00000000ull) >>  8) |		\
+		    (((x) & 0x0000ff0000000000ull) >> 24) |		\
+		    (((x) & 0x00ff000000000000ull) >> 40) |		\
+		    (((x) & 0xff00000000000000ull) >> 56)))

-#if  __GNUC__ >= 10
-#if defined __has_builtin && __has_builtin(__builtin_bswap16)
-#define bswap16(x) __builtin_bswap16(x)
-#else
-static __inline uint16_t bswap16(uint16_t x)
-{
-    return bswap_16(x);
-}
-#    endif
-#else
 static __inline uint16_t
 bswap16(uint16_t x)
 {
+#if defined (__GNUC__) || defined (__clang__)
+    return __builtin_bswap16(x);
+#else
    return bswap_16(x);
-}
 #endif
+}

-#if __GNUC__ >= 10
-#    if defined __has_builtin && __has_builtin(__builtin_bswap32)
-#        define bswap32(x) __builtin_bswap32(x)
-#    else
 static __inline uint32_t
 bswap32(uint32_t x)
 {
-    return bswap_32(x);
-}
-#    endif
+#if defined (__GNUC__) || defined (__clang__)
+    return __builtin_bswap32(x);
 #else
-static __inline uint32_t
-bswap32(uint32_t x)
-{
    return bswap_32(x);
-}
 #endif
+}

-#if __GNUC__ >= 10
-#    if defined __has_builtin && __has_builtin(__builtin_bswap64)
-#        define bswap64(x) __builtin_bswap64(x)
-#    else
 static __inline uint64_t
 bswap64(uint64_t x)
 {
-    return bswap_64(x);
-}
-#    endif
+#if defined (__GNUC__) || defined (__clang__)
+    return __builtin_bswap64(x);
 #else
-static __inline uint64_t
-bswap64(uint64_t x)
-{
-    return bswap_64(x);
-}
+    return bswap_16(x);
 #endif
+}

 static __inline void
 bswap16s(uint16_t *s)
@@ -198,12 +162,10 @@ CPU_CONVERT(le, 64, uint64_t)
 /* unaligned versions (optimized for frequent unaligned accesses)*/

 #if defined(__i386__) || defined(__powerpc__)
-
 #    define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
 #    define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
 #    define le16_to_cpupu(p)    le16_to_cpup(p)
 #    define le32_to_cpupu(p)    le32_to_cpup(p)
-
 #    define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
 #    define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)