From 2a3a07121640cfa7fb286f0e7eb7e149c0dfabef Mon Sep 17 00:00:00 2001 From: jdp_ <42700985+jdpatdiscord@users.noreply.github.com> Date: Mon, 28 Aug 2023 14:01:15 -0400 Subject: Reduce code stink (#1818) CRC32.cpp: Make table initialization compile time DSi_NAND.cpp: Fix file close / unmount / disk close on error ~L427: Remove redundant calls, as they are immediately rendered useless by `rem` being overwritten NDS.cpp / FreeBIOS.h: Remove unneeded size values in header Remove unneeded memset's as they are initialized anyway sha1.c / sha1.h: Fix useless warning Wifi.cpp: Remove unneeded includes DSi.cpp: Reduce ugly casts Deduplicate code qt_sdl/main.cpp: silence clang switch statement warning qt_sdl/main.h: fix override warnings dolphin/BitSet.h: use msvc extensions only when appropriate, fix broken bit set count under _WIN32 --- src/dolphin/BitSet.h | 110 +++++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 51 deletions(-) (limited to 'src/dolphin') diff --git a/src/dolphin/BitSet.h b/src/dolphin/BitSet.h index d32b020..09cc1ce 100644 --- a/src/dolphin/BitSet.h +++ b/src/dolphin/BitSet.h @@ -3,85 +3,97 @@ #pragma once #include +#include #include #include #include "../types.h" -#ifdef _WIN32 - -#include - namespace Common { -template -constexpr int CountSetBits(T v) +#if defined(__GNUC__) || defined(__clang__) +__attribute((always_inline)) static constexpr int CountSetBits(u8 val) { - // from https://graphics.stanford.edu/~seander/bithacks.html - // GCC has this built in, but MSVC's intrinsic will only emit the actual - // POPCNT instruction, which we're not depending on - v = v - ((v >> 1) & (T) ~(T)0 / 3); - v = (v & (T) ~(T)0 / 15 * 3) + ((v >> 2) & (T) ~(T)0 / 15 * 3); - v = (v + (v >> 4)) & (T) ~(T)0 / 255 * 15; - return (T)(v * ((T) ~(T)0 / 255)) >> (sizeof(T) - 1) * 8; + return __builtin_popcount(val); } -inline int LeastSignificantSetBit(u8 val) +__attribute((always_inline)) static constexpr int CountSetBits(u16 val) { - unsigned long index; - _BitScanForward(&index, val); - return (int)index; + return __builtin_popcount(val); } -inline int LeastSignificantSetBit(u16 val) +__attribute((always_inline)) static constexpr int CountSetBits(u32 val) { - unsigned long index; - _BitScanForward(&index, val); - return (int)index; + return __builtin_popcount(val); } -inline int LeastSignificantSetBit(u32 val) +__attribute((always_inline)) static constexpr int CountSetBits(u64 val) { - unsigned long index; - _BitScanForward(&index, val); - return (int)index; + return __builtin_popcountll(val); } -inline int LeastSignificantSetBit(u64 val) +__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u8 val) { - unsigned long index; - _BitScanForward64(&index, val); - return (int)index; + return __builtin_ctz(val); } -#else -namespace Common +__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u16 val) { -constexpr int CountSetBits(u8 val) + return __builtin_ctz(val); +} +__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u32 val) { - return __builtin_popcount(val); + return __builtin_ctz(val); } -constexpr int CountSetBits(u16 val) +__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u64 val) { - return __builtin_popcount(val); + return __builtin_ctzll(val); } -constexpr int CountSetBits(u32 val) +#elif defined(_MSC_VER) +#include +// MSVC __popcnt doesn't switch between hardware availability like gcc does, can't use it, let C++ implementation handle it +__forceinline static int CountSetBits(u8 val) { - return __builtin_popcount(val); + return std::bitset<8>(val).count(); } -constexpr int CountSetBits(u64 val) +__forceinline static int CountSetBits(u16 val) { - return __builtin_popcountll(val); + return std::bitset<16>(val).count(); } -inline int LeastSignificantSetBit(u8 val) +__forceinline static int CountSetBits(u32 val) { - return __builtin_ctz(val); + return std::bitset<32>(val).count(); } -inline int LeastSignificantSetBit(u16 val) +__forceinline static int CountSetBits(u64 val) { - return __builtin_ctz(val); + return std::bitset<64>(val).count(); } -inline int LeastSignificantSetBit(u32 val) +__forceinline static int LeastSignificantSetBit(u8 val) { - return __builtin_ctz(val); + unsigned long count; + _BitScanForward(&count, val); + return count; } -inline int LeastSignificantSetBit(u64 val) +__forceinline static int LeastSignificantSetBit(u16 val) { - return __builtin_ctzll(val); + unsigned long count; + _BitScanForward(&count, val); + return count; +} +__forceinline static int LeastSignificantSetBit(u32 val) +{ + unsigned long count; + _BitScanForward(&count, val); + return count; +} +__forceinline static int LeastSignificantSetBit(u64 val) +{ +#if defined(_WIN64) + unsigned long count; + _BitScanForward64(&count, val); + return count; +#else + unsigned long tmp; + _BitScanForward(&tmp, (u32)(val & 0x00000000FFFFFFFFull)); + if (tmp) + return tmp; + _BitScanForward(&tmp, (u32)((val & 0xFFFFFFFF00000000ull) >> 32)); + return tmp ? tmp + 32 : 0; +#endif } #endif @@ -201,10 +213,6 @@ public: BitSet& operator^=(BitSet other) { return *this = *this ^ other; } BitSet& operator<<=(IntTy shift) { return *this = *this << shift; } BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; } - // Warning: Even though on modern CPUs this is a single fast instruction, - // Dolphin's official builds do not currently assume POPCNT support on x86, - // so slower explicit bit twiddling is generated. Still should generally - // be faster than a loop. constexpr unsigned int Count() const { return CountSetBits(m_val); } constexpr Iterator begin() const { return ++Iterator(m_val, 0); } constexpr Iterator end() const { return Iterator(m_val, -1); } -- cgit v1.2.3