diff options
Diffstat (limited to 'src/dolphin/BitSet.h')
-rw-r--r-- | src/dolphin/BitSet.h | 110 |
1 files changed, 59 insertions, 51 deletions
diff --git a/src/dolphin/BitSet.h b/src/dolphin/BitSet.h
index d32b020..09cc1ce 100644
--- a/src/dolphin/BitSet.h
+++ b/src/dolphin/BitSet.h
@@ -3,85 +3,97 @@
 #pragma once
 
 #include <cstddef>
+#include <bitset>
 #include <initializer_list>
 #include <type_traits>
 #include "../types.h"
 
-#ifdef _WIN32
-
-#include <intrin.h>
-
 namespace Common
 {
-template <typename T>
-constexpr int CountSetBits(T v)
+#if defined(__GNUC__) || defined(__clang__)
+__attribute((always_inline)) static constexpr int CountSetBits(u8 val)
 {
-  // from https://graphics.stanford.edu/~seander/bithacks.html
-  // GCC has this built in, but MSVC's intrinsic will only emit the actual
-  // POPCNT instruction, which we're not depending on
-  v = v - ((v >> 1) & (T) ~(T)0 / 3);
-  v = (v & (T) ~(T)0 / 15 * 3) + ((v >> 2) & (T) ~(T)0 / 15 * 3);
-  v = (v + (v >> 4)) & (T) ~(T)0 / 255 * 15;
-  return (T)(v * ((T) ~(T)0 / 255)) >> (sizeof(T) - 1) * 8;
+  return __builtin_popcount(val);
 }
-inline int LeastSignificantSetBit(u8 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u16 val)
 {
-  unsigned long index;
-  _BitScanForward(&index, val);
-  return (int)index;
+  return __builtin_popcount(val);
 }
-inline int LeastSignificantSetBit(u16 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u32 val)
 {
-  unsigned long index;
-  _BitScanForward(&index, val);
-  return (int)index;
+  return __builtin_popcount(val);
 }
-inline int LeastSignificantSetBit(u32 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u64 val)
 {
-  unsigned long index;
-  _BitScanForward(&index, val);
-  return (int)index;
+  return __builtin_popcountll(val);
 }
-inline int LeastSignificantSetBit(u64 val)
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u8 val)
 {
-  unsigned long index;
-  _BitScanForward64(&index, val);
-  return (int)index;
+  return __builtin_ctz(val);
 }
-#else
-namespace Common
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u16 val)
 {
-constexpr int CountSetBits(u8 val)
+  return __builtin_ctz(val);
+}
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u32 val)
 {
-  return __builtin_popcount(val);
+  return __builtin_ctz(val);
 }
-constexpr int CountSetBits(u16 val)
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u64 val)
 {
-  return __builtin_popcount(val);
+  return __builtin_ctzll(val);
 }
-constexpr int CountSetBits(u32 val)
+#elif defined(_MSC_VER)
+#include <intrin.h>
+// MSVC __popcnt doesn't switch between hardware availability like gcc does, can't use it, let C++ implementation handle it
+__forceinline static int CountSetBits(u8 val)
 {
-  return __builtin_popcount(val);
+  return std::bitset<8>(val).count();
 }
-constexpr int CountSetBits(u64 val)
+__forceinline static int CountSetBits(u16 val)
 {
-  return __builtin_popcountll(val);
+  return std::bitset<16>(val).count();
 }
-inline int LeastSignificantSetBit(u8 val)
+__forceinline static int CountSetBits(u32 val)
 {
-  return __builtin_ctz(val);
+  return std::bitset<32>(val).count();
 }
-inline int LeastSignificantSetBit(u16 val)
+__forceinline static int CountSetBits(u64 val)
 {
-  return __builtin_ctz(val);
+  return std::bitset<64>(val).count();
 }
-inline int LeastSignificantSetBit(u32 val)
+__forceinline static int LeastSignificantSetBit(u8 val)
 {
-  return __builtin_ctz(val);
+  unsigned long count;
+  _BitScanForward(&count, val);
+  return count;
 }
-inline int LeastSignificantSetBit(u64 val)
+__forceinline static int LeastSignificantSetBit(u16 val)
 {
-  return __builtin_ctzll(val);
+  unsigned long count;
+  _BitScanForward(&count, val);
+  return count;
+}
+__forceinline static int LeastSignificantSetBit(u32 val)
+{
+  unsigned long count;
+  _BitScanForward(&count, val);
+  return count;
+}
+__forceinline static int LeastSignificantSetBit(u64 val)
+{
+#if defined(_WIN64)
+  unsigned long count;
+  _BitScanForward64(&count, val);
+  return count;
+#else
+  unsigned long tmp;
+  if (_BitScanForward(&tmp, (u32)(val & 0x00000000FFFFFFFFull)))  // nonzero result = a bit was found
+    return tmp;  // index 0 (bit 0 set) is a valid answer, so test the result, not tmp
+  if (_BitScanForward(&tmp, (u32)((val & 0xFFFFFFFF00000000ull) >> 32)))
+    return tmp + 32;  // bit 32 gives tmp == 0 and must still map to 32
+  return 0;  // val == 0: no bit set, keep the previous fallback value
+#endif
 }
 #endif
 
@@ -201,10 +213,6 @@ public:
   BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
   BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
   BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
-  // Warning: Even though on modern CPUs this is a single fast instruction,
-  // Dolphin's official builds do not currently assume POPCNT support on x86,
-  // so slower explicit bit twiddling is generated. Still should generally
-  // be faster than a loop.
   constexpr unsigned int Count() const { return CountSetBits(m_val); }
   constexpr Iterator begin() const { return ++Iterator(m_val, 0); }
   constexpr Iterator end() const { return Iterator(m_val, -1); }