diff options
Diffstat (limited to 'src/dolphin/BitSet.h')
| -rw-r--r-- | src/dolphin/BitSet.h | 110 | 
1 file changed, 59 insertions, 51 deletions
diff --git a/src/dolphin/BitSet.h b/src/dolphin/BitSet.h
index d32b020..09cc1ce 100644
--- a/src/dolphin/BitSet.h
+++ b/src/dolphin/BitSet.h
@@ -3,85 +3,97 @@
 #pragma once
 
 #include <cstddef>
+#include <bitset>
 #include <initializer_list>
 #include <type_traits>
 #include "../types.h"
 
-#ifdef _WIN32
-
-#include <intrin.h>
-
 namespace Common
 {
-template <typename T>
-constexpr int CountSetBits(T v)
+#if defined(__GNUC__) || defined(__clang__)
+__attribute((always_inline)) static constexpr int CountSetBits(u8 val)
 {
-  // from https://graphics.stanford.edu/~seander/bithacks.html
-  // GCC has this built in, but MSVC's intrinsic will only emit the actual
-  // POPCNT instruction, which we're not depending on
-  v = v - ((v >> 1) & (T) ~(T)0 / 3);
-  v = (v & (T) ~(T)0 / 15 * 3) + ((v >> 2) & (T) ~(T)0 / 15 * 3);
-  v = (v + (v >> 4)) & (T) ~(T)0 / 255 * 15;
-  return (T)(v * ((T) ~(T)0 / 255)) >> (sizeof(T) - 1) * 8;
+  return __builtin_popcount(val);
 }
-inline int LeastSignificantSetBit(u8 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u16 val)
 {
-  unsigned long index;
-  _BitScanForward(&index, val);
-  return (int)index;
+  return __builtin_popcount(val);
 }
-inline int LeastSignificantSetBit(u16 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u32 val)
 {
-  unsigned long index;
-  _BitScanForward(&index, val);
-  return (int)index;
+  return __builtin_popcount(val);
 }
-inline int LeastSignificantSetBit(u32 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u64 val)
 {
-  unsigned long index;
-  _BitScanForward(&index, val);
-  return (int)index;
+  return __builtin_popcountll(val);
 }
-inline int LeastSignificantSetBit(u64 val)
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u8 val)
 {
-  unsigned long index;
-  _BitScanForward64(&index, val);
-  return (int)index;
+  return __builtin_ctz(val);
 }
-#else
-namespace Common
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u16 val)
 {
-constexpr int CountSetBits(u8 val)
+  return __builtin_ctz(val);
+}
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u32 val)
 {
-  return __builtin_popcount(val);
+  return __builtin_ctz(val);
 }
-constexpr int CountSetBits(u16 val)
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u64 val)
 {
-  return __builtin_popcount(val);
+  return __builtin_ctzll(val);
 }
-constexpr int CountSetBits(u32 val)
+#elif defined(_MSC_VER)
+#include <intrin.h>
+// MSVC __popcnt doesn't switch between hardware availability like gcc does, can't use it, let C++ implementation handle it
+__forceinline static int CountSetBits(u8 val)
 {
-  return __builtin_popcount(val);
+  return std::bitset<8>(val).count();
 }
-constexpr int CountSetBits(u64 val)
+__forceinline static int CountSetBits(u16 val)
 {
-  return __builtin_popcountll(val);
+  return std::bitset<16>(val).count();
 }
-inline int LeastSignificantSetBit(u8 val)
+__forceinline static int CountSetBits(u32 val)
 {
-  return __builtin_ctz(val);
+  return std::bitset<32>(val).count();
 }
-inline int LeastSignificantSetBit(u16 val)
+__forceinline static int CountSetBits(u64 val)
 {
-  return __builtin_ctz(val);
+  return std::bitset<64>(val).count();
 }
-inline int LeastSignificantSetBit(u32 val)
+__forceinline static int LeastSignificantSetBit(u8 val)
 {
-  return __builtin_ctz(val);
+  unsigned long count;
+  _BitScanForward(&count, val);
+  return count;
 }
-inline int LeastSignificantSetBit(u64 val)
+__forceinline static int LeastSignificantSetBit(u16 val)
 {
-  return __builtin_ctzll(val);
+  unsigned long count;
+  _BitScanForward(&count, val);
+  return count;
+}
+__forceinline static int LeastSignificantSetBit(u32 val)
+{
+  unsigned long count;
+  _BitScanForward(&count, val);
+  return count;
+}
+__forceinline static int LeastSignificantSetBit(u64 val)
+{
+#if defined(_WIN64)
+  unsigned long count;
+  _BitScanForward64(&count, val);
+  return count;
+#else
+  unsigned long tmp;  // 32-bit MSVC has no _BitScanForward64: scan the low word, then the high word
+  if (_BitScanForward(&tmp, (u32)(val & 0x00000000FFFFFFFFull)))  // test the return value: index 0 is a valid hit
+    return tmp;
+  if (_BitScanForward(&tmp, (u32)((val & 0xFFFFFFFF00000000ull) >> 32)))  // low word empty, try bits 32..63
+    return tmp + 32;
+  return 0;  // val == 0: no set bit, keep the fallback result of 0
+#endif
 }
 #endif
 
@@ -201,10 +213,6 @@ public:
   BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
   BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
   BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
-  // Warning: Even though on modern CPUs this is a single fast instruction,
-  // Dolphin's official builds do not currently assume POPCNT support on x86,
-  // so slower explicit bit twiddling is generated.  Still should generally
-  // be faster than a loop.
   constexpr unsigned int Count() const { return CountSetBits(m_val); }
   constexpr Iterator begin() const { return ++Iterator(m_val, 0); }
   constexpr Iterator end() const { return Iterator(m_val, -1); }