aboutsummaryrefslogtreecommitdiff
path: root/src/dolphin/BitSet.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/dolphin/BitSet.h')
-rw-r--r--src/dolphin/BitSet.h110
1 files changed, 59 insertions, 51 deletions
diff --git a/src/dolphin/BitSet.h b/src/dolphin/BitSet.h
index d32b020..09cc1ce 100644
--- a/src/dolphin/BitSet.h
+++ b/src/dolphin/BitSet.h
@@ -3,85 +3,97 @@
#pragma once
#include <cstddef>
+#include <bitset>
#include <initializer_list>
#include <type_traits>
#include "../types.h"
-#ifdef _WIN32
-
-#include <intrin.h>
-
namespace Common
{
-template <typename T>
-constexpr int CountSetBits(T v)
+#if defined(__GNUC__) || defined(__clang__)
+__attribute((always_inline)) static constexpr int CountSetBits(u8 val) // Returns the number of 1-bits in val.
{
- // from https://graphics.stanford.edu/~seander/bithacks.html
- // GCC has this built in, but MSVC's intrinsic will only emit the actual
- // POPCNT instruction, which we're not depending on
- v = v - ((v >> 1) & (T) ~(T)0 / 3);
- v = (v & (T) ~(T)0 / 15 * 3) + ((v >> 2) & (T) ~(T)0 / 15 * 3);
- v = (v + (v >> 4)) & (T) ~(T)0 / 255 * 15;
- return (T)(v * ((T) ~(T)0 / 255)) >> (sizeof(T) - 1) * 8;
+ return __builtin_popcount(val); // GCC/Clang builtin taking unsigned int; val is converted for the call
}
-inline int LeastSignificantSetBit(u8 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u16 val) // Returns the number of 1-bits in val.
{
- unsigned long index;
- _BitScanForward(&index, val);
- return (int)index;
+ return __builtin_popcount(val); // GCC/Clang builtin taking unsigned int; val is converted for the call
}
-inline int LeastSignificantSetBit(u16 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u32 val) // Returns the number of 1-bits in val.
{
- unsigned long index;
- _BitScanForward(&index, val);
- return (int)index;
+ return __builtin_popcount(val); // GCC/Clang population-count builtin (unsigned int variant)
}
-inline int LeastSignificantSetBit(u32 val)
+__attribute((always_inline)) static constexpr int CountSetBits(u64 val) // Returns the number of 1-bits in val.
{
- unsigned long index;
- _BitScanForward(&index, val);
- return (int)index;
+ return __builtin_popcountll(val); // unsigned long long variant of the GCC/Clang population-count builtin
}
-inline int LeastSignificantSetBit(u64 val)
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u8 val) // Returns the zero-based index of the lowest 1-bit in val.
{
- unsigned long index;
- _BitScanForward64(&index, val);
- return (int)index;
+ return __builtin_ctz(val); // counts trailing zero bits; the builtin's result is undefined when val == 0
}
-#else
-namespace Common
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u16 val) // Returns the zero-based index of the lowest 1-bit in val.
{
-constexpr int CountSetBits(u8 val)
+ return __builtin_ctz(val); // counts trailing zero bits; the builtin's result is undefined when val == 0
+}
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u32 val) // Returns the zero-based index of the lowest 1-bit in val.
{
- return __builtin_popcount(val);
+ return __builtin_ctz(val); // counts trailing zero bits; the builtin's result is undefined when val == 0
}
-constexpr int CountSetBits(u16 val)
+__attribute((always_inline)) static constexpr int LeastSignificantSetBit(u64 val) // Returns the zero-based index of the lowest 1-bit in val.
{
- return __builtin_popcount(val);
+ return __builtin_ctzll(val); // unsigned long long variant of ctz; result is undefined when val == 0
}
-constexpr int CountSetBits(u32 val)
+#elif defined(_MSC_VER)
+#include <intrin.h>
+// MSVC's __popcnt always emits the POPCNT instruction and, unlike GCC's builtin, has no fallback for CPUs without it, so it can't be used here; let the standard library's std::bitset::count() handle it instead.
+__forceinline static int CountSetBits(u8 val) // Returns the number of 1-bits in val (MSVC path).
{
- return __builtin_popcount(val);
+ return std::bitset<8>(val).count(); // count() yields std::size_t, implicitly narrowed to int
}
-constexpr int CountSetBits(u64 val)
+__forceinline static int CountSetBits(u16 val) // Returns the number of 1-bits in val (MSVC path).
{
- return __builtin_popcountll(val);
+ return std::bitset<16>(val).count(); // count() yields std::size_t, implicitly narrowed to int
}
-inline int LeastSignificantSetBit(u8 val)
+__forceinline static int CountSetBits(u32 val) // Returns the number of 1-bits in val (MSVC path).
{
- return __builtin_ctz(val);
+ return std::bitset<32>(val).count(); // count() yields std::size_t, implicitly narrowed to int
}
-inline int LeastSignificantSetBit(u16 val)
+__forceinline static int CountSetBits(u64 val) // Returns the number of 1-bits in val (MSVC path).
{
- return __builtin_ctz(val);
+ return std::bitset<64>(val).count(); // count() yields std::size_t, implicitly narrowed to int
}
-inline int LeastSignificantSetBit(u32 val)
+__forceinline static int LeastSignificantSetBit(u8 val) // Returns the zero-based index of the lowest 1-bit in val (MSVC path).
{
- return __builtin_ctz(val);
+ unsigned long count; // receives the bit index written by the intrinsic
+ _BitScanForward(&count, val); // found/not-found return value is ignored; count is not meaningful when val == 0
+ return count; // implicit unsigned long -> int conversion
}
-inline int LeastSignificantSetBit(u64 val)
+__forceinline static int LeastSignificantSetBit(u16 val) // Returns the zero-based index of the lowest 1-bit in val (MSVC path).
{
- return __builtin_ctzll(val);
+ unsigned long count; // receives the bit index written by the intrinsic
+ _BitScanForward(&count, val); // found/not-found return value is ignored; count is not meaningful when val == 0
+ return count; // implicit unsigned long -> int conversion
+}
+// Zero-based position of the lowest set bit of a 32-bit value (MSVC path).
+__forceinline static int LeastSignificantSetBit(u32 val)
+{
+ // The intrinsic writes the bit position through its first argument; its
+ // found/not-found return value is not consulted here.
+ unsigned long lowest_index;
+ _BitScanForward(&lowest_index, val);
+ return static_cast<int>(lowest_index);
+}
+// Returns the zero-based index of the lowest 1-bit in a 64-bit value (MSVC path).
+// On 64-bit builds val must be non-zero (the intrinsic leaves the index unset
+// otherwise); the 32-bit fallback returns 0 for val == 0.
+__forceinline static int LeastSignificantSetBit(u64 val)
+{
+#if defined(_WIN64)
+ unsigned long count;
+ _BitScanForward64(&count, val);
+ return count;
+#else
+ // No 64-bit scan intrinsic on 32-bit targets, so scan each 32-bit half.
+ // Whether a bit was found is reported by the intrinsic's RETURN VALUE; the
+ // index it writes is a legitimate 0 when bit 0 is set and is left unwritten
+ // when the mask is zero, so the index must never be used as the "found" test.
+ unsigned long tmp;
+ if (_BitScanForward(&tmp, (u32)(val & 0x00000000FFFFFFFFull)))
+ return tmp;
+ if (_BitScanForward(&tmp, (u32)((val & 0xFFFFFFFF00000000ull) >> 32)))
+ return tmp + 32;
+ return 0;
+#endif
}
#endif
@@ -201,10 +213,6 @@ public:
BitSet& operator^=(BitSet other) { return *this = *this ^ other; } // In-place XOR: assigns (*this ^ other) to *this and returns the result of that assignment.
BitSet& operator<<=(IntTy shift) { return *this = *this << shift; } // In-place left shift: assigns (*this << shift) to *this and returns the result of that assignment.
BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; } // In-place right shift: assigns (*this >> shift) to *this and returns the result of that assignment.
- // Warning: Even though on modern CPUs this is a single fast instruction,
- // Dolphin's official builds do not currently assume POPCNT support on x86,
- // so slower explicit bit twiddling is generated. Still should generally
- // be faster than a loop.
constexpr unsigned int Count() const { return CountSetBits(m_val); } // Number of set bits in m_val, as computed by CountSetBits.
constexpr Iterator begin() const { return ++Iterator(m_val, 0); } // Iterator built from (m_val, 0) and pre-incremented once; presumably the increment advances to the first set bit — confirm against Iterator.
constexpr Iterator end() const { return Iterator(m_val, -1); } // Iterator built from (m_val, -1); presumably -1 marks the past-the-end position — confirm against Iterator.