Optimizations of log2 for ivec4

This commit is contained in:
Christophe Riccio
2014-11-24 01:56:36 +01:00
parent 117634c7ea
commit e8fbcf76dd
5 changed files with 171 additions and 40 deletions

View File

@@ -35,20 +35,22 @@
namespace glm{
namespace detail
{
template <bool isFloat>
struct compute_log2{};
template <>
struct compute_log2<true>
{
template <typename T>
GLM_FUNC_QUALIFIER T operator() (T Value) const
# if GLM_LANG & GLM_LANG_CXX11_FLAG
using std::log2;
# else
template <typename genType>
genType log2(genType Value)
{
# if GLM_LANG & GLM_LANG_CXX11_FLAG
return std::log2(Value);
# else
return std::log(Value) * static_cast<T>(1.4426950408889634073599246810019);
# endif
return std::log(Value) * static_cast<genType>(1.4426950408889634073599246810019);
}
# endif
// Primary compute_log2 template, selected when isFloat is left at its default
// of true. It hands the vector and the scalar `log2` visible in this namespace
// (std::log2 or the local fallback, depending on GLM_LANG) to detail::functor1.
// NOTE(review): functor1 presumably applies the function to each component;
// confirm against its definition.
template <typename T, precision P, template <class, precision> class vecType, bool isFloat = true>
struct compute_log2
{
	GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & vec)
	{
		// Name the dispatcher once so the return statement reads as a plain call.
		typedef detail::functor1<T, T, P, vecType> apply_type;
		return apply_type::call(log2, vec);
	}
};
@@ -121,17 +123,13 @@ namespace detail
template <typename genType>
GLM_FUNC_QUALIFIER genType log2(genType x)
{
GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559 || std::numeric_limits<genType>::is_integer,
"GLM core 'log2' only accept floating-point inputs. Include <glm/gtx/integer.hpp> for additional integer support.");
assert(x > genType(0)); // log2 is only defined on the range (0, inf]
return detail::compute_log2<std::numeric_limits<genType>::is_iec559>()(x);
return log2(tvec1<genType>(x)).x;
}
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_QUALIFIER vecType<T, P> log2(vecType<T, P> const & x)
{
return detail::functor1<T, T, P, vecType>::call(log2, x);
return detail::compute_log2<T, P, vecType, std::numeric_limits<T>::is_iec559>::call(x);
}
// sqrt

View File

@@ -564,11 +564,12 @@
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
#define GLM_ARCH_PURE 0x0000
#define GLM_ARCH_SSE2 0x0001
#define GLM_ARCH_SSE3 0x0002
#define GLM_ARCH_SSE4 0x0004
#define GLM_ARCH_AVX 0x0008
#define GLM_ARCH_AVX2 0x0010
#define GLM_ARCH_X86 0x0001
#define GLM_ARCH_SSE2 0x0002
#define GLM_ARCH_SSE3 0x0004
#define GLM_ARCH_SSE4 0x0008
#define GLM_ARCH_AVX 0x0010
#define GLM_ARCH_AVX2 0x0020
#if defined(GLM_FORCE_PURE)
# define GLM_ARCH GLM_ARCH_PURE

View File

@@ -29,19 +29,35 @@
namespace glm{
namespace detail
{
GLM_FUNC_QUALIFIER unsigned int nlz(unsigned int x)
template <typename T, precision P, template <class, precision> class vecType>
struct compute_log2<T, P, vecType, false>
{
return 31u - findMSB(x);
}
template <>
struct compute_log2<false>
{
template <typename T>
GLM_FUNC_QUALIFIER T operator() (T const & Value) const
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & vec)
{
return Value <= static_cast<T>(1) ? T(0) : T(32) - nlz(Value - T(1));
// Equivalent to `return findMSB(vec);` but saves one function call in the assembly generated by Visual C++ (VC).
//return findMSB(vec);
return detail::compute_findMSB_vec<T, P, vecType, sizeof(T) * 8>::call(vec);
}
};
# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM))
// Integer log2 specialization for tvec4<int, P>, implemented with the
// _BitScanReverse intrinsic, one scan per component.
template <precision P>
struct compute_log2<int, P, tvec4, false>
{
	// Returns the zero-based index of the most significant set bit of Value,
	// or -1 when Value is zero.
	// NOTE(review): -1 for zero follows the GLSL findMSB convention; confirm it
	// matches what the generic compute_findMSB_vec path returns.
	GLM_FUNC_QUALIFIER static int scan(int Value)
	{
		// _BitScanReverse writes its result through an `unsigned long*`. Scanning
		// into a real `unsigned long` local avoids writing past a 4-byte `int`
		// component on targets where `unsigned long` is wider than `int`.
		unsigned long Index = 0;
		// Go through `unsigned int` first so a negative value is not
		// sign-extended when `unsigned long` is wider than `int`.
		unsigned long const Mask = static_cast<unsigned long>(static_cast<unsigned int>(Value));
		// The intrinsic returns zero when no bit is set, in which case Index is
		// not meaningful and must not be used.
		unsigned char const Found = _BitScanReverse(&Index, Mask);
		return Found ? static_cast<int>(Index) : -1;
	}

	// Computes the per-component result for all four components of vec.
	GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & vec)
	{
		tvec4<int, P> Result(glm::uninitialize);
		Result.x = scan(vec.x);
		Result.y = scan(vec.y);
		Result.z = scan(vec.z);
		Result.w = scan(vec.w);
		return Result;
	}
};
# endif//GLM_ARCH != GLM_ARCH_PURE
}//namespace detail
}//namespace glm