Added nlz, improved int log2, optimized findMSB with intrinsics

2011-10-11 19:15:41 +01:00
parent 09ee14ad4c
commit 3c2882439f
4 changed files with 139 additions and 119 deletions
--- a/glm/core/func_integer.inl
+++ b/glm/core/func_integer.inl
@@ -26,6 +26,11 @@
 /// @author Christophe Riccio
 ///////////////////////////////////////////////////////////////////////////////////

+#if(GLM_COMPILER & GLM_COMPILER_VC)
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+#endif
+
 namespace glm
 {
 	// uaddCarry
@@ -550,6 +555,32 @@ namespace glm
 	}

 	// findMSB
+#if(GLM_COMPILER & GLM_COMPILER_VC)
+
+	// Index of the highest set bit of Value (0 = LSB), or -1 when Value is 0 (GLSL findMSB).
+	template <typename genIUType>
+	GLM_FUNC_QUALIFIER int findMSB(genIUType const & Value)
+	{
+		unsigned long Result(0);
+		// _BitScanReverse returns 0 and leaves Result unspecified when no bit is set.
+		if(!_BitScanReverse(&Result, static_cast<unsigned long>(Value)))
+			return -1;
+		return int(Result);
+	}
+
+#elif(GLM_COMPILER & GLM_COMPILER_GCC) // no __has_builtin test: GCC versions lacking that macro fail to preprocess it
+
+	// Index of the highest set bit of Value (0 = LSB), or -1 when Value is 0 (GLSL findMSB).
+	template <typename genIUType>
+	GLM_FUNC_QUALIFIER int findMSB(genIUType const & Value)
+	{
+		// __builtin_clz counts leading zeros of an unsigned int and is undefined for 0,
+		// so zero is handled separately; 31 assumes a 32-bit unsigned int.
+		return Value ? 31 - __builtin_clz(static_cast<unsigned int>(Value)) : -1;
+	}
+
+#else
+
 	template <typename genIUType>
 	GLM_FUNC_QUALIFIER int findMSB
 	(
@@ -564,6 +595,7 @@ namespace glm
 		for(genIUType tmp = Value; tmp; tmp >>= 1, ++bit){}
 		return bit;
 	}
+#endif//(GLM_COMPILER)

 	template <typename T>
 	GLM_FUNC_QUALIFIER detail::tvec2<int> findMSB
--- a/glm/gtx/integer.hpp
+++ b/glm/gtx/integer.hpp
@@ -58,10 +58,14 @@ namespace glm
 	//! From GLM_GTX_integer extension.
 	int sqrt(int x);

-	//! Returns the log2 of x.
+	//! Returns the log2 of x. Can be reliably used to compute the mipmap count from the texture size.
 	//! From GLM_GTX_integer extension.
 	unsigned int log2(unsigned int x);

+	//! Returns the floor log2 of x.
+	//! From GLM_GTX_integer extension.
+	unsigned int floor_log2(unsigned int x);
+
 	//! Modulus. Returns x - y * floor(x / y) for each component in x using the floating point value y.
 	//! From GLM_GTX_integer extension.
 	int mod(int x, int y);
@@ -72,21 +76,25 @@ namespace glm
 	genType factorial(genType const & x);

 	//! 32bit signed integer. 
-	//! From GLM_GTX_unsigned_int extension.
+	//! From GLM_GTX_integer extension.
 	typedef signed int					sint;

 	//! Returns x raised to the y power.
-	//! From GLM_GTX_unsigned_int extension.
+	//! From GLM_GTX_integer extension.
 	uint pow(uint x, uint y);

 	//! Returns the positive square root of x. 
-	//! From GLM_GTX_unsigned_int extension.
+	//! From GLM_GTX_integer extension.
 	uint sqrt(uint x);

 	//! Modulus. Returns x - y * floor(x / y) for each component in x using the floating point value y.
-	//! From GLM_GTX_unsigned_int extension.
+	//! From GLM_GTX_integer extension.
 	uint mod(uint x, uint y);

+	//! Returns the number of leading zeros.
+	//! From GLM_GTX_integer extension.
+	uint nlz(uint x);
+
 	/// @}
 }//namespace glm

--- a/glm/gtx/integer.inl
+++ b/glm/gtx/integer.inl
@@ -54,10 +54,18 @@ namespace detail
 		return(x & 0x0000003f);
 	}
 }//namespace detail
-/*
-// Henry Gordon Dietz: http://aggregate.org/MAGIC/
+
 GLM_FUNC_QUALIFIER unsigned int log2(unsigned int x)
 {
+	return unsigned(32) - nlz(x - 1u);
+	//if(x <= 1)
+	//	return 0;
+	//return unsigned(32) - findLSB(x) - 1u;
+	
+	
+/*
+	// Henry Gordon Dietz: http://aggregate.org/MAGIC/
+
 	register int y = (x & (x - 1));

 	y |= -y;
@@ -69,10 +77,11 @@ GLM_FUNC_QUALIFIER unsigned int log2(unsigned int x)
 	x |= (x >> 16);
 	
 	return detail::ones32(x) - 1 - y;
-}
 */
+}
+
 // Henry Gordon Dietz: http://aggregate.org/MAGIC/
-unsigned int log2(unsigned int x)
+unsigned int floor_log2(unsigned int x)
 {
 	x |= (x >> 1);
 	x |= (x >> 2);
@@ -159,4 +168,45 @@ GLM_FUNC_QUALIFIER uint mod(uint x, uint y)
 	return x - y * (x / y);
 }

+#if(GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_GCC))
+
+GLM_FUNC_QUALIFIER unsigned int nlz(unsigned int x) 
+{
+	return 31u - findMSB(x); // findMSB is the 0-based index of the highest set bit, so 31 - index zeros precede it
+}
+
+#else
+
+// Portable count of leading zero bits of a 32-bit x, from Hacker's Delight: http://www.hackersdelight.org/HDcode/nlz.c.txt
+GLM_FUNC_QUALIFIER unsigned int nlz(unsigned int x) 
+{
+   int y, m, n;         // y: scratch test value, m: step taken this round, n: zeros counted so far
+
+   y = -int(x >> 16);      // If left half of x is 0,
+   m = (y >> 16) & 16;  // set n = 16.  If left half
+   n = 16 - m;          // is nonzero, set n = 0 and
+   x = x >> m;          // shift x right 16.
+                        // Now x is of the form 0000xxxx.
+   y = x - 0x100;       // If positions 8-15 are 0,
+   m = (y >> 16) & 8;   // add 8 to n and shift x left 8.
+   n = n + m;
+   x = x << m;
+
+   y = x - 0x1000;      // If positions 12-15 are 0,
+   m = (y >> 16) & 4;   // add 4 to n and shift x left 4.
+   n = n + m;
+   x = x << m;
+
+   y = x - 0x4000;      // If positions 14-15 are 0,
+   m = (y >> 16) & 2;   // add 2 to n and shift x left 2.
+   n = n + m;
+   x = x << m;
+
+   y = x >> 14;         // Set y = 0, 1, 2, or 3 (the two bits now at positions 14-15).
+   m = y & ~(y >> 1);   // Set m = 0, 1, 2, or 2 resp.
+   return unsigned(n + 2 - m); // The last two positions add 2, 1 or 0 zeros; x == 0 yields 32.
+}
+
+#endif//(GLM_COMPILER)
+
 }//namespace glm