diff --git a/glm/core/intrinsic_common.inl b/glm/core/intrinsic_common.inl
index 28ad3c6c..cee0f8c4 100644
--- a/glm/core/intrinsic_common.inl
+++ b/glm/core/intrinsic_common.inl
@@ -270,12 +270,12 @@ inline __m128 _mm_inf_ps(__m128 x)
 
 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
 // By Elan Ruskin,
-inline __m128 _mm_sqrt_wip_ss(__m128 const x)
+inline __m128 _mm_sqrt_wip_ss(__m128 const & x)
 {
- __m128 recip = _mm_rsqrt_ss( x ); // "estimate" opcode
- const static __m128 three = { 3, 3, 3, 3 }; // aligned consts for fast load
- const static __m128 half = { 0.5,0.5,0.5,0.5 };
- __m128 halfrecip = _mm_mul_ss( half, recip );
- __m128 threeminus_xrr = _mm_sub_ss( three, _mm_mul_ss( x, _mm_mul_ss ( recip, recip ) ) );
- return _mm_mul_ss( halfrecip, threeminus_xrr );
+ __m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode: r ~= 1 / sqrt(x), lowest lane only
+ const static __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
+ const static __m128 half = {0.5, 0.5, 0.5, 0.5};
+ __m128 halfrecip = _mm_mul_ss(half, recip); // 0.5 * r
+ __m128 threeminus_xrr = _mm_sub_ss(three, _mm_mul_ss(x, _mm_mul_ss(recip, recip))); // 3 - x * r * r
+ return _mm_mul_ss(halfrecip, threeminus_xrr); // refined estimate: 0.5 * r * (3 - x * r * r)
 }
diff --git a/glm/setup.hpp b/glm/setup.hpp
index 80698816..0828a3cf 100644
--- a/glm/setup.hpp
+++ b/glm/setup.hpp
@@ -250,6 +250,37 @@
 # define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
 #endif
 
+#define GLM_INSTRUCTION_SET_SSSE3 0x00000008 // tmmintrin.h (SSSE3 + SSE3 + SSE2 + SSE1)
+#define GLM_INSTRUCTION_SET_POPCNT 0x00000010 // popcntintrin.h; was 0x00000800, which duplicated GLM_INSTRUCTION_SET_AVX. NOTE(review): confirm 0x10 is not used by a flag defined outside this hunk
+#define GLM_INSTRUCTION_SET_SSE4A 0x00000020 // ammintrin.h (SSE4A + POPCNT + SSE3 + SSE2 + SSE)
+#define GLM_INSTRUCTION_SET_SSE4_1 0x00000040 // smmintrin.h (SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
+#define GLM_INSTRUCTION_SET_SSE4_2 0x00000080 // nmmintrin.h (SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
+#define GLM_INSTRUCTION_SET_SSE5 0x00000100 // bmmintrin.h (SSE4A + SSE3 + SSE2 + SSE deprecated)
+#define GLM_INSTRUCTION_SET_AES 0x00000200 // wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
+#define GLM_INSTRUCTION_SET_PCLMUL 0x00000400 // wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
+#define GLM_INSTRUCTION_SET_AVX 0x00000800 // immintrin.h (AES + PCLMUL + SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
+
+#if(GLM_INSTRUCTION_SET != GLM_INSTRUCTION_SET_PURE)
+# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_MMX)
+# include <mmintrin.h>
+# endif
+# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_3DNOW)
+# include <mm3dnow.h>
+# endif
+# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE)
+# include <xmmintrin.h>
+# endif
+# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE2)
+# include <emmintrin.h>
+# endif
+# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE3)
+# include <pmmintrin.h>
+# endif
+# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSSE3)
+# include <tmmintrin.h>
+# endif
+#endif
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Swizzle operators
 