Added SIMD sqrt functions support, ticket #2
This commit is contained in:
parent
9fbfb8c208
commit
d77694f8ac
@ -258,7 +258,7 @@ inline __m128 sse_inf_ps(__m128 x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
||||||
// By Elan Ruskin,
|
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||||
inline __m128 sse_sqrt_wip_ss(__m128 const & x)
|
inline __m128 sse_sqrt_wip_ss(__m128 const & x)
|
||||||
{
|
{
|
||||||
__m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode
|
__m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode
|
||||||
|
|||||||
@ -401,6 +401,28 @@ namespace glm
|
|||||||
detail::fvec4SIMD const & I,
|
detail::fvec4SIMD const & I,
|
||||||
detail::fvec4SIMD const & N,
|
detail::fvec4SIMD const & N,
|
||||||
float const & eta);
|
float const & eta);
|
||||||
|
|
||||||
|
//! Returns the positive square root of each of the four components of x.
|
||||||
|
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||||
|
detail::fvec4SIMD simdSqrt(
|
||||||
|
detail::fvec4SIMD const & x);
|
||||||
|
|
||||||
|
//! Returns the positive square root of x with an accuracy slightly lower than or equal to that of simdSqrt, but much faster.
|
||||||
|
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||||
|
detail::fvec4SIMD simdFastSqrt(
|
||||||
|
detail::fvec4SIMD const & x);
|
||||||
|
|
||||||
|
//! Returns the reciprocal of the positive square root of each component of x.
|
||||||
|
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||||
|
detail::fvec4SIMD simdInversesqrt(
|
||||||
|
detail::fvec4SIMD const & x);
|
||||||
|
|
||||||
|
//! Returns an estimate of the reciprocal of the positive square root of x;
|
||||||
|
//! faster than simdInversesqrt but less accurate.
|
||||||
|
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||||
|
detail::fvec4SIMD simdFastInversesqrt(
|
||||||
|
detail::fvec4SIMD const & x);
|
||||||
|
|
||||||
}//namespace simd_vec4
|
}//namespace simd_vec4
|
||||||
}//namespace gtx
|
}//namespace gtx
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|||||||
@ -634,6 +634,34 @@ namespace glm
|
|||||||
return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
|
return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Component-wise square root of the four packed floats held in x,
// computed with the full-precision packed SSE sqrt instruction.
inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x)
{
	__m128 const roots = _mm_sqrt_ps(x.Data);
	return roots;
}
|
||||||
|
|
||||||
|
// Fast approximate square root of the four packed floats held in x.
// Computed as x * rsqrt(x) from the hardware reciprocal-sqrt estimate, so the
// result carries the reduced precision of _mm_rsqrt_ps rather than that of
// _mm_sqrt_ps.
// Lanes equal to zero are masked to 0: rsqrt(0) is +inf and 0 * inf would
// otherwise produce NaN.
inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x)
{
	__m128 const estimate = _mm_mul_ps(x.Data, _mm_rsqrt_ps(x.Data));
	// All-ones mask in every lane where x != 0, all-zeros where x == 0.
	__m128 const nonzero = _mm_cmpneq_ps(x.Data, _mm_setzero_ps());
	return _mm_and_ps(nonzero, estimate);
}
|
||||||
|
|
||||||
|
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
|
||||||
|
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||||
|
inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x)
|
||||||
|
{
|
||||||
|
GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
|
||||||
|
GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
|
||||||
|
|
||||||
|
__m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode
|
||||||
|
__m128 halfrecip = _mm_mul_ps(half, recip);
|
||||||
|
__m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
|
||||||
|
return _mm_mul_ps(halfrecip, threeminus_xrr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Component-wise reciprocal square root of x taken straight from the SSE
// rsqrt estimate instruction, with no refinement step.
inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x)
{
	__m128 const estimate = _mm_rsqrt_ps(x.Data);
	return estimate;
}
|
||||||
|
|
||||||
}//namespace simd_vec4
|
}//namespace simd_vec4
|
||||||
}//namespace gtx
|
}//namespace gtx
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user