Merge branch '0.9.1' of ssh://ogl-math.git.sourceforge.net/gitroot/ogl-math/ogl-math into 0.9.1
This commit is contained in:
@@ -327,7 +327,7 @@ namespace detail
|
||||
# define GLM_RESTRICT_VAR __restrict
|
||||
#elif((GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC31))
|
||||
# define GLM_DEPRECATED __attribute__((__deprecated__))
|
||||
# define GLM_ALIGN(x) __attribute__(aligned(x))
|
||||
# define GLM_ALIGN(x) __attribute__((aligned(x)))
|
||||
# if(GLM_COMPILER >= GLM_COMPILER_GCC33)
|
||||
# define GLM_RESTRICT __restrict__
|
||||
# define GLM_RESTRICT_VAR __restrict__
|
||||
|
||||
@@ -258,7 +258,7 @@ inline __m128 sse_inf_ps(__m128 x)
|
||||
}
|
||||
|
||||
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
||||
// By Elan Ruskin,
|
||||
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||
inline __m128 sse_sqrt_wip_ss(__m128 const & x)
|
||||
{
|
||||
__m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode
|
||||
|
||||
@@ -133,6 +133,14 @@ namespace glm
|
||||
detail::tmat4x4<T> const & proj,
|
||||
detail::tvec4<U> const & viewport);
|
||||
|
||||
//! Define a picking region
|
||||
//! From GLM_GTC_matrix_transform extension.
|
||||
template <typename T, typename U>
|
||||
detail::tmat4x4<T> pickMatrix(
|
||||
detail::tvec2<T> const & center,
|
||||
detail::tvec2<T> const & delta,
|
||||
detail::tvec4<U> const & viewport);
|
||||
|
||||
//! Build a look at view matrix.
|
||||
//! From GLM_GTC_matrix_transform extension.
|
||||
template <typename T>
|
||||
|
||||
@@ -324,6 +324,25 @@ namespace matrix_transform
|
||||
return detail::tvec3<T>(obj);
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
detail::tmat4x4<T> pickMatrix
|
||||
(
|
||||
detail::tvec2<T> const & center,
|
||||
detail::tvec2<T> const & delta,
|
||||
detail::tvec4<U> const & viewport
|
||||
)
|
||||
{
|
||||
assert(delta.x > 0.0f && delta.y > 0.0f)
|
||||
detail::tmat4x4<T> Result(1.0f);
|
||||
|
||||
if(!(delta.x > 0.0f && delta.y > 0.0f))
|
||||
return Result; // Error
|
||||
|
||||
// Translate and scale the picked region to the entire window
|
||||
Result = translate(Result, (T(viewport[2]) - T(2) * (x - T(viewport[0]))) / delta.x, (T(viewport[3]) - T(2) * (y - T(viewport[1]))) / delta.y, T(0));
|
||||
return scale(Result, T(viewport[2]) / delta.x, T(viewport[3]) / delta.y, T(1));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline detail::tmat4x4<T> lookAt(
|
||||
const detail::tvec3<T>& eye,
|
||||
|
||||
@@ -150,7 +150,7 @@ namespace glm
|
||||
detail::tquat<T> const & q1,
|
||||
detail::tquat<T> const & q2);
|
||||
|
||||
//! Returns a LERP interpolated quaternion of x and y according a.
|
||||
//! Returns a SLERP interpolated quaternion of x and y according to a.
|
||||
//! From GLM_GTC_quaternion extension.
|
||||
template <typename T>
|
||||
detail::tquat<T> mix(
|
||||
|
||||
@@ -370,6 +370,43 @@ namespace quaternion{
|
||||
k0 * x.z + k1 * y2.z);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline detail::tquat<T> mix2
|
||||
(
|
||||
detail::tquat<T> const & x,
|
||||
detail::tquat<T> const & y,
|
||||
T const & a
|
||||
)
|
||||
{
|
||||
bool flip = false;
|
||||
if(a <= T(0)) return x;
|
||||
if(a >= T(1)) return y;
|
||||
|
||||
T cos_t = dot(x, y);
|
||||
if(cos_t < T(0))
|
||||
{
|
||||
cos_t = -cos_t;
|
||||
flip = true;
|
||||
}
|
||||
|
||||
T alpha(0), beta(0);
|
||||
|
||||
if(T(1) - cos_t < 1e-7)
|
||||
beta = T(1) - alpha;
|
||||
else
|
||||
{
|
||||
T theta = acos(cos_t);
|
||||
T sin_t = sin(theta);
|
||||
beta = sin(theta * (T(1) - alpha)) / sin_t;
|
||||
alpha = sin(alpha * theta) / sin_t;
|
||||
}
|
||||
|
||||
if(flip)
|
||||
alpha = -alpha;
|
||||
|
||||
return normalize(beta * x + alpha * y2);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline detail::tquat<T> conjugate
|
||||
(
|
||||
|
||||
@@ -142,7 +142,7 @@ namespace glm
|
||||
//! Multiply matrix x by matrix y component-wise, i.e.,
|
||||
//! result[i][j] is the scalar product of x[i][j] and y[i][j].
|
||||
//! (From GLM_GTX_simd_mat4 extension).
|
||||
detail::fmat4x4SIMD simdMatrixCompMult(
|
||||
detail::fmat4x4SIMD matrixCompMult(
|
||||
detail::fmat4x4SIMD const & x,
|
||||
detail::fmat4x4SIMD const & y);
|
||||
|
||||
@@ -150,23 +150,23 @@ namespace glm
|
||||
//! and the second parameter r as a row vector
|
||||
//! and does a linear algebraic matrix multiply c * r.
|
||||
//! (From GLM_GTX_simd_mat4 extension).
|
||||
detail::fmat4x4SIMD simdOuterProduct(
|
||||
detail::fmat4x4SIMD outerProduct(
|
||||
detail::fvec4SIMD const & c,
|
||||
detail::fvec4SIMD const & r);
|
||||
|
||||
//! Returns the transposed matrix of x
|
||||
//! (From GLM_GTX_simd_mat4 extension).
|
||||
detail::fmat4x4SIMD simdTranspose(
|
||||
detail::fmat4x4SIMD transpose(
|
||||
detail::fmat4x4SIMD const & x);
|
||||
|
||||
//! Return the determinant of a mat4 matrix.
|
||||
//! (From GLM_GTX_simd_mat4 extension).
|
||||
float simdDeterminant(
|
||||
float determinant(
|
||||
detail::fmat4x4SIMD const & m);
|
||||
|
||||
//! Return the inverse of a mat4 matrix.
|
||||
//! (From GLM_GTX_simd_mat4 extension).
|
||||
detail::fmat4x4SIMD simdInverse(
|
||||
detail::fmat4x4SIMD inverse(
|
||||
detail::fmat4x4SIMD const & m);
|
||||
|
||||
}//namespace simd_mat4
|
||||
|
||||
@@ -242,7 +242,7 @@ namespace simd_mat4
|
||||
detail::fmat4x4SIMD const & x
|
||||
)
|
||||
{
|
||||
detail::tmat4x4<float> Result;
|
||||
GLM_ALIGN(16) detail::tmat4x4<float> Result;
|
||||
_mm_store_ps(&Result[0][0], x.Data[0].Data);
|
||||
_mm_store_ps(&Result[1][0], x.Data[1].Data);
|
||||
_mm_store_ps(&Result[2][0], x.Data[2].Data);
|
||||
@@ -250,7 +250,7 @@ namespace simd_mat4
|
||||
return Result;
|
||||
}
|
||||
|
||||
inline detail::fmat4x4SIMD simdMatrixCompMult
|
||||
inline detail::fmat4x4SIMD matrixCompMult
|
||||
(
|
||||
detail::fmat4x4SIMD const & x,
|
||||
detail::fmat4x4SIMD const & y
|
||||
@@ -264,30 +264,40 @@ namespace simd_mat4
|
||||
return result;
|
||||
}
|
||||
|
||||
inline detail::fmat4x4SIMD simdOuterProduct
|
||||
// Builds a matrix whose i-th element is c multiplied by the i-th
// component of r.
inline detail::fmat4x4SIMD outerProduct
(
	detail::fvec4SIMD const & c,
	detail::fvec4SIMD const & r
)
{
	// Replicate each component of r into all four lanes of its own register.
	__m128 const r0 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(0, 0, 0, 0));
	__m128 const r1 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(1, 1, 1, 1));
	__m128 const r2 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(2, 2, 2, 2));
	__m128 const r3 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(3, 3, 3, 3));

	detail::fmat4x4SIMD product(detail::fmat4x4SIMD::null);
	product[0].Data = _mm_mul_ps(c.Data, r0);
	product[1].Data = _mm_mul_ps(c.Data, r1);
	product[2].Data = _mm_mul_ps(c.Data, r2);
	product[3].Data = _mm_mul_ps(c.Data, r3);
	return product;
}
|
||||
|
||||
inline detail::fmat4x4SIMD simdTranspose(detail::fmat4x4SIMD const & m)
|
||||
// Returns the transposed matrix of m; the work is delegated to
// detail::sse_transpose_ps, which reads from m and writes into the result.
inline detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m)
{
	detail::fmat4x4SIMD transposed;
	detail::sse_transpose_ps(&m[0].Data, &transposed[0].Data);
	return transposed;
}
|
||||
|
||||
inline float simdDeterminant(detail::fmat4x4SIMD const & m)
|
||||
// Returns the determinant of m as a plain float: the lowest lane of the
// register produced by detail::sse_det_ps is stored into the result.
inline float determinant(detail::fmat4x4SIMD const & m)
{
	float det;
	_mm_store_ss(&det, detail::sse_det_ps(&m[0].Data));
	return det;
}
|
||||
|
||||
inline detail::fmat4x4SIMD simdInverse(detail::fmat4x4SIMD const & m)
|
||||
inline detail::fmat4x4SIMD inverse(detail::fmat4x4SIMD const & m)
|
||||
{
|
||||
detail::fmat4x4SIMD result;
|
||||
detail::sse_inverse_ps(&m[0].Data, &result[0].Data);
|
||||
|
||||
@@ -336,23 +336,47 @@ namespace glm
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
float simdLength(
|
||||
float length(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! Less accurate but much faster than simdLength.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
float fastLength(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! Slightly more accurate but much slower than simdLength.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
float niceLength(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdLength4(
|
||||
detail::fvec4SIMD length4(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! Less accurate but much faster than simdLength4.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD fastLength4(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the length of x, i.e., sqrt(x * x).
|
||||
//! Slightly more accurate but much slower than simdLength4.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD niceLength4(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the distance between p0 and p1, i.e., length(p0 - p1).
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
float simdDistance(
|
||||
float distance(
|
||||
detail::fvec4SIMD const & p0,
|
||||
detail::fvec4SIMD const & p1);
|
||||
|
||||
//! Returns the distance between p0 and p1, i.e., length(p0 - p1).
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdDistance4(
|
||||
detail::fvec4SIMD distance4(
|
||||
detail::fvec4SIMD const & p0,
|
||||
detail::fvec4SIMD const & p1);
|
||||
|
||||
@@ -364,19 +388,25 @@ namespace glm
|
||||
|
||||
//! Returns the dot product of x and y, i.e., result = x * y.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdDot4(
|
||||
detail::fvec4SIMD dot4(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y);
|
||||
|
||||
//! Returns the cross product of x and y.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdCross(
|
||||
detail::fvec4SIMD cross(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y);
|
||||
|
||||
//! Returns a vector in the same direction as x but with length of 1.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdNormalize(
|
||||
detail::fvec4SIMD normalize(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns a vector in the same direction as x but with length of 1.
|
||||
//! Less accurate but much faster than simdNormalize.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD fastNormalize(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! If dot(Nref, I) < 0.0, return N, otherwise, return -N.
|
||||
@@ -389,7 +419,7 @@ namespace glm
|
||||
//! For the incident vector I and surface orientation N,
|
||||
//! returns the reflection direction : result = I - 2.0 * dot(N, I) * N.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdReflect(
|
||||
detail::fvec4SIMD reflect(
|
||||
detail::fvec4SIMD const & I,
|
||||
detail::fvec4SIMD const & N);
|
||||
|
||||
@@ -397,10 +427,39 @@ namespace glm
|
||||
//! and the ratio of indices of refraction eta,
|
||||
//! return the refraction vector.
|
||||
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
|
||||
detail::fvec4SIMD simdRefract(
|
||||
detail::fvec4SIMD refract(
|
||||
detail::fvec4SIMD const & I,
|
||||
detail::fvec4SIMD const & N,
|
||||
float const & eta);
|
||||
|
||||
//! Returns the positive square root of x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||
detail::fvec4SIMD sqrt(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the positive square root of x with the nicest quality but very slow.
|
||||
//! Slightly more accurate but much slower than sqrt.
|
||||
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||
detail::fvec4SIMD niceSqrt(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the positive square root of x
|
||||
//! Less accurate but much faster than sqrt.
|
||||
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||
detail::fvec4SIMD fastSqrt(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the reciprocal of the positive square root of x.
|
||||
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||
detail::fvec4SIMD inversesqrt(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
//! Returns the reciprocal of the positive square root of x.
|
||||
//! Faster than inversesqrt but less accurate.
|
||||
//! (From GLM_GTX_simd_vec4 extension, exponential function)
|
||||
detail::fvec4SIMD fastInversesqrt(
|
||||
detail::fvec4SIMD const & x);
|
||||
|
||||
}//namespace simd_vec4
|
||||
}//namespace gtx
|
||||
}//namespace glm
|
||||
|
||||
@@ -275,7 +275,7 @@ namespace glm
|
||||
detail::fvec4SIMD const & x
|
||||
)
|
||||
{
|
||||
detail::tvec4<float> Result;
|
||||
GLM_ALIGN(4) detail::tvec4<float> Result;
|
||||
_mm_store_ps(&Result[0], x.Data);
|
||||
return Result;
|
||||
}
|
||||
@@ -530,25 +530,67 @@ namespace glm
|
||||
return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data);
|
||||
}
|
||||
|
||||
inline float simdLength
|
||||
inline float length
|
||||
(
|
||||
detail::fvec4SIMD const & x
|
||||
)
|
||||
{
|
||||
detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
|
||||
detail::fvec4SIMD sqt0 = sqrt(dot0);
|
||||
float Result = 0;
|
||||
_mm_store_ss(&Result, detail::sse_len_ps(x.Data));
|
||||
_mm_store_ss(&Result, sqt0.Data);
|
||||
return Result;
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdLength4
|
||||
inline float fastLength
|
||||
(
|
||||
detail::fvec4SIMD const & x
|
||||
)
|
||||
{
|
||||
return detail::sse_len_ps(x.Data);
|
||||
detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
|
||||
detail::fvec4SIMD sqt0 = fastSqrt(dot0);
|
||||
float Result = 0;
|
||||
_mm_store_ss(&Result, sqt0.Data);
|
||||
return Result;
|
||||
}
|
||||
|
||||
inline float simdDistance
|
||||
inline float niceLength
|
||||
(
|
||||
detail::fvec4SIMD const & x
|
||||
)
|
||||
{
|
||||
detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
|
||||
detail::fvec4SIMD sqt0 = niceSqrt(dot0);
|
||||
float Result = 0;
|
||||
_mm_store_ss(&Result, sqt0.Data);
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Length of x returned as a SIMD vector: sqrt applied to dot4(x, x).
inline detail::fvec4SIMD length4
(
	detail::fvec4SIMD const & x
)
{
	detail::fvec4SIMD const squaredLength = dot4(x, x);
	return sqrt(squaredLength);
}
|
||||
|
||||
// Length of x returned as a SIMD vector: fastSqrt applied to dot4(x, x).
inline detail::fvec4SIMD fastLength4
(
	detail::fvec4SIMD const & x
)
{
	detail::fvec4SIMD const squaredLength = dot4(x, x);
	return fastSqrt(squaredLength);
}
|
||||
|
||||
// Length of x returned as a SIMD vector: niceSqrt applied to dot4(x, x).
inline detail::fvec4SIMD niceLength4
(
	detail::fvec4SIMD const & x
)
{
	detail::fvec4SIMD const squaredLength = dot4(x, x);
	return niceSqrt(squaredLength);
}
|
||||
|
||||
inline float distance
|
||||
(
|
||||
detail::fvec4SIMD const & p0,
|
||||
detail::fvec4SIMD const & p1
|
||||
@@ -559,7 +601,7 @@ namespace glm
|
||||
return Result;
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdDistance4
|
||||
inline detail::fvec4SIMD distance4
|
||||
(
|
||||
detail::fvec4SIMD const & p0,
|
||||
detail::fvec4SIMD const & p1
|
||||
@@ -568,7 +610,7 @@ namespace glm
|
||||
return detail::sse_dst_ps(p0.Data, p1.Data);
|
||||
}
|
||||
|
||||
inline float simdDot
|
||||
inline float dot
|
||||
(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y
|
||||
@@ -579,16 +621,16 @@ namespace glm
|
||||
return Result;
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdDot4
|
||||
inline detail::fvec4SIMD dot4
|
||||
(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y
|
||||
)
|
||||
{
|
||||
return detail::sse_dot_ss(x.Data, y.Data);
|
||||
return detail::sse_dot_ps(x.Data, y.Data);
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdCross
|
||||
inline detail::fvec4SIMD cross
|
||||
(
|
||||
detail::fvec4SIMD const & x,
|
||||
detail::fvec4SIMD const & y
|
||||
@@ -597,15 +639,29 @@ namespace glm
|
||||
return detail::sse_xpd_ps(x.Data, y.Data);
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdNormalize
|
||||
inline detail::fvec4SIMD normalize
|
||||
(
|
||||
detail::fvec4SIMD const & x
|
||||
)
|
||||
{
|
||||
return detail::sse_nrm_ps(x.Data);
|
||||
__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
|
||||
__m128 isr0 = inversesqrt(dot0).Data;
|
||||
__m128 mul0 = _mm_mul_ps(x.Data, isr0);
|
||||
return mul0;
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdFaceforward
|
||||
// Scales x by fastInversesqrt of its squared length (dot of x with itself).
inline detail::fvec4SIMD fastNormalize
(
	detail::fvec4SIMD const & x
)
{
	__m128 const lengthSq = detail::sse_dot_ps(x.Data, x.Data);
	__m128 const invLength = fastInversesqrt(lengthSq).Data;
	return _mm_mul_ps(x.Data, invLength);
}
|
||||
|
||||
inline detail::fvec4SIMD faceforward
|
||||
(
|
||||
detail::fvec4SIMD const & N,
|
||||
detail::fvec4SIMD const & I,
|
||||
@@ -615,7 +671,7 @@ namespace glm
|
||||
return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data);
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdReflect
|
||||
inline detail::fvec4SIMD reflect
|
||||
(
|
||||
detail::fvec4SIMD const & I,
|
||||
detail::fvec4SIMD const & N
|
||||
@@ -624,7 +680,7 @@ namespace glm
|
||||
return detail::sse_rfe_ps(I.Data, N.Data);
|
||||
}
|
||||
|
||||
inline detail::fvec4SIMD simdRefract
|
||||
inline detail::fvec4SIMD refract
|
||||
(
|
||||
detail::fvec4SIMD const & I,
|
||||
detail::fvec4SIMD const & N,
|
||||
@@ -634,6 +690,39 @@ namespace glm
|
||||
return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
|
||||
}
|
||||
|
||||
// Square root computed as x * inversesqrt(x).
// NOTE(review): a zero lane presumably turns into 0 * inf here - confirm
// whether callers can pass zero.
inline detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
{
	__m128 const invRoot = inversesqrt(x.Data).Data;
	return _mm_mul_ps(invRoot, x.Data);
}
|
||||
|
||||
// Square root taken directly with the _mm_sqrt_ps instruction.
inline detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
{
	__m128 const root = _mm_sqrt_ps(x.Data);
	return root;
}
|
||||
|
||||
// Square root computed as x * fastInversesqrt(x).
inline detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
{
	__m128 const invRoot = fastInversesqrt(x.Data).Data;
	return _mm_mul_ps(invRoot, x.Data);
}
|
||||
|
||||
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
|
||||
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||
inline detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
|
||||
{
|
||||
GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
|
||||
GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
|
||||
|
||||
__m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode
|
||||
__m128 halfrecip = _mm_mul_ps(half, recip);
|
||||
__m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
|
||||
return _mm_mul_ps(halfrecip, threeminus_xrr);
|
||||
}
|
||||
|
||||
// Reciprocal square root taken straight from _mm_rsqrt_ps, with no
// refinement step.
inline detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
{
	__m128 const estimate = _mm_rsqrt_ps(x.Data);
	return estimate;
}
|
||||
|
||||
}//namespace simd_vec4
|
||||
}//namespace gtx
|
||||
}//namespace glm
|
||||
|
||||
Reference in New Issue
Block a user