All the vec4 geometry functions have SIMD optimizations
This commit is contained in:
parent
6ea28c83ff
commit
b5521ca7c2
@ -10,6 +10,24 @@
|
|||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
// Helper functor that computes the length of a vector.
// Template parameters: vecType is the vector class template, T its scalar
// type and P its precision qualifier.
// call(v) returns sqrt(dot(v, v)) as a scalar of type T.
// NOTE(review): presumably this primary template is the generic path and
// SIMD-backed vector types provide a specialization elsewhere -- confirm.
template <template <typename, precision> class vecType, typename T, precision P>
|
||||||
|
struct compute_length
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static T call(vecType<T, P> const & v)
|
||||||
|
{
|
||||||
|
return sqrt(dot(v, v));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Helper functor that computes the distance between two points.
// Template parameters: vecType is the vector class template, T its scalar
// type and P its precision qualifier.
// call(p0, p1) returns length(p1 - p0) as a scalar of type T.
// NOTE(review): presumably this primary template is the generic path and
// SIMD-backed vector types provide a specialization elsewhere -- confirm.
template <template <typename, precision> class vecType, typename T, precision P>
|
||||||
|
struct compute_distance
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static T call(vecType<T, P> const & p0, vecType<T, P> const & p1)
|
||||||
|
{
|
||||||
|
return length(p1 - p0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <template <class, precision> class vecType, typename T, precision P>
|
template <template <class, precision> class vecType, typename T, precision P>
|
||||||
struct compute_dot{};
|
struct compute_dot{};
|
||||||
|
|
||||||
@ -112,7 +130,7 @@ namespace detail
|
|||||||
{
|
{
|
||||||
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'length' accepts only floating-point inputs");
|
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'length' accepts only floating-point inputs");
|
||||||
|
|
||||||
return sqrt(dot(v, v));
|
return detail::compute_length<vecType, T, P>::call(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
// distance
|
// distance
|
||||||
@ -127,7 +145,7 @@ namespace detail
|
|||||||
template <typename T, precision P, template <typename, precision> class vecType>
|
template <typename T, precision P, template <typename, precision> class vecType>
|
||||||
GLM_FUNC_QUALIFIER T distance(vecType<T, P> const & p0, vecType<T, P> const & p1)
|
GLM_FUNC_QUALIFIER T distance(vecType<T, P> const & p0, vecType<T, P> const & p1)
|
||||||
{
|
{
|
||||||
return length(p1 - p0);
|
return detail::compute_distance<vecType, T, P>::call(p0, p1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// dot
|
// dot
|
||||||
|
@ -10,8 +10,7 @@ namespace detail
|
|||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y)
|
GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y)
|
||||||
{
|
{
|
||||||
__m128 const dot0 = glm_f32v1_dot(x.data, y.data);
|
return _mm_cvtss_f32(glm_f32v1_dot(x.data, y.data));
|
||||||
return _mm_cvtss_f32(dot0);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -36,9 +35,8 @@ namespace detail
|
|||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
|
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
|
||||||
{
|
{
|
||||||
__m128 const nrm0 = glm_f32v4_nrm(v.data);
|
|
||||||
tvec4<float, P> result(uninitialize);
|
tvec4<float, P> result(uninitialize);
|
||||||
result.data = nrm0;
|
result.data = glm_f32v4_nrm(v.data);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -48,9 +46,8 @@ namespace detail
|
|||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & N, tvec4<float, P> const & I, tvec4<float, P> const & Nref)
|
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & N, tvec4<float, P> const & I, tvec4<float, P> const & Nref)
|
||||||
{
|
{
|
||||||
// Pass N, I and Nref as three separate arguments; the original had '.' in
// place of the first ',' which parsed as the member access N.data.I.
__m128 const ffd0 = glm_f32v4_ffd(N.data, I.data, Nref.data);
|
|
||||||
tvec4<float, P> result(uninitialize);
|
tvec4<float, P> result(uninitialize);
|
||||||
result.data = ffd0;
|
// Pass N, I and Nref as three separate arguments; the original had '.' in
// place of the first ',' which parsed as the member access N.data.I.
result.data = glm_f32v4_ffd(N.data, I.data, Nref.data);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -60,9 +57,8 @@ namespace detail
|
|||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & I, tvec4<float, P> const & N)
|
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & I, tvec4<float, P> const & N)
|
||||||
{
|
{
|
||||||
__m128 const rfe0 = glm_f32v4_rfe(I.data, N.data);
|
|
||||||
tvec4<float, P> result(uninitialize);
|
tvec4<float, P> result(uninitialize);
|
||||||
result.data = rfe0;
|
result.data = glm_f32v4_rfe(I.data, N.data);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user