Added SIMD optimization for geometric functions

2016-05-28 20:17:34 +02:00
parent 29fa0f1607
commit fb66c79ca4
2 changed files with 54 additions and 6 deletions
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@@ -51,6 +51,31 @@ namespace detail
 			return (tmp.x + tmp.y) + (tmp.z + tmp.w);
 		}
 	};
 	// Generic cross-product helper for three-component vectors.
 	// Wrapped in a struct so that it can be partially specialized.
 	template <typename T, precision P>
 	struct compute_cross
 	{
 		// Returns the cross product x × y, computed component by component.
 		// Only IEC 559 (IEEE 754) floating-point element types are accepted.
 		GLM_FUNC_QUALIFIER static tvec3<T, P> call(tvec3<T, P> const & x, tvec3<T, P> const & y)
 		{
 			GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'cross' accepts only floating-point inputs");
 			T const cx = x.y * y.z - y.y * x.z;
 			T const cy = x.z * y.x - y.z * x.x;
 			T const cz = x.x * y.y - y.x * x.y;
 			return tvec3<T, P>(cx, cy, cz);
 		}
 	};
 	// Generic normalization helper, usable with any vector template.
 	// Wrapped in a struct so that it can be partially specialized.
 	template <typename T, precision P, template <typename, precision> class vecType>
 	struct compute_normalize
 	{
 		// Scales v by the inverse square root of dot(v, v).
 		// Only IEC 559 (IEEE 754) floating-point element types are accepted.
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v)
 		{
 			GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'normalize' accepts only floating-point inputs");
 			T const squaredLength = dot(v, v);
 			return v * inversesqrt(squaredLength);
 		}
 	};
 }//namespace detail
 	// length
@@ -104,12 +129,7 @@ namespace detail
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER tvec3<T, P> cross(tvec3<T, P> const & x, tvec3<T, P> const & y)
 	{
-		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'cross' accepts only floating-point inputs");
+		return detail::compute_cross<T, P>::call(x, y);
-		return tvec3<T, P>(
-			x.y * y.z - y.y * x.z,
-			x.z * y.x - y.z * x.x,
-			x.x * y.y - y.x * x.y);
 	}
 	// normalize
--- a/glm/detail/func_geometric_simd.inl
+++ b/glm/detail/func_geometric_simd.inl
@@ -14,6 +14,34 @@ namespace detail
 			return _mm_cvtss_f32(dot0);
 		}
 	};
 	// SSE-backed specialization of compute_cross for float vectors.
 	template <precision P>
 	struct compute_cross<float, P>
 	{
 		// Packs a and b into 4-lane registers (x in the lowest lane, 0.0f in
 		// the highest) and hands them to glm_f32v4_xpd.
 		// NOTE(review): glm_f32v4_xpd is defined outside this view; presumably
 		// it yields the cross product in the low three lanes — confirm.
 		GLM_FUNC_QUALIFIER static tvec3<float, P> call(tvec3<float, P> const & a, tvec3<float, P> const & b)
 		{
 			// The vec4 is only a carrier for the register; its contents are
 			// overwritten immediately, so it is left uninitialized on construction.
 			tvec4<float, P> packed(uninitialize);
 			packed.data = glm_f32v4_xpd(
 				_mm_set_ps(0.0f, a.z, a.y, a.x),
 				_mm_set_ps(0.0f, b.z, b.y, b.x));
 			// Convert the four-component carrier to the three-component result type.
 			return tvec3<float, P>(packed);
 		}
 	};
 	// SSE-backed specialization of compute_normalize for four-component float vectors.
 	template <precision P>
 	struct compute_normalize<float, P, tvec4>
 	{
 		// Forwards the vector's raw register to glm_f32v4_nrm and wraps the
 		// returned register in a tvec4.
 		// NOTE(review): glm_f32v4_nrm is defined outside this view; presumably
 		// it normalizes all four lanes — confirm.
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
 			// Contents are overwritten right away, so skip initialization.
 			tvec4<float, P> normalized(uninitialize);
 			normalized.data = glm_f32v4_nrm(v.data);
 			return normalized;
 		}
 	};
 }//namespace detail
 }//namespace glm