291 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| #ifndef GLM_SSE_VEC4_H
 | |
| #define GLM_SSE_VEC4_H
 | |
| 
 | |
| #include <xmmintrin.h>
 | |
| #include <emmintrin.h>
 | |
| 
 | |
| namespace glm{
 | |
| namespace sse{
 | |
| 
 | |
// Packs four 2-bit lane selectors into a single 8-bit shuffle immediate.
// fp0 occupies bits 0-1, fp1 bits 2-3, fp2 bits 4-5 and fp3 bits 6-7.
#define GLM_SHUFFLE(fp3, fp2, fp1, fp0) \
	(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))

// Broadcast constants shared by the SSE routines in this header.
const __m128 zero = _mm_setzero_ps();
const __m128 one = _mm_set1_ps(1.0f);
const __m128 two = _mm_set1_ps(2.0f);
const __m128 three = _mm_set1_ps(3.0f);
// Lane 0 holds 8.0f, lane 1 holds 6.0f, lane 2 holds 4.0f, lane 3 holds 2.0f.
const __m128 pouet = _mm_setr_ps(8.0f, 6.0f, 4.0f, 2.0f);

// Alignment specifier (MSVC __declspec syntax); x is the alignment in bytes.
#define GLM_ALIGN(x) __declspec(align(x))
 | |
| 
 | |
| GLM_ALIGN(16) struct vec4
 | |
| {
 | |
| 	enum ENoInit
 | |
| 	{
 | |
| 		NO_INIT
 | |
| 	};
 | |
| 
 | |
| 	union
 | |
| 	{
 | |
| 		__m128 data;
 | |
| 		struct{float x, y, z, w;};
 | |
| 		float array[4];
 | |
| 	};
 | |
| 
 | |
| 	vec4();
 | |
| 	vec4(ENoInit NoInit);
 | |
| 	vec4(float s);
 | |
| 	vec4(float x, float y, float z, float w);
 | |
| 	vec4(float v[4]);
 | |
| 
 | |
| 	vec4& operator+=(const float s);
 | |
| 
 | |
| 	vec4& operator+=(const vec4& v);
 | |
| 	vec4& operator*=(const vec4& v);
 | |
| 
 | |
| 	vec4& operator++();
 | |
| };
 | |
| 
 | |
| __forceinline vec4::vec4()
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		eax, [address]
 | |
| 		xorps	xmm0, xmm0
 | |
| 		movaps	[eax], xmm0
 | |
| 	}
 | |
| }
 | |
| 
 | |
| __forceinline vec4::vec4(ENoInit NoInit)
 | |
| {}
 | |
| 
 | |
| __forceinline vec4::vec4(float s)
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		esi, [address]
 | |
| 		movss	xmm0, s
 | |
| 		shufps	xmm0, xmm0, 0	
 | |
| 		movaps	[esi], xmm0
 | |
| 	}
 | |
| }
 | |
| 
 | |
| __forceinline vec4::vec4(float x, float y, float z, float w)
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov			esi, address
 | |
| 		movss		xmm0, x
 | |
| 		movss		xmm1, y
 | |
| 		movss		xmm2, z
 | |
| 		movss		xmm3, w
 | |
| 		unpcklps	xmm0, xmm1
 | |
| 		unpcklps	xmm2, xmm3
 | |
| 		movlhps		xmm0, xmm2
 | |
| 		movaps		[esi], xmm0
 | |
| 	}
 | |
| }
 | |
| 
 | |
| __forceinline vec4::vec4(float v[4])
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		eax, [address]
 | |
| 		mov		ebx, [v]
 | |
| 		movups	xmm0, [ebx]
 | |
| 		movaps	[eax], xmm0
 | |
| 	}
 | |
| }
 | |
| 
 | |
| __forceinline vec4& vec4::operator+=(const float s)
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		eax, [address]
 | |
| 		movss	xmm1, s
 | |
| 		shufps	xmm1, xmm1, 0
 | |
| 		movaps	xmm0, [eax]
 | |
| 		addps	xmm0, xmm1	
 | |
| 		movaps	[eax], xmm0
 | |
| 	}
 | |
| 
 | |
| 	return *this;	
 | |
| }
 | |
| 
 | |
| __forceinline vec4& vec4::operator+=(const vec4& v)
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		eax, [address]
 | |
| 		mov		ebx, [v]
 | |
| 		movaps	xmm0, [eax]
 | |
| 		addps	xmm0, [ebx]	
 | |
| 		movaps	[eax], xmm0
 | |
| 	}
 | |
| 
 | |
| 	return *this;
 | |
| }
 | |
| 
 | |
| __forceinline vec4& vec4::operator*=(const vec4& v)
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		esi, address
 | |
| 		mov		edi, v
 | |
| 		movaps	xmm0, esi
 | |
| 		mulps	xmm0, edi	
 | |
| 		movaps	[esi], xmm0
 | |
| 	}
 | |
| 
 | |
| 	return *this;
 | |
| }
 | |
| 
 | |
| __forceinline vec4& vec4::operator++()
 | |
| {
 | |
| 	void* address = this;
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		eax, [address]
 | |
| 		movaps	xmm0, [eax]
 | |
| 		addps	xmm0, one	
 | |
| 		movaps	[eax], xmm0
 | |
| 	}
 | |
| 
 | |
| 	return *this;  
 | |
| }
 | |
| 
 | |
| __forceinline const vec4 operator- (const vec4& v)
 | |
| {
 | |
| 	vec4 result(vec4::NO_INIT);
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		esi, v
 | |
| 		xorps	xmm0, xmm0
 | |
| 		subps	xmm0, [esi]	
 | |
| 		movaps	result, xmm0
 | |
| 	}
 | |
| 
 | |
| 	result;
 | |
| }
 | |
| 
 | |
| __forceinline vec4 cross(const vec4& v1, const vec4& v2)
 | |
| {
 | |
| 	vec4 result(vec4::NO_INIT);
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		esi, v1
 | |
| 		mov		edi, v2
 | |
| 		movaps	xmm0, [esi]
 | |
| 		movaps	xmm1, [edi]
 | |
| 		shufps	xmm0, xmm0, _MM_SHUFFLE(3, 0, 2, 1)
 | |
| 		movaps	xmm2, xmm0
 | |
| 		shufps	xmm0, xmm0, _MM_SHUFFLE(3, 1, 0, 2)
 | |
| 		shufps	xmm1, xmm1, _MM_SHUFFLE(3, 0, 2, 1)
 | |
| 		movaps	xmm3, xmm1
 | |
| 		shufps	xmm1, xmm1, _MM_SHUFFLE(3, 1, 0, 2)
 | |
| 		mulps	xmm0, xmm3
 | |
| 		mulps	xmm1, xmm2
 | |
| 		subps	xmm0, xmm1
 | |
| 		movaps	result, xmm0
 | |
| 	}
 | |
| 
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| __forceinline float dot(const vec4& v1, const vec4& v2)
 | |
| {
 | |
| 	float result;
 | |
| 
 | |
| 	// All component processed
 | |
| 	//__asm
 | |
| 	//{
 | |
| 	//	mov		esi, v1
 | |
| 	//	mov		edi, v2
 | |
| 	//	movaps	xmm0, [esi]
 | |
| 	//	movaps	xmm1, [edi]
 | |
| 	//	mulps	xmm0, xmm1
 | |
| 	//	movaps	xmm1, xmm0
 | |
| 	//	shufps	xmm0, xmm0, _MM_SHUFFLE(2, 3, 0, 1)
 | |
| 	//	addps	xmm0, xmm1
 | |
| 	//	movaps	xmm1, xmm0
 | |
| 	//	shufps	xmm0, xmm0, _MM_SHUFFLE(0, 1, 2, 3)
 | |
| 	//	addps	xmm0, xmm1
 | |
| 	//	movss	result, xmm0
 | |
| 	//}
 | |
| 
 | |
| 	// SSE
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		esi, v1
 | |
| 		mov		edi, v2
 | |
| 		movaps	xmm0, [esi]		// w1, z1, y1, x1
 | |
| 		mulps	xmm0, [edi]		// w1 * w2, z1 * z2, y1 * y2, x1 * x2
 | |
| 		movhlps	xmm1, xmm0		// XX, XX, w1 * w2, z1 * z2
 | |
| 		addps	xmm0, xmm1		// XX, XX, y1 * y2 + w1 * w2, x1 * x2 + z1 * z2
 | |
| 		pshufd	xmm1, xmm0, 1	// XX, XX, XX, y1 * y2 + w1 * w2
 | |
| 		addss	xmm0, xmm1		// y1 * y2 + w1 * w2 + x1 * x2 + z1 * z2
 | |
| 		movss	result, xmm0
 | |
| 	}
 | |
| 
 | |
| 	// SSE 3
 | |
| 
 | |
| 	// SSE 4.1
 | |
| 	//__asm
 | |
| 	//{
 | |
| 	//	mov		esi, v1
 | |
| 	//	mov		edi, v2
 | |
| 	//	movaps	xmm0, [esi]
 | |
| 	//	dpps	xmm0, [edi]
 | |
| 	//	movss	result, xmm0
 | |
| 	//}
 | |
| 
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| __forceinline vec4 normalize(const vec4& v)
 | |
| {
 | |
| 	vec4 result(vec4::NO_INIT);
 | |
| 
 | |
| 	__asm
 | |
| 	{
 | |
| 		mov		esi, v
 | |
| 		movaps	xmm2, [esi]
 | |
| 		movaps	xmm0, xmm2
 | |
| 		mulps	xmm0, xmm0
 | |
| 		movaps	xmm1, xmm0
 | |
| 		shufps	xmm0, xmm0, _MM_SHUFFLE(2, 3, 0, 1)
 | |
| 		addps	xmm0, xmm1
 | |
| 		movaps	xmm1, xmm0
 | |
| 		shufps	xmm0, xmm0, _MM_SHUFFLE(0, 1, 2, 3)
 | |
| 		addps	xmm0, xmm1
 | |
| 		rsqrtps	xmm0, xmm0
 | |
| 		mulps	xmm2, xmm0
 | |
| 		movaps	result, xmm2
 | |
| 	}
 | |
| 
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| }//namespace sse
 | |
| }//namespace glm
 | |
| 
 | |
| void test_sse_vec4();
 | |
| 
 | |
| #endif//GLM_SSE_VEC4_H
 | 
