// 3D geometry support SIMD SSE4.1
// programmed by delta fares
#include <emmintrin.h>
#include <stdio.h>
typedef __m128 vec;
typedef struct {
__m128 x;
__m128 y;
__m128 z;} matrix;
// 3D Math
vec vec4(float x,float y, float z,float w);
vec vec3(float x,float y, float z);
vec vec2(float x,float y);
inline vec operator +(vec a,vec b);
inline vec operator -(vec a,vec b);
inline vec operator *(float k,vec v);
inline vec operator *(vec v,float k);
inline vec operator /(vec v,float k);
inline vec cross(vec a, vec b);
inline float norm(vec v);
inline float norm2(vec v); // SSE4.1
inline float dot(vec a,vec b);
inline float dot2(vec a,vec b); //SSE4.1
inline float dist2(vec a,vec b); //SSE41
inline void normalize(vec& v);
inline void normalize2(vec& v); //SSE4.1
inline vec reflect(vec v,vec n); //SSE4.1
matrix mat3(float a11,float a21,float a31,
float a12,float a22,float a32,
float a13,float a23,float a33);
matrix mat4(float a11,float a21,float a31,float a41,
float a12,float a22,float a32,float a42,
float a13,float a23,float a33,float a43);
inline vec operator *(matrix& mat,vec v);
.
.
// body program
vec vec3(float x,float y, float z) {
return _mm_set_ps(0,z,y,x);}
.
.
inline vec operator +(vec a,vec b) {
__asm {
movaps xmm0,a
movaps xmm1,b
addps xmm0,xmm1
}
};
inline vec operator -(vec a,vec b) {
__asm {
movaps xmm0,a
movaps xmm1,b
subps xmm0,xmm1
}
};
.
.
inline vec operator *(vec v,float k) {
_asm {
movss xmm0,k
shufps xmm0,xmm0,00h
movaps xmm1,v
mulps xmm0,xmm1
}
};
.
.
inline void normalize(vec& v) {
_asm {
mov eax,v
movaps xmm1,[eax]
movaps xmm0,xmm1 // xmm0 = v
mulps xmm1,xmm1 // x² y² z²
movaps xmm2,xmm1
movaps xmm3,xmm1
shufps xmm1,xmm1,_MM_SHUFFLE(0, 0, 0, 0) // xmm1 = x²
shufps xmm2,xmm2,_MM_SHUFFLE(1, 1, 1, 1) // xmm2 = y²
shufps xmm3,xmm3,_MM_SHUFFLE(2, 2, 2, 2) // xmm3 = z²
addps xmm1,xmm2
addps xmm1,xmm3 // xmm1 = x²+y²+z²
sqrtps xmm1,xmm1 // xmm1= sqrt(v*v)
divps xmm0,xmm1 // xmm0 = v/sqrt(v*v)
movaps [eax],xmm0
}
}
.
.
.
inline vec reflect(vec v,vec n) {
return 2*dot2(v,n)*n-v;
}
inline vec operator *(matrix& mat,vec v) {
vec res;
res.m128_f32[0] = dot2(mat.x,v);
res.m128_f32[1] = dot2(mat.y,v);
res.m128_f32[2] = dot2(mat.z,v);
res.m128_f32[3] = 0;
return res;
}
int main(int argc, char* argv[]) {
matrix mat = mat3(1 , 8,3,
3,-1,1,
4, 2,1);
vec a,b,c,V;
a = vec3(51,13,10);
b = vec3(1,1,0);
c =dot(a,b)*cross(a,b)+3*b+a;
show(c);
normalize2(c);
show(c);
vec r = reflect(b,a);
show(r);
printf("%f %f %f\n", dot(a,b),dot2(a,b)); // compare SSE2 with SSE4.1
V = mat*a;
show(V);
return 0;
}
// programmed by delta fares
#include <emmintrin.h>
#include <stdio.h>
typedef __m128 vec;
typedef struct {
__m128 x;
__m128 y;
__m128 z;} matrix;
// 3D Math
vec vec4(float x,float y, float z,float w);
vec vec3(float x,float y, float z);
vec vec2(float x,float y);
inline vec operator +(vec a,vec b);
inline vec operator -(vec a,vec b);
inline vec operator *(float k,vec v);
inline vec operator *(vec v,float k);
inline vec operator /(vec v,float k);
inline vec cross(vec a, vec b);
inline float norm(vec v);
inline float norm2(vec v); // SSE4.1
inline float dot(vec a,vec b);
inline float dot2(vec a,vec b); //SSE4.1
inline float dist2(vec a,vec b); //SSE41
inline void normalize(vec& v);
inline void normalize2(vec& v); //SSE4.1
inline vec reflect(vec v,vec n); //SSE4.1
matrix mat3(float a11,float a21,float a31,
float a12,float a22,float a32,
float a13,float a23,float a33);
matrix mat4(float a11,float a21,float a31,float a41,
float a12,float a22,float a32,float a42,
float a13,float a23,float a33,float a43);
inline vec operator *(matrix& mat,vec v);
.
.
// body program
vec vec3(float x,float y, float z) {
return _mm_set_ps(0,z,y,x);}
.
.
inline vec operator +(vec a,vec b) {
__asm {
movaps xmm0,a
movaps xmm1,b
addps xmm0,xmm1
}
};
inline vec operator -(vec a,vec b) {
__asm {
movaps xmm0,a
movaps xmm1,b
subps xmm0,xmm1
}
};
.
.
inline vec operator *(vec v,float k) {
_asm {
movss xmm0,k
shufps xmm0,xmm0,00h
movaps xmm1,v
mulps xmm0,xmm1
}
};
.
.
inline void normalize(vec& v) {
_asm {
mov eax,v
movaps xmm1,[eax]
movaps xmm0,xmm1 // xmm0 = v
mulps xmm1,xmm1 // x² y² z²
movaps xmm2,xmm1
movaps xmm3,xmm1
shufps xmm1,xmm1,_MM_SHUFFLE(0, 0, 0, 0) // xmm1 = x²
shufps xmm2,xmm2,_MM_SHUFFLE(1, 1, 1, 1) // xmm2 = y²
shufps xmm3,xmm3,_MM_SHUFFLE(2, 2, 2, 2) // xmm3 = z²
addps xmm1,xmm2
addps xmm1,xmm3 // xmm1 = x²+y²+z²
sqrtps xmm1,xmm1 // xmm1= sqrt(v*v)
divps xmm0,xmm1 // xmm0 = v/sqrt(v*v)
movaps [eax],xmm0
}
}
.
.
.
inline vec reflect(vec v,vec n) {
return 2*dot2(v,n)*n-v;
}
inline vec operator *(matrix& mat,vec v) {
vec res;
res.m128_f32[0] = dot2(mat.x,v);
res.m128_f32[1] = dot2(mat.y,v);
res.m128_f32[2] = dot2(mat.z,v);
res.m128_f32[3] = 0;
return res;
}
int main(int argc, char* argv[]) {
matrix mat = mat3(1 , 8,3,
3,-1,1,
4, 2,1);
vec a,b,c,V;
a = vec3(51,13,10);
b = vec3(1,1,0);
c =dot(a,b)*cross(a,b)+3*b+a;
show(c);
normalize2(c);
show(c);
vec r = reflect(b,a);
show(r);
printf("%f %f %f\n", dot(a,b),dot2(a,b)); // compare SSE2 with SSE4.1
V = mat*a;
show(V);
return 0;
}
Aucun commentaire:
Enregistrer un commentaire
Remarque : Seul un membre de ce blog est autorisé à enregistrer un commentaire.