00001 #ifndef __Float4Vector_H__
00002 #define __Float4Vector_H__
00003
00004
00005
00024 #include <stdio.h>
00025 #include <math.h>
00026
00027 #include "sseUtil.h"
00028
00029
00030 class Float4Vector
00031 {
00032 private:
00033
00034
00035 int dataSize4;
00036 int dataSize;
00037
00038 float* palign;
00039
00040 float* data;
00041 float* data1;
00042
00043 public:
00044
00045 Float4Vector(int size);
00046 Float4Vector(int size, float adata);
00047 Float4Vector(const Float4Vector& v);
00048 Float4Vector operator=(const Float4Vector& v);
00049
00050 ~Float4Vector(){ data=0; delete data1; delete[] palign; }
00051
00052
00053 inline int size(){ return dataSize; }
00054 inline int sizeData(){ return dataSize4; }
00055
00056 void load(Float4Vector* v);
00057
00058 inline float* getData(){return data;}
00059
00060 inline float* get0(){return data;}
00061 inline float* get(){return data1;}
00062
00063
00064 inline void set0(int line, int index, float value){ data[(index << 2)+line]= value; }
00065 inline float get0(int line, int index){ return data[(index << 2)+line]; }
00066
00067
00068 inline void set(int line, int index, float value){ data1[(index << 2)+line]= value; }
00069 inline float get(int line, int index){ return data1[(index << 2)+line]; }
00070
00071 void setAll(float value);
00072
00073
00074 float vT_v(int line);
00075 float norme2(int line);
00076 float sum(int line);
00077
00078 float minimum(int line);
00079 float maximum(int line);
00080 float mean(int line){ return sum(line)/dataSize; }
00081 float sigma(int line);
00082
00083
00084 inline SSE4 vT_v()
00085 {
00086 SSE4 val;
00087 xorps_r2r(xmm0, xmm0);
00088
00089 for(int i=0; i<dataSize4; i+=4)
00090 {
00091 movaps_m2r(data[i],xmm1);
00092 mulps_r2r(xmm1, xmm1);
00093 addps_r2r(xmm1, xmm0);
00094 }
00095
00096 movaps_r2m(xmm0, val.m);
00097
00098 return val;
00099 }
00100
00101 inline SSE4 norme2()
00102 {
00103 SSE4 val;
00104 xorps_r2r(xmm0, xmm0);
00105
00106 for(int i=0; i<dataSize4; i+=4)
00107 {
00108 movaps_m2r(data[i], xmm1);
00109
00110 mulps_r2r(xmm1, xmm1);
00111 addps_r2r(xmm1, xmm0);
00112 }
00113
00114 sqrtps_r2r(xmm0,xmm0);
00115 movaps_r2m(xmm0, val.m);
00116
00117 return val;
00118 }
00119
00120 inline SSE4 sum()
00121 {
00122 SSE4 val;
00123 xorps_r2r(xmm0, xmm0);
00124
00125
00126 for(int i=0; i<dataSize4; i+=4)
00127 {
00128 movaps_m2r(data[i],xmm1);
00129 addps_r2r(xmm1, xmm0);
00130 }
00131
00132 movaps_r2m(xmm0, val.m);
00133
00134 return val;
00135 }
00136
00137 inline SSE4 minimum()
00138 {
00139 SSE4 val;
00140 movaps_m2r(data[0], xmm0);
00141
00142 for(int i=4; i<dataSize4; i+=4)
00143 {
00144 movaps_m2r(data[i],xmm1);
00145 minps_r2r(xmm1, xmm0);
00146 }
00147
00148 movaps_r2m(xmm0, val.m);
00149
00150 return val;
00151 }
00152
00153 inline SSE4 maximum()
00154 {
00155 SSE4 val;
00156 movaps_m2r(data[0], xmm0);
00157
00158 for(int i=4; i<dataSize4; i+=4)
00159 {
00160 movaps_m2r(data[i],xmm1);
00161 maxps_r2r(xmm1, xmm0);
00162 }
00163
00164 movaps_r2m(xmm0, val.m);
00165
00166 return val;
00167 }
00168
00169 inline SSE4 mean()
00170 {
00171 SSE4 val;
00172 xorps_r2r(xmm0, xmm0);
00173
00174 val.f[0]= dataSize;
00175 val.f[1]= dataSize;
00176 val.f[2]= dataSize;
00177 val.f[3]= dataSize;
00178 movaps_m2r(val.m, xmm4);
00179
00180 for(int i=0; i<dataSize4; i+=4)
00181 {
00182 movaps_m2r(data[i],xmm2);
00183
00184 addps_r2r(xmm2, xmm0);
00185 }
00186
00187 divps_r2r(xmm4, xmm0);
00188
00189 movaps_r2m(xmm0, val.m);
00190
00191 return val;
00192 }
00193
00194 inline SSE4 sigma()
00195 {
00196 SSE4 val;
00197 xorps_r2r(xmm0, xmm0);
00198 xorps_r2r(xmm1, xmm1);
00199
00200 val.f[0]= dataSize;
00201 val.f[1]= dataSize;
00202 val.f[2]= dataSize;
00203 val.f[3]= dataSize;
00204 movaps_m2r(val.m, xmm4);
00205
00206 for(int i=0; i<dataSize4; i+=4)
00207 {
00208 movaps_m2r(data[i],xmm2);
00209
00210 movaps_r2r(xmm2, xmm3);
00211 mulps_r2r(xmm2, xmm3);
00212
00213 addps_r2r(xmm2, xmm0);
00214 addps_r2r(xmm3, xmm1);
00215 }
00216
00217 divps_r2r(xmm4, xmm0);
00218 divps_r2r(xmm4, xmm1);
00219
00220 mulps_r2r(xmm0, xmm0);
00221 subps_r2r(xmm0, xmm0);
00222 sqrtps_r2r(xmm0,xmm0);
00223
00224 movaps_r2m(xmm0, val.m);
00225
00226 return val;
00227 }
00228
00229
00230
00231 Float4Vector sqrt()
00232 {
00233 Float4Vector v(dataSize);
00234
00235 #ifndef SSE_USED
00236 for(int i=0; i<dataSize; i++)
00237 for(int line=0; line<4; line++)
00238 v.set0(line, i, ::sqrt(data[(i << 2)+line]) );
00239 #else
00240 float* vdata= v.getData();
00241
00242 for(int i=0; i<dataSize4; i+=4)
00243 {
00244 movaps_m2r(data[i],xmm1);
00245 sqrtps_r2r(xmm1, xmm0);
00246 movaps_r2m(xmm0, vdata[i]);
00247 }
00248 #endif
00249
00250 return v;
00251 }
00252
00253 Float4Vector pow(float pow);
00254
00255 Float4Vector sqr()
00256 {
00257 Float4Vector v(dataSize);
00258
00259 #ifndef SSE_USED
00260 for(int i=0; i<dataSize; i++)
00261 for(int line=0; line<4; line++)
00262 v.set0(line, i, data[(i << 2)+line]*data[(i << 2)+line] );
00263 #else
00264 float* vdata= v.getData();
00265
00266 for(int i=0; i<dataSize4; i+=4)
00267 {
00268 movaps_m2r(data[i],xmm1);
00269 movaps_r2r(xmm1, xmm0);
00270 mulps_r2r(xmm1, xmm0);
00271 movaps_r2m(xmm0, vdata[i]);
00272 }
00273 #endif
00274
00275 return v;
00276 }
00277
00278 Float4Vector exp();
00279 Float4Vector log();
00280 Float4Vector log(float base);
00281
00282 Float4Vector sin();
00283 Float4Vector cos();
00284 Float4Vector tan();
00285
00286
00287
00288 void operator+=(Float4Vector& v)
00289 {
00290 #ifndef SSE_USED
00291 for(int i=0; i<dataSize; i++)
00292 for(int line=0; line<4; line++)
00293 data[(i << 2)+line] += v.get0(line,i);
00294 #else
00295 float* vdata= v.getData();
00296
00297 for(int i=0; i<dataSize4; i+=4)
00298 {
00299 movaps_m2r(data[i], xmm0);
00300 movaps_m2r(vdata[i],xmm1);
00301 addps_r2r(xmm1, xmm0);
00302 movaps_r2m(xmm0, data[i]);
00303 }
00304 #endif
00305 }
00306
00307 void operator-=(Float4Vector& v)
00308 {
00309 #ifndef SSE_USED
00310 for(int i=0; i<dataSize; i++)
00311 for(int line=0; line<4; line++)
00312 data[(i << 2)+line] -= v.get0(line,i);
00313 #else
00314 float* vdata= v.getData();
00315
00316 for(int i=0; i<dataSize4; i+=4)
00317 {
00318 movaps_m2r(data[i], xmm0);
00319 movaps_m2r(vdata[i],xmm1);
00320 subps_r2r(xmm1, xmm0);
00321 movaps_r2m(xmm0, data[i]);
00322 }
00323 #endif
00324 }
00325
00326 void operator*=(Float4Vector& v)
00327 {
00328 #ifndef SSE_USED
00329 for(int i=0; i<dataSize; i++)
00330 for(int line=0; line<4; line++)
00331 data[(i << 2)+line] *= v.get0(line,i);
00332 #else
00333 float* vdata= v.getData();
00334
00335 for(int i=0; i<dataSize4; i+=4)
00336 {
00337 movaps_m2r(data[i], xmm0);
00338 movaps_m2r(vdata[i],xmm1);
00339 mulps_r2r(xmm1, xmm0);
00340 movaps_r2m(xmm0, data[i]);
00341 }
00342 #endif
00343 }
00344
00345 void operator/=(Float4Vector& v)
00346 {
00347 #ifndef SSE_USED
00348 for(int i=0; i<dataSize; i++)
00349 for(int line=0; line<4; line++)
00350 data[(i << 2)+line] /= v.get0(line,i);
00351 #else
00352 float* vdata= v.getData();
00353
00354 for(int i=0; i<dataSize4; i+=4)
00355 {
00356 movaps_m2r(data[i], xmm0);
00357 movaps_m2r(vdata[i],xmm1);
00358 divps_r2r(xmm1, xmm0);
00359 movaps_r2m(xmm0, data[i]);
00360 }
00361 #endif
00362 }
00363
00364
00365
00366 friend Float4Vector operator+(Float4Vector& v, float a);
00367 friend Float4Vector operator+(float a, Float4Vector& v);
00368 friend Float4Vector operator-(Float4Vector& v, float a);
00369 friend Float4Vector operator-(float a, Float4Vector& v);
00370 friend Float4Vector operator*(Float4Vector& v, float a);
00371 friend Float4Vector operator*(float a, Float4Vector& v);
00372 friend Float4Vector operator/(Float4Vector& v, float a);
00373
00374 friend Float4Vector operator+(Float4Vector& v1, Float4Vector& v2);
00375 friend Float4Vector operator-(Float4Vector& v1, Float4Vector& v2);
00376
00377 void output();
00378 void output(FILE* file);
00379 };
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398 #endif
00399
00400