Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   Related Pages  

Float4Vector.hpp

00001 #ifndef __Float4Vector_H__
00002 #define __Float4Vector_H__
00003 
00004 
00005 
00024 #include <stdio.h>
00025 #include <math.h>
00026 
00027 #include "sseUtil.h"
00028 
00029 
00030 class Float4Vector
00031 {
00032 private:
00033         
00034 
00035 int             dataSize4;
00036 int             dataSize;
00037 
00038 float*  palign;
00039 
00040 float*  data;
00041 float*  data1;
00042 
00043 public:
00044         
00045 Float4Vector(int size);
00046 Float4Vector(int size, float adata);
00047 Float4Vector(const Float4Vector& v);
00048 Float4Vector operator=(const Float4Vector& v);
00049 
00050 ~Float4Vector(){ data=0; delete data1; delete[] palign; }
00051 
00052 
00053 inline int size(){ return dataSize; }
00054 inline int sizeData(){ return dataSize4; }
00055 
00056 void load(Float4Vector* v);
00057 
00058 inline float* getData(){return data;}
00059 
00060 inline float* get0(){return data;}
00061 inline float* get(){return data1;}
00062 
00063 // base 0
00064 inline void set0(int line, int index, float value){ data[(index << 2)+line]= value; }
00065 inline float get0(int line, int index){ return data[(index << 2)+line]; }
00066 
00067 // base 1
00068 inline void set(int line, int index, float value){ data1[(index << 2)+line]= value; }
00069 inline float get(int line, int index){ return data1[(index << 2)+line]; }
00070 
00071 void setAll(float value);
00072 
00073 // computations
00074 float vT_v(int line);
00075 float norme2(int line);
00076 float sum(int line);
00077 
00078 float minimum(int line);
00079 float maximum(int line);
00080 float mean(int line){ return sum(line)/dataSize; }
00081 float sigma(int line);
00082 
00083 
00084 inline SSE4 vT_v()
00085         {
00086         SSE4 val;
00087         xorps_r2r(xmm0, xmm0);                          // 0
00088         
00089         for(int i=0; i<dataSize4; i+=4)
00090                 {
00091                 movaps_m2r(data[i],xmm1);               // get 4 data
00092                 mulps_r2r(xmm1, xmm1);                  // sqr
00093                 addps_r2r(xmm1, xmm0);                  // sum of sqr
00094                 }
00095 
00096         movaps_r2m(xmm0, val.m);                        // get 4 sum of square in one
00097 
00098         return val;
00099         }
00100 
00101 inline SSE4 norme2()
00102         {
00103         SSE4 val;
00104         xorps_r2r(xmm0, xmm0);                          // 0
00105 
00106         for(int i=0; i<dataSize4; i+=4)
00107                 {
00108                 movaps_m2r(data[i], xmm1);              // get 4 data
00109 
00110                 mulps_r2r(xmm1, xmm1);                  // sqr
00111                 addps_r2r(xmm1, xmm0);                  // sum of sqr
00112                 }
00113 
00114         sqrtps_r2r(xmm0,xmm0);                          // norme 2
00115         movaps_r2m(xmm0, val.m);                        // get 4 results
00116 
00117         return val;
00118         }
00119 
00120 inline SSE4 sum()
00121         {
00122         SSE4 val;
00123         xorps_r2r(xmm0, xmm0);                          // 0
00124         
00125 
00126         for(int i=0; i<dataSize4; i+=4)
00127                 {
00128                 movaps_m2r(data[i],xmm1);               // get 4 data
00129                 addps_r2r(xmm1, xmm0);                  // sum 
00130                 }
00131 
00132         movaps_r2m(xmm0, val.m);                        // get 4 sum 
00133 
00134         return val;
00135         }
00136 
00137 inline SSE4 minimum()
00138         {
00139         SSE4 val;
00140         movaps_m2r(data[0], xmm0);                              
00141 
00142         for(int i=4; i<dataSize4; i+=4)
00143                 {
00144                 movaps_m2r(data[i],xmm1);               // get 4 data
00145                 minps_r2r(xmm1, xmm0);                  // min 
00146                 }
00147 
00148         movaps_r2m(xmm0, val.m);                        // get result
00149 
00150         return val;
00151         }
00152 
00153 inline SSE4 maximum()
00154         {
00155         SSE4 val;
00156         movaps_m2r(data[0], xmm0);                              
00157 
00158         for(int i=4; i<dataSize4; i+=4)
00159                 {
00160                 movaps_m2r(data[i],xmm1);               // get 4 data
00161                 maxps_r2r(xmm1, xmm0);                  // min 
00162                 }
00163 
00164         movaps_r2m(xmm0, val.m);                        // get result
00165 
00166         return val;
00167         }
00168 
00169 inline SSE4 mean()
00170         {
00171         SSE4 val;
00172         xorps_r2r(xmm0, xmm0);                          // EX
00173 
00174         val.f[0]= dataSize;
00175         val.f[1]= dataSize;
00176         val.f[2]= dataSize;
00177         val.f[3]= dataSize;
00178         movaps_m2r(val.m, xmm4);
00179 
00180         for(int i=0; i<dataSize4; i+=4)
00181                 {
00182                 movaps_m2r(data[i],xmm2);               // get 4 data  x
00183 
00184                 addps_r2r(xmm2, xmm0);                  // sum
00185                 }
00186 
00187         divps_r2r(xmm4, xmm0);                          // EX
00188         
00189         movaps_r2m(xmm0, val.m);                        // get 4 sum of square in one
00190 
00191         return val;
00192         }
00193 
00194 inline SSE4 sigma()
00195         {
00196         SSE4 val;
00197         xorps_r2r(xmm0, xmm0);                          // EX
00198         xorps_r2r(xmm1, xmm1);                          // EX2  
00199 
00200         val.f[0]= dataSize;
00201         val.f[1]= dataSize;
00202         val.f[2]= dataSize;
00203         val.f[3]= dataSize;
00204         movaps_m2r(val.m, xmm4);
00205 
00206         for(int i=0; i<dataSize4; i+=4)
00207                 {
00208                 movaps_m2r(data[i],xmm2);               // get 4 data  x
00209                 
00210                 movaps_r2r(xmm2, xmm3);
00211                 mulps_r2r(xmm2, xmm3);                  // x2
00212 
00213                 addps_r2r(xmm2, xmm0);                  // sum
00214                 addps_r2r(xmm3, xmm1);                  // sum of sqr
00215                 }
00216 
00217         divps_r2r(xmm4, xmm0);          // EX
00218         divps_r2r(xmm4, xmm1);          // EX2
00219 
00220         mulps_r2r(xmm0, xmm0);          // EX
00221         subps_r2r(xmm0, xmm0);
00222         sqrtps_r2r(xmm0,xmm0);          // 4 sigma
00223         
00224         movaps_r2m(xmm0, val.m);                        // get 4 sum of square in one
00225 
00226         return val;
00227         }
00228 
00229 
00230 // math vectorial op
00231 Float4Vector sqrt()
00232         {
00233         Float4Vector v(dataSize);
00234 
00235 #ifndef SSE_USED
00236         for(int i=0; i<dataSize; i++)
00237                 for(int line=0; line<4; line++)
00238                         v.set0(line, i, ::sqrt(data[(i << 2)+line]) );
00239 #else
00240         float* vdata= v.getData();
00241 
00242         for(int i=0; i<dataSize4; i+=4)
00243                 {
00244                 movaps_m2r(data[i],xmm1);
00245                 sqrtps_r2r(xmm1, xmm0);
00246                 movaps_r2m(xmm0, vdata[i]);
00247                 }
00248 #endif
00249 
00250         return v;
00251         }
00252 
00253 Float4Vector pow(float pow);
00254 
00255 Float4Vector sqr()
00256         {
00257         Float4Vector v(dataSize);
00258 
00259 #ifndef SSE_USED
00260         for(int i=0; i<dataSize; i++)
00261                 for(int line=0; line<4; line++)
00262                         v.set0(line, i, data[(i << 2)+line]*data[(i << 2)+line] );
00263 #else
00264         float* vdata= v.getData();
00265 
00266         for(int i=0; i<dataSize4; i+=4)
00267                 {
00268                 movaps_m2r(data[i],xmm1);
00269                 movaps_r2r(xmm1, xmm0);
00270                 mulps_r2r(xmm1, xmm0);
00271                 movaps_r2m(xmm0, vdata[i]);
00272                 }
00273 #endif
00274 
00275         return v;
00276         }
00277 
00278 Float4Vector exp();
00279 Float4Vector log();
00280 Float4Vector log(float base);
00281 
00282 Float4Vector sin();
00283 Float4Vector cos();
00284 Float4Vector tan();
00285 
00286 
00287 // vector single operations
00288 void operator+=(Float4Vector& v)
00289         {
00290 #ifndef SSE_USED
00291         for(int i=0; i<dataSize; i++)
00292                 for(int line=0; line<4; line++)
00293                         data[(i << 2)+line] += v.get0(line,i);
00294 #else
00295         float* vdata= v.getData();
00296 
00297         for(int i=0; i<dataSize4; i+=4)
00298                 {
00299                 movaps_m2r(data[i], xmm0);
00300                 movaps_m2r(vdata[i],xmm1);
00301                 addps_r2r(xmm1, xmm0);
00302                 movaps_r2m(xmm0, data[i]);
00303                 }
00304 #endif
00305         }
00306 
00307 void operator-=(Float4Vector& v)
00308         {
00309 #ifndef SSE_USED
00310         for(int i=0; i<dataSize; i++)
00311                 for(int line=0; line<4; line++)
00312                         data[(i << 2)+line] -= v.get0(line,i);
00313 #else
00314         float* vdata= v.getData();
00315 
00316         for(int i=0; i<dataSize4; i+=4)
00317                 {
00318                 movaps_m2r(data[i], xmm0);
00319                 movaps_m2r(vdata[i],xmm1);
00320                 subps_r2r(xmm1, xmm0);
00321                 movaps_r2m(xmm0, data[i]);
00322                 }
00323 #endif
00324         }
00325 
00326 void operator*=(Float4Vector& v)
00327         {
00328 #ifndef SSE_USED
00329         for(int i=0; i<dataSize; i++)
00330                 for(int line=0; line<4; line++)
00331                         data[(i << 2)+line] *= v.get0(line,i);
00332 #else
00333         float* vdata= v.getData();
00334 
00335         for(int i=0; i<dataSize4; i+=4)
00336                 {
00337                 movaps_m2r(data[i], xmm0);
00338                 movaps_m2r(vdata[i],xmm1);
00339                 mulps_r2r(xmm1, xmm0);
00340                 movaps_r2m(xmm0, data[i]);
00341                 }
00342 #endif
00343         }
00344 
00345 void operator/=(Float4Vector& v)
00346         {
00347 #ifndef SSE_USED
00348         for(int i=0; i<dataSize; i++)
00349                 for(int line=0; line<4; line++)
00350                         data[(i << 2)+line] /= v.get0(line,i);
00351 #else
00352         float* vdata= v.getData();
00353 
00354         for(int i=0; i<dataSize4; i+=4)
00355                 {
00356                 movaps_m2r(data[i], xmm0);
00357                 movaps_m2r(vdata[i],xmm1);
00358                 divps_r2r(xmm1, xmm0);
00359                 movaps_r2m(xmm0, data[i]);
00360                 }
00361 #endif
00362         }
00363 
00364 // friends
00365 
00366 friend Float4Vector operator+(Float4Vector& v, float a);                        // v<op>a
00367 friend Float4Vector operator+(float a, Float4Vector& v);                        // a<op>v
00368 friend Float4Vector operator-(Float4Vector& v, float a);                        // v<op>a
00369 friend Float4Vector operator-(float a, Float4Vector& v);                        // a<op>v
00370 friend Float4Vector operator*(Float4Vector& v, float a);                        // v<op>a
00371 friend Float4Vector operator*(float a, Float4Vector& v);                        // a<op>v
00372 friend Float4Vector operator/(Float4Vector& v, float a);                        // v<op>a
00373 
00374 friend Float4Vector operator+(Float4Vector& v1, Float4Vector& v2);                      // v1<op>v2
00375 friend Float4Vector operator-(Float4Vector& v1, Float4Vector& v2);                      // v1<op>v2
00376         
00377 void output();
00378 void output(FILE* file);
00379 };
00380 
00381 /*
00382 static inline void Float4Vector_c_norme2_4(float* dataIn, int dataSize4, float** val)
00383         {
00384         xorps_r2r(xmm0, xmm0);                          // 0
00385 
00386         for(int i=0; i<dataSize4; i+=4)
00387                 {
00388                 movaps_m2r(dataIn[i], xmm1);            // get 4 data
00389 
00390 //              mulps_r2r(xmm1, xmm1);                  // sqr
00391                 addps_r2r(xmm1, xmm0);                  // sum of sqr
00392                 }
00393 
00394 //      sqrtps_r2r(xmm0,xmm0);                          // norme 2
00395 //      movaps_r2m(xmm0, *val[0]);                      // get 4 sum of square in one
00396         }
00397 */
00398 #endif
00399 
00400          
SourceForge.net Logo
Restoreinpaint sourceforge project `C++/Java Image Processing, Restoration, Inpainting Project'.

Bernard De Cuyper: Open Project Leader: Concept, design and development.
Bernard De Cuyper & Eddy Fraiha 2002, 2003. Bernard De Cuyper 2004. Open and free, for friendly usage only.
Modifications on Belgium ground of this piece of artistic work, by government institutions or companies, must be notified to Bernard De Cuyper.
bern_bdc@hotmail.com