Hall-D Software  alpha
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatrix5x5.h
Go to the documentation of this file.
1 #include <align_16.h>
2 
3 #ifndef USE_SSE2
4 
5 #include "DMatrixDSym.h"
6 // Matrix class without SIMD instructions
7 
8 class DMatrix5x5{
9  public:
11  for (unsigned int i=0;i<5;i++)
12  for (unsigned int j=0;j<5;j++)
13  mA[i][j]=0.;
14  }
15  // Copy constructor
16  DMatrix5x5(const DMatrix5x5 &m2){
17  for (unsigned int i=0;i<5;i++){
18  for (unsigned int j=0;j<5;j++){
19  mA[i][j]=m2(i,j);
20  }
21  }
22  }
24 
25  // Constructor using block matrices from matrix inversion
26  DMatrix5x5(const DMatrix2x2 &A,const DMatrix2x3 &B,
27  const DMatrix3x2 &C,const DMatrix3x3 &D){
28  mA[0][0]=A(0,0);
29  mA[0][1]=A(0,1);
30  mA[1][0]=A(1,0);
31  mA[1][1]=A(1,1);
32  for (unsigned int i=0;i<2;i++){
33  for (unsigned int j=0;j<3;j++){
34  mA[i][j+2]=B(i,j);
35  }
36  }
37  for (unsigned int i=0;i<3;i++){
38  for (unsigned int j=0;j<2;j++){
39  mA[i+2][j]=C(i,j);
40  mA[i+2][j+2]=D(i,j);
41  }
42  mA[i+2][4]=D(i,2);
43  }
44  }
45 
46 
47  // Access by indices
48  double &operator() (int row, int col){
49  return mA[row][col];
50  }
51  double operator() (int row, int col) const{
52  return mA[row][col];
53  }
54 
55  // Assignment operator
57  for (unsigned int i=0;i<5;i++){
58  for (unsigned int j=0;j<5;j++){
59  mA[i][j]=m1(i,j);
60  }
61  }
62  return *this;
63  }
64 
65  // Find the transpose of this matrix
68  for (unsigned int i=0;i<5;i++){
69  for (unsigned int j=0;j<5;j++){
70  temp(i,j)=mA[j][i];
71  }
72  }
73  return temp;
74  }
75  // Matrix addition
76  DMatrix5x5 operator+(const DMatrix5x5 &m2) const{
78 
79  for (unsigned int row=0;row<5;row++){
80  for (unsigned int col=0;col<5;col++){
81  temp(row,col)=mA[row][col]+m2(row,col);
82  }
83  }
84  return temp;
85  }
87  for (unsigned int i=0;i<5;i++){
88  for (unsigned int j=0;j<5;j++){
89  mA[i][j]+=m2(i,j);
90  }
91  }
92  return *this;
93  }
94 
95  // Method for adding symmetric matrices
96  DMatrix5x5 AddSym(const DMatrix5x5 &m2) const{
98  for (unsigned int i=0;i<5;i++){
99  for (unsigned int j=i;j<5;j++){
100  temp(i,j)=mA[i][j]+m2(i,j);
101  temp(j,i)=temp(i,j);
102  }
103  }
104  return temp;
105  }
106 
107  // Method for subtracting symmetric matrices
108  DMatrix5x5 SubSym(const DMatrix5x5 &m2) const{
110  for (unsigned int i=0;i<5;i++){
111  for (unsigned int j=i;j<5;j++){
112  temp(i,j)=mA[i][j]-m2(i,j);
113  temp(j,i)=temp(i,j);
114  }
115  }
116  return temp;
117  }
118 
119  // Matrix subtraction
120  DMatrix5x5 operator-(const DMatrix5x5 &m2) const{
122 
123  for (unsigned int row=0;row<5;row++){
124  for (unsigned int col=0;col<5;col++){
125  temp(row,col)=mA[row][col]-m2(row,col);
126  }
127  }
128  return temp;
129  }
131  for (unsigned int i=0;i<5;i++){
132  for (unsigned int j=0;j<5;j++){
133  mA[i][j]-=m2(i,j);
134  }
135  }
136  return *this;
137  }
138  // Scale a matrix by a double
139  DMatrix5x5 &operator*=(const double c){
140  for (unsigned int i=0;i<5;i++){
141  for (unsigned int j=0;j<5;j++){
142  mA[i][j]*=c;
143  }
144  }
145  return *this;
146  }
147 
148  // Matrix multiplication: (5x5) x (5x1)
150  return DMatrix5x1(mA[0][0]*m2(0)+mA[0][1]*m2(1)+mA[0][2]*m2(2)
151  +mA[0][3]*m2(3)+mA[0][4]*m2(4),
152  mA[1][0]*m2(0)+mA[1][1]*m2(1)+mA[1][2]*m2(2)
153  +mA[1][3]*m2(3)+mA[1][4]*m2(4),
154  mA[2][0]*m2(0)+mA[2][1]*m2(1)+mA[2][2]*m2(2)
155  +mA[2][3]*m2(3)+mA[2][4]*m2(4),
156  mA[3][0]*m2(0)+mA[3][1]*m2(1)+mA[3][2]*m2(2)
157  +mA[3][3]*m2(3)+mA[3][4]*m2(4),
158  mA[4][0]*m2(0)+mA[4][1]*m2(1)+mA[4][2]*m2(2)
159  +mA[4][3]*m2(3)+mA[4][4]*m2(4));
160 
161  }
162 
163  // Matrix multiplication: (5x5) x (5x2)
165  return DMatrix5x2(mA[0][0]*m2(0,0)+mA[0][1]*m2(1,0)+mA[0][2]*m2(2,0)
166  +mA[0][3]*m2(3,0)+mA[0][4]*m2(4,0),
167  mA[0][0]*m2(0,1)+mA[0][1]*m2(1,1)+mA[0][2]*m2(2,1)
168  +mA[0][3]*m2(3,1)+mA[0][4]*m2(4,1),
169 
170  mA[1][0]*m2(0,0)+mA[1][1]*m2(1,0)+mA[1][2]*m2(2,0)
171  +mA[1][3]*m2(3,0)+mA[1][4]*m2(4,0),
172  mA[1][0]*m2(0,1)+mA[1][1]*m2(1,1)+mA[1][2]*m2(2,1)
173  +mA[1][3]*m2(3,1)+mA[1][4]*m2(4,1),
174 
175  mA[2][0]*m2(0,0)+mA[2][1]*m2(1,0)+mA[2][2]*m2(2,0)
176  +mA[2][3]*m2(3,0)+mA[2][4]*m2(4,0),
177  mA[2][0]*m2(0,1)+mA[2][1]*m2(1,1)+mA[2][2]*m2(2,1)
178  +mA[2][3]*m2(3,1)+mA[2][4]*m2(4,1),
179 
180  mA[3][0]*m2(0,0)+mA[3][1]*m2(1,0)+mA[3][2]*m2(2,0)
181  +mA[3][3]*m2(3,0)+mA[3][4]*m2(4,0),
182  mA[3][0]*m2(0,1)+mA[3][1]*m2(1,1)+mA[3][2]*m2(2,1)
183  +mA[3][3]*m2(3,1)+mA[3][4]*m2(4,1),
184 
185  mA[4][0]*m2(0,0)+mA[4][1]*m2(1,0)+mA[4][2]*m2(2,0)
186  +mA[4][3]*m2(3,0)+mA[4][4]*m2(4,0),
187  mA[4][0]*m2(0,1)+mA[4][1]*m2(1,1)+mA[4][2]*m2(2,1)
188  +mA[4][3]*m2(3,1)+mA[4][4]*m2(4,1)
189  );
190 
191 
192  }
193  // Matrix multiplication: (5x5) x (5x5)
196 
197  for (unsigned int j=0;j<5;j++){
198  for (unsigned int i=0;i<5;i++){
199  double temp2=0.;
200  for (unsigned int k=0;k<5;k++){
201  temp2+=mA[j][k]*m2(k,i);
202  }
203  temp(j,i)=temp2;
204  }
205  }
206  return temp;
207  }
208 
209  // Zero out the matrix
211  for (unsigned int i=0;i<5;i++){
212  for (unsigned int j=0;j<5;j++){
213  mA[i][j]=0.;
214  }
215  }
216  return *this;
217  }
218 
219 
220  // Matrix inversion by blocks for a symmetric matrix
222  DMatrix2x2 A(mA[0][0],mA[0][1],mA[1][0],mA[1][1]);
223  DMatrix3x2 C(mA[2][0],mA[2][1],mA[3][0],mA[3][1],mA[4][0],mA[4][1]);
224  DMatrix3x2 CAinv=C*A.Invert();
225  DMatrix3x3 D(mA[2][2],mA[2][3],mA[2][4],mA[3][2],mA[3][3],mA[3][4],
226  mA[4][2],mA[4][3],mA[4][4]);
227  DMatrix2x3 B(mA[0][2],mA[0][3],mA[0][4],mA[1][2],mA[1][3],mA[1][4]);
228  DMatrix2x3 BDinv=B*D.InvertSym();
229  DMatrix2x2 AA=(A-BDinv*C).Invert();
230  DMatrix3x3 DD=(D-CAinv*B).InvertSym();
231  return DMatrix5x5(AA,-AA*BDinv,-DD*CAinv,DD);
232  }
233 
234 
235  // The following code performs the matrix operation ABA^T, where B is a symmetric matrix. The end
236  // result is also a symmetric matrix.
239  double sums[5];
240  // First row/column of ACA^T
241  sums[0]= mA[0][0]*A(0,0);
242  sums[0]+=mA[0][1]*A(0,1);
243  sums[0]+=mA[0][2]*A(0,2);
244  sums[0]+=mA[0][3]*A(0,3);
245  sums[0]+=mA[0][4]*A(0,4);
246 
247  sums[1]= mA[1][0]*A(0,0);
248  sums[1]+=mA[1][1]*A(0,1);
249  sums[1]+=mA[1][2]*A(0,2);
250  sums[1]+=mA[1][3]*A(0,3);
251  sums[1]+=mA[1][4]*A(0,4);
252 
253  sums[2]= mA[2][0]*A(0,0);
254  sums[2]+=mA[2][1]*A(0,1);
255  sums[2]+=mA[2][2]*A(0,2);
256  sums[2]+=mA[2][3]*A(0,3);
257  sums[2]+=mA[2][4]*A(0,4);
258 
259  sums[3]= mA[3][0]*A(0,0);
260  sums[3]+=mA[3][1]*A(0,1);
261  sums[3]+=mA[3][2]*A(0,2);
262  sums[3]+=mA[3][3]*A(0,3);
263  sums[3]+=mA[3][4]*A(0,4);
264 
265  sums[4]= mA[4][0]*A(0,0);
266  sums[4]+=mA[4][1]*A(0,1);
267  sums[4]+=mA[4][2]*A(0,2);
268  sums[4]+=mA[4][3]*A(0,3);
269  sums[4]+=mA[4][4]*A(0,4);
270 
271  temp(0,0)+=A(0,0)*sums[0];
272  temp(0,0)+=A(0,1)*sums[1];
273  temp(0,0)+=A(0,2)*sums[2];
274  temp(0,0)+=A(0,3)*sums[3];
275  temp(0,0)+=A(0,4)*sums[4];
276 
277  temp(1,0)+=A(1,0)*sums[0];
278  temp(1,0)+=A(1,1)*sums[1];
279  temp(1,0)+=A(1,2)*sums[2];
280  temp(1,0)+=A(1,3)*sums[3];
281  temp(1,0)+=A(1,4)*sums[4];
282 
283  temp(2,0)+=A(2,0)*sums[0];
284  temp(2,0)+=A(2,1)*sums[1];
285  temp(2,0)+=A(2,2)*sums[2];
286  temp(2,0)+=A(2,3)*sums[3];
287  temp(2,0)+=A(2,4)*sums[4];
288 
289  temp(3,0)+=A(3,0)*sums[0];
290  temp(3,0)+=A(3,1)*sums[1];
291  temp(3,0)+=A(3,2)*sums[2];
292  temp(3,0)+=A(3,3)*sums[3];
293  temp(3,0)+=A(3,4)*sums[4];
294 
295  temp(4,0)+=A(4,0)*sums[0];
296  temp(4,0)+=A(4,1)*sums[1];
297  temp(4,0)+=A(4,2)*sums[2];
298  temp(4,0)+=A(4,3)*sums[3];
299  temp(4,0)+=A(4,4)*sums[4];
300 
301  temp(0,1)=temp(1,0);
302  temp(0,2)=temp(2,0);
303  temp(0,3)=temp(3,0);
304  temp(0,4)=temp(4,0);
305 
306  // Second row/column of ACA^T
307  sums[0] =mA[0][0]*A(1,0);
308  sums[0]+=mA[0][1]*A(1,1);
309  sums[0]+=mA[0][2]*A(1,2);
310  sums[0]+=mA[0][3]*A(1,3);
311  sums[0]+=mA[0][4]*A(1,4);
312 
313  sums[1]=mA[1][0]*A(1,0);
314  sums[1]+=mA[1][1]*A(1,1);
315  sums[1]+=mA[1][2]*A(1,2);
316  sums[1]+=mA[1][3]*A(1,3);
317  sums[1]+=mA[1][4]*A(1,4);
318 
319  sums[2]=mA[2][0]*A(1,0);
320  sums[2]+=mA[2][1]*A(1,1);
321  sums[2]+=mA[2][2]*A(1,2);
322  sums[2]+=mA[2][3]*A(1,3);
323  sums[2]+=mA[2][4]*A(1,4);
324 
325  sums[3]=mA[3][0]*A(1,0);
326  sums[3]+=mA[3][1]*A(1,1);
327  sums[3]+=mA[3][2]*A(1,2);
328  sums[3]+=mA[3][3]*A(1,3);
329  sums[3]+=mA[3][4]*A(1,4);
330 
331  sums[4]=mA[4][0]*A(1,0);
332  sums[4]+=mA[4][1]*A(1,1);
333  sums[4]+=mA[4][2]*A(1,2);
334  sums[4]+=mA[4][3]*A(1,3);
335  sums[4]+=mA[4][4]*A(1,4);
336 
337  temp(1,1)+=A(1,0)*sums[0];
338  temp(1,1)+=A(1,1)*sums[1];
339  temp(1,1)+=A(1,2)*sums[2];
340  temp(1,1)+=A(1,3)*sums[3];
341  temp(1,1)+=A(1,4)*sums[4];
342 
343  temp(2,1)+=A(2,0)*sums[0];
344  temp(2,1)+=A(2,1)*sums[1];
345  temp(2,1)+=A(2,2)*sums[2];
346  temp(2,1)+=A(2,3)*sums[3];
347  temp(2,1)+=A(2,4)*sums[4];
348 
349  temp(3,1)+=A(3,0)*sums[0];
350  temp(3,1)+=A(3,1)*sums[1];
351  temp(3,1)+=A(3,2)*sums[2];
352  temp(3,1)+=A(3,3)*sums[3];
353  temp(3,1)+=A(3,4)*sums[4];
354 
355  temp(4,1)+=A(4,0)*sums[0];
356  temp(4,1)+=A(4,1)*sums[1];
357  temp(4,1)+=A(4,2)*sums[2];
358  temp(4,1)+=A(4,3)*sums[3];
359  temp(4,1)+=A(4,4)*sums[4];
360 
361  temp(1,2)=temp(2,1);
362  temp(1,3)=temp(3,1);
363  temp(1,4)=temp(4,1);
364 
365  // Third row/column of ACA^T
366  sums[0] =mA[0][0]*A(2,0);
367  sums[0]+=mA[0][1]*A(2,1);
368  sums[0]+=mA[0][2]*A(2,2);
369  sums[0]+=mA[0][3]*A(2,3);
370  sums[0]+=mA[0][4]*A(2,4);
371 
372  sums[1] =mA[1][0]*A(2,0);
373  sums[1]+=mA[1][1]*A(2,1);
374  sums[1]+=mA[1][2]*A(2,2);
375  sums[1]+=mA[1][3]*A(2,3);
376  sums[1]+=mA[1][4]*A(2,4);
377 
378  sums[2] =mA[2][0]*A(2,0);
379  sums[2]+=mA[2][1]*A(2,1);
380  sums[2]+=mA[2][2]*A(2,2);
381  sums[2]+=mA[2][3]*A(2,3);
382  sums[2]+=mA[2][4]*A(2,4);
383 
384  sums[3] =mA[3][0]*A(2,0);
385  sums[3]+=mA[3][1]*A(2,1);
386  sums[3]+=mA[3][2]*A(2,2);
387  sums[3]+=mA[3][3]*A(2,3);
388  sums[3]+=mA[3][4]*A(2,4);
389 
390  sums[4] =mA[4][0]*A(2,0);
391  sums[4]+=mA[4][1]*A(2,1);
392  sums[4]+=mA[4][2]*A(2,2);
393  sums[4]+=mA[4][3]*A(2,3);
394  sums[4]+=mA[4][4]*A(2,4);
395 
396  temp(2,2)+=A(2,0)*sums[0];
397  temp(2,2)+=A(2,1)*sums[1];
398  temp(2,2)+=A(2,2)*sums[2];
399  temp(2,2)+=A(2,3)*sums[3];
400  temp(2,2)+=A(2,4)*sums[4];
401 
402  temp(3,2)+=A(3,0)*sums[0];
403  temp(3,2)+=A(3,1)*sums[1];
404  temp(3,2)+=A(3,2)*sums[2];
405  temp(3,2)+=A(3,3)*sums[3];
406  temp(3,2)+=A(3,4)*sums[4];
407 
408  temp(4,2)+=A(4,0)*sums[0];
409  temp(4,2)+=A(4,1)*sums[1];
410  temp(4,2)+=A(4,2)*sums[2];
411  temp(4,2)+=A(4,3)*sums[3];
412  temp(4,2)+=A(4,4)*sums[4];
413 
414  temp(2,3)=temp(3,2);
415  temp(2,4)=temp(4,2);
416 
417  // Fourth row/column of ACA^T
418  sums[0] =mA[0][0]*A(3,0);
419  sums[0]+=mA[0][1]*A(3,1);
420  sums[0]+=mA[0][2]*A(3,2);
421  sums[0]+=mA[0][3]*A(3,3);
422  sums[0]+=mA[0][4]*A(3,4);
423 
424  sums[1] =mA[1][0]*A(3,0);
425  sums[1]+=mA[1][1]*A(3,1);
426  sums[1]+=mA[1][2]*A(3,2);
427  sums[1]+=mA[1][3]*A(3,3);
428  sums[1]+=mA[1][4]*A(3,4);
429 
430  sums[2] =mA[2][0]*A(3,0);
431  sums[2]+=mA[2][1]*A(3,1);
432  sums[2]+=mA[2][2]*A(3,2);
433  sums[2]+=mA[2][3]*A(3,3);
434  sums[2]+=mA[2][4]*A(3,4);
435 
436  sums[3] =mA[3][0]*A(3,0);
437  sums[3]+=mA[3][1]*A(3,1);
438  sums[3]+=mA[3][2]*A(3,2);
439  sums[3]+=mA[3][3]*A(3,3);
440  sums[3]+=mA[3][4]*A(3,4);
441 
442  sums[4] =mA[4][0]*A(3,0);
443  sums[4]+=mA[4][1]*A(3,1);
444  sums[4]+=mA[4][2]*A(3,2);
445  sums[4]+=mA[4][3]*A(3,3);
446  sums[4]+=mA[4][4]*A(3,4);
447 
448  temp(3,3)+=A(3,0)*sums[0];
449  temp(3,3)+=A(3,1)*sums[1];
450  temp(3,3)+=A(3,2)*sums[2];
451  temp(3,3)+=A(3,3)*sums[3];
452  temp(3,3)+=A(3,4)*sums[4];
453 
454  temp(4,3)+=A(4,0)*sums[0];
455  temp(4,3)+=A(4,1)*sums[1];
456  temp(4,3)+=A(4,2)*sums[2];
457  temp(4,3)+=A(4,3)*sums[3];
458  temp(4,3)+=A(4,4)*sums[4];
459 
460  temp(3,4)=temp(4,3);
461 
462  // Last entry ACA^T[4][4]
463  sums[0] =mA[0][0]*A(4,0);
464  sums[0]+=mA[0][1]*A(4,1);
465  sums[0]+=mA[0][2]*A(4,2);
466  sums[0]+=mA[0][3]*A(4,3);
467  sums[0]+=mA[0][4]*A(4,4);
468 
469  sums[1] =mA[1][0]*A(4,0);
470  sums[1]+=mA[1][1]*A(4,1);
471  sums[1]+=mA[1][2]*A(4,2);
472  sums[1]+=mA[1][3]*A(4,3);
473  sums[1]+=mA[1][4]*A(4,4);
474 
475  sums[2] =mA[2][0]*A(4,0);
476  sums[2]+=mA[2][1]*A(4,1);
477  sums[2]+=mA[2][2]*A(4,2);
478  sums[2]+=mA[2][3]*A(4,3);
479  sums[2]+=mA[2][4]*A(4,4);
480 
481  sums[3] =mA[3][0]*A(4,0);
482  sums[3]+=mA[3][1]*A(4,1);
483  sums[3]+=mA[3][2]*A(4,2);
484  sums[3]+=mA[3][3]*A(4,3);
485  sums[3]+=mA[3][4]*A(4,4);
486 
487  sums[4] =mA[4][0]*A(4,0);
488  sums[4]+=mA[4][1]*A(4,1);
489  sums[4]+=mA[4][2]*A(4,2);
490  sums[4]+=mA[4][3]*A(4,3);
491  sums[4]+=mA[4][4]*A(4,4);
492 
493  temp(4,4)+=A(4,0)*sums[0];
494  temp(4,4)+=A(4,1)*sums[1];
495  temp(4,4)+=A(4,2)*sums[2];
496  temp(4,4)+=A(4,3)*sums[3];
497  temp(4,4)+=A(4,4)*sums[4];
498 
499  return temp;
500  }
501 
502 
503 
504  // The following code performs the matrix operation ABA^T, where B is a symmetric matrix. The end
505  // result is also a symmetric matrix.
508  double sums[5]={0};
509  // First row/column of ACA^T
510  for (unsigned int i=0;i<5;i++){
511  for (unsigned int k=0;k<5;k++){
512  sums[i]+=mA[i][k]*A(0,k);
513  }
514  }
515  for (unsigned int i=0;i<5;i++){
516  for (unsigned int k=0;k<5;k++){
517  temp(i,0)+=A(i,k)*sums[k];
518  }
519  temp(0,i)=temp(i,0);
520  }
521  // Second row/column of ACA^T
522  for (unsigned int i=0;i<5;i++){
523  sums[i]=0.;
524  for (unsigned int k=0;k<5;k++){
525  sums[i]+=mA[i][k]*A(1,k);
526  }
527  }
528  for (unsigned int i=1;i<5;i++){
529  for (unsigned int k=0;k<5;k++){
530  temp(i,1)+=A(i,k)*sums[k];
531  }
532  temp(1,i)=temp(i,1);
533  }
534  // Third row/column of ACA^T
535  for (unsigned int i=0;i<5;i++){
536  sums[i]=0.;
537  for (unsigned int k=0;k<5;k++){
538  sums[i]+=mA[i][k]*A(2,k);
539  }
540  }
541  for (unsigned int i=2;i<5;i++){
542  for (unsigned int k=0;k<5;k++){
543  temp(i,2)+=A(i,k)*sums[k];
544  }
545  temp(2,i)=temp(i,2);
546  }
547  // Fourth row/column of ACA^T
548  for (unsigned int i=0;i<5;i++){
549  sums[i]=0.;
550  for (unsigned int k=0;k<5;k++){
551  sums[i]+=mA[i][k]*A(3,k);
552  }
553  }
554  for (unsigned int i=3;i<5;i++){
555  for (unsigned int k=0;k<5;k++){
556  temp(i,3)+=A(i,k)*sums[k];
557  }
558  temp(3,i)=temp(i,3);
559  }
560  // Last entry ACA^T[4][4]
561  for (unsigned int i=0;i<5;i++){
562  sums[i]=0.;
563  for (unsigned int k=0;k<5;k++){
564  sums[i]+=mA[i][k]*A(4,k);
565  }
566  }
567  for (unsigned int k=0;k<5;k++){
568  temp(4,4)+=A(4,k)*sums[k];
569  }
570  return temp;
571  }
572 
573  double SandwichMultiply(const DMatrix5x1 &A){
574  double a1=A(0),a2=A(1),a3=A(2),a4=A(3),a5=A(4);
575  return a1*(a1*mA[0][0]+2.*a2*mA[0][1]+2.*a3*mA[0][2]+2.*a4*mA[0][3]
576  +2.*a5*mA[0][4])
577  +a2*(a2*mA[1][1]+2.*a3*mA[1][2]+2.*a4*mA[1][3]+2.*a5*mA[1][4])
578  +a3*(a3*mA[2][2]+2.*a4*mA[2][3]+2.*a5*mA[2][4])
579  +a4*(a4*mA[3][3]+2.*a5*mA[3][4])
580  +a5*a5*mA[4][4];
581 
582  }
583 
584  DMatrixDSym GetSub(unsigned int lowerBound, unsigned int upperBound){
585  if (upperBound >= lowerBound) return DMatrixDSym();
586  DMatrixDSym subMatrix(upperBound - lowerBound);
587  for (unsigned int i=lowerBound; i <= upperBound; i++){
588  for (unsigned int j=lowerBound; j <= upperBound; j++){
589  subMatrix(i,j) = mA[i][j];
590  }
591  }
592  return subMatrix;
593  }
594 
595  bool IsPosDef(){
596  if(mA[0][0] > 0. &&
597  GetSub(0,1).Determinant() > 0. && GetSub(0,2).Determinant() > 0. &&
598  GetSub(0,3).Determinant() > 0. && GetSub(0,4).Determinant() > 0.)
599  return true;
600  else return false;
601  }
602 
603  void Print(){
604  cout << "DMatrix5x5:" <<endl;
605  cout << " | 0 | 1 | 2 | 3 | 4 |" <<endl;
606  cout << "----------------------------------------------------------------------" <<endl;
607 
608  for (unsigned int i=0;i<5;i++){
609  cout <<" "<< i << " |";
610  for (unsigned int j=0;j<5;j++){
611  cout << setw(11)<<setprecision(6)<<mA[i][j] <<" ";
612  }
613  cout << endl;
614  }
615  }
616 
617  private:
618  double mA[5][5];
619 
620 };
621 
622 
623 // Scale 5x5 matrix by a floating point number
624 inline DMatrix5x5 operator*(const double c,const DMatrix5x5 &M){
626  for (unsigned int i=0;i<5;i++){
627  for (unsigned int j=0;j<5;j++){
628  temp(i,j)=c*M(i,j);
629  }
630  }
631  return temp;
632 }
633 
634 
635 #else
636 
637 // Matrix class with SIMD instructions
638 
639 class DMatrix5x5{
640  public:
641  DMatrix5x5()
642  : mA( ALIGNED_16_BLOCK_PTR(union dvec, 5, mA) )
643  {
644  for (unsigned int i=0;i<5;i++){
645  for (unsigned int j=0;j<3;j++){
646  mA[i].v[j]=_mm_setzero_pd();
647  }
648  }
649  }
650  DMatrix5x5( __m128d m11, __m128d m12, __m128d m13, __m128d m14, __m128d m15,
651  __m128d m21, __m128d m22, __m128d m23, __m128d m24, __m128d m25,
652  __m128d m31, __m128d m32, __m128d m33, __m128d m34, __m128d m35)
653  : mA( ALIGNED_16_BLOCK_PTR(union dvec, 5, mA) )
654  {
655  mA[0].v[0]=m11;
656  mA[1].v[0]=m12;
657  mA[2].v[0]=m13;
658  mA[3].v[0]=m14;
659  mA[4].v[0]=m15;
660  mA[0].v[1]=m21;
661  mA[1].v[1]=m22;
662  mA[2].v[1]=m23;
663  mA[3].v[1]=m24;
664  mA[4].v[1]=m25;
665  mA[0].v[2]=m31;
666  mA[1].v[2]=m32;
667  mA[2].v[2]=m33;
668  mA[3].v[2]=m34;
669  mA[4].v[2]=m35;
670  }
671  // Constructor for a symmetric matrix
672  DMatrix5x5( __m128d m11, __m128d m12, __m128d m13, __m128d m14, __m128d m15,
673  __m128d m23, __m128d m24, __m128d m25,
674  __m128d m35)
675  : mA( ALIGNED_16_BLOCK_PTR(union dvec, 5, mA) )
676  {
677  mA[0].v[0]=m11;
678  mA[1].v[0]=m12;
679  mA[2].v[0]=m13;
680  mA[3].v[0]=m14;
681  mA[4].v[0]=m15;
682  mA[0].v[1]=_mm_setr_pd(mA[2].d[0],mA[3].d[0]);
683  mA[1].v[1]=_mm_setr_pd(mA[2].d[1],mA[3].d[1]);
684  mA[2].v[1]=m23;
685  mA[3].v[1]=m24;
686  mA[4].v[1]=m25;
687  mA[0].v[2]=_mm_setr_pd(mA[4].d[0],0.);
688  mA[1].v[2]=_mm_setr_pd(mA[4].d[1],0.);
689  mA[2].v[2]=_mm_setr_pd(mA[4].d[2],0.);
690  mA[3].v[2]=_mm_setr_pd(mA[4].d[3],0.);
691  mA[4].v[2]=m35;
692  }
693  // Constructor for symmetric matrix by elements
694  DMatrix5x5(double C11, double C12, const double C13, double C14, double C15,
695  double C22, const double C23, double C24, double C25,
696  double C33, double C34, double C35,
697  double C44, double C45,
698  double C55)
699  : mA( ALIGNED_16_BLOCK_PTR(union dvec, 5, mA) )
700  {
701  mA[0].v[0]=_mm_setr_pd(C11,C12);
702  mA[1].v[0]=_mm_setr_pd(C12,C22);
703  mA[2].v[0]=_mm_setr_pd(C13,C23);
704  mA[3].v[0]=_mm_setr_pd(C14,C24);
705  mA[4].v[0]=_mm_setr_pd(C15,C25);
706  mA[0].v[1]=_mm_setr_pd(C13,C14);
707  mA[1].v[1]=_mm_setr_pd(C23,C24);
708  mA[2].v[1]=_mm_setr_pd(C33,C34);
709  mA[3].v[1]=_mm_setr_pd(C34,C44);
710  mA[4].v[1]=_mm_setr_pd(C35,C45);
711  mA[0].v[2]=_mm_setr_pd(C15,0);
712  mA[1].v[2]=_mm_setr_pd(C25,0);
713  mA[2].v[2]=_mm_setr_pd(C35,0);
714  mA[3].v[2]=_mm_setr_pd(C45,0);
715  mA[4].v[2]=_mm_setr_pd(C55,0);
716 
717  }
718 
719  // Constructor using block matrices from matrix inversion
720  DMatrix5x5(const DMatrix2x2 &A,const DMatrix2x3 &B,
721  const DMatrix3x2 &C,const DMatrix3x3 &D)
722  : mA( ALIGNED_16_BLOCK_PTR(union dvec, 5, mA) )
723  {
724  mA[0].v[0]=A.GetV(0);
725  mA[1].v[0]=A.GetV(1);
726  for (unsigned int i=0;i<3;i++){
727  mA[2+i].v[0]=B.GetV(i);
728  for (unsigned int j=0;j<2;j++){
729  mA[2+i].v[j+1]=D.GetV(j,i);
730  }
731  }
732  mA[0].v[1]=C.GetV(0,0);
733  mA[0].v[2]=C.GetV(1,0);
734  mA[1].v[1]=C.GetV(0,1);
735  mA[1].v[2]=C.GetV(1,1);
736  }
737  ~DMatrix5x5(){};
738 
739  __m128d GetV(int pair, int col) const{
740  return mA[col].v[pair];
741  }
742  void SetV(int pair, int col, __m128d v){
743  mA[col].v[pair]=v;
744  }
745 
746  // Copy constructor
747  DMatrix5x5(const DMatrix5x5 &m2)
748  : mA( ALIGNED_16_BLOCK_PTR(union dvec, 5, mA) )
749  {
750  for (unsigned int i=0;i<5;i++){
751  for (unsigned int j=0;j<3;j++){
752  mA[i].v[j]=m2.GetV(j,i);
753  }
754  }
755  }
756 
757  // return a column of the 5x5 matrix as a DMatrix5x1 object
758  DMatrix5x1 GetColumn(int col){
759  return DMatrix5x1(mA[col].v[0],mA[col].v[1],mA[col].v[2]);
760 
761  }
762  // return the trace of the matrix
763  double Trace(){
764  return(mA[0].d[0]+mA[1].d[1]+mA[2].d[2]+mA[3].d[3]+mA[4].d[4]);
765  }
766 
767 
768  // Access by indices
769  double &operator() (int row, int col){
770  return mA[col].d[row];
771  }
772  double operator() (int row, int col) const{
773  return mA[col].d[row];
774  }
775 
776  // Assignment operator
777  DMatrix5x5 &operator=(const DMatrix5x5 &m1){
778  for (unsigned int i=0;i<5;i++){
779  mA[i].v[0]=m1.GetV(0,i);
780  mA[i].v[1]=m1.GetV(1,i);
781  mA[i].v[2]=m1.GetV(2,i);
782  }
783  return *this;
784  }
785 
786  // Matrix addition
787  DMatrix5x5 operator+(const DMatrix5x5 &m2) const{
788 #define ADD(i,j) _mm_add_pd(GetV((i),(j)),m2.GetV((i),(j)))
789 
790  return DMatrix5x5(ADD(0,0),ADD(0,1),ADD(0,2),ADD(0,3),ADD(0,4),ADD(1,0),ADD(1,1),ADD(1,2),
791  ADD(1,3),ADD(1,4),ADD(2,0),ADD(2,1),ADD(2,2),ADD(2,3),ADD(2,4));
792  }
793  DMatrix5x5 &operator+=(const DMatrix5x5 &m2){
794  for (unsigned int i=0;i<5;i++){
795  for (unsigned int j=0;j<3;j++){
796  mA[i].v[j]=ADD(j,i);
797  }
798  }
799  return *this;
800  }
801  // Method for adding symmetric matrices
802  DMatrix5x5 AddSym(const DMatrix5x5 &m2) const{
803  return DMatrix5x5(ADD(0,0),ADD(0,1),ADD(0,2),ADD(0,3),ADD(0,4),ADD(1,2),
804  ADD(1,3),ADD(1,4),ADD(2,4));
805  }
806 
807  // Matrix subtraction
808  DMatrix5x5 operator-(const DMatrix5x5 &m2) const{
809 #define SUB(i,j) _mm_sub_pd(GetV((i),(j)),m2.GetV((i),(j)))
810 
811  return DMatrix5x5(SUB(0,0),SUB(0,1),SUB(0,2),SUB(0,3),SUB(0,4),SUB(1,0),SUB(1,1),SUB(1,2),
812  SUB(1,3),SUB(1,4),SUB(2,0),SUB(2,1),SUB(2,2),SUB(2,3),SUB(2,4));
813  }
814  // method for subtracting a symmetric matrix from another symmetric matrix
815  DMatrix5x5 SubSym(const DMatrix5x5 &m2) const{
816  return DMatrix5x5(SUB(0,0),SUB(0,1),SUB(0,2),SUB(0,3),SUB(0,4),SUB(1,2),
817  SUB(1,3),SUB(1,4),SUB(2,4));
818  }
819 
820  // Matrix multiplication: (5x5) x (5x1)
821  DMatrix5x1 operator*(const DMatrix5x1 &m2){
822  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 5, p)
823  __m128d &a1=p[0];
824  __m128d &a2=p[1];
825  __m128d &a3=p[2];
826  __m128d &a4=p[3];
827  __m128d &a5=p[4];
828  a1=_mm_set1_pd(m2(0));
829  a2=_mm_set1_pd(m2(1));
830  a3=_mm_set1_pd(m2(2));
831  a4=_mm_set1_pd(m2(3));
832  a5=_mm_set1_pd(m2(4));
833  return
834  DMatrix5x1(_mm_add_pd(_mm_mul_pd(GetV(0,0),a1),
835  _mm_add_pd(_mm_mul_pd(GetV(0,1),a2),
836  _mm_add_pd(_mm_mul_pd(GetV(0,2),a3),
837  _mm_add_pd(_mm_mul_pd(GetV(0,3),a4),
838  _mm_mul_pd(GetV(0,4),a5))))),
839  _mm_add_pd(_mm_mul_pd(GetV(1,0),a1),
840  _mm_add_pd(_mm_mul_pd(GetV(1,1),a2),
841  _mm_add_pd(_mm_mul_pd(GetV(1,2),a3),
842  _mm_add_pd(_mm_mul_pd(GetV(1,3),a4),
843  _mm_mul_pd(GetV(1,4),a5))))),
844  _mm_add_pd(_mm_mul_pd(GetV(2,0),a1),
845  _mm_add_pd(_mm_mul_pd(GetV(2,1),a2),
846  _mm_add_pd(_mm_mul_pd(GetV(2,2),a3),
847  _mm_add_pd(_mm_mul_pd(GetV(2,3),a4),
848  _mm_mul_pd(GetV(2,4),a5))))));
849 
850 
851  }
852 
853  // Matrix multiplication: (5x5) x (5x2)
854  DMatrix5x2 operator*(const DMatrix5x2 &m2){
855  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 10, p)
856  __m128d &m11=p[0];
857  __m128d &m12=p[1];
858  __m128d &m21=p[2];
859  __m128d &m22=p[3];
860  __m128d &m31=p[4];
861  __m128d &m32=p[5];
862  __m128d &m41=p[6];
863  __m128d &m42=p[7];
864  __m128d &m51=p[8];
865  __m128d &m52=p[9];
866  m11=_mm_set1_pd(m2(0,0));
867  m12=_mm_set1_pd(m2(0,1));
868  m21=_mm_set1_pd(m2(1,0));
869  m22=_mm_set1_pd(m2(1,1));
870  m31=_mm_set1_pd(m2(2,0));
871  m32=_mm_set1_pd(m2(2,1));
872  m41=_mm_set1_pd(m2(3,0));
873  m42=_mm_set1_pd(m2(3,1));
874  m51=_mm_set1_pd(m2(4,0));
875  m52=_mm_set1_pd(m2(4,1));
876  return
877  DMatrix5x2(_mm_add_pd(_mm_mul_pd(GetV(0,0),m11),
878  _mm_add_pd(_mm_mul_pd(GetV(0,1),m21),
879  _mm_add_pd(_mm_mul_pd(GetV(0,2),m31),
880  _mm_add_pd(_mm_mul_pd(GetV(0,3),m41),
881  _mm_mul_pd(GetV(0,4),m51))))),
882  _mm_add_pd(_mm_mul_pd(GetV(0,0),m12),
883  _mm_add_pd(_mm_mul_pd(GetV(0,1),m22),
884  _mm_add_pd(_mm_mul_pd(GetV(0,2),m32),
885  _mm_add_pd(_mm_mul_pd(GetV(0,3),m42),
886  _mm_mul_pd(GetV(0,4),m52))))),
887  _mm_add_pd(_mm_mul_pd(GetV(1,0),m11),
888  _mm_add_pd(_mm_mul_pd(GetV(1,1),m21),
889  _mm_add_pd(_mm_mul_pd(GetV(1,2),m31),
890  _mm_add_pd(_mm_mul_pd(GetV(1,3),m41),
891  _mm_mul_pd(GetV(1,4),m51))))),
892  _mm_add_pd(_mm_mul_pd(GetV(1,0),m12),
893  _mm_add_pd(_mm_mul_pd(GetV(1,1),m22),
894  _mm_add_pd(_mm_mul_pd(GetV(1,2),m32),
895  _mm_add_pd(_mm_mul_pd(GetV(1,3),m42),
896  _mm_mul_pd(GetV(1,4),m52))))),
897  _mm_add_pd(_mm_mul_pd(GetV(2,0),m11),
898  _mm_add_pd(_mm_mul_pd(GetV(2,1),m21),
899  _mm_add_pd(_mm_mul_pd(GetV(2,2),m31),
900  _mm_add_pd(_mm_mul_pd(GetV(2,3),m41),
901  _mm_mul_pd(GetV(2,4),m51))))),
902  _mm_add_pd(_mm_mul_pd(GetV(2,0),m12),
903  _mm_add_pd(_mm_mul_pd(GetV(2,1),m22),
904  _mm_add_pd(_mm_mul_pd(GetV(2,2),m32),
905  _mm_add_pd(_mm_mul_pd(GetV(2,3),m42),
906  _mm_mul_pd(GetV(2,4),m52))))));
907  }
908 
909  // The following code performs the matrix operation ABA^T,where B is a 5x5 matrix, and A is 5x1
910  double SandwichMultiply(const DMatrix5x1 &A){
911  double a1=A(0),a2=A(1),a3=A(2),a4=A(3),a5=A(4);
912  return a1*(a1*mA[0].d[0]+2.*a2*mA[0].d[1]+2.*a3*mA[0].d[2]+2.*a4*mA[0].d[3]
913  +2.*a5*mA[0].d[4])
914  +a2*(a2*mA[1].d[1]+2.*a3*mA[1].d[2]+2.*a4*mA[1].d[3]+2.*a5*mA[1].d[4])
915  +a3*(a3*mA[2].d[2]+2.*a4*mA[2].d[3]+2.*a5*mA[2].d[4])
916  +a4*(a4*mA[3].d[3]+2.*a5*mA[3].d[4])
917  +a5*a5*mA[4].d[4];
918 
919  }
920 
921 
922 
923 #ifdef USE_SSE3
924 
925  // The following code performs the matrix operation ABA^T, where B is a symmetric matrix
927  //__m128d A1112=_mm_setr_pd(A(0,0),A(0,1));
928  //__m128d A1314=_mm_setr_pd(A(0,2),A(0,3));
929 #define A1112 _mm_setr_pd(A(0,0),A(0,1))
930 #define A1314 _mm_setr_pd(A(0,2),A(0,3))
931  //__m128d A15=_mm_set1_pd(A(0,4));
932  union dvec1{
933  __m128d v[3];
934  double d[6];
935  };
936  ALIGNED_16_BLOCK_WITH_PTR(union dvec1, 1, p1)
937  union dvec1 &temp=p1[0];
938 
939  union dvec2{
940  __m128d v;
941  double d[2];
942  };
943  ALIGNED_16_BLOCK_WITH_PTR(union dvec2, 1, p2)
944  union dvec2 &temp2=p2[0];
945  temp2.v=_mm_set1_pd(A(0,4));
946 
947  // BA^T column 1
948  temp.v[0]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,0),A1112),
949  _mm_mul_pd(GetV(0,1),A1112)),
950  _mm_hadd_pd(_mm_mul_pd(GetV(1,0),A1314),
951  _mm_mul_pd(GetV(1,1),A1314))),
952  _mm_mul_pd(GetV(0,4),temp2.v));
953  temp.v[1]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,2),A1112),
954  _mm_mul_pd(GetV(0,3),A1112)),
955  _mm_hadd_pd(_mm_mul_pd(GetV(1,2),A1314),
956  _mm_mul_pd(GetV(1,3),A1314))),
957  _mm_mul_pd(GetV(1,4),temp2.v));
958  temp.v[2] =_mm_set1_pd(mA[4].d[0]*A(0,0)+mA[4].d[1]*A(0,1)+mA[4].d[2]*A(0,2)+mA[4].d[3]*A(0,3)+mA[4].d[4]*A(0,4));
959 
960 
961 
962  // C=ABA^T elements C11,C12
963 #define A2122 _mm_setr_pd(A(1,0),A(1,1))
964 #define A2324 _mm_setr_pd(A(1,2),A(1,3))
965  // __m128d A2122=_mm_setr_pd(A(1,0),A(1,1));
966  //__m128d A2324=_mm_setr_pd(A(1,2),A(1,3));
967  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A1112,temp.v[0]),_mm_mul_pd(A2122,temp.v[0])),
968  _mm_hadd_pd(_mm_mul_pd(A1314,temp.v[1]),_mm_mul_pd(A2324,temp.v[1]))),
969  _mm_mul_pd(A.GetV(0,4),temp.v[2]));
970  double C11=temp2.d[0];
971  double C12=temp2.d[1];
972 
973  // C=ABA^T elements C13,C14
974 #define A3132 _mm_setr_pd(A(2,0),A(2,1))
975 #define A3334 _mm_setr_pd(A(2,2),A(2,3))
976 #define A4142 _mm_setr_pd(A(3,0),A(3,1))
977 #define A4344 _mm_setr_pd(A(3,2),A(3,3))
978  //__m128d A3132=_mm_setr_pd(A(2,0),A(2,1));
979  //__m128d A3334=_mm_setr_pd(A(2,2),A(2,3));
980  //__m128d A4142=_mm_setr_pd(A(3,0),A(3,1));
981  //__m128d A4344=_mm_setr_pd(A(3,2),A(3,3));
982  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A3132,temp.v[0]),_mm_mul_pd(A4142,temp.v[0])),
983  _mm_hadd_pd(_mm_mul_pd(A3334,temp.v[1]),_mm_mul_pd(A4344,temp.v[1]))),
984  _mm_mul_pd(A.GetV(1,4),temp.v[2]));
985  double C13=temp2.d[0];
986  double C14=temp2.d[1];
987 
988  // BA^T column 2
989  //#define A25 _mm_set1_pd(A(1,4))
990  temp2.v=_mm_set1_pd(A(1,4));
991  temp.v[0]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,0),A2122),
992  _mm_mul_pd(GetV(0,1),A2122)),
993  _mm_hadd_pd(_mm_mul_pd(GetV(1,0),A2324),
994  _mm_mul_pd(GetV(1,1),A2324))),
995  _mm_mul_pd(GetV(0,4),temp2.v));
996  temp.v[1]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,2),A2122),
997  _mm_mul_pd(GetV(0,3),A2122)),
998  _mm_hadd_pd(_mm_mul_pd(GetV(1,2),A2324),
999  _mm_mul_pd(GetV(1,3),A2324))),
1000  _mm_mul_pd(GetV(1,4),temp2.v));
1001  temp.v[2]=_mm_set1_pd(mA[4].d[0]*A(1,0)+mA[4].d[1]*A(1,1)+mA[4].d[2]*A(1,2)+mA[4].d[3]*A(1,3)+mA[4].d[4]*A(1,4));
1002 
1003  // C=ABA^T elements C22,C23
1004 #define A2535 _mm_setr_pd(A(1,4),A(2,4))
1005  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A2122,temp.v[0]),_mm_mul_pd(A3132,temp.v[0])),
1006  _mm_hadd_pd(_mm_mul_pd(A2324,temp.v[1]),_mm_mul_pd(A3334,temp.v[1]))),
1007  _mm_mul_pd(A2535,temp.v[2]));
1008  double C22=temp2.d[0];
1009  double C23=temp2.d[1];
1010 
1011  // C=ABA^T elements C24,C25
1012 #define A5152 _mm_setr_pd(A(4,0),A(4,1))
1013 #define A5354 _mm_setr_pd(A(4,2),A(4,3))
1014 #define A4555 _mm_setr_pd(A(3,4),A(4,4))
1015  //__m128d A5152=_mm_setr_pd(A(4,0),A(4,1));
1016  //__m128d A5354=_mm_setr_pd(A(4,2),A(4,3));
1017  //__m128d A4555=_mm_setr_pd(A(3,4),A(4,4));
1018  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A4142,temp.v[0]),_mm_mul_pd(A5152,temp.v[0])),
1019  _mm_hadd_pd(_mm_mul_pd(A4344,temp.v[1]),_mm_mul_pd(A5354,temp.v[1]))),
1020  _mm_mul_pd(A4555,temp.v[2]));
1021  double C24=temp2.d[0];
1022  double C25=temp2.d[1];
1023 
1024  // BA^T column 3
1025  //#define A35 _mm_set1_pd(A(2,4))
1026  temp2.v=_mm_set1_pd(A(2,4));
1027  temp.v[0]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,0),A3132),
1028  _mm_mul_pd(GetV(0,1),A3132)),
1029  _mm_hadd_pd(_mm_mul_pd(GetV(1,0),A3334),
1030  _mm_mul_pd(GetV(1,1),A3334))),
1031  _mm_mul_pd(GetV(0,4),temp2.v));
1032  temp.v[1]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,2),A3132),
1033  _mm_mul_pd(GetV(0,3),A3132)),
1034  _mm_hadd_pd(_mm_mul_pd(GetV(1,2),A3334),
1035  _mm_mul_pd(GetV(1,3),A3334))),
1036  _mm_mul_pd(GetV(1,4),temp2.v));
1037  temp.v[2]=_mm_set1_pd(mA[4].d[0]*A(2,0)+mA[4].d[1]*A(2,1)+mA[4].d[2]*A(2,2)+mA[4].d[3]*A(2,3)+mA[4].d[4]*A(2,4));
1038 
1039 
1040  // C=ABA^T elements C33,C34,C35
1041 #define A3545 _mm_setr_pd(A(2,4),A(3,4))
1042  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A3132,temp.v[0]),_mm_mul_pd(A4142,temp.v[0])),
1043  _mm_hadd_pd(_mm_mul_pd(A3334,temp.v[1]),_mm_mul_pd(A4344,temp.v[1]))),
1044  _mm_mul_pd(A3545,temp.v[2]));
1045  double C33=temp2.d[0];
1046  double C34=temp2.d[1];
1047  double C35=A(4,0)*temp.d[0]+A(4,1)*temp.d[1]+A(4,2)*temp.d[2]+A(4,3)*temp.d[3]+A(4,4)*temp.d[4];
1048 
1049  // BA^T column 4
1050  //#define A45 _mm_set1_pd(A(3,4))
1051  temp2.v=_mm_set1_pd(A(3,4));
1052  temp.v[0]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,0),A4142),
1053  _mm_mul_pd(GetV(0,1),A4142)),
1054  _mm_hadd_pd(_mm_mul_pd(GetV(1,0),A4344),
1055  _mm_mul_pd(GetV(1,1),A4344))),
1056  _mm_mul_pd(GetV(0,4),temp2.v));
1057  temp.v[1]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,2),A4142),
1058  _mm_mul_pd(GetV(0,3),A4142)),
1059  _mm_hadd_pd(_mm_mul_pd(GetV(1,2),A4344),
1060  _mm_mul_pd(GetV(1,3),A4344))),
1061  _mm_mul_pd(GetV(1,4),temp2.v));
1062  temp.v[2]=_mm_set1_pd(mA[4].d[0]*A(3,0)+mA[4].d[1]*A(3,1)+mA[4].d[2]*A(3,2)+mA[4].d[3]*A(3,3)+mA[4].d[4]*A(3,4));
1063 
1064  // C=ABA^T elements C44,C45
1065  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A4142,temp.v[0]),_mm_mul_pd(A5152,temp.v[0])),
1066  _mm_hadd_pd(_mm_mul_pd(A4344,temp.v[1]),_mm_mul_pd(A5354,temp.v[1]))),
1067  _mm_mul_pd(A4555,temp.v[2]));
1068  double C44=temp2.d[0];
1069  double C45=temp2.d[1];
1070 
1071  // BA^T column 5
1072  //#define A55 _mm_set1_pd(A(4,4))
1073  temp2.v=_mm_set1_pd(A(4,4));
1074  temp.v[0]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,0),A5152),
1075  _mm_mul_pd(GetV(0,1),A5152)),
1076  _mm_hadd_pd(_mm_mul_pd(GetV(1,0),A5354),
1077  _mm_mul_pd(GetV(1,1),A5354))),
1078  _mm_mul_pd(GetV(0,4),temp2.v));
1079  temp.v[1]=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(GetV(0,2),A5152),
1080  _mm_mul_pd(GetV(0,3),A5152)),
1081  _mm_hadd_pd(_mm_mul_pd(GetV(1,2),A5354),
1082  _mm_mul_pd(GetV(1,3),A5354))),
1083  _mm_mul_pd(GetV(1,4),temp2.v));
1084  temp.v[2]=_mm_set1_pd(mA[4].d[0]*A(4,0)+mA[4].d[1]*A(4,1)+mA[4].d[2]*A(4,2)+mA[4].d[3]*A(4,3)+mA[4].d[4]*A(4,4));
1085 
1086 
1087  // C=ABA^T elements C15,C55
1088 #define A1555 _mm_setr_pd(A(0,4),A(4,4))
1089  temp2.v=_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A1112,temp.v[0]),_mm_mul_pd(A5152,temp.v[0])),
1090  _mm_hadd_pd(_mm_mul_pd(A1314,temp.v[1]),_mm_mul_pd(A5354,temp.v[1]))),
1091  _mm_mul_pd(A1555,temp.v[2]));
1092  double C15=temp2.d[0];
1093  double C55=temp2.d[1];
1094 
1095  return DMatrix5x5(C11,C12,C13,C14,C15,C22,C23,C24,C25,C33,C34,C35,C44,C45,C55);
1096  }
1097 
1098  // Matrix multiplication. Requires the SSE3 instruction HADD (horizontal add)
1099  DMatrix5x5 operator*(const DMatrix5x5 &m2){
1100  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 8, p)
1101  __m128d &A11A12=p[0];
1102  __m128d &A13A14=p[1];
1103  __m128d &A21A22=p[2];
1104  __m128d &A23A24=p[3];
1105  __m128d &A31A32=p[4];
1106  __m128d &A33A34=p[5];
1107  __m128d &A41A42=p[6];
1108  __m128d &A43A44=p[7];
1109  A11A12=_mm_setr_pd(mA[0].d[0],mA[1].d[0]);
1110  A13A14=_mm_setr_pd(mA[2].d[0],mA[3].d[0]);
1111  A21A22=_mm_setr_pd(mA[0].d[1],mA[1].d[1]);
1112  A23A24=_mm_setr_pd(mA[2].d[1],mA[3].d[1]);
1113  A31A32=_mm_setr_pd(mA[0].d[2],mA[1].d[2]);
1114  A33A34=_mm_setr_pd(mA[2].d[2],mA[3].d[2]);
1115  A41A42=_mm_setr_pd(mA[0].d[3],mA[1].d[3]);
1116  A43A44=_mm_setr_pd(mA[2].d[3],mA[3].d[3]);
1117  return
1118  DMatrix5x5(_mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A11A12,m2.GetV(0,0)),_mm_mul_pd(A21A22,m2.GetV(0,0))),
1119  _mm_hadd_pd(_mm_mul_pd(A13A14,m2.GetV(1,0)),_mm_mul_pd(A23A24,m2.GetV(1,0)))),
1120  _mm_mul_pd(mA[4].v[0],_mm_set1_pd(m2(4,0)))),
1121  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A11A12,m2.GetV(0,1)),_mm_mul_pd(A21A22,m2.GetV(0,1))),
1122  _mm_hadd_pd(_mm_mul_pd(A13A14,m2.GetV(1,1)),_mm_mul_pd(A23A24,m2.GetV(1,1)))),
1123  _mm_mul_pd(mA[4].v[0],_mm_set1_pd(m2(4,1)))),
1124  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A11A12,m2.GetV(0,2)),_mm_mul_pd(A21A22,m2.GetV(0,2))),
1125  _mm_hadd_pd(_mm_mul_pd(A13A14,m2.GetV(1,2)),_mm_mul_pd(A23A24,m2.GetV(1,2)))),
1126  _mm_mul_pd(mA[4].v[0],_mm_set1_pd(m2(4,2)))),
1127  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A11A12,m2.GetV(0,3)),_mm_mul_pd(A21A22,m2.GetV(0,3))),
1128  _mm_hadd_pd(_mm_mul_pd(A13A14,m2.GetV(1,3)),_mm_mul_pd(A23A24,m2.GetV(1,3)))),
1129  _mm_mul_pd(mA[4].v[0],_mm_set1_pd(m2(4,3)))),
1130  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A11A12,m2.GetV(0,4)),_mm_mul_pd(A21A22,m2.GetV(0,4))),
1131  _mm_hadd_pd(_mm_mul_pd(A13A14,m2.GetV(1,4)),_mm_mul_pd(A23A24,m2.GetV(1,4)))),
1132  _mm_mul_pd(mA[4].v[0],_mm_set1_pd(m2(4,4)))),
1133 
1134  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A31A32,m2.GetV(0,0)),_mm_mul_pd(A41A42,m2.GetV(0,0))),
1135  _mm_hadd_pd(_mm_mul_pd(A33A34,m2.GetV(1,0)),_mm_mul_pd(A43A44,m2.GetV(1,0)))),
1136  _mm_mul_pd(mA[4].v[1],_mm_set1_pd(m2(4,0)))),
1137  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A31A32,m2.GetV(0,1)),_mm_mul_pd(A41A42,m2.GetV(0,1))),
1138  _mm_hadd_pd(_mm_mul_pd(A33A34,m2.GetV(1,1)),_mm_mul_pd(A43A44,m2.GetV(1,1)))),
1139  _mm_mul_pd(mA[4].v[1],_mm_set1_pd(m2(4,1)))),
1140  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A31A32,m2.GetV(0,2)),_mm_mul_pd(A41A42,m2.GetV(0,2))),
1141  _mm_hadd_pd(_mm_mul_pd(A33A34,m2.GetV(1,2)),_mm_mul_pd(A43A44,m2.GetV(1,2)))),
1142  _mm_mul_pd(mA[4].v[1],_mm_set1_pd(m2(4,2)))),
1143  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A31A32,m2.GetV(0,3)),_mm_mul_pd(A41A42,m2.GetV(0,3))),
1144  _mm_hadd_pd(_mm_mul_pd(A33A34,m2.GetV(1,3)),_mm_mul_pd(A43A44,m2.GetV(1,3)))),
1145  _mm_mul_pd(mA[4].v[1],_mm_set1_pd(m2(4,3)))),
1146  _mm_add_pd(_mm_add_pd(_mm_hadd_pd(_mm_mul_pd(A31A32,m2.GetV(0,4)),_mm_mul_pd(A41A42,m2.GetV(0,4))),
1147  _mm_hadd_pd(_mm_mul_pd(A33A34,m2.GetV(1,4)),_mm_mul_pd(A43A44,m2.GetV(1,4)))),
1148  _mm_mul_pd(mA[4].v[1],_mm_set1_pd(m2(4,4)))),
1149 
1150  _mm_set_sd(mA[0].d[4]*m2(0,0)+mA[1].d[4]*m2(1,0)+mA[2].d[4]*m2(2,0)+mA[3].d[4]*m2(3,0)+mA[4].d[4]*m2(4,0)),
1151  _mm_set_sd(mA[0].d[4]*m2(0,1)+mA[1].d[4]*m2(1,1)+mA[2].d[4]*m2(2,1)+mA[3].d[4]*m2(3,1)+mA[4].d[4]*m2(4,1)),
1152  _mm_set_sd(mA[0].d[4]*m2(0,2)+mA[1].d[4]*m2(1,2)+mA[2].d[4]*m2(2,2)+mA[3].d[4]*m2(3,2)+mA[4].d[4]*m2(4,2)),
1153  _mm_set_sd(mA[0].d[4]*m2(0,3)+mA[1].d[4]*m2(1,3)+mA[2].d[4]*m2(2,3)+mA[3].d[4]*m2(3,3)+mA[4].d[4]*m2(4,3)),
1154  _mm_set_sd(mA[0].d[4]*m2(0,4)+mA[1].d[4]*m2(1,4)+mA[2].d[4]*m2(2,4)+mA[3].d[4]*m2(3,4)+mA[4].d[4]*m2(4,4))
1155  );
1156  }
1157 
1158 
1159 #else
1160  // Matrix multiplication: (5x5) x (5x5)
1161  DMatrix5x5 operator*(const DMatrix5x5 &m2){
1162  struct dvec1{
1163  __m128d v[5][5];
1164  };
1165  ALIGNED_16_BLOCK_WITH_PTR(struct dvec1, 1, p)
1166  struct dvec1 &temp=p[0];
1167  for (unsigned int i=0;i<5;i++){
1168  for (unsigned int j=0;j<5;j++){
1169  temp.v[i][j]=_mm_set1_pd(m2(i,j));
1170  }
1171  }
1172  // Preprocessor macro for multiplying two __m128d elements together
1173 #define MUL(i,j,k) _mm_mul_pd(GetV((i),(j)),temp.v[(j)][(k)])
1174 
1175  return DMatrix5x5(_mm_add_pd(MUL(0,0,0),
1176  _mm_add_pd(MUL(0,1,0),
1177  _mm_add_pd(MUL(0,2,0),
1178  _mm_add_pd(MUL(0,3,0),
1179  MUL(0,4,0))))),
1180  _mm_add_pd(MUL(0,0,1),
1181  _mm_add_pd(MUL(0,1,1),
1182  _mm_add_pd(MUL(0,2,1),
1183  _mm_add_pd(MUL(0,3,1),
1184  MUL(0,4,1))))),
1185  _mm_add_pd(MUL(0,0,2),
1186  _mm_add_pd(MUL(0,1,2),
1187  _mm_add_pd(MUL(0,2,2),
1188  _mm_add_pd(MUL(0,3,2),
1189  MUL(0,4,2))))),
1190  _mm_add_pd(MUL(0,0,3),
1191  _mm_add_pd(MUL(0,1,3),
1192  _mm_add_pd(MUL(0,2,3),
1193  _mm_add_pd(MUL(0,3,3),
1194  MUL(0,4,3))))),
1195  _mm_add_pd(MUL(0,0,4),
1196  _mm_add_pd(MUL(0,1,4),
1197  _mm_add_pd(MUL(0,2,4),
1198  _mm_add_pd(MUL(0,3,4),
1199  MUL(0,4,4))))),
1200  _mm_add_pd(MUL(1,0,0),
1201  _mm_add_pd(MUL(1,1,0),
1202  _mm_add_pd(MUL(1,2,0),
1203  _mm_add_pd(MUL(1,3,0),
1204  MUL(1,4,0))))),
1205  _mm_add_pd(MUL(1,0,1),
1206  _mm_add_pd(MUL(1,1,1),
1207  _mm_add_pd(MUL(1,2,1),
1208  _mm_add_pd(MUL(1,3,1),
1209  MUL(1,4,1))))),
1210  _mm_add_pd(MUL(1,0,2),
1211  _mm_add_pd(MUL(1,1,2),
1212  _mm_add_pd(MUL(1,2,2),
1213  _mm_add_pd(MUL(1,3,2),
1214  MUL(1,4,2))))),
1215  _mm_add_pd(MUL(1,0,3),
1216  _mm_add_pd(MUL(1,1,3),
1217  _mm_add_pd(MUL(1,2,3),
1218  _mm_add_pd(MUL(1,3,3),
1219  MUL(1,4,3))))),
1220  _mm_add_pd(MUL(1,0,4),
1221  _mm_add_pd(MUL(1,1,4),
1222  _mm_add_pd(MUL(1,2,4),
1223  _mm_add_pd(MUL(1,3,4),
1224  MUL(1,4,4))))),
1225  _mm_add_pd(MUL(2,0,0),
1226  _mm_add_pd(MUL(2,1,0),
1227  _mm_add_pd(MUL(2,2,0),
1228  _mm_add_pd(MUL(2,3,0),
1229  MUL(2,4,0))))),
1230  _mm_add_pd(MUL(2,0,1),
1231  _mm_add_pd(MUL(2,1,1),
1232  _mm_add_pd(MUL(2,2,1),
1233  _mm_add_pd(MUL(2,3,1),
1234  MUL(2,4,1))))),
1235  _mm_add_pd(MUL(2,0,2),
1236  _mm_add_pd(MUL(2,1,2),
1237  _mm_add_pd(MUL(2,2,2),
1238  _mm_add_pd(MUL(2,3,2),
1239  MUL(2,4,2))))),
1240  _mm_add_pd(MUL(2,0,3),
1241  _mm_add_pd(MUL(2,1,3),
1242  _mm_add_pd(MUL(2,2,3),
1243  _mm_add_pd(MUL(2,3,3),
1244  MUL(2,4,3))))),
1245  _mm_add_pd(MUL(2,0,4),
1246  _mm_add_pd(MUL(2,1,4),
1247  _mm_add_pd(MUL(2,2,4),
1248  _mm_add_pd(MUL(2,3,4),
1249  MUL(2,4,4))))));
1250  }
1251 
1252  // The following code performs the matrix operation ABA^T, where B is a symmetric matrix
1254  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 5, p)
1255  __m128d &A1=p[0];
1256  __m128d &A2=p[1];
1257  __m128d &A3=p[2];
1258  __m128d &A4=p[3];
1259  __m128d &A5=p[4];
1260  A5=_mm_set1_pd(A(0,4));
1261  A4=_mm_set1_pd(A(0,3));
1262  A3=_mm_set1_pd(A(0,2));
1263  A2=_mm_set1_pd(A(0,1));
1264  A1=_mm_set1_pd(A(0,0));
1265 
1266  // BA^T column 1
1267  union dvec2{
1268  __m128d v;
1269  double d[2];
1270  };
1271  ALIGNED_16_BLOCK_WITH_PTR(union dvec2, 1, p2)
1272  union dvec2 &temp2=p2[0];
1273  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(0,0)),
1274  _mm_add_pd(_mm_mul_pd(A2,GetV(0,1)),
1275  _mm_add_pd(_mm_mul_pd(A3,GetV(0,2)),
1276  _mm_add_pd(_mm_mul_pd(A4,GetV(0,3)),
1277  _mm_mul_pd(A5,GetV(0,4))))));
1278  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 5, p3)
1279  __m128d &BA1=p3[0];
1280  __m128d &BA2=p3[1];
1281  __m128d &BA3=p3[2];
1282  __m128d &BA4=p3[3];
1283  __m128d &BA5=p3[4];
1284  BA1=_mm_set1_pd(temp2.d[0]);
1285  BA2=_mm_set1_pd(temp2.d[1]);
1286  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(1,0)),
1287  _mm_add_pd(_mm_mul_pd(A2,GetV(1,1)),
1288  _mm_add_pd(_mm_mul_pd(A3,GetV(1,2)),
1289  _mm_add_pd(_mm_mul_pd(A4,GetV(1,3)),
1290  _mm_mul_pd(A5,GetV(1,4))))));
1291  BA3=_mm_set1_pd(temp2.d[0]);
1292  BA4=_mm_set1_pd(temp2.d[1]);
1293  BA5=_mm_set1_pd(mA[0].d[4]*A(0,0)+mA[1].d[4]*A(0,1)+mA[2].d[4]*A(0,2)+mA[3].d[4]*A(0,3)+mA[4].d[4]*A(0,4));
1294 
1295  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(0,0),BA1),
1296  _mm_add_pd(_mm_mul_pd(A.GetV(0,1),BA2),
1297  _mm_add_pd(_mm_mul_pd(A.GetV(0,2),BA3),
1298  _mm_add_pd(_mm_mul_pd(A.GetV(0,3),BA4),
1299  _mm_mul_pd(A.GetV(0,4),BA5)))));
1300  double C11=temp2.d[0];
1301  double C12=temp2.d[1];
1302 
1303  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(1,0),BA1),
1304  _mm_add_pd(_mm_mul_pd(A.GetV(1,1),BA2),
1305  _mm_add_pd(_mm_mul_pd(A.GetV(1,2),BA3),
1306  _mm_add_pd(_mm_mul_pd(A.GetV(1,3),BA4),
1307  _mm_mul_pd(A.GetV(1,4),BA5)))));
1308 
1309  double C13=temp2.d[0];
1310  double C14=temp2.d[1];
1311 
1312  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(2,0),BA1),
1313  _mm_add_pd(_mm_mul_pd(A.GetV(2,1),BA2),
1314  _mm_add_pd(_mm_mul_pd(A.GetV(2,2),BA3),
1315  _mm_add_pd(_mm_mul_pd(A.GetV(2,3),BA4),
1316  _mm_mul_pd(A.GetV(2,4),BA5)))));
1317 
1318  double C15=temp2.d[0];
1319 
1320 
1321  // BA^T column 2
1322  A5=_mm_set1_pd(A(1,4));
1323  A4=_mm_set1_pd(A(1,3));
1324  A3=_mm_set1_pd(A(1,2));
1325  A2=_mm_set1_pd(A(1,1));
1326  A1=_mm_set1_pd(A(1,0));
1327  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(0,0)),
1328  _mm_add_pd(_mm_mul_pd(A2,GetV(0,1)),
1329  _mm_add_pd(_mm_mul_pd(A3,GetV(0,2)),
1330  _mm_add_pd(_mm_mul_pd(A4,GetV(0,3)),
1331  _mm_mul_pd(A5,GetV(0,4))))));
1332  BA1=_mm_set1_pd(temp2.d[0]);
1333  BA2=_mm_set1_pd(temp2.d[1]);
1334  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(1,0)),
1335  _mm_add_pd(_mm_mul_pd(A2,GetV(1,1)),
1336  _mm_add_pd(_mm_mul_pd(A3,GetV(1,2)),
1337  _mm_add_pd(_mm_mul_pd(A4,GetV(1,3)),
1338  _mm_mul_pd(A5,GetV(1,4))))));
1339  BA3=_mm_set1_pd(temp2.d[0]);
1340  BA4=_mm_set1_pd(temp2.d[1]);
1341  BA5=_mm_set1_pd(mA[0].d[4]*A(1,0)+mA[1].d[4]*A(1,1)+mA[2].d[4]*A(1,2)+mA[3].d[4]*A(1,3)+mA[4].d[4]*A(1,4));
1342 
1343  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(0,0),BA1),
1344  _mm_add_pd(_mm_mul_pd(A.GetV(0,1),BA2),
1345  _mm_add_pd(_mm_mul_pd(A.GetV(0,2),BA3),
1346  _mm_add_pd(_mm_mul_pd(A.GetV(0,3),BA4),
1347  _mm_mul_pd(A.GetV(0,4),BA5)))));
1348 
1349  double C22=temp2.d[1];
1350 
1351  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(1,0),BA1),
1352  _mm_add_pd(_mm_mul_pd(A.GetV(1,1),BA2),
1353  _mm_add_pd(_mm_mul_pd(A.GetV(1,2),BA3),
1354  _mm_add_pd(_mm_mul_pd(A.GetV(1,3),BA4),
1355  _mm_mul_pd(A.GetV(1,4),BA5)))));
1356 
1357  double C23=temp2.d[0];
1358  double C24=temp2.d[1];
1359 
1360  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(2,0),BA1),
1361  _mm_add_pd(_mm_mul_pd(A.GetV(2,1),BA2),
1362  _mm_add_pd(_mm_mul_pd(A.GetV(2,2),BA3),
1363  _mm_add_pd(_mm_mul_pd(A.GetV(2,3),BA4),
1364  _mm_mul_pd(A.GetV(2,4),BA5)))));
1365 
1366  double C25=temp2.d[0];
1367 
1368 
1369  // BA^T column 3
1370  A5=_mm_set1_pd(A(2,4));
1371  A4=_mm_set1_pd(A(2,3));
1372  A3=_mm_set1_pd(A(2,2));
1373  A2=_mm_set1_pd(A(2,1));
1374  A1=_mm_set1_pd(A(2,0));
1375  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(0,0)),
1376  _mm_add_pd(_mm_mul_pd(A2,GetV(0,1)),
1377  _mm_add_pd(_mm_mul_pd(A3,GetV(0,2)),
1378  _mm_add_pd(_mm_mul_pd(A4,GetV(0,3)),
1379  _mm_mul_pd(A5,GetV(0,4))))));
1380  BA1=_mm_set1_pd(temp2.d[0]);
1381  BA2=_mm_set1_pd(temp2.d[1]);
1382  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(1,0)),
1383  _mm_add_pd(_mm_mul_pd(A2,GetV(1,1)),
1384  _mm_add_pd(_mm_mul_pd(A3,GetV(1,2)),
1385  _mm_add_pd(_mm_mul_pd(A4,GetV(1,3)),
1386  _mm_mul_pd(A5,GetV(1,4))))));
1387  BA3=_mm_set1_pd(temp2.d[0]);
1388  BA4=_mm_set1_pd(temp2.d[1]);
1389  BA5=_mm_set1_pd(mA[0].d[4]*A(2,0)+mA[1].d[4]*A(2,1)+mA[2].d[4]*A(2,2)+mA[3].d[4]*A(2,3)+mA[4].d[4]*A(2,4));
1390 
1391  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(1,0),BA1),
1392  _mm_add_pd(_mm_mul_pd(A.GetV(1,1),BA2),
1393  _mm_add_pd(_mm_mul_pd(A.GetV(1,2),BA3),
1394  _mm_add_pd(_mm_mul_pd(A.GetV(1,3),BA4),
1395  _mm_mul_pd(A.GetV(1,4),BA5)))));
1396 
1397  double C33=temp2.d[0];
1398  double C34=temp2.d[1];
1399 
1400  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(2,0),BA1),
1401  _mm_add_pd(_mm_mul_pd(A.GetV(2,1),BA2),
1402  _mm_add_pd(_mm_mul_pd(A.GetV(2,2),BA3),
1403  _mm_add_pd(_mm_mul_pd(A.GetV(2,3),BA4),
1404  _mm_mul_pd(A.GetV(2,4),BA5)))));
1405 
1406  double C35=temp2.d[0];
1407 
1408 
1409  // BA^T column 4
1410  A5=_mm_set1_pd(A(3,4));
1411  A4=_mm_set1_pd(A(3,3));
1412  A3=_mm_set1_pd(A(3,2));
1413  A2=_mm_set1_pd(A(3,1));
1414  A1=_mm_set1_pd(A(3,0));
1415  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(0,0)),
1416  _mm_add_pd(_mm_mul_pd(A2,GetV(0,1)),
1417  _mm_add_pd(_mm_mul_pd(A3,GetV(0,2)),
1418  _mm_add_pd(_mm_mul_pd(A4,GetV(0,3)),
1419  _mm_mul_pd(A5,GetV(0,4))))));
1420  BA1=_mm_set1_pd(temp2.d[0]);
1421  BA2=_mm_set1_pd(temp2.d[1]);
1422  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(1,0)),
1423  _mm_add_pd(_mm_mul_pd(A2,GetV(1,1)),
1424  _mm_add_pd(_mm_mul_pd(A3,GetV(1,2)),
1425  _mm_add_pd(_mm_mul_pd(A4,GetV(1,3)),
1426  _mm_mul_pd(A5,GetV(1,4))))));
1427  BA3=_mm_set1_pd(temp2.d[0]);
1428  BA4=_mm_set1_pd(temp2.d[1]);
1429  BA5=_mm_set1_pd(mA[0].d[4]*A(3,0)+mA[1].d[4]*A(3,1)+mA[2].d[4]*A(3,2)+mA[3].d[4]*A(3,3)+mA[4].d[4]*A(3,4));
1430 
1431  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(1,0),BA1),
1432  _mm_add_pd(_mm_mul_pd(A.GetV(1,1),BA2),
1433  _mm_add_pd(_mm_mul_pd(A.GetV(1,2),BA3),
1434  _mm_add_pd(_mm_mul_pd(A.GetV(1,3),BA4),
1435  _mm_mul_pd(A.GetV(1,4),BA5)))));
1436  double C44=temp2.d[1];
1437 
1438 
1439  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(2,0),BA1),
1440  _mm_add_pd(_mm_mul_pd(A.GetV(2,1),BA2),
1441  _mm_add_pd(_mm_mul_pd(A.GetV(2,2),BA3),
1442  _mm_add_pd(_mm_mul_pd(A.GetV(2,3),BA4),
1443  _mm_mul_pd(A.GetV(2,4),BA5)))));
1444 
1445  double C45=temp2.d[0];
1446 
1447  // BA^T column 5
1448  A5=_mm_set1_pd(A(4,4));
1449  A4=_mm_set1_pd(A(4,3));
1450  A3=_mm_set1_pd(A(4,2));
1451  A2=_mm_set1_pd(A(4,1));
1452  A1=_mm_set1_pd(A(4,0));
1453  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(0,0)),
1454  _mm_add_pd(_mm_mul_pd(A2,GetV(0,1)),
1455  _mm_add_pd(_mm_mul_pd(A3,GetV(0,2)),
1456  _mm_add_pd(_mm_mul_pd(A4,GetV(0,3)),
1457  _mm_mul_pd(A5,GetV(0,4))))));
1458  BA1=_mm_set1_pd(temp2.d[0]);
1459  BA2=_mm_set1_pd(temp2.d[1]);
1460  temp2.v=_mm_add_pd(_mm_mul_pd(A1,GetV(1,0)),
1461  _mm_add_pd(_mm_mul_pd(A2,GetV(1,1)),
1462  _mm_add_pd(_mm_mul_pd(A3,GetV(1,2)),
1463  _mm_add_pd(_mm_mul_pd(A4,GetV(1,3)),
1464  _mm_mul_pd(A5,GetV(1,4))))));
1465 
1466  BA3=_mm_set1_pd(temp2.d[0]);
1467  BA4=_mm_set1_pd(temp2.d[1]);
1468  BA5=_mm_set1_pd(mA[0].d[4]*A(4,0)+mA[1].d[4]*A(4,1)+mA[2].d[4]*A(4,2)+mA[3].d[4]*A(4,3)+mA[4].d[4]*A(4,4));
1469 
1470 
1471  temp2.v=_mm_add_pd(_mm_mul_pd(A.GetV(2,0),BA1),
1472  _mm_add_pd(_mm_mul_pd(A.GetV(2,1),BA2),
1473  _mm_add_pd(_mm_mul_pd(A.GetV(2,2),BA3),
1474  _mm_add_pd(_mm_mul_pd(A.GetV(2,3),BA4),
1475  _mm_mul_pd(A.GetV(2,4),BA5)))));
1476  double C55=temp2.d[0];
1477 
1478  return DMatrix5x5(C11,C12,C13,C14,C15,C22,C23,C24,C25,C33,C34,C35,C44,C45,C55);
1479  }
1480 
1481 #endif
1482 
1483 
1484  // Find the transpose of this matrix
1486 #define SWAP(i,j,k,m) _mm_setr_pd(mA[(j)].d[(i)],mA[(m)].d[(k)])
1487 
1488  return DMatrix5x5(SWAP(0,0,0,1),SWAP(1,0,1,1),SWAP(2,0,2,1),SWAP(3,0,3,1),SWAP(4,0,4,1),
1489  SWAP(0,2,0,3),SWAP(1,2,1,3),SWAP(2,2,2,3),SWAP(3,2,3,3),SWAP(4,2,4,3),
1490 
1491  _mm_setr_pd(mA[4].d[0],0.),_mm_setr_pd(mA[4].d[1],0.),
1492  _mm_setr_pd(mA[4].d[2],0.),_mm_setr_pd(mA[4].d[3],0.),
1493  _mm_setr_pd(mA[4].d[4],0.));
1494  }
1495 
1496  // Matrix inversion by blocks
1497  DMatrix5x5 Invert(){
1498  DMatrix2x2 A(GetV(0,0),GetV(0,1));
1499  DMatrix3x2 C(GetV(1,0),GetV(1,1),GetV(2,0),GetV(2,1));
1500  DMatrix3x2 CAinv=C*A.Invert();
1501  DMatrix3x3 D(GetV(1,2),GetV(1,3),GetV(1,4),GetV(2,2),GetV(2,3),GetV(2,4));
1502  DMatrix2x3 B(GetV(0,2),GetV(0,3),GetV(0,4));
1503  DMatrix2x3 BDinv=B*D.Invert();
1504  DMatrix2x2 AA=(A-BDinv*C).Invert();
1505  DMatrix3x3 DD=(D-CAinv*B).Invert();
1506  return DMatrix5x5(AA,-AA*BDinv,-DD*CAinv,DD);
1507  }
1508  // Matrix inversion by blocks for a symmetric matrix
1510  DMatrix2x2 A(GetV(0,0),GetV(0,1));
1511  DMatrix3x2 C(GetV(1,0),GetV(1,1),GetV(2,0),GetV(2,1));
1512  DMatrix3x2 CAinv=C*A.Invert();
1513  DMatrix3x3 D(GetV(1,2),GetV(1,3),GetV(1,4),GetV(2,2),GetV(2,3),GetV(2,4));
1514  DMatrix2x3 B(GetV(0,2),GetV(0,3),GetV(0,4));
1515  DMatrix2x3 BDinv=B*D.InvertSym();
1516  DMatrix2x2 AA=(A-BDinv*C).Invert();
1517  DMatrix3x3 DD=(D-CAinv*B).InvertSym();
1518  return DMatrix5x5(AA,-AA*BDinv,-DD*CAinv,DD);
1519  }
1520 
1521 
1522 
1523  // Zero out the matrix
1524  DMatrix5x5 Zero(){
1525  for (unsigned int i=0;i<5;i++){
1526  for (unsigned int j=0;j<3;j++){
1527  mA[i].v[j]=_mm_setzero_pd();
1528  }
1529  }
1530  return *this;
1531  }
1532 
1533  // Scale 5x5 matrix by a floating point number
1534  DMatrix5x5 operator*=(const double c){
1535  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 1, p)
1536  __m128d &scale=p[0];
1537  scale=_mm_set1_pd(c);
1538 
1539  for (unsigned int i=0;i<5;i++){
1540  for (unsigned int j=0;j<3;j++){
1541  mA[i].v[j]=_mm_mul_pd(scale,mA[i].v[j]);
1542  }
1543  }
1544  return *this;
1545  }
1546 
1547  void Print(){
1548  cout << "DMatrix5x5:" <<endl;
1549  cout << " | 0 | 1 | 2 | 3 | 4 |" <<endl;
1550  cout << "----------------------------------------------------------------------" <<endl;
1551 
1552  for (unsigned int i=0;i<5;i++){
1553  cout <<" "<< i << " |";
1554  for (unsigned int j=0;j<5;j++){
1555  cout << setw(11)<<setprecision(4)<<mA[j].d[i] <<" ";
1556  }
1557  cout << endl;
1558  }
1559  }
1560 
1561  private:
1562  union dvec{
1563  __m128d v[3];
1564  double d[6];
1565  };
1566  ALIGNED_16_BLOCK(union dvec, 5, mA)
1567 };
1568 
1569 // Scale 5x5 matrix by a floating point number
1570 inline DMatrix5x5 operator*(const double c,const DMatrix5x5 &M){
1571  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 1, p)
1572  __m128d &scale=p[0];
1573  scale=_mm_set1_pd(c);
1574 
1575 #define SCALE(i,j) _mm_mul_pd(scale,M.GetV((i),(j)))
1576  return DMatrix5x5(SCALE(0,0),SCALE(0,1),SCALE(0,2),SCALE(0,3),SCALE(0,4),
1577  SCALE(1,0),SCALE(1,1),SCALE(1,2),SCALE(1,3),SCALE(1,4),
1578  SCALE(2,0),SCALE(2,1),SCALE(2,2),SCALE(2,3),SCALE(2,4));
1579 
1580 }
1581 
1582 #endif
1583 
1584 
DMatrix3x3 Invert() const
Definition: DMatrix3x3.h:90
DMatrix5x5 Zero()
Definition: DMatrix5x5.h:210
DMatrix2x2 Invert()
Definition: DMatrix2x2.h:64
double SandwichMultiply(const DMatrix5x1 &A)
Definition: DMatrix5x5.h:573
DMatrix5x5(const DMatrix5x5 &m2)
Definition: DMatrix5x5.h:16
DMatrix2x1 operator*(const double c, const DMatrix2x1 &M)
Definition: DMatrix2x1.h:58
DMatrix5x5 Transpose()
Definition: DMatrix5x5.h:66
#define ALIGNED_16_BLOCK_PTR(TYPE, NUM, PTR)
Definition: align_16.h:24
double mA[5][5]
Definition: DMatrix5x5.h:618
#define c
#define ALIGNED_16_BLOCK_WITH_PTR(TYPE, NUM, PTR)
Definition: align_16.h:27
DMatrix5x5 & operator+=(const DMatrix5x5 &m2)
Definition: DMatrix5x5.h:86
DMatrixDSym GetSub(unsigned int lowerBound, unsigned int upperBound)
Definition: DMatrix5x5.h:584
void Print()
Definition: DMatrix5x5.h:603
DMatrix5x2 operator*(const DMatrix5x2 &m2)
Definition: DMatrix5x5.h:164
TMatrixDSym DMatrixDSym
Definition: DMatrixDSym.h:13
DMatrix5x5 & operator*=(const double c)
Definition: DMatrix5x5.h:139
DMatrix5x5(const DMatrix2x2 &A, const DMatrix2x3 &B, const DMatrix3x2 &C, const DMatrix3x3 &D)
Definition: DMatrix5x5.h:26
DMatrix5x5 operator+(const DMatrix5x5 &m2) const
Definition: DMatrix5x5.h:76
DMatrix5x5 & operator=(const DMatrix5x5 &m1)
Definition: DMatrix5x5.h:56
DMatrix3x3 InvertSym()
Definition: DMatrix3x3.h:110
DMatrix5x5 InvertSym()
Definition: DMatrix5x5.h:221
DMatrix5x5 SubSym(const DMatrix5x5 &m2) const
Definition: DMatrix5x5.h:108
DMatrix5x5 SandwichMultiply(const DMatrix5x5 &A)
Definition: DMatrix5x5.h:237
DMatrix5x5 SandwichMultiply2(const DMatrix5x5 &A)
Definition: DMatrix5x5.h:506
double & operator()(int row, int col)
Definition: DMatrix5x5.h:48
DMatrix5x5 operator*(const DMatrix5x5 &m2)
Definition: DMatrix5x5.h:194
DMatrix5x5 operator-(const DMatrix5x5 &m2) const
Definition: DMatrix5x5.h:120
DMatrix5x1 operator*(const DMatrix5x1 &m2)
Definition: DMatrix5x5.h:149
#define ALIGNED_16_BLOCK(TYPE, NUM, PTR)
Definition: align_16.h:19
bool IsPosDef()
Definition: DMatrix5x5.h:595
for(auto locVertexInfo:dStepVertexInfos)
TH2F * temp
DMatrix5x5 AddSym(const DMatrix5x5 &m2) const
Definition: DMatrix5x5.h:96
static double DD[5]
DMatrix5x5 & operator-=(const DMatrix5x5 &m2)
Definition: DMatrix5x5.h:130