Hall-D Software  alpha
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatrixSIMD.h
Go to the documentation of this file.
1 #ifndef _DMatrixSIMD_
2 #define _DMatrixSIMD_
3 
4 #include <math.h>
5 #ifdef USE_SSE2
6 #include <emmintrin.h> // Header file for SSE2 SIMD instructions
7 #endif
8 #ifdef USE_SSE3
9 #include <pmmintrin.h> // Header file for SSE3 SIMD instructions
10 #endif
11 #include <iostream>
12 #include <iomanip>
13 using namespace std;
14 
15 #include "DMatrix2x1.h"
16 #include "DMatrix2x2.h"
17 #include "DMatrix3x1.h"
18 #include "DMatrix3x2.h"
19 #include "DMatrix3x3.h"
20 #include "DMatrix2x3.h"
21 #include "DMatrix1x3.h"
22 #include "DMatrix1x2.h"
23 // We are not currently using any of the 4x2,4x4,etc matrices in the main code
24 #include "DMatrix4x1.h"
25 #include "DMatrix4x2.h"
26 #include "DMatrix4x4.h"
27 #include "DMatrix2x4.h"
28 #include "DMatrix1x4.h"
29 #include "align_16.h"
30 
31 
32 #ifndef USE_SSE2
33 
34 // Matrix multiplication: (2x1) x (1x2)
35 inline DMatrix2x2 operator*(const DMatrix2x1 &m1,const DMatrix1x2 &m2){
36  return DMatrix2x2(m1(0)*m2(0),m1(0)*m2(1),
37  m1(1)*m2(0),m1(1)*m2(1)
38  );
39 }
40 
41 
42 // Matrix multiplication: (3x1) x (1x3)
43 inline DMatrix3x3 operator*(const DMatrix3x1 &m1,const DMatrix1x3 &m2){
44  return DMatrix3x3(m1(0)*m2(0),m1(0)*m2(1),m1(0)*m2(2),
45  m1(1)*m2(0),m1(1)*m2(1),m1(1)*m2(2),
46  m1(2)*m2(0),m1(2)*m2(1),m1(2)*m2(2)
47  );
48 }
49 
50 
51 // Matrix multiplication: (3x2) x (2x3)
52 inline DMatrix3x3 operator*(const DMatrix3x2 &m1,const DMatrix2x3 &m2){
53  return DMatrix3x3(m1(0,0)*m2(0,0)+m1(0,1)*m2(1,0),
54  m1(0,0)*m2(0,1)+m1(0,1)*m2(1,1),
55  m1(0,0)*m2(0,2)+m1(0,1)*m2(1,2),
56  m1(1,0)*m2(0,0)+m1(1,1)*m2(1,0),
57  m1(1,0)*m2(0,1)+m1(1,1)*m2(1,1),
58  m1(1,0)*m2(0,2)+m1(1,1)*m2(1,2),
59  m1(2,0)*m2(0,0)+m1(2,1)*m2(1,0),
60  m1(2,0)*m2(0,1)+m1(2,1)*m2(1,1),
61  m1(2,0)*m2(0,2)+m1(2,1)*m2(1,2)
62  );
63 }
64 
65 // Matrix multiplication: (2x2) x (2x3)
66 inline DMatrix2x3 operator*(const DMatrix2x2 &m1,const DMatrix2x3 &m2){
67  return DMatrix2x3(m1(0,0)*m2(0,0)+m1(0,1)*m2(1,0),
68  m1(0,0)*m2(0,1)+m1(0,1)*m2(1,1),
69  m1(0,0)*m2(0,2)+m1(0,1)*m2(1,2),
70  m1(1,0)*m2(0,0)+m1(1,1)*m2(1,0),
71  m1(1,0)*m2(0,1)+m1(1,1)*m2(1,1),
72  m1(1,0)*m2(0,2)+m1(1,1)*m2(1,2)
73  );
74 
75 
76 }
77 
78 // Matrix multiplication: (4x2) x (2x4)
79 inline DMatrix4x4 operator*(const DMatrix4x2 &m1,const DMatrix2x4 &m2){
80  return DMatrix4x4(m1(0,0)*m2(0,0)+m1(0,1)*m2(1,0),
81  m1(0,0)*m2(0,1)+m1(0,1)*m2(1,1),
82  m1(0,0)*m2(0,2)+m1(0,1)*m2(1,2),
83  m1(0,0)*m2(0,3)+m1(0,1)*m2(1,3),
84 
85  m1(1,0)*m2(0,0)+m1(1,1)*m2(1,0),
86  m1(1,0)*m2(0,1)+m1(1,1)*m2(1,1),
87  m1(1,0)*m2(0,2)+m1(1,1)*m2(1,2),
88  m1(1,0)*m2(0,3)+m1(1,1)*m2(1,3),
89 
90  m1(2,0)*m2(0,0)+m1(2,1)*m2(1,0),
91  m1(2,0)*m2(0,1)+m1(2,1)*m2(1,1),
92  m1(2,0)*m2(0,2)+m1(2,1)*m2(1,2),
93  m1(2,0)*m2(0,3)+m1(2,1)*m2(1,3),
94 
95  m1(3,0)*m2(0,0)+m1(3,1)*m2(1,0),
96  m1(3,0)*m2(0,1)+m1(3,1)*m2(1,1),
97  m1(3,0)*m2(0,2)+m1(3,1)*m2(1,2),
98  m1(3,0)*m2(0,3)+m1(3,1)*m2(1,3)
99  );
100 }
101 
102 
103 #else
104 
105 // Matrix multiplication: (3x2) x (2x3)
106 inline DMatrix3x3 operator*(const DMatrix3x2 &m1,
107  const DMatrix2x3 &m2){
108  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 6, p)
109  __m128d &a11=p[0];
110  __m128d &a12=p[1];
111  __m128d &a13=p[2];
112  __m128d &a21=p[3];
113  __m128d &a22=p[4];
114  __m128d &a23=p[5];
115  a11=_mm_set1_pd(m2(0,0));
116  a12=_mm_set1_pd(m2(0,1));
117  a13=_mm_set1_pd(m2(0,2));
118  a21=_mm_set1_pd(m2(1,0));
119  a22=_mm_set1_pd(m2(1,1));
120  a23=_mm_set1_pd(m2(1,2));
121  return DMatrix3x3(_mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a11),
122  _mm_mul_pd(m1.GetV(0,1),a21)),
123  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a12),
124  _mm_mul_pd(m1.GetV(0,1),a22)),
125  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a13),
126  _mm_mul_pd(m1.GetV(0,1),a23)),
127  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a11),
128  _mm_mul_pd(m1.GetV(1,1),a21)),
129  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a12),
130  _mm_mul_pd(m1.GetV(1,1),a22)),
131  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a13),
132  _mm_mul_pd(m1.GetV(1,1),a23)));
133 }
134 
135 // Matrix multiplication: (2x2) x (2x3)
136 inline DMatrix2x3 operator*(const DMatrix2x2 &m1,const DMatrix2x3 &m2){
137  return DMatrix2x3(_mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,0))),
138  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,0)))),
139  _mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,1))),
140  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,1)))),
141  _mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,2))),
142  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,2)))));
143 }
144 
145 //----------------------------------------------------------------------------
146 // NOTE: We are not currently using any of the 2x4,4x2, etc matrices in the
147 // code. As a consequence I am commenting out the following routines until
148 // I get around to writing the non-SIMD versions of this code... whenever that
149 // may be...
150 // Matrix multiplication: (4x2) x (2x4)
151 
152 inline DMatrix4x4 operator*(const DMatrix4x2 &m1,
153  const DMatrix2x4 &m2){
154  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 8, p)
155  __m128d &a11=p[0];
156  __m128d &a12=p[1];
157  __m128d &a13=p[2];
158  __m128d &a14=p[3];
159  __m128d &a21=p[4];
160  __m128d &a22=p[5];
161  __m128d &a23=p[6];
162  __m128d &a24=p[7];
163  a11=_mm_set1_pd(m2(0,0));
164  a12=_mm_set1_pd(m2(0,1));
165  a13=_mm_set1_pd(m2(0,2));
166  a14=_mm_set1_pd(m2(0,3));
167  a21=_mm_set1_pd(m2(1,0));
168  a22=_mm_set1_pd(m2(1,1));
169  a23=_mm_set1_pd(m2(1,2));
170  a24=_mm_set1_pd(m2(1,3));
171  return DMatrix4x4(_mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a11),
172  _mm_mul_pd(m1.GetV(0,1),a21)),
173  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a12),
174  _mm_mul_pd(m1.GetV(0,1),a22)),
175  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a13),
176  _mm_mul_pd(m1.GetV(0,1),a23)),
177  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a14),
178  _mm_mul_pd(m1.GetV(0,1),a24)),
179  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a11),
180  _mm_mul_pd(m1.GetV(1,1),a21)),
181  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a12),
182  _mm_mul_pd(m1.GetV(1,1),a22)),
183  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a13),
184  _mm_mul_pd(m1.GetV(1,1),a23)),
185  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a14),
186  _mm_mul_pd(m1.GetV(1,1),a24)));
187  }
188 
189  // Matrix multiplication: (2x2) x (2x4)
190  inline DMatrix2x4 operator*(const DMatrix2x2 &m1,const DMatrix2x4 &m2){
191  return DMatrix2x4(_mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,0))),
192  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,0)))),
193  _mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,1))),
194  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,1)))),
195  _mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,2))),
196  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,2)))),
197  _mm_add_pd(_mm_mul_pd(m1.GetV(0),_mm_set1_pd(m2(0,3))),
198  _mm_mul_pd(m1.GetV(1),_mm_set1_pd(m2(1,3)))));
199  }
200 
201 // end of un-used matrix block...
202 #endif
203 
204 
205 #include "DMatrix5x1.h"
206 #include "DMatrix5x2.h"
207 #include "DMatrix5x5.h"
208 #include "DMatrix2x5.h"
209 #include "DMatrix1x5.h"
210 
211 #ifndef USE_SSE2
212 
213 // Multiply a 4x1 matrix by a 1x4 matrix
214 inline DMatrix4x4 operator*(const DMatrix4x1 &m1,const DMatrix1x4 &m2){
216  for (unsigned int i=0;i<4;i++){
217  for (unsigned int j=0;j<4;j++){
218  temp(i,j)=m1(i)*m2(j);
219  }
220  }
221  return temp;
222 }
223 
224 
225 
226 // Find the tranpose of a 5x2 matrix
227 inline DMatrix2x5 Transpose(const DMatrix5x2 &M){
228  return DMatrix2x5(M(0,0),M(1,0),M(2,0),M(3,0),M(4,0),
229  M(0,1),M(1,1),M(2,1),M(3,1),M(4,1));
230 }
231 
232 // Multiply a 5x1 matrix by its transpose
235  for (unsigned int i=0;i<5;i++){
236  for (unsigned int j=0;j<5;j++){
237  temp(i,j)=m1(i)*m1(j);
238  }
239  }
240  return temp;
241 }
242 
243 // Multiply a 5x1 matrix by a 1x5 matrix
244 inline DMatrix5x5 operator*(const DMatrix5x1 &m1,const DMatrix1x5 &m2){
246  for (unsigned int i=0;i<5;i++){
247  for (unsigned int j=0;j<5;j++){
248  temp(i,j)=m1(i)*m2(j);
249  }
250  }
251  return temp;
252 }
253 
254 // Multiply a 5x2 matrix by a 2x5 matrix
255 inline DMatrix5x5 operator*(const DMatrix5x2 &m1,const DMatrix2x5 &m2){
257  for (unsigned int i=0;i<5;i++){
258  for (unsigned int j=0;j<5;j++){
259  temp(i,j)=m1(i,0)*m2(0,j)+m1(i,1)*m2(1,j);
260  }
261  }
262  return temp;
263 
264 }
265 
266 #else
267 
268 // Multiply a 4x1 matrix by a 1x4 matrix
269 inline DMatrix4x4 operator*(const DMatrix4x1 &m1,const DMatrix1x4 &m2){
270  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 4, p)
271  __m128d &b1=p[0];
272  __m128d &b2=p[1];
273  __m128d &b3=p[2];
274  __m128d &b4=p[3];
275  b1=_mm_set1_pd(m2(0));
276  b2=_mm_set1_pd(m2(1));
277  b3=_mm_set1_pd(m2(2));
278  b4=_mm_set1_pd(m2(3));
279  return DMatrix4x4(_mm_mul_pd(m1.GetV(0),b1),_mm_mul_pd(m1.GetV(0),b2),
280  _mm_mul_pd(m1.GetV(0),b3),_mm_mul_pd(m1.GetV(0),b4),
281  _mm_mul_pd(m1.GetV(1),b1),_mm_mul_pd(m1.GetV(1),b2),
282  _mm_mul_pd(m1.GetV(1),b3),_mm_mul_pd(m1.GetV(1),b4)
283  );
284 }
285 
286 
287 
288 
289 // Find the tranpose of a 5x2 matrix
290 inline DMatrix2x5 Transpose(const DMatrix5x2 &M){
291  return DMatrix2x5(_mm_setr_pd(M(0,0),M(0,1)),_mm_setr_pd(M(1,0),M(1,1)),
292  _mm_setr_pd(M(2,0),M(2,1)),_mm_setr_pd(M(3,0),M(3,1)),
293  _mm_setr_pd(M(4,0),M(4,1)));
294 }
295 
296 
297 // Multiply a 5x1 matrix by its transpose
298 inline DMatrix5x5 MultiplyTranspose(const DMatrix5x1 &m1){
299  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 5, p)
300  __m128d &b1=p[0];
301  __m128d &b2=p[1];
302  __m128d &b3=p[2];
303  __m128d &b4=p[3];
304  __m128d &b5=p[4];
305  b1=_mm_set1_pd(m1(0));
306  b2=_mm_set1_pd(m1(1));
307  b3=_mm_set1_pd(m1(2));
308  b4=_mm_set1_pd(m1(3));
309  b5=_mm_set1_pd(m1(4));
310  return DMatrix5x5(_mm_mul_pd(m1.GetV(0),b1),_mm_mul_pd(m1.GetV(0),b2),
311  _mm_mul_pd(m1.GetV(0),b3),_mm_mul_pd(m1.GetV(0),b4),
312  _mm_mul_pd(m1.GetV(0),b5),
313  _mm_mul_pd(m1.GetV(1),b1),_mm_mul_pd(m1.GetV(1),b2),
314  _mm_mul_pd(m1.GetV(1),b3),_mm_mul_pd(m1.GetV(1),b4),
315  _mm_mul_pd(m1.GetV(1),b5),
316  _mm_mul_pd(m1.GetV(2),b1),_mm_mul_pd(m1.GetV(2),b2),
317  _mm_mul_pd(m1.GetV(2),b3),_mm_mul_pd(m1.GetV(2),b4),
318  _mm_mul_pd(m1.GetV(2),b5));
319 }
320 
321 // Multiply a 5x1 matrix by a 1x5 matrix
322 inline DMatrix5x5 operator*(const DMatrix5x1 &m1,const DMatrix1x5 &m2){
323  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 5, p)
324  __m128d &b1=p[0];
325  __m128d &b2=p[1];
326  __m128d &b3=p[2];
327  __m128d &b4=p[3];
328  __m128d &b5=p[4];
329  b1=_mm_set1_pd(m2(0));
330  b2=_mm_set1_pd(m2(1));
331  b3=_mm_set1_pd(m2(2));
332  b4=_mm_set1_pd(m2(3));
333  b5=_mm_set1_pd(m2(4));
334  return DMatrix5x5(_mm_mul_pd(m1.GetV(0),b1),_mm_mul_pd(m1.GetV(0),b2),
335  _mm_mul_pd(m1.GetV(0),b3),_mm_mul_pd(m1.GetV(0),b4),
336  _mm_mul_pd(m1.GetV(0),b5),
337  _mm_mul_pd(m1.GetV(1),b1),_mm_mul_pd(m1.GetV(1),b2),
338  _mm_mul_pd(m1.GetV(1),b3),_mm_mul_pd(m1.GetV(1),b4),
339  _mm_mul_pd(m1.GetV(1),b5),
340  _mm_mul_pd(m1.GetV(2),b1),_mm_mul_pd(m1.GetV(2),b2),
341  _mm_mul_pd(m1.GetV(2),b3),_mm_mul_pd(m1.GetV(2),b4),
342  _mm_mul_pd(m1.GetV(2),b5));
343 }
344 
345 inline DMatrix5x5 operator*(const DMatrix5x2 &m1,const DMatrix2x5 &m2){
346  ALIGNED_16_BLOCK_WITH_PTR(__m128d, 10, p)
347  __m128d &a11=p[0];
348  __m128d &a12=p[1];
349  __m128d &a13=p[2];
350  __m128d &a14=p[3];
351  __m128d &a15=p[4];
352  __m128d &a21=p[5];
353  __m128d &a22=p[6];
354  __m128d &a23=p[7];
355  __m128d &a24=p[8];
356  __m128d &a25=p[9];
357  a11=_mm_set1_pd(m2(0,0));
358  a12=_mm_set1_pd(m2(0,1));
359  a13=_mm_set1_pd(m2(0,2));
360  a14=_mm_set1_pd(m2(0,3));
361  a15=_mm_set1_pd(m2(0,4));
362  a21=_mm_set1_pd(m2(1,0));
363  a22=_mm_set1_pd(m2(1,1));
364  a23=_mm_set1_pd(m2(1,2));
365  a24=_mm_set1_pd(m2(1,3));
366  a25=_mm_set1_pd(m2(1,4));
367  return DMatrix5x5(_mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a11),
368  _mm_mul_pd(m1.GetV(0,1),a21)),
369  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a12),
370  _mm_mul_pd(m1.GetV(0,1),a22)),
371  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a13),
372  _mm_mul_pd(m1.GetV(0,1),a23)),
373  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a14),
374  _mm_mul_pd(m1.GetV(0,1),a24)),
375  _mm_add_pd(_mm_mul_pd(m1.GetV(0,0),a15),
376  _mm_mul_pd(m1.GetV(0,1),a25)),
377 
378  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a11),
379  _mm_mul_pd(m1.GetV(1,1),a21)),
380  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a12),
381  _mm_mul_pd(m1.GetV(1,1),a22)),
382  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a13),
383  _mm_mul_pd(m1.GetV(1,1),a23)),
384  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a14),
385  _mm_mul_pd(m1.GetV(1,1),a24)),
386  _mm_add_pd(_mm_mul_pd(m1.GetV(1,0),a15),
387  _mm_mul_pd(m1.GetV(1,1),a25)),
388 
389  _mm_add_pd(_mm_mul_pd(m1.GetV(2,0),a11),
390  _mm_mul_pd(m1.GetV(2,1),a21)),
391  _mm_add_pd(_mm_mul_pd(m1.GetV(2,0),a12),
392  _mm_mul_pd(m1.GetV(2,1),a22)),
393  _mm_add_pd(_mm_mul_pd(m1.GetV(2,0),a13),
394  _mm_mul_pd(m1.GetV(2,1),a23)),
395  _mm_add_pd(_mm_mul_pd(m1.GetV(2,0),a14),
396  _mm_mul_pd(m1.GetV(2,1),a24)),
397  _mm_add_pd(_mm_mul_pd(m1.GetV(2,0),a15),
398  _mm_mul_pd(m1.GetV(2,1),a25)));
399 }
400 
401 #endif
402 #endif // _DMatrixSIMD_
DMatrix2x1 operator*(const double c, const DMatrix2x1 &M)
Definition: DMatrix2x1.h:58
#define ALIGNED_16_BLOCK_WITH_PTR(TYPE, NUM, PTR)
Definition: align_16.h:27
DMatrix2x5 Transpose(const DMatrix5x2 &M)
Definition: DMatrixSIMD.h:227
DMatrix5x5 MultiplyTranspose(const DMatrix5x1 &m1)
Definition: DMatrixSIMD.h:233
TH2F * temp