imate
C++/CUDA Reference
c_matrix_operations.cpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 // =======
13 // Headers
14 // =======
15 
16 #include "./c_matrix_operations.h"
17 #include "../_definitions/definitions.h" // USE_CBLAS
18 
19 #if (USE_CBLAS == 1)
20  #include "./cblas_interface.h"
21 #endif
22 
23 
24 // ============
25 // dense matvec
26 // ============
27 
59 
60 template <typename DataType>
62  const DataType* A,
63  const DataType* b,
64  const LongIndexType num_rows,
65  const LongIndexType num_columns,
66  const FlagType A_is_row_major,
67  DataType* c)
68 {
69  #if (USE_CBLAS == 1)
70 
71  // Using OpenBlas
72  CBLAS_LAYOUT layout;
73  if (A_is_row_major)
74  {
75  layout = CblasRowMajor;
76  }
77  else
78  {
79  layout = CblasColMajor;
80  }
81 
82  CBLAS_TRANSPOSE transpose = CblasNoTrans;
83  int lda = num_rows;
84  int incb = 1;
85  int incc = 1;
86  DataType alpha = 1.0;
87  DataType beta = 0.0;
88 
89  cblas_interface::xgemv(layout, transpose, num_rows, num_columns, alpha, A,
90  lda, b, incb, beta, c, incc);
91 
92  #else
93 
94  // Not using OpenBlas
95  LongIndexType j;
96  long double sum;
97  LongIndexType chunk = 5;
98  LongIndexType num_columns_chunked = num_columns - (num_columns % chunk);
99 
100  // Determine major order of A
101  if (A_is_row_major)
102  {
103  // For row major (C ordering) matrix A
104  for (LongIndexType i=0; i < num_rows; ++i)
105  {
106  sum = 0.0;
107  for (j=0; j < num_columns_chunked; j+= chunk)
108  {
109  sum += A[i*num_columns + j] * b[j] +
110  A[i*num_columns + j+1] * b[j+1] +
111  A[i*num_columns + j+2] * b[j+2] +
112  A[i*num_columns + j+3] * b[j+3] +
113  A[i*num_columns + j+4] * b[j+4];
114  }
115 
116  for (j= num_columns_chunked; j < num_columns; ++j)
117  {
118  sum += A[i*num_columns + j] * b[j];
119  }
120 
121  c[i] = static_cast<DataType>(sum);
122  }
123  }
124  else
125  {
126  // For column major (Fortran ordering) matrix A
127  for (LongIndexType i=0; i < num_rows; ++i)
128  {
129  sum = 0.0;
130  for (j=0; j < num_columns; ++j)
131  {
132  sum += A[i + num_rows*j] * b[j];
133  }
134  c[i] = static_cast<DataType>(sum);
135  }
136  }
137 
138  #endif
139 }
140 
141 
142 // =================
143 // dense matvec plus
144 // =================
145 
179 
180 template <typename DataType>
182  const DataType* A,
183  const DataType* b,
184  const DataType alpha,
185  const LongIndexType num_rows,
186  const LongIndexType num_columns,
187  const FlagType A_is_row_major,
188  DataType* c)
189 {
190  if (alpha == 0.0)
191  {
192  return;
193  }
194 
195  LongIndexType j;
196  long double sum;
197  LongIndexType chunk = 5;
198  LongIndexType num_columns_chunked = num_columns - (num_columns % chunk);
199 
200  // Determine major order of A
201  if (A_is_row_major)
202  {
203  // For row major (C ordering) matrix A
204  for (LongIndexType i=0; i < num_rows; ++i)
205  {
206  sum = 0.0;
207  for (j=0; j < num_columns_chunked; j+= chunk)
208  {
209  sum += A[i*num_columns + j] * b[j] +
210  A[i*num_columns + j+1] * b[j+1] +
211  A[i*num_columns + j+2] * b[j+2] +
212  A[i*num_columns + j+3] * b[j+3] +
213  A[i*num_columns + j+4] * b[j+4];
214  }
215 
216  for (j= num_columns_chunked; j < num_columns; ++j)
217  {
218  sum += A[i*num_columns + j] * b[j];
219  }
220 
221  c[i] += alpha * static_cast<DataType>(sum);
222  }
223  }
224  else
225  {
226  // For column major (Fortran ordering) matrix A
227  for (LongIndexType i=0; i < num_rows; ++i)
228  {
229  sum = 0.0;
230  for (j=0; j < num_columns; ++j)
231  {
232  sum += A[i + num_rows*j] * b[j];
233  }
234  c[i] += alpha* static_cast<DataType>(sum);
235  }
236  }
237 }
238 
239 
240 // =======================
241 // dense transposed matvec
242 // =======================
243 
276 
277 template <typename DataType>
279  const DataType* A,
280  const DataType* b,
281  const LongIndexType num_rows,
282  const LongIndexType num_columns,
283  const FlagType A_is_row_major,
284  DataType* c)
285 {
286  LongIndexType i;
287  long double sum;
288  LongIndexType chunk = 5;
289  LongIndexType num_rows_chunked = num_rows - (num_rows % chunk);
290 
291  // Determine major order of A
292  if (A_is_row_major)
293  {
294  // For row major (C ordering) matrix A
295  for (LongIndexType j=0; j < num_columns; ++j)
296  {
297  sum = 0.0;
298  for (i=0; i < num_rows; ++i)
299  {
300  sum += A[i*num_columns + j] * b[i];
301  }
302  c[j] = static_cast<DataType>(sum);
303  }
304  }
305  else
306  {
307  // For column major (Fortran ordering) matrix A
308  for (LongIndexType j=0; j < num_columns; ++j)
309  {
310  sum = 0.0;
311  for (i=0; i < num_rows_chunked; i += chunk)
312  {
313  sum += A[i + num_rows*j] * b[i] +
314  A[i+1 + num_rows*j] * b[i+1] +
315  A[i+2 + num_rows*j] * b[i+2] +
316  A[i+3 + num_rows*j] * b[i+3] +
317  A[i+4 + num_rows*j] * b[i+4];
318  }
319 
320  for (i=num_rows_chunked; i < num_rows; ++i)
321  {
322  sum += A[i + num_rows*j] * b[i];
323  }
324 
325  c[j] = static_cast<DataType>(sum);
326  }
327  }
328 }
329 
330 
331 // ============================
332 // dense transposed matvec plus
333 // ============================
334 
369 
370 template <typename DataType>
372  const DataType* A,
373  const DataType* b,
374  const DataType alpha,
375  const LongIndexType num_rows,
376  const LongIndexType num_columns,
377  const FlagType A_is_row_major,
378  DataType* c)
379 {
380  if (alpha == 0.0)
381  {
382  return;
383  }
384 
385  LongIndexType i;
386  long double sum;
387  LongIndexType chunk = 5;
388  LongIndexType num_rows_chunked = num_rows - (num_rows % chunk);
389 
390  // Determine major order of A
391  if (A_is_row_major)
392  {
393  // For row major (C ordering) matrix A
394  for (LongIndexType j=0; j < num_columns; ++j)
395  {
396  sum = 0.0;
397  for (i=0; i < num_rows; ++i)
398  {
399  sum += A[i*num_columns + j] * b[i];
400  }
401  c[j] += alpha * static_cast<DataType>(sum);
402  }
403  }
404  else
405  {
406  // For column major (Fortran ordering) matrix A
407  for (LongIndexType j=0; j < num_columns; ++j)
408  {
409  sum = 0.0;
410  for (i=0; i < num_rows_chunked; i += chunk)
411  {
412  sum += A[i + num_rows*j] * b[i] +
413  A[i+1 + num_rows*j] * b[i+1] +
414  A[i+2 + num_rows*j] * b[i+2] +
415  A[i+3 + num_rows*j] * b[i+3] +
416  A[i+4 + num_rows*j] * b[i+4];
417  }
418 
419  for (i=num_rows_chunked; i < num_rows; ++i)
420  {
421  sum += A[i + num_rows*j] * b[i];
422  }
423 
424  c[j] += alpha * static_cast<DataType>(sum);
425  }
426  }
427 }
428 
429 
430 // ==========
431 // csr matvec
432 // ==========
433 
467 
468 template <typename DataType>
470  const DataType* A_data,
471  const LongIndexType* A_column_indices,
472  const LongIndexType* A_index_pointer,
473  const DataType* b,
474  const LongIndexType num_rows,
475  DataType* c)
476 {
477  LongIndexType index_pointer;
478  LongIndexType row;
479  LongIndexType column;
480  long double sum;
481 
482  for (row=0; row < num_rows; ++row)
483  {
484  sum = 0.0;
485  for (index_pointer=A_index_pointer[row];
486  index_pointer < A_index_pointer[row+1];
487  ++index_pointer)
488  {
489  column = A_column_indices[index_pointer];
490  sum += A_data[index_pointer] * b[column];
491  }
492  c[row] = static_cast<DataType>(sum);
493  }
494 }
495 
496 
497 // ===============
498 // csr matvec plus
499 // ===============
500 
538 
539 template <typename DataType>
541  const DataType* A_data,
542  const LongIndexType* A_column_indices,
543  const LongIndexType* A_index_pointer,
544  const DataType* b,
545  const DataType alpha,
546  const LongIndexType num_rows,
547  DataType* c)
548 {
549  if (alpha == 0.0)
550  {
551  return;
552  }
553 
554  LongIndexType index_pointer;
555  LongIndexType row;
556  LongIndexType column;
557  long double sum;
558 
559  for (row=0; row < num_rows; ++row)
560  {
561  sum = 0.0;
562  for (index_pointer=A_index_pointer[row];
563  index_pointer < A_index_pointer[row+1];
564  ++index_pointer)
565  {
566  column = A_column_indices[index_pointer];
567  sum += A_data[index_pointer] * b[column];
568  }
569  c[row] += alpha * static_cast<DataType>(sum);
570  }
571 }
572 
573 
574 // =====================
575 // csr transposed matvec
576 // =====================
577 
604 
605 template <typename DataType>
607  const DataType* A_data,
608  const LongIndexType* A_column_indices,
609  const LongIndexType* A_index_pointer,
610  const DataType* b,
611  const LongIndexType num_rows,
612  const LongIndexType num_columns,
613  DataType* c)
614 {
615  LongIndexType index_pointer;
616  LongIndexType row;
617  LongIndexType column;
618 
619  // Initialize output to zero
620  for (column=0; column < num_columns; ++column)
621  {
622  c[column] = 0.0;
623  }
624 
625  for (row=0; row < num_rows; ++row)
626  {
627  for (index_pointer=A_index_pointer[row];
628  index_pointer < A_index_pointer[row+1];
629  ++index_pointer)
630  {
631  column = A_column_indices[index_pointer];
632  c[column] += A_data[index_pointer] * b[row];
633  }
634  }
635 }
636 
637 
638 // ==========================
639 // csr transposed matvec plus
640 // ==========================
641 
670 
671 template <typename DataType>
673  const DataType* A_data,
674  const LongIndexType* A_column_indices,
675  const LongIndexType* A_index_pointer,
676  const DataType* b,
677  const DataType alpha,
678  const LongIndexType num_rows,
679  DataType* c)
680 {
681  if (alpha == 0.0)
682  {
683  return;
684  }
685 
686  LongIndexType index_pointer;
687  LongIndexType row;
688  LongIndexType column;
689 
690  for (row=0; row < num_rows; ++row)
691  {
692  for (index_pointer=A_index_pointer[row];
693  index_pointer < A_index_pointer[row+1];
694  ++index_pointer)
695  {
696  column = A_column_indices[index_pointer];
697  c[column] += alpha * A_data[index_pointer] * b[row];
698  }
699  }
700 }
701 
702 
703 // ==========
704 // csc matvec
705 // ==========
706 
733 
734 template <typename DataType>
736  const DataType* A_data,
737  const LongIndexType* A_row_indices,
738  const LongIndexType* A_index_pointer,
739  const DataType* b,
740  const LongIndexType num_rows,
741  const LongIndexType num_columns,
742  DataType* c)
743 {
744  LongIndexType index_pointer;
745  LongIndexType row;
746  LongIndexType column;
747 
748  // Initialize output to zero
749  for (row=0; row < num_rows; ++row)
750  {
751  c[row] = 0.0;
752  }
753 
754  for (column=0; column < num_columns; ++column)
755  {
756  for (index_pointer=A_index_pointer[column];
757  index_pointer < A_index_pointer[column+1];
758  ++index_pointer)
759  {
760  row = A_row_indices[index_pointer];
761  c[row] += A_data[index_pointer] * b[column];
762  }
763  }
764 }
765 
766 
767 // ===============
768 // csc matvec plus
769 // ===============
770 
799 
800 template <typename DataType>
802  const DataType* A_data,
803  const LongIndexType* A_row_indices,
804  const LongIndexType* A_index_pointer,
805  const DataType* b,
806  const DataType alpha,
807  const LongIndexType num_columns,
808  DataType* c)
809 {
810  if (alpha == 0.0)
811  {
812  return;
813  }
814 
815  LongIndexType index_pointer;
816  LongIndexType row;
817  LongIndexType column;
818 
819  for (column=0; column < num_columns; ++column)
820  {
821  for (index_pointer=A_index_pointer[column];
822  index_pointer < A_index_pointer[column+1];
823  ++index_pointer)
824  {
825  row = A_row_indices[index_pointer];
826  c[row] += alpha * A_data[index_pointer] * b[column];
827  }
828  }
829 }
830 
831 
832 // =====================
833 // csc transposed matvec
834 // =====================
835 
870 
871 template <typename DataType>
873  const DataType* A_data,
874  const LongIndexType* A_row_indices,
875  const LongIndexType* A_index_pointer,
876  const DataType* b,
877  const LongIndexType num_columns,
878  DataType* c)
879 {
880  LongIndexType index_pointer;
881  LongIndexType row;
882  LongIndexType column;
883  long double sum;
884 
885  for (column=0; column < num_columns; ++column)
886  {
887  sum = 0.0;
888  for (index_pointer=A_index_pointer[column];
889  index_pointer < A_index_pointer[column+1];
890  ++index_pointer)
891  {
892  row = A_row_indices[index_pointer];
893  sum += A_data[index_pointer] * b[row];
894  }
895  c[column] = static_cast<DataType>(sum);
896  }
897 }
898 
899 
900 // ==========================
901 // csc transposed matvec plus
902 // ==========================
903 
941 
942 template <typename DataType>
944  const DataType* A_data,
945  const LongIndexType* A_row_indices,
946  const LongIndexType* A_index_pointer,
947  const DataType* b,
948  const DataType alpha,
949  const LongIndexType num_columns,
950  DataType* c)
951 {
952  if (alpha == 0.0)
953  {
954  return;
955  }
956 
957  LongIndexType index_pointer;
958  LongIndexType row;
959  LongIndexType column;
960  long double sum;
961 
962  for (column=0; column < num_columns; ++column)
963  {
964  sum = 0.0;
965  for (index_pointer=A_index_pointer[column];
966  index_pointer < A_index_pointer[column+1];
967  ++index_pointer)
968  {
969  row = A_row_indices[index_pointer];
970  sum += A_data[index_pointer] * b[row];
971  }
972  c[column] += static_cast<DataType>(alpha * sum);
973  }
974 }
975 
976 
977 // ==================
978 // create band matrix
979 // ==================
980 
1019 
1020 template <typename DataType>
1022  const DataType* diagonals,
1023  const DataType* supdiagonals,
1024  const IndexType non_zero_size,
1025  const FlagType tridiagonal,
1026  DataType** matrix)
1027 {
1028  for (IndexType j=0; j < non_zero_size; ++j)
1029  {
1030  // Diagonals
1031  matrix[j][j] = diagonals[j];
1032 
1033  // Off diagonals
1034  if (j < non_zero_size-1)
1035  {
1036  // Sup-diagonal
1037  matrix[j][j+1] = supdiagonals[j];
1038 
1039  // Sub-diagonal, making symmetric tri-diagonal matrix
1040  if (tridiagonal)
1041  {
1042  matrix[j+1][j] = supdiagonals[j];
1043  }
1044  }
1045  }
1046 }
1047 
1048 
1049 // ===============================
1050 // Explicit template instantiation
1051 // ===============================
1052 
1053 template class cMatrixOperations<float>;
1054 template class cMatrixOperations<double>;
1055 template class cMatrixOperations<long double>;
A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS ...
static void csr_transposed_matvec(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void csr_matvec(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void dense_transposed_matvec_plus(const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes where is dense, and is the transpose of the matrix .
static void csr_matvec_plus(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void csc_transposed_matvec_plus(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void dense_transposed_matvec(const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes matrix vector multiplication where is dense, and is the transpose of the matrix .
static void csc_transposed_matvec(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void dense_matvec_plus(const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes the operation where is a dense matrix.
static void csc_matvec(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void csr_transposed_matvec_plus(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void dense_matvec(const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes the matrix vector multiplication where is a dense matrix.
static void csc_matvec_plus(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void create_band_matrix(const DataType *diagonals, const DataType *supdiagonals, const IndexType non_zero_size, const FlagType tridiagonal, DataType **matrix)
Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diag...
int LongIndexType
Definition: types.h:60
int FlagType
Definition: types.h:68
int IndexType
Definition: types.h:65