imate
C++/CUDA Reference
cu_matrix_operations.cu
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 // =======
13 // Headers
14 // =======
15 
16 #include "./cu_matrix_operations.h"
17 #include <cassert> // assert
18 #include "./cublas_interface.h" // cublas_interface
19 #include "./cusparse_interface.h" // cusparse_interface
20 
21 
22 // ============
23 // dense matvec
24 // ============
25 
50 
51 template <typename DataType>
53  cublasHandle_t cublas_handle,
54  const DataType* A,
55  const DataType* b,
56  const LongIndexType num_rows,
57  const LongIndexType num_columns,
58  const FlagType A_is_row_major,
59  DataType* c)
60 {
61  cublasOperation_t trans;
62  int m;
63  int n;
64  int lda;
65  DataType alpha = 1.0;
66  DataType beta = 0.0;
67  int incb = 1;
68  int incc = 1;
69 
70  // Since cublas accepts column major (Fortran) ordering, use transpose for
71  // row_major matrix.
72  if (A_is_row_major)
73  {
74  trans = CUBLAS_OP_T;
75  m = num_columns;
76  n = num_rows;
77  }
78  else
79  {
80  trans = CUBLAS_OP_N;
81  m = num_rows;
82  n = num_columns;
83  }
84 
85  lda = m;
86 
87  // Calling cublas
88  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
89  m, n, &alpha, A, lda,
90  b, incb, &beta, c,
91  incc);
92  assert(status == CUBLAS_STATUS_SUCCESS);
93 }
94 
95 
96 // =================
97 // dense matvec plus
98 // =================
99 
126 
127 template <typename DataType>
129  cublasHandle_t cublas_handle,
130  const DataType* A,
131  const DataType* b,
132  const DataType alpha,
133  const LongIndexType num_rows,
134  const LongIndexType num_columns,
135  const FlagType A_is_row_major,
136  DataType* c)
137 {
138  cublasOperation_t trans;
139  int m;
140  int n;
141  int lda;
142  DataType beta = 1.0;
143  int incb = 1;
144  int incc = 1;
145 
146  // Since cublas accepts column major (Fortran) ordering, use transpose for
147  // row_major matrix.
148  if (A_is_row_major)
149  {
150  trans = CUBLAS_OP_T;
151  m = num_columns;
152  n = num_rows;
153  }
154  else
155  {
156  trans = CUBLAS_OP_N;
157  m = num_rows;
158  n = num_columns;
159  }
160 
161  lda = m;
162 
163  // Calling cublas
164  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
165  m, n, &alpha, A, lda,
166  b, incb, &beta, c,
167  incc);
168  assert(status == CUBLAS_STATUS_SUCCESS);
169 }
170 
171 
172 // =======================
173 // dense transposed matvec
174 // =======================
175 
201 
202 template <typename DataType>
204  cublasHandle_t cublas_handle,
205  const DataType* A,
206  const DataType* b,
207  const LongIndexType num_rows,
208  const LongIndexType num_columns,
209  const FlagType A_is_row_major,
210  DataType* c)
211 {
212  cublasOperation_t trans;
213  int m;
214  int n;
215  int lda;
216  DataType alpha = 1.0;
217  DataType beta = 0.0;
218  int incb = 1;
219  int incc = 1;
220 
221  // Since cublas accepts column major (Fortran) ordering, use non-transpose
222  // for row_major matrix.
223  if (A_is_row_major)
224  {
225  trans = CUBLAS_OP_N;
226  m = num_columns;
227  n = num_rows;
228  }
229  else
230  {
231  trans = CUBLAS_OP_T;
232  m = num_rows;
233  n = num_columns;
234  }
235 
236  lda = m;
237 
238  // Calling cublas
239  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
240  m, n, &alpha, A, lda,
241  b, incb, &beta, c,
242  incc);
243  assert(status == CUBLAS_STATUS_SUCCESS);
244 }
245 
246 
247 // ============================
248 // dense transposed matvec plus
249 // ============================
250 
278 
279 template <typename DataType>
281  cublasHandle_t cublas_handle,
282  const DataType* A,
283  const DataType* b,
284  const DataType alpha,
285  const LongIndexType num_rows,
286  const LongIndexType num_columns,
287  const FlagType A_is_row_major,
288  DataType* c)
289 {
290  if (alpha == 0.0)
291  {
292  return;
293  }
294 
295  cublasOperation_t trans;
296  int m;
297  int n;
298  int lda;
299  DataType beta = 0.0;
300  int incb = 1;
301  int incc = 1;
302 
303  // Since cublas accepts column major (Fortran) ordering, use non-transpose
304  // for row_major matrix.
305  if (A_is_row_major)
306  {
307  trans = CUBLAS_OP_N;
308  m = num_columns;
309  n = num_rows;
310  }
311  else
312  {
313  trans = CUBLAS_OP_T;
314  m = num_rows;
315  n = num_columns;
316  }
317 
318  lda = m;
319 
320  // Calling cublas
321  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
322  m, n, &alpha, A, lda,
323  b, incb, &beta, c,
324  incc);
325  assert(status == CUBLAS_STATUS_SUCCESS);
326 }
327 
328 
329 // ==========
330 // csr matvec
331 // ==========
332 
359 
360 template <typename DataType>
362  cusparseHandle_t cusparse_handle,
363  const DataType* A_data,
364  const LongIndexType* A_column_indices,
365  const LongIndexType* A_index_pointer,
366  const DataType* b,
367  const LongIndexType num_rows,
368  DataType* c)
369 {
370  LongIndexType index_pointer;
371  LongIndexType row;
372  LongIndexType column;
373  DataType sum;
374 
375  for (row=0; row < num_rows; ++row)
376  {
377  sum = 0.0;
378  for (index_pointer=A_index_pointer[row];
379  index_pointer < A_index_pointer[row+1];
380  ++index_pointer)
381  {
382  column = A_column_indices[index_pointer];
383  sum += A_data[index_pointer] * b[column];
384  }
385  c[row] = sum;
386  }
387 }
388 
389 
390 // ===============
391 // csr matvec plus
392 // ===============
393 
424 
425 template <typename DataType>
427  cusparseHandle_t cusparse_handle,
428  const DataType* A_data,
429  const LongIndexType* A_column_indices,
430  const LongIndexType* A_index_pointer,
431  const DataType* b,
432  const DataType alpha,
433  const LongIndexType num_rows,
434  DataType* c)
435 {
436  if (alpha == 0.0)
437  {
438  return;
439  }
440 
441  LongIndexType index_pointer;
442  LongIndexType row;
443  LongIndexType column;
444  DataType sum;
445 
446  for (row=0; row < num_rows; ++row)
447  {
448  sum = 0.0;
449  for (index_pointer=A_index_pointer[row];
450  index_pointer < A_index_pointer[row+1];
451  ++index_pointer)
452  {
453  column = A_column_indices[index_pointer];
454  sum += A_data[index_pointer] * b[column];
455  }
456  c[row] += alpha * sum;
457  }
458 }
459 
460 
461 // =====================
462 // csr transposed matvec
463 // =====================
464 
493 
494 template <typename DataType>
496  cusparseHandle_t cusparse_handle,
497  const DataType* A_data,
498  const LongIndexType* A_column_indices,
499  const LongIndexType* A_index_pointer,
500  const DataType* b,
501  const LongIndexType num_rows,
502  const LongIndexType num_columns,
503  DataType* c)
504 {
505  LongIndexType index_pointer;
506  LongIndexType row;
507  LongIndexType column;
508 
509  // Initialize output to zero
510  for (column=0; column < num_columns; ++column)
511  {
512  c[column] = 0.0;
513  }
514 
515  for (row=0; row < num_rows; ++row)
516  {
517  for (index_pointer=A_index_pointer[row];
518  index_pointer < A_index_pointer[row+1];
519  ++index_pointer)
520  {
521  column = A_column_indices[index_pointer];
522  c[column] += A_data[index_pointer] * b[row];
523  }
524  }
525 }
526 
527 
528 // ==========================
529 // csr transposed matvec plus
530 // ==========================
531 
564 
565 template <typename DataType>
567  cusparseHandle_t cusparse_handle,
568  const DataType* A_data,
569  const LongIndexType* A_column_indices,
570  const LongIndexType* A_index_pointer,
571  const DataType* b,
572  const DataType alpha,
573  const LongIndexType num_rows,
574  const LongIndexType num_columns,
575  DataType* c)
576 {
577  if (alpha == 0.0)
578  {
579  return;
580  }
581 
582  LongIndexType index_pointer;
583  LongIndexType row;
584  LongIndexType column;
585 
586  for (row=0; row < num_rows; ++row)
587  {
588  for (index_pointer=A_index_pointer[row];
589  index_pointer < A_index_pointer[row+1];
590  ++index_pointer)
591  {
592  column = A_column_indices[index_pointer];
593  c[column] += alpha * A_data[index_pointer] * b[row];
594  }
595  }
596 }
597 
598 
599 // ==========
600 // csc matvec
601 // ==========
602 
631 
632 template <typename DataType>
634  cusparseHandle_t cusparse_handle,
635  const DataType* A_data,
636  const LongIndexType* A_row_indices,
637  const LongIndexType* A_index_pointer,
638  const DataType* b,
639  const LongIndexType num_rows,
640  const LongIndexType num_columns,
641  DataType* c)
642 {
643  LongIndexType index_pointer;
644  LongIndexType row;
645  LongIndexType column;
646 
647  // Initialize output to zero
648  for (row=0; row < num_rows; ++row)
649  {
650  c[row] = 0.0;
651  }
652 
653  for (column=0; column < num_columns; ++column)
654  {
655  for (index_pointer=A_index_pointer[column];
656  index_pointer < A_index_pointer[column+1];
657  ++index_pointer)
658  {
659  row = A_row_indices[index_pointer];
660  c[row] += A_data[index_pointer] * b[column];
661  }
662  }
663 }
664 
665 
666 // ===============
667 // csc matvec plus
668 // ===============
669 
702 
703 template <typename DataType>
705  cusparseHandle_t cusparse_handle,
706  const DataType* A_data,
707  const LongIndexType* A_row_indices,
708  const LongIndexType* A_index_pointer,
709  const DataType* b,
710  const DataType alpha,
711  const LongIndexType num_rows,
712  const LongIndexType num_columns,
713  DataType* c)
714 {
715  if (alpha == 0.0)
716  {
717  return;
718  }
719 
720  LongIndexType index_pointer;
721  LongIndexType row;
722  LongIndexType column;
723 
724  for (column=0; column < num_columns; ++column)
725  {
726  for (index_pointer=A_index_pointer[column];
727  index_pointer < A_index_pointer[column+1];
728  ++index_pointer)
729  {
730  row = A_row_indices[index_pointer];
731  c[row] += alpha * A_data[index_pointer] * b[column];
732  }
733  }
734 }
735 
736 
737 // =====================
738 // csc transposed matvec
739 // =====================
740 
768 
769 template <typename DataType>
771  cusparseHandle_t cusparse_handle,
772  const DataType* A_data,
773  const LongIndexType* A_row_indices,
774  const LongIndexType* A_index_pointer,
775  const DataType* b,
776  const LongIndexType num_columns,
777  DataType* c)
778 {
779  LongIndexType index_pointer;
780  LongIndexType row;
781  LongIndexType column;
782  DataType sum;
783 
784  for (column=0; column < num_columns; ++column)
785  {
786  sum = 0.0;
787  for (index_pointer=A_index_pointer[column];
788  index_pointer < A_index_pointer[column+1];
789  ++index_pointer)
790  {
791  row = A_row_indices[index_pointer];
792  sum += A_data[index_pointer] * b[row];
793  }
794  c[column] = sum;
795  }
796 }
797 
798 
799 // ==========================
800 // csc transposed matvec plus
801 // ==========================
802 
833 
834 template <typename DataType>
836  cusparseHandle_t cusparse_handle,
837  const DataType* A_data,
838  const LongIndexType* A_row_indices,
839  const LongIndexType* A_index_pointer,
840  const DataType* b,
841  const DataType alpha,
842  const LongIndexType num_columns,
843  DataType* c)
844 {
845  if (alpha == 0.0)
846  {
847  return;
848  }
849 
850  LongIndexType index_pointer;
851  LongIndexType row;
852  LongIndexType column;
853  DataType sum;
854 
855  for (column=0; column < num_columns; ++column)
856  {
857  sum = 0.0;
858  for (index_pointer=A_index_pointer[column];
859  index_pointer < A_index_pointer[column+1];
860  ++index_pointer)
861  {
862  row = A_row_indices[index_pointer];
863  sum += A_data[index_pointer] * b[row];
864  }
865  c[column] += alpha * sum;
866  }
867 }
868 
869 
870 // ==================
871 // create band matrix
872 // ==================
873 
914 
915 template <typename DataType>
917  cusparseHandle_t cusparse_handle,
918  const DataType* diagonals,
919  const DataType* supdiagonals,
920  const IndexType non_zero_size,
921  const FlagType tridiagonal,
922  DataType** matrix)
923 {
924  for (IndexType j=0; j < non_zero_size; ++j)
925  {
926  // Diagonals
927  matrix[j][j] = diagonals[j];
928 
929  // Off diagonals
930  if (j < non_zero_size-1)
931  {
932  // Sup-diagonal
933  matrix[j][j+1] = supdiagonals[j];
934 
935  // Sub-diagonal, making symmetric tri-diagonal matrix
936  if (tridiagonal)
937  {
938  matrix[j+1][j] = supdiagonals[j];
939  }
940  }
941  }
942 }
943 
944 
945 // ===============================
946 // Explicit template instantiation
947 // ===============================
948 
949 template class cuMatrixOperations<float>;
950 template class cuMatrixOperations<double>;
A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS ...
static void csc_transposed_matvec(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void csr_matvec(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void dense_matvec(cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes the matrix vector multiplication where is a dense matrix.
static void csc_matvec_plus(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void dense_transposed_matvec_plus(cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes where is dense, and is the transpose of the matrix .
static void csc_transposed_matvec_plus(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
static void dense_transposed_matvec(cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes matrix vector multiplication where is dense, and is the transpose of the matrix .
static void csr_transposed_matvec_plus(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void create_band_matrix(cusparseHandle_t cublas_handle, const DataType *diagonals, const DataType *supdiagonals, const IndexType non_zero_size, const FlagType tridiagonal, DataType **matrix)
Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diag...
static void csr_matvec_plus(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void csr_transposed_matvec(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse row (CSR) matrix and is a dense vector. The output is a dense...
static void dense_matvec_plus(cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes the operation where is a dense matrix.
static void csc_matvec(cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes where is compressed sparse column (CSC) matrix and is a dense vector. The output is a de...
cublasStatus_t cublasXgemv(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const DataType *alpha, const DataType *A, int lda, const DataType *x, int incx, const DataType *beta, DataType *y, int incy)
int LongIndexType
Definition: types.h:60
int FlagType
Definition: types.h:68
int IndexType
Definition: types.h:65