17 #include "../_definitions/definitions.h"
60 template <
typename DataType>
75 layout = CblasRowMajor;
79 layout = CblasColMajor;
82 CBLAS_TRANSPOSE transpose = CblasNoTrans;
89 cblas_interface::xgemv(layout, transpose, num_rows, num_columns, alpha, A,
90 lda, b, incb, beta, c, incc);
98 LongIndexType num_columns_chunked = num_columns - (num_columns % chunk);
107 for (j=0; j < num_columns_chunked; j+= chunk)
109 sum += A[i*num_columns + j] * b[j] +
110 A[i*num_columns + j+1] * b[j+1] +
111 A[i*num_columns + j+2] * b[j+2] +
112 A[i*num_columns + j+3] * b[j+3] +
113 A[i*num_columns + j+4] * b[j+4];
116 for (j= num_columns_chunked; j < num_columns; ++j)
118 sum += A[i*num_columns + j] * b[j];
121 c[i] =
static_cast<DataType
>(sum);
130 for (j=0; j < num_columns; ++j)
132 sum += A[i + num_rows*j] * b[j];
134 c[i] =
static_cast<DataType
>(sum);
180 template <
typename DataType>
184 const DataType alpha,
198 LongIndexType num_columns_chunked = num_columns - (num_columns % chunk);
207 for (j=0; j < num_columns_chunked; j+= chunk)
209 sum += A[i*num_columns + j] * b[j] +
210 A[i*num_columns + j+1] * b[j+1] +
211 A[i*num_columns + j+2] * b[j+2] +
212 A[i*num_columns + j+3] * b[j+3] +
213 A[i*num_columns + j+4] * b[j+4];
216 for (j= num_columns_chunked; j < num_columns; ++j)
218 sum += A[i*num_columns + j] * b[j];
221 c[i] += alpha *
static_cast<DataType
>(sum);
230 for (j=0; j < num_columns; ++j)
232 sum += A[i + num_rows*j] * b[j];
234 c[i] += alpha*
static_cast<DataType
>(sum);
277 template <
typename DataType>
289 LongIndexType num_rows_chunked = num_rows - (num_rows % chunk);
298 for (i=0; i < num_rows; ++i)
300 sum += A[i*num_columns + j] * b[i];
302 c[j] =
static_cast<DataType
>(sum);
311 for (i=0; i < num_rows_chunked; i += chunk)
313 sum += A[i + num_rows*j] * b[i] +
314 A[i+1 + num_rows*j] * b[i+1] +
315 A[i+2 + num_rows*j] * b[i+2] +
316 A[i+3 + num_rows*j] * b[i+3] +
317 A[i+4 + num_rows*j] * b[i+4];
320 for (i=num_rows_chunked; i < num_rows; ++i)
322 sum += A[i + num_rows*j] * b[i];
325 c[j] =
static_cast<DataType
>(sum);
370 template <
typename DataType>
374 const DataType alpha,
388 LongIndexType num_rows_chunked = num_rows - (num_rows % chunk);
397 for (i=0; i < num_rows; ++i)
399 sum += A[i*num_columns + j] * b[i];
401 c[j] += alpha *
static_cast<DataType
>(sum);
410 for (i=0; i < num_rows_chunked; i += chunk)
412 sum += A[i + num_rows*j] * b[i] +
413 A[i+1 + num_rows*j] * b[i+1] +
414 A[i+2 + num_rows*j] * b[i+2] +
415 A[i+3 + num_rows*j] * b[i+3] +
416 A[i+4 + num_rows*j] * b[i+4];
419 for (i=num_rows_chunked; i < num_rows; ++i)
421 sum += A[i + num_rows*j] * b[i];
424 c[j] += alpha *
static_cast<DataType
>(sum);
468 template <
typename DataType>
470 const DataType* A_data,
482 for (row=0; row < num_rows; ++row)
485 for (index_pointer=A_index_pointer[row];
486 index_pointer < A_index_pointer[row+1];
489 column = A_column_indices[index_pointer];
490 sum += A_data[index_pointer] * b[column];
492 c[row] =
static_cast<DataType
>(sum);
539 template <
typename DataType>
541 const DataType* A_data,
545 const DataType alpha,
559 for (row=0; row < num_rows; ++row)
562 for (index_pointer=A_index_pointer[row];
563 index_pointer < A_index_pointer[row+1];
566 column = A_column_indices[index_pointer];
567 sum += A_data[index_pointer] * b[column];
569 c[row] += alpha *
static_cast<DataType
>(sum);
605 template <
typename DataType>
607 const DataType* A_data,
620 for (column=0; column < num_columns; ++column)
625 for (row=0; row < num_rows; ++row)
627 for (index_pointer=A_index_pointer[row];
628 index_pointer < A_index_pointer[row+1];
631 column = A_column_indices[index_pointer];
632 c[column] += A_data[index_pointer] * b[row];
671 template <
typename DataType>
673 const DataType* A_data,
677 const DataType alpha,
690 for (row=0; row < num_rows; ++row)
692 for (index_pointer=A_index_pointer[row];
693 index_pointer < A_index_pointer[row+1];
696 column = A_column_indices[index_pointer];
697 c[column] += alpha * A_data[index_pointer] * b[row];
734 template <
typename DataType>
736 const DataType* A_data,
749 for (row=0; row < num_rows; ++row)
754 for (column=0; column < num_columns; ++column)
756 for (index_pointer=A_index_pointer[column];
757 index_pointer < A_index_pointer[column+1];
760 row = A_row_indices[index_pointer];
761 c[row] += A_data[index_pointer] * b[column];
800 template <
typename DataType>
802 const DataType* A_data,
806 const DataType alpha,
819 for (column=0; column < num_columns; ++column)
821 for (index_pointer=A_index_pointer[column];
822 index_pointer < A_index_pointer[column+1];
825 row = A_row_indices[index_pointer];
826 c[row] += alpha * A_data[index_pointer] * b[column];
871 template <
typename DataType>
873 const DataType* A_data,
885 for (column=0; column < num_columns; ++column)
888 for (index_pointer=A_index_pointer[column];
889 index_pointer < A_index_pointer[column+1];
892 row = A_row_indices[index_pointer];
893 sum += A_data[index_pointer] * b[row];
895 c[column] =
static_cast<DataType
>(sum);
942 template <
typename DataType>
944 const DataType* A_data,
948 const DataType alpha,
962 for (column=0; column < num_columns; ++column)
965 for (index_pointer=A_index_pointer[column];
966 index_pointer < A_index_pointer[column+1];
969 row = A_row_indices[index_pointer];
970 sum += A_data[index_pointer] * b[row];
972 c[column] +=
static_cast<DataType
>(alpha * sum);
1020 template <
typename DataType>
1022 const DataType* diagonals,
1023 const DataType* supdiagonals,
1028 for (
IndexType j=0; j < non_zero_size; ++j)
1031 matrix[j][j] = diagonals[j];
1034 if (j < non_zero_size-1)
1037 matrix[j][j+1] = supdiagonals[j];
1042 matrix[j+1][j] = supdiagonals[j];
A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS ...
static void csr_transposed_matvec(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes $\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse row (CSR) matrix and $\boldsymbol{b}$ is a dense vector. The output is a dense...
static void csr_matvec(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, DataType *c)
Computes $\boldsymbol{c} = \mathbf{A} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse row (CSR) matrix and $\boldsymbol{b}$ is a dense vector. The output is a dense...
static void dense_transposed_matvec_plus(const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
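Computes $\boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}$ where $\mathbf{A}$ is dense, and $\mathbf{A}^{\intercal}$ is the transpose of the matrix $\mathbf{A}$.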
static void csr_matvec_plus(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
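Computes $\boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse row (CSR) matrix and $\boldsymbol{b}$ is a dense vector. The output is a dense...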
static void csc_transposed_matvec_plus(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
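Computes $\boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse column (CSC) matrix and $\boldsymbol{b}$ is a dense vector. The output is a de...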
static void dense_transposed_matvec(const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
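Computes the matrix-vector multiplication $\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b}$ where $\mathbf{A}$ is dense, and $\mathbf{A}^{\intercal}$ is the transpose of the matrix $\mathbf{A}$.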
static void csc_transposed_matvec(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_columns, DataType *c)
Computes $\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse column (CSC) matrix and $\boldsymbol{b}$ is a dense vector. The output is a de...
static void dense_matvec_plus(const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes the operation $\boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b}$ where $\mathbf{A}$ is a dense matrix.
static void csc_matvec(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
Computes $\boldsymbol{c} = \mathbf{A} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse column (CSC) matrix and $\boldsymbol{b}$ is a dense vector. The output is a de...
static void csr_transposed_matvec_plus(const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
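Computes $\boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse row (CSR) matrix and $\boldsymbol{b}$ is a dense vector. The output is a dense...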
static void dense_matvec(const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
Computes the matrix-vector multiplication $\boldsymbol{c} = \mathbf{A} \boldsymbol{b}$ where $\mathbf{A}$ is a dense matrix.
static void csc_matvec_plus(const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
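Computes $\boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b}$ where $\mathbf{A}$ is a compressed sparse column (CSC) matrix and $\boldsymbol{b}$ is a dense vector. The output is a de...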
static void create_band_matrix(const DataType *diagonals, const DataType *supdiagonals, const IndexType non_zero_size, const FlagType tridiagonal, DataType **matrix)
Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diag...