/* Listing metadata: C source, 5836 lines, 366 KiB. */
/*
|
|
* Copyright 1993-2022 NVIDIA Corporation. All rights reserved.
|
|
*
|
|
* NOTICE TO LICENSEE:
|
|
*
|
|
* This source code and/or documentation ("Licensed Deliverables") are
|
|
* subject to NVIDIA intellectual property rights under U.S. and
|
|
* international Copyright laws.
|
|
*
|
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
|
* conditions of a form of NVIDIA software license agreement by and
|
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
|
* the contrary in the License Agreement, reproduction or disclosure
|
|
* of the Licensed Deliverables to any third party without the express
|
|
* written consent of NVIDIA is prohibited.
|
|
*
|
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
|
* OF THESE LICENSED DELIVERABLES.
|
|
*
|
|
* U.S. Government End Users. These Licensed Deliverables are a
|
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
|
* 1995), consisting of "commercial computer software" and "commercial
|
|
* computer software documentation" as such terms are used in 48
|
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
|
* only those rights set forth herein.
|
|
*
|
|
* Any use of the Licensed Deliverables in individual and commercial
|
|
* software must include, in the user documentation and internal
|
|
* comments to the code, the above Disclaimer and U.S. Government End
|
|
* Users Notice.
|
|
*/
|
|
|
|
/*
|
|
* This is the public header file for the CUBLAS library, defining the API
|
|
*
|
|
* CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
|
|
* on top of the CUDA runtime.
|
|
*/
|
|
|
|
#if !defined(CUBLAS_API_H_)
|
|
#define CUBLAS_API_H_
|
|
|
|
/* Calling-convention decoration used by the prototypes in this header.
 * A definition supplied before inclusion is kept as-is; otherwise the macro
 * becomes __stdcall when _WIN32 is defined and expands to nothing elsewhere. */
#ifndef CUBLASWINAPI
#ifdef _WIN32
#define CUBLASWINAPI __stdcall
#else
#define CUBLASWINAPI
#endif
#endif
|
|
|
|
/* Stop the build immediately when the including file has not defined
 * CUBLASAPI, the decoration placed in front of the exported prototypes. */
#ifndef CUBLASAPI
#error "This file should not be included without defining CUBLASAPI"
#endif
|
|
|
|
#include <stdint.h>
|
|
|
|
#include "driver_types.h"
|
|
#include "cuComplex.h" /* import complex data type */
|
|
|
|
#include <cuda_fp16.h>
|
|
#include <cuda_bf16.h>
|
|
|
|
#include <library_types.h>
|
|
|
|
#if defined(__cplusplus)
|
|
extern "C" {
|
|
#endif /* __cplusplus */
|
|
|
|
/* Library version components. */
#define CUBLAS_VER_MAJOR 12
#define CUBLAS_VER_MINOR 8
#define CUBLAS_VER_PATCH 4
#define CUBLAS_VER_BUILD 1
/* Single-integer encoding: major * 10000 + minor * 100 + patch.
 * CUBLAS_VER_BUILD does not take part in this encoding. */
#define CUBLAS_VERSION (CUBLAS_VER_MAJOR * 10000 + CUBLAS_VER_MINOR * 100 + CUBLAS_VER_PATCH)
|
|
|
|
/* CUBLAS status type returns.
 * Note that the numeric values are sparse: 2, 4-6, 9, 10 and 12 are not assigned. */
typedef enum {
    CUBLAS_STATUS_SUCCESS          = 0,
    CUBLAS_STATUS_NOT_INITIALIZED  = 1,
    CUBLAS_STATUS_ALLOC_FAILED     = 3,
    CUBLAS_STATUS_INVALID_VALUE    = 7,
    CUBLAS_STATUS_ARCH_MISMATCH    = 8,
    CUBLAS_STATUS_MAPPING_ERROR    = 11,
    CUBLAS_STATUS_EXECUTION_FAILED = 13,
    CUBLAS_STATUS_INTERNAL_ERROR   = 14,
    CUBLAS_STATUS_NOT_SUPPORTED    = 15,
    CUBLAS_STATUS_LICENSE_ERROR    = 16
} cublasStatus_t;
|
|
|
|
/* Fill-mode selector: lower, upper or full. */
typedef enum {
    CUBLAS_FILL_MODE_LOWER = 0,
    CUBLAS_FILL_MODE_UPPER = 1,
    CUBLAS_FILL_MODE_FULL  = 2
} cublasFillMode_t;

/* Diagonal-type selector: non-unit or unit. */
typedef enum {
    CUBLAS_DIAG_NON_UNIT = 0,
    CUBLAS_DIAG_UNIT     = 1
} cublasDiagType_t;

/* Side selector: left or right. */
typedef enum {
    CUBLAS_SIDE_LEFT  = 0,
    CUBLAS_SIDE_RIGHT = 1
} cublasSideMode_t;
|
|
|
|
/* Operation selector.
 * CUBLAS_OP_HERMITAN is an alias that shares the value of CUBLAS_OP_C; the
 * identifier spelling is part of the public API and is kept unchanged. */
typedef enum {
    CUBLAS_OP_N        = 0,
    CUBLAS_OP_T        = 1,
    CUBLAS_OP_C        = 2,
    CUBLAS_OP_HERMITAN = 2, /* synonym of CUBLAS_OP_C */
    CUBLAS_OP_CONJG    = 3  /* conjugate, placeholder - not supported in the current release */
} cublasOperation_t;
|
|
|
|
/* Pointer-mode selector: host or device. */
typedef enum {
    CUBLAS_POINTER_MODE_HOST   = 0,
    CUBLAS_POINTER_MODE_DEVICE = 1
} cublasPointerMode_t;

/* Atomics-mode selector: atomics not allowed or allowed. */
typedef enum {
    CUBLAS_ATOMICS_NOT_ALLOWED = 0,
    CUBLAS_ATOMICS_ALLOWED     = 1
} cublasAtomicsMode_t;
|
|
|
|
/* GEMM algorithm selector.
 * - CUBLAS_GEMM_DFALT and CUBLAS_GEMM_DEFAULT are two spellings of the same value (-1).
 * - CUBLAS_GEMM_DFALT_TENSOR_OP and CUBLAS_GEMM_DEFAULT_TENSOR_OP are two spellings of 99.
 * - CUBLAS_GEMM_ALGO<k> equals k (0..23); CUBLAS_GEMM_ALGO<k>_TENSOR_OP equals 100 + k (k = 0..15). */
typedef enum {
    CUBLAS_GEMM_DFALT   = -1,
    CUBLAS_GEMM_DEFAULT = -1,
    CUBLAS_GEMM_ALGO0   = 0,
    CUBLAS_GEMM_ALGO1   = 1,
    CUBLAS_GEMM_ALGO2   = 2,
    CUBLAS_GEMM_ALGO3   = 3,
    CUBLAS_GEMM_ALGO4   = 4,
    CUBLAS_GEMM_ALGO5   = 5,
    CUBLAS_GEMM_ALGO6   = 6,
    CUBLAS_GEMM_ALGO7   = 7,
    CUBLAS_GEMM_ALGO8   = 8,
    CUBLAS_GEMM_ALGO9   = 9,
    CUBLAS_GEMM_ALGO10  = 10,
    CUBLAS_GEMM_ALGO11  = 11,
    CUBLAS_GEMM_ALGO12  = 12,
    CUBLAS_GEMM_ALGO13  = 13,
    CUBLAS_GEMM_ALGO14  = 14,
    CUBLAS_GEMM_ALGO15  = 15,
    CUBLAS_GEMM_ALGO16  = 16,
    CUBLAS_GEMM_ALGO17  = 17,
    CUBLAS_GEMM_ALGO18  = 18, /* sliced 32x32 */
    CUBLAS_GEMM_ALGO19  = 19, /* sliced 64x32 */
    CUBLAS_GEMM_ALGO20  = 20, /* sliced 128x32 */
    CUBLAS_GEMM_ALGO21  = 21, /* sliced 32x32 -splitK */
    CUBLAS_GEMM_ALGO22  = 22, /* sliced 64x32 -splitK */
    CUBLAS_GEMM_ALGO23  = 23, /* sliced 128x32 -splitK */
    CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99,
    CUBLAS_GEMM_DFALT_TENSOR_OP   = 99,
    CUBLAS_GEMM_ALGO0_TENSOR_OP   = 100,
    CUBLAS_GEMM_ALGO1_TENSOR_OP   = 101,
    CUBLAS_GEMM_ALGO2_TENSOR_OP   = 102,
    CUBLAS_GEMM_ALGO3_TENSOR_OP   = 103,
    CUBLAS_GEMM_ALGO4_TENSOR_OP   = 104,
    CUBLAS_GEMM_ALGO5_TENSOR_OP   = 105,
    CUBLAS_GEMM_ALGO6_TENSOR_OP   = 106,
    CUBLAS_GEMM_ALGO7_TENSOR_OP   = 107,
    CUBLAS_GEMM_ALGO8_TENSOR_OP   = 108,
    CUBLAS_GEMM_ALGO9_TENSOR_OP   = 109,
    CUBLAS_GEMM_ALGO10_TENSOR_OP  = 110,
    CUBLAS_GEMM_ALGO11_TENSOR_OP  = 111,
    CUBLAS_GEMM_ALGO12_TENSOR_OP  = 112,
    CUBLAS_GEMM_ALGO13_TENSOR_OP  = 113,
    CUBLAS_GEMM_ALGO14_TENSOR_OP  = 114,
    CUBLAS_GEMM_ALGO15_TENSOR_OP  = 115
} cublasGemmAlgo_t;
|
|
|
|
/* Enum for default math mode / tensor operation.
 * Values 0-3 are consecutive; CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION is a separate flag with value 16. */
typedef enum {
    CUBLAS_DEFAULT_MATH = 0,

    /* deprecated, same effect as using CUBLAS_COMPUTE_32F_FAST_16F, will be removed in a future release */
    CUBLAS_TENSOR_OP_MATH = 1,

    /* same as using matching _PEDANTIC compute type when using cublas<T>routine calls or cublasEx() calls with
       cudaDataType as compute type */
    CUBLAS_PEDANTIC_MATH = 2,

    /* allow accelerating single precision routines using TF32 tensor cores */
    CUBLAS_TF32_TENSOR_OP_MATH = 3,

    /* flag to force any reductions to use the accumulator type and not output type in case of mixed precision
       routines with lower size output type */
    CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION = 16
} cublasMath_t;
|
|
|
|
/* For backward compatibility purposes */
|
|
typedef cudaDataType cublasDataType_t;
|
|
|
|
/* Enum for compute type
 *
 * - default types provide best available performance using all available hardware features
 *   and guarantee internal storage precision with at least the same precision and range;
 * - _PEDANTIC types ensure standard arithmetic and exact specified internal storage format;
 * - _FAST types allow for some loss of precision to enable higher throughput arithmetic.
 *
 * The enumerators are listed grouped by precision, not in numeric order.
 */
typedef enum {
    CUBLAS_COMPUTE_16F           = 64, /* half - default */
    CUBLAS_COMPUTE_16F_PEDANTIC  = 65, /* half - pedantic */
    CUBLAS_COMPUTE_32F           = 68, /* float - default */
    CUBLAS_COMPUTE_32F_PEDANTIC  = 69, /* float - pedantic */
    CUBLAS_COMPUTE_32F_FAST_16F  = 74, /* float - fast, allows down-converting inputs to half or TF32 */
    CUBLAS_COMPUTE_32F_FAST_16BF = 75, /* float - fast, allows down-converting inputs to bfloat16 or TF32 */
    CUBLAS_COMPUTE_32F_FAST_TF32 = 77, /* float - fast, allows down-converting inputs to TF32 */
    CUBLAS_COMPUTE_64F           = 70, /* double - default */
    CUBLAS_COMPUTE_64F_PEDANTIC  = 71, /* double - pedantic */
    CUBLAS_COMPUTE_32I           = 72, /* signed 32-bit int - default */
    CUBLAS_COMPUTE_32I_PEDANTIC  = 73  /* signed 32-bit int - pedantic */
} cublasComputeType_t;
|
|
|
|
/* Opaque structure holding CUBLAS library context.
 * Only the forward declaration is visible here, so user code can hold and
 * pass the pointer type cublasHandle_t but cannot inspect the structure. */
struct cublasContext;
typedef struct cublasContext* cublasHandle_t;
|
|
|
|
/* Cublas logging: signature of a logger callback, which receives one
 * constant C string and returns nothing. */
typedef void (*cublasLogCallback)(const char* msg);
|
|
|
|
/* cuBLAS Exported API {{{ */
|
|
|
|
/* --------------- CUBLAS Helper Functions ---------------- */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t* handle);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, int* version);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, int* value);
|
|
|
|
CUBLASAPI size_t CUBLASWINAPI cublasGetCudartVersion(void);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetWorkspace_v2(cublasHandle_t handle,
|
|
void* workspace,
|
|
size_t workspaceSizeInBytes);
|
|
|
|
/* Per-handle settings. Each setting has a Set routine taking the value and a Get routine taking a pointer
 * that receives it; all of them return a cublasStatus_t. */

/* CUDA stream. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, cudaStream_t streamId);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, cudaStream_t* streamId);

/* Pointer mode (cublasPointerMode_t). */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t* mode);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t mode);

/* Atomics mode (cublasAtomicsMode_t). */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t* mode);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode);

/* Math mode (cublasMath_t). */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, cublasMath_t* mode);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode);

/* SM count target (plain int). */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetSmCountTarget(cublasHandle_t handle, int* smCountTarget);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetSmCountTarget(cublasHandle_t handle, int smCountTarget);
|
|
|
|
CUBLASAPI const char* CUBLASWINAPI cublasGetStatusName(cublasStatus_t status);
|
|
|
|
CUBLASAPI const char* CUBLASWINAPI cublasGetStatusString(cublasStatus_t status);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasLoggerConfigure(int logIsOn,
|
|
int logToStdOut,
|
|
int logToStdErr,
|
|
const char* logFileName);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSetLoggerCallback(cublasLogCallback userCallback);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGetLoggerCallback(cublasLogCallback* userCallback);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void* x, int incx, void* devicePtr, int incy);
|
|
|
|
cublasStatus_t CUBLASWINAPI
|
|
cublasSetVector_64(int64_t n, int64_t elemSize, const void* x, int64_t incx, void* devicePtr, int64_t incy);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy);
|
|
|
|
cublasStatus_t CUBLASWINAPI
|
|
cublasGetVector_64(int64_t n, int64_t elemSize, const void* x, int64_t incx, void* y, int64_t incy);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb);
|
|
|
|
cublasStatus_t CUBLASWINAPI
|
|
cublasSetMatrix_64(int64_t rows, int64_t cols, int64_t elemSize, const void* A, int64_t lda, void* B, int64_t ldb);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb);
|
|
|
|
cublasStatus_t CUBLASWINAPI
|
|
cublasGetMatrix_64(int64_t rows, int64_t cols, int64_t elemSize, const void* A, int64_t lda, void* B, int64_t ldb);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(
|
|
int n, int elemSize, const void* hostPtr, int incx, void* devicePtr, int incy, cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasSetVectorAsync_64(
|
|
int64_t n, int64_t elemSize, const void* hostPtr, int64_t incx, void* devicePtr, int64_t incy, cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(
|
|
int n, int elemSize, const void* devicePtr, int incx, void* hostPtr, int incy, cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasGetVectorAsync_64(
|
|
int64_t n, int64_t elemSize, const void* devicePtr, int64_t incx, void* hostPtr, int64_t incy, cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI
|
|
cublasSetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync_64(int64_t rows,
|
|
int64_t cols,
|
|
int64_t elemSize,
|
|
const void* A,
|
|
int64_t lda,
|
|
void* B,
|
|
int64_t ldb,
|
|
cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI
|
|
cublasGetMatrixAsync(int rows, int cols, int elemSize, const void* A, int lda, void* B, int ldb, cudaStream_t stream);
|
|
|
|
cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync_64(int64_t rows,
|
|
int64_t cols,
|
|
int64_t elemSize,
|
|
const void* A,
|
|
int64_t lda,
|
|
void* B,
|
|
int64_t ldb,
|
|
cudaStream_t stream);
|
|
|
|
/* Takes a routine name and an integer info code; returns nothing and takes no handle.
 * NOTE(review): presumably the BLAS-style XERBLA error reporter - confirm against the cuBLAS documentation. */
CUBLASAPI void CUBLASWINAPI cublasXerbla(const char* srName, int info);
|
|
|
|
/* --------------- CUBLAS BLAS1 Functions ---------------- */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle,
|
|
int n,
|
|
const void* x,
|
|
cudaDataType xType,
|
|
int incx,
|
|
void* result,
|
|
cudaDataType resultType,
|
|
cudaDataType executionType);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasNrm2Ex_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
const void* x,
|
|
cudaDataType xType,
|
|
int64_t incx,
|
|
void* result,
|
|
cudaDataType resultType,
|
|
cudaDataType executionType);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSnrm2_v2(cublasHandle_t handle, int n, const float* x, int incx, float* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSnrm2_v2_64(cublasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDnrm2_v2(cublasHandle_t handle, int n, const double* x, int incx, double* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDnrm2_v2_64(cublasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasScnrm2_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasScnrm2_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, float* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDznrm2_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDznrm2_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, double* result);
|
|
|
|
/* Untyped dot entry points (DotEx and DotcEx).
 * - Both take two const input buffers x and y, each with its own cudaDataType tag and increment.
 * - The result is written through an untyped pointer described by resultType; executionType is a further
 *   cudaDataType tag.
 * - The `_64` variants differ only in using int64_t for n, incx and incy. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotEx(
    cublasHandle_t handle, int n,
    const void* x, cudaDataType xType, int incx,
    const void* y, cudaDataType yType, int incy,
    void* result, cudaDataType resultType, cudaDataType executionType);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotEx_64(
    cublasHandle_t handle, int64_t n,
    const void* x, cudaDataType xType, int64_t incx,
    const void* y, cudaDataType yType, int64_t incy,
    void* result, cudaDataType resultType, cudaDataType executionType);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotcEx(
    cublasHandle_t handle, int n,
    const void* x, cudaDataType xType, int incx,
    const void* y, cudaDataType yType, int incy,
    void* result, cudaDataType resultType, cudaDataType executionType);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDotcEx_64(
    cublasHandle_t handle, int64_t n,
    const void* x, cudaDataType xType, int64_t incx,
    const void* y, cudaDataType yType, int64_t incy,
    void* result, cudaDataType resultType, cudaDataType executionType);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSdot_v2(cublasHandle_t handle, int n, const float* x, int incx, const float* y, int incy, float* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdot_v2_64(
|
|
cublasHandle_t handle, int64_t n, const float* x, int64_t incx, const float* y, int64_t incy, float* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDdot_v2(cublasHandle_t handle, int n, const double* x, int incx, const double* y, int incy, double* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdot_v2_64(
|
|
cublasHandle_t handle, int64_t n, const double* x, int64_t incx, const double* y, int64_t incy, double* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2(
|
|
cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotu_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
const cuComplex* x,
|
|
int64_t incx,
|
|
const cuComplex* y,
|
|
int64_t incy,
|
|
cuComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2(
|
|
cublasHandle_t handle, int n, const cuComplex* x, int incx, const cuComplex* y, int incy, cuComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdotc_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
const cuComplex* x,
|
|
int64_t incx,
|
|
const cuComplex* y,
|
|
int64_t incy,
|
|
cuComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle,
|
|
int n,
|
|
const cuDoubleComplex* x,
|
|
int incx,
|
|
const cuDoubleComplex* y,
|
|
int incy,
|
|
cuDoubleComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotu_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
const cuDoubleComplex* x,
|
|
int64_t incx,
|
|
const cuDoubleComplex* y,
|
|
int64_t incy,
|
|
cuDoubleComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle,
|
|
int n,
|
|
const cuDoubleComplex* x,
|
|
int incx,
|
|
const cuDoubleComplex* y,
|
|
int incy,
|
|
cuDoubleComplex* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdotc_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
const cuDoubleComplex* x,
|
|
int64_t incx,
|
|
const cuDoubleComplex* y,
|
|
int64_t incy,
|
|
cuDoubleComplex* result);
|
|
|
|
/* scal entry points.
 * - All forms take a const scalar alpha by pointer and a non-const vector x.
 * - ScalEx takes untyped alpha/x with cudaDataType tags plus an execution type.
 * - Csscal and Zdscal pair a real alpha (float / double) with a complex vector.
 * - The other typed forms use the same element type for alpha and x.
 * - The `_64` variants differ only in using int64_t for n and incx. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx(
    cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType,
    void* x, cudaDataType xType, int incx, cudaDataType executionType);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScalEx_64(
    cublasHandle_t handle, int64_t n, const void* alpha, cudaDataType alphaType,
    void* x, cudaDataType xType, int64_t incx, cudaDataType executionType);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2(
    cublasHandle_t handle, int n, const float* alpha, float* x, int incx);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSscal_v2_64(
    cublasHandle_t handle, int64_t n, const float* alpha, float* x, int64_t incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2(
    cublasHandle_t handle, int n, const double* alpha, double* x, int incx);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDscal_v2_64(
    cublasHandle_t handle, int64_t n, const double* alpha, double* x, int64_t incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2(
    cublasHandle_t handle, int n, const cuComplex* alpha, cuComplex* x, int incx);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCscal_v2_64(
    cublasHandle_t handle, int64_t n, const cuComplex* alpha, cuComplex* x, int64_t incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2(
    cublasHandle_t handle, int n, const float* alpha, cuComplex* x, int incx);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsscal_v2_64(
    cublasHandle_t handle, int64_t n, const float* alpha, cuComplex* x, int64_t incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2(
    cublasHandle_t handle, int n, const cuDoubleComplex* alpha, cuDoubleComplex* x, int incx);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZscal_v2_64(
    cublasHandle_t handle, int64_t n, const cuDoubleComplex* alpha, cuDoubleComplex* x, int64_t incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2(
    cublasHandle_t handle, int n, const double* alpha, cuDoubleComplex* x, int incx);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdscal_v2_64(
    cublasHandle_t handle, int64_t n, const double* alpha, cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* axpy entry points.
 * - All forms take a const scalar alpha by pointer, a const vector x and a non-const vector y.
 * - AxpyEx takes untyped buffers with a cudaDataType tag per operand plus an execution type.
 * - The typed forms use one element type (float, double, cuComplex, cuDoubleComplex) throughout.
 * - The `_64` variants differ only in using int64_t for n, incx and incy. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx(
    cublasHandle_t handle, int n, const void* alpha, cudaDataType alphaType,
    const void* x, cudaDataType xType, int incx,
    void* y, cudaDataType yType, int incy, cudaDataType executiontype);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAxpyEx_64(
    cublasHandle_t handle, int64_t n, const void* alpha, cudaDataType alphaType,
    const void* x, cudaDataType xType, int64_t incx,
    void* y, cudaDataType yType, int64_t incy, cudaDataType executiontype);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2(
    cublasHandle_t handle, int n, const float* alpha, const float* x, int incx, float* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSaxpy_v2_64(
    cublasHandle_t handle, int64_t n, const float* alpha, const float* x, int64_t incx, float* y, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2(
    cublasHandle_t handle, int n, const double* alpha, const double* x, int incx, double* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDaxpy_v2_64(
    cublasHandle_t handle, int64_t n, const double* alpha, const double* x, int64_t incx, double* y, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2(
    cublasHandle_t handle, int n, const cuComplex* alpha, const cuComplex* x, int incx, cuComplex* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCaxpy_v2_64(
    cublasHandle_t handle, int64_t n, const cuComplex* alpha, const cuComplex* x, int64_t incx,
    cuComplex* y, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2(
    cublasHandle_t handle, int n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int incx,
    cuDoubleComplex* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZaxpy_v2_64(
    cublasHandle_t handle, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* x, int64_t incx,
    cuDoubleComplex* y, int64_t incy);
|
|
|
|
/* copy entry points.
 * - All forms take a const source vector x and a non-const destination vector y, each with its own increment.
 * - CopyEx takes untyped buffers with a cudaDataType tag per vector.
 * - The `_64` variants differ only in using int64_t for n, incx and incy. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCopyEx(
    cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx,
    void* y, cudaDataType yType, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCopyEx_64(
    cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx,
    void* y, cudaDataType yType, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScopy_v2(
    cublasHandle_t handle, int n, const float* x, int incx, float* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScopy_v2_64(
    cublasHandle_t handle, int64_t n, const float* x, int64_t incx, float* y, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDcopy_v2(
    cublasHandle_t handle, int n, const double* x, int incx, double* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDcopy_v2_64(
    cublasHandle_t handle, int64_t n, const double* x, int64_t incx, double* y, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCcopy_v2(
    cublasHandle_t handle, int n, const cuComplex* x, int incx, cuComplex* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCcopy_v2_64(
    cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, cuComplex* y, int64_t incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZcopy_v2(
    cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZcopy_v2_64(
    cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, cuDoubleComplex* y, int64_t incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSswap_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSswap_v2_64(cublasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDswap_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDswap_v2_64(cublasHandle_t handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasCswap_v2(cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasCswap_v2_64(cublasHandle_t handle, int64_t n, cuComplex* x, int64_t incx, cuComplex* y, int64_t incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasZswap_v2(cublasHandle_t handle, int n, cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasZswap_v2_64(cublasHandle_t handle, int64_t n, cuDoubleComplex* x, int64_t incx, cuDoubleComplex* y, int64_t incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSwapEx(
|
|
cublasHandle_t handle, int n, void* x, cudaDataType xType, int incx, void* y, cudaDataType yType, int incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSwapEx_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
void* x,
|
|
cudaDataType xType,
|
|
int64_t incx,
|
|
void* y,
|
|
cudaDataType yType,
|
|
int64_t incy);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIsamax_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIsamax_v2_64(cublasHandle_t handle, int64_t n, const float* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIdamax_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIdamax_v2_64(cublasHandle_t handle, int64_t n, const double* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIcamax_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIcamax_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIzamax_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIzamax_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIamaxEx(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIamaxEx_64(cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIsamin_v2(cublasHandle_t handle, int n, const float* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIsamin_v2_64(cublasHandle_t handle, int64_t n, const float* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIdamin_v2(cublasHandle_t handle, int n, const double* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIdamin_v2_64(cublasHandle_t handle, int64_t n, const double* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIcamin_v2(cublasHandle_t handle, int n, const cuComplex* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIcamin_v2_64(cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIzamin_v2(cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIzamin_v2_64(cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, int64_t* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIaminEx(cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx, int* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasIaminEx_64(cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx, int64_t* result);
|
|
|
|
/* asum entry points.
 * - AsumEx takes untyped buffers plus cudaDataType tags for the input, the result and the execution type.
 * - The typed forms read a const vector x and write a real-valued result: float for S/Sc, double for D/Dz.
 * - The `_64` variants differ only in using int64_t for n and incx. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAsumEx(
    cublasHandle_t handle, int n, const void* x, cudaDataType xType, int incx,
    void* result, cudaDataType resultType, cudaDataType executiontype);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasAsumEx_64(
    cublasHandle_t handle, int64_t n, const void* x, cudaDataType xType, int64_t incx,
    void* result, cudaDataType resultType, cudaDataType executiontype);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSasum_v2(
    cublasHandle_t handle, int n, const float* x, int incx, float* result);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSasum_v2_64(
    cublasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDasum_v2(
    cublasHandle_t handle, int n, const double* x, int incx, double* result);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDasum_v2_64(
    cublasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScasum_v2(
    cublasHandle_t handle, int n, const cuComplex* x, int incx, float* result);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasScasum_v2_64(
    cublasHandle_t handle, int64_t n, const cuComplex* x, int64_t incx, float* result);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDzasum_v2(
    cublasHandle_t handle, int n, const cuDoubleComplex* x, int incx, double* result);
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDzasum_v2_64(
    cublasHandle_t handle, int64_t n, const cuDoubleComplex* x, int64_t incx, double* result);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSrot_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* c, const float* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSrot_v2_64(
|
|
cublasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy, const float* c, const float* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDrot_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* c, const double* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrot_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
double* x,
|
|
int64_t incx,
|
|
double* y,
|
|
int64_t incy,
|
|
const double* c,
|
|
const double* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrot_v2(
|
|
cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const cuComplex* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCrot_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
cuComplex* x,
|
|
int64_t incx,
|
|
cuComplex* y,
|
|
int64_t incy,
|
|
const float* c,
|
|
const cuComplex* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsrot_v2(
|
|
cublasHandle_t handle, int n, cuComplex* x, int incx, cuComplex* y, int incy, const float* c, const float* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsrot_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
cuComplex* x,
|
|
int64_t incx,
|
|
cuComplex* y,
|
|
int64_t incy,
|
|
const float* c,
|
|
const float* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrot_v2(cublasHandle_t handle,
|
|
int n,
|
|
cuDoubleComplex* x,
|
|
int incx,
|
|
cuDoubleComplex* y,
|
|
int incy,
|
|
const double* c,
|
|
const cuDoubleComplex* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZrot_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
cuDoubleComplex* x,
|
|
int64_t incx,
|
|
cuDoubleComplex* y,
|
|
int64_t incy,
|
|
const double* c,
|
|
const cuDoubleComplex* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle,
|
|
int n,
|
|
cuDoubleComplex* x,
|
|
int incx,
|
|
cuDoubleComplex* y,
|
|
int incy,
|
|
const double* c,
|
|
const double* s);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdrot_v2_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
cuDoubleComplex* x,
|
|
int64_t incx,
|
|
cuDoubleComplex* y,
|
|
int64_t incy,
|
|
const double* c,
|
|
const double* s);
|
|
|
|
/*
 * Type-erased rot entry points. The vectors and the c/s scalars are passed as
 * void pointers, each accompanied by a cudaDataType tag (`xType`, `yType`,
 * `csType`); `executiontype` is a further cudaDataType argument.
 * cublasRotEx_64 differs only in taking `n`, `incx`, `incy` as int64_t.
 * NOTE(review): presumably `executiontype` selects the compute precision and
 * only certain type combinations are accepted -- confirm in the cuBLAS
 * reference.
 */

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx(
    cublasHandle_t handle, int n,
    void* x, cudaDataType xType, int incx,
    void* y, cudaDataType yType, int incy,
    const void* c, const void* s, cudaDataType csType,
    cudaDataType executiontype);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotEx_64(
    cublasHandle_t handle, int64_t n,
    void* x, cudaDataType xType, int64_t incx,
    void* y, cudaDataType yType, int64_t incy,
    const void* c, const void* s, cudaDataType csType,
    cudaDataType executiontype);
|
|
|
|
/*
 * rotg family. All four scalar arguments (`a`, `b`, `c`, `s`) are non-const
 * pointers; there is no length or stride argument and no `_64` variant is
 * declared here. In the complex variants `c` is real-typed (float / double)
 * while `a`, `b`, `s` are complex. cublasRotgEx is the type-erased form with
 * cudaDataType tags for the a/b pair and the c/s pair plus `executiontype`.
 * NOTE(review): by BLAS convention rotg constructs the rotation parameters
 * from (a, b) and overwrites its arguments -- confirm in the cuBLAS
 * reference.
 */

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSrotg_v2(cublasHandle_t handle, float* a, float* b, float* c, float* s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDrotg_v2(cublasHandle_t handle, double* a, double* b, double* c, double* s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCrotg_v2(cublasHandle_t handle, cuComplex* a, cuComplex* b, float* c, cuComplex* s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZrotg_v2(cublasHandle_t handle, cuDoubleComplex* a, cuDoubleComplex* b, double* c, cuDoubleComplex* s);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotgEx(
    cublasHandle_t handle,
    void* a, void* b, cudaDataType abType,
    void* c, void* s, cudaDataType csType,
    cudaDataType executiontype);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSrotm_v2(cublasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSrotm_v2_64(cublasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy, const float* param);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDrotm_v2(cublasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDrotm_v2_64(
|
|
cublasHandle_t handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy, const double* param);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotmEx(cublasHandle_t handle,
|
|
int n,
|
|
void* x,
|
|
cudaDataType xType,
|
|
int incx,
|
|
void* y,
|
|
cudaDataType yType,
|
|
int incy,
|
|
const void* param,
|
|
cudaDataType paramType,
|
|
cudaDataType executiontype);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotmEx_64(cublasHandle_t handle,
|
|
int64_t n,
|
|
void* x,
|
|
cudaDataType xType,
|
|
int64_t incx,
|
|
void* y,
|
|
cudaDataType yType,
|
|
int64_t incy,
|
|
const void* param,
|
|
cudaDataType paramType,
|
|
cudaDataType executiontype);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSrotmg_v2(cublasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDrotmg_v2(cublasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasRotmgEx(cublasHandle_t handle,
|
|
void* d1,
|
|
cudaDataType d1Type,
|
|
void* d2,
|
|
cudaDataType d2Type,
|
|
void* x1,
|
|
cudaDataType x1Type,
|
|
const void* y1,
|
|
cudaDataType y1Type,
|
|
void* param,
|
|
cudaDataType paramType,
|
|
cudaDataType executiontype);
|
|
|
|
/* --------------- CUBLAS BLAS2 Functions ---------------- */
|
|
|
|
/* GEMV */
|
|
|
|
/*
 * gemv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). Arguments, in order: handle, `trans`, dimensions
 * `m` x `n`, const scalar pointer `alpha`, const matrix pointer `A` with
 * leading dimension `lda`, const vector `x` with stride `incx`, const scalar
 * pointer `beta`, and non-const vector `y` with stride `incy`.
 * The `_64` twins take m, n, lda, incx, incy as int64_t instead of int.
 * NOTE(review): by BLAS convention this is y = alpha*op(A)*x + beta*y with
 * column-major A -- confirm in the cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha, const float* A, int lda,
    const float* x, int incx,
    const float* beta, float* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha, const float* A, int64_t lda,
    const float* x, int64_t incx,
    const float* beta, float* y, int64_t incy);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const double* alpha, const double* A, int lda,
    const double* x, int incx,
    const double* beta, double* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const double* alpha, const double* A, int64_t lda,
    const double* x, int64_t incx,
    const double* beta, double* y, int64_t incy);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* x, int incx,
    const cuComplex* beta, cuComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* x, int64_t incx,
    const cuComplex* beta, cuComplex* y, int64_t incy);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy);
|
|
|
|
/* GBMV */
|
|
|
|
/*
 * gbmv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). Same argument list as gemv with two extra integer
 * arguments, `kl` and `ku`, between the dimensions and `alpha`.
 * The `_64` twins take m, n, kl, ku, lda, incx, incy as int64_t, not int.
 * NOTE(review): by BLAS convention this is the banded matrix-vector product
 * y = alpha*op(A)*x + beta*y, with kl/ku the sub-/super-diagonal counts and A
 * in band storage -- confirm in the cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2(
    cublasHandle_t handle, cublasOperation_t trans,
    int m, int n, int kl, int ku,
    const float* alpha, const float* A, int lda,
    const float* x, int incx,
    const float* beta, float* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgbmv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans,
    int64_t m, int64_t n, int64_t kl, int64_t ku,
    const float* alpha, const float* A, int64_t lda,
    const float* x, int64_t incx,
    const float* beta, float* y, int64_t incy);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2(
    cublasHandle_t handle, cublasOperation_t trans,
    int m, int n, int kl, int ku,
    const double* alpha, const double* A, int lda,
    const double* x, int incx,
    const double* beta, double* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgbmv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans,
    int64_t m, int64_t n, int64_t kl, int64_t ku,
    const double* alpha, const double* A, int64_t lda,
    const double* x, int64_t incx,
    const double* beta, double* y, int64_t incy);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2(
    cublasHandle_t handle, cublasOperation_t trans,
    int m, int n, int kl, int ku,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* x, int incx,
    const cuComplex* beta, cuComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans,
    int64_t m, int64_t n, int64_t kl, int64_t ku,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* x, int64_t incx,
    const cuComplex* beta, cuComplex* y, int64_t incy);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2(
    cublasHandle_t handle, cublasOperation_t trans,
    int m, int n, int kl, int ku,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2_64(
    cublasHandle_t handle, cublasOperation_t trans,
    int64_t m, int64_t n, int64_t kl, int64_t ku,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy);
|
|
|
|
/* TRMV */
|
|
|
|
/*
 * trmv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). Arguments, in order: handle, `uplo`, `trans`, `diag`,
 * order `n`, const matrix pointer `A` with leading dimension `lda`, and the
 * non-const vector `x` with stride `incx`. There are no alpha/beta scalars
 * and no separate output vector.
 * The `_64` twins take n, lda, incx as int64_t instead of int.
 * NOTE(review): by BLAS convention this is the in-place triangular product
 * x = op(A)*x -- confirm in the cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const float* A, int lda,
    float* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const float* A, int64_t lda,
    float* x, int64_t incx);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const double* A, int lda,
    double* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const double* A, int64_t lda,
    double* x, int64_t incx);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuComplex* A, int lda,
    cuComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuComplex* A, int64_t lda,
    cuComplex* x, int64_t incx);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuDoubleComplex* A, int lda,
    cuDoubleComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuDoubleComplex* A, int64_t lda,
    cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* TBMV */
|
|
|
|
/*
 * tbmv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). Same argument list as trmv with one extra integer
 * `k` after `n`: handle, `uplo`, `trans`, `diag`, `n`, `k`, const matrix
 * pointer `A` with leading dimension `lda`, non-const vector `x` with stride
 * `incx`.
 * The `_64` twins take n, k, lda, incx as int64_t instead of int.
 * NOTE(review): by BLAS convention this is the in-place triangular banded
 * product x = op(A)*x, with k the number of off-diagonals -- confirm in the
 * cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const float* A, int lda,
    float* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const float* A, int64_t lda,
    float* x, int64_t incx);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const double* A, int lda,
    double* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const double* A, int64_t lda,
    double* x, int64_t incx);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const cuComplex* A, int lda,
    cuComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const cuComplex* A, int64_t lda,
    cuComplex* x, int64_t incx);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const cuDoubleComplex* A, int lda,
    cuDoubleComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const cuDoubleComplex* A, int64_t lda,
    cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* TPMV */
|
|
|
|
/*
 * tpmv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). Arguments, in order: handle, `uplo`, `trans`, `diag`,
 * order `n`, const matrix pointer `AP` (no leading-dimension argument), and
 * the non-const vector `x` with stride `incx`.
 * The `_64` twins take n, incx as int64_t instead of int.
 * NOTE(review): by BLAS convention this is the in-place triangular product
 * x = op(A)*x with A in packed storage -- confirm in the cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const float* AP,
    float* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const float* AP,
    float* x, int64_t incx);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const double* AP,
    double* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const double* AP,
    double* x, int64_t incx);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuComplex* AP,
    cuComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuComplex* AP,
    cuComplex* x, int64_t incx);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuDoubleComplex* AP,
    cuDoubleComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpmv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuDoubleComplex* AP,
    cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* TRSV */
|
|
|
|
/*
 * trsv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). The argument list matches trmv: handle, `uplo`,
 * `trans`, `diag`, order `n`, const matrix pointer `A` with leading dimension
 * `lda`, and the non-const vector `x` with stride `incx`.
 * The `_64` twins take n, lda, incx as int64_t instead of int.
 * NOTE(review): by BLAS convention this solves the triangular system
 * op(A)*x = b in place, with x holding b on entry -- confirm in the cuBLAS
 * reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const float* A, int lda,
    float* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const float* A, int64_t lda,
    float* x, int64_t incx);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const double* A, int lda,
    double* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const double* A, int64_t lda,
    double* x, int64_t incx);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuComplex* A, int lda,
    cuComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuComplex* A, int64_t lda,
    cuComplex* x, int64_t incx);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuDoubleComplex* A, int lda,
    cuDoubleComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuDoubleComplex* A, int64_t lda,
    cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* TPSV */
|
|
|
|
/*
 * tpsv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). The argument list matches tpmv: handle, `uplo`,
 * `trans`, `diag`, order `n`, const matrix pointer `AP` (no leading-dimension
 * argument), and the non-const vector `x` with stride `incx`.
 * The `_64` twins take n, incx as int64_t instead of int.
 * NOTE(review): by BLAS convention this solves the packed triangular system
 * op(A)*x = b in place -- confirm in the cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const float* AP,
    float* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const float* AP,
    float* x, int64_t incx);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const double* AP,
    double* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const double* AP,
    double* x, int64_t incx);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuComplex* AP,
    cuComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuComplex* AP,
    cuComplex* x, int64_t incx);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, const cuDoubleComplex* AP,
    cuDoubleComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, const cuDoubleComplex* AP,
    cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* TBSV */
|
|
|
|
/*
 * tbsv family, declared for float (S), double (D), cuComplex (C) and
 * cuDoubleComplex (Z). The argument list matches tbmv: handle, `uplo`,
 * `trans`, `diag`, `n`, `k`, const matrix pointer `A` with leading dimension
 * `lda`, and the non-const vector `x` with stride `incx`.
 * The `_64` twins take n, k, lda, incx as int64_t instead of int.
 * NOTE(review): by BLAS convention this solves the triangular banded system
 * op(A)*x = b in place, with k the number of off-diagonals -- confirm in the
 * cuBLAS reference.
 */

/* float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const float* A, int lda,
    float* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStbsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const float* A, int64_t lda,
    float* x, int64_t incx);

/* double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const double* A, int lda,
    double* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtbsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const double* A, int64_t lda,
    double* x, int64_t incx);

/* cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const cuComplex* A, int lda,
    cuComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtbsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const cuComplex* A, int64_t lda,
    cuComplex* x, int64_t incx);

/* cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int n, int k, const cuDoubleComplex* A, int lda,
    cuDoubleComplex* x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtbsv_v2_64(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag,
    int64_t n, int64_t k, const cuDoubleComplex* A, int64_t lda,
    cuDoubleComplex* x, int64_t incx);
|
|
|
|
/* SYMV/HEMV */
|
|
|
|
/*
 * symv / hemv family. symv is declared for float (S), double (D), cuComplex
 * (C) and cuDoubleComplex (Z); hemv only for the two complex types.
 * Arguments, in order: handle, `uplo`, order `n`, const scalar pointer
 * `alpha`, const matrix pointer `A` with leading dimension `lda`, const
 * vector `x` with stride `incx`, const scalar pointer `beta`, and non-const
 * vector `y` with stride `incy`.
 * The `_64` twins take n, lda, incx, incy as int64_t instead of int.
 * NOTE(review): by BLAS convention this is y = alpha*A*x + beta*y with A
 * symmetric (symv) or Hermitian (hemv), and only the `uplo` triangle
 * referenced -- confirm in the cuBLAS reference.
 */

/* symv: float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha, const float* A, int lda,
    const float* x, int incx,
    const float* beta, float* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha, const float* A, int64_t lda,
    const float* x, int64_t incx,
    const float* beta, float* y, int64_t incy);

/* symv: double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha, const double* A, int lda,
    const double* x, int incx,
    const double* beta, double* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha, const double* A, int64_t lda,
    const double* x, int64_t incx,
    const double* beta, double* y, int64_t incy);

/* symv: cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* x, int incx,
    const cuComplex* beta, cuComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* x, int64_t incx,
    const cuComplex* beta, cuComplex* y, int64_t incy);

/* symv: cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy);

/* hemv: cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* x, int incx,
    const cuComplex* beta, cuComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* x, int64_t incx,
    const cuComplex* beta, cuComplex* y, int64_t incy);

/* hemv: cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy);
|
|
|
|
/* SBMV/HBMV */
|
|
|
|
/*
 * sbmv / hbmv family. sbmv is declared for float (S) and double (D); hbmv for
 * cuComplex (C) and cuDoubleComplex (Z). Same argument list as symv/hemv with
 * one extra integer `k` after `n`: handle, `uplo`, `n`, `k`, const `alpha`,
 * const `A` with leading dimension `lda`, const `x` with stride `incx`,
 * const `beta`, non-const `y` with stride `incy`.
 * The `_64` twins take n, k, lda, incx, incy as int64_t instead of int.
 * NOTE(review): by BLAS convention this is y = alpha*A*x + beta*y with A a
 * symmetric (sbmv) or Hermitian (hbmv) band matrix with k off-diagonals --
 * confirm in the cuBLAS reference.
 */

/* sbmv: float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
    const float* alpha, const float* A, int lda,
    const float* x, int incx,
    const float* beta, float* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k,
    const float* alpha, const float* A, int64_t lda,
    const float* x, int64_t incx,
    const float* beta, float* y, int64_t incy);

/* sbmv: double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
    const double* alpha, const double* A, int lda,
    const double* x, int incx,
    const double* beta, double* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k,
    const double* alpha, const double* A, int64_t lda,
    const double* x, int64_t incx,
    const double* beta, double* y, int64_t incy);

/* hbmv: cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* x, int incx,
    const cuComplex* beta, cuComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* x, int64_t incx,
    const cuComplex* beta, cuComplex* y, int64_t incy);

/* hbmv: cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n, int64_t k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy);
|
|
|
|
/* SPMV/HPMV */
|
|
|
|
/*
 * spmv / hpmv family. spmv is declared for float (S) and double (D); hpmv for
 * cuComplex (C) and cuDoubleComplex (Z). Arguments, in order: handle, `uplo`,
 * order `n`, const scalar pointer `alpha`, const matrix pointer `AP` (no
 * leading-dimension argument), const vector `x` with stride `incx`, const
 * scalar pointer `beta`, and non-const vector `y` with stride `incy`.
 * The `_64` twins take n, incx, incy as int64_t instead of int.
 * NOTE(review): by BLAS convention this is y = alpha*A*x + beta*y with A a
 * symmetric (spmv) or Hermitian (hpmv) matrix in packed storage -- confirm in
 * the cuBLAS reference.
 */

/* spmv: float */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha, const float* AP,
    const float* x, int incx,
    const float* beta, float* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha, const float* AP,
    const float* x, int64_t incx,
    const float* beta, float* y, int64_t incy);

/* spmv: double */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha, const double* AP,
    const double* x, int incx,
    const double* beta, double* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha, const double* AP,
    const double* x, int64_t incx,
    const double* beta, double* y, int64_t incy);

/* hpmv: cuComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha, const cuComplex* AP,
    const cuComplex* x, int incx,
    const cuComplex* beta, cuComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha, const cuComplex* AP,
    const cuComplex* x, int64_t incx,
    const cuComplex* beta, cuComplex* y, int64_t incy);

/* hpmv: cuDoubleComplex */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* AP,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* AP,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy);
|
|
|
|
/* GER */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2(cublasHandle_t handle,
|
|
int m,
|
|
int n,
|
|
const float* alpha,
|
|
const float* x,
|
|
int incx,
|
|
const float* y,
|
|
int incy,
|
|
float* A,
|
|
int lda);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2_64(cublasHandle_t handle,
|
|
int64_t m,
|
|
int64_t n,
|
|
const float* alpha,
|
|
const float* x,
|
|
int64_t incx,
|
|
const float* y,
|
|
int64_t incy,
|
|
float* A,
|
|
int64_t lda);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2(cublasHandle_t handle,
|
|
int m,
|
|
int n,
|
|
const double* alpha,
|
|
const double* x,
|
|
int incx,
|
|
const double* y,
|
|
int incy,
|
|
double* A,
|
|
int lda);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2_64(cublasHandle_t handle,
|
|
int64_t m,
|
|
int64_t n,
|
|
const double* alpha,
|
|
const double* x,
|
|
int64_t incx,
|
|
const double* y,
|
|
int64_t incy,
|
|
double* A,
|
|
int64_t lda);
|
|
|
|
/* cublasCgeru_v2 (GER section): cuComplex operands; m, n, incx, incy and lda are int.
 * alpha, x and y are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2(
    cublasHandle_t handle, int m, int n,
    const cuComplex* alpha,
    const cuComplex* x, int incx,
    const cuComplex* y, int incy,
    cuComplex* A, int lda);
|
|
|
|
/* cublasCgeru_v2_64 (GER section): same parameter list as cublasCgeru_v2, with
 * int64_t for m, n, incx, incy and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2_64(
    cublasHandle_t handle, int64_t m, int64_t n,
    const cuComplex* alpha,
    const cuComplex* x, int64_t incx,
    const cuComplex* y, int64_t incy,
    cuComplex* A, int64_t lda);
|
|
|
|
/* cublasCgerc_v2 (GER section): cuComplex operands; m, n, incx, incy and lda are int.
 * The parameter list is identical to cublasCgeru_v2; the semantic difference
 * between the "u" and "c" forms is not visible in this header.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2(
    cublasHandle_t handle, int m, int n,
    const cuComplex* alpha,
    const cuComplex* x, int incx,
    const cuComplex* y, int incy,
    cuComplex* A, int lda);
|
|
|
|
/* cublasCgerc_v2_64 (GER section): same parameter list as cublasCgerc_v2, with
 * int64_t for m, n, incx, incy and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2_64(
    cublasHandle_t handle, int64_t m, int64_t n,
    const cuComplex* alpha,
    const cuComplex* x, int64_t incx,
    const cuComplex* y, int64_t incy,
    cuComplex* A, int64_t lda);
|
|
|
|
/* cublasZgeru_v2 (GER section): cuDoubleComplex operands; m, n, incx, incy and
 * lda are int. alpha, x and y are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2(
    cublasHandle_t handle, int m, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* y, int incy,
    cuDoubleComplex* A, int lda);
|
|
|
|
/* cublasZgeru_v2_64 (GER section): same parameter list as cublasZgeru_v2, with
 * int64_t for m, n, incx, incy and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2_64(
    cublasHandle_t handle, int64_t m, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* y, int64_t incy,
    cuDoubleComplex* A, int64_t lda);
|
|
|
|
/* cublasZgerc_v2 (GER section): cuDoubleComplex operands; m, n, incx, incy and
 * lda are int. The parameter list is identical to cublasZgeru_v2; the semantic
 * difference between the "u" and "c" forms is not visible in this header.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2(
    cublasHandle_t handle, int m, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* y, int incy,
    cuDoubleComplex* A, int lda);
|
|
|
|
/* cublasZgerc_v2_64 (GER section): same parameter list as cublasZgerc_v2, with
 * int64_t for m, n, incx, incy and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2_64(
    cublasHandle_t handle, int64_t m, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* y, int64_t incy,
    cuDoubleComplex* A, int64_t lda);
|
|
|
|
/* SYR/HER */
|
|
|
|
/* cublasSsyr_v2 (SYR/HER section): float operands; n, incx and lda are int.
 * alpha and x are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha,
    const float* x, int incx,
    float* A, int lda);
|
|
|
|
/* cublasSsyr_v2_64 (SYR/HER section): same parameter list as cublasSsyr_v2,
 * with int64_t for n, incx and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha,
    const float* x, int64_t incx,
    float* A, int64_t lda);
|
|
|
|
/* cublasDsyr_v2 (SYR/HER section): double operands; n, incx and lda are int.
 * alpha and x are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha,
    const double* x, int incx,
    double* A, int lda);
|
|
|
|
/* cublasDsyr_v2_64 (SYR/HER section): same parameter list as cublasDsyr_v2,
 * with int64_t for n, incx and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha,
    const double* x, int64_t incx,
    double* A, int64_t lda);
|
|
|
|
/* cublasCsyr_v2 (SYR/HER section): cuComplex operands; n, incx and lda are int.
 * alpha and x are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha,
    const cuComplex* x, int incx,
    cuComplex* A, int lda);
|
|
|
|
/* cublasCsyr_v2_64 (SYR/HER section): same parameter list as cublasCsyr_v2,
 * with int64_t for n, incx and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha,
    const cuComplex* x, int64_t incx,
    cuComplex* A, int64_t lda);
|
|
|
|
/* cublasZsyr_v2 (SYR/HER section): cuDoubleComplex operands; n, incx and lda
 * are int. alpha and x are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int incx,
    cuDoubleComplex* A, int lda);
|
|
|
|
/* cublasZsyr_v2_64 (SYR/HER section): same parameter list as cublasZsyr_v2,
 * with int64_t for n, incx and lda. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int64_t incx,
    cuDoubleComplex* A, int64_t lda);
|
|
|
|
/* cublasCher_v2 (SYR/HER section): x and A are cuComplex, but alpha is a
 * real scalar (const float*). n, incx and lda are int.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha,
    const cuComplex* x, int incx,
    cuComplex* A, int lda);
|
|
|
|
/* cublasCher_v2_64 (SYR/HER section): same parameter list as cublasCher_v2
 * (real float alpha, cuComplex x and A), with int64_t for n, incx and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha,
    const cuComplex* x, int64_t incx,
    cuComplex* A, int64_t lda);
|
|
|
|
/* cublasZher_v2 (SYR/HER section): x and A are cuDoubleComplex, but alpha is a
 * real scalar (const double*). n, incx and lda are int.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha,
    const cuDoubleComplex* x, int incx,
    cuDoubleComplex* A, int lda);
|
|
|
|
/* cublasZher_v2_64 (SYR/HER section): same parameter list as cublasZher_v2
 * (real double alpha, cuDoubleComplex x and A), with int64_t for n, incx, lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha,
    const cuDoubleComplex* x, int64_t incx,
    cuDoubleComplex* A, int64_t lda);
|
|
|
|
/* SPR/HPR */
|
|
|
|
/* cublasSspr_v2 (SPR/HPR section): float operands; n and incx are int.
 * alpha and x are const inputs; AP is the only writable pointer and takes no
 * leading-dimension argument. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha,
    const float* x, int incx,
    float* AP);
|
|
|
|
/* cublasSspr_v2_64 (SPR/HPR section): same parameter list as cublasSspr_v2,
 * with int64_t for n and incx. AP is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha,
    const float* x, int64_t incx,
    float* AP);
|
|
|
|
/* cublasDspr_v2 (SPR/HPR section): double operands; n and incx are int.
 * alpha and x are const inputs; AP is the only writable pointer and takes no
 * leading-dimension argument. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha,
    const double* x, int incx,
    double* AP);
|
|
|
|
/* cublasDspr_v2_64 (SPR/HPR section): same parameter list as cublasDspr_v2,
 * with int64_t for n and incx. AP is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha,
    const double* x, int64_t incx,
    double* AP);
|
|
|
|
/* cublasChpr_v2 (SPR/HPR section): x and AP are cuComplex, but alpha is a
 * real scalar (const float*). n and incx are int.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha,
    const cuComplex* x, int incx,
    cuComplex* AP);
|
|
|
|
/* cublasChpr_v2_64 (SPR/HPR section): same parameter list as cublasChpr_v2
 * (real float alpha, cuComplex x and AP), with int64_t for n and incx.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha,
    const cuComplex* x, int64_t incx,
    cuComplex* AP);
|
|
|
|
/* cublasZhpr_v2 (SPR/HPR section): x and AP are cuDoubleComplex, but alpha is
 * a real scalar (const double*). n and incx are int.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha,
    const cuDoubleComplex* x, int incx,
    cuDoubleComplex* AP);
|
|
|
|
/* cublasZhpr_v2_64 (SPR/HPR section): same parameter list as cublasZhpr_v2
 * (real double alpha, cuDoubleComplex x and AP), with int64_t for n and incx.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha,
    const cuDoubleComplex* x, int64_t incx,
    cuDoubleComplex* AP);
|
|
|
|
/* SYR2/HER2 */
|
|
|
|
/* cublasSsyr2_v2 (SYR2/HER2 section): float operands; n, incx, incy and lda
 * are int. alpha, x and y are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha,
    const float* x, int incx,
    const float* y, int incy,
    float* A, int lda);
|
|
|
|
/* cublasSsyr2_v2_64 (SYR2/HER2 section): same parameter list as
 * cublasSsyr2_v2, with int64_t for n, incx, incy and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha,
    const float* x, int64_t incx,
    const float* y, int64_t incy,
    float* A, int64_t lda);
|
|
|
|
/* cublasDsyr2_v2 (SYR2/HER2 section): double operands; n, incx, incy and lda
 * are int. alpha, x and y are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha,
    const double* x, int incx,
    const double* y, int incy,
    double* A, int lda);
|
|
|
|
/* cublasDsyr2_v2_64 (SYR2/HER2 section): same parameter list as
 * cublasDsyr2_v2, with int64_t for n, incx, incy and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha,
    const double* x, int64_t incx,
    const double* y, int64_t incy,
    double* A, int64_t lda);
|
|
|
|
/* cublasCsyr2_v2 (SYR2/HER2 section): cuComplex operands; n, incx, incy and
 * lda are int. alpha, x and y are const inputs; A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha,
    const cuComplex* x, int incx,
    const cuComplex* y, int incy,
    cuComplex* A, int lda);
|
|
|
|
/* cublasCsyr2_v2_64 (SYR2/HER2 section): same parameter list as
 * cublasCsyr2_v2, with int64_t for n, incx, incy and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha,
    const cuComplex* x, int64_t incx,
    const cuComplex* y, int64_t incy,
    cuComplex* A, int64_t lda);
|
|
|
|
/* cublasZsyr2_v2 (SYR2/HER2 section): cuDoubleComplex operands; n, incx, incy
 * and lda are int. alpha, x and y are const inputs; A is the only writable
 * pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* y, int incy,
    cuDoubleComplex* A, int lda);
|
|
|
|
/* cublasZsyr2_v2_64 (SYR2/HER2 section): same parameter list as
 * cublasZsyr2_v2, with int64_t for n, incx, incy and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* y, int64_t incy,
    cuDoubleComplex* A, int64_t lda);
|
|
|
|
/* cublasCher2_v2 (SYR2/HER2 section): cuComplex operands, including a complex
 * alpha (unlike cublasCher_v2, whose alpha is const float*).
 * n, incx, incy and lda are int. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha,
    const cuComplex* x, int incx,
    const cuComplex* y, int incy,
    cuComplex* A, int lda);
|
|
|
|
/* cublasCher2_v2_64 (SYR2/HER2 section): same parameter list as
 * cublasCher2_v2, with int64_t for n, incx, incy and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha,
    const cuComplex* x, int64_t incx,
    const cuComplex* y, int64_t incy,
    cuComplex* A, int64_t lda);
|
|
|
|
/* cublasZher2_v2 (SYR2/HER2 section): cuDoubleComplex operands, including a
 * complex alpha (unlike cublasZher_v2, whose alpha is const double*).
 * n, incx, incy and lda are int. A is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* y, int incy,
    cuDoubleComplex* A, int lda);
|
|
|
|
/* cublasZher2_v2_64 (SYR2/HER2 section): same parameter list as
 * cublasZher2_v2, with int64_t for n, incx, incy and lda.
 * A is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* y, int64_t incy,
    cuDoubleComplex* A, int64_t lda);
|
|
|
|
/* SPR2/HPR2 */
|
|
|
|
/* cublasSspr2_v2 (SPR2/HPR2 section): float operands; n, incx and incy are int.
 * alpha, x and y are const inputs; AP is the only writable pointer and takes
 * no leading-dimension argument. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const float* alpha,
    const float* x, int incx,
    const float* y, int incy,
    float* AP);
|
|
|
|
/* cublasSspr2_v2_64 (SPR2/HPR2 section): same parameter list as
 * cublasSspr2_v2, with int64_t for n, incx and incy.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const float* alpha,
    const float* x, int64_t incx,
    const float* y, int64_t incy,
    float* AP);
|
|
|
|
/* cublasDspr2_v2 (SPR2/HPR2 section): double operands; n, incx and incy are
 * int. alpha, x and y are const inputs; AP is the only writable pointer and
 * takes no leading-dimension argument. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const double* alpha,
    const double* x, int incx,
    const double* y, int incy,
    double* AP);
|
|
|
|
/* cublasDspr2_v2_64 (SPR2/HPR2 section): same parameter list as
 * cublasDspr2_v2, with int64_t for n, incx and incy.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const double* alpha,
    const double* x, int64_t incx,
    const double* y, int64_t incy,
    double* AP);
|
|
|
|
/* cublasChpr2_v2 (SPR2/HPR2 section): cuComplex operands, including a complex
 * alpha; n, incx and incy are int. AP is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuComplex* alpha,
    const cuComplex* x, int incx,
    const cuComplex* y, int incy,
    cuComplex* AP);
|
|
|
|
/* cublasChpr2_v2_64 (SPR2/HPR2 section): same parameter list as
 * cublasChpr2_v2, with int64_t for n, incx and incy.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuComplex* alpha,
    const cuComplex* x, int64_t incx,
    const cuComplex* y, int64_t incy,
    cuComplex* AP);
|
|
|
|
/* cublasZhpr2_v2 (SPR2/HPR2 section): cuDoubleComplex operands, including a
 * complex alpha; n, incx and incy are int. AP is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int incx,
    const cuDoubleComplex* y, int incy,
    cuDoubleComplex* AP);
|
|
|
|
/* cublasZhpr2_v2_64 (SPR2/HPR2 section): same parameter list as
 * cublasZhpr2_v2, with int64_t for n, incx and incy.
 * AP is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* x, int64_t incx,
    const cuDoubleComplex* y, int64_t incy,
    cuDoubleComplex* AP);
|
|
|
|
/* BATCH GEMV */
|
|
|
|
/* cublasSgemvBatched (BATCH GEMV section): float operands passed as arrays of
 * pointers (Aarray, xarray, yarray). The pointer arrays themselves are const;
 * only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const float* const Aarray[], int lda,
    const float* const xarray[], int incx,
    const float* beta,
    float* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasSgemvBatched_64 (BATCH GEMV section): same parameter list as
 * cublasSgemvBatched, with int64_t for m, n, lda, incx, incy and batchCount.
 * Only the elements reached through yarray are writable.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const float* const Aarray[], int64_t lda,
    const float* const xarray[], int64_t incx,
    const float* beta,
    float* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
/* cublasDgemvBatched (BATCH GEMV section): double operands passed as arrays of
 * pointers (Aarray, xarray, yarray). The pointer arrays themselves are const;
 * only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const double* alpha,
    const double* const Aarray[], int lda,
    const double* const xarray[], int incx,
    const double* beta,
    double* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasDgemvBatched_64 (BATCH GEMV section): same parameter list as
 * cublasDgemvBatched, with int64_t for m, n, lda, incx, incy and batchCount.
 * Only the elements reached through yarray are writable.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const double* alpha,
    const double* const Aarray[], int64_t lda,
    const double* const xarray[], int64_t incx,
    const double* beta,
    double* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
/* cublasCgemvBatched (BATCH GEMV section): cuComplex operands passed as arrays
 * of pointers (Aarray, xarray, yarray). The pointer arrays themselves are
 * const; only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const cuComplex* alpha,
    const cuComplex* const Aarray[], int lda,
    const cuComplex* const xarray[], int incx,
    const cuComplex* beta,
    cuComplex* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasCgemvBatched_64 (BATCH GEMV section): same parameter list as
 * cublasCgemvBatched, with int64_t for m, n, lda, incx, incy and batchCount.
 * Only the elements reached through yarray are writable.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const cuComplex* alpha,
    const cuComplex* const Aarray[], int64_t lda,
    const cuComplex* const xarray[], int64_t incx,
    const cuComplex* beta,
    cuComplex* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
/* cublasZgemvBatched (BATCH GEMV section): cuDoubleComplex operands passed as
 * arrays of pointers (Aarray, xarray, yarray). The pointer arrays themselves
 * are const; only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* const Aarray[], int lda,
    const cuDoubleComplex* const xarray[], int incx,
    const cuDoubleComplex* beta,
    cuDoubleComplex* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasZgemvBatched_64 (BATCH GEMV section): same parameter list as
 * cublasZgemvBatched, with int64_t for m, n, lda, incx, incy and batchCount.
 * Only the elements reached through yarray are writable.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* const Aarray[], int64_t lda,
    const cuDoubleComplex* const xarray[], int64_t incx,
    const cuDoubleComplex* beta,
    cuDoubleComplex* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
#if defined(__cplusplus)
|
|
|
|
/* cublasHSHgemvBatched (declared only under __cplusplus): Aarray, xarray and
 * yarray hold __half pointers, while alpha and beta are const float*.
 * Only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSHgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __half* const Aarray[], int lda,
    const __half* const xarray[], int incx,
    const float* beta,
    __half* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasHSHgemvBatched_64 (declared only under __cplusplus): same parameter
 * list as cublasHSHgemvBatched (__half A/x/y, float alpha/beta), with int64_t
 * for m, n, lda, incx, incy and batchCount. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSHgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __half* const Aarray[], int64_t lda,
    const __half* const xarray[], int64_t incx,
    const float* beta,
    __half* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
/* cublasHSSgemvBatched (declared only under __cplusplus): Aarray and xarray
 * hold __half pointers, yarray holds float pointers, and alpha/beta are
 * const float*. Only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSSgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __half* const Aarray[], int lda,
    const __half* const xarray[], int incx,
    const float* beta,
    float* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasHSSgemvBatched_64 (declared only under __cplusplus): same parameter
 * list as cublasHSSgemvBatched (__half A/x, float y/alpha/beta), with int64_t
 * for m, n, lda, incx, incy and batchCount. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSSgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __half* const Aarray[], int64_t lda,
    const __half* const xarray[], int64_t incx,
    const float* beta,
    float* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
/* cublasTSTgemvBatched (declared only under __cplusplus): Aarray, xarray and
 * yarray hold __nv_bfloat16 pointers, while alpha and beta are const float*.
 * Only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSTgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __nv_bfloat16* const Aarray[], int lda,
    const __nv_bfloat16* const xarray[], int incx,
    const float* beta,
    __nv_bfloat16* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasTSTgemvBatched_64 (declared only under __cplusplus): same parameter
 * list as cublasTSTgemvBatched (__nv_bfloat16 A/x/y, float alpha/beta), with
 * int64_t for m, n, lda, incx, incy and batchCount.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSTgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __nv_bfloat16* const Aarray[], int64_t lda,
    const __nv_bfloat16* const xarray[], int64_t incx,
    const float* beta,
    __nv_bfloat16* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
/* cublasTSSgemvBatched (declared only under __cplusplus): Aarray and xarray
 * hold __nv_bfloat16 pointers, yarray holds float pointers, and alpha/beta are
 * const float*. Only the elements reached through yarray are writable.
 * m, n, lda, incx, incy and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSSgemvBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __nv_bfloat16* const Aarray[], int lda,
    const __nv_bfloat16* const xarray[], int incx,
    const float* beta,
    float* const yarray[], int incy,
    int batchCount);
|
|
|
|
/* cublasTSSgemvBatched_64 (declared only under __cplusplus): same parameter
 * list as cublasTSSgemvBatched (__nv_bfloat16 A/x, float y/alpha/beta), with
 * int64_t for m, n, lda, incx, incy and batchCount.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSSgemvBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __nv_bfloat16* const Aarray[], int64_t lda,
    const __nv_bfloat16* const xarray[], int64_t incx,
    const float* beta,
    float* const yarray[], int64_t incy,
    int64_t batchCount);
|
|
|
|
#endif
|
|
|
|
/* cublasSgemvStridedBatched: float operands given as single base pointers
 * (A, x, y), each followed by a long long int stride (strideA, stridex,
 * stridey). m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const float* A, int lda, long long int strideA,
    const float* x, int incx, long long int stridex,
    const float* beta,
    float* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasSgemvStridedBatched_64: same parameter list as
 * cublasSgemvStridedBatched, with int64_t for m, n, lda, incx, incy and
 * batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const float* A, int64_t lda, long long int strideA,
    const float* x, int64_t incx, long long int stridex,
    const float* beta,
    float* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
/* cublasDgemvStridedBatched: double operands given as single base pointers
 * (A, x, y), each followed by a long long int stride (strideA, stridex,
 * stridey). m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const double* alpha,
    const double* A, int lda, long long int strideA,
    const double* x, int incx, long long int stridex,
    const double* beta,
    double* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasDgemvStridedBatched_64: same parameter list as
 * cublasDgemvStridedBatched, with int64_t for m, n, lda, incx, incy and
 * batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const double* alpha,
    const double* A, int64_t lda, long long int strideA,
    const double* x, int64_t incx, long long int stridex,
    const double* beta,
    double* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
/* cublasCgemvStridedBatched: cuComplex operands given as single base pointers
 * (A, x, y), each followed by a long long int stride (strideA, stridex,
 * stridey). m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const cuComplex* alpha,
    const cuComplex* A, int lda, long long int strideA,
    const cuComplex* x, int incx, long long int stridex,
    const cuComplex* beta,
    cuComplex* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasCgemvStridedBatched_64: same parameter list as
 * cublasCgemvStridedBatched, with int64_t for m, n, lda, incx, incy and
 * batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const cuComplex* alpha,
    const cuComplex* A, int64_t lda, long long int strideA,
    const cuComplex* x, int64_t incx, long long int stridex,
    const cuComplex* beta,
    cuComplex* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
/* cublasZgemvStridedBatched: cuDoubleComplex operands given as single base
 * pointers (A, x, y), each followed by a long long int stride (strideA,
 * stridex, stridey). m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* A, int lda, long long int strideA,
    const cuDoubleComplex* x, int incx, long long int stridex,
    const cuDoubleComplex* beta,
    cuDoubleComplex* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasZgemvStridedBatched_64: same parameter list as
 * cublasZgemvStridedBatched, with int64_t for m, n, lda, incx, incy and
 * batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* A, int64_t lda, long long int strideA,
    const cuDoubleComplex* x, int64_t incx, long long int stridex,
    const cuDoubleComplex* beta,
    cuDoubleComplex* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
#if defined(__cplusplus)
|
|
|
|
/* cublasHSHgemvStridedBatched (declared only under __cplusplus): A, x and y
 * are __half base pointers, each followed by a long long int stride; alpha and
 * beta are const float*. m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSHgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __half* A, int lda, long long int strideA,
    const __half* x, int incx, long long int stridex,
    const float* beta,
    __half* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasHSHgemvStridedBatched_64 (declared only under __cplusplus): same
 * parameter list as cublasHSHgemvStridedBatched, with int64_t for m, n, lda,
 * incx, incy and batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSHgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __half* A, int64_t lda, long long int strideA,
    const __half* x, int64_t incx, long long int stridex,
    const float* beta,
    __half* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
/* cublasHSSgemvStridedBatched (declared only under __cplusplus): A and x are
 * __half base pointers, y is a float base pointer, each followed by a
 * long long int stride; alpha and beta are const float*.
 * m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSSgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __half* A, int lda, long long int strideA,
    const __half* x, int incx, long long int stridex,
    const float* beta,
    float* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasHSSgemvStridedBatched_64 (declared only under __cplusplus): same
 * parameter list as cublasHSSgemvStridedBatched, with int64_t for m, n, lda,
 * incx, incy and batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHSSgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __half* A, int64_t lda, long long int strideA,
    const __half* x, int64_t incx, long long int stridex,
    const float* beta,
    float* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
/* cublasTSTgemvStridedBatched (declared only under __cplusplus): A, x and y
 * are __nv_bfloat16 base pointers, each followed by a long long int stride;
 * alpha and beta are const float*.
 * m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSTgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __nv_bfloat16* A, int lda, long long int strideA,
    const __nv_bfloat16* x, int incx, long long int stridex,
    const float* beta,
    __nv_bfloat16* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasTSTgemvStridedBatched_64 (declared only under __cplusplus): same
 * parameter list as cublasTSTgemvStridedBatched, with int64_t for m, n, lda,
 * incx, incy and batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSTgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __nv_bfloat16* A, int64_t lda, long long int strideA,
    const __nv_bfloat16* x, int64_t incx, long long int stridex,
    const float* beta,
    __nv_bfloat16* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
/* cublasTSSgemvStridedBatched (declared only under __cplusplus): A and x are
 * __nv_bfloat16 base pointers, y is a float base pointer, each followed by a
 * long long int stride; alpha and beta are const float*.
 * m, n, lda, incx, incy and batchCount are int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSSgemvStridedBatched(
    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
    const float* alpha,
    const __nv_bfloat16* A, int lda, long long int strideA,
    const __nv_bfloat16* x, int incx, long long int stridex,
    const float* beta,
    float* y, int incy, long long int stridey,
    int batchCount);
|
|
|
|
/* cublasTSSgemvStridedBatched_64 (declared only under __cplusplus): same
 * parameter list as cublasTSSgemvStridedBatched, with int64_t for m, n, lda,
 * incx, incy and batchCount; the three strides stay long long int.
 * y is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasTSSgemvStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n,
    const float* alpha,
    const __nv_bfloat16* A, int64_t lda, long long int strideA,
    const __nv_bfloat16* x, int64_t incx, long long int stridex,
    const float* beta,
    float* y, int64_t incy, long long int stridey,
    int64_t batchCount);
|
|
|
|
#endif
|
|
|
|
/* ---------------- CUBLAS BLAS3 Functions ---------------- */
|
|
|
|
/* GEMM */
|
|
|
|
/* cublasSgemm_v2 (BLAS3 / GEMM section): float operands; m, n, k, lda, ldb and
 * ldc are int. transa and transb are separate cublasOperation_t selectors.
 * alpha, A, B and beta are const inputs; C is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float* alpha,
    const float* A, int lda,
    const float* B, int ldb,
    const float* beta,
    float* C, int ldc);
|
|
|
|
/* cublasSgemm_v2_64 (BLAS3 / GEMM section): same parameter list as
 * cublasSgemm_v2, with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const float* alpha,
    const float* A, int64_t lda,
    const float* B, int64_t ldb,
    const float* beta,
    float* C, int64_t ldc);
|
|
|
|
/* cublasDgemm_v2 (BLAS3 / GEMM section): double operands; m, n, k, lda, ldb
 * and ldc are int. alpha, A, B and beta are const inputs; C is the only
 * writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const double* alpha,
    const double* A, int lda,
    const double* B, int ldb,
    const double* beta,
    double* C, int ldc);
|
|
|
|
/* cublasDgemm_v2_64 (BLAS3 / GEMM section): same parameter list as
 * cublasDgemm_v2, with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const double* alpha,
    const double* A, int64_t lda,
    const double* B, int64_t ldb,
    const double* beta,
    double* C, int64_t ldc);
|
|
|
|
/* cublasCgemm_v2 (BLAS3 / GEMM section): cuComplex operands; m, n, k, lda, ldb
 * and ldc are int. alpha, A, B and beta are const inputs; C is the only
 * writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex* alpha,
    const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const cuComplex* beta,
    cuComplex* C, int ldc);
|
|
|
|
/* cublasCgemm_v2_64 (BLAS3 / GEMM section): same parameter list as
 * cublasCgemm_v2, with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuComplex* alpha,
    const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const cuComplex* beta,
    cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasCgemm3m (BLAS3 / GEMM section): parameter list identical to
 * cublasCgemm_v2 (cuComplex operands, int sizes); how the "3m" form differs
 * is not visible in this header. C is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3m(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex* alpha,
    const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const cuComplex* beta,
    cuComplex* C, int ldc);
|
|
|
|
/* cublasCgemm3m_64 (BLAS3 / GEMM section): same parameter list as
 * cublasCgemm3m, with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3m_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuComplex* alpha,
    const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const cuComplex* beta,
    cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasCgemm3mEx (BLAS3 / GEMM section): A, B and C are untyped (void*) and
 * each is accompanied by a cudaDataType tag (Atype, Btype, Ctype); alpha and
 * beta are const cuComplex*. m, n, k, lda, ldb and ldc are int.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3mEx(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int lda,
    const void* B, cudaDataType Btype, int ldb,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasCgemm3mEx_64 (BLAS3 / GEMM section): same parameter list as
 * cublasCgemm3mEx (void* A/B/C with cudaDataType tags, cuComplex alpha/beta),
 * with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3mEx_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const void* B, cudaDataType Btype, int64_t ldb,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* cublasZgemm_v2 (BLAS3 / GEMM section): cuDoubleComplex operands; m, n, k,
 * lda, ldb and ldc are int. alpha, A, B and beta are const inputs; C is the
 * only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const cuDoubleComplex* beta,
    cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZgemm_v2_64 (BLAS3 / GEMM section): same parameter list as
 * cublasZgemm_v2, with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const cuDoubleComplex* beta,
    cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* cublasZgemm3m (BLAS3 / GEMM section): parameter list identical to
 * cublasZgemm_v2 (cuDoubleComplex operands, int sizes); how the "3m" form
 * differs is not visible in this header. C is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm3m(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const cuDoubleComplex* beta,
    cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZgemm3m_64 (BLAS3 / GEMM section): same parameter list as
 * cublasZgemm3m, with int64_t for m, n, k, lda, ldb and ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm3m_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const cuDoubleComplex* beta,
    cuDoubleComplex* C, int64_t ldc);
|
|
|
|
#if defined(__cplusplus)
|
|
|
|
/* cublasHgemm (declared only under __cplusplus): all operands, including
 * alpha and beta, are __half. m, n, k, lda, ldb and ldc are int.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const __half* alpha,
    const __half* A, int lda,
    const __half* B, int ldb,
    const __half* beta,
    __half* C, int ldc);
|
|
|
|
/* cublasHgemm_64 (declared only under __cplusplus): same parameter list as
 * cublasHgemm (all-__half operands), with int64_t for m, n, k, lda, ldb, ldc.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const __half* alpha,
    const __half* A, int64_t lda,
    const __half* B, int64_t ldb,
    const __half* beta,
    __half* C, int64_t ldc);
|
|
|
|
#endif
|
|
|
|
/* cublasSgemmEx: A, B and C are untyped (void*) and each is accompanied by a
 * cudaDataType tag (Atype, Btype, Ctype); alpha and beta are const float*.
 * m, n, k, lda, ldb and ldc are int. C is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmEx(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float* alpha,
    const void* A, cudaDataType Atype, int lda,
    const void* B, cudaDataType Btype, int ldb,
    const float* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasSgemmEx_64: same parameter list as cublasSgemmEx (void* A/B/C with
 * cudaDataType tags, float alpha/beta), with int64_t for m, n, k, lda, ldb
 * and ldc. C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmEx_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const float* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const void* B, cudaDataType Btype, int64_t ldb,
    const float* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* cublasGemmEx: fully untyped form. alpha, beta, A, B and C are all void*;
 * A, B and C each carry a cudaDataType tag (Atype, Btype, Ctype).
 * Two trailing selectors are taken: computeType (cublasComputeType_t) and
 * algo (cublasGemmAlgo_t). m, n, k, lda, ldb and ldc are int.
 * C is the only writable pointer. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const void* alpha,
    const void* A, cudaDataType Atype, int lda,
    const void* B, cudaDataType Btype, int ldb,
    const void* beta,
    void* C, cudaDataType Ctype, int ldc,
    cublasComputeType_t computeType,
    cublasGemmAlgo_t algo);
|
|
|
|
/* cublasGemmEx_64: same parameter list as cublasGemmEx (void* scalars and
 * matrices, cudaDataType tags, computeType and algo selectors), with int64_t
 * for m, n, k, lda, ldb and ldc. C is the only writable pointer.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmEx_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const void* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const void* B, cudaDataType Btype, int64_t ldb,
    const void* beta,
    void* C, cudaDataType Ctype, int64_t ldc,
    cublasComputeType_t computeType,
    cublasGemmAlgo_t algo);
|
|
|
|
/* cublasCgemmEx: cuComplex alpha and beta; A, B and C are void pointers, each paired with a
 * cudaDataType tag (Atype, Btype, Ctype); m, n, k, lda, ldb, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmEx(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int lda,
    const void* B, cudaDataType Btype, int ldb,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasCgemmEx_64: cuComplex alpha and beta; A, B and C are void pointers, each paired with
 * a cudaDataType tag (Atype, Btype, Ctype); m, n, k, lda, ldb, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmEx_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const void* B, cudaDataType Btype, int64_t ldb,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* SYRK */
|
|
|
|
/* cublasSsyrk_v2 (SYRK section): float alpha, A, beta and C; n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float* alpha, const float* A, int lda,
    const float* beta, float* C, int ldc);
|
|
|
|
/* cublasSsyrk_v2_64 (SYRK section): float alpha, A, beta and C; n, k, lda, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const float* alpha, const float* A, int64_t lda,
    const float* beta, float* C, int64_t ldc);
|
|
|
|
/* cublasDsyrk_v2 (SYRK section): double alpha, A, beta and C; n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double* alpha, const double* A, int lda,
    const double* beta, double* C, int ldc);
|
|
|
|
/* cublasDsyrk_v2_64 (SYRK section): double alpha, A, beta and C; n, k, lda, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const double* alpha, const double* A, int64_t lda,
    const double* beta, double* C, int64_t ldc);
|
|
|
|
/* cublasCsyrk_v2 (SYRK section): cuComplex alpha, A, beta and C; n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCsyrk_v2_64 (SYRK section): cuComplex alpha, A, beta and C; n, k, lda, ldc are
 * int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZsyrk_v2 (SYRK section): cuDoubleComplex alpha, A, beta and C; n, k, lda, ldc are
 * int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZsyrk_v2_64 (SYRK section): cuDoubleComplex alpha, A, beta and C; n, k, lda, ldc
 * are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* cublasCsyrkEx (SYRK section): cuComplex alpha and beta; A and C are void pointers, each
 * paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkEx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int lda,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasCsyrkEx_64 (SYRK section): cuComplex alpha and beta; A and C are void pointers, each
 * paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkEx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* cublasCsyrk3mEx (SYRK section): cuComplex alpha and beta; A and C are void pointers, each
 * paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int lda,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasCsyrk3mEx_64 (SYRK section): cuComplex alpha and beta; A and C are void pointers,
 * each paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const cuComplex* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* HERK */
|
|
|
|
/* cublasCherk_v2 (HERK section): float alpha and beta; cuComplex A and C; n, k, lda, ldc
 * are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float* alpha, const cuComplex* A, int lda,
    const float* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCherk_v2_64 (HERK section): float alpha and beta; cuComplex A and C; n, k, lda, ldc
 * are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const float* alpha, const cuComplex* A, int64_t lda,
    const float* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZherk_v2 (HERK section): double alpha and beta; cuDoubleComplex A and C; n, k, lda,
 * ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double* alpha, const cuDoubleComplex* A, int lda,
    const double* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZherk_v2_64 (HERK section): double alpha and beta; cuDoubleComplex A and C; n, k,
 * lda, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const double* alpha, const cuDoubleComplex* A, int64_t lda,
    const double* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* cublasCherkEx (HERK section): float alpha and beta; A and C are void pointers, each paired
 * with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkEx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float* alpha,
    const void* A, cudaDataType Atype, int lda,
    const float* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasCherkEx_64 (HERK section): float alpha and beta; A and C are void pointers, each
 * paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkEx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const float* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const float* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* cublasCherk3mEx (HERK section): float alpha and beta; A and C are void pointers, each
 * paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk3mEx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float* alpha,
    const void* A, cudaDataType Atype, int lda,
    const float* beta,
    void* C, cudaDataType Ctype, int ldc);
|
|
|
|
/* cublasCherk3mEx_64 (HERK section): float alpha and beta; A and C are void pointers, each
 * paired with a cudaDataType tag (Atype, Ctype); n, k, lda, ldc are int64_t.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk3mEx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const float* alpha,
    const void* A, cudaDataType Atype, int64_t lda,
    const float* beta,
    void* C, cudaDataType Ctype, int64_t ldc);
|
|
|
|
/* SYR2K / HER2K */
|
|
|
|
/* cublasSsyr2k_v2 (SYR2K / HER2K section): float alpha, A, B, beta and C; n, k, lda, ldb,
 * ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float* alpha, const float* A, int lda,
    const float* B, int ldb,
    const float* beta, float* C, int ldc);
|
|
|
|
/* cublasSsyr2k_v2_64 (SYR2K / HER2K section): float alpha, A, B, beta and C; n, k, lda,
 * ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const float* alpha, const float* A, int64_t lda,
    const float* B, int64_t ldb,
    const float* beta, float* C, int64_t ldc);
|
|
|
|
/* cublasDsyr2k_v2 (SYR2K / HER2K section): double alpha, A, B, beta and C; n, k, lda, ldb,
 * ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double* alpha, const double* A, int lda,
    const double* B, int ldb,
    const double* beta, double* C, int ldc);
|
|
|
|
/* cublasDsyr2k_v2_64 (SYR2K / HER2K section): double alpha, A, B, beta and C; n, k, lda,
 * ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const double* alpha, const double* A, int64_t lda,
    const double* B, int64_t ldb,
    const double* beta, double* C, int64_t ldc);
|
|
|
|
/* cublasCsyr2k_v2 (SYR2K / HER2K section): cuComplex alpha, A, B, beta and C; n, k, lda,
 * ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const cuComplex* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCsyr2k_v2_64 (SYR2K / HER2K section): cuComplex alpha, A, B, beta and C; n, k, lda,
 * ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const cuComplex* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZsyr2k_v2 (SYR2K / HER2K section): cuDoubleComplex alpha, A, B, beta and C; n, k,
 * lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZsyr2k_v2_64 (SYR2K / HER2K section): cuDoubleComplex alpha, A, B, beta and C; n, k,
 * lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* cublasCher2k_v2 (SYR2K / HER2K section): cuComplex alpha, A, B and C, but beta is a float
 * pointer; n, k, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const float* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCher2k_v2_64 (SYR2K / HER2K section): cuComplex alpha, A, B and C, but beta is a
 * float pointer; n, k, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const float* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZher2k_v2 (SYR2K / HER2K section): cuDoubleComplex alpha, A, B and C, but beta is a
 * double pointer; n, k, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const double* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZher2k_v2_64 (SYR2K / HER2K section): cuDoubleComplex alpha, A, B and C, but beta is
 * a double pointer; n, k, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const double* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* SYRKX / HERKX */
|
|
|
|
/* cublasSsyrkx (SYRKX / HERKX section): float alpha, A, B, beta and C; n, k, lda, ldb, ldc
 * are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float* alpha, const float* A, int lda,
    const float* B, int ldb,
    const float* beta, float* C, int ldc);
|
|
|
|
/* cublasSsyrkx_64 (SYRKX / HERKX section): float alpha, A, B, beta and C; n, k, lda, ldb,
 * ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const float* alpha, const float* A, int64_t lda,
    const float* B, int64_t ldb,
    const float* beta, float* C, int64_t ldc);
|
|
|
|
/* cublasDsyrkx (SYRKX / HERKX section): double alpha, A, B, beta and C; n, k, lda, ldb, ldc
 * are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double* alpha, const double* A, int lda,
    const double* B, int ldb,
    const double* beta, double* C, int ldc);
|
|
|
|
/* cublasDsyrkx_64 (SYRKX / HERKX section): double alpha, A, B, beta and C; n, k, lda, ldb,
 * ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const double* alpha, const double* A, int64_t lda,
    const double* B, int64_t ldb,
    const double* beta, double* C, int64_t ldc);
|
|
|
|
/* cublasCsyrkx (SYRKX / HERKX section): cuComplex alpha, A, B, beta and C; n, k, lda, ldb,
 * ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const cuComplex* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCsyrkx_64 (SYRKX / HERKX section): cuComplex alpha, A, B, beta and C; n, k, lda,
 * ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const cuComplex* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZsyrkx (SYRKX / HERKX section): cuDoubleComplex alpha, A, B, beta and C; n, k, lda,
 * ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZsyrkx_64 (SYRKX / HERKX section): cuDoubleComplex alpha, A, B, beta and C; n, k,
 * lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* cublasCherkx (SYRKX / HERKX section): cuComplex alpha, A, B and C, but beta is a float
 * pointer; n, k, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const float* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCherkx_64 (SYRKX / HERKX section): cuComplex alpha, A, B and C, but beta is a float
 * pointer; n, k, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const float* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZherkx (SYRKX / HERKX section): cuDoubleComplex alpha, A, B and C, but beta is a
 * double pointer; n, k, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const double* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZherkx_64 (SYRKX / HERKX section): cuDoubleComplex alpha, A, B and C, but beta is a
 * double pointer; n, k, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx_64(
    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
    int64_t n, int64_t k,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const double* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* SYMM */
|
|
|
|
/* cublasSsymm_v2 (SYMM section): float alpha, A, B, beta and C; m, n, lda, ldb, ldc are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const float* alpha, const float* A, int lda,
    const float* B, int ldb,
    const float* beta, float* C, int ldc);
|
|
|
|
/* cublasSsymm_v2_64 (SYMM section): float alpha, A, B, beta and C; m, n, lda, ldb, ldc are
 * int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int64_t m, int64_t n,
    const float* alpha, const float* A, int64_t lda,
    const float* B, int64_t ldb,
    const float* beta, float* C, int64_t ldc);
|
|
|
|
/* cublasDsymm_v2 (SYMM section): double alpha, A, B, beta and C; m, n, lda, ldb, ldc are
 * int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const double* alpha, const double* A, int lda,
    const double* B, int ldb,
    const double* beta, double* C, int ldc);
|
|
|
|
/* cublasDsymm_v2_64 (SYMM section): double alpha, A, B, beta and C; m, n, lda, ldb, ldc are
 * int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int64_t m, int64_t n,
    const double* alpha, const double* A, int64_t lda,
    const double* B, int64_t ldb,
    const double* beta, double* C, int64_t ldc);
|
|
|
|
/* cublasCsymm_v2 (SYMM section): cuComplex alpha, A, B, beta and C; m, n, lda, ldb, ldc are
 * int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const cuComplex* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasCsymm_v2_64 (SYMM section): cuComplex alpha, A, B, beta and C; m, n, lda, ldb, ldc
 * are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int64_t m, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const cuComplex* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZsymm_v2 (SYMM section): cuDoubleComplex alpha, A, B, beta and C; m, n, lda, ldb,
 * ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZsymm_v2_64 (SYMM section): cuDoubleComplex alpha, A, B, beta and C; m, n, lda, ldb,
 * ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int64_t m, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* HEMM */
|
|
|
|
/* cublasChemm_v2 (HEMM section): cuComplex alpha, A, B, beta and C; m, n, lda, ldb, ldc are
 * int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    const cuComplex* beta, cuComplex* C, int ldc);
|
|
|
|
/* cublasChemm_v2_64 (HEMM section): cuComplex alpha, A, B, beta and C; m, n, lda, ldb, ldc
 * are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int64_t m, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    const cuComplex* beta, cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZhemm_v2 (HEMM section): cuDoubleComplex alpha, A, B, beta and C; m, n, lda, ldb,
 * ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZhemm_v2_64 (HEMM section): cuDoubleComplex alpha, A, B, beta and C; m, n, lda, ldb,
 * ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    int64_t m, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    const cuDoubleComplex* beta, cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* TRSM */
|
|
|
|
/* cublasStrsm_v2 (TRSM section): float alpha and A (both const) and a non-const float B;
 * m, n, lda, ldb are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const float* alpha, const float* A, int lda,
    float* B, int ldb);
|
|
|
|
/* cublasStrsm_v2_64 (TRSM section): float alpha and A (both const) and a non-const float B;
 * m, n, lda, ldb are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const float* alpha, const float* A, int64_t lda,
    float* B, int64_t ldb);
|
|
|
|
/* cublasDtrsm_v2 (TRSM section): double alpha and A (both const) and a non-const double B;
 * m, n, lda, ldb are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const double* alpha, const double* A, int lda,
    double* B, int ldb);
|
|
|
|
/* cublasDtrsm_v2_64 (TRSM section): double alpha and A (both const) and a non-const double
 * B; m, n, lda, ldb are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const double* alpha, const double* A, int64_t lda,
    double* B, int64_t ldb);
|
|
|
|
/* cublasCtrsm_v2 (TRSM section): cuComplex alpha and A (both const) and a non-const
 * cuComplex B; m, n, lda, ldb are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    cuComplex* B, int ldb);
|
|
|
|
/* cublasCtrsm_v2_64 (TRSM section): cuComplex alpha and A (both const) and a non-const
 * cuComplex B; m, n, lda, ldb are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    cuComplex* B, int64_t ldb);
|
|
|
|
/* cublasZtrsm_v2 (TRSM section): cuDoubleComplex alpha and A (both const) and a non-const
 * cuDoubleComplex B; m, n, lda, ldb are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    cuDoubleComplex* B, int ldb);
|
|
|
|
/* cublasZtrsm_v2_64 (TRSM section): cuDoubleComplex alpha and A (both const) and a non-const
 * cuDoubleComplex B; m, n, lda, ldb are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    cuDoubleComplex* B, int64_t ldb);
|
|
|
|
/* TRMM */
|
|
|
|
/* cublasStrmm_v2 (TRMM section): const float alpha, A and B plus a separate non-const float
 * C; m, n, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const float* alpha, const float* A, int lda,
    const float* B, int ldb,
    float* C, int ldc);
|
|
|
|
/* cublasStrmm_v2_64 (TRMM section): const float alpha, A and B plus a separate non-const
 * float C; m, n, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const float* alpha, const float* A, int64_t lda,
    const float* B, int64_t ldb,
    float* C, int64_t ldc);
|
|
|
|
/* cublasDtrmm_v2 (TRMM section): const double alpha, A and B plus a separate non-const
 * double C; m, n, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const double* alpha, const double* A, int lda,
    const double* B, int ldb,
    double* C, int ldc);
|
|
|
|
/* cublasDtrmm_v2_64 (TRMM section): const double alpha, A and B plus a separate non-const
 * double C; m, n, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const double* alpha, const double* A, int64_t lda,
    const double* B, int64_t ldb,
    double* C, int64_t ldc);
|
|
|
|
/* cublasCtrmm_v2 (TRMM section): const cuComplex alpha, A and B plus a separate non-const
 * cuComplex C; m, n, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuComplex* alpha, const cuComplex* A, int lda,
    const cuComplex* B, int ldb,
    cuComplex* C, int ldc);
|
|
|
|
/* cublasCtrmm_v2_64 (TRMM section): const cuComplex alpha, A and B plus a separate non-const
 * cuComplex C; m, n, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const cuComplex* alpha, const cuComplex* A, int64_t lda,
    const cuComplex* B, int64_t ldb,
    cuComplex* C, int64_t ldc);
|
|
|
|
/* cublasZtrmm_v2 (TRMM section): const cuDoubleComplex alpha, A and B plus a separate
 * non-const cuDoubleComplex C; m, n, lda, ldb, ldc are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
    const cuDoubleComplex* B, int ldb,
    cuDoubleComplex* C, int ldc);
|
|
|
|
/* cublasZtrmm_v2_64 (TRMM section): const cuDoubleComplex alpha, A and B plus a separate
 * non-const cuDoubleComplex C; m, n, lda, ldb, ldc are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2_64(
    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int64_t m, int64_t n,
    const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda,
    const cuDoubleComplex* B, int64_t ldb,
    cuDoubleComplex* C, int64_t ldc);
|
|
|
|
/* BATCH GEMM */
|
|
|
|
#if defined(__cplusplus)
|
|
|
|
/* cublasHgemmBatched (BATCH GEMM section, inside an `#if defined(__cplusplus)` guard):
 * __half alpha and beta; Aarray, Barray and Carray are arrays of const pointers to __half;
 * m, n, k, lda, ldb, ldc and batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const __half* alpha,
    const __half* const Aarray[], int lda,
    const __half* const Barray[], int ldb,
    const __half* beta,
    __half* const Carray[], int ldc,
    int batchCount);
|
|
|
|
/* cublasHgemmBatched_64 (BATCH GEMM section, inside an `#if defined(__cplusplus)` guard):
 * __half alpha and beta; Aarray, Barray and Carray are arrays of const pointers to __half;
 * m, n, k, lda, ldb, ldc and batchCount are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const __half* alpha,
    const __half* const Aarray[], int64_t lda,
    const __half* const Barray[], int64_t ldb,
    const __half* beta,
    __half* const Carray[], int64_t ldc,
    int64_t batchCount);
|
|
|
|
#endif
|
|
|
|
/* cublasSgemmBatched (BATCH GEMM section): float alpha and beta; Aarray, Barray and Carray
 * are arrays of const pointers to float; m, n, k, lda, ldb, ldc and batchCount are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float* alpha,
    const float* const Aarray[], int lda,
    const float* const Barray[], int ldb,
    const float* beta,
    float* const Carray[], int ldc,
    int batchCount);
|
|
|
|
/* cublasSgemmBatched_64 (BATCH GEMM section): float alpha and beta; Aarray, Barray and
 * Carray are arrays of const pointers to float; m, n, k, lda, ldb, ldc and batchCount are
 * int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const float* alpha,
    const float* const Aarray[], int64_t lda,
    const float* const Barray[], int64_t ldb,
    const float* beta,
    float* const Carray[], int64_t ldc,
    int64_t batchCount);
|
|
|
|
/* cublasDgemmBatched (BATCH GEMM section): double alpha and beta; Aarray, Barray and Carray
 * are arrays of const pointers to double; m, n, k, lda, ldb, ldc and batchCount are int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const double* alpha,
    const double* const Aarray[], int lda,
    const double* const Barray[], int ldb,
    const double* beta,
    double* const Carray[], int ldc,
    int batchCount);
|
|
|
|
/* cublasDgemmBatched_64 (BATCH GEMM section): double alpha and beta; Aarray, Barray and
 * Carray are arrays of const pointers to double; m, n, k, lda, ldb, ldc and batchCount are
 * int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const double* alpha,
    const double* const Aarray[], int64_t lda,
    const double* const Barray[], int64_t ldb,
    const double* beta,
    double* const Carray[], int64_t ldc,
    int64_t batchCount);
|
|
|
|
/* cublasCgemmBatched (BATCH GEMM section): cuComplex alpha and beta; Aarray, Barray and
 * Carray are arrays of const pointers to cuComplex; m, n, k, lda, ldb, ldc and batchCount
 * are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex* alpha,
    const cuComplex* const Aarray[], int lda,
    const cuComplex* const Barray[], int ldb,
    const cuComplex* beta,
    cuComplex* const Carray[], int ldc,
    int batchCount);
|
|
|
|
/* cublasCgemmBatched_64 (BATCH GEMM section): cuComplex alpha and beta; Aarray, Barray and
 * Carray are arrays of const pointers to cuComplex; m, n, k, lda, ldb, ldc and batchCount
 * are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuComplex* alpha,
    const cuComplex* const Aarray[], int64_t lda,
    const cuComplex* const Barray[], int64_t ldb,
    const cuComplex* beta,
    cuComplex* const Carray[], int64_t ldc,
    int64_t batchCount);
|
|
|
|
/* cublasCgemm3mBatched (BATCH GEMM section): cuComplex alpha and beta; Aarray, Barray and
 * Carray are arrays of const pointers to cuComplex; m, n, k, lda, ldb, ldc and batchCount
 * are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex* alpha,
    const cuComplex* const Aarray[], int lda,
    const cuComplex* const Barray[], int ldb,
    const cuComplex* beta,
    cuComplex* const Carray[], int ldc,
    int batchCount);
|
|
|
|
/* cublasCgemm3mBatched_64 (BATCH GEMM section): cuComplex alpha and beta; Aarray, Barray and
 * Carray are arrays of const pointers to cuComplex; m, n, k, lda, ldb, ldc and batchCount
 * are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuComplex* alpha,
    const cuComplex* const Aarray[], int64_t lda,
    const cuComplex* const Barray[], int64_t ldb,
    const cuComplex* beta,
    cuComplex* const Carray[], int64_t ldc,
    int64_t batchCount);
|
|
|
|
/* cublasZgemmBatched (BATCH GEMM section): cuDoubleComplex alpha and beta; Aarray, Barray
 * and Carray are arrays of const pointers to cuDoubleComplex; m, n, k, lda, ldb, ldc and
 * batchCount are int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* const Aarray[], int lda,
    const cuDoubleComplex* const Barray[], int ldb,
    const cuDoubleComplex* beta,
    cuDoubleComplex* const Carray[], int ldc,
    int batchCount);
|
|
|
|
/* cublasZgemmBatched_64 (BATCH GEMM section): cuDoubleComplex alpha and beta; Aarray, Barray
 * and Carray are arrays of const pointers to cuDoubleComplex; m, n, k, lda, ldb, ldc and
 * batchCount are int64_t. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const cuDoubleComplex* alpha,
    const cuDoubleComplex* const Aarray[], int64_t lda,
    const cuDoubleComplex* const Barray[], int64_t ldb,
    const cuDoubleComplex* beta,
    cuDoubleComplex* const Carray[], int64_t ldc,
    int64_t batchCount);
|
|
|
|
#if defined(__cplusplus)
|
|
|
|
/* cublasHgemmStridedBatched (inside an `#if defined(__cplusplus)` guard): __half alpha, A,
 * B, beta and C; m, n, k, lda, ldb, ldc and batchCount are int; strideA, strideB and strideC
 * are long long int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const __half* alpha,
    const __half* A, int lda, long long int strideA,
    const __half* B, int ldb, long long int strideB,
    const __half* beta,
    __half* C, int ldc, long long int strideC,
    int batchCount);
|
|
|
|
/* cublasHgemmStridedBatched_64 (inside an `#if defined(__cplusplus)` guard): __half alpha,
 * A, B, beta and C; m, n, k, lda, ldb, ldc and batchCount are int64_t; strideA, strideB and
 * strideC stay long long int. Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemmStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const __half* alpha,
    const __half* A, int64_t lda, long long int strideA,
    const __half* B, int64_t ldb, long long int strideB,
    const __half* beta,
    __half* C, int64_t ldc, long long int strideC,
    int64_t batchCount);
|
|
|
|
#endif
|
|
|
|
/* cublasSgemmStridedBatched: float alpha, A, B, beta and C; m, n, k, lda, ldb, ldc and
 * batchCount are int; strideA, strideB and strideC are long long int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float* alpha,
    const float* A, int lda, long long int strideA,
    const float* B, int ldb, long long int strideB,
    const float* beta,
    float* C, int ldc, long long int strideC,
    int batchCount);
|
|
|
|
/* cublasSgemmStridedBatched_64: float alpha, A, B, beta and C; m, n, k, lda, ldb, ldc and
 * batchCount are int64_t; strideA, strideB and strideC stay long long int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched_64(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int64_t m, int64_t n, int64_t k,
    const float* alpha,
    const float* A, int64_t lda, long long int strideA,
    const float* B, int64_t ldb, long long int strideB,
    const float* beta,
    float* C, int64_t ldc, long long int strideC,
    int64_t batchCount);
|
|
|
|
/* cublasDgemmStridedBatched: double alpha, A, B, beta and C; m, n, k, lda, ldb, ldc and
 * batchCount are int; strideA, strideB and strideC are long long int.
 * Returns a cublasStatus_t. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(
    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const double* alpha,
    const double* A, int lda, long long int strideA,
    const double* B, int ldb, long long int strideB,
    const double* beta,
    double* C, int ldc, long long int strideC,
    int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
int64_t k,
|
|
const double* alpha,
|
|
const double* A,
|
|
int64_t lda,
|
|
long long int strideA,
|
|
const double* B,
|
|
int64_t ldb,
|
|
long long int strideB,
|
|
const double* beta,
|
|
double* C,
|
|
int64_t ldc,
|
|
long long int strideC,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
int k,
|
|
const cuComplex* alpha,
|
|
const cuComplex* A,
|
|
int lda,
|
|
long long int strideA,
|
|
const cuComplex* B,
|
|
int ldb,
|
|
long long int strideB,
|
|
const cuComplex* beta,
|
|
cuComplex* C,
|
|
int ldc,
|
|
long long int strideC,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
int64_t k,
|
|
const cuComplex* alpha,
|
|
const cuComplex* A,
|
|
int64_t lda,
|
|
long long int strideA,
|
|
const cuComplex* B,
|
|
int64_t ldb,
|
|
long long int strideB,
|
|
const cuComplex* beta,
|
|
cuComplex* C,
|
|
int64_t ldc,
|
|
long long int strideC,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
int k,
|
|
const cuComplex* alpha,
|
|
const cuComplex* A,
|
|
int lda,
|
|
long long int strideA,
|
|
const cuComplex* B,
|
|
int ldb,
|
|
long long int strideB,
|
|
const cuComplex* beta,
|
|
cuComplex* C,
|
|
int ldc,
|
|
long long int strideC,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
int64_t k,
|
|
const cuComplex* alpha,
|
|
const cuComplex* A,
|
|
int64_t lda,
|
|
long long int strideA,
|
|
const cuComplex* B,
|
|
int64_t ldb,
|
|
long long int strideB,
|
|
const cuComplex* beta,
|
|
cuComplex* C,
|
|
int64_t ldc,
|
|
long long int strideC,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
int k,
|
|
const cuDoubleComplex* alpha,
|
|
const cuDoubleComplex* A,
|
|
int lda,
|
|
long long int strideA,
|
|
const cuDoubleComplex* B,
|
|
int ldb,
|
|
long long int strideB,
|
|
const cuDoubleComplex* beta,
|
|
cuDoubleComplex* C,
|
|
int ldc,
|
|
long long int strideC,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
int64_t k,
|
|
const cuDoubleComplex* alpha,
|
|
const cuDoubleComplex* A,
|
|
int64_t lda,
|
|
long long int strideA,
|
|
const cuDoubleComplex* B,
|
|
int64_t ldb,
|
|
long long int strideB,
|
|
const cuDoubleComplex* beta,
|
|
cuDoubleComplex* C,
|
|
int64_t ldc,
|
|
long long int strideC,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
int k,
|
|
const void* alpha,
|
|
const void* const Aarray[],
|
|
cudaDataType Atype,
|
|
int lda,
|
|
const void* const Barray[],
|
|
cudaDataType Btype,
|
|
int ldb,
|
|
const void* beta,
|
|
void* const Carray[],
|
|
cudaDataType Ctype,
|
|
int ldc,
|
|
int batchCount,
|
|
cublasComputeType_t computeType,
|
|
cublasGemmAlgo_t algo);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
int64_t k,
|
|
const void* alpha,
|
|
const void* const Aarray[],
|
|
cudaDataType Atype,
|
|
int64_t lda,
|
|
const void* const Barray[],
|
|
cudaDataType Btype,
|
|
int64_t ldb,
|
|
const void* beta,
|
|
void* const Carray[],
|
|
cudaDataType Ctype,
|
|
int64_t ldc,
|
|
int64_t batchCount,
|
|
cublasComputeType_t computeType,
|
|
cublasGemmAlgo_t algo);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
int k,
|
|
const void* alpha,
|
|
const void* A,
|
|
cudaDataType Atype,
|
|
int lda,
|
|
long long int strideA,
|
|
const void* B,
|
|
cudaDataType Btype,
|
|
int ldb,
|
|
long long int strideB,
|
|
const void* beta,
|
|
void* C,
|
|
cudaDataType Ctype,
|
|
int ldc,
|
|
long long int strideC,
|
|
int batchCount,
|
|
cublasComputeType_t computeType,
|
|
cublasGemmAlgo_t algo);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
int64_t k,
|
|
const void* alpha,
|
|
const void* A,
|
|
cudaDataType Atype,
|
|
int64_t lda,
|
|
long long int strideA,
|
|
const void* B,
|
|
cudaDataType Btype,
|
|
int64_t ldb,
|
|
long long int strideB,
|
|
const void* beta,
|
|
void* C,
|
|
cudaDataType Ctype,
|
|
int64_t ldc,
|
|
long long int strideC,
|
|
int64_t batchCount,
|
|
cublasComputeType_t computeType,
|
|
cublasGemmAlgo_t algo);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmGroupedBatched(cublasHandle_t handle,
|
|
const cublasOperation_t transa_array[],
|
|
const cublasOperation_t transb_array[],
|
|
const int m_array[],
|
|
const int n_array[],
|
|
const int k_array[],
|
|
const float alpha_array[],
|
|
const float* const Aarray[],
|
|
const int lda_array[],
|
|
const float* const Barray[],
|
|
const int ldb_array[],
|
|
const float beta_array[],
|
|
float* const Carray[],
|
|
const int ldc_array[],
|
|
int group_count,
|
|
const int group_size[]);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmGroupedBatched_64(cublasHandle_t handle,
|
|
const cublasOperation_t transa_array[],
|
|
const cublasOperation_t transb_array[],
|
|
const int64_t m_array[],
|
|
const int64_t n_array[],
|
|
const int64_t k_array[],
|
|
const float alpha_array[],
|
|
const float* const Aarray[],
|
|
const int64_t lda_array[],
|
|
const float* const Barray[],
|
|
const int64_t ldb_array[],
|
|
const float beta_array[],
|
|
float* const Carray[],
|
|
const int64_t ldc_array[],
|
|
int64_t group_count,
|
|
const int64_t group_size[]);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmGroupedBatched(cublasHandle_t handle,
|
|
const cublasOperation_t transa_array[],
|
|
const cublasOperation_t transb_array[],
|
|
const int m_array[],
|
|
const int n_array[],
|
|
const int k_array[],
|
|
const double alpha_array[],
|
|
const double* const Aarray[],
|
|
const int lda_array[],
|
|
const double* const Barray[],
|
|
const int ldb_array[],
|
|
const double beta_array[],
|
|
double* const Carray[],
|
|
const int ldc_array[],
|
|
int group_count,
|
|
const int group_size[]);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmGroupedBatched_64(cublasHandle_t handle,
|
|
const cublasOperation_t transa_array[],
|
|
const cublasOperation_t transb_array[],
|
|
const int64_t m_array[],
|
|
const int64_t n_array[],
|
|
const int64_t k_array[],
|
|
const double alpha_array[],
|
|
const double* const Aarray[],
|
|
const int64_t lda_array[],
|
|
const double* const Barray[],
|
|
const int64_t ldb_array[],
|
|
const double beta_array[],
|
|
double* const Carray[],
|
|
const int64_t ldc_array[],
|
|
int64_t group_count,
|
|
const int64_t group_size[]);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmGroupedBatchedEx(cublasHandle_t handle,
|
|
const cublasOperation_t transa_array[],
|
|
const cublasOperation_t transb_array[],
|
|
const int m_array[],
|
|
const int n_array[],
|
|
const int k_array[],
|
|
const void* alpha_array,
|
|
const void* const Aarray[],
|
|
cudaDataType_t Atype,
|
|
const int lda_array[],
|
|
const void* const Barray[],
|
|
cudaDataType_t Btype,
|
|
const int ldb_array[],
|
|
const void* beta_array,
|
|
void* const Carray[],
|
|
cudaDataType_t Ctype,
|
|
const int ldc_array[],
|
|
int group_count,
|
|
const int group_size[],
|
|
cublasComputeType_t computeType);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasGemmGroupedBatchedEx_64(cublasHandle_t handle,
|
|
const cublasOperation_t transa_array[],
|
|
const cublasOperation_t transb_array[],
|
|
const int64_t m_array[],
|
|
const int64_t n_array[],
|
|
const int64_t k_array[],
|
|
const void* alpha_array,
|
|
const void* const Aarray[],
|
|
cudaDataType_t Atype,
|
|
const int64_t lda_array[],
|
|
const void* const Barray[],
|
|
cudaDataType_t Btype,
|
|
const int64_t ldb_array[],
|
|
const void* beta_array,
|
|
void* const Carray[],
|
|
cudaDataType_t Ctype,
|
|
const int64_t ldc_array[],
|
|
int64_t group_count,
|
|
const int64_t group_size[],
|
|
cublasComputeType_t computeType);
|
|
|
|
/* ---------------- CUBLAS BLAS-like Extension ---------------- */
|
|
|
|
/* GEAM */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
const float* alpha,
|
|
const float* A,
|
|
int lda,
|
|
const float* beta,
|
|
const float* B,
|
|
int ldb,
|
|
float* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
const float* alpha,
|
|
const float* A,
|
|
int64_t lda,
|
|
const float* beta,
|
|
const float* B,
|
|
int64_t ldb,
|
|
float* C,
|
|
int64_t ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
const double* alpha,
|
|
const double* A,
|
|
int lda,
|
|
const double* beta,
|
|
const double* B,
|
|
int ldb,
|
|
double* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
const double* alpha,
|
|
const double* A,
|
|
int64_t lda,
|
|
const double* beta,
|
|
const double* B,
|
|
int64_t ldb,
|
|
double* C,
|
|
int64_t ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
const cuComplex* alpha,
|
|
const cuComplex* A,
|
|
int lda,
|
|
const cuComplex* beta,
|
|
const cuComplex* B,
|
|
int ldb,
|
|
cuComplex* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
const cuComplex* alpha,
|
|
const cuComplex* A,
|
|
int64_t lda,
|
|
const cuComplex* beta,
|
|
const cuComplex* B,
|
|
int64_t ldb,
|
|
cuComplex* C,
|
|
int64_t ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int m,
|
|
int n,
|
|
const cuDoubleComplex* alpha,
|
|
const cuDoubleComplex* A,
|
|
int lda,
|
|
const cuDoubleComplex* beta,
|
|
const cuDoubleComplex* B,
|
|
int ldb,
|
|
cuDoubleComplex* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam_64(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
int64_t m,
|
|
int64_t n,
|
|
const cuDoubleComplex* alpha,
|
|
const cuDoubleComplex* A,
|
|
int64_t lda,
|
|
const cuDoubleComplex* beta,
|
|
const cuDoubleComplex* B,
|
|
int64_t ldb,
|
|
cuDoubleComplex* C,
|
|
int64_t ldc);
|
|
|
|
/* TRSM - Batched Triangular Solver */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int m,
|
|
int n,
|
|
const float* alpha,
|
|
const float* const A[],
|
|
int lda,
|
|
float* const B[],
|
|
int ldb,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched_64(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int64_t m,
|
|
int64_t n,
|
|
const float* alpha,
|
|
const float* const A[],
|
|
int64_t lda,
|
|
float* const B[],
|
|
int64_t ldb,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int m,
|
|
int n,
|
|
const double* alpha,
|
|
const double* const A[],
|
|
int lda,
|
|
double* const B[],
|
|
int ldb,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched_64(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int64_t m,
|
|
int64_t n,
|
|
const double* alpha,
|
|
const double* const A[],
|
|
int64_t lda,
|
|
double* const B[],
|
|
int64_t ldb,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int m,
|
|
int n,
|
|
const cuComplex* alpha,
|
|
const cuComplex* const A[],
|
|
int lda,
|
|
cuComplex* const B[],
|
|
int ldb,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched_64(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int64_t m,
|
|
int64_t n,
|
|
const cuComplex* alpha,
|
|
const cuComplex* const A[],
|
|
int64_t lda,
|
|
cuComplex* const B[],
|
|
int64_t ldb,
|
|
int64_t batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int m,
|
|
int n,
|
|
const cuDoubleComplex* alpha,
|
|
const cuDoubleComplex* const A[],
|
|
int lda,
|
|
cuDoubleComplex* const B[],
|
|
int ldb,
|
|
int batchCount);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched_64(cublasHandle_t handle,
|
|
cublasSideMode_t side,
|
|
cublasFillMode_t uplo,
|
|
cublasOperation_t trans,
|
|
cublasDiagType_t diag,
|
|
int64_t m,
|
|
int64_t n,
|
|
const cuDoubleComplex* alpha,
|
|
const cuDoubleComplex* const A[],
|
|
int64_t lda,
|
|
cuDoubleComplex* const B[],
|
|
int64_t ldb,
|
|
int64_t batchCount);
|
|
|
|
/* DGMM */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int m,
|
|
int n,
|
|
const float* A,
|
|
int lda,
|
|
const float* x,
|
|
int incx,
|
|
float* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm_64(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int64_t m,
|
|
int64_t n,
|
|
const float* A,
|
|
int64_t lda,
|
|
const float* x,
|
|
int64_t incx,
|
|
float* C,
|
|
int64_t ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int m,
|
|
int n,
|
|
const double* A,
|
|
int lda,
|
|
const double* x,
|
|
int incx,
|
|
double* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm_64(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int64_t m,
|
|
int64_t n,
|
|
const double* A,
|
|
int64_t lda,
|
|
const double* x,
|
|
int64_t incx,
|
|
double* C,
|
|
int64_t ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int m,
|
|
int n,
|
|
const cuComplex* A,
|
|
int lda,
|
|
const cuComplex* x,
|
|
int incx,
|
|
cuComplex* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm_64(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int64_t m,
|
|
int64_t n,
|
|
const cuComplex* A,
|
|
int64_t lda,
|
|
const cuComplex* x,
|
|
int64_t incx,
|
|
cuComplex* C,
|
|
int64_t ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int m,
|
|
int n,
|
|
const cuDoubleComplex* A,
|
|
int lda,
|
|
const cuDoubleComplex* x,
|
|
int incx,
|
|
cuDoubleComplex* C,
|
|
int ldc);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm_64(cublasHandle_t handle,
|
|
cublasSideMode_t mode,
|
|
int64_t m,
|
|
int64_t n,
|
|
const cuDoubleComplex* A,
|
|
int64_t lda,
|
|
const cuDoubleComplex* x,
|
|
int64_t incx,
|
|
cuDoubleComplex* C,
|
|
int64_t ldc);
|
|
|
|
/* Batched - MATINV */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSmatinvBatched(cublasHandle_t handle,
|
|
int n,
|
|
const float* const A[],
|
|
int lda,
|
|
float* const Ainv[],
|
|
int lda_inv,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDmatinvBatched(cublasHandle_t handle,
|
|
int n,
|
|
const double* const A[],
|
|
int lda,
|
|
double* const Ainv[],
|
|
int lda_inv,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCmatinvBatched(cublasHandle_t handle,
|
|
int n,
|
|
const cuComplex* const A[],
|
|
int lda,
|
|
cuComplex* const Ainv[],
|
|
int lda_inv,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZmatinvBatched(cublasHandle_t handle,
|
|
int n,
|
|
const cuDoubleComplex* const A[],
|
|
int lda,
|
|
cuDoubleComplex* const Ainv[],
|
|
int lda_inv,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
/* Batch QR Factorization */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(cublasHandle_t handle,
|
|
int m,
|
|
int n,
|
|
float* const Aarray[],
|
|
int lda,
|
|
float* const TauArray[],
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched(cublasHandle_t handle,
|
|
int m,
|
|
int n,
|
|
double* const Aarray[],
|
|
int lda,
|
|
double* const TauArray[],
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched(cublasHandle_t handle,
|
|
int m,
|
|
int n,
|
|
cuComplex* const Aarray[],
|
|
int lda,
|
|
cuComplex* const TauArray[],
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched(cublasHandle_t handle,
|
|
int m,
|
|
int n,
|
|
cuDoubleComplex* const Aarray[],
|
|
int lda,
|
|
cuDoubleComplex* const TauArray[],
|
|
int* info,
|
|
int batchSize);
|
|
|
|
/* Batched least squares (GELS): only m >= n and the non-transposed case are supported */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgelsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int m,
|
|
int n,
|
|
int nrhs,
|
|
float* const Aarray[],
|
|
int lda,
|
|
float* const Carray[],
|
|
int ldc,
|
|
int* info,
|
|
int* devInfoArray,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgelsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int m,
|
|
int n,
|
|
int nrhs,
|
|
double* const Aarray[],
|
|
int lda,
|
|
double* const Carray[],
|
|
int ldc,
|
|
int* info,
|
|
int* devInfoArray,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgelsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int m,
|
|
int n,
|
|
int nrhs,
|
|
cuComplex* const Aarray[],
|
|
int lda,
|
|
cuComplex* const Carray[],
|
|
int ldc,
|
|
int* info,
|
|
int* devInfoArray,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgelsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int m,
|
|
int n,
|
|
int nrhs,
|
|
cuDoubleComplex* const Aarray[],
|
|
int lda,
|
|
cuDoubleComplex* const Carray[],
|
|
int ldc,
|
|
int* info,
|
|
int* devInfoArray,
|
|
int batchSize);
|
|
|
|
/* TPTTR : Triangular Pack format to Triangular format */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasStpttr(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* AP, float* A, int lda);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDtpttr(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* AP, double* A, int lda);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasCtpttr(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* AP, cuComplex* A, int lda);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpttr(
|
|
cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* AP, cuDoubleComplex* A, int lda);
|
|
|
|
/* TRTTP : Triangular format to Triangular Pack format */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasStrttp(cublasHandle_t handle, cublasFillMode_t uplo, int n, const float* A, int lda, float* AP);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDtrttp(cublasHandle_t handle, cublasFillMode_t uplo, int n, const double* A, int lda, double* AP);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasCtrttp(cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuComplex* A, int lda, cuComplex* AP);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrttp(
|
|
cublasHandle_t handle, cublasFillMode_t uplo, int n, const cuDoubleComplex* A, int lda, cuDoubleComplex* AP);
|
|
|
|
/* Batched LU - GETRF */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasSgetrfBatched(cublasHandle_t handle, int n, float* const A[], int lda, int* P, int* info, int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasDgetrfBatched(cublasHandle_t handle, int n, double* const A[], int lda, int* P, int* info, int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI
|
|
cublasCgetrfBatched(cublasHandle_t handle, int n, cuComplex* const A[], int lda, int* P, int* info, int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(
|
|
cublasHandle_t handle, int n, cuDoubleComplex* const A[], int lda, int* P, int* info, int batchSize);
|
|
|
|
/* Batched inversion based on LU factorization from getrf */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle,
|
|
int n,
|
|
const float* const A[],
|
|
int lda,
|
|
const int* P,
|
|
float* const C[],
|
|
int ldc,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle,
|
|
int n,
|
|
const double* const A[],
|
|
int lda,
|
|
const int* P,
|
|
double* const C[],
|
|
int ldc,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle,
|
|
int n,
|
|
const cuComplex* const A[],
|
|
int lda,
|
|
const int* P,
|
|
cuComplex* const C[],
|
|
int ldc,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle,
|
|
int n,
|
|
const cuDoubleComplex* const A[],
|
|
int lda,
|
|
const int* P,
|
|
cuDoubleComplex* const C[],
|
|
int ldc,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
/* Batched solver based on LU factorization from getrf */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int n,
|
|
int nrhs,
|
|
const float* const Aarray[],
|
|
int lda,
|
|
const int* devIpiv,
|
|
float* const Barray[],
|
|
int ldb,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int n,
|
|
int nrhs,
|
|
const double* const Aarray[],
|
|
int lda,
|
|
const int* devIpiv,
|
|
double* const Barray[],
|
|
int ldb,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int n,
|
|
int nrhs,
|
|
const cuComplex* const Aarray[],
|
|
int lda,
|
|
const int* devIpiv,
|
|
cuComplex* const Barray[],
|
|
int ldb,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(cublasHandle_t handle,
|
|
cublasOperation_t trans,
|
|
int n,
|
|
int nrhs,
|
|
const cuDoubleComplex* const Aarray[],
|
|
int lda,
|
|
const int* devIpiv,
|
|
cuDoubleComplex* const Barray[],
|
|
int ldb,
|
|
int* info,
|
|
int batchSize);
|
|
|
|
/* Deprecated */
|
|
|
|
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasUint8gemmBias(cublasHandle_t handle,
|
|
cublasOperation_t transa,
|
|
cublasOperation_t transb,
|
|
cublasOperation_t transc,
|
|
int m,
|
|
int n,
|
|
int k,
|
|
const unsigned char* A,
|
|
int A_bias,
|
|
int lda,
|
|
const unsigned char* B,
|
|
int B_bias,
|
|
int ldb,
|
|
unsigned char* C,
|
|
int C_bias,
|
|
int ldc,
|
|
int C_mult,
|
|
int C_shift);
|
|
|
|
/* }}} cuBLAS Exported API */
|
|
|
|
#if defined(__cplusplus)
|
|
}
|
|
|
|
/*
 * Translate a legacy cudaDataType_t compute-type selector into the matching
 * cublasComputeType_t value.
 *
 * handle      - cuBLAS handle whose math mode is queried via cublasGetMathMode.
 * dataType    - legacy selector; the cases handled are CUDA_R_32F / CUDA_C_32F,
 *               CUDA_R_64F / CUDA_C_64F, CUDA_R_16F and CUDA_R_32I.
 * computeType - output; written only when the function returns
 *               CUBLAS_STATUS_SUCCESS.
 *
 * Returns the status reported by cublasGetMathMode if that query fails,
 * CUBLAS_STATUS_NOT_SUPPORTED for any other dataType, and
 * CUBLAS_STATUS_SUCCESS otherwise.
 */
static inline cublasStatus_t cublasMigrateComputeType(cublasHandle_t handle,
                                                      cudaDataType_t dataType,
                                                      cublasComputeType_t* computeType) {
  cublasMath_t currentMode = CUBLAS_DEFAULT_MATH;

  const cublasStatus_t queryStatus = cublasGetMathMode(handle, &currentMode);
  if (queryStatus != CUBLAS_STATUS_SUCCESS) {
    return queryStatus;
  }

  /* Only the low four bits of the math mode are compared against CUBLAS_PEDANTIC_MATH. */
  const bool pedantic = ((currentMode & 0xf) == CUBLAS_PEDANTIC_MATH);

  /* Placeholder value; every supported case below overwrites it before it is stored. */
  cublasComputeType_t selected = CUBLAS_COMPUTE_32F;

  switch (dataType) {
    case CUDA_R_32F:
    case CUDA_C_32F:
      selected = pedantic ? CUBLAS_COMPUTE_32F_PEDANTIC : CUBLAS_COMPUTE_32F;
      break;
    case CUDA_R_64F:
    case CUDA_C_64F:
      selected = pedantic ? CUBLAS_COMPUTE_64F_PEDANTIC : CUBLAS_COMPUTE_64F;
      break;
    case CUDA_R_16F:
      selected = pedantic ? CUBLAS_COMPUTE_16F_PEDANTIC : CUBLAS_COMPUTE_16F;
      break;
    case CUDA_R_32I:
      selected = pedantic ? CUBLAS_COMPUTE_32I_PEDANTIC : CUBLAS_COMPUTE_32I;
      break;
    default:
      /* Unsupported selector: leave *computeType untouched. */
      return CUBLAS_STATUS_NOT_SUPPORTED;
  }

  *computeType = selected;
  return CUBLAS_STATUS_SUCCESS;
}
|
|
/* Wrappers that accept legacy code passing a cudaDataType as computeType when referenced from C++ code. */
|
|
/*
 * C++-only overload of cublasGemmEx that takes computeType as a cudaDataType.
 * The value is converted with cublasMigrateComputeType and every other
 * argument is forwarded unchanged to cublasGemmEx together with the migrated
 * cublasComputeType_t (that overload is expected to be declared earlier in
 * this header).
 *
 * Returns the migration status unchanged when the conversion fails; otherwise
 * returns whatever the forwarded call returns.
 */
static inline cublasStatus_t cublasGemmEx(cublasHandle_t handle,
                                          cublasOperation_t transa,
                                          cublasOperation_t transb,
                                          int m,
                                          int n,
                                          int k,
                                          const void* alpha, /* host or device pointer */
                                          const void* A,
                                          cudaDataType Atype,
                                          int lda,
                                          const void* B,
                                          cudaDataType Btype,
                                          int ldb,
                                          const void* beta, /* host or device pointer */
                                          void* C,
                                          cudaDataType Ctype,
                                          int ldc,
                                          cudaDataType computeType,
                                          cublasGemmAlgo_t algo) {
  cublasComputeType_t newComputeType = CUBLAS_COMPUTE_32F;

  const cublasStatus_t migrateStatus = cublasMigrateComputeType(handle, computeType, &newComputeType);
  if (migrateStatus == CUBLAS_STATUS_SUCCESS) {
    /* newComputeType is a cublasComputeType_t, so this selects the non-legacy overload. */
    return cublasGemmEx(
        handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc, newComputeType, algo);
  }

  return migrateStatus;
}
|
|
|
|
/*
 * C++-only overload of cublasGemmBatchedEx that takes computeType as a
 * cudaDataType. The value is converted with cublasMigrateComputeType and every
 * other argument is forwarded unchanged to the cublasGemmBatchedEx overload
 * that takes a cublasComputeType_t.
 *
 * Returns the migration status unchanged when the conversion fails; otherwise
 * returns whatever the forwarded call returns.
 */
static inline cublasStatus_t cublasGemmBatchedEx(cublasHandle_t handle,
                                                 cublasOperation_t transa,
                                                 cublasOperation_t transb,
                                                 int m,
                                                 int n,
                                                 int k,
                                                 const void* alpha, /* host or device pointer */
                                                 const void* const Aarray[],
                                                 cudaDataType Atype,
                                                 int lda,
                                                 const void* const Barray[],
                                                 cudaDataType Btype,
                                                 int ldb,
                                                 const void* beta, /* host or device pointer */
                                                 void* const Carray[],
                                                 cudaDataType Ctype,
                                                 int ldc,
                                                 int batchCount,
                                                 cudaDataType computeType,
                                                 cublasGemmAlgo_t algo) {
  /* Filled in by cublasMigrateComputeType; only read after a successful migration. */
  cublasComputeType_t newComputeType;

  const cublasStatus_t migrateStatus = cublasMigrateComputeType(handle, computeType, &newComputeType);
  if (migrateStatus == CUBLAS_STATUS_SUCCESS) {
    /* newComputeType is a cublasComputeType_t, so this selects the non-legacy overload. */
    return cublasGemmBatchedEx(handle,
                               transa,
                               transb,
                               m,
                               n,
                               k,
                               alpha,
                               Aarray,
                               Atype,
                               lda,
                               Barray,
                               Btype,
                               ldb,
                               beta,
                               Carray,
                               Ctype,
                               ldc,
                               batchCount,
                               newComputeType,
                               algo);
  }

  return migrateStatus;
}
|
|
|
|
/*
 * C++-only overload of cublasGemmStridedBatchedEx that takes computeType as a
 * cudaDataType. The value is converted with cublasMigrateComputeType and every
 * other argument is forwarded unchanged to the cublasGemmStridedBatchedEx
 * overload that takes a cublasComputeType_t.
 *
 * Returns the migration status unchanged when the conversion fails; otherwise
 * returns whatever the forwarded call returns.
 */
static inline cublasStatus_t cublasGemmStridedBatchedEx(cublasHandle_t handle,
                                                        cublasOperation_t transa,
                                                        cublasOperation_t transb,
                                                        int m,
                                                        int n,
                                                        int k,
                                                        const void* alpha, /* host or device pointer */
                                                        const void* A,
                                                        cudaDataType Atype,
                                                        int lda,
                                                        long long int strideA, /* purposely signed */
                                                        const void* B,
                                                        cudaDataType Btype,
                                                        int ldb,
                                                        long long int strideB,
                                                        const void* beta, /* host or device pointer */
                                                        void* C,
                                                        cudaDataType Ctype,
                                                        int ldc,
                                                        long long int strideC,
                                                        int batchCount,
                                                        cudaDataType computeType,
                                                        cublasGemmAlgo_t algo) {
  /* Filled in by cublasMigrateComputeType; only read after a successful migration. */
  cublasComputeType_t newComputeType;

  const cublasStatus_t migrateStatus = cublasMigrateComputeType(handle, computeType, &newComputeType);
  if (migrateStatus == CUBLAS_STATUS_SUCCESS) {
    /* newComputeType is a cublasComputeType_t, so this selects the non-legacy overload. */
    return cublasGemmStridedBatchedEx(handle,
                                      transa,
                                      transb,
                                      m,
                                      n,
                                      k,
                                      alpha,
                                      A,
                                      Atype,
                                      lda,
                                      strideA,
                                      B,
                                      Btype,
                                      ldb,
                                      strideB,
                                      beta,
                                      C,
                                      Ctype,
                                      ldc,
                                      strideC,
                                      batchCount,
                                      newComputeType,
                                      algo);
  }

  return migrateStatus;
}
|
|
#endif /* __cplusplus */
|
|
|
|
#endif /* !defined(CUBLAS_API_H_) */
|