1142 lines
43 KiB
C++
1142 lines
43 KiB
C++
//
|
|
// NVIDIA_COPYRIGHT_BEGIN
|
|
//
|
|
// Copyright (c) 2014-2024, NVIDIA CORPORATION. All rights reserved.
|
|
//
|
|
// NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
// and proprietary rights in and to this software, related documentation
|
|
// and any modifications thereto. Any use, reproduction, disclosure or
|
|
// distribution of this software and related documentation without an express
|
|
// license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
//
|
|
// NVIDIA_COPYRIGHT_END
|
|
//
|
|
|
|
#ifndef __NVRTC_H__
|
|
#define __NVRTC_H__
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif /* __cplusplus */
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
/*************************************************************************//**
|
|
*
|
|
* \defgroup error Error Handling
|
|
*
|
|
* NVRTC defines the following enumeration type and function for API call
|
|
* error handling.
|
|
*
|
|
****************************************************************************/
|
|
|
|
|
|
/**
|
|
* \ingroup error
|
|
* \brief The enumerated type nvrtcResult defines API call result codes.
|
|
* NVRTC API functions return nvrtcResult to indicate the call
|
|
* result.
|
|
*/
|
|
typedef enum {
  /** The call completed successfully. */
  NVRTC_SUCCESS = 0,
  /** Documented result of ::nvrtcCreateProgram and ::nvrtcCompileProgram. */
  NVRTC_ERROR_OUT_OF_MEMORY = 1,
  /** Documented result of ::nvrtcCreateProgram. */
  NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
  /** Documented result of most API functions in this header. */
  NVRTC_ERROR_INVALID_INPUT = 3,
  /** Documented result of the program-handle functions
   *  (e.g. ::nvrtcCompileProgram, ::nvrtcDestroyProgram). */
  NVRTC_ERROR_INVALID_PROGRAM = 4,
  /** Documented result of ::nvrtcCompileProgram. */
  NVRTC_ERROR_INVALID_OPTION = 5,
  /** Documented result of ::nvrtcCompileProgram. */
  NVRTC_ERROR_COMPILATION = 6,
  /** Documented result of ::nvrtcCompileProgram. */
  NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
  /** Documented result of ::nvrtcAddNameExpression. */
  NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
  /** Documented result of ::nvrtcGetLoweredName. */
  NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
  /** Documented result of ::nvrtcGetLoweredName. */
  NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
  NVRTC_ERROR_INTERNAL_ERROR = 11,
  /** Documented result of ::nvrtcCompileProgram. */
  NVRTC_ERROR_TIME_FILE_WRITE_FAILED = 12,
  /** ::nvrtcGetPCHCreateStatus: no PCH creation was attempted, either because
   *  PCH was not requested in the preceding ::nvrtcCompileProgram call, or
   *  automatic PCH was requested and the compiler chose not to create a PCH
   *  file. */
  NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED = 13,
  /** ::nvrtcGetPCHCreateStatus: a PCH file could potentially have been
   *  created, but the compiler ran out of space in the PCH heap. */
  NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED = 14,
  /** ::nvrtcGetPCHCreateStatus: an error condition prevented the PCH file
   *  from being created. */
  NVRTC_ERROR_PCH_CREATE = 15,
  /** Documented result of ::nvrtcCompileProgram; see ::nvrtcSetFlowCallback,
   *  whose callback can cancel a compilation in progress. */
  NVRTC_ERROR_CANCELLED = 16
} nvrtcResult;
|
|
|
|
|
|
/**
|
|
* \ingroup error
|
|
* \brief nvrtcGetErrorString is a helper function that returns a string
|
|
* describing the given nvrtcResult code, e.g., NVRTC_SUCCESS to
|
|
* \c "NVRTC_SUCCESS".
|
|
* For unrecognized enumeration values, it returns
|
|
* \c "NVRTC_ERROR unknown".
|
|
*
|
|
* \param [in] result CUDA Runtime Compilation API result code.
|
|
* \return Message string for the given #nvrtcResult code.
|
|
*/
|
|
const char *nvrtcGetErrorString(nvrtcResult result);
|
|
|
|
|
|
/*************************************************************************//**
|
|
*
|
|
* \defgroup query General Information Query
|
|
*
|
|
* NVRTC defines the following functions for general information query.
|
|
*
|
|
****************************************************************************/
|
|
|
|
|
|
/**
|
|
* \ingroup query
|
|
* \brief nvrtcVersion sets the output parameters \p major and \p minor
|
|
* with the CUDA Runtime Compilation version number.
|
|
*
|
|
* \param [out] major CUDA Runtime Compilation major version number.
|
|
* \param [out] minor CUDA Runtime Compilation minor version number.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
*
|
|
*/
|
|
nvrtcResult nvrtcVersion(int *major, int *minor);
|
|
|
|
|
|
/**
|
|
* \ingroup query
|
|
* \brief nvrtcGetNumSupportedArchs sets the output parameter \p numArchs
|
|
* with the number of architectures supported by NVRTC. This can
|
|
* then be used to pass an array to ::nvrtcGetSupportedArchs to
|
|
* get the supported architectures.
|
|
*
|
|
* \param [out] numArchs number of supported architectures.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
*
|
|
* \see ::nvrtcGetSupportedArchs
|
|
*/
|
|
nvrtcResult nvrtcGetNumSupportedArchs(int* numArchs);
|
|
|
|
|
|
/**
|
|
* \ingroup query
|
|
* \brief nvrtcGetSupportedArchs populates the array passed via the output parameter
|
|
* \p supportedArchs with the architectures supported by NVRTC. The array is
|
|
* sorted in the ascending order. The size of the array to be passed can be
|
|
* determined using ::nvrtcGetNumSupportedArchs.
|
|
*
|
|
* \param [out] supportedArchs sorted array of supported architectures.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
*
|
|
* \see ::nvrtcGetNumSupportedArchs
|
|
*/
|
|
nvrtcResult nvrtcGetSupportedArchs(int* supportedArchs);
|
|
|
|
|
|
/*************************************************************************//**
|
|
*
|
|
* \defgroup compilation Compilation
|
|
*
|
|
* NVRTC defines the following type and functions for actual compilation.
|
|
*
|
|
****************************************************************************/
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcProgram is the unit of compilation, and an opaque handle for
|
|
* a program.
|
|
*
|
|
* To compile a CUDA program string, an instance of nvrtcProgram must be
|
|
* created first with ::nvrtcCreateProgram, then compiled with
|
|
* ::nvrtcCompileProgram.
|
|
*/
|
|
typedef struct _nvrtcProgram *nvrtcProgram;
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcCreateProgram creates an instance of nvrtcProgram with the
|
|
* given input parameters, and sets the output parameter \p prog with
|
|
* it.
|
|
*
|
|
* \param [out] prog CUDA Runtime Compilation program.
|
|
* \param [in] src CUDA program source.
|
|
* \param [in] name CUDA program name.\n
|
|
* \p name can be \c NULL; \c "default_program" is
|
|
* used when \p name is \c NULL or "".
|
|
* \param [in] numHeaders Number of headers used.\n
|
|
* \p numHeaders must be greater than or equal to 0.
|
|
* \param [in] headers Sources of the headers.\n
|
|
* \p headers can be \c NULL when \p numHeaders is
|
|
* 0.
|
|
* \param [in] includeNames Name of each header by which they can be
|
|
* included in the CUDA program source.\n
|
|
* \p includeNames can be \c NULL when \p numHeaders
|
|
* is 0. These headers must be included with the exact
|
|
* names specified here.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_PROGRAM_CREATION_FAILURE \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcDestroyProgram
|
|
*/
|
|
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
|
|
const char *src,
|
|
const char *name,
|
|
int numHeaders,
|
|
const char * const *headers,
|
|
const char * const *includeNames);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcDestroyProgram destroys the given program.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcCreateProgram
|
|
*/
|
|
nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcCompileProgram compiles the given program.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [in] numOptions Number of compiler options passed.
|
|
* \param [in] options Compiler options in the form of C string array.\n
|
|
* \p options can be \c NULL when \p numOptions is 0.
|
|
*
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_OPTION \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_COMPILATION \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_BUILTIN_OPERATION_FAILURE \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_TIME_FILE_WRITE_FAILED \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_CANCELLED \endlink
|
|
*
|
|
* It supports compile options listed in \ref options.
|
|
*/
|
|
nvrtcResult nvrtcCompileProgram(nvrtcProgram prog,
|
|
int numOptions, const char * const *options);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetPTXSize sets the value of \p ptxSizeRet with the size of the PTX
|
|
* generated by the previous compilation of \p prog (including the
|
|
* trailing \c NULL).
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] ptxSizeRet Size of the generated PTX (including the trailing
|
|
* \c NULL).
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetPTX
|
|
*/
|
|
nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetPTX stores the PTX generated by the previous compilation
|
|
* of \p prog in the memory pointed by \p ptx.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] ptx Compiled result.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetPTXSize
|
|
*/
|
|
nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetCUBINSize sets the value of \p cubinSizeRet with the size of the cubin
|
|
* generated by the previous compilation of \p prog. The value of
|
|
* cubinSizeRet is set to 0 if the value specified to \c -arch is a
|
|
* virtual architecture instead of an actual architecture.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] cubinSizeRet Size of the generated cubin.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetCUBIN
|
|
*/
|
|
nvrtcResult nvrtcGetCUBINSize(nvrtcProgram prog, size_t *cubinSizeRet);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetCUBIN stores the cubin generated by the previous compilation
|
|
* of \p prog in the memory pointed by \p cubin. No cubin is available
|
|
* if the value specified to \c -arch is a virtual architecture instead
|
|
* of an actual architecture.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] cubin Compiled and assembled result.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetCUBINSize
|
|
*/
|
|
nvrtcResult nvrtcGetCUBIN(nvrtcProgram prog, char *cubin);
|
|
|
|
|
|
#if defined(_WIN32)
|
|
# define __DEPRECATED__(msg) __declspec(deprecated(msg))
|
|
#elif (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5 && !defined(__clang__))))
|
|
# define __DEPRECATED__(msg) __attribute__((deprecated))
|
|
#elif (defined(__GNUC__))
|
|
# define __DEPRECATED__(msg) __attribute__((deprecated(msg)))
|
|
#else
|
|
# define __DEPRECATED__(msg)
|
|
#endif
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief
|
|
* DEPRECATION NOTICE: This function will be removed in a future release. Please use
|
|
* nvrtcGetLTOIRSize (and nvrtcGetLTOIR) instead.
|
|
*/
|
|
__DEPRECATED__("This function will be removed in a future release. Please use nvrtcGetLTOIRSize instead")
|
|
nvrtcResult nvrtcGetNVVMSize(nvrtcProgram prog, size_t *nvvmSizeRet);
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief
|
|
* DEPRECATION NOTICE: This function will be removed in a future release. Please use
|
|
* nvrtcGetLTOIR (and nvrtcGetLTOIRSize) instead.
|
|
*/
|
|
__DEPRECATED__("This function will be removed in a future release. Please use nvrtcGetLTOIR instead")
|
|
nvrtcResult nvrtcGetNVVM(nvrtcProgram prog, char *nvvm);
|
|
|
|
#undef __DEPRECATED__
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetLTOIRSize sets the value of \p LTOIRSizeRet with the size of the LTO IR
|
|
* generated by the previous compilation of \p prog. The value of
|
|
* LTOIRSizeRet is set to 0 if the program was not compiled with
|
|
* \c -dlto.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] LTOIRSizeRet Size of the generated LTO IR.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetLTOIR
|
|
*/
|
|
nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *LTOIRSizeRet);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetLTOIR stores the LTO IR generated by the previous compilation
|
|
* of \p prog in the memory pointed by \p LTOIR. No LTO IR is available
|
|
* if the program was compiled without \c -dlto.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] LTOIR Compiled result.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetLTOIRSize
|
|
*/
|
|
nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *LTOIR);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetOptiXIRSize sets the value of \p optixirSizeRet with the size of the OptiX IR
|
|
* generated by the previous compilation of \p prog. The value of
|
|
* \p optixirSizeRet is set to 0 if the program was compiled with
|
|
* options incompatible with OptiX IR generation.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] optixirSizeRet Size of the generated OptiX IR.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetOptiXIR
|
|
*/
|
|
nvrtcResult nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t *optixirSizeRet);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetOptiXIR stores the OptiX IR generated by the previous compilation
|
|
* of \p prog in the memory pointed by \p optixir. No OptiX IR is available
|
|
* if the program was compiled with options incompatible with OptiX IR generation.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] optixir OptiX IR compiled result.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetOptiXIRSize
|
|
*/
|
|
nvrtcResult nvrtcGetOptiXIR(nvrtcProgram prog, char *optixir);
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetProgramLogSize sets \p logSizeRet with the size of the
|
|
* log generated by the previous compilation of \p prog (including the
|
|
* trailing \c NULL).
|
|
*
|
|
* Note that compilation log may be generated with warnings and informative
|
|
* messages, even when the compilation of \p prog succeeds.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] logSizeRet Size of the compilation log
|
|
* (including the trailing \c NULL).
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetProgramLog
|
|
*/
|
|
nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetProgramLog stores the log generated by the previous
|
|
* compilation of \p prog in the memory pointed by \p log.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] log Compilation log.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* \see ::nvrtcGetProgramLogSize
|
|
*/
|
|
nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
|
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcAddNameExpression notes the given name expression
|
|
* denoting the address of a __global__ function
|
|
* or __device__/__constant__ variable.
|
|
*
|
|
* The identical name expression string must be provided on a subsequent
|
|
* call to nvrtcGetLoweredName to extract the lowered name.
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [in] name_expression constant expression denoting the address of
|
|
* a __global__ function or __device__/__constant__ variable.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION \endlink
|
|
*
|
|
* \see ::nvrtcGetLoweredName
|
|
*/
|
|
nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog,
|
|
const char * const name_expression);
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcGetLoweredName extracts the lowered (mangled) name
|
|
* for a __global__ function or __device__/__constant__ variable,
|
|
* and updates *lowered_name to point to it. The memory containing
|
|
* the name is released when the NVRTC program is destroyed by
|
|
* nvrtcDestroyProgram.
|
|
* The identical name expression must have been previously
|
|
* provided to nvrtcAddNameExpression.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [in] name_expression constant expression denoting the address of
|
|
* a __global__ function or __device__/__constant__ variable.
|
|
* \param [out] lowered_name initialized by the function to point to a
|
|
* C string containing the lowered (mangled)
|
|
* name corresponding to the provided name expression.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID \endlink
|
|
*
|
|
* \see ::nvrtcAddNameExpression
|
|
*/
|
|
nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog,
|
|
const char *const name_expression,
|
|
const char** lowered_name);
|
|
|
|
|
|
/*************************************************************************//**
|
|
*
|
|
* \defgroup precompiled_header Precompiled header (PCH) (CUDA 12.8+)
|
|
*
|
|
* NVRTC defines the following functions related to PCH. Also see the PCH-related
|
|
* flags passed to nvrtcCompileProgram.
|
|
****************************************************************************/
|
|
|
|
|
|
/**
|
|
* \ingroup precompiled_header
|
|
* \brief retrieve the current size of the PCH Heap.
|
|
*
|
|
* \param [out] ret pointer to location where the size of the PCH Heap
|
|
* will be stored
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
*
|
|
*/
|
|
nvrtcResult nvrtcGetPCHHeapSize(size_t* ret);
|
|
|
|
/**
|
|
* \ingroup precompiled_header
|
|
* \brief set the size of the PCH Heap.
|
|
*
|
|
* \param [in] size requested size of the PCH Heap, in bytes
|
|
*
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
*
|
|
* The requested size may be rounded up to a platform dependent
|
|
* alignment (e.g. page size). If the PCH Heap has already been allocated,
|
|
* the heap memory will be freed and a new PCH Heap will be allocated.
|
|
*/
|
|
nvrtcResult nvrtcSetPCHHeapSize(size_t size);
|
|
|
|
/**
|
|
* \ingroup precompiled_header
|
|
* \brief returns the PCH creation status.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
*
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_PCH_CREATE \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
*
|
|
* NVRTC_SUCCESS indicates that the PCH was successfully created.
|
|
* NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED indicates that no PCH creation
|
|
* was attempted, either because PCH functionality was not requested during
|
|
* the preceding nvrtcCompileProgram call, or automatic PCH processing was
|
|
* requested, and compiler chose not to create a PCH file.
|
|
* NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED indicates that a PCH file could
|
|
* potentially have been created, but the compiler ran out of space in the PCH
|
|
* heap. In this scenario, the nvrtcGetPCHHeapSizeRequired() can be used to
|
|
* query the required heap size, the heap can be reallocated for this size with
|
|
* nvrtcSetPCHHeapSize(), and PCH creation may be reattempted by invoking
|
|
* nvrtcCompileProgram() with a new NVRTC program instance.
|
|
* NVRTC_ERROR_PCH_CREATE indicates that an error condition prevented the
|
|
* PCH file from being created.
|
|
*/
|
|
nvrtcResult nvrtcGetPCHCreateStatus(nvrtcProgram prog);
|
|
|
|
/**
|
|
* \ingroup precompiled_header
|
|
* \brief retrieve the size of the PCH heap required to compile
|
|
* the given program.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [out] size pointer to location where the required size of the PCH Heap
|
|
* will be stored
|
|
*
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
* The size retrieved using this function is only valid if nvrtcGetPCHCreateStatus()
|
|
* returned NVRTC_SUCCESS or NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED
|
|
*/
|
|
nvrtcResult nvrtcGetPCHHeapSizeRequired(nvrtcProgram prog, size_t* size);
|
|
|
|
/**
|
|
* \ingroup compilation
|
|
* \brief nvrtcSetFlowCallback registers a callback function that the compiler
|
|
* will invoke at different points during a call to nvrtcCompileProgram,
|
|
* and the callback function can decide whether to cancel compilation by
|
|
* returning specific values.
|
|
*
|
|
* The callback function must satisfy the following constraints:
|
|
*
|
|
* (1) Its signature should be:
|
|
* @code
|
|
* int callback(void* param1, void* param2);
|
|
* @endcode
|
|
* When invoking the callback, the compiler will always pass \p payload to
|
|
* param1 so that the callback may make decisions based on \p payload . It'll
|
|
* always pass NULL to param2 for now which is reserved for future extensions.
|
|
*
|
|
* (2) It must return 1 to cancel compilation or 0 to continue.
|
|
* Other return values are reserved for future use.
|
|
*
|
|
* (3) It must return consistent values. Once it returns 1 at one point, it must
|
|
* return 1 in all following invocations during the current nvrtcCompileProgram
|
|
* call in progress.
|
|
*
|
|
* (4) It must be thread-safe.
|
|
*
|
|
* (5) It must not invoke any nvrtc/libnvvm/ptx APIs.
|
|
*
|
|
* \param [in] prog CUDA Runtime Compilation program.
|
|
* \param [in] callback the callback that issues cancellation signal.
|
|
* \param [in] payload to be passed as a parameter when invoking the callback.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
|
*/
|
|
nvrtcResult nvrtcSetFlowCallback(nvrtcProgram prog, int (*callback)(void*, void*), void *payload);
|
|
|
|
/**
|
|
* \defgroup options Supported Compile Options
|
|
*
|
|
* NVRTC supports the compile options below.
|
|
* Option names with two preceding dashes (\c --) are long option names and
|
|
* option names with one preceding dash (\c -) are short option names.
|
|
* Short option names can be used instead of long option names.
|
|
* When a compile option takes an argument, an assignment operator (\c =)
|
|
* is used to separate the compile option argument from the compile option
|
|
* name, e.g., \c "--gpu-architecture=compute_60".
|
|
* Alternatively, the compile option name and the argument can be specified in
|
|
* separate strings without an assignment operator, e.g.,
|
|
* \c "--gpu-architecture" \c "compute_60".
|
|
* Single-character short option names, such as \c -D, \c -U, and \c -I, do
|
|
* not require an assignment operator, and the compile option name and the
|
|
* argument can be present in the same string with or without spaces between
|
|
* them.
|
|
* For instance, \c "-D=<def>", \c "-D<def>", and \c "-D <def>" are all
|
|
* supported.
|
|
*
|
|
* The valid compiler options are:
|
|
*
|
|
* - Compilation targets
|
|
* - \c --gpu-architecture=\<arch\> (\c -arch)
|
|
*
|
|
* Specify the name of the class of GPU architectures for which the
|
|
* input must be compiled.\n
|
|
* - Valid <c>\<arch\></c>s:
|
|
* - \c compute_50
|
|
* - \c compute_52
|
|
* - \c compute_53
|
|
* - \c compute_60
|
|
* - \c compute_61
|
|
* - \c compute_62
|
|
* - \c compute_70
|
|
* - \c compute_72
|
|
* - \c compute_75
|
|
* - \c compute_80
|
|
* - \c compute_87
|
|
* - \c compute_89
|
|
* - \c compute_90
|
|
* - \c compute_90a
|
|
* - \c compute_100
|
|
* - \c compute_100a
|
|
* - \c sm_50
|
|
* - \c sm_52
|
|
* - \c sm_53
|
|
* - \c sm_60
|
|
* - \c sm_61
|
|
* - \c sm_62
|
|
* - \c sm_70
|
|
* - \c sm_72
|
|
* - \c sm_75
|
|
* - \c sm_80
|
|
* - \c sm_87
|
|
* - \c sm_89
|
|
* - \c sm_90
|
|
* - \c sm_90a
|
|
* - \c sm_100
|
|
* - \c sm_100a
|
|
* - Default: \c compute_52
|
|
* - Separate compilation / whole-program compilation
|
|
* - \c --device-c (\c -dc)
|
|
*
|
|
* Generate relocatable code that can be linked with other relocatable
|
|
* device code. It is equivalent to \c --relocatable-device-code=true.
|
|
* - \c --device-w (\c -dw)
|
|
*
|
|
* Generate non-relocatable code. It is equivalent to \c --relocatable-device-code=false.
|
|
* - \c --relocatable-device-code={true|false} (\c -rdc)
|
|
*
|
|
* Enable (disable) the generation of relocatable device code.
|
|
* - Default: \c false
|
|
* - \c --extensible-whole-program (\c -ewp)
|
|
*
|
|
* Do extensible whole program compilation of device code.
|
|
* - Default: \c false
|
|
* - Debugging support
|
|
* - \c --device-debug (\c -G)
|
|
*
|
|
* Generate debug information. If \c --dopt is not specified, then turns off all optimizations.
|
|
* - \c --generate-line-info (\c -lineinfo)
|
|
*
|
|
* Generate line-number information.
|
|
* - Code generation
|
|
* - \c --dopt \c on (\c -dopt)
|
|
*
|
|
* - \c --dopt=on
|
|
*
|
|
* Enable device code optimization. When specified along with \c -G, enables
|
|
* limited debug information generation for optimized device code (currently,
|
|
* only line number information). When \c -G is not specified, \c -dopt=on is implicit.
|
|
*
|
|
* - \c --ptxas-options \<options\> (\c -Xptxas)
|
|
*
|
|
* - \c --ptxas-options=\<options\>
|
|
*
|
|
* Specify options directly to ptxas, the PTX optimizing assembler.
|
|
* - \c --maxrregcount=\<N\> (\c -maxrregcount)
|
|
*
|
|
* Specify the maximum amount of registers that GPU functions can use.
|
|
* Until a function-specific limit, a higher value will generally
|
|
* increase the performance of individual GPU threads that execute this
|
|
* function. However, because thread registers are allocated from a
|
|
* global register pool on each GPU, a higher value of this option will
|
|
* also reduce the maximum thread block size, thereby reducing the amount
|
|
* of thread parallelism. Hence, a good maxrregcount value is the result
|
|
* of a trade-off. If this option is not specified, then no maximum is
|
|
* assumed. Value less than the minimum registers required by ABI will
|
|
* be bumped up by the compiler to ABI minimum limit.
|
|
*
|
|
* - \c --ftz={true|false} (\c -ftz)
|
|
*
|
|
* When performing single-precision floating-point operations, flush
|
|
* denormal values to zero or preserve denormal values.
|
|
*
|
|
* \c --use_fast_math implies \c --ftz=true.
|
|
* - Default: \c false
|
|
*
|
|
* - \c --prec-sqrt={true|false} (\c -prec-sqrt)
|
|
*
|
|
* For single-precision floating-point square root, use IEEE
|
|
* round-to-nearest mode or use a faster approximation.
|
|
* \c --use_fast_math implies \c --prec-sqrt=false.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --prec-div={true|false} (\c -prec-div)
|
|
* For single-precision floating-point division and reciprocals, use IEEE
|
|
* round-to-nearest mode or use a faster approximation.
|
|
* \c --use_fast_math implies \c --prec-div=false.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --fmad={true|false} (\c -fmad)
|
|
*
|
|
* Enables (disables) the contraction of floating-point multiplies and
|
|
* adds/subtracts into floating-point multiply-add operations (FMAD,
|
|
* FFMA, or DFMA). \c --use_fast_math implies \c --fmad=true.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --use_fast_math (\c -use_fast_math)
|
|
*
|
|
* Make use of fast math operations.
|
|
* \c --use_fast_math implies \c --ftz=true \c --prec-div=false
|
|
* \c --prec-sqrt=false \c --fmad=true.
|
|
*
|
|
* - \c --extra-device-vectorization (\c -extra-device-vectorization)
|
|
*
|
|
* Enables more aggressive device code vectorization in the NVVM optimizer.
|
|
*
|
|
* - \c --modify-stack-limit={true|false} (\c -modify-stack-limit)
|
|
*
|
|
* On Linux, during compilation, use \c setrlimit() to increase stack size
|
|
* to maximum allowed. The limit is reset to the previous value at the
|
|
* end of compilation.
|
|
* Note: \c setrlimit() changes the value for the entire process.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --dlink-time-opt (\c -dlto)
|
|
*
|
|
* Generate intermediate code for later link-time optimization.
|
|
* It implies \c -rdc=true.
|
|
* Note: when this option is used the \c nvrtcGetLTOIR API should be used,
|
|
* as PTX or Cubin will not be generated.
|
|
*
|
|
* - \c --gen-opt-lto (\c -gen-opt-lto)
|
|
*
|
|
* Run the optimizer passes before generating the LTO IR.
|
|
*
|
|
* - \c --optix-ir (\c -optix-ir)
|
|
*
|
|
* Generate OptiX IR. The OptiX IR is only intended for consumption by OptiX
|
|
* through appropriate APIs. This feature is not supported with
|
|
* link-time-optimization (\c -dlto).
|
|
*
|
|
* Note: when this option is used the nvrtcGetOptiXIR API should be used,
|
|
* as PTX or Cubin will not be generated.
|
|
*
|
|
* - \c --jump-table-density=[0-101] (\c -jtd)
|
|
*
|
|
* Specify the case density percentage in switch statements, and use it as
|
|
* a minimal threshold to determine whether a jump table (brx.idx instruction)
|
|
* will be used to implement a switch statement. Default value is 101. The
|
|
* percentage ranges from 0 to 101 inclusively.
|
|
*
|
|
* - \c --device-stack-protector={true|false} (\c -device-stack-protector)
|
|
*
|
|
* Enable (disable) the generation of stack canaries in device code.
|
|
*
|
|
* - Default: \c false
|
|
*
|
|
* - Preprocessing
|
|
* - \c --define-macro=\<def\> (\c -D)
|
|
*
|
|
* \c \<def\> can be either \c \<name\> or \c \<name\>=\<definition\>.
|
|
* - \c \<name\>
|
|
*
|
|
* Predefine \c \<name\> as a macro with definition \c 1.
|
|
* - \c \<name\>=\<definition\>
|
|
*
|
|
* The contents of \c \<definition\> are tokenized and preprocessed
|
|
* as if they appeared during translation phase three in a \c \#define
|
|
* directive. In particular, the definition will be truncated by
|
|
* embedded new line characters.
|
|
*
|
|
* - \c --undefine-macro=\<def\> (\c -U)
|
|
*
|
|
* Cancel any previous definition of \c \<def\>.
|
|
*
|
|
* - \c --include-path=\<dir\> (\c -I)
|
|
*
|
|
* Add the directory \c \<dir\> to the list of directories to be
|
|
* searched for headers. These paths are searched after the list of
|
|
* headers given to ::nvrtcCreateProgram.
|
|
*
|
|
* - \c --pre-include=\<header\> (\c -include)
|
|
*
|
|
* Preinclude \c \<header\> during preprocessing.
|
|
*
|
|
* - \c --no-source-include (\c -no-source-include)
|
|
*
|
|
* The preprocessor by default adds the directory of each input source
|
|
* to the include path. This option disables this feature and only
|
|
* considers the paths specified explicitly.
|
|
*
|
|
* - Language Dialect
|
|
* - \c --std={c++03|c++11|c++14|c++17|c++20} (\c -std)
|
|
*
|
|
* Set language dialect to C++03, C++11, C++14, C++17 or C++20
|
|
* - Default: \c c++17
|
|
*
|
|
* - \c --builtin-move-forward={true|false} (\c -builtin-move-forward)
|
|
*
|
|
* Provide builtin definitions of \c std::move and \c std::forward,
|
|
* when C++11 or later language dialect is selected.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --builtin-initializer-list={true|false}
|
|
* (\c -builtin-initializer-list)
|
|
*
|
|
* Provide builtin definitions of \c std::initializer_list class and
|
|
* member functions when C++11 or later language dialect is selected.
|
|
* - Default: \c true
|
|
*
|
|
* - Precompiled header support (CUDA 12.8+)
|
|
* - \c --pch (\c -pch)
|
|
*
|
|
* Enable automatic PCH processing.
|
|
*
|
|
* - \c --create-pch=<file-name> (\c -create-pch)
|
|
*
|
|
* Create a PCH file.
|
|
*
|
|
* - \c --use-pch=<file-name> (\c -use-pch)
|
|
*
|
|
* Use the specified PCH file.
|
|
*
|
|
* - \c --pch-dir=<directory-name> (\c -pch-dir)
|
|
*
|
|
* When using automatic PCH (\c -pch), look for and create PCH files in the
|
|
* specified directory. When using explicit PCH (\c -create-pch or \c -use-pch),
|
|
* the directory name is prefixed before the specified file name, unless
|
|
* the file name is an absolute path name.
|
|
*
|
|
* - \c --pch-verbose={true|false} (\c -pch-verbose)
|
|
*
|
|
* In automatic PCH mode, for each PCH file that could not be used in current
|
|
* compilation, print the reason in the compilation log.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --pch-messages={true|false} (\c -pch-messages)
|
|
*
|
|
* Print a message in the compilation log, if a PCH file was created or used
|
|
* in the current compilation.
|
|
* - Default: \c true
|
|
*
|
|
* - \c --instantiate-templates-in-pch={true|false} (\c -instantiate-templates-in-pch)
|
|
*
|
|
* Enable or disable instantiation of templates before PCH creation. Instantiating
|
|
* templates may increase the size of the PCH file, while reducing the compilation
|
|
* cost when using the PCH file (since some template instantiations can be skipped).
|
|
* - Default: \c true
|
|
*
|
|
* - Misc.
|
|
* - \c --disable-warnings (\c -w)
|
|
*
|
|
* Inhibit all warning messages.
|
|
*
|
|
* - \c --restrict (\c -restrict)
|
|
*
|
|
* Programmer assertion that all kernel pointer parameters are restrict
|
|
* pointers.
|
|
*
|
|
* - \c --device-as-default-execution-space
|
|
* (\c -default-device)
|
|
*
|
|
* Treat entities with no execution space annotation as \c __device__
|
|
* entities.
|
|
*
|
|
* - \c --device-int128 (\c -device-int128)
|
|
*
|
|
* Allow the \c __int128 type in device code. Also causes the macro \c __CUDACC_RTC_INT128__
|
|
* to be defined.
|
|
*
|
|
* - \c --device-float128 (\c -device-float128)
|
|
*
|
|
* Allow the \c __float128 and \c _Float128 types in device code. Also
|
|
* causes the macro \c __CUDACC_RTC_FLOAT128__ to be defined.
|
|
*
|
|
* - \c --optimization-info=\<kind\> (\c -opt-info)
|
|
*
|
|
* Provide optimization reports for the specified kind of optimization.
|
|
* The following kind tags are supported:
|
|
* - \c inline : emit a remark when a function is inlined.
|
|
*
|
|
* - \c --display-error-number (\c -err-no)
|
|
*
|
|
* Display diagnostic number for warning messages. (Default)
|
|
*
|
|
* - \c --no-display-error-number (\c -no-err-no)
|
|
*
|
|
* Disables the display of a diagnostic number for warning messages.
|
|
*
|
|
* - \c --diag-error=<error-number>,... (\c -diag-error)
|
|
*
|
|
* Emit error for specified diagnostic message number(s). Message numbers can be separated by comma.
|
|
*
|
|
* - \c --diag-suppress=<error-number>,... (\c -diag-suppress)
|
|
*
|
|
* Suppress specified diagnostic message number(s). Message numbers can be separated by comma.
|
|
*
|
|
* - \c --diag-warn=<error-number>,... (\c -diag-warn)
|
|
*
|
|
* Emit warning for specified diagnostic message number(s). Message numbers can be separated by comma.
|
|
*
|
|
* - \c --brief-diagnostics={true|false} (\c -brief-diag)
|
|
*
|
|
* This option disables or enables showing source line and column info
|
|
* in a diagnostic.
|
|
* Specifying \c --brief-diagnostics=true will not show the source line and column info.
|
|
* - Default: \c false
|
|
*
|
|
* - \c --time=<file-name> (\c -time)
|
|
*
|
|
* Generate a comma separated value table with the time taken by each compilation
|
|
* phase, and append it at the end of the file given as the option argument.
|
|
* If the file does not exist, the column headings are generated in the first row
|
|
* of the table. If the file name is '-', the timing data is written to the compilation log.
|
|
*
|
|
* - \c --split-compile=<number-of-threads> (\c -split-compile=<number-of-threads>)
|
|
*
|
|
* Perform compiler optimizations in parallel.
|
|
* Split compilation attempts to reduce compile time by enabling the compiler to run certain
|
|
* optimization passes concurrently. This option accepts a numerical value that specifies the
|
|
* maximum number of threads the compiler can use. One can also allow the compiler to use the maximum
|
|
* number of threads available on the system by setting \c --split-compile=0.
|
|
* Setting \c --split-compile=1 will cause this option to be ignored.
|
|
*
|
|
* - \c --fdevice-syntax-only (\c -fdevice-syntax-only)
|
|
*
|
|
* Ends device compilation after front-end syntax checking. This option does not generate valid
|
|
* device code.
|
|
*
|
|
* - \c --minimal (\c -minimal)
|
|
*
|
|
* Omit certain language features to reduce compile time for small programs.
|
|
* In particular, the following are omitted:
|
|
* - Texture and surface functions and associated types, e.g., \c cudaTextureObject_t.
|
|
* - CUDA Runtime Functions that are provided by the cudadevrt device code library,
|
|
* typically named with prefix "cuda", e.g., \c cudaMalloc.
|
|
* - Kernel launch from device code.
|
|
* - Types and macros associated with CUDA Runtime and Driver APIs,
|
|
* provided by \c cuda/tools/cudart/driver_types.h, typically named with prefix "cuda", e.g., \c cudaError_t.
|
|
*
|
|
* - \c --device-stack-protector (\c -device-stack-protector)
|
|
*
|
|
* Enable stack canaries in device code.
|
|
* Stack canaries make it more difficult to exploit certain types of memory safety bugs involving
|
|
* stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function.
|
|
* Only those functions which are deemed high-risk make use of a stack canary.
|
|
*
|
|
* - \c --fdevice-time-trace=<file-name> (\c -fdevice-time-trace=<file-name>)
|
|
* Enables the time profiler, outputting a JSON file based on given <file-name>. Results can be analyzed on
|
|
* chrome://tracing for a flamegraph visualization.
|
|
*
|
|
*/
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif /* __cplusplus */
|
|
|
|
|
|
/* The utility function 'nvrtcGetTypeName' is not available by default. Define
|
|
the macro 'NVRTC_GET_TYPE_NAME' to a non-zero value to make it available.
|
|
*/
|
|
|
|
#if NVRTC_GET_TYPE_NAME || __DOXYGEN_ONLY__
|
|
|
|
#if NVRTC_USE_CXXABI || __clang__ || __GNUC__ || __DOXYGEN_ONLY__
|
|
#include <cxxabi.h>
|
|
#include <cstdlib>
|
|
|
|
#elif defined(_WIN32)
|
|
#include <Windows.h>
|
|
#include <DbgHelp.h>
|
|
#endif /* NVRTC_USE_CXXABI || __clang__ || __GNUC__ */
|
|
|
|
|
|
#include <string>
|
|
#include <typeinfo>
|
|
|
|
/* Empty tag template used by nvrtcGetTypeName<T>(): that overload takes the
   typeid of __nvrtcGetTypeName_helper_t<T> rather than of T itself, and then
   extracts the template-argument text from the undecorated name.
   NOTE(review): presumably this is done so that top-level cv-qualifiers and
   references on T, which typeid(T) ignores, are preserved in the name. */
template <typename T> struct __nvrtcGetTypeName_helper_t { };
|
|
|
|
/*************************************************************************//**
|
|
*
|
|
* \defgroup hosthelper Host Helper
|
|
*
|
|
* NVRTC defines the following functions for easier interaction with host code.
|
|
*
|
|
****************************************************************************/
|
|
|
|
/**
|
|
* \ingroup hosthelper
|
|
* \brief nvrtcGetTypeName stores the source level name of a type in the given
|
|
* std::string location.
|
|
*
|
|
* This function is only provided when the macro NVRTC_GET_TYPE_NAME is
|
|
* defined with a non-zero value. It uses abi::__cxa_demangle or UnDecorateSymbolName
|
|
* function calls to extract the type name, when using gcc/clang or cl.exe compilers,
|
|
* respectively. If the name extraction fails, it will return NVRTC_INTERNAL_ERROR,
|
|
* otherwise *result is initialized with the extracted name.
|
|
*
|
|
* Windows-specific notes:
|
|
* - nvrtcGetTypeName() is not multi-thread safe because it calls UnDecorateSymbolName(),
|
|
* which is not multi-thread safe.
|
|
* - The returned string may contain Microsoft-specific keywords such as __ptr64 and __cdecl.
|
|
*
|
|
* \param [in] tinfo: reference to object of type std::type_info for a given type.
|
|
* \param [in] result: pointer to std::string in which to store the type name.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INTERNAL_ERROR \endlink
|
|
*
|
|
*/
|
|
inline nvrtcResult nvrtcGetTypeName(const std::type_info &tinfo, std::string *result)
|
|
{
|
|
#if USE_CXXABI || __clang__ || __GNUC__
|
|
const char *name = tinfo.name();
|
|
int status;
|
|
char *undecorated_name = abi::__cxa_demangle(name, 0, 0, &status);
|
|
if (status == 0) {
|
|
*result = undecorated_name;
|
|
free(undecorated_name);
|
|
return NVRTC_SUCCESS;
|
|
}
|
|
#elif defined(_WIN32)
|
|
const char *name = tinfo.raw_name();
|
|
if (!name || *name != '.') {
|
|
return NVRTC_ERROR_INTERNAL_ERROR;
|
|
}
|
|
char undecorated_name[4096];
|
|
//name+1 skips over the '.' prefix
|
|
if(UnDecorateSymbolName(name+1, undecorated_name,
|
|
sizeof(undecorated_name) / sizeof(*undecorated_name),
|
|
//note: doesn't seem to work correctly without UNDNAME_NO_ARGUMENTS.
|
|
UNDNAME_NO_ARGUMENTS | UNDNAME_NAME_ONLY ) ) {
|
|
*result = undecorated_name;
|
|
return NVRTC_SUCCESS;
|
|
}
|
|
#endif /* USE_CXXABI || __clang__ || __GNUC__ */
|
|
|
|
return NVRTC_ERROR_INTERNAL_ERROR;
|
|
}
|
|
|
|
/**
|
|
* \ingroup hosthelper
|
|
* \brief nvrtcGetTypeName stores the source level name of the template type argument
|
|
* T in the given std::string location.
|
|
*
|
|
* This function is only provided when the macro NVRTC_GET_TYPE_NAME is
|
|
* defined with a non-zero value. It uses abi::__cxa_demangle or UnDecorateSymbolName
|
|
* function calls to extract the type name, when using gcc/clang or cl.exe compilers,
|
|
* respectively. If the name extraction fails, it will return NVRTC_INTERNAL_ERROR,
|
|
* otherwise *result is initialized with the extracted name.
|
|
*
|
|
* Windows-specific notes:
|
|
* - nvrtcGetTypeName() is not multi-thread safe because it calls UnDecorateSymbolName(),
|
|
* which is not multi-thread safe.
|
|
* - The returned string may contain Microsoft-specific keywords such as __ptr64 and __cdecl.
|
|
*
|
|
* \param [in] result: pointer to std::string in which to store the type name.
|
|
* \return
|
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
|
* - \link #nvrtcResult NVRTC_ERROR_INTERNAL_ERROR \endlink
|
|
*
|
|
*/
|
|
|
|
template <typename T>
|
|
nvrtcResult nvrtcGetTypeName(std::string *result)
|
|
{
|
|
nvrtcResult res = nvrtcGetTypeName(typeid(__nvrtcGetTypeName_helper_t<T>),
|
|
result);
|
|
if (res != NVRTC_SUCCESS)
|
|
return res;
|
|
|
|
std::string repr = *result;
|
|
std::size_t idx = repr.find("__nvrtcGetTypeName_helper_t");
|
|
idx = (idx != std::string::npos) ? repr.find("<", idx) : idx;
|
|
std::size_t last_idx = repr.find_last_of('>');
|
|
if (idx == std::string::npos || last_idx == std::string::npos) {
|
|
return NVRTC_ERROR_INTERNAL_ERROR;
|
|
}
|
|
++idx;
|
|
*result = repr.substr(idx, last_idx - idx);
|
|
return NVRTC_SUCCESS;
|
|
}
|
|
|
|
#endif /* NVRTC_GET_TYPE_NAME */
|
|
|
|
#endif /* __NVRTC_H__ */
|