DriverTrac/venv/lib/python3.12/site-packages/nvidia/cufile/include/cufile.h
2025-11-28 09:08:33 +05:30

741 lines
29 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
/**
* @file cufile.h
* @brief cuFile C APIs
*
* This file contains all the C APIs to perform GPUDirect Storage supported IO operations
*/
#ifdef __cplusplus
extern "C"
{
#endif
/// @cond DOXYGEN_SKIP_MACRO
#ifndef __CUFILE_H_
#define __CUFILE_H_
#include <stdlib.h>
#include <stdbool.h>
#include <cuda.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#define CUFILEOP_BASE_ERR 5000
//Note :Data path errors are captured via standard error codes
#define CUFILEOP_STATUS_ENTRIES \
CUFILE_OP(0, CU_FILE_SUCCESS, cufile success) \
CUFILE_OP(CUFILEOP_BASE_ERR + 1, CU_FILE_DRIVER_NOT_INITIALIZED, nvidia-fs driver is not loaded. Set allow_compat_mode to true in cufile.json file to enable compatible mode) \
CUFILE_OP(CUFILEOP_BASE_ERR + 2, CU_FILE_DRIVER_INVALID_PROPS, invalid property) \
CUFILE_OP(CUFILEOP_BASE_ERR + 3, CU_FILE_DRIVER_UNSUPPORTED_LIMIT, property range error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 4, CU_FILE_DRIVER_VERSION_MISMATCH, nvidia-fs driver version mismatch) \
CUFILE_OP(CUFILEOP_BASE_ERR + 5, CU_FILE_DRIVER_VERSION_READ_ERROR, nvidia-fs driver version read error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 6, CU_FILE_DRIVER_CLOSING, driver shutdown in progress) \
CUFILE_OP(CUFILEOP_BASE_ERR + 7, CU_FILE_PLATFORM_NOT_SUPPORTED, GPUDirect Storage not supported on current platform) \
CUFILE_OP(CUFILEOP_BASE_ERR + 8, CU_FILE_IO_NOT_SUPPORTED, GPUDirect Storage not supported on current file) \
CUFILE_OP(CUFILEOP_BASE_ERR + 9, CU_FILE_DEVICE_NOT_SUPPORTED, GPUDirect Storage not supported on current GPU) \
CUFILE_OP(CUFILEOP_BASE_ERR + 10, CU_FILE_NVFS_DRIVER_ERROR, nvidia-fs driver ioctl error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 11, CU_FILE_CUDA_DRIVER_ERROR, CUDA Driver API error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 12, CU_FILE_CUDA_POINTER_INVALID, invalid device pointer) \
CUFILE_OP(CUFILEOP_BASE_ERR + 13, CU_FILE_CUDA_MEMORY_TYPE_INVALID, invalid pointer memory type) \
CUFILE_OP(CUFILEOP_BASE_ERR + 14, CU_FILE_CUDA_POINTER_RANGE_ERROR, pointer range exceeds allocated address range) \
CUFILE_OP(CUFILEOP_BASE_ERR + 15, CU_FILE_CUDA_CONTEXT_MISMATCH, cuda context mismatch) \
CUFILE_OP(CUFILEOP_BASE_ERR + 16, CU_FILE_INVALID_MAPPING_SIZE, access beyond maximum pinned size) \
CUFILE_OP(CUFILEOP_BASE_ERR + 17, CU_FILE_INVALID_MAPPING_RANGE, access beyond mapped size) \
CUFILE_OP(CUFILEOP_BASE_ERR + 18, CU_FILE_INVALID_FILE_TYPE, unsupported file type) \
CUFILE_OP(CUFILEOP_BASE_ERR + 19, CU_FILE_INVALID_FILE_OPEN_FLAG, unsupported file open flags) \
CUFILE_OP(CUFILEOP_BASE_ERR + 20, CU_FILE_DIO_NOT_SET, fd direct IO not set) \
CUFILE_OP(CUFILEOP_BASE_ERR + 22, CU_FILE_INVALID_VALUE, invalid arguments) \
CUFILE_OP(CUFILEOP_BASE_ERR + 23, CU_FILE_MEMORY_ALREADY_REGISTERED, device pointer already registered) \
CUFILE_OP(CUFILEOP_BASE_ERR + 24, CU_FILE_MEMORY_NOT_REGISTERED, device pointer lookup failure) \
CUFILE_OP(CUFILEOP_BASE_ERR + 25, CU_FILE_PERMISSION_DENIED, driver or file access error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 26, CU_FILE_DRIVER_ALREADY_OPEN, driver is already open) \
CUFILE_OP(CUFILEOP_BASE_ERR + 27, CU_FILE_HANDLE_NOT_REGISTERED, file descriptor is not registered) \
CUFILE_OP(CUFILEOP_BASE_ERR + 28, CU_FILE_HANDLE_ALREADY_REGISTERED, file descriptor is already registered) \
CUFILE_OP(CUFILEOP_BASE_ERR + 29, CU_FILE_DEVICE_NOT_FOUND, GPU device not found) \
CUFILE_OP(CUFILEOP_BASE_ERR + 30, CU_FILE_INTERNAL_ERROR, internal error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 31, CU_FILE_GETNEWFD_FAILED, failed to obtain new file descriptor) \
CUFILE_OP(CUFILEOP_BASE_ERR + 33, CU_FILE_NVFS_SETUP_ERROR, NVFS driver initialization error) \
CUFILE_OP(CUFILEOP_BASE_ERR + 34, CU_FILE_IO_DISABLED, GPUDirect Storage disabled by config on current file)\
CUFILE_OP(CUFILEOP_BASE_ERR + 35, CU_FILE_BATCH_SUBMIT_FAILED, failed to submit batch operation)\
CUFILE_OP(CUFILEOP_BASE_ERR + 36, CU_FILE_GPU_MEMORY_PINNING_FAILED, failed to allocate pinned GPU Memory) \
CUFILE_OP(CUFILEOP_BASE_ERR + 37, CU_FILE_BATCH_FULL, queue full for batch operation) \
CUFILE_OP(CUFILEOP_BASE_ERR + 38, CU_FILE_ASYNC_NOT_SUPPORTED, cuFile stream operation not supported) \
CUFILE_OP(CUFILEOP_BASE_ERR + 39, CU_FILE_IO_MAX_ERROR, GPUDirect Storage Max Error)
/**
* @brief cufileop status enum
*
* @note on success the error code is set to @ref CU_FILE_SUCCESS.
* @note The error code can be inspected using @ref IS_CUFILE_ERR and @ref CUFILE_ERRSTR.
* @note The error code if set to @ref CU_FILE_CUDA_DRIVER_ERROR, then cuda error can be inspected using @ref IS_CUDA_ERR and @ref CU_FILE_CUDA_ERR.
* @note Data path errors are captured via standard error codes
*/
typedef enum CUfileOpError {
/// @cond DOXYGEN_SKIP_MACRO
#define CUFILE_OP(code, name, string) name = code,
CUFILEOP_STATUS_ENTRIES
#undef CUFILE_OP
///@endcond
} CUfileOpError;
/// @endcond
/**
* @brief cufileop status string
*/
static inline const char *cufileop_status_error(CUfileOpError status)
{
switch (status) {
/// @cond DOXYGEN_SKIP_MACRO
#define CUFILE_OP(code, name, string) \
case name: return #string;
CUFILEOP_STATUS_ENTRIES
#undef CUFILE_OP
///@endcond
default:return "unknown cufile error";
}
}
/**
* @brief cufileop status string
*/
typedef struct CUfileError {
CUfileOpError err; // cufile error
CUresult cu_err; // cuda driver error
}CUfileError_t;
/**
* @brief error macros to inspect error status of type @ref CUfileOpError
*/
#define IS_CUFILE_ERR(err) \
(abs((err)) > CUFILEOP_BASE_ERR)
#define CUFILE_ERRSTR(err) \
cufileop_status_error((CUfileOpError)abs((err)))
#define IS_CUDA_ERR(status) \
((status).err == CU_FILE_CUDA_DRIVER_ERROR)
#define CU_FILE_CUDA_ERR(status) ((status).cu_err)
/* driver properties */
typedef enum CUfileDriverStatusFlags {
CU_FILE_LUSTRE_SUPPORTED = 0, /*!< Support for DDN LUSTRE */
CU_FILE_WEKAFS_SUPPORTED = 1, /*!< Support for WEKAFS */
CU_FILE_NFS_SUPPORTED = 2, /*!< Support for NFS */
CU_FILE_GPFS_SUPPORTED = 3, /*! < Support for GPFS */
CU_FILE_NVME_SUPPORTED = 4, /*!< Support for NVMe */
CU_FILE_NVMEOF_SUPPORTED = 5, /*!< Support for NVMeOF */
CU_FILE_SCSI_SUPPORTED = 6, /*!< Support for SCSI */
CU_FILE_SCALEFLUX_CSD_SUPPORTED = 7, /*!< Support for Scaleflux CSD*/
CU_FILE_NVMESH_SUPPORTED = 8, /*!< Support for NVMesh Block Dev*/
CU_FILE_BEEGFS_SUPPORTED = 9, /*!< Support for BeeGFS */
//10 is reserved for YRCloudFile
CU_FILE_NVME_P2P_SUPPORTED = 11, /*!< Support for NVMe using PCI P2PDMA */
}CUfileDriverStatusFlags_t;
typedef enum CUfileDriverControlFlags {
CU_FILE_USE_POLL_MODE = 0 , /*!< use POLL mode. properties.use_poll_mode*/
CU_FILE_ALLOW_COMPAT_MODE = 1/*!< allow COMPATIBILITY mode. properties.allow_compat_mode*/
}CUfileDriverControlFlags_t;
typedef enum CUfileFeatureFlags {
CU_FILE_DYN_ROUTING_SUPPORTED = 0, /*!< Support for Dynamic routing to handle devices across the PCIe bridges */
CU_FILE_BATCH_IO_SUPPORTED = 1, /*!< Unsupported */
CU_FILE_STREAMS_SUPPORTED = 2, /*!< Unsupported */
CU_FILE_PARALLEL_IO_SUPPORTED = 3 /*!< Unsupported */
}CUfileFeatureFlags_t;
typedef struct CUfileDrvProps {
struct {
unsigned int major_version;
unsigned int minor_version;
size_t poll_thresh_size;
size_t max_direct_io_size;
unsigned int dstatusflags;
unsigned int dcontrolflags;
} nvfs;
unsigned int fflags;
unsigned int max_device_cache_size;
unsigned int per_buffer_cache_size;
unsigned int max_device_pinned_mem_size;
unsigned int max_batch_io_size;
unsigned int max_batch_io_timeout_msecs;
}CUfileDrvProps_t;
typedef struct sockaddr sockaddr_t;
typedef struct cufileRDMAInfo
{
int version;
int desc_len;
const char *desc_str;
}cufileRDMAInfo_t;
#define CU_FILE_RDMA_REGISTER 1
#define CU_FILE_RDMA_RELAXED_ORDERING (1<<1)
typedef struct CUfileFSOps {
/* NULL means discover using fstat */
const char* (*fs_type) (void *handle);
/* list of host addresses to use, NULL means no restriction */
int (*getRDMADeviceList)(void *handle, sockaddr_t **hostaddrs);
/* -1 no pref */
int (*getRDMADevicePriority)(void *handle, char*, size_t,
loff_t, sockaddr_t* hostaddr);
/* NULL means try VFS */
ssize_t (*read) (void *handle, char*, size_t, loff_t, cufileRDMAInfo_t*);
ssize_t (*write) (void *handle, const char *, size_t, loff_t , cufileRDMAInfo_t*);
}CUfileFSOps_t;
/* File Handle */
enum CUfileFileHandleType {
CU_FILE_HANDLE_TYPE_OPAQUE_FD = 1, /*!< Linux based fd */
CU_FILE_HANDLE_TYPE_OPAQUE_WIN32 = 2, /*!< Windows based handle (unsupported) */
CU_FILE_HANDLE_TYPE_USERSPACE_FS = 3, /* Userspace based FS */
};
typedef struct CUfileDescr_t {
enum CUfileFileHandleType type; /* type of file being registered */
union {
int fd; /* Linux */
void *handle; /* Windows */
} handle;
const CUfileFSOps_t *fs_ops; /* file system operation table */
}CUfileDescr_t;
/**
* @brief File handle type
*
*/
typedef void* CUfileHandle_t;
#pragma GCC visibility push(default)
/**
* @brief cuFileHandleRegister is required, and performs extra checking that is memoized to provide increased performance on later cuFile operations.
*
* @param fh @ref CUfileHandle_t opaque file handle for IO operations
* @param descr @ref CUfileDescr_t file descriptor (OS agnostic)
*
* @return CU_FILE_SUCCESS on successful completion. fh will be updated for use in @ref cuFileRead, @ref cuFileWrite, @ref cuFileHandleDeregister
* @return CU_FILE_DRIVER_NOT_INITIALIZED on failure to load driver
* @return CU_FILE_IO_NOT_SUPPORTED - if filesystem is not supported
* @return CU_FILE_INVALID_VALUE if null or bad api arguments
* @return CU_FILE_INVALID_FILE_OPEN_FLAG if file is opened with unsupported modes like no O_DIRECT
* @return CU_FILE_INVALID_FILE_TYPE if filepath is not valid or is not a regular file
* @return CU_FILE_HANDLE_ALREADY_REGISTERED if file handle/descriptor is already registered
*
* <b>Description</b>
* cuFileHandleRegister registers the open file descriptor for use with cuFile IO operations.
*
* This API will ensure that the files descriptor is checked for GPUDirect Storage support and returns a valid file handle on CU_FILE_SUCCESS.
*
* @note the file needs to be opened in O_DIRECT mode to support GPUDirect Storage.
*
* @see cuFileRead
* @see cuFileWrite
* @see cuFileHandleDeregister
*
*/
CUfileError_t cuFileHandleRegister(CUfileHandle_t *fh, CUfileDescr_t *descr);
/**
* @brief releases a registered filehandle from cuFile
*
* @param fh @ref CUfileHandle_t file handle
*
* @return void
*
* @see cuFileHandleRegister
*/
void cuFileHandleDeregister(CUfileHandle_t fh);
/**
* @brief register an existing cudaMalloced memory with cuFile to pin for GPUDirect Storage access or
* register host allocated memory with cuFile.
*
* @param bufPtr_base buffer pointer allocated
* @param length size of memory region from the above specified bufPtr
* @param flags CU_FILE_RDMA_REGISTER
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_NVFS_DRIVER_ERROR
* @return CU_FILE_INVALID_VALUE
* @return CU_FILE_CUDA_ERROR for unsuported memory type
* @return CU_FILE_MEMORY_ALREADY_REGISTERED on error
* @return CU_FILE_GPU_MEMORY_PINNING_FAILED if not enough pinned memory is available
* @note This memory will be use to perform GPU direct DMA from the supported storage.
* @warning This API is intended for usecases where the memory is used as streaming buffer that is reused across multiple cuFile IO operations before calling @ref cuFileBufDeregister
*
* @see cuFileBufDeregister
* @see cuFileRead
* @see cuFileWrite
*/
CUfileError_t cuFileBufRegister(const void *bufPtr_base, size_t length, int flags);
/**
* @brief deregister an already registered device or host memory from cuFile
*
* @param bufPtr_base buffer pointer to deregister
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_INVALID_VALUE on invalid memory pointer or unregistered memory pointer
*
* @see cuFileBufRegister
* @see cuFileRead
* @see cuFileWrite
*/
CUfileError_t cuFileBufDeregister(const void *bufPtr_base);
/**
* @brief read data from a registered file handle to a specified device or host memory
*
* @param fh @ref CUfileHandle_t opaque file handle
* @param bufPtr_base base address of buffer in device or host memory
* @param size size bytes to read
* @param file_offset file-offset from begining of the file
* @param bufPtr_offset offset relative to the bufPtr_base pointer to read into.
*
* @return size of bytes successfully read
* @return -1 on error, in which case errno is set to indicate filesystem errors.
* @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
*
* @note If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
* @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
* for cases where the BAR1 memory size is smaller than the size of the allocated memory.
*
* @see cuFileBufRegister
* @see cuFileHandleRegister
* @see cuFileWrite
*/
ssize_t cuFileRead(CUfileHandle_t fh, void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
/**
* @brief write data from a specified device or host memory to a registered file handle
*
* @param fh @ref CUfileHandle_t opaque file handle
* @param bufPtr_base base address of buffer in device or host memory
* @param size size bytes to write
* @param file_offset file-offset from begining of the file
* @param bufPtr_offset offset relative to the bufPtr_base pointer to write from.
*
* @return size of bytes successfully written
* @return -1 on error, in which case errno is set to indicate filesystem errors.
* @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
*
* @note If the bufPtr is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers if needed.
* @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
* for cases where the BAR1 memory size is smaller than the size of the allocated memory.
*
* @see cuFileBufRegister
* @see cuFileHandleRegister
* @see cuFileRead
*/
ssize_t cuFileWrite(CUfileHandle_t fh, const void *bufPtr_base, size_t size, off_t file_offset, off_t bufPtr_offset);
// CUFile Driver APIs
/**
* @brief
* Initialize the cuFile library and open the nvidia-fs driver
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED
* @return CU_FILE_DRIVER_VERSION_MISMATCH on driver version mismatch error
*
* @see cuFileDriverClose
*/
CUfileError_t cuFileDriverOpen(void);
CUfileError_t cuFileDriverClose(void);
#define cuFileDriverClose cuFileDriverClose_v2
/**
* @brief
* reset the cuFile library and release the nvidia-fs driver
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_CLOSING if there are any active IO operations using @ref cuFileRead or @ref cuFileWrite
*
* @see cuFileDriverOpen
*/
CUfileError_t cuFileDriverClose(void);
/**
* @brief
* returns use count of cufile drivers at that moment by the process.
*/
long cuFileUseCount(void);
/**
* @brief
* Gets the Driver session properties
*
* @return CU_FILE_SUCCESS on success
*
* @see cuFileDriverSetPollMode
* @see cuFileDriverSetMaxDirectIOSize
* @see cuFileDriverSetMaxCacheSize
* @see cuFileDriverSetMaxPinnedMemSize
*/
CUfileError_t cuFileDriverGetProperties(CUfileDrvProps_t *props);
/**
* @brief
* Sets whether the Read/Write APIs use polling to do IO operations
*
* @param poll boolean to indicate whether to use poll mode or not
* @param poll_threshold_size max IO size to use for POLLING mode in KB
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
* @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
*
* @warning This is an advanced command and should be tuned based on available system memory
*
* @see cuFileDriverGetProperties
*/
CUfileError_t cuFileDriverSetPollMode(bool poll, size_t poll_threshold_size);
/**
* @brief
* Control parameter to set max IO size(KB) used by the library to talk to nvidia-fs driver
*
* @param max_direct_io_size maximum allowed direct io size in KB
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
* @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
*
* @warning This is an advanced command and should be tuned based on available system memory
*
* @see cuFileDriverGetProperties
*
*/
CUfileError_t cuFileDriverSetMaxDirectIOSize(size_t max_direct_io_size);
/**
* @brief
* Control parameter to set maximum GPU memory reserved per device by the library for internal buffering
*
* @param max_cache_size The maximum GPU buffer space per device used for internal use in KB
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
* @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
*
* @warning This is an advanced command and should be tuned based on supported GPU memory
*
* @see cuFileDriverGetProperties
*/
CUfileError_t cuFileDriverSetMaxCacheSize(size_t max_cache_size);
/**
* @brief
* Sets maximum buffer space that is pinned in KB for use by @ref cuFileBufRegister
*
* @param max_pinned_size maximum buffer space that is pinned in KB
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
* @return CU_FILE_DRIVER_VERSION_MISMATCH, CU_FILE_DRIVER_UNSUPPORTED_LIMIT on error
*
* @warning This is an advanced command and should be tuned based on supported GPU memory
*
* @see cuFileDriverGetProperties
*
*/
CUfileError_t cuFileDriverSetMaxPinnedMemSize(size_t max_pinned_size);
//Experimental Batch API's
typedef enum CUfileOpcode {
CUFILE_READ = 0,
CUFILE_WRITE
}CUfileOpcode_t;
typedef enum CUFILEStatus_enum {
CUFILE_WAITING = 0x000001, /* required value prior to submission */
CUFILE_PENDING = 0x000002, /* once enqueued */
CUFILE_INVALID = 0x000004, /* request was ill-formed or could not be enqueued */
CUFILE_CANCELED = 0x000008, /* request successfully canceled */
CUFILE_COMPLETE = 0x0000010, /* request successfully completed */
CUFILE_TIMEOUT = 0x0000020, /* request timed out */
CUFILE_FAILED = 0x0000040 /* unable to complete */
}CUfileStatus_t;
typedef enum cufileBatchMode {
CUFILE_BATCH = 1,
} CUfileBatchMode_t;
typedef struct CUfileIOParams {
CUfileBatchMode_t mode; // Must be the very first field.
union {
struct {
void *devPtr_base; //This can be a device memory or a host memory pointer.
off_t file_offset;
off_t devPtr_offset;
size_t size;
}batch;
}u;
CUfileHandle_t fh;
CUfileOpcode_t opcode;
void *cookie;
}CUfileIOParams_t;
typedef struct CUfileIOEvents {
void *cookie;
CUfileStatus_t status; /* status of the operation */
size_t ret; /* -ve error or amount of I/O done. */
}CUfileIOEvents_t;
typedef void* CUfileBatchHandle_t;
CUfileError_t cuFileBatchIOSetUp(CUfileBatchHandle_t *batch_idp, unsigned nr);
CUfileError_t cuFileBatchIOSubmit(CUfileBatchHandle_t batch_idp, unsigned nr, CUfileIOParams_t *iocbp, unsigned int flags);
CUfileError_t cuFileBatchIOGetStatus(CUfileBatchHandle_t batch_idp, unsigned min_nr, unsigned* nr,
CUfileIOEvents_t *iocbp, struct timespec* timeout);
CUfileError_t cuFileBatchIOCancel(CUfileBatchHandle_t batch_idp);
void cuFileBatchIODestroy(CUfileBatchHandle_t batch_idp);
//Async API's with cuda streams
// cuFile stream API registration flags
// buffer pointer offset is set at submission time
#define CU_FILE_STREAM_FIXED_BUF_OFFSET 1
// file offset is set at submission time
#define CU_FILE_STREAM_FIXED_FILE_OFFSET 2
// file size is set at submission time
#define CU_FILE_STREAM_FIXED_FILE_SIZE 4
// size, offset and buffer offset are 4k aligned
#define CU_FILE_STREAM_PAGE_ALIGNED_INPUTS 8
/**
*@brief
* @param fh The cuFile handle for the file.
* @param bufPtr_base base address of buffer in device or host memory
* @param size_p pointer to size bytes to read
* @note *size_p if the size is not known at the time of submission, then must provide the max possible size for I/O request.
* @param file_offset_p pointer to file-offset from begining of the file
* @param bufPtr_offset_p pointer to offset relative to the bufPtr_base pointer to read into.
* @param bytes_read_p pointer to the number of bytes that were successfully read.
* @param CUstream stream cuda stream for the operation.
*
* @return size of bytes successfully read in *bytes_read_p
* @return -1 on error, in which case errno is set to indicate filesystem errors.
* @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
*
* @note If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
* @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
* for cases where the BAR1 memory size is smaller than the size of the allocated memory.
* @note If the stream is registered with cuFileStreamRegister, the IO setup and teardown overhead will be reduced.
* @note on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
*
*
* @see cuFileBufRegister
* @see cuFileHandleRegister
* @see cuFileRead
* @see cuFileStreamRegister
* @see cuFileStreamDeregister
*/
CUfileError_t cuFileReadAsync(CUfileHandle_t fh, void *bufPtr_base,
size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_read_p, CUstream stream);
/**
*@brief
* @param fh The cuFile handle for the file.
* @param bufPtr_base base address of buffer in device or host memory
* @param size_p pointer to size bytes to write.
* @note *size_p if the size is not known at the time of submission, then must provide the max possible size for I/O request.
* @param file_offset_p pointer to file-offset from begining of the file
* @param bufPtr_offset_p pointer to offset relative to the bufPtr_base pointer to write from.
* @param bytes_written_p pointer to the number of bytes that were successfully written.
* @param CUstream cuda stream for the operation.
*
* @return size of bytes successfully written in *bytes_written_p
* @return -1 on error, in which case errno is set to indicate filesystem errors.
* @return all other errors will return a negative integer value of @ref CUfileOpError enum value.
*
* @note If the bufPtr_base is not registered with @ref cuFileBufRegister, the data will be buffered through preallocated pinned buffers.
* @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
* for cases where the BAR1 memory size is smaller than the size of the allocated memory.
* @note If the stream is registered with cuFileStreamRegister prior to this call, the IO setup and teardown overhead will be reduced.
* @note on cuda stream errors, the user must call cuFileStreamDeregister to release any outstanding cuFile resources for the stream.
*
* @see cuFileBufRegister
* @see cuFileHandleRegister
* @see cuFileWrite
* @see cuFileStreamRegister
* @see cuFileStreamDeregister
*/
CUfileError_t cuFileWriteAsync(CUfileHandle_t fh, void *bufPtr_base,
size_t *size_p, off_t *file_offset_p, off_t *bufPtr_offset_p, ssize_t *bytes_written_p, CUstream stream);
/**
*@brief
* @param CUstream cuda stream for the operation.
* @param flags for the stream to improve the stream execution of IO based on input parameters.
* @note supported FLAGS are
* @note CU_FILE_STREAM_FIXED_BUF_OFFSET - buffer pointer offset is set at submission time
* @note CU_FILE_STREAM_FIXED_FILE_OFFSET - file offset is set at submission time
* @note CU_FILE_STREAM_FIXED_FILE_SIZE - file size is set at submission time
* @note CU_FILE_STREAM_PAGE_ALIGNED_INPUTS - size, offset and buffer offset are 4k aligned
*
* @note allocates resources needed to support cuFile operations asynchronously for the cuda stream
* @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
* for cases where the BAR1 memory size is smaller than the size of the allocated memory.
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
* @return CU_FILE_INVALID_VALUE if the stream is invalid
*
* @see cuFileReadAsync
* @see cuFileWriteAsync
* @see cuFileStreamDeregister
*/
CUfileError_t cuFileStreamRegister(CUstream stream, unsigned flags);
/**
*@brief
* @param CUstream cuda stream for the operation.
*
* @note deallocates resources used by previous cuFile asynchronous operations for the cuda stream
* @note highly recommend to call after cuda stream errors to release any outstanding cuFile resources for this stream
* @note must be called before cuStreamDestroy call for the specified stream.
* @note This is useful for applications that need to perform IO to unaligned file offsets and/or size. This is also recommended
* for cases where the BAR1 memory size is smaller than the size of the allocated memory.
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_DRIVER_NOT_INITIALIZED if the driver is not initialized
* @return CU_FILE_INVALID_VALUE if the stream is invalid
*
* @see cuFileReadAsync
* @see cuFileWriteAsync
* @see cuFileStreamRegister
*/
CUfileError_t cuFileStreamDeregister(CUstream stream);
/**
*@brief
* @returns cufile library version.
*
* @The version is returned as (1000 major + 10 minor).
* @For example, CUFILE 1.7.0 would be represented by 1070.
* @note This is useful for applications that need to inquire the library.
*
* @return CU_FILE_SUCCESS on success
* @return CU_FILE_INVALID_VALUE if the input parameter is null.
* @return CU_FILE_DRIVER_VERSION_READ_ERROR if the version is not available.
*
*/
CUfileError_t cuFileGetVersion(int *version);
#pragma GCC visibility pop
/// @cond DOXYGEN_SKIP_MACRO
#endif // CUFILE_H
/// @endcond
#ifdef __cplusplus
}
#endif