StarPU Handbook - StarPU Introduction
|
Data management is done at a high level in StarPU: rather than accessing a mere list of contiguous buffers, the tasks may manipulate data that are described by a high-level construct which we call a data interface. More...
Data Structures | |
struct | starpu_data_copy_methods |
struct | starpu_data_interface_ops |
struct | starpu_matrix_interface |
struct | starpu_coo_interface |
struct | starpu_block_interface |
struct | starpu_tensor_interface |
struct | starpu_ndim_interface |
struct | starpu_vector_interface |
struct | starpu_variable_interface |
struct | starpu_csr_interface |
struct | starpu_bcsr_interface |
struct | starpu_multiformat_data_interface_ops |
struct | starpu_multiformat_interface |
Accessing Matrix Data Interfaces | |
struct starpu_data_interface_ops | starpu_interface_matrix_ops |
void | starpu_matrix_data_register (starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize) |
void | starpu_matrix_data_register_allocsize (starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize, size_t allocsize) |
void | starpu_matrix_ptr_register (starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld) |
uint32_t | starpu_matrix_get_nx (starpu_data_handle_t handle) |
uint32_t | starpu_matrix_get_ny (starpu_data_handle_t handle) |
uint32_t | starpu_matrix_get_local_ld (starpu_data_handle_t handle) |
uintptr_t | starpu_matrix_get_local_ptr (starpu_data_handle_t handle) |
size_t | starpu_matrix_get_elemsize (starpu_data_handle_t handle) |
size_t | starpu_matrix_get_allocsize (starpu_data_handle_t handle) |
#define | STARPU_MATRIX_GET_PTR(interface) |
#define | STARPU_MATRIX_GET_DEV_HANDLE(interface) |
#define | STARPU_MATRIX_GET_OFFSET(interface) |
#define | STARPU_MATRIX_GET_NX(interface) |
#define | STARPU_MATRIX_GET_NY(interface) |
#define | STARPU_MATRIX_GET_LD(interface) |
#define | STARPU_MATRIX_GET_ELEMSIZE(interface) |
#define | STARPU_MATRIX_GET_ALLOCSIZE(interface) |
#define | STARPU_MATRIX_SET_NX(interface, newnx) |
#define | STARPU_MATRIX_SET_NY(interface, newny) |
#define | STARPU_MATRIX_SET_LD(interface, newld) |
Accessing COO Data Interfaces | |
struct starpu_data_interface_ops | starpu_interface_coo_ops |
void | starpu_coo_data_register (starpu_data_handle_t *handleptr, int home_node, uint32_t nx, uint32_t ny, uint32_t n_values, uint32_t *columns, uint32_t *rows, uintptr_t values, size_t elemsize) |
#define | STARPU_COO_GET_COLUMNS(interface) |
#define | STARPU_COO_GET_COLUMNS_DEV_HANDLE(interface) |
#define | STARPU_COO_GET_ROWS(interface) |
#define | STARPU_COO_GET_ROWS_DEV_HANDLE(interface) |
#define | STARPU_COO_GET_VALUES(interface) |
#define | STARPU_COO_GET_VALUES_DEV_HANDLE(interface) |
#define | STARPU_COO_GET_OFFSET |
#define | STARPU_COO_GET_NX(interface) |
#define | STARPU_COO_GET_NY(interface) |
#define | STARPU_COO_GET_NVALUES(interface) |
#define | STARPU_COO_GET_ELEMSIZE(interface) |
Block Data Interface | |
struct starpu_data_interface_ops | starpu_interface_block_ops |
void | starpu_block_data_register (starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize) |
void | starpu_block_ptr_register (starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz) |
uint32_t | starpu_block_get_nx (starpu_data_handle_t handle) |
uint32_t | starpu_block_get_ny (starpu_data_handle_t handle) |
uint32_t | starpu_block_get_nz (starpu_data_handle_t handle) |
uint32_t | starpu_block_get_local_ldy (starpu_data_handle_t handle) |
uint32_t | starpu_block_get_local_ldz (starpu_data_handle_t handle) |
uintptr_t | starpu_block_get_local_ptr (starpu_data_handle_t handle) |
size_t | starpu_block_get_elemsize (starpu_data_handle_t handle) |
#define | STARPU_BLOCK_GET_PTR(interface) |
#define | STARPU_BLOCK_GET_DEV_HANDLE(interface) |
#define | STARPU_BLOCK_GET_OFFSET(interface) |
#define | STARPU_BLOCK_GET_NX(interface) |
#define | STARPU_BLOCK_GET_NY(interface) |
#define | STARPU_BLOCK_GET_NZ(interface) |
#define | STARPU_BLOCK_GET_LDY(interface) |
#define | STARPU_BLOCK_GET_LDZ(interface) |
#define | STARPU_BLOCK_GET_ELEMSIZE(interface) |
Ndim Array Data Interface | |
struct starpu_data_interface_ops | starpu_interface_ndim_ops |
void | starpu_ndim_data_register (starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t *ldn, uint32_t *nn, size_t ndim, size_t elemsize) |
void | starpu_ndim_ptr_register (starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t *ldn) |
uint32_t * | starpu_ndim_get_nn (starpu_data_handle_t handle) |
uint32_t | starpu_ndim_get_ni (starpu_data_handle_t handle, size_t i) |
uint32_t * | starpu_ndim_get_local_ldn (starpu_data_handle_t handle) |
uint32_t | starpu_ndim_get_local_ldi (starpu_data_handle_t handle, size_t i) |
uintptr_t | starpu_ndim_get_local_ptr (starpu_data_handle_t handle) |
size_t | starpu_ndim_get_ndim (starpu_data_handle_t handle) |
size_t | starpu_ndim_get_elemsize (starpu_data_handle_t handle) |
#define | STARPU_NDIM_GET_PTR(interface) |
#define | STARPU_NDIM_GET_DEV_HANDLE(interface) |
#define | STARPU_NDIM_GET_OFFSET(interface) |
#define | STARPU_NDIM_GET_NN(interface) |
#define | STARPU_NDIM_GET_LDN(interface) |
#define | STARPU_NDIM_GET_NDIM(interface) |
#define | STARPU_NDIM_GET_ELEMSIZE(interface) |
Vector Data Interface | |
struct starpu_data_interface_ops | starpu_interface_vector_ops |
void | starpu_vector_data_register (starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize) |
void | starpu_vector_data_register_allocsize (starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize) |
void | starpu_vector_ptr_register (starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset) |
uint32_t | starpu_vector_get_nx (starpu_data_handle_t handle) |
size_t | starpu_vector_get_elemsize (starpu_data_handle_t handle) |
size_t | starpu_vector_get_allocsize (starpu_data_handle_t handle) |
uintptr_t | starpu_vector_get_local_ptr (starpu_data_handle_t handle) |
#define | STARPU_VECTOR_GET_PTR(interface) |
#define | STARPU_VECTOR_GET_DEV_HANDLE(interface) |
#define | STARPU_VECTOR_GET_OFFSET(interface) |
#define | STARPU_VECTOR_GET_NX(interface) |
#define | STARPU_VECTOR_GET_ELEMSIZE(interface) |
#define | STARPU_VECTOR_GET_ALLOCSIZE(interface) |
#define | STARPU_VECTOR_GET_SLICE_BASE(interface) |
#define | STARPU_VECTOR_SET_NX(interface, newnx) |
Variable Data Interface | |
struct starpu_data_interface_ops | starpu_interface_variable_ops |
void | starpu_variable_data_register (starpu_data_handle_t *handle, int home_node, uintptr_t ptr, size_t size) |
void | starpu_variable_ptr_register (starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset) |
size_t | starpu_variable_get_elemsize (starpu_data_handle_t handle) |
uintptr_t | starpu_variable_get_local_ptr (starpu_data_handle_t handle) |
#define | STARPU_VARIABLE_GET_PTR(interface) |
#define | STARPU_VARIABLE_GET_OFFSET(interface) |
#define | STARPU_VARIABLE_GET_ELEMSIZE(interface) |
#define | STARPU_VARIABLE_GET_DEV_HANDLE(interface) |
Void Data Interface | |
struct starpu_data_interface_ops | starpu_interface_void_ops |
void | starpu_void_data_register (starpu_data_handle_t *handle) |
Basic API | |
void | starpu_data_register (starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops) |
void | starpu_data_register_ops (struct starpu_data_interface_ops *ops) |
void | starpu_data_ptr_register (starpu_data_handle_t handle, unsigned node) |
void | starpu_data_register_same (starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc) |
void * | starpu_data_handle_to_pointer (starpu_data_handle_t handle, unsigned node) |
void * | starpu_data_get_local_ptr (starpu_data_handle_t handle) |
void * | starpu_data_get_interface_on_node (starpu_data_handle_t handle, unsigned memory_node) |
enum starpu_data_interface_id | starpu_data_get_interface_id (starpu_data_handle_t handle) |
int | starpu_data_pack_node (starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) |
int | starpu_data_pack (starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count) |
int | starpu_data_peek_node (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) |
int | starpu_data_peek (starpu_data_handle_t handle, void *ptr, size_t count) |
int | starpu_data_unpack_node (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) |
int | starpu_data_unpack (starpu_data_handle_t handle, void *ptr, size_t count) |
size_t | starpu_data_get_size (starpu_data_handle_t handle) |
size_t | starpu_data_get_alloc_size (starpu_data_handle_t handle) |
starpu_ssize_t | starpu_data_get_max_size (starpu_data_handle_t handle) |
int | starpu_data_get_home_node (starpu_data_handle_t handle) |
void | starpu_data_print (starpu_data_handle_t handle, unsigned node, FILE *stream) |
int | starpu_data_interface_get_next_id (void) |
int | starpu_interface_copy (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data) |
int | starpu_interface_copy2d (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, void *async_data) |
int | starpu_interface_copy3d (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks1, size_t ld1_src, size_t ld1_dst, size_t numblocks2, size_t ld2_src, size_t ld2_dst, void *async_data) |
int | starpu_interface_copy4d (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks1, size_t ld1_src, size_t ld1_dst, size_t numblocks2, size_t ld2_src, size_t ld2_dst, size_t numblocks3, size_t ld3_src, size_t ld3_dst, void *async_data) |
int | starpu_interface_copynd (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t elemsize, size_t ndim, uint32_t *nn, uint32_t *ldn_src, uint32_t *ldn_dst, void *async_data) |
void | starpu_interface_start_driver_copy_async (unsigned src_node, unsigned dst_node, double *start) |
void | starpu_interface_end_driver_copy_async (unsigned src_node, unsigned dst_node, double start) |
void | starpu_interface_data_copy (unsigned src_node, unsigned dst_node, size_t size) |
uintptr_t | starpu_malloc_on_node_flags (unsigned dst_node, size_t size, int flags) |
uintptr_t | starpu_malloc_on_node (unsigned dst_node, size_t size) |
void | starpu_free_on_node_flags (unsigned dst_node, uintptr_t addr, size_t size, int flags) |
void | starpu_free_on_node (unsigned dst_node, uintptr_t addr, size_t size) |
void | starpu_malloc_on_node_set_default_flags (unsigned node, int flags) |
MAP API | |
uintptr_t | starpu_interface_map (uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret) |
int | starpu_interface_unmap (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size) |
int | starpu_interface_update_map (uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) |
Multiformat Data Interface | |
void | starpu_multiformat_data_register (starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops) |
#define | STARPU_MULTIFORMAT_GET_CPU_PTR(interface) |
#define | STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) |
#define | STARPU_MULTIFORMAT_GET_HIP_PTR(interface) |
#define | STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) |
#define | STARPU_MULTIFORMAT_GET_NX(interface) |
uint32_t | starpu_hash_crc32c_be_n (const void *input, size_t n, uint32_t inputcrc) |
uint32_t | starpu_hash_crc32c_be_ptr (void *input, uint32_t inputcrc) |
uint32_t | starpu_hash_crc32c_be (uint32_t input, uint32_t inputcrc) |
uint32_t | starpu_hash_crc32c_string (const char *str, uint32_t inputcrc) |
Data management is done at a high level in StarPU: rather than accessing a mere list of contiguous buffers, the tasks may manipulate data that are described by a high-level construct which we call a data interface.
An example of a data interface is the "vector" interface which describes a contiguous data array on a specific memory node. This interface is a simple structure containing the number of elements in the array, the size of the elements, and the address of the array in the appropriate address space (this address may be invalid if there is no valid copy of the array in the memory node). More information on the data interfaces provided by StarPU is given in Data Interfaces.
When a piece of data managed by StarPU is used by a task, the task implementation is given a pointer to an interface describing a valid copy of the data that is accessible from the current processing unit.
Every worker is associated with a memory node which is a logical abstraction of the address space from which the processing unit gets its data. For instance, the memory node associated with the different CPU workers represents main memory (RAM), the memory node associated with a GPU is DRAM embedded on the device. Every memory node is identified by a logical index which is accessible from the function starpu_worker_get_memory_node(). When registering a piece of data to StarPU, the specified memory node indicates where the piece of data initially resides (we also call this memory node the home node of a piece of data).
In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id() and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node numbers as seen by the Operating System and NUMA node numbers as seen by StarPU.
There are several ways to register a memory region so that it can be managed by StarPU. StarPU provides data interfaces for vectors, 2D matrices, 3D matrices as well as BCSR and CSR sparse matrices.
Each data interface is provided with a set of field access functions. The ones using a void * parameter are meant to be used in codelet implementations (see for example the code in VectorScalingUsingStarPUAPI).
Applications can provide their own interface as shown in DefiningANewDataInterface.
struct starpu_data_copy_methods |
Define the per-interface methods. If the starpu_data_copy_methods::any_to_any method is provided, it will be used by default if no specific method is provided. It can still be useful to provide a more specific method, e.g. when particular CUDA, HIP or OpenCL support is available.
See DefiningANewDataInterface_copy for more details.
Data Fields | |
int(* | can_copy )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) |
int(* | ram_to_ram )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | ram_to_cuda )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | ram_to_hip )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | ram_to_opencl )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | ram_to_max_fpga )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | cuda_to_ram )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | cuda_to_cuda )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | hip_to_ram )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | hip_to_hip )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | opencl_to_ram )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | opencl_to_opencl )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | max_fpga_to_ram )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | ram_to_cuda_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream) |
int(* | cuda_to_ram_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream) |
int(* | cuda_to_cuda_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream) |
int(* | ram_to_hip_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream) |
int(* | hip_to_ram_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream) |
int(* | hip_to_hip_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream) |
int(* | ram_to_opencl_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) |
int(* | opencl_to_ram_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) |
int(* | opencl_to_opencl_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) |
int(* | ram_to_max_fpga_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | max_fpga_to_ram_async )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | any_to_any )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) |
int(* starpu_data_copy_methods::can_copy) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) |
If defined, allow the interface to declare whether it supports transferring from src_interface on node src_node to dst_interface on node dst_node, run from node handling_node. If not defined, it is assumed that the interface supports all transfers.
int(* starpu_data_copy_methods::ram_to_ram) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
CPU node. Return 0 on success.
int(* starpu_data_copy_methods::ram_to_cuda) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
CUDA node. Return 0 on success.
int(* starpu_data_copy_methods::ram_to_hip) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
HIP node. Return 0 on success.
int(* starpu_data_copy_methods::ram_to_opencl) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
OpenCL node. Return 0 on success.
int(* starpu_data_copy_methods::ram_to_max_fpga) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
FPGA node. Return 0 on success.
int(* starpu_data_copy_methods::cuda_to_ram) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CUDA node to the dst_interface
interface on the dst_node
CPU node. Return 0 on success.
int(* starpu_data_copy_methods::cuda_to_cuda) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CUDA node to the dst_interface
interface on the dst_node
CUDA node. Return 0 on success.
int(* starpu_data_copy_methods::hip_to_ram) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
HIP node to the dst_interface
interface on the dst_node
CPU node. Return 0 on success.
int(* starpu_data_copy_methods::hip_to_hip) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
HIP node to the dst_interface
interface on the dst_node
HIP node. Return 0 on success.
int(* starpu_data_copy_methods::opencl_to_ram) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
OpenCL node to the dst_interface
interface on the dst_node
CPU node. Return 0 on success.
int(* starpu_data_copy_methods::opencl_to_opencl) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
OpenCL node to the dst_interface
interface on the dst_node
OpenCL node. Return 0 on success.
int(* starpu_data_copy_methods::max_fpga_to_ram) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
FPGA node to the dst_interface
interface on the dst_node
CPU node. Return 0 on success.
int(* starpu_data_copy_methods::ram_to_cuda_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
CUDA node, using the given stream. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::cuda_to_ram_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream) |
Define how to copy data from the src_interface
interface on the src_node
CUDA node to the dst_interface
interface on the dst_node
CPU node, using the given stream. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::cuda_to_cuda_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream) |
Define how to copy data from the src_interface
interface on the src_node
CUDA node to the dst_interface
interface on the dst_node
CUDA node, using the given stream. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::ram_to_hip_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
HIP node, using the given stream. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::hip_to_ram_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream) |
Define how to copy data from the src_interface
interface on the src_node
HIP node to the dst_interface
interface on the dst_node
CPU node, using the given stream. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::hip_to_hip_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream) |
Define how to copy data from the src_interface
interface on the src_node
HIP node to the dst_interface
interface on the dst_node
HIP node, using the given stream. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::ram_to_opencl_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
OpenCL node, by recording in event (a pointer to a cl_event) the event of the last submitted transfer. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::opencl_to_ram_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) |
Define how to copy data from the src_interface
interface on the src_node
OpenCL node to the dst_interface
interface on the dst_node
CPU node, by recording in event (a pointer to a cl_event) the event of the last submitted transfer. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::opencl_to_opencl_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) |
Define how to copy data from the src_interface
interface on the src_node
OpenCL node to the dst_interface
interface on the dst_node
OpenCL node, by recording in event (a pointer to a cl_event) the event of the last submitted transfer. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::ram_to_max_fpga_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
CPU node to the dst_interface
interface on the dst_node
FPGA node. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::max_fpga_to_ram_async) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Define how to copy data from the src_interface
interface on the src_node
FPGA node to the dst_interface
interface on the dst_node
CPU node. Must return 0 if the transfer actually completed synchronously, or -EAGAIN
if at least some transfers are still ongoing and should be awaited by the core.
int(* starpu_data_copy_methods::any_to_any) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) |
Define how to copy data from the src_interface
interface on the src_node
node to the dst_interface
interface on the dst_node
node. This is meant to be implemented through the starpu_interface_copy() helper, to which async_data should be passed as such, and will be used to manage asynchronicity. This must return -EAGAIN
if any of the starpu_interface_copy() calls has returned -EAGAIN
(i.e. at least some transfer is still ongoing), and return 0 otherwise.
This can only be implemented if the interface has ready-to-send data blocks. If the interface is more involved than this, i.e. it needs to collect pieces of data before transferring, starpu_data_interface_ops::pack_data and starpu_data_interface_ops::peek_data should be implemented instead, and the core will just transfer the resulting data buffer.
struct starpu_data_interface_ops |
Per-interface data management methods.
Data Fields | |
void(* | register_data_handle )(starpu_data_handle_t handle, int home_node, void *data_interface) |
void(* | unregister_data_handle )(starpu_data_handle_t handle) |
starpu_ssize_t(* | allocate_data_on_node )(void *data_interface, unsigned node) |
void(* | free_data_on_node )(void *data_interface, unsigned node) |
void(* | cache_data_on_node )(void *cached_interface, void *src_interface, unsigned node) |
void(* | reuse_data_on_node )(void *dst_data_interface, const void *cached_interface, unsigned node) |
int(* | map_data )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | unmap_data )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
int(* | update_map )(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
void(* | init )(void *data_interface) |
const struct starpu_data_copy_methods * | copy_methods |
void *(* | handle_to_pointer )(starpu_data_handle_t handle, unsigned node) |
void *(* | to_pointer )(void *data_interface, unsigned node) |
size_t(* | get_size )(starpu_data_handle_t handle) |
size_t(* | get_alloc_size )(starpu_data_handle_t handle) |
size_t(* | get_max_size )(starpu_data_handle_t handle) |
uint32_t(* | footprint )(starpu_data_handle_t handle) |
uint32_t(* | alloc_footprint )(starpu_data_handle_t handle) |
int(* | compare )(void *data_interface_a, void *data_interface_b) |
int(* | alloc_compare )(void *data_interface_a, void *data_interface_b) |
void(* | display )(starpu_data_handle_t handle, FILE *f) |
starpu_ssize_t(* | describe )(void *data_interface, char *buf, size_t size) |
enum starpu_data_interface_id | interfaceid |
size_t | interface_size |
char | is_multiformat |
char | dontcache |
struct starpu_multiformat_data_interface_ops *(* | get_mf_ops )(void *data_interface) |
int(* | pack_data )(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) |
int(* | peek_data )(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) |
int(* | unpack_data )(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) |
int(* | pack_meta )(void *data_interface, void **ptr, starpu_ssize_t *count) |
int(* | unpack_meta )(void **data_interface, void *ptr, starpu_ssize_t *count) |
int(* | free_meta )(void *data_interface) |
char * | name |
void(* starpu_data_interface_ops::register_data_handle) (starpu_data_handle_t handle, int home_node, void *data_interface) |
Register an existing interface into a data handle.
This iterates over all memory nodes to initialize all fields of the data interface on each of them. Since data is not allocated yet except on the home node, pointers should be left as NULL except on the home_node
(if >= 0), for which the pointers should be copied from the given data_interface
, which was filled with the application's pointers.
This method is mandatory.
See DefiningANewDataInterface_registration for more details.
void(* starpu_data_interface_ops::unregister_data_handle) (starpu_data_handle_t handle) |
Unregister a data handle.
This iterates over all memory nodes to free any pointer in the data interface on each of them.
At this point, free_data_on_node has already been called on each of them. This just clears anything that would still be left.
See DefiningANewDataInterface_registration for more details.
starpu_ssize_t(* starpu_data_interface_ops::allocate_data_on_node) (void *data_interface, unsigned node) |
Allocate data for the interface on a given node. This should use starpu_malloc_on_node() to perform the allocation(s), and fill the pointers in the data interface. It should return the size of the allocated memory, or -ENOMEM if memory could not be allocated.
Note that the memory node can be CPU memory, GPU memory, or even disk area. The result returned by starpu_malloc_on_node() should be just stored as uintptr_t without trying to interpret it since it may be a GPU pointer, a disk descriptor, etc.
This method is mandatory to be able to support memory nodes.
See DefiningANewDataInterface_pointers for more details.
void(* starpu_data_interface_ops::free_data_on_node) (void *data_interface, unsigned node) |
Free data of the interface on a given node.
This method is mandatory to be able to support memory nodes.
See DefiningANewDataInterface_pointers for more details.
void(* starpu_data_interface_ops::cache_data_on_node) (void *cached_interface, void *src_interface, unsigned node) |
Cache the buffers from the given node to a caching interface.
This method is optional, mostly useful when also making starpu_data_interface_ops::unregister_data_handle check that pointers are NULL.
src_interface
is an interface that already has buffers allocated, but which we don't need any more. cached_interface
is a new interface into which the buffer pointers should be transferred, for later reuse when allocating data of the same kind.
Usually we can just memcpy over the set of pointers and descriptions (this is what StarPU does when this method is not implemented), but if unregister_data_handle checks that pointers are NULL, we need to additionally clear the pointers in src_interface
. Also, it is not useful to copy the whole interface, only the pointers need to be copied (essentially the pointers that starpu_data_interface_ops::reuse_data_on_node will then transfer into a new handle interface), as well as the properties that starpu_data_interface_ops::compare (or starpu_data_interface_ops::alloc_compare if defined) needs for comparing interfaces for caching compatibility.
When this method is not defined, StarPU will just copy the src_interface
into cached_interface
.
See VariableSizeDataInterface and DefiningANewDataInterface_pointers for more details.
void(* starpu_data_interface_ops::reuse_data_on_node) (void *dst_data_interface, const void *cached_interface, unsigned node) |
Reuse on the given node the buffers of the provided interface.
This method is optional, mostly useful when also defining alloc_footprint to share tiles of the same allocation size but different shapes, or when the interface contains pointers which are initialized at registration (e.g. nn array in the ndim interface).
cached_interface
is an already-allocated buffer that we want to reuse, and dst_data_interface
is an interface in which we want to install that already-allocated buffer. Usually we can just memcpy over the set of pointers and descriptions. But e.g. with 2D tiles the ld value may not be correct, and memcpy would wrongly overwrite it in dst_data_interface, i.e. reusing a vertical tile allocation for a horizontal tile, or vice-versa.
reuse_data_on_node should thus copy over pointers, and define fields that are usually set by allocate_data_on_node (e.g. ld).
See VariableSizeDataInterface and DefiningANewDataInterface_pointers for more details.
int(* starpu_data_interface_ops::map_data) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Map data from a source to a destination. Define function starpu_interface_map() to set this field. See DefiningANewDataInterface_pointers for more details.
int(* starpu_data_interface_ops::unmap_data) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Unmap data from a source to a destination. Define function starpu_interface_unmap() to set this field. See DefiningANewDataInterface_pointers for more details.
int(* starpu_data_interface_ops::update_map) (void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) |
Update map data from a source to a destination. Define function starpu_interface_update_map() to set this field. See DefiningANewDataInterface_pointers for more details.
void(* starpu_data_interface_ops::init) (void *data_interface) |
Initialize the interface. This method is optional. It is called when initializing the handle on all the memory nodes.
const struct starpu_data_copy_methods* starpu_data_interface_ops::copy_methods |
Struct with pointers to functions for performing ram/cuda/opencl synchronous and asynchronous transfers.
This field is mandatory to be able to support memory nodes, except disk nodes which can be supported by just implementing starpu_data_interface_ops::pack_data and starpu_data_interface_ops::unpack_data.
void *(* starpu_data_interface_ops::handle_to_pointer) (starpu_data_handle_t handle, unsigned node) |
This method is only required if starpu_data_interface_ops::to_pointer is not implemented.
void *(* starpu_data_interface_ops::to_pointer) (void *data_interface, unsigned node) |
Return the current pointer (if any) for the given interface on the given node.
This method is only required for starpu_data_handle_to_pointer() and starpu_data_get_local_ptr(), and for disk support.
size_t(* starpu_data_interface_ops::get_size) (starpu_data_handle_t handle) |
Return an estimation of the size of data, for performance models and tracing feedback.
size_t(* starpu_data_interface_ops::get_alloc_size) (starpu_data_handle_t handle) |
Return an estimation of the size of allocated data, for allocation management. If not specified, the starpu_data_interface_ops::get_size method is used instead.
size_t(* starpu_data_interface_ops::get_max_size) (starpu_data_handle_t handle) |
Return the maximum size that the data may need to increase to. For instance, in the case of compressed matrix tiles this is the size when the block is fully dense. This is currently only used for feedback tools.
uint32_t(* starpu_data_interface_ops::footprint) (starpu_data_handle_t handle) |
Return a 32bit footprint which characterizes the data size and layout (nx, ny, ld, elemsize, etc.), required for indexing performance models.
starpu_hash_crc32c_be() and alike can be used to produce this 32bit value from various types of values.
uint32_t(* starpu_data_interface_ops::alloc_footprint) (starpu_data_handle_t handle) |
Return a 32bit footprint which characterizes the data allocation, to be used for indexing allocation cache. If not specified, the starpu_data_interface_ops::footprint method is used instead. If specified, alloc_compare should be set to provide the strict comparison, and reuse_data_on_node should be set to provide correct buffer reuse.
int(* starpu_data_interface_ops::compare) (void *data_interface_a, void *data_interface_b) |
Compare the data size and layout of two interfaces (nx, ny, ld, elemsize, etc.), to be used for indexing performance models. It should return 1 if the two interfaces size and layout match computation-wise, and 0 otherwise. It does not compare the actual content of the interfaces.
int(* starpu_data_interface_ops::alloc_compare) (void *data_interface_a, void *data_interface_b) |
Compare the data allocation of two interfaces, to be used for indexing the allocation cache. It should return 1 if the two interfaces are allocation-compatible, i.e. basically have the same alloc_size, and 0 otherwise. If not specified, the starpu_data_interface_ops::compare method is used instead.
void(* starpu_data_interface_ops::display) (starpu_data_handle_t handle, FILE *f) |
Dump the sizes of a handle to a file. This is required for performance models.
starpu_ssize_t(* starpu_data_interface_ops::describe) (void *data_interface, char *buf, size_t size) |
Describe the data into a string in a brief way, such as one letter to describe the type of data, and the data dimensions. This is required for tracing feedback.
enum starpu_data_interface_id starpu_data_interface_ops::interfaceid |
An identifier that is unique to each interface.
size_t starpu_data_interface_ops::interface_size |
Size of the interface data descriptor.
char starpu_data_interface_ops::dontcache |
If set to non-zero, StarPU will never try to reuse an allocated buffer for a different handle. This can be notably useful for application-defined interfaces which have a dynamic size, and for which it thus does not make sense to reuse the buffer since it will probably not have the proper size.
int(* starpu_data_interface_ops::pack_data) (starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) |
Pack the data handle into a contiguous buffer at the address allocated with starpu_malloc_flags(ptr, size, 0)
(and thus returned in ptr
) and set the size of the newly created buffer in count
. If ptr
is NULL
, the function should not copy the data in the buffer but just set count to the size of the buffer which would have been allocated. The special value -1 indicates the size is yet unknown.
This method (and starpu_data_interface_ops::unpack_data) is required for disk support if the starpu_data_copy_methods::any_to_any method is not implemented (because the in-memory data layout is too complex).
This is also required for MPI support if there is no registered MPI data type.
int(* starpu_data_interface_ops::peek_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) |
Read the data handle from the contiguous buffer at the address ptr
of size count
.
int(* starpu_data_interface_ops::unpack_data) (starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) |
Unpack the data handle from the contiguous buffer at the address ptr
of size count
. The memory at the address ptr
should be freed after the data unpacking operation.
int(* starpu_data_interface_ops::pack_meta) (void *data_interface, void **ptr, starpu_ssize_t *count) |
Pack the interface into a contiguous buffer and set the size of the newly created buffer in count
. This function is used in master slave mode for data interfaces with a dynamic content.
int(* starpu_data_interface_ops::unpack_meta) (void **data_interface, void *ptr, starpu_ssize_t *count) |
Unpack the interface from the given buffer and set the size of the unpacked data in count
. This function is used in master slave mode for data interfaces with a dynamic content.
int(* starpu_data_interface_ops::free_meta) (void *data_interface) |
Free the memory allocated by a previous call to unpack_meta().
char* starpu_data_interface_ops::name |
Name of the interface
struct starpu_matrix_interface |
Matrix interface for dense matrices
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
Identifier of the interface |
uintptr_t | ptr |
local pointer of the matrix |
uintptr_t | dev_handle |
device handle of the matrix |
size_t | offset |
offset in the matrix |
uint32_t | nx |
number of elements on the x-axis of the matrix |
uint32_t | ny |
number of elements on the y-axis of the matrix |
uint32_t | ld |
number of elements between each row of the matrix. May be equal to starpu_matrix_interface::nx when there is no padding. |
size_t | elemsize |
size of the elements of the matrix |
size_t | allocsize |
size actually currently allocated |
struct starpu_coo_interface |
COO Matrices
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
identifier of the interface |
uint32_t * | columns |
column array of the matrix |
uint32_t * | rows |
row array of the matrix |
uintptr_t | values |
values of the matrix |
uint32_t | nx |
number of elements on the x-axis of the matrix |
uint32_t | ny |
number of elements on the y-axis of the matrix |
uint32_t | n_values |
number of values registered in the matrix |
size_t | elemsize |
size of the elements of the matrix |
struct starpu_block_interface |
Block interface for 3D dense blocks
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
identifier of the interface |
uintptr_t | ptr |
local pointer of the block |
uintptr_t | dev_handle |
device handle of the block. |
size_t | offset |
offset in the block. |
uint32_t | nx |
number of elements on the x-axis of the block. |
uint32_t | ny |
number of elements on the y-axis of the block. |
uint32_t | nz |
number of elements on the z-axis of the block. |
uint32_t | ldy |
number of elements between two lines |
uint32_t | ldz |
number of elements between two planes |
size_t | elemsize |
size of the elements of the block. |
struct starpu_tensor_interface |
Tensor interface for 4D dense tensors
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
identifier of the interface |
uintptr_t | ptr |
local pointer of the tensor |
uintptr_t | dev_handle |
device handle of the tensor. |
size_t | offset |
offset in the tensor. |
uint32_t | nx |
number of elements on the x-axis of the tensor. |
uint32_t | ny |
number of elements on the y-axis of the tensor. |
uint32_t | nz |
number of elements on the z-axis of the tensor. |
uint32_t | nt |
number of elements on the t-axis of the tensor. |
uint32_t | ldy |
number of elements between two lines |
uint32_t | ldz |
number of elements between two planes |
uint32_t | ldt |
number of elements between two cubes |
size_t | elemsize |
size of the elements of the tensor. |
struct starpu_ndim_interface |
ndim interface for ndim array
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
identifier of the interface |
uintptr_t | ptr |
local pointer of the ndim |
uintptr_t | dev_handle |
device handle of the ndim. |
size_t | offset |
offset in the ndim. |
size_t | allocsize |
size actually currently allocated. |
uint32_t * | nn |
array of element number on each dimension |
uint32_t * | ldn |
array of element number between two units on each dimension |
size_t | ndim |
size of the dimension. |
size_t | elemsize |
size of the elements of the ndim. |
struct starpu_vector_interface |
Vector interface for one-dimensional arrays of elements
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
Identifier of the interface |
uintptr_t | ptr |
local pointer of the vector |
uintptr_t | dev_handle |
device handle of the vector. |
size_t | offset |
offset in the vector |
uint32_t | nx |
number of elements on the x-axis of the vector |
size_t | elemsize |
size of the elements of the vector |
uint32_t | slice_base |
vector slice base, used by the StarPU OpenMP runtime support |
size_t | allocsize |
size actually currently allocated |
struct starpu_variable_interface |
Variable interface for a single data (not a vector, a matrix, a list, ...)
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
Identifier of the interface |
uintptr_t | ptr |
local pointer of the variable |
uintptr_t | dev_handle |
device handle of the variable. |
size_t | offset |
offset in the variable |
size_t | elemsize |
size of the variable |
struct starpu_csr_interface |
CSR interface for sparse matrices (compressed sparse row representation)
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
Identifier of the interface |
uint32_t | nnz |
number of non-zero entries |
uint32_t | nrow |
number of rows |
uintptr_t | nzval |
non-zero values |
uint32_t * | colind |
position of non-zero entries on the row |
uint32_t * | rowptr |
index (in nzval) of the first entry of the row |
uint32_t * | ram_colind |
position of non-zero entries on the row (stored in RAM) |
uint32_t * | ram_rowptr |
index (in nzval) of the first entry of the row (stored in RAM) |
uint32_t | firstentry |
k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix. |
size_t | elemsize |
size of the elements of the matrix |
struct starpu_bcsr_interface |
BCSR interface for sparse matrices (blocked compressed sparse row representation)
Note: when a BCSR matrix is partitioned, nzval, colind, and rowptr point into the corresponding father arrays. The rowptr content is thus the same as the father's. Firstentry is used to offset this so it becomes valid for the child arrays.
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id |
Identifier of the interface |
uint32_t | nnz |
number of non-zero BLOCKS |
uint32_t | nrow |
number of rows (in terms of BLOCKS) |
uintptr_t | nzval |
non-zero values: nnz blocks of r*c elements |
uint32_t * | colind |
array of nnz elements, colind[i] is the block-column index for block i in nzval |
uint32_t * | rowptr |
array of nrow+1 elements, rowptr[i] is the block-index (in nzval) of the first block of row i. By convention, rowptr[nrow] is the number of blocks, this allows an easier access of the matrix's elements for the kernels. |
uint32_t * | ram_colind |
array of nnz elements (stored in RAM) |
uint32_t * | ram_rowptr |
array of nrow+1 elements (stored in RAM) |
uint32_t | firstentry |
k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix. |
uint32_t | r |
height of the blocks |
uint32_t | c |
width of the blocks |
size_t | elemsize |
size of the elements of the matrix |
struct starpu_multiformat_data_interface_ops |
Multiformat operations
Data Fields | ||
---|---|---|
size_t | cpu_elemsize |
size of each element on CPUs |
size_t | opencl_elemsize |
size of each element on OpenCL devices |
struct starpu_codelet * | cpu_to_opencl_cl |
pointer to a codelet which converts from CPU to OpenCL |
struct starpu_codelet * | opencl_to_cpu_cl |
pointer to a codelet which converts from OpenCL to CPU |
size_t | cuda_elemsize |
size of each element on CUDA devices |
struct starpu_codelet * | cpu_to_cuda_cl |
pointer to a codelet which converts from CPU to CUDA |
struct starpu_codelet * | cuda_to_cpu_cl |
pointer to a codelet which converts from CUDA to CPU |
struct starpu_multiformat_interface |
Multiformat interface for data kept in device-specific formats (CPU, CUDA, HIP, OpenCL)
Data Fields | ||
---|---|---|
enum starpu_data_interface_id | id | |
void * | cpu_ptr | |
void * | cuda_ptr | |
void * | hip_ptr | |
void * | opencl_ptr | |
uint32_t | nx | |
struct starpu_multiformat_data_interface_ops * | ops |
#define STARPU_MATRIX_GET_PTR | ( | interface | ) |
Return a pointer to the matrix designated by interface
, valid on CPUs and CUDA devices only. For OpenCL devices, the device handle and offset need to be used instead.
#define STARPU_MATRIX_GET_DEV_HANDLE | ( | interface | ) |
Return a device handle for the matrix designated by interface
, to be used with OpenCL. The offset returned by STARPU_MATRIX_GET_OFFSET has to be used in addition to this.
#define STARPU_MATRIX_GET_OFFSET | ( | interface | ) |
Return the offset in the matrix designated by interface
, to be used with the device handle.
#define STARPU_MATRIX_GET_NX | ( | interface | ) |
Return the number of elements on the x-axis of the matrix designated by interface
.
#define STARPU_MATRIX_GET_NY | ( | interface | ) |
Return the number of elements on the y-axis of the matrix designated by interface
.
#define STARPU_MATRIX_GET_LD | ( | interface | ) |
Return the number of elements between each row of the matrix designated by interface
. May be equal to nx when there is no padding.
#define STARPU_MATRIX_GET_ELEMSIZE | ( | interface | ) |
Return the size of the elements registered into the matrix designated by interface
.
#define STARPU_MATRIX_GET_ALLOCSIZE | ( | interface | ) |
Return the allocated size of the matrix designated by interface
.
#define STARPU_MATRIX_SET_NX | ( | interface, | |
newnx | |||
) |
Set the number of elements on the x-axis of the matrix designated by interface
.
#define STARPU_MATRIX_SET_NY | ( | interface, | |
newny | |||
) |
Set the number of elements on the y-axis of the matrix designated by interface
.
#define STARPU_MATRIX_SET_LD | ( | interface, | |
newld | |||
) |
Set the number of elements between each row of the matrix designated by interface
. May be set to the same value as nx when there is no padding.
#define STARPU_COO_GET_COLUMNS | ( | interface | ) |
Return a pointer to the column array of the matrix designated by interface
.
#define STARPU_COO_GET_COLUMNS_DEV_HANDLE | ( | interface | ) |
Return a device handle for the column array of the matrix designated by interface
, to be used with OpenCL. The offset returned by STARPU_COO_GET_OFFSET has to be used in addition to this.
#define STARPU_COO_GET_ROWS | ( | interface | ) |
Return a pointer to the rows array of the matrix designated by interface
.
#define STARPU_COO_GET_ROWS_DEV_HANDLE | ( | interface | ) |
Return a device handle for the row array of the matrix designated by interface
, to be used on OpenCL. The offset returned by STARPU_COO_GET_OFFSET has to be used in addition to this.
#define STARPU_COO_GET_VALUES | ( | interface | ) |
Return a pointer to the values array of the matrix designated by interface
.
#define STARPU_COO_GET_VALUES_DEV_HANDLE | ( | interface | ) |
Return a device handle for the value array of the matrix designated by interface
, to be used on OpenCL. The offset returned by STARPU_COO_GET_OFFSET has to be used in addition to this.
#define STARPU_COO_GET_OFFSET |
Return the offset in the arrays of the COO matrix designated by interface
.
#define STARPU_COO_GET_NX | ( | interface | ) |
Return the number of elements on the x-axis of the matrix designated by interface
.
#define STARPU_COO_GET_NY | ( | interface | ) |
Return the number of elements on the y-axis of the matrix designated by interface
.
#define STARPU_COO_GET_NVALUES | ( | interface | ) |
Return the number of values registered in the matrix designated by interface
.
#define STARPU_COO_GET_ELEMSIZE | ( | interface | ) |
Return the size of the elements registered into the matrix designated by interface
.
#define STARPU_BLOCK_GET_PTR | ( | interface | ) |
Return a pointer to the block designated by interface
.
#define STARPU_BLOCK_GET_DEV_HANDLE | ( | interface | ) |
Return a device handle for the block designated by interface
, to be used on OpenCL. The offset returned by STARPU_BLOCK_GET_OFFSET has to be used in addition to this.
#define STARPU_BLOCK_GET_OFFSET | ( | interface | ) |
Return the offset in the block designated by interface
, to be used with the device handle.
#define STARPU_BLOCK_GET_NX | ( | interface | ) |
Return the number of elements on the x-axis of the block designated by interface
.
#define STARPU_BLOCK_GET_NY | ( | interface | ) |
Return the number of elements on the y-axis of the block designated by interface
.
#define STARPU_BLOCK_GET_NZ | ( | interface | ) |
Return the number of elements on the z-axis of the block designated by interface
.
#define STARPU_BLOCK_GET_LDY | ( | interface | ) |
Return the number of elements between each row of the block designated by interface
. May be equal to nx when there is no padding.
#define STARPU_BLOCK_GET_LDZ | ( | interface | ) |
Return the number of elements between each z plane of the block designated by interface
. May be equal to nx*ny when there is no padding.
#define STARPU_BLOCK_GET_ELEMSIZE | ( | interface | ) |
Return the size of the elements of the block designated by interface
.
#define STARPU_TENSOR_GET_PTR | ( | interface | ) |
Return a pointer to the tensor designated by interface
.
#define STARPU_TENSOR_GET_DEV_HANDLE | ( | interface | ) |
Return a device handle for the tensor designated by interface
, to be used on OpenCL. The offset returned by STARPU_TENSOR_GET_OFFSET has to be used in addition to this.
#define STARPU_TENSOR_GET_OFFSET | ( | interface | ) |
Return the offset in the tensor designated by interface
, to be used with the device handle.
#define STARPU_TENSOR_GET_NX | ( | interface | ) |
Return the number of elements on the x-axis of the tensor designated by interface
.
#define STARPU_TENSOR_GET_NY | ( | interface | ) |
Return the number of elements on the y-axis of the tensor designated by interface
.
#define STARPU_TENSOR_GET_NZ | ( | interface | ) |
Return the number of elements on the z-axis of the tensor designated by interface
.
#define STARPU_TENSOR_GET_NT | ( | interface | ) |
Return the number of elements on the t-axis of the tensor designated by interface
.
#define STARPU_TENSOR_GET_LDY | ( | interface | ) |
Return the number of elements between each row of the tensor designated by interface
. May be equal to nx when there is no padding.
#define STARPU_TENSOR_GET_LDZ | ( | interface | ) |
Return the number of elements between each z plane of the tensor designated by interface
. May be equal to nx*ny when there is no padding.
#define STARPU_TENSOR_GET_LDT | ( | interface | ) |
Return the number of elements between each t cube of the tensor designated by interface
. May be equal to nx*ny*nz when there is no padding.
#define STARPU_TENSOR_GET_ELEMSIZE | ( | interface | ) |
Return the size of the elements of the tensor designated by interface
.
#define STARPU_NDIM_GET_PTR | ( | interface | ) |
Return a pointer to the ndim array designated by interface
.
#define STARPU_NDIM_GET_DEV_HANDLE | ( | interface | ) |
Return a device handle for the ndim array designated by interface
, to be used on OpenCL. The offset returned by STARPU_NDIM_GET_OFFSET has to be used in addition to this.
#define STARPU_NDIM_GET_OFFSET | ( | interface | ) |
Return the offset in the ndim designated by interface
, to be used with the device handle.
#define STARPU_NDIM_GET_NN | ( | interface | ) |
Return the number of elements on each dimension of the ndim array designated by interface
.
#define STARPU_NDIM_GET_LDN | ( | interface | ) |
Return the number of elements between each two units on each dimension of the ndim array designated by interface
. May be equal to nx when there is no padding.
#define STARPU_NDIM_GET_NDIM | ( | interface | ) |
Return the dimension size of the ndim array designated by interface
.
#define STARPU_NDIM_GET_ELEMSIZE | ( | interface | ) |
Return the size of the elements of the ndim array designated by interface
.
#define STARPU_VECTOR_GET_PTR | ( | interface | ) |
Return a pointer to the array designated by interface
, valid on CPUs and CUDA only. For OpenCL, the device handle and offset need to be used instead.
#define STARPU_VECTOR_GET_DEV_HANDLE | ( | interface | ) |
Return a device handle for the array designated by interface
, to be used with OpenCL. The offset returned by STARPU_VECTOR_GET_OFFSET has to be used in addition to this.
#define STARPU_VECTOR_GET_OFFSET | ( | interface | ) |
Return the offset in the array designated by interface
, to be used with the device handle.
#define STARPU_VECTOR_GET_NX | ( | interface | ) |
Return the number of elements registered into the array designated by interface
.
#define STARPU_VECTOR_GET_ELEMSIZE | ( | interface | ) |
Return the size of each element of the array designated by interface
.
#define STARPU_VECTOR_GET_ALLOCSIZE | ( | interface | ) |
Return the allocated size of the array designated by interface
.
#define STARPU_VECTOR_GET_SLICE_BASE | ( | interface | ) |
Return the OpenMP slice base annotation of each element of the array designated by interface
.
#define STARPU_VECTOR_SET_NX | ( | interface, | |
newnx | |||
) |
Set the number of elements registered into the array designated by interface
.
#define STARPU_VARIABLE_GET_PTR | ( | interface | ) |
Return a pointer to the variable designated by interface
.
#define STARPU_VARIABLE_GET_OFFSET | ( | interface | ) |
Return the offset in the variable designated by interface
, to be used with the device handle.
#define STARPU_VARIABLE_GET_ELEMSIZE | ( | interface | ) |
Return the size of the variable designated by interface
.
#define STARPU_VARIABLE_GET_DEV_HANDLE | ( | interface | ) |
Return a device handle for the variable designated by interface
, to be used with OpenCL. The offset returned by STARPU_VARIABLE_GET_OFFSET has to be used in addition to this.
#define STARPU_CSR_GET_NNZ | ( | interface | ) |
Return the number of non-zero values in the matrix designated by interface
.
#define STARPU_CSR_GET_NROW | ( | interface | ) |
Return the number of rows of the matrix designated by interface
.
#define STARPU_CSR_GET_NZVAL | ( | interface | ) |
Return a pointer to the non-zero values of the matrix designated by interface
.
#define STARPU_CSR_GET_NZVAL_DEV_HANDLE | ( | interface | ) |
Return a device handle for the array of non-zero values in the matrix designated by interface
. The offset returned by STARPU_CSR_GET_OFFSET has to be used in addition to this.
#define STARPU_CSR_GET_COLIND | ( | interface | ) |
Return a pointer to the column index of the matrix designated by interface
.
#define STARPU_CSR_GET_RAM_COLIND | ( | interface | ) |
Return a RAM pointer to the column index of the matrix designated by interface
.
#define STARPU_CSR_GET_COLIND_DEV_HANDLE | ( | interface | ) |
Return a device handle for the column index of the matrix designated by interface
. The offset returned by STARPU_CSR_GET_OFFSET has to be used in addition to this.
#define STARPU_CSR_GET_ROWPTR | ( | interface | ) |
Return a pointer to the row pointer array of the matrix designated by interface
.
#define STARPU_CSR_GET_RAM_ROWPTR | ( | interface | ) |
Return a RAM pointer to the row pointer array of the matrix designated by interface
.
#define STARPU_CSR_GET_ROWPTR_DEV_HANDLE | ( | interface | ) |
Return a device handle for the row pointer array of the matrix designated by interface
. The offset returned by STARPU_CSR_GET_OFFSET has to be used in addition to this.
#define STARPU_CSR_GET_OFFSET |
Return the offset in the arrays (colind, rowptr, nzval) of the matrix designated by interface
, to be used with the device handles.
#define STARPU_CSR_GET_FIRSTENTRY | ( | interface | ) |
Return the index at which all arrays (the column indexes, the row pointers...) of the interface
start.
#define STARPU_CSR_GET_ELEMSIZE | ( | interface | ) |
Return the size of the elements registered into the matrix designated by interface
.
#define STARPU_BCSR_GET_NNZ | ( | interface | ) |
Return the number of non-zero blocks in the matrix designated by interface
.
#define STARPU_BCSR_GET_NROW | ( | interface | ) |
Return the number of block rows in the matrix designated by interface
.
#define STARPU_BCSR_GET_NZVAL | ( | interface | ) |
Return a pointer to the non-zero values of the matrix designated by interface
.
#define STARPU_BCSR_GET_NZVAL_DEV_HANDLE | ( | interface | ) |
Return a device handle for the array of non-zero values in the matrix designated by interface
. The offset returned by STARPU_BCSR_GET_OFFSET has to be used in addition to this.
#define STARPU_BCSR_GET_COLIND | ( | interface | ) |
Return a pointer to the column index of the matrix designated by interface
.
#define STARPU_BCSR_GET_RAM_COLIND | ( | interface | ) |
Return a RAM pointer to the column index of the matrix designated by interface
.
#define STARPU_BCSR_GET_COLIND_DEV_HANDLE | ( | interface | ) |
Return a device handle for the column index of the matrix designated by interface
. The offset returned by STARPU_BCSR_GET_OFFSET has to be used in addition to this.
#define STARPU_BCSR_GET_ROWPTR | ( | interface | ) |
Return a pointer to the row pointer array of the matrix designated by interface
.
#define STARPU_BCSR_GET_RAM_ROWPTR | ( | interface | ) |
Return a RAM pointer to the row pointer array of the matrix designated by interface
.
#define STARPU_BCSR_GET_ROWPTR_DEV_HANDLE | ( | interface | ) |
Return a device handle for the row pointer array of the matrix designated by interface
. The offset returned by STARPU_BCSR_GET_OFFSET has to be used in addition to this.
#define STARPU_BCSR_GET_FIRSTENTRY | ( | interface | ) |
Return the base of the indexing (0 or 1 usually) in the matrix designated by interface
.
#define STARPU_BCSR_GET_R | ( | interface | ) |
Return the height of blocks in the matrix designated by interface
.
#define STARPU_BCSR_GET_C | ( | interface | ) |
Return the width of blocks in the matrix designated by interface
.
#define STARPU_BCSR_GET_ELEMSIZE | ( | interface | ) |
Return the size of elements in the matrix designated by interface
.
#define STARPU_BCSR_GET_OFFSET |
Return the offset in the arrays (colind, rowptr, nzval) of the matrix designated by interface
, to be used with the device handles.
#define STARPU_MULTIFORMAT_GET_CPU_PTR | ( | interface | ) |
Return the local pointer to the data with CPU format.
#define STARPU_MULTIFORMAT_GET_CUDA_PTR | ( | interface | ) |
Return the local pointer to the data with CUDA format.
#define STARPU_MULTIFORMAT_GET_HIP_PTR | ( | interface | ) |
Return the local pointer to the data with HIP format.
#define STARPU_MULTIFORMAT_GET_OPENCL_PTR | ( | interface | ) |
Return the local pointer to the data with OpenCL format.
#define STARPU_MULTIFORMAT_GET_NX | ( | interface | ) |
Return the number of elements in the data.
Identifier for all predefined StarPU data interfaces
void starpu_data_register | ( | starpu_data_handle_t * | handleptr, |
int | home_node, | ||
void * | data_interface, | ||
struct starpu_data_interface_ops * | ops | ||
) |
Register a piece of data into the handle located at the handleptr
address. The data_interface
buffer contains the initial description of the data in the home_node
. The ops
argument is a pointer to a structure describing the different methods used to manipulate this type of interface. See starpu_data_interface_ops for more details on this structure. If home_node
is -1, StarPU will automatically allocate the memory when it is used for the first time in write-only mode. Once such a data handle has been automatically allocated, it is possible to access it using any access mode. Note that StarPU supplies a set of predefined types of interface (e.g. vector or matrix) which can be registered by the means of helper functions (e.g. starpu_vector_data_register() or starpu_matrix_data_register()).
See DefiningANewDataInterface_registration for more details.
void starpu_data_register_ops | ( | struct starpu_data_interface_ops * | ops | ) |
Register the given data interface operations. If the field starpu_data_interface_ops::interfaceid is set to STARPU_UNKNOWN_INTERFACE_ID, then a new identifier will be set by calling starpu_data_interface_get_next_id(). The function is automatically called when registering a piece of data with starpu_data_register(). It is only necessary to call it beforehand for some specific cases (such as the master-slave mode).
void starpu_data_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node | ||
) |
Register that a buffer for handle
on node
will be set. This is typically used by starpu_*_ptr_register helpers before setting the interface pointers for this node, to tell the core that the buffer is now allocated. See DefiningANewDataInterface_pointers for more details.
void starpu_data_register_same | ( | starpu_data_handle_t * | handledst, |
starpu_data_handle_t | handlesrc | ||
) |
Register a new piece of data into the handle handledst
with the same interface as the handle handlesrc
. See DataHandlesHelpers for more details.
void * starpu_data_handle_to_pointer | ( | starpu_data_handle_t | handle, |
unsigned | node | ||
) |
Return the pointer associated with handle
on node node
or NULL
if handle’s interface does not support this operation or data for this handle
is not allocated on that node
. See DataPointers for more details.
void * starpu_data_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return the local pointer associated with handle
or NULL
if handle’s
interface does not have any data allocated locally. See DataPointers for more details.
void * starpu_data_get_interface_on_node | ( | starpu_data_handle_t | handle, |
unsigned | memory_node | ||
) |
Return the interface associated with handle
on memory_node
. See DefiningANewDataInterface_pack for more details.
enum starpu_data_interface_id starpu_data_get_interface_id | ( | starpu_data_handle_t | handle | ) |
Return the unique identifier of the interface associated with the given handle
. See DefiningANewDataInterface_helpers for more details.
int starpu_data_pack_node | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
void ** | ptr, | ||
starpu_ssize_t * | count | ||
) |
Execute the packing operation of the interface of the data registered at handle
(see starpu_data_interface_ops). This packing operation must allocate a buffer large enough at ptr
on node node
and copy into the newly allocated buffer the data associated to handle
. count
will be set to the size of the allocated buffer. If ptr
is NULL
, the function should not copy the data in the buffer but just set count
to the size of the buffer which would have been allocated. The special value -1 indicates the size is yet unknown. See DataHandlesHelpers for more details.
int starpu_data_pack | ( | starpu_data_handle_t | handle, |
void ** | ptr, | ||
starpu_ssize_t * | count | ||
) |
Like starpu_data_pack_node(), but for the local memory node. See DataHandlesHelpers for more details.
int starpu_data_peek_node | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
void * | ptr, | ||
size_t | count | ||
) |
Read in handle's node
replicate the data located at ptr
of size count
as described by the interface of the data. The interface registered at handle
must define a peeking operation (see starpu_data_interface_ops). See DataHandlesHelpers for more details.
int starpu_data_peek | ( | starpu_data_handle_t | handle, |
void * | ptr, | ||
size_t | count | ||
) |
Read in handle's local replicate the data located at ptr
of size count
as described by the interface of the data. The interface registered at handle
must define a peeking operation (see starpu_data_interface_ops). See DataHandlesHelpers for more details.
int starpu_data_unpack_node | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
void * | ptr, | ||
size_t | count | ||
) |
Unpack in handle the data located at ptr
of size count
allocated on node node
as described by the interface of the data. The interface registered at handle
must define an unpacking operation (see starpu_data_interface_ops). See DataHandlesHelpers for more details.
int starpu_data_unpack | ( | starpu_data_handle_t | handle, |
void * | ptr, | ||
size_t | count | ||
) |
Unpack in handle the data located at ptr
of size count
as described by the interface of the data. The interface registered at handle
must define an unpacking operation (see starpu_data_interface_ops). See DataHandlesHelpers for more details.
size_t starpu_data_get_size | ( | starpu_data_handle_t | handle | ) |
Return the size of the data associated with handle
. See DataHandlesHelpers for more details.
size_t starpu_data_get_alloc_size | ( | starpu_data_handle_t | handle | ) |
Return the size of the allocated data associated with handle
. See DataHandlesHelpers for more details.
starpu_ssize_t starpu_data_get_max_size | ( | starpu_data_handle_t | handle | ) |
Return the maximum size that the handle
data may need to increase to. See DataHandlesHelpers for more details.
int starpu_data_get_home_node | ( | starpu_data_handle_t | handle | ) |
See DataHandlesHelpers for more details.
void starpu_data_print | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
FILE * | stream | ||
) |
Print basic information on handle
on node
. See DataHandlesHelpers for more details.
int starpu_data_interface_get_next_id | ( | void | ) |
Return the next available id for a newly created data interface (DefiningANewDataInterface).
int starpu_interface_copy | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
size_t | dst_offset, | ||
unsigned | dst_node, | ||
size_t | size, | ||
void * | async_data | ||
) |
Copy size
bytes from byte offset src_offset
of src
on src_node
to byte offset dst_offset
of dst
on dst_node
. This is to be used in the starpu_data_copy_methods::any_to_any copy method, which is provided with async_data
to be passed to starpu_interface_copy(). This returns -EAGAIN
if the transfer is still ongoing, or 0 if the transfer is already completed.
See DefiningANewDataInterface_copy for more details.
int starpu_interface_copy2d | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
size_t | dst_offset, | ||
unsigned | dst_node, | ||
size_t | blocksize, | ||
size_t | numblocks, | ||
size_t | ld_src, | ||
size_t | ld_dst, | ||
void * | async_data | ||
) |
Copy numblocks
blocks of blocksize
bytes from byte offset src_offset
of src
on src_node
to byte offset dst_offset
of dst
on dst_node
.
The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in the source (resp. destination) interface.
If blocksize == ld_src == ld_dst, the transfer is optimized into a single starpu_interface_copy call.
This is to be used in the starpu_data_copy_methods::any_to_any copy method for 2D data, which is provided with async_data
to be passed to starpu_interface_copy(). This returns -EAGAIN
if the transfer is still ongoing, or 0 if the transfer is already completed.
See DefiningANewDataInterface_copy for more details.
int starpu_interface_copy3d | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
size_t | dst_offset, | ||
unsigned | dst_node, | ||
size_t | blocksize, | ||
size_t | numblocks1, | ||
size_t | ld1_src, | ||
size_t | ld1_dst, | ||
size_t | numblocks2, | ||
size_t | ld2_src, | ||
size_t | ld2_dst, | ||
void * | async_data | ||
) |
Copy numblocks1
* numblocks2
blocks of blocksize
bytes from byte offset src_offset
of src
on src_node
to byte offset dst_offset
of dst
on dst_node
.
The blocks are grouped by numblocks1
blocks whose start addresses are ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) interface.
Such groups are grouped by numblocks2 groups whose start addresses are ld2_src (resp. ld2_dst) bytes apart in the source (resp. destination) interface.
If the blocks are contiguous, the transfers will be optimized.
This is to be used in the starpu_data_copy_methods::any_to_any copy method for 3D data, which is provided with async_data
to be passed to starpu_interface_copy(). This returns -EAGAIN
if the transfer is still ongoing, or 0 if the transfer is already completed.
See DefiningANewDataInterface_copy for more details.
int starpu_interface_copy4d | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
size_t | dst_offset, | ||
unsigned | dst_node, | ||
size_t | blocksize, | ||
size_t | numblocks1, | ||
size_t | ld1_src, | ||
size_t | ld1_dst, | ||
size_t | numblocks2, | ||
size_t | ld2_src, | ||
size_t | ld2_dst, | ||
size_t | numblocks3, | ||
size_t | ld3_src, | ||
size_t | ld3_dst, | ||
void * | async_data | ||
) |
Copy numblocks1
* numblocks2
* numblocks3
blocks of blocksize
bytes from byte offset src_offset
of src
on src_node
to byte offset dst_offset
of dst
on dst_node
.
The blocks are grouped by numblocks1
blocks whose start addresses are ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) interface.
Such groups are grouped by numblocks2 groups whose start addresses are ld2_src (resp. ld2_dst) bytes apart in the source (resp. destination) interface.
Such groups are grouped by numblocks3 groups whose start addresses are ld3_src (resp. ld3_dst) bytes apart in the source (resp. destination) interface.
If the blocks are contiguous, the transfers will be optimized.
This is to be used in the starpu_data_copy_methods::any_to_any copy method for 4D data, which is provided with async_data
to be passed to starpu_interface_copy(). This returns -EAGAIN
if the transfer is still ongoing, or 0 if the transfer is already completed.
See DefiningANewDataInterface_copy for more details.
int starpu_interface_copynd | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
size_t | dst_offset, | ||
unsigned | dst_node, | ||
size_t | elemsize, | ||
size_t | ndim, | ||
uint32_t * | nn, | ||
uint32_t * | ldn_src, | ||
uint32_t * | ldn_dst, | ||
void * | async_data | ||
) |
Copy nn
[1] * nn
[2]...* nn
[ndim-1] blocks of nn
[0] * elemsize
bytes from byte offset src_offset
of src
on src_node
to byte offset dst_offset
of dst
on dst_node
.
The blocks are grouped by nn
[i] blocks (i = 1, 2, ... ndim-1) whose start addresses are ldn_src[i] * elemsize
(resp. ldn_dst[i] * elemsize
) bytes apart in the source (resp. destination) interface.
If the blocks are contiguous, the transfers will be optimized.
This is to be used in the starpu_data_copy_methods::any_to_any copy method for Ndim data, which is provided with async_data
to be passed to starpu_interface_copy(). This returns -EAGAIN
if the transfer is still ongoing, or 0 if the transfer is already completed.
See DefiningANewDataInterface_copy for more details.
void starpu_interface_start_driver_copy_async | ( | unsigned | src_node, |
unsigned | dst_node, | ||
double * | start | ||
) |
When the data transfer is implemented asynchronously, the underlying CUDA, OpenCL, etc. call should be surrounded by calls to starpu_interface_start_driver_copy_async() and starpu_interface_end_driver_copy_async(), so that it is recorded in offline execution traces, and the timing of the submission is checked. start
must point to a variable whose value will be passed unchanged to starpu_interface_end_driver_copy_async().
See DefiningANewDataInterface_copy for more details.
void starpu_interface_end_driver_copy_async | ( | unsigned | src_node, |
unsigned | dst_node, | ||
double | start | ||
) |
See starpu_interface_start_driver_copy_async(). See DefiningANewDataInterface_copy for more details.
void starpu_interface_data_copy | ( | unsigned | src_node, |
unsigned | dst_node, | ||
size_t | size | ||
) |
Record in offline execution traces the copy of size
bytes from node src_node
to node dst_node
. See DefiningANewDataInterface_copy for more details.
uintptr_t starpu_malloc_on_node_flags | ( | unsigned | dst_node, |
size_t | size, | ||
int | flags | ||
) |
Allocate size
bytes on node dst_node
with the given allocation flags
(such as STARPU_MALLOC_PINNED, STARPU_MALLOC_COUNT, etc.). This returns 0 if allocation failed, the allocation method should then return -ENOMEM
as allocated size. Deallocation must be done with starpu_free_on_node_flags().
See VariableSizeDataInterface for more details.
uintptr_t starpu_malloc_on_node | ( | unsigned | dst_node, |
size_t | size | ||
) |
Allocate size
bytes on node dst_node
with the default allocation flags. This returns 0 if allocation failed, the allocation method should then return -ENOMEM
as allocated size. Deallocation must be done with starpu_free_on_node().
See DefiningANewDataInterface_allocation for more details.
void starpu_free_on_node_flags | ( | unsigned | dst_node, |
uintptr_t | addr, | ||
size_t | size, | ||
int | flags | ||
) |
Free addr
of size
bytes on node dst_node
which was previously allocated with starpu_malloc_on_node_flags() with the given allocation flags
.
See VariableSizeDataInterface for more details.
void starpu_free_on_node | ( | unsigned | dst_node, |
uintptr_t | addr, | ||
size_t | size | ||
) |
Free addr
of size
bytes on node dst_node
which was previously allocated with starpu_malloc_on_node().
See DefiningANewDataInterface_allocation for more details.
void starpu_malloc_on_node_set_default_flags | ( | unsigned | node, |
int | flags | ||
) |
Define the default flags for allocations performed by starpu_malloc_on_node() and starpu_free_on_node(). The default is STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT. See HowToLimitMemoryPerNode for more details.
uintptr_t starpu_interface_map | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
unsigned | dst_node, | ||
size_t | size, | ||
int * | ret | ||
) |
Used to set starpu_data_interface_ops::map_data. See DefiningANewDataInterface_pointers for more details.
int starpu_interface_unmap | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
unsigned | dst_node, | ||
size_t | size | ||
) |
Used to set starpu_data_interface_ops::unmap_data. See DefiningANewDataInterface_pointers for more details.
int starpu_interface_update_map | ( | uintptr_t | src, |
size_t | src_offset, | ||
unsigned | src_node, | ||
uintptr_t | dst, | ||
size_t | dst_offset, | ||
unsigned | dst_node, | ||
size_t | size | ||
) |
Used to set starpu_data_interface_ops::update_map. See DefiningANewDataInterface_pointers for more details.
void starpu_matrix_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t | ld, | ||
uint32_t | nx, | ||
uint32_t | ny, | ||
size_t | elemsize | ||
) |
Register the nx
x ny
2D matrix of elemsize-byte
elements pointed to by ptr
and initialize handle
to represent it. ld
specifies the number of elements between rows. A value greater than nx
adds padding, which can be useful for alignment purposes.
Here is an example of how to use the function.
See MatrixDataInterface for more details.
void starpu_matrix_data_register_allocsize | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t | ld, | ||
uint32_t | nx, | ||
uint32_t | ny, | ||
size_t | elemsize, | ||
size_t | allocsize | ||
) |
Similar to starpu_matrix_data_register, but additionally specifies which allocation size should be used instead of the initial nx*ny*elemsize.
See VariableSizeDataInterface for more details.
void starpu_matrix_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
uintptr_t | ptr, | ||
uintptr_t | dev_handle, | ||
size_t | offset, | ||
uint32_t | ld | ||
) |
Register into the handle
that to store data on node node
it should use the buffer located at ptr
, or device handle dev_handle
and offset offset
(for OpenCL, notably), with ld
elements between rows.
uint32_t starpu_matrix_get_nx | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the x-axis of the matrix designated by handle
.
uint32_t starpu_matrix_get_ny | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the y-axis of the matrix designated by handle
.
uint32_t starpu_matrix_get_local_ld | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between each row of the matrix designated by handle
. May be equal to nx when there is no padding.
uintptr_t starpu_matrix_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return the local pointer associated with handle
.
size_t starpu_matrix_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the elements registered into the matrix designated by handle
.
size_t starpu_matrix_get_allocsize | ( | starpu_data_handle_t | handle | ) |
Return the allocated size of the matrix designated by handle
.
void starpu_coo_data_register | ( | starpu_data_handle_t * | handleptr, |
int | home_node, | ||
uint32_t | nx, | ||
uint32_t | ny, | ||
uint32_t | n_values, | ||
uint32_t * | columns, | ||
uint32_t * | rows, | ||
uintptr_t | values, | ||
size_t | elemsize | ||
) |
Register the nx
x ny
2D matrix given in the COO format, using the columns
, rows
, values
arrays, which must have n_values
elements of size elemsize
. Initialize handleptr
. See COODataInterface for more details.
void starpu_block_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t | ldy, | ||
uint32_t | ldz, | ||
uint32_t | nx, | ||
uint32_t | ny, | ||
uint32_t | nz, | ||
size_t | elemsize | ||
) |
Register the nx
x ny
x nz
3D matrix of elemsize
byte elements pointed to by ptr
and initialize handle
to represent it. Again, ldy
and ldz
specify the number of elements between rows and between z planes.
Here is an example of how to use the function.
See BlockDataInterface for more details.
void starpu_block_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
uintptr_t | ptr, | ||
uintptr_t | dev_handle, | ||
size_t | offset, | ||
uint32_t | ldy, | ||
uint32_t | ldz | ||
) |
Register into the handle
that to store data on node node
it should use the buffer located at ptr
, or device handle dev_handle
and offset offset
(for OpenCL, notably), with ldy
elements between rows and ldz
elements between z planes.
uint32_t starpu_block_get_nx | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the x-axis of the block designated by handle
.
uint32_t starpu_block_get_ny | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the y-axis of the block designated by handle
.
uint32_t starpu_block_get_nz | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the z-axis of the block designated by handle
.
uint32_t starpu_block_get_local_ldy | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between each row of the block designated by handle
, in the format of the current memory node.
uint32_t starpu_block_get_local_ldz | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between each z plane of the block designated by handle
, in the format of the current memory node.
uintptr_t starpu_block_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return the local pointer associated with handle
.
size_t starpu_block_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the elements of the block designated by handle
.
void starpu_tensor_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t | ldy, | ||
uint32_t | ldz, | ||
uint32_t | ldt, | ||
uint32_t | nx, | ||
uint32_t | ny, | ||
uint32_t | nz, | ||
uint32_t | nt, | ||
size_t | elemsize | ||
) |
Register the nx
x ny
x nz
x nt
4D tensor of elemsize
byte elements pointed to by ptr
and initialize handle
to represent it. Again, ldy
, ldz
, and ldt
specify the number of elements between rows, between z planes and between t cubes.
Here is an example of how to use the function.
See TensorDataInterface for more details.
void starpu_tensor_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
uintptr_t | ptr, | ||
uintptr_t | dev_handle, | ||
size_t | offset, | ||
uint32_t | ldy, | ||
uint32_t | ldz, | ||
uint32_t | ldt | ||
) |
Register into the handle
that to store data on node node
it should use the buffer located at ptr
, or device handle dev_handle
and offset offset
(for OpenCL, notably), with ldy
elements between rows, and ldz
elements between z planes, and ldt
elements between t cubes.
uint32_t starpu_tensor_get_nx | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the x-axis of the tensor designated by handle
.
uint32_t starpu_tensor_get_ny | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the y-axis of the tensor designated by handle
.
uint32_t starpu_tensor_get_nz | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the z-axis of the tensor designated by handle
.
uint32_t starpu_tensor_get_nt | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on the t-axis of the tensor designated by handle
.
uint32_t starpu_tensor_get_local_ldy | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between each row of the tensor designated by handle
, in the format of the current memory node.
uint32_t starpu_tensor_get_local_ldz | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between each z plane of the tensor designated by handle
, in the format of the current memory node.
uint32_t starpu_tensor_get_local_ldt | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between each t cube of the tensor designated by handle
, in the format of the current memory node.
uintptr_t starpu_tensor_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return the local pointer associated with handle
.
size_t starpu_tensor_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the elements of the tensor designated by handle
.
void starpu_ndim_data_register | ( | starpu_data_handle_t * | handleptr, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t * | ldn, | ||
uint32_t * | nn, | ||
size_t | ndim, | ||
size_t | elemsize | ||
) |
Register the nn
[0] x nn
[1] x ... ndim-dimension
matrix of elemsize
byte elements pointed to by ptr
and initialize handleptr
to represent it. Again, ldn
specifies the number of elements between two units on each dimension.
Here is an example of how to use the function.
See NdimDataInterface for more details.
void starpu_ndim_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
uintptr_t | ptr, | ||
uintptr_t | dev_handle, | ||
size_t | offset, | ||
uint32_t * | ldn | ||
) |
Register into the handle
that to store data on node node
it should use the buffer located at ptr
, or device handle dev_handle
and offset offset
(for OpenCL, notably), with ldn
elements between two units on each dimension.
uint32_t * starpu_ndim_get_nn | ( | starpu_data_handle_t | handle | ) |
Return the number of elements on each dimension of the ndim array designated by handle
.
uint32_t starpu_ndim_get_ni | ( | starpu_data_handle_t | handle, |
size_t | i | ||
) |
Return the number of elements on the i-axis of the ndim array designated by handle
. When i=0, it means x-axis, when i=1, it means y-axis, when i=2, it means z-axis, etc.
uint32_t * starpu_ndim_get_local_ldn | ( | starpu_data_handle_t | handle | ) |
Return the number of elements between two units on each dimension of the ndim array designated by handle
, in the format of the current memory node.
uint32_t starpu_ndim_get_local_ldi | ( | starpu_data_handle_t | handle, |
size_t | i | ||
) |
Return the number of elements between two units on the i-axis dimension of the ndim array designated by handle
, in the format of the current memory node.
uintptr_t starpu_ndim_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return the local pointer associated with handle
.
size_t starpu_ndim_get_ndim | ( | starpu_data_handle_t | handle | ) |
Return the number of dimensions of the ndim array designated by handle.
size_t starpu_ndim_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the elements of the ndim array designated by handle
.
void starpu_vector_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t | nx, | ||
size_t | elemsize | ||
) |
Register the nx
elemsize-byte
elements pointed to by ptr
and initialize handle
to represent it.
Here is an example of how to use the function.
See VectorDataInterface for more details.
void starpu_vector_data_register_allocsize | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
uint32_t | nx, | ||
size_t | elemsize, | ||
size_t | allocsize | ||
) |
Similar to starpu_vector_data_register, but additionally specifies which allocation size should be used instead of the initial nx*elemsize. See VariableSizeDataInterface for more details.
void starpu_vector_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
uintptr_t | ptr, | ||
uintptr_t | dev_handle, | ||
size_t | offset | ||
) |
Register into the handle
that to store data on node node
it should use the buffer located at ptr
, or device handle dev_handle
and offset offset
(for OpenCL, notably).
uint32_t starpu_vector_get_nx | ( | starpu_data_handle_t | handle | ) |
Return the number of elements registered into the array designated by handle
.
size_t starpu_vector_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of each element of the array designated by handle
.
size_t starpu_vector_get_allocsize | ( | starpu_data_handle_t | handle | ) |
Return the allocated size of the array designated by handle
.
uintptr_t starpu_vector_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return the local pointer associated with handle
.
void starpu_variable_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uintptr_t | ptr, | ||
size_t | size | ||
) |
Register the size
byte element pointed to by ptr
, which is typically a scalar, and initialize handle
to represent this data item.
Here is an example of how to use the function.
See VariableDataInterface for more details.
void starpu_variable_ptr_register | ( | starpu_data_handle_t | handle, |
unsigned | node, | ||
uintptr_t | ptr, | ||
uintptr_t | dev_handle, | ||
size_t | offset | ||
) |
Register into the handle
that to store data on node node
it should use the buffer located at ptr
, or device handle dev_handle
and offset offset
(for OpenCL, notably).
size_t starpu_variable_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the variable designated by handle
.
uintptr_t starpu_variable_get_local_ptr | ( | starpu_data_handle_t | handle | ) |
Return a pointer to the variable designated by handle
.
void starpu_void_data_register | ( | starpu_data_handle_t * | handle | ) |
Register a void interface. There is no data really associated to that interface, but it may be used as a synchronization mechanism. It also makes it possible to express an abstract piece of data that is managed by the application internally: this makes it possible to forbid the concurrent execution of different tasks accessing the same void
data in read-write mode. See DataHandlesHelpers for more details.
void starpu_csr_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uint32_t | nnz, | ||
uint32_t | nrow, | ||
uintptr_t | nzval, | ||
uint32_t * | colind, | ||
uint32_t * | rowptr, | ||
uint32_t | firstentry, | ||
size_t | elemsize | ||
) |
Register a CSR (Compressed Sparse Row Representation) sparse matrix. See CSRDataInterface for more details.
uint32_t starpu_csr_get_nnz | ( | starpu_data_handle_t | handle | ) |
Return the number of non-zero values in the matrix designated by handle
.
uint32_t starpu_csr_get_nrow | ( | starpu_data_handle_t | handle | ) |
Return the size of the row pointer array of the matrix designated by handle
.
uint32_t starpu_csr_get_firstentry | ( | starpu_data_handle_t | handle | ) |
Return the index at which all arrays (the column indexes, the row pointers...) of the matrix designated by handle
start.
uintptr_t starpu_csr_get_local_nzval | ( | starpu_data_handle_t | handle | ) |
Return a local pointer to the non-zero values of the matrix designated by handle
.
uint32_t * starpu_csr_get_local_colind | ( | starpu_data_handle_t | handle | ) |
Return a local pointer to the column index of the matrix designated by handle
.
uint32_t * starpu_csr_get_local_rowptr | ( | starpu_data_handle_t | handle | ) |
Return a local pointer to the row pointer array of the matrix designated by handle
.
size_t starpu_csr_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the elements registered into the matrix designated by handle
.
void starpu_bcsr_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
uint32_t | nnz, | ||
uint32_t | nrow, | ||
uintptr_t | nzval, | ||
uint32_t * | colind, | ||
uint32_t * | rowptr, | ||
uint32_t | firstentry, | ||
uint32_t | r, | ||
uint32_t | c, | ||
size_t | elemsize | ||
) |
This variant of starpu_data_register() uses the BCSR (Blocked Compressed Sparse Row Representation) sparse matrix interface. Register the sparse matrix made of nnz
non-zero blocks of elements of size elemsize
stored in nzval
and initialize handle
to represent it. Blocks have size r
* c
. nrow
is the number of rows (in terms of blocks), colind
is an array of nnz elements; colind[i] is the block-column index for block i in nzval
, rowptr
is an array of nrow+1 elements; rowptr[i] is the block-index (in nzval
) of the first block of row i. By convention, rowptr[nrow] is the number of blocks; this allows easier access to the matrix's elements for the kernels. firstentry
is the index of the first entry of the given arrays (usually 0 or 1).
Here is an example with the following matrix:
which translates into the following code:
See BCSRDataInterface for more details.
uint32_t starpu_bcsr_get_nnz | ( | starpu_data_handle_t | handle | ) |
Return the number of non-zero blocks in the matrix designated by handle
.
uint32_t starpu_bcsr_get_nrow | ( | starpu_data_handle_t | handle | ) |
Return the number of rows (in terms of blocks of size r*c) in the matrix designated by handle
.
uint32_t starpu_bcsr_get_firstentry | ( | starpu_data_handle_t | handle | ) |
Return the index at which all arrays (the column indexes, the row pointers...) of the matrix designated by handle
start, i.e. the index of their first entry (usually 0 or 1).
uintptr_t starpu_bcsr_get_local_nzval | ( | starpu_data_handle_t | handle | ) |
Return a pointer to the non-zero values of the matrix designated by handle
.
uint32_t * starpu_bcsr_get_local_colind | ( | starpu_data_handle_t | handle | ) |
Return a pointer to the column index, which holds the positions of the non-zero entries in the matrix designated by handle
.
uint32_t * starpu_bcsr_get_local_rowptr | ( | starpu_data_handle_t | handle | ) |
Return the row pointer array of the matrix designated by handle
.
uint32_t starpu_bcsr_get_r | ( | starpu_data_handle_t | handle | ) |
Return the number of rows in a block.
uint32_t starpu_bcsr_get_c | ( | starpu_data_handle_t | handle | ) |
Return the number of columns in a block.
size_t starpu_bcsr_get_elemsize | ( | starpu_data_handle_t | handle | ) |
Return the size of the elements in the matrix designated by handle
.
void starpu_multiformat_data_register | ( | starpu_data_handle_t * | handle, |
int | home_node, | ||
void * | ptr, | ||
uint32_t | nobjects, | ||
struct starpu_multiformat_data_interface_ops * | format_ops | ||
) |
Register a piece of data that can be represented in different ways, depending upon the processing unit that manipulates it. It allows the programmer, for instance, to use an array of structures when working on a CPU, and a structure of arrays when working on a GPU. nobjects
is the number of elements in the data. format_ops
describes the format. See TheMultiformatInterface for more details.
uint32_t starpu_hash_crc32c_be_n | ( | const void * | input, |
size_t | n, | ||
uint32_t | inputcrc | ||
) |
Compute the CRC of a byte buffer seeded by the inputcrc
current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See DefiningANewDataInterface_footprint for more details.
uint32_t starpu_hash_crc32c_be_ptr | ( | void * | input, |
uint32_t | inputcrc | ||
) |
Compute the CRC of a pointer value seeded by the inputcrc
current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See DefiningANewDataInterface_footprint for more details.
uint32_t starpu_hash_crc32c_be | ( | uint32_t | input, |
uint32_t | inputcrc | ||
) |
Compute the CRC of a 32-bit number seeded by the inputcrc
current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See DefiningANewDataInterface_footprint for more details.
uint32_t starpu_hash_crc32c_string | ( | const char * | str, |
uint32_t | inputcrc | ||
) |
Compute the CRC of a string seeded by the inputcrc
current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See DefiningANewDataInterface_footprint for more details.