StarPU Internal Handbook
Loading...
Searching...
No Matches
topology.h File Reference
#include <starpu.h>
#include <common/config.h>
#include <common/list.h>
#include <common/fxt.h>
#include <common/uthash.h>

Go to the source code of this file.

Macros

#define ALLOC_WORKER_SET
 
#define DEVID_PER_WORKER
 
#define STARPU_NOWORKERID
 
#define STARPU_ACTIVETHREAD
 
#define STARPU_NONACTIVETHREAD
 

Functions

int _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
 
void _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid, int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus, enum starpu_worker_archtype type)
 
int _starpu_get_next_devid (struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype arch)
 
void _starpu_topology_check_ndevices (int *ndevices, unsigned nhwdevices, int overflow, unsigned max, int reserved, const char *nname, const char *dname, const char *configurename)
 
void _starpu_topology_configure_workers (struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype type, int devnum, int devid, int homogeneous, int worker_devid, unsigned nworker_per_device, unsigned ncores, struct _starpu_worker_set *worker_set, struct _starpu_worker_set *driver_worker_set)
 
unsigned _starpu_get_next_bindid (struct _starpu_machine_config *config, unsigned flags, unsigned *preferred_binding, unsigned npreferred)
 
void _starpu_destroy_machine_config (struct _starpu_machine_config *config, int no_mp_config)
 
void _starpu_destroy_topology (struct _starpu_machine_config *config)
 
hwloc_obj_t _starpu_numa_get_obj (hwloc_obj_t obj)
 
unsigned _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
 
unsigned _starpu_topology_get_nhwpu (struct _starpu_machine_config *config)
 
unsigned _starpu_topology_get_nhwnumanodes (struct _starpu_machine_config *config)
 
unsigned _starpu_topology_get_nnumanodes (struct _starpu_machine_config *config)
 
unsigned _starpu_topology_get_numa_core_binding (struct _starpu_machine_config *config, const unsigned *numa_binding, unsigned nnuma, unsigned *binding, unsigned nbinding)
 
int starpu_memory_nodes_numa_hwloclogid_to_id (int logid)
 
int _starpu_get_logical_numa_node_worker (unsigned workerid)
 
unsigned _starpu_get_nhyperthreads () STARPU_ATTRIBUTE_VISIBILITY_DEFAULT
 
void _starpu_topology_filter (hwloc_topology_t topology)
 
int _starpu_bind_thread_on_cpu (int cpuid, int workerid, const char *name)
 
void _starpu_bind_thread_on_cpus (struct _starpu_combined_worker *combined_worker)
 
struct _starpu_worker * _starpu_get_worker_from_driver (struct starpu_driver *d)
 
unsigned starpu_memory_nodes_get_numa_count (void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT
 
int starpu_memory_nodes_numa_id_to_hwloclogid (unsigned id)
 
int _starpu_task_data_get_node_on_node (struct starpu_task *task, unsigned index, unsigned target_node)
 
int _starpu_task_data_get_node_on_worker (struct starpu_task *task, unsigned index, unsigned worker)
 

Variables

unsigned _starpu_may_bind_automatically [STARPU_NARCH]
 

Macro Definition Documentation

◆ ALLOC_WORKER_SET

#define ALLOC_WORKER_SET

Configures the topology according to the desired worker distribution on the device.

  • homogeneous requests using devid 0 for the perfmodel (all devices have the same performance)
  • worker_devid requests setting a devid per worker, with subworkerid set to 0, rather than sharing the devid and giving a different subworkerid to each worker.

Request to allocate a worker set for each worker.

◆ DEVID_PER_WORKER

#define DEVID_PER_WORKER

Request to set a different perfmodel devid per worker

Function Documentation

◆ _starpu_build_topology()

int _starpu_build_topology ( struct _starpu_machine_config *  config,
int  no_mp_config 
)

Detect the number of memory nodes and where to bind the different workers.

◆ _starpu_initialize_workers_deviceid()

void _starpu_initialize_workers_deviceid ( int *  explicit_workers_gpuid,
int *  current,
int *  workers_gpuid,
const char *  varname,
unsigned  nhwgpus,
enum starpu_worker_archtype  type 
)

Initialize a series of workers.

  • If explicit_workers_gpuid is non-null, it will be used as the list of device IDs of the actual hardware devices to be used.
  • If current is non-null, it points to the next device ID to be used
  • workers_gpuid is filled with the set of device IDs actually used in the end
  • varname is the name of the environment variable that users can use to override the set of device IDs to be used.
  • nhwgpus is the number of actual devices available on the system.
  • type is the type of devices.

◆ _starpu_get_next_devid()

int _starpu_get_next_devid ( struct _starpu_machine_topology *  topology,
struct _starpu_machine_config *  config,
enum starpu_worker_archtype  arch 
)

Get the next devid for the given architecture type.

◆ _starpu_topology_check_ndevices()

void _starpu_topology_check_ndevices ( int *  ndevices,
unsigned  nhwdevices,
int  overflow,
unsigned  max,
int  reserved,
const char *  nname,
const char *  dname,
const char *  configurename 
)

Check that *ndevices is not larger than nhwdevices (unless overflow is 1), and is not larger than max. Cap it otherwise, and advise using the configurename ./configure option in the max case.

◆ _starpu_get_next_bindid()

unsigned _starpu_get_next_bindid ( struct _starpu_machine_config *  config,
unsigned  flags,
unsigned *  preferred_binding,
unsigned  npreferred 
)

This function gets the identifier of the next core on which to bind a worker. In case a list of preferred cores was specified (logical indexes), we look for an available core among the list if possible, otherwise a round-robin policy is used.

◆ _starpu_destroy_machine_config()

void _starpu_destroy_machine_config ( struct _starpu_machine_config *  config,
int  no_mp_config 
)

Should be called instead of _starpu_destroy_topology when _starpu_build_topology returns a non-zero value.

◆ _starpu_destroy_topology()

void _starpu_destroy_topology ( struct _starpu_machine_config *  config)

Destroy all resources used to store the topology of the machine.

◆ _starpu_numa_get_obj()

hwloc_obj_t _starpu_numa_get_obj ( hwloc_obj_t  obj)

Return the hwloc object of the NUMA node corresponding to the given hwloc object

◆ _starpu_topology_get_nhwcpu()

unsigned _starpu_topology_get_nhwcpu ( struct _starpu_machine_config *  config)

Returns the number of physical CPUs.

◆ _starpu_topology_get_nhwpu()

unsigned _starpu_topology_get_nhwpu ( struct _starpu_machine_config *  config)

Returns the number of logical CPUs.

◆ _starpu_topology_get_nhwnumanodes()

unsigned _starpu_topology_get_nhwnumanodes ( struct _starpu_machine_config *  config)

Returns the number of hardware NUMA nodes.

◆ _starpu_topology_get_nnumanodes()

unsigned _starpu_topology_get_nnumanodes ( struct _starpu_machine_config *  config)

Returns the number of NUMA nodes to be exposed by StarPU as memory nodes; this can be just 1 when STARPU_USE_NUMA is 0.

◆ _starpu_topology_get_numa_core_binding()

unsigned _starpu_topology_get_numa_core_binding ( struct _starpu_machine_config *  config,
const unsigned *  numa_binding,
unsigned  nnuma,
unsigned *  binding,
unsigned  nbinding 
)

Given a list of NUMA nodes (logical indexes) numa_binding, fill binding with the corresponding cores (logical indexes).

◆ _starpu_get_nhyperthreads()

unsigned _starpu_get_nhyperthreads ( )

Returns the number of hyperthreads per core.

◆ _starpu_topology_filter()

void _starpu_topology_filter ( hwloc_topology_t  topology)

Small convenience function to filter the hwloc topology depending on the hwloc API version.

◆ _starpu_bind_thread_on_cpu()

int _starpu_bind_thread_on_cpu ( int  cpuid,
int  workerid,
const char *  name 
)

Bind the current thread on the CPU logically identified by "cpuid". The logical ordering of the processors is either that of hwloc (if available), or the ordering exposed by the OS.

◆ _starpu_bind_thread_on_cpus()

void _starpu_bind_thread_on_cpus ( struct _starpu_combined_worker *  combined_worker)

Bind the current thread on the set of CPUs for the given combined worker.

◆ _starpu_task_data_get_node_on_node()

int _starpu_task_data_get_node_on_node ( struct starpu_task *  task,
unsigned  index,
unsigned  target_node 
)

Get the memory node for data number index when task is to be executed on memory node target_node. Returns -1 if the data does not need to be loaded.

◆ _starpu_task_data_get_node_on_worker()

int _starpu_task_data_get_node_on_worker ( struct starpu_task *  task,
unsigned  index,
unsigned  worker 
)

Get the memory node for data number index when task is to be executed on worker worker. Returns -1 if the data does not need to be loaded.