12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162 |
- #ifndef PTHREADPOOL_H_
- #define PTHREADPOOL_H_
- #include <stddef.h>
- #include <stdint.h>
- typedef struct pthreadpool* pthreadpool_t; /* opaque handle to a thread pool object */
- typedef void (*pthreadpool_task_1d_t)(void*, size_t); /* args: context, i */
- typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t); /* args: context, i, tile extent */
- typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t); /* args: context, i, j */
- typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t); /* args: context, i, j, tile extent along j */
- typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t); /* args: context, i, j, tile extent along i, tile extent along j */
- typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t); /* args: context, i, j, k */
- typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t); /* args: context, i, j, k, tile extent along k */
- typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* args: context, i, j, k, tile extent along j, tile extent along k */
- typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t); /* args: context, i, j, k, l */
- typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* args: context, i, j, k, l, tile extent along l */
- typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); /* args: context, i, j, k, l, tile extent along k, tile extent along l */
- typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* 5D task types: argument order presumably follows the 1D-4D pattern - TODO confirm against the 5D parallelize declarations */
- typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
- typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
- typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); /* 6D task types: argument order presumably follows the 1D-4D pattern - TODO confirm against the 6D parallelize declarations */
- typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
- typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
- typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t); /* args: context, uarch_index, i */
- typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); /* args: context, uarch_index, i, j, tile extent along i, tile extent along j */
- typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); /* args: context, uarch_index, i, j, k, tile extent along j, tile extent along k */
- typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t); /* args: context, uarch_index, i, j, k, l, tile extent along k, tile extent along l */
- /**
- * Disable support for denormalized numbers to the maximum extent possible for
- * the duration of the computation.
- *
- * Handling denormalized floating-point numbers is often implemented in
- * microcode, and incurs significant performance degradation. This hint
- * instructs the thread pool to disable support for denormalized numbers before
- * running the computation by manipulating architecture-specific control
- * registers, and to restore the initial value of the control registers after
- * the computation is complete. The thread pool temporarily disables
- * denormalized numbers on all threads involved in the computation (i.e. the
- * caller threads, and potentially worker threads).
- *
- * Disabling denormalized numbers may have a small negative effect on results'
- * accuracy. As various architectures differ in capabilities to control
- * processing of denormalized numbers, using this flag may also hurt results'
- * reproducibility across different instruction set architectures.
- *
- * This is a single-bit value meant to be combined bitwise with other flags in
- * the flags argument of the pthreadpool_parallelize_* functions.
- */
- #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
- /**
- * Yield worker threads to the system scheduler after the operation is finished.
- *
- * Force workers to use kernel wait (instead of the default active spin-wait)
- * when waiting for new commands after this command is processed. This flag
- * affects only the immediate next operation on this thread pool. To make the
- * thread pool always use kernel wait, pass this flag to all parallelization
- * functions.
- *
- * This is a single-bit value meant to be combined bitwise with other flags in
- * the flags argument of the pthreadpool_parallelize_* functions.
- */
- #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
- #ifdef __cplusplus
- extern "C" {
- #endif
- /**
- * Create a thread pool with the specified number of threads.
- *
- * @param threads_count the number of threads in the thread pool.
- * A value of 0 has a special interpretation: it creates a thread pool with
- * as many threads as there are logical processors in the system.
- *
- * @returns A pointer to an opaque thread pool object if the call is
- * successful, or a NULL pointer if the call failed.
- *
- * NOTE(review): the function that releases the returned thread pool is not
- * declared in this part of the header - confirm the cleanup call to pair
- * with this one.
- */
- pthreadpool_t pthreadpool_create(size_t threads_count);
- /**
- * Query the number of threads in a thread pool.
- *
- * @param threadpool the thread pool to query.
- *
- * @returns The number of threads in the thread pool.
- *
- * NOTE(review): the result for a NULL threadpool is not specified here, and
- * for a pool created with threads_count == 0 the result is presumably the
- * number of logical processors - TODO confirm both against the
- * implementation.
- */
- size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
- /**
- * Process items on a 1D grid.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range; i++)
- * function(context, i);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each item. It receives context
- * followed by the item index i.
- * @param context the first argument passed to the specified function.
- * @param range the number of items on the 1D grid to process. The
- * specified function will be called once for each item.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_1d_t function,
- void* context,
- size_t range,
- uint32_t flags);
- /**
- * Process items on a 1D grid using a microarchitecture-aware task function.
- *
- * The function implements a parallel version of the following snippet:
- *
- * uint32_t uarch_index = cpuinfo_initialize() ?
- * cpuinfo_get_current_uarch_index() : default_uarch_index;
- * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
- * for (size_t i = 0; i < range; i++)
- * function(context, uarch_index, i);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If
- * threadpool is NULL, all items are processed serially on the calling
- * thread.
- * @param function the function to call for each item. It receives
- * context, the selected microarchitecture index, and the item index i.
- * @param context the first argument passed to the specified
- * function.
- * @param default_uarch_index the microarchitecture index to use when
- * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
- * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
- * max_uarch_index value.
- * @param max_uarch_index the maximum microarchitecture index expected by
- * the specified function. If the index returned by
- * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
- * will be used instead. default_uarch_index can exceed max_uarch_index.
- * @param range the number of items on the 1D grid to process.
- * The specified function will be called once for each item.
- * @param flags a bitwise combination of zero or more optional
- * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or
- * PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_1d_with_uarch(
- pthreadpool_t threadpool,
- pthreadpool_task_1d_with_id_t function,
- void* context,
- uint32_t default_uarch_index,
- uint32_t max_uarch_index,
- size_t range,
- uint32_t flags);
- /**
- * Process items on a 1D grid with specified maximum tile size.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range; i += tile)
- * function(context, i, min(range - i, tile));
- *
- * When the call returns, all items have been processed and the thread pool is
- * ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool,
- * the calls are serialized.
- *
- * @note The reference loop above advances by tile and would not terminate for
- * tile == 0, so tile is presumably required to be non-zero - TODO confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the index of the first item in the tile, and the number of items in the
- * tile (which is smaller than tile for a partial last tile).
- * @param context the first argument passed to the specified function.
- * @param range the number of items on the 1D grid to process.
- * @param tile the maximum number of items on the 1D grid to process in
- * one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_1d_tile_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_1d_tile_1d_t function,
- void* context,
- size_t range,
- size_t tile,
- uint32_t flags);
- /**
- * Process items on a 2D grid.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * function(context, i, j);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each item. It receives context
- * followed by the item indices i and j.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 2D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 2D grid.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_2d(
- pthreadpool_t threadpool,
- pthreadpool_task_2d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- uint32_t flags);
- /**
- * Process items on a 2D grid with the specified maximum tile size along the
- * last grid dimension.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j += tile_j)
- * function(context, i, j, min(range_j - j, tile_j));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loop above advances by tile_j and would not terminate
- * for tile_j == 0, so tile_j is presumably required to be non-zero - TODO
- * confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the indices i and j of the first item in the tile, and the number of
- * items in the tile along the second dimension (smaller than tile_j for a
- * partial last tile).
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 2D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 2D grid.
- * @param tile_j the maximum number of items along the second dimension of
- * the 2D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_2d_tile_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_2d_tile_1d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t tile_j,
- uint32_t flags);
- /**
- * Process items on a 2D grid with the specified maximum tile size along each
- * grid dimension.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i += tile_i)
- * for (size_t j = 0; j < range_j; j += tile_j)
- * function(context, i, j,
- * min(range_i - i, tile_i), min(range_j - j, tile_j));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loops above advance by tile_i and tile_j and would not
- * terminate for a zero tile size, so both are presumably required to be
- * non-zero - TODO confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the indices i and j of the first item in the tile, and the number of
- * items in the tile along the first and second dimensions (smaller than
- * tile_i / tile_j for partial tiles at the end of a dimension).
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 2D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 2D grid.
- * @param tile_i the maximum number of items along the first dimension of
- * the 2D grid to process in one function call.
- * @param tile_j the maximum number of items along the second dimension of
- * the 2D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_2d_tile_2d(
- pthreadpool_t threadpool,
- pthreadpool_task_2d_tile_2d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t tile_i,
- size_t tile_j,
- uint32_t flags);
- /**
- * Process items on a 2D grid with the specified maximum tile size along each
- * grid dimension using a microarchitecture-aware task function.
- *
- * The function implements a parallel version of the following snippet:
- *
- * uint32_t uarch_index = cpuinfo_initialize() ?
- * cpuinfo_get_current_uarch_index() : default_uarch_index;
- * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
- * for (size_t i = 0; i < range_i; i += tile_i)
- * for (size_t j = 0; j < range_j; j += tile_j)
- * function(context, uarch_index, i, j,
- * min(range_i - i, tile_i), min(range_j - j, tile_j));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loops above advance by tile_i and tile_j and would not
- * terminate for a zero tile size, so both are presumably required to be
- * non-zero - TODO confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If
- * threadpool is NULL, all items are processed serially on the calling
- * thread.
- * @param function the function to call for each tile. It receives
- * context, the selected microarchitecture index, the indices i and j of
- * the first item in the tile, and the number of items in the tile along
- * the first and second dimensions.
- * @param context the first argument passed to the specified
- * function.
- * @param default_uarch_index the microarchitecture index to use when
- * pthreadpool is configured without cpuinfo,
- * cpuinfo initialization failed, or the index returned
- * by cpuinfo_get_current_uarch_index() exceeds
- * the max_uarch_index value.
- * @param max_uarch_index the maximum microarchitecture index expected
- * by the specified function. If the index returned
- * by cpuinfo_get_current_uarch_index() exceeds this
- * value, default_uarch_index will be used instead.
- * default_uarch_index can exceed max_uarch_index.
- * @param range_i the number of items to process along the first
- * dimension of the 2D grid.
- * @param range_j the number of items to process along the second
- * dimension of the 2D grid.
- * @param tile_i the maximum number of items along the first
- * dimension of the 2D grid to process in one function call.
- * @param tile_j the maximum number of items along the second
- * dimension of the 2D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional
- * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or
- * PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_2d_tile_2d_with_uarch(
- pthreadpool_t threadpool,
- pthreadpool_task_2d_tile_2d_with_id_t function,
- void* context,
- uint32_t default_uarch_index,
- uint32_t max_uarch_index,
- size_t range_i,
- size_t range_j,
- size_t tile_i,
- size_t tile_j,
- uint32_t flags);
- /**
- * Process items on a 3D grid.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * function(context, i, j, k);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each item. It receives context
- * followed by the item indices i, j and k.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 3D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 3D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 3D grid.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_3d(
- pthreadpool_t threadpool,
- pthreadpool_task_3d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- uint32_t flags);
- /**
- * Process items on a 3D grid with the specified maximum tile size along the
- * last grid dimension.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k += tile_k)
- * function(context, i, j, k, min(range_k - k, tile_k));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loop above advances by tile_k and would not terminate
- * for tile_k == 0, so tile_k is presumably required to be non-zero - TODO
- * confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the indices i, j and k of the first item in the tile, and the number of
- * items in the tile along the third dimension (smaller than tile_k for a
- * partial last tile).
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 3D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 3D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 3D grid.
- * @param tile_k the maximum number of items along the third dimension of
- * the 3D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_3d_tile_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_3d_tile_1d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t tile_k,
- uint32_t flags);
- /**
- * Process items on a 3D grid with the specified maximum tile size along the
- * last two grid dimensions.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j += tile_j)
- * for (size_t k = 0; k < range_k; k += tile_k)
- * function(context, i, j, k,
- * min(range_j - j, tile_j), min(range_k - k, tile_k));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loops above advance by tile_j and tile_k and would not
- * terminate for a zero tile size, so both are presumably required to be
- * non-zero - TODO confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the indices i, j and k of the first item in the tile, and the number of
- * items in the tile along the second and third dimensions (smaller than
- * tile_j / tile_k for partial tiles at the end of a dimension).
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 3D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 3D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 3D grid.
- * @param tile_j the maximum number of items along the second dimension of
- * the 3D grid to process in one function call.
- * @param tile_k the maximum number of items along the third dimension of
- * the 3D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_3d_tile_2d(
- pthreadpool_t threadpool,
- pthreadpool_task_3d_tile_2d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t tile_j,
- size_t tile_k,
- uint32_t flags);
- /**
- * Process items on a 3D grid with the specified maximum tile size along the
- * last two grid dimensions using a microarchitecture-aware task function.
- *
- * The function implements a parallel version of the following snippet:
- *
- * uint32_t uarch_index = cpuinfo_initialize() ?
- * cpuinfo_get_current_uarch_index() : default_uarch_index;
- * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j += tile_j)
- * for (size_t k = 0; k < range_k; k += tile_k)
- * function(context, uarch_index, i, j, k,
- * min(range_j - j, tile_j), min(range_k - k, tile_k));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loops above advance by tile_j and tile_k and would not
- * terminate for a zero tile size, so both are presumably required to be
- * non-zero - TODO confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If
- * threadpool is NULL, all items are processed serially on the calling
- * thread.
- * @param function the function to call for each tile. It receives
- * context, the selected microarchitecture index, the indices i, j and k of
- * the first item in the tile, and the number of items in the tile along
- * the second and third dimensions.
- * @param context the first argument passed to the specified
- * function.
- * @param default_uarch_index the microarchitecture index to use when
- * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
- * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
- * max_uarch_index value.
- * @param max_uarch_index the maximum microarchitecture index expected by
- * the specified function. If the index returned by
- * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
- * will be used instead. default_uarch_index can exceed max_uarch_index.
- * @param range_i the number of items to process along the first
- * dimension of the 3D grid.
- * @param range_j the number of items to process along the second
- * dimension of the 3D grid.
- * @param range_k the number of items to process along the third
- * dimension of the 3D grid.
- * @param tile_j the maximum number of items along the second
- * dimension of the 3D grid to process in one function call.
- * @param tile_k the maximum number of items along the third
- * dimension of the 3D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional
- * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or
- * PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_3d_tile_2d_with_uarch(
- pthreadpool_t threadpool,
- pthreadpool_task_3d_tile_2d_with_id_t function,
- void* context,
- uint32_t default_uarch_index,
- uint32_t max_uarch_index,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t tile_j,
- size_t tile_k,
- uint32_t flags);
- /**
- * Process items on a 4D grid.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l++)
- * function(context, i, j, k, l);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each item. It receives context
- * followed by the item indices i, j, k and l.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 4D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 4D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 4D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 4D grid.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_4d(
- pthreadpool_t threadpool,
- pthreadpool_task_4d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- uint32_t flags);
- /**
- * Process items on a 4D grid with the specified maximum tile size along the
- * last grid dimension.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l += tile_l)
- * function(context, i, j, k, l, min(range_l - l, tile_l));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loop above advances by tile_l and would not terminate
- * for tile_l == 0, so tile_l is presumably required to be non-zero - TODO
- * confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the indices i, j, k and l of the first item in the tile, and the number
- * of items in the tile along the fourth dimension (smaller than tile_l for
- * a partial last tile).
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 4D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 4D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 4D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 4D grid.
- * @param tile_l the maximum number of items along the fourth dimension of
- * the 4D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_4d_tile_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_4d_tile_1d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t tile_l,
- uint32_t flags);
- /**
- * Process items on a 4D grid with the specified maximum tile size along the
- * last two grid dimensions.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k += tile_k)
- * for (size_t l = 0; l < range_l; l += tile_l)
- * function(context, i, j, k, l,
- * min(range_k - k, tile_k), min(range_l - l, tile_l));
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @note The reference loops above advance by tile_k and tile_l and would not
- * terminate for a zero tile size, so both are presumably required to be
- * non-zero - TODO confirm.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile. It receives context,
- * the indices i, j, k and l of the first item in the tile, and the number
- * of items in the tile along the third and fourth dimensions (smaller than
- * tile_k / tile_l for partial tiles at the end of a dimension).
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 4D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 4D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 4D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 4D grid.
- * @param tile_k the maximum number of items along the third dimension of
- * the 4D grid to process in one function call.
- * @param tile_l the maximum number of items along the fourth dimension of
- * the 4D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS and/or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_4d_tile_2d(
- pthreadpool_t threadpool,
- pthreadpool_task_4d_tile_2d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t tile_k,
- size_t tile_l,
- uint32_t flags);
- /**
- * Process items on a 4D grid with the specified maximum tile size along the
- * last two grid dimensions using a microarchitecture-aware task function.
- *
- * The function implements a parallel version of the following snippet:
- *
- * uint32_t uarch_index = cpuinfo_initialize() ?
- * cpuinfo_get_current_uarch_index() : default_uarch_index;
- * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k += tile_k)
- * for (size_t l = 0; l < range_l; l += tile_l)
- * function(context, uarch_index, i, j, k, l,
- * min(range_k - k, tile_k), min(range_l - l, tile_l));
- *
- * The task function receives the selected microarchitecture index as its
- * second argument, directly after context. Its last two arguments are the
- * actual tile extents along the third and fourth dimensions: they equal tile_k
- * and tile_l for full tiles and are smaller for the final, partial tile of a
- * dimension whose range is not a multiple of the tile size.
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If
- * threadpool is NULL, all items are processed serially on the calling
- * thread.
- * @param function the function to call for each tile.
- * @param context the first argument passed to the specified
- * function.
- * @param default_uarch_index the microarchitecture index to use when
- * pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
- * or the index returned by cpuinfo_get_current_uarch_index() exceeds the
- * max_uarch_index value.
- * @param max_uarch_index the maximum microarchitecture index expected by
- * the specified function. If the index returned by
- * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
- * will be used instead. default_uarch_index can exceed max_uarch_index.
- * @param range_i the number of items to process along the first
- * dimension of the 4D grid.
- * @param range_j the number of items to process along the second
- * dimension of the 4D grid.
- * @param range_k the number of items to process along the third
- * dimension of the 4D grid.
- * @param range_l the number of items to process along the fourth
- * dimension of the 4D grid.
- * @param tile_k the maximum number of items along the third
- * dimension of the 4D grid to process in one function call.
- * @param tile_l the maximum number of items along the fourth
- * dimension of the 4D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional
- * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
- * PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_4d_tile_2d_with_uarch(
- pthreadpool_t threadpool,
- pthreadpool_task_4d_tile_2d_with_id_t function,
- void* context,
- uint32_t default_uarch_index,
- uint32_t max_uarch_index,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t tile_k,
- size_t tile_l,
- uint32_t flags);
- /**
- * Process items on a 5D grid.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l++)
- * for (size_t m = 0; m < range_m; m++)
- * function(context, i, j, k, l, m);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each item. This variant does
- * not tile, so the function is invoked once per (i, j, k, l, m) index.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 5D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 5D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 5D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 5D grid.
- * @param range_m the number of items to process along the fifth dimension
- * of the 5D grid.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_5d(
- pthreadpool_t threadpool,
- pthreadpool_task_5d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t range_m,
- uint32_t flags);
- /**
- * Process items on a 5D grid with the specified maximum tile size along the
- * last grid dimension.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l++)
- * for (size_t m = 0; m < range_m; m += tile_m)
- * function(context, i, j, k, l, m, min(range_m - m, tile_m));
- *
- * The last argument passed to the function is the actual tile extent along
- * the fifth dimension. It equals tile_m for full tiles and is smaller for the
- * final, partial tile when range_m is not a multiple of tile_m.
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 5D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 5D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 5D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 5D grid.
- * @param range_m the number of items to process along the fifth dimension
- * of the 5D grid.
- * @param tile_m the maximum number of items along the fifth dimension of
- * the 5D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_5d_tile_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_5d_tile_1d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t range_m,
- size_t tile_m,
- uint32_t flags);
- /**
- * Process items on a 5D grid with the specified maximum tile size along the
- * last two grid dimensions.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l += tile_l)
- * for (size_t m = 0; m < range_m; m += tile_m)
- * function(context, i, j, k, l, m,
- * min(range_l - l, tile_l), min(range_m - m, tile_m));
- *
- * The last two arguments passed to the function are the actual tile extents
- * along the fourth and fifth dimensions. They equal tile_l and tile_m for
- * full tiles and are smaller for the final, partial tile of a dimension whose
- * range is not a multiple of the tile size.
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 5D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 5D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 5D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 5D grid.
- * @param range_m the number of items to process along the fifth dimension
- * of the 5D grid.
- * @param tile_l the maximum number of items along the fourth dimension of
- * the 5D grid to process in one function call.
- * @param tile_m the maximum number of items along the fifth dimension of
- * the 5D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_5d_tile_2d(
- pthreadpool_t threadpool,
- pthreadpool_task_5d_tile_2d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t range_m,
- size_t tile_l,
- size_t tile_m,
- uint32_t flags);
- /**
- * Process items on a 6D grid.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l++)
- * for (size_t m = 0; m < range_m; m++)
- * for (size_t n = 0; n < range_n; n++)
- * function(context, i, j, k, l, m, n);
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each item. This variant does
- * not tile, so the function is invoked once per (i, j, k, l, m, n) index.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 6D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 6D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 6D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 6D grid.
- * @param range_m the number of items to process along the fifth dimension
- * of the 6D grid.
- * @param range_n the number of items to process along the sixth dimension
- * of the 6D grid.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_6d(
- pthreadpool_t threadpool,
- pthreadpool_task_6d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t range_m,
- size_t range_n,
- uint32_t flags);
- /**
- * Process items on a 6D grid with the specified maximum tile size along the
- * last grid dimension.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l++)
- * for (size_t m = 0; m < range_m; m++)
- * for (size_t n = 0; n < range_n; n += tile_n)
- * function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
- *
- * The last argument passed to the function is the actual tile extent along
- * the sixth dimension. It equals tile_n for full tiles and is smaller for the
- * final, partial tile when range_n is not a multiple of tile_n.
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 6D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 6D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 6D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 6D grid.
- * @param range_m the number of items to process along the fifth dimension
- * of the 6D grid.
- * @param range_n the number of items to process along the sixth dimension
- * of the 6D grid.
- * @param tile_n the maximum number of items along the sixth dimension of
- * the 6D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_6d_tile_1d(
- pthreadpool_t threadpool,
- pthreadpool_task_6d_tile_1d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t range_m,
- size_t range_n,
- size_t tile_n,
- uint32_t flags);
- /**
- * Process items on a 6D grid with the specified maximum tile size along the
- * last two grid dimensions.
- *
- * The function implements a parallel version of the following snippet:
- *
- * for (size_t i = 0; i < range_i; i++)
- * for (size_t j = 0; j < range_j; j++)
- * for (size_t k = 0; k < range_k; k++)
- * for (size_t l = 0; l < range_l; l++)
- * for (size_t m = 0; m < range_m; m += tile_m)
- * for (size_t n = 0; n < range_n; n += tile_n)
- * function(context, i, j, k, l, m, n,
- * min(range_m - m, tile_m), min(range_n - n, tile_n));
- *
- * The last two arguments passed to the function are the actual tile extents
- * along the fifth and sixth dimensions. They equal tile_m and tile_n for
- * full tiles and are smaller for the final, partial tile of a dimension whose
- * range is not a multiple of the tile size.
- *
- * When the function returns, all items have been processed and the thread pool
- * is ready for a new task.
- *
- * @note If multiple threads call this function with the same thread pool, the
- * calls are serialized.
- *
- * @param threadpool the thread pool to use for parallelisation. If threadpool
- * is NULL, all items are processed serially on the calling thread.
- * @param function the function to call for each tile.
- * @param context the first argument passed to the specified function.
- * @param range_i the number of items to process along the first dimension
- * of the 6D grid.
- * @param range_j the number of items to process along the second dimension
- * of the 6D grid.
- * @param range_k the number of items to process along the third dimension
- * of the 6D grid.
- * @param range_l the number of items to process along the fourth dimension
- * of the 6D grid.
- * @param range_m the number of items to process along the fifth dimension
- * of the 6D grid.
- * @param range_n the number of items to process along the sixth dimension
- * of the 6D grid.
- * @param tile_m the maximum number of items along the fifth dimension of
- * the 6D grid to process in one function call.
- * @param tile_n the maximum number of items along the sixth dimension of
- * the 6D grid to process in one function call.
- * @param flags a bitwise combination of zero or more optional flags
- * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
- */
- void pthreadpool_parallelize_6d_tile_2d(
- pthreadpool_t threadpool,
- pthreadpool_task_6d_tile_2d_t function,
- void* context,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t range_m,
- size_t range_n,
- size_t tile_m,
- size_t tile_n,
- uint32_t flags);
- /**
- * Terminates threads in the thread pool and releases associated resources.
- *
- * @warning Accessing the thread pool after a call to this function constitutes
- * undefined behaviour and may cause data corruption.
- *
- * NOTE(review): whether a NULL threadpool is accepted here is not stated in
- * this header - confirm against the implementation before relying on it.
- *
- * @param[in,out] threadpool The thread pool to destroy.
- */
- void pthreadpool_destroy(pthreadpool_t threadpool);
- #ifndef PTHREADPOOL_NO_DEPRECATED_API
- /*
- * Legacy API for compatibility with pre-existing users (e.g. NNPACK).
- * Everything in this section is compiled out when
- * PTHREADPOOL_NO_DEPRECATED_API is defined before this point.
- */
- /*
- * PTHREADPOOL_DEPRECATED is appended to every legacy declaration below. It
- * expands to the deprecated attribute when __GNUC__ is defined and to nothing
- * otherwise.
- */
- #if defined(__GNUC__)
- #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
- #else
- #define PTHREADPOOL_DEPRECATED
- #endif
- /*
- * Callback types for the legacy entry points. Each takes a void* followed
- * only by size_t arguments.
- * NOTE(review): presumably the void* is the caller-supplied `argument` and
- * the size_t values are item indices followed by tile extents - the exact
- * meaning is not documented in this header; confirm against the
- * implementation.
- */
- typedef void (*pthreadpool_function_1d_t)(void*, size_t);
- typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t);
- typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t);
- typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t);
- typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
- typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
- /*
- * Legacy entry points. Unlike the pthreadpool_parallelize_* functions
- * declared above, none of them takes a flags parameter, and the tiled
- * variants take a tile size for every grid dimension.
- */
- /* Deprecated: legacy entry point over a 1D range. */
- void pthreadpool_compute_1d(
- pthreadpool_t threadpool,
- pthreadpool_function_1d_t function,
- void* argument,
- size_t range) PTHREADPOOL_DEPRECATED;
- /* Deprecated: legacy entry point over a 1D range with a tile size. */
- void pthreadpool_compute_1d_tiled(
- pthreadpool_t threadpool,
- pthreadpool_function_1d_tiled_t function,
- void* argument,
- size_t range,
- size_t tile) PTHREADPOOL_DEPRECATED;
- /* Deprecated: legacy entry point over a 2D range. */
- void pthreadpool_compute_2d(
- pthreadpool_t threadpool,
- pthreadpool_function_2d_t function,
- void* argument,
- size_t range_i,
- size_t range_j) PTHREADPOOL_DEPRECATED;
- /* Deprecated: legacy entry point over a 2D range with a tile size per dimension. */
- void pthreadpool_compute_2d_tiled(
- pthreadpool_t threadpool,
- pthreadpool_function_2d_tiled_t function,
- void* argument,
- size_t range_i,
- size_t range_j,
- size_t tile_i,
- size_t tile_j) PTHREADPOOL_DEPRECATED;
- /* Deprecated: legacy entry point over a 3D range with a tile size per dimension. */
- void pthreadpool_compute_3d_tiled(
- pthreadpool_t threadpool,
- pthreadpool_function_3d_tiled_t function,
- void* argument,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t tile_i,
- size_t tile_j,
- size_t tile_k) PTHREADPOOL_DEPRECATED;
- /* Deprecated: legacy entry point over a 4D range with a tile size per dimension. */
- void pthreadpool_compute_4d_tiled(
- pthreadpool_t threadpool,
- pthreadpool_function_4d_tiled_t function,
- void* argument,
- size_t range_i,
- size_t range_j,
- size_t range_k,
- size_t range_l,
- size_t tile_i,
- size_t tile_j,
- size_t tile_k,
- size_t tile_l) PTHREADPOOL_DEPRECATED;
- #endif /* PTHREADPOOL_NO_DEPRECATED_API */
- #ifdef __cplusplus
- } /* extern "C" */
- #endif
- #endif /* PTHREADPOOL_H_ */
|