pthreadpool.h 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162
  1. #ifndef PTHREADPOOL_H_
  2. #define PTHREADPOOL_H_
  3. #include <stddef.h>
  4. #include <stdint.h>
  5. typedef struct pthreadpool* pthreadpool_t; /* opaque thread pool handle */
  6. typedef void (*pthreadpool_task_1d_t)(void*, size_t); /* args: context, i */
  7. typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t); /* args: context, i, tile size */
  8. typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t); /* args: context, i, j */
  9. typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t); /* args: context, i, j, tile_j size */
  10. typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t); /* args: context, i, j, tile_i size, tile_j size */
  11. typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t); /* args: context, i, j, k */
  12. typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t); /* args: context, i, j, k, tile_k size */
  13. typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* args: context, i, j, k, tile_j size, tile_k size */
  14. typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t); /* args: context, i, j, k, l */
  15. typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* args: context, i, j, k, l, tile_l size */
  16. typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); /* args: context, i, j, k, l, tile_k size, tile_l size */
  17. typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* presumably context + five grid indices; entry point not in view - confirm */
  18. typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably context, five indices, one tile size - confirm */
  19. typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably context, five indices, two tile sizes - confirm */
  20. typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably context + six grid indices; entry point not in view - confirm */
  21. typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably context, six indices, one tile size - confirm */
  22. typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably context, six indices, two tile sizes - confirm */
  23. typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t); /* args: context, uarch_index, i */
  24. typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); /* args: context, uarch_index, i, j, tile_i size, tile_j size */
  25. typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); /* args: context, uarch_index, i, j, k, tile_j size, tile_k size */
  26. typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably context, uarch_index, i, j, k, l, tile_k size, tile_l size - confirm */
  27. /**
  28. * Disable support for denormalized numbers to the maximum extent possible for
  29. * the duration of the computation.
  30. *
  31. * Handling denormalized floating-point numbers is often implemented in
  32. * microcode, and incurs significant performance degradation. This hint
  33. * instructs the thread pool to disable support for denormalized numbers before
  34. * running the computation by manipulating architecture-specific control
  35. * registers, and restore the initial value of control registers after the
  36. * computation is complete. The thread pool temporarily disables denormalized
  37. * numbers on all threads involved in the computation (i.e. the caller threads,
  38. * and potentially worker threads).
  39. *
  40. * Disabling denormalized numbers may have a small negative effect on results'
  41. * accuracy. As various architectures differ in capabilities to control
  42. * processing of denormalized numbers, using this flag may also hurt results'
  43. * reproducibility across different instruction set architectures.
  44. */
  45. #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001
  46. /**
  47. * Yield worker threads to the system scheduler after the operation is finished.
  48. *
  49. * Force workers to use kernel wait (instead of the default active spin-wait)
  50. * for new commands after this command is processed. This flag affects only the
  51. * immediate next operation on this thread pool. To make the thread pool always
  52. * use kernel wait, pass this flag to all parallelization functions.
  53. */
  54. #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002
  55. #ifdef __cplusplus
  56. extern "C" {
  57. #endif
  58. /**
  59. * Create a thread pool with the specified number of threads.
  60. *
  61. * @param threads_count the number of threads in the thread pool.
  62. *     A value of 0 has a special interpretation: it creates a thread pool with
  63. *     as many threads as there are logical processors in the system.
  64. *
  65. * @returns A pointer to an opaque thread pool object if the call is
  66. *     successful, or a NULL pointer if the call failed.
  67. */
  68. pthreadpool_t pthreadpool_create(size_t threads_count);
  69. /**
  70. * Query the number of threads in a thread pool.
  71. *
  72. * @param threadpool the thread pool to query.
  73. *     NOTE(review): the result for a NULL threadpool is not documented here - confirm.
  74. * @returns The number of threads in the thread pool.
  75. */
  76. size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);
  77. /**
  78. * Process items on a 1D grid.
  79. *
  80. * The function implements a parallel version of the following snippet:
  81. *
  82. *   for (size_t i = 0; i < range; i++)
  83. *     function(context, i);
  84. *
  85. * When the function returns, all items have been processed and the thread pool
  86. * is ready for a new task.
  87. *
  88. * @note If multiple threads call this function with the same thread pool, the
  89. *     calls are serialized.
  90. *
  91. * @param threadpool the thread pool to use for parallelisation. If threadpool
  92. *     is NULL, all items are processed serially on the calling thread.
  93. * @param function the function to call for each item.
  94. * @param context the first argument passed to the specified function.
  95. * @param range the number of items on the 1D grid to process. The
  96. *     specified function will be called once for each item.
  97. * @param flags a bitwise combination of zero or more optional flags
  98. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  99. */
  100. void pthreadpool_parallelize_1d(
  101. pthreadpool_t threadpool,
  102. pthreadpool_task_1d_t function,
  103. void* context,
  104. size_t range,
  105. uint32_t flags);
  106. /**
  107. * Process items on a 1D grid using a microarchitecture-aware task function.
  108. *
  109. * The function implements a parallel version of the following snippet:
  110. *
  111. *   uint32_t uarch_index = cpuinfo_initialize() ?
  112. *       cpuinfo_get_current_uarch_index() : default_uarch_index;
  113. *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
  114. *   for (size_t i = 0; i < range; i++)
  115. *     function(context, uarch_index, i);
  116. *
  117. * When the function returns, all items have been processed and the thread pool
  118. * is ready for a new task.
  119. *
  120. * @note If multiple threads call this function with the same thread pool, the
  121. *     calls are serialized.
  122. *
  123. * @param threadpool the thread pool to use for parallelisation. If
  124. *     threadpool is NULL, all items are processed serially on the calling
  125. *     thread.
  126. * @param function the function to call for each item.
  127. * @param context the first argument passed to the specified
  128. *     function.
  129. * @param default_uarch_index the microarchitecture index to use when
  130. *     pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
  131. *     or the index returned by cpuinfo_get_current_uarch_index() exceeds the
  132. *     max_uarch_index value.
  133. * @param max_uarch_index the maximum microarchitecture index expected by
  134. *     the specified function. If the index returned by
  135. *     cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
  136. *     will be used instead. default_uarch_index can exceed max_uarch_index.
  137. * @param range the number of items on the 1D grid to process.
  138. *     The specified function will be called once for each item.
  139. * @param flags a bitwise combination of zero or more optional
  140. *     flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
  141. *     PTHREADPOOL_FLAG_YIELD_WORKERS).
  142. */
  143. void pthreadpool_parallelize_1d_with_uarch(
  144. pthreadpool_t threadpool,
  145. pthreadpool_task_1d_with_id_t function,
  146. void* context,
  147. uint32_t default_uarch_index,
  148. uint32_t max_uarch_index,
  149. size_t range,
  150. uint32_t flags);
  151. /**
  152. * Process items on a 1D grid with specified maximum tile size.
  153. *
  154. * The function implements a parallel version of the following snippet:
  155. *
  156. *   for (size_t i = 0; i < range; i += tile)
  157. *     function(context, i, min(range - i, tile));
  158. *
  159. * When the call returns, all items have been processed and the thread pool is
  160. * ready for a new task.
  161. *
  162. * @note If multiple threads call this function with the same thread pool,
  163. *     the calls are serialized.
  164. *
  165. * @param threadpool the thread pool to use for parallelisation. If threadpool
  166. *     is NULL, all items are processed serially on the calling thread.
  167. * @param function the function to call for each tile.
  168. * @param context the first argument passed to the specified function.
  169. * @param range the number of items on the 1D grid to process.
  170. * @param tile the maximum number of items on the 1D grid to process in
  171. *     one function call.
  172. * @param flags a bitwise combination of zero or more optional flags
  173. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  174. */
  175. void pthreadpool_parallelize_1d_tile_1d(
  176. pthreadpool_t threadpool,
  177. pthreadpool_task_1d_tile_1d_t function,
  178. void* context,
  179. size_t range,
  180. size_t tile,
  181. uint32_t flags);
  182. /**
  183. * Process items on a 2D grid.
  184. *
  185. * The function implements a parallel version of the following snippet:
  186. *
  187. *   for (size_t i = 0; i < range_i; i++)
  188. *     for (size_t j = 0; j < range_j; j++)
  189. *       function(context, i, j);
  190. *
  191. * When the function returns, all items have been processed and the thread pool
  192. * is ready for a new task.
  193. *
  194. * @note If multiple threads call this function with the same thread pool, the
  195. *     calls are serialized.
  196. *
  197. * @param threadpool the thread pool to use for parallelisation. If threadpool
  198. *     is NULL, all items are processed serially on the calling thread.
  199. * @param function the function to call for each item.
  200. * @param context the first argument passed to the specified function.
  201. * @param range_i the number of items to process along the first dimension
  202. *     of the 2D grid.
  203. * @param range_j the number of items to process along the second dimension
  204. *     of the 2D grid.
  205. * @param flags a bitwise combination of zero or more optional flags
  206. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  207. */
  208. void pthreadpool_parallelize_2d(
  209. pthreadpool_t threadpool,
  210. pthreadpool_task_2d_t function,
  211. void* context,
  212. size_t range_i,
  213. size_t range_j,
  214. uint32_t flags);
  215. /**
  216. * Process items on a 2D grid with the specified maximum tile size along the
  217. * last grid dimension.
  218. *
  219. * The function implements a parallel version of the following snippet:
  220. *
  221. *   for (size_t i = 0; i < range_i; i++)
  222. *     for (size_t j = 0; j < range_j; j += tile_j)
  223. *       function(context, i, j, min(range_j - j, tile_j));
  224. *
  225. * When the function returns, all items have been processed and the thread pool
  226. * is ready for a new task.
  227. *
  228. * @note If multiple threads call this function with the same thread pool, the
  229. *     calls are serialized.
  230. *
  231. * @param threadpool the thread pool to use for parallelisation. If threadpool
  232. *     is NULL, all items are processed serially on the calling thread.
  233. * @param function the function to call for each tile.
  234. * @param context the first argument passed to the specified function.
  235. * @param range_i the number of items to process along the first dimension
  236. *     of the 2D grid.
  237. * @param range_j the number of items to process along the second dimension
  238. *     of the 2D grid.
  239. * @param tile_j the maximum number of items along the second dimension of
  240. *     the 2D grid to process in one function call.
  241. * @param flags a bitwise combination of zero or more optional flags
  242. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  243. */
  244. void pthreadpool_parallelize_2d_tile_1d(
  245. pthreadpool_t threadpool,
  246. pthreadpool_task_2d_tile_1d_t function,
  247. void* context,
  248. size_t range_i,
  249. size_t range_j,
  250. size_t tile_j,
  251. uint32_t flags);
  252. /**
  253. * Process items on a 2D grid with the specified maximum tile size along each
  254. * grid dimension.
  255. *
  256. * The function implements a parallel version of the following snippet:
  257. *
  258. *   for (size_t i = 0; i < range_i; i += tile_i)
  259. *     for (size_t j = 0; j < range_j; j += tile_j)
  260. *       function(context, i, j,
  261. *         min(range_i - i, tile_i), min(range_j - j, tile_j));
  262. *
  263. * When the function returns, all items have been processed and the thread pool
  264. * is ready for a new task.
  265. *
  266. * @note If multiple threads call this function with the same thread pool, the
  267. *     calls are serialized.
  268. *
  269. * @param threadpool the thread pool to use for parallelisation. If threadpool
  270. *     is NULL, all items are processed serially on the calling thread.
  271. * @param function the function to call for each tile.
  272. * @param context the first argument passed to the specified function.
  273. * @param range_i the number of items to process along the first dimension
  274. *     of the 2D grid.
  275. * @param range_j the number of items to process along the second dimension
  276. *     of the 2D grid.
  277. * @param tile_i the maximum number of items along the first dimension of
  278. *     the 2D grid to process in one function call.
  279. * @param tile_j the maximum number of items along the second dimension of
  280. *     the 2D grid to process in one function call.
  281. * @param flags a bitwise combination of zero or more optional flags
  282. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  283. */
  284. void pthreadpool_parallelize_2d_tile_2d(
  285. pthreadpool_t threadpool,
  286. pthreadpool_task_2d_tile_2d_t function,
  287. void* context,
  288. size_t range_i,
  289. size_t range_j,
  290. size_t tile_i,
  291. size_t tile_j,
  292. uint32_t flags);
  293. /**
  294. * Process items on a 2D grid with the specified maximum tile size along each
  295. * grid dimension using a microarchitecture-aware task function.
  296. *
  297. * The function implements a parallel version of the following snippet:
  298. *
  299. *   uint32_t uarch_index = cpuinfo_initialize() ?
  300. *       cpuinfo_get_current_uarch_index() : default_uarch_index;
  301. *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
  302. *   for (size_t i = 0; i < range_i; i += tile_i)
  303. *     for (size_t j = 0; j < range_j; j += tile_j)
  304. *       function(context, uarch_index, i, j,
  305. *         min(range_i - i, tile_i), min(range_j - j, tile_j));
  306. *
  307. * When the function returns, all items have been processed and the thread pool
  308. * is ready for a new task.
  309. *
  310. * @note If multiple threads call this function with the same thread pool, the
  311. *     calls are serialized.
  312. *
  313. * @param threadpool the thread pool to use for parallelisation. If
  314. *     threadpool is NULL, all items are processed serially on the calling
  315. *     thread.
  316. * @param function the function to call for each tile.
  317. * @param context the first argument passed to the specified
  318. *     function.
  319. * @param default_uarch_index the microarchitecture index to use when
  320. *     pthreadpool is configured without cpuinfo,
  321. *     cpuinfo initialization failed, or the index returned
  322. *     by cpuinfo_get_current_uarch_index() exceeds
  323. *     the max_uarch_index value.
  324. * @param max_uarch_index the maximum microarchitecture index expected
  325. *     by the specified function. If the index returned
  326. *     by cpuinfo_get_current_uarch_index() exceeds this
  327. *     value, default_uarch_index will be used instead.
  328. *     default_uarch_index can exceed max_uarch_index.
  329. * @param range_i the number of items to process along the first
  330. *     dimension of the 2D grid.
  331. * @param range_j the number of items to process along the second
  332. *     dimension of the 2D grid.
  333. * @param tile_i the maximum number of items along the first
  334. *     dimension of the 2D grid to process in one function call.
  335. * @param tile_j the maximum number of items along the second
  336. *     dimension of the 2D grid to process in one function call.
  337. * @param flags a bitwise combination of zero or more optional
  338. *     flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
  339. *     PTHREADPOOL_FLAG_YIELD_WORKERS).
  340. */
  341. void pthreadpool_parallelize_2d_tile_2d_with_uarch(
  342. pthreadpool_t threadpool,
  343. pthreadpool_task_2d_tile_2d_with_id_t function,
  344. void* context,
  345. uint32_t default_uarch_index,
  346. uint32_t max_uarch_index,
  347. size_t range_i,
  348. size_t range_j,
  349. size_t tile_i,
  350. size_t tile_j,
  351. uint32_t flags);
  352. /**
  353. * Process items on a 3D grid.
  354. *
  355. * The function implements a parallel version of the following snippet:
  356. *
  357. *   for (size_t i = 0; i < range_i; i++)
  358. *     for (size_t j = 0; j < range_j; j++)
  359. *       for (size_t k = 0; k < range_k; k++)
  360. *         function(context, i, j, k);
  361. *
  362. * When the function returns, all items have been processed and the thread pool
  363. * is ready for a new task.
  364. *
  365. * @note If multiple threads call this function with the same thread pool, the
  366. *     calls are serialized.
  367. *
  368. * @param threadpool the thread pool to use for parallelisation. If threadpool
  369. *     is NULL, all items are processed serially on the calling thread.
  370. * @param function the function to call for each item.
  371. * @param context the first argument passed to the specified function.
  372. * @param range_i the number of items to process along the first dimension
  373. *     of the 3D grid.
  374. * @param range_j the number of items to process along the second dimension
  375. *     of the 3D grid.
  376. * @param range_k the number of items to process along the third dimension
  377. *     of the 3D grid.
  378. * @param flags a bitwise combination of zero or more optional flags
  379. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  380. */
  381. void pthreadpool_parallelize_3d(
  382. pthreadpool_t threadpool,
  383. pthreadpool_task_3d_t function,
  384. void* context,
  385. size_t range_i,
  386. size_t range_j,
  387. size_t range_k,
  388. uint32_t flags);
  389. /**
  390. * Process items on a 3D grid with the specified maximum tile size along the
  391. * last grid dimension.
  392. *
  393. * The function implements a parallel version of the following snippet:
  394. *
  395. *   for (size_t i = 0; i < range_i; i++)
  396. *     for (size_t j = 0; j < range_j; j++)
  397. *       for (size_t k = 0; k < range_k; k += tile_k)
  398. *         function(context, i, j, k, min(range_k - k, tile_k));
  399. *
  400. * When the function returns, all items have been processed and the thread pool
  401. * is ready for a new task.
  402. *
  403. * @note If multiple threads call this function with the same thread pool, the
  404. *     calls are serialized.
  405. *
  406. * @param threadpool the thread pool to use for parallelisation. If threadpool
  407. *     is NULL, all items are processed serially on the calling thread.
  408. * @param function the function to call for each tile.
  409. * @param context the first argument passed to the specified function.
  410. * @param range_i the number of items to process along the first dimension
  411. *     of the 3D grid.
  412. * @param range_j the number of items to process along the second dimension
  413. *     of the 3D grid.
  414. * @param range_k the number of items to process along the third dimension
  415. *     of the 3D grid.
  416. * @param tile_k the maximum number of items along the third dimension of
  417. *     the 3D grid to process in one function call.
  418. * @param flags a bitwise combination of zero or more optional flags
  419. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  420. */
  421. void pthreadpool_parallelize_3d_tile_1d(
  422. pthreadpool_t threadpool,
  423. pthreadpool_task_3d_tile_1d_t function,
  424. void* context,
  425. size_t range_i,
  426. size_t range_j,
  427. size_t range_k,
  428. size_t tile_k,
  429. uint32_t flags);
  430. /**
  431. * Process items on a 3D grid with the specified maximum tile size along the
  432. * last two grid dimensions.
  433. *
  434. * The function implements a parallel version of the following snippet:
  435. *
  436. *   for (size_t i = 0; i < range_i; i++)
  437. *     for (size_t j = 0; j < range_j; j += tile_j)
  438. *       for (size_t k = 0; k < range_k; k += tile_k)
  439. *         function(context, i, j, k,
  440. *           min(range_j - j, tile_j), min(range_k - k, tile_k));
  441. *
  442. * When the function returns, all items have been processed and the thread pool
  443. * is ready for a new task.
  444. *
  445. * @note If multiple threads call this function with the same thread pool, the
  446. *     calls are serialized.
  447. *
  448. * @param threadpool the thread pool to use for parallelisation. If threadpool
  449. *     is NULL, all items are processed serially on the calling thread.
  450. * @param function the function to call for each tile.
  451. * @param context the first argument passed to the specified function.
  452. * @param range_i the number of items to process along the first dimension
  453. *     of the 3D grid.
  454. * @param range_j the number of items to process along the second dimension
  455. *     of the 3D grid.
  456. * @param range_k the number of items to process along the third dimension
  457. *     of the 3D grid.
  458. * @param tile_j the maximum number of items along the second dimension of
  459. *     the 3D grid to process in one function call.
  460. * @param tile_k the maximum number of items along the third dimension of
  461. *     the 3D grid to process in one function call.
  462. * @param flags a bitwise combination of zero or more optional flags
  463. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  464. */
  465. void pthreadpool_parallelize_3d_tile_2d(
  466. pthreadpool_t threadpool,
  467. pthreadpool_task_3d_tile_2d_t function,
  468. void* context,
  469. size_t range_i,
  470. size_t range_j,
  471. size_t range_k,
  472. size_t tile_j,
  473. size_t tile_k,
  474. uint32_t flags);
  475. /**
  476. * Process items on a 3D grid with the specified maximum tile size along the
  477. * last two grid dimensions using a microarchitecture-aware task function.
  478. *
  479. * The function implements a parallel version of the following snippet:
  480. *
  481. *   uint32_t uarch_index = cpuinfo_initialize() ?
  482. *       cpuinfo_get_current_uarch_index() : default_uarch_index;
  483. *   if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
  484. *   for (size_t i = 0; i < range_i; i++)
  485. *     for (size_t j = 0; j < range_j; j += tile_j)
  486. *       for (size_t k = 0; k < range_k; k += tile_k)
  487. *         function(context, uarch_index, i, j, k,
  488. *           min(range_j - j, tile_j), min(range_k - k, tile_k));
  489. *
  490. * When the function returns, all items have been processed and the thread pool
  491. * is ready for a new task.
  492. *
  493. * @note If multiple threads call this function with the same thread pool, the
  494. *     calls are serialized.
  495. *
  496. * @param threadpool the thread pool to use for parallelisation. If
  497. *     threadpool is NULL, all items are processed serially on the calling
  498. *     thread.
  499. * @param function the function to call for each tile.
  500. * @param context the first argument passed to the specified
  501. *     function.
  502. * @param default_uarch_index the microarchitecture index to use when
  503. *     pthreadpool is configured without cpuinfo, cpuinfo initialization failed,
  504. *     or the index returned by cpuinfo_get_current_uarch_index() exceeds the
  505. *     max_uarch_index value.
  506. * @param max_uarch_index the maximum microarchitecture index expected by
  507. *     the specified function. If the index returned by
  508. *     cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
  509. *     will be used instead. default_uarch_index can exceed max_uarch_index.
  510. * @param range_i the number of items to process along the first
  511. *     dimension of the 3D grid.
  512. * @param range_j the number of items to process along the second
  513. *     dimension of the 3D grid.
  514. * @param range_k the number of items to process along the third
  515. *     dimension of the 3D grid.
  516. * @param tile_j the maximum number of items along the second
  517. *     dimension of the 3D grid to process in one function call.
  518. * @param tile_k the maximum number of items along the third
  519. *     dimension of the 3D grid to process in one function call.
  520. * @param flags a bitwise combination of zero or more optional
  521. *     flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
  522. *     PTHREADPOOL_FLAG_YIELD_WORKERS).
  523. */
  524. void pthreadpool_parallelize_3d_tile_2d_with_uarch(
  525. pthreadpool_t threadpool,
  526. pthreadpool_task_3d_tile_2d_with_id_t function,
  527. void* context,
  528. uint32_t default_uarch_index,
  529. uint32_t max_uarch_index,
  530. size_t range_i,
  531. size_t range_j,
  532. size_t range_k,
  533. size_t tile_j,
  534. size_t tile_k,
  535. uint32_t flags);
  536. /**
  537. * Process items on a 4D grid.
  538. *
  539. * The function implements a parallel version of the following snippet:
  540. *
  541. *   for (size_t i = 0; i < range_i; i++)
  542. *     for (size_t j = 0; j < range_j; j++)
  543. *       for (size_t k = 0; k < range_k; k++)
  544. *         for (size_t l = 0; l < range_l; l++)
  545. *           function(context, i, j, k, l);
  546. *
  547. * When the function returns, all items have been processed and the thread pool
  548. * is ready for a new task.
  549. *
  550. * @note If multiple threads call this function with the same thread pool, the
  551. *     calls are serialized.
  552. *
  553. * @param threadpool the thread pool to use for parallelisation. If threadpool
  554. *     is NULL, all items are processed serially on the calling thread.
  555. * @param function the function to call for each item.
  556. * @param context the first argument passed to the specified function.
  557. * @param range_i the number of items to process along the first dimension
  558. *     of the 4D grid.
  559. * @param range_j the number of items to process along the second dimension
  560. *     of the 4D grid.
  561. * @param range_k the number of items to process along the third dimension
  562. *     of the 4D grid.
  563. * @param range_l the number of items to process along the fourth dimension
  564. *     of the 4D grid.
  565. * @param flags a bitwise combination of zero or more optional flags
  566. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  567. */
  568. void pthreadpool_parallelize_4d(
  569. pthreadpool_t threadpool,
  570. pthreadpool_task_4d_t function,
  571. void* context,
  572. size_t range_i,
  573. size_t range_j,
  574. size_t range_k,
  575. size_t range_l,
  576. uint32_t flags);
  577. /**
  578. * Process items on a 4D grid with the specified maximum tile size along the
  579. * last grid dimension.
  580. *
  581. * The function implements a parallel version of the following snippet:
  582. *
  583. *   for (size_t i = 0; i < range_i; i++)
  584. *     for (size_t j = 0; j < range_j; j++)
  585. *       for (size_t k = 0; k < range_k; k++)
  586. *         for (size_t l = 0; l < range_l; l += tile_l)
  587. *           function(context, i, j, k, l, min(range_l - l, tile_l));
  588. *
  589. * When the function returns, all items have been processed and the thread pool
  590. * is ready for a new task.
  591. *
  592. * @note If multiple threads call this function with the same thread pool, the
  593. *     calls are serialized.
  594. *
  595. * @param threadpool the thread pool to use for parallelisation. If threadpool
  596. *     is NULL, all items are processed serially on the calling thread.
  597. * @param function the function to call for each tile.
  598. * @param context the first argument passed to the specified function.
  599. * @param range_i the number of items to process along the first dimension
  600. *     of the 4D grid.
  601. * @param range_j the number of items to process along the second dimension
  602. *     of the 4D grid.
  603. * @param range_k the number of items to process along the third dimension
  604. *     of the 4D grid.
  605. * @param range_l the number of items to process along the fourth dimension
  606. *     of the 4D grid.
  607. * @param tile_l the maximum number of items along the fourth dimension of
  608. *     the 4D grid to process in one function call.
  609. * @param flags a bitwise combination of zero or more optional flags
  610. *     (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS).
  611. */
  612. void pthreadpool_parallelize_4d_tile_1d(
  613. pthreadpool_t threadpool,
  614. pthreadpool_task_4d_tile_1d_t function,
  615. void* context,
  616. size_t range_i,
  617. size_t range_j,
  618. size_t range_k,
  619. size_t range_l,
  620. size_t tile_l,
  621. uint32_t flags);
  622. /**
  623. * Process items on a 4D grid with the specified maximum tile size along the
  624. * last two grid dimensions.
  625. *
  626. * The function implements a parallel version of the following snippet:
  627. *
  628. * for (size_t i = 0; i < range_i; i++)
  629. * for (size_t j = 0; j < range_j; j++)
  630. * for (size_t k = 0; k < range_k; k += tile_k)
  631. * for (size_t l = 0; l < range_l; l += tile_l)
  632. * function(context, i, j, k, l,
  633. * min(range_k - k, tile_k), min(range_l - l, tile_l));
  634. *
  635. * When the function returns, all items have been processed and the thread pool
  636. * is ready for a new task.
  637. *
  638. * @note If multiple threads call this function with the same thread pool, the
  639. * calls are serialized.
  640. *
  641. * @param threadpool the thread pool to use for parallelisation. If threadpool
  642. * is NULL, all items are processed serially on the calling thread.
  643. * @param function the function to call for each tile.
  644. * @param context the first argument passed to the specified function.
  645. * @param range_i the number of items to process along the first dimension
  646. * of the 4D grid.
  647. * @param range_j the number of items to process along the second dimension
  648. * of the 4D grid.
  649. * @param range_k the number of items to process along the third dimension
  650. * of the 4D grid.
  651. * @param range_l the number of items to process along the fourth dimension
  652. * of the 4D grid.
  653. * @param tile_k the maximum number of items along the third dimension of
  654. * the 4D grid to process in one function call.
  655. * @param tile_l the maximum number of items along the fourth dimension of
  656. * the 4D grid to process in one function call.
  657. * @param flags a bitwise combination of zero or more optional flags
  658. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  659. */
  660. void pthreadpool_parallelize_4d_tile_2d(
  661. pthreadpool_t threadpool,
  662. pthreadpool_task_4d_tile_2d_t function,
  663. void* context,
  664. size_t range_i,
  665. size_t range_j,
  666. size_t range_k,
  667. size_t range_l,
  668. size_t tile_k,
  669. size_t tile_l,
  670. uint32_t flags);
  671. /**
  672. * Process items on a 4D grid with the specified maximum tile size along the
  673. * last two grid dimensions using a microarchitecture-aware task function.
  674. *
  675. * The function implements a parallel version of the following snippet:
  676. *
  677. * uint32_t uarch_index = cpuinfo_initialize() ?
  678. * cpuinfo_get_current_uarch_index() : default_uarch_index;
  679. * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index;
  680. * for (size_t i = 0; i < range_i; i++)
  681. * for (size_t j = 0; j < range_j; j++)
  682. * for (size_t k = 0; k < range_k; k += tile_k)
  683. * for (size_t l = 0; l < range_l; l += tile_l)
  684. * function(context, uarch_index, i, j, k, l,
  685. * min(range_k - k, tile_k), min(range_l - l, tile_l));
  686. *
  687. * When the function returns, all items have been processed and the thread pool
  688. * is ready for a new task.
  689. *
  690. * @note If multiple threads call this function with the same thread pool, the
  691. * calls are serialized.
  692. *
  693. * @param threadpool the thread pool to use for parallelisation. If
  694. * threadpool is NULL, all items are processed serially on the calling
  695. * thread.
  696. * @param function the function to call for each tile.
  697. * @param context the first argument passed to the specified
  698. * function.
  699. * @param default_uarch_index the microarchitecture index to use when
  700. * pthreadpool is configured without cpuinfo, when cpuinfo initialization
  701. * fails, or when the index returned by cpuinfo_get_current_uarch_index()
  702. * exceeds the max_uarch_index value.
  703. * @param max_uarch_index the maximum microarchitecture index expected by
  704. * the specified function. If the index returned by
  705. * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index
  706. * will be used instead. default_uarch_index can exceed max_uarch_index.
  707. * @param range_i the number of items to process along the first
  708. * dimension of the 4D grid.
  709. * @param range_j the number of items to process along the second
  710. * dimension of the 4D grid.
  711. * @param range_k the number of items to process along the third
  712. * dimension of the 4D grid.
  713. * @param range_l the number of items to process along the fourth
  714. * dimension of the 4D grid.
  715. * @param tile_k the maximum number of items along the third
  716. * dimension of the 4D grid to process in one function call.
  717. * @param tile_l the maximum number of items along the fourth
  718. * dimension of the 4D grid to process in one function call.
  719. * @param flags a bitwise combination of zero or more optional
  720. * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or
  721. * PTHREADPOOL_FLAG_YIELD_WORKERS)
  722. */
  723. void pthreadpool_parallelize_4d_tile_2d_with_uarch(
  724. pthreadpool_t threadpool,
  725. pthreadpool_task_4d_tile_2d_with_id_t function,
  726. void* context,
  727. uint32_t default_uarch_index,
  728. uint32_t max_uarch_index,
  729. size_t range_i,
  730. size_t range_j,
  731. size_t range_k,
  732. size_t range_l,
  733. size_t tile_k,
  734. size_t tile_l,
  735. uint32_t flags);
  736. /**
  737. * Process items on a 5D grid.
  738. *
  739. * The function implements a parallel version of the following snippet:
  740. *
  741. * for (size_t i = 0; i < range_i; i++)
  742. * for (size_t j = 0; j < range_j; j++)
  743. * for (size_t k = 0; k < range_k; k++)
  744. * for (size_t l = 0; l < range_l; l++)
  745. * for (size_t m = 0; m < range_m; m++)
  746. * function(context, i, j, k, l, m);
  747. *
  748. * When the function returns, all items have been processed and the thread pool
  749. * is ready for a new task.
  750. *
  751. * @note If multiple threads call this function with the same thread pool, the
  752. * calls are serialized.
  753. *
  754. * @param threadpool the thread pool to use for parallelisation. If threadpool
  755. * is NULL, all items are processed serially on the calling thread.
  756. * @param function the function to call for each item.
  757. * @param context the first argument passed to the specified function.
  758. * @param range_i the number of items to process along the first dimension
  759. * of the 5D grid.
  760. * @param range_j the number of items to process along the second dimension
  761. * of the 5D grid.
  762. * @param range_k the number of items to process along the third dimension
  763. * of the 5D grid.
  764. * @param range_l the number of items to process along the fourth dimension
  765. * of the 5D grid.
  766. * @param range_m the number of items to process along the fifth dimension
  767. * of the 5D grid.
  768. * @param flags a bitwise combination of zero or more optional flags
  769. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  770. */
  771. void pthreadpool_parallelize_5d(
  772. pthreadpool_t threadpool,
  773. pthreadpool_task_5d_t function,
  774. void* context,
  775. size_t range_i,
  776. size_t range_j,
  777. size_t range_k,
  778. size_t range_l,
  779. size_t range_m,
  780. uint32_t flags);
  781. /**
  782. * Process items on a 5D grid with the specified maximum tile size along the
  783. * last grid dimension.
  784. *
  785. * The function implements a parallel version of the following snippet:
  786. *
  787. * for (size_t i = 0; i < range_i; i++)
  788. * for (size_t j = 0; j < range_j; j++)
  789. * for (size_t k = 0; k < range_k; k++)
  790. * for (size_t l = 0; l < range_l; l++)
  791. * for (size_t m = 0; m < range_m; m += tile_m)
  792. * function(context, i, j, k, l, m, min(range_m - m, tile_m));
  793. *
  794. * When the function returns, all items have been processed and the thread pool
  795. * is ready for a new task.
  796. *
  797. * @note If multiple threads call this function with the same thread pool, the
  798. * calls are serialized.
  799. *
  800. * @param threadpool the thread pool to use for parallelisation. If threadpool
  801. * is NULL, all items are processed serially on the calling thread.
  802. * @param function the function to call for each tile.
  803. * @param context the first argument passed to the specified function.
  804. * @param range_i the number of items to process along the first dimension
  805. * of the 5D grid.
  806. * @param range_j the number of items to process along the second dimension
  807. * of the 5D grid.
  808. * @param range_k the number of items to process along the third dimension
  809. * of the 5D grid.
  810. * @param range_l the number of items to process along the fourth dimension
  811. * of the 5D grid.
  812. * @param range_m the number of items to process along the fifth dimension
  813. * of the 5D grid.
  814. * @param tile_m the maximum number of items along the fifth dimension of
  815. * the 5D grid to process in one function call.
  816. * @param flags a bitwise combination of zero or more optional flags
  817. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  818. */
  819. void pthreadpool_parallelize_5d_tile_1d(
  820. pthreadpool_t threadpool,
  821. pthreadpool_task_5d_tile_1d_t function,
  822. void* context,
  823. size_t range_i,
  824. size_t range_j,
  825. size_t range_k,
  826. size_t range_l,
  827. size_t range_m,
  828. size_t tile_m,
  829. uint32_t flags);
  830. /**
  831. * Process items on a 5D grid with the specified maximum tile size along the
  832. * last two grid dimensions.
  833. *
  834. * The function implements a parallel version of the following snippet:
  835. *
  836. * for (size_t i = 0; i < range_i; i++)
  837. * for (size_t j = 0; j < range_j; j++)
  838. * for (size_t k = 0; k < range_k; k++)
  839. * for (size_t l = 0; l < range_l; l += tile_l)
  840. * for (size_t m = 0; m < range_m; m += tile_m)
  841. * function(context, i, j, k, l, m,
  842. * min(range_l - l, tile_l), min(range_m - m, tile_m));
  843. *
  844. * When the function returns, all items have been processed and the thread pool
  845. * is ready for a new task.
  846. *
  847. * @note If multiple threads call this function with the same thread pool, the
  848. * calls are serialized.
  849. *
  850. * @param threadpool the thread pool to use for parallelisation. If threadpool
  851. * is NULL, all items are processed serially on the calling thread.
  852. * @param function the function to call for each tile.
  853. * @param context the first argument passed to the specified function.
  854. * @param range_i the number of items to process along the first dimension
  855. * of the 5D grid.
  856. * @param range_j the number of items to process along the second dimension
  857. * of the 5D grid.
  858. * @param range_k the number of items to process along the third dimension
  859. * of the 5D grid.
  860. * @param range_l the number of items to process along the fourth dimension
  861. * of the 5D grid.
  862. * @param range_m the number of items to process along the fifth dimension
  863. * of the 5D grid.
  864. * @param tile_l the maximum number of items along the fourth dimension of
  865. * the 5D grid to process in one function call.
  866. * @param tile_m the maximum number of items along the fifth dimension of
  867. * the 5D grid to process in one function call.
  868. * @param flags a bitwise combination of zero or more optional flags
  869. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  870. */
  871. void pthreadpool_parallelize_5d_tile_2d(
  872. pthreadpool_t threadpool,
  873. pthreadpool_task_5d_tile_2d_t function,
  874. void* context,
  875. size_t range_i,
  876. size_t range_j,
  877. size_t range_k,
  878. size_t range_l,
  879. size_t range_m,
  880. size_t tile_l,
  881. size_t tile_m,
  882. uint32_t flags);
  883. /**
  884. * Process items on a 6D grid.
  885. *
  886. * The function implements a parallel version of the following snippet:
  887. *
  888. * for (size_t i = 0; i < range_i; i++)
  889. * for (size_t j = 0; j < range_j; j++)
  890. * for (size_t k = 0; k < range_k; k++)
  891. * for (size_t l = 0; l < range_l; l++)
  892. * for (size_t m = 0; m < range_m; m++)
  893. * for (size_t n = 0; n < range_n; n++)
  894. * function(context, i, j, k, l, m, n);
  895. *
  896. * When the function returns, all items have been processed and the thread pool
  897. * is ready for a new task.
  898. *
  899. * @note If multiple threads call this function with the same thread pool, the
  900. * calls are serialized.
  901. *
  902. * @param threadpool the thread pool to use for parallelisation. If threadpool
  903. * is NULL, all items are processed serially on the calling thread.
  904. * @param function the function to call for each item.
  905. * @param context the first argument passed to the specified function.
  906. * @param range_i the number of items to process along the first dimension
  907. * of the 6D grid.
  908. * @param range_j the number of items to process along the second dimension
  909. * of the 6D grid.
  910. * @param range_k the number of items to process along the third dimension
  911. * of the 6D grid.
  912. * @param range_l the number of items to process along the fourth dimension
  913. * of the 6D grid.
  914. * @param range_m the number of items to process along the fifth dimension
  915. * of the 6D grid.
  916. * @param range_n the number of items to process along the sixth dimension
  917. * of the 6D grid.
  920. * @param flags a bitwise combination of zero or more optional flags
  921. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  922. */
  923. void pthreadpool_parallelize_6d(
  924. pthreadpool_t threadpool,
  925. pthreadpool_task_6d_t function,
  926. void* context,
  927. size_t range_i,
  928. size_t range_j,
  929. size_t range_k,
  930. size_t range_l,
  931. size_t range_m,
  932. size_t range_n,
  933. uint32_t flags);
  934. /**
  935. * Process items on a 6D grid with the specified maximum tile size along the
  936. * last grid dimension.
  937. *
  938. * The function implements a parallel version of the following snippet:
  939. *
  940. * for (size_t i = 0; i < range_i; i++)
  941. * for (size_t j = 0; j < range_j; j++)
  942. * for (size_t k = 0; k < range_k; k++)
  943. * for (size_t l = 0; l < range_l; l++)
  944. * for (size_t m = 0; m < range_m; m++)
  945. * for (size_t n = 0; n < range_n; n += tile_n)
  946. * function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
  947. *
  948. * When the function returns, all items have been processed and the thread pool
  949. * is ready for a new task.
  950. *
  951. * @note If multiple threads call this function with the same thread pool, the
  952. * calls are serialized.
  953. *
  954. * @param threadpool the thread pool to use for parallelisation. If threadpool
  955. * is NULL, all items are processed serially on the calling thread.
  956. * @param function the function to call for each tile.
  957. * @param context the first argument passed to the specified function.
  958. * @param range_i the number of items to process along the first dimension
  959. * of the 6D grid.
  960. * @param range_j the number of items to process along the second dimension
  961. * of the 6D grid.
  962. * @param range_k the number of items to process along the third dimension
  963. * of the 6D grid.
  964. * @param range_l the number of items to process along the fourth dimension
  965. * of the 6D grid.
  966. * @param range_m the number of items to process along the fifth dimension
  967. * of the 6D grid.
  968. * @param range_n the number of items to process along the sixth dimension
  969. * of the 6D grid.
  970. * @param tile_n the maximum number of items along the sixth dimension of
  971. * the 6D grid to process in one function call.
  972. * @param flags a bitwise combination of zero or more optional flags
  973. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  974. */
  975. void pthreadpool_parallelize_6d_tile_1d(
  976. pthreadpool_t threadpool,
  977. pthreadpool_task_6d_tile_1d_t function,
  978. void* context,
  979. size_t range_i,
  980. size_t range_j,
  981. size_t range_k,
  982. size_t range_l,
  983. size_t range_m,
  984. size_t range_n,
  985. size_t tile_n,
  986. uint32_t flags);
  987. /**
  988. * Process items on a 6D grid with the specified maximum tile size along the
  989. * last two grid dimensions.
  990. *
  991. * The function implements a parallel version of the following snippet:
  992. *
  993. * for (size_t i = 0; i < range_i; i++)
  994. * for (size_t j = 0; j < range_j; j++)
  995. * for (size_t k = 0; k < range_k; k++)
  996. * for (size_t l = 0; l < range_l; l++)
  997. * for (size_t m = 0; m < range_m; m += tile_m)
  998. * for (size_t n = 0; n < range_n; n += tile_n)
  999. * function(context, i, j, k, l, m, n,
  1000. * min(range_m - m, tile_m), min(range_n - n, tile_n));
  1001. *
  1002. * When the function returns, all items have been processed and the thread pool
  1003. * is ready for a new task.
  1004. *
  1005. * @note If multiple threads call this function with the same thread pool, the
  1006. * calls are serialized.
  1007. *
  1008. * @param threadpool the thread pool to use for parallelisation. If threadpool
  1009. * is NULL, all items are processed serially on the calling thread.
  1010. * @param function the function to call for each tile.
  1011. * @param context the first argument passed to the specified function.
  1012. * @param range_i the number of items to process along the first dimension
  1013. * of the 6D grid.
  1014. * @param range_j the number of items to process along the second dimension
  1015. * of the 6D grid.
  1016. * @param range_k the number of items to process along the third dimension
  1017. * of the 6D grid.
  1018. * @param range_l the number of items to process along the fourth dimension
  1019. * of the 6D grid.
  1020. * @param range_m the number of items to process along the fifth dimension
  1021. * of the 6D grid.
  1022. * @param range_n the number of items to process along the sixth dimension
  1023. * of the 6D grid.
  1024. * @param tile_m the maximum number of items along the fifth dimension of
  1025. * the 6D grid to process in one function call.
  1026. * @param tile_n the maximum number of items along the sixth dimension of
  1027. * the 6D grid to process in one function call.
  1028. * @param flags a bitwise combination of zero or more optional flags
  1029. * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
  1030. */
  1031. void pthreadpool_parallelize_6d_tile_2d(
  1032. pthreadpool_t threadpool,
  1033. pthreadpool_task_6d_tile_2d_t function,
  1034. void* context,
  1035. size_t range_i,
  1036. size_t range_j,
  1037. size_t range_k,
  1038. size_t range_l,
  1039. size_t range_m,
  1040. size_t range_n,
  1041. size_t tile_m,
  1042. size_t tile_n,
  1043. uint32_t flags);
  1044. /**
  1045. * Terminates threads in the thread pool and releases associated resources.
  1046. *
  1047. * @warning Accessing the thread pool after a call to this function constitutes
  1048. * undefined behaviour and may cause data corruption.
  1049. *
  1050. * @param[in,out] threadpool The thread pool to destroy.
  1051. */
  1052. void pthreadpool_destroy(pthreadpool_t threadpool);
  1053. #ifndef PTHREADPOOL_NO_DEPRECATED_API
  1054. /* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */
  /*
   * Everything in this section is compiled out when
   * PTHREADPOOL_NO_DEPRECATED_API is defined before this header is included.
   *
   * PTHREADPOOL_DEPRECATED is appended to every legacy declaration below. It
   * expands to the GNU "deprecated" attribute when __GNUC__ is defined, and to
   * nothing on other compilers (so no deprecation diagnostic is requested
   * there).
   */
  1055. #if defined(__GNUC__)
  1056. #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__))
  1057. #else
  1058. #define PTHREADPOOL_DEPRECATED
  1059. #endif
  /*
   * Callback types accepted by the legacy pthreadpool_compute_* entry points.
   * Each takes an opaque void* followed by one or more size_t values.
   * NOTE(review): presumably the void* receives the `argument` passed to the
   * corresponding compute call and the size_t values are item indices (plus
   * tile extents for the *_tiled variants) -- confirm against the
   * implementation.
   */
  1060. typedef void (*pthreadpool_function_1d_t)(void*, size_t);
  1061. typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t);
  1062. typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t);
  1063. typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t);
  1064. typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t);
  1065. typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t);
  /*
   * Legacy entry points. Unlike the pthreadpool_parallelize_* declarations,
   * none of them takes a `flags` argument, and the tiled variants take one
   * tile size per range dimension.
   * NOTE(review): these look like the older counterparts of the
   * pthreadpool_parallelize_* family -- confirm exact semantics in the
   * implementation before relying on them.
   */
  /* 1D range, one callback type with a single size_t argument. */
  1066. void pthreadpool_compute_1d(
  1067. pthreadpool_t threadpool,
  1068. pthreadpool_function_1d_t function,
  1069. void* argument,
  1070. size_t range) PTHREADPOOL_DEPRECATED;
  /* 1D range with a tile size along that range. */
  1071. void pthreadpool_compute_1d_tiled(
  1072. pthreadpool_t threadpool,
  1073. pthreadpool_function_1d_tiled_t function,
  1074. void* argument,
  1075. size_t range,
  1076. size_t tile) PTHREADPOOL_DEPRECATED;
  /* 2D range (range_i x range_j), no tiling parameters. */
  1077. void pthreadpool_compute_2d(
  1078. pthreadpool_t threadpool,
  1079. pthreadpool_function_2d_t function,
  1080. void* argument,
  1081. size_t range_i,
  1082. size_t range_j) PTHREADPOOL_DEPRECATED;
  /* 2D range with a tile size for each of the two dimensions. */
  1083. void pthreadpool_compute_2d_tiled(
  1084. pthreadpool_t threadpool,
  1085. pthreadpool_function_2d_tiled_t function,
  1086. void* argument,
  1087. size_t range_i,
  1088. size_t range_j,
  1089. size_t tile_i,
  1090. size_t tile_j) PTHREADPOOL_DEPRECATED;
  /* 3D range with a tile size for each of the three dimensions. */
  1091. void pthreadpool_compute_3d_tiled(
  1092. pthreadpool_t threadpool,
  1093. pthreadpool_function_3d_tiled_t function,
  1094. void* argument,
  1095. size_t range_i,
  1096. size_t range_j,
  1097. size_t range_k,
  1098. size_t tile_i,
  1099. size_t tile_j,
  1100. size_t tile_k) PTHREADPOOL_DEPRECATED;
  /* 4D range with a tile size for each of the four dimensions. */
  1101. void pthreadpool_compute_4d_tiled(
  1102. pthreadpool_t threadpool,
  1103. pthreadpool_function_4d_tiled_t function,
  1104. void* argument,
  1105. size_t range_i,
  1106. size_t range_j,
  1107. size_t range_k,
  1108. size_t range_l,
  1109. size_t tile_i,
  1110. size_t tile_j,
  1111. size_t tile_k,
  1112. size_t tile_l) PTHREADPOOL_DEPRECATED;
  1113. #endif /* PTHREADPOOL_NO_DEPRECATED_API */
  1114. #ifdef __cplusplus
  1115. } /* extern "C" */
  1116. #endif
  1117. #endif /* PTHREADPOOL_H_ */