  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_HAL_INTRIN_HPP
  45. #define OPENCV_HAL_INTRIN_HPP
  46. #include <cmath>
  47. #include <float.h>
  48. #include <stdlib.h>
  49. #include "opencv2/core/cvdef.h"
  50. #if defined(__GNUC__) && __GNUC__ == 12
  51. #pragma GCC diagnostic push
  52. #pragma GCC diagnostic ignored "-Wuninitialized"
  53. #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
  54. #endif
  55. #define OPENCV_HAL_ADD(a, b) ((a) + (b))
  56. #define OPENCV_HAL_AND(a, b) ((a) & (b))
  57. #define OPENCV_HAL_NOP(a) (a)
  58. #define OPENCV_HAL_1ST(a, b) (a)
  59. namespace {
  60. inline unsigned int trailingZeros32(unsigned int value) {
  61. #if defined(_MSC_VER)
  62. #if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
  63. unsigned long index = 0;
  64. _BitScanForward(&index, value);
  65. return (unsigned int)index;
  66. #elif defined(__clang__)
  67. // clang-cl doesn't export _tzcnt_u32 for non BMI systems
  68. return value ? __builtin_ctz(value) : 32;
  69. #else
  70. return _tzcnt_u32(value);
  71. #endif
  72. #elif defined(__GNUC__) || defined(__GNUG__)
  73. return __builtin_ctz(value);
  74. #elif defined(__ICC) || defined(__INTEL_COMPILER)
  75. return _bit_scan_forward(value);
  76. #elif defined(__clang__)
  77. return llvm.cttz.i32(value, true);
  78. #else
  79. static const int MultiplyDeBruijnBitPosition[32] = {
  80. 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  81. 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
  82. return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
  83. #endif
  84. }
  85. }
  86. // unlike HAL API, which is in cv::hal,
  87. // we put intrinsics into cv namespace to make its
  88. // access from within opencv code more accessible
  89. namespace cv {
namespace hal {
// Memory semantics a caller may request from vector store operations.
enum StoreMode
{
    STORE_UNALIGNED = 0,       // no alignment assumption about the destination
    STORE_ALIGNED = 1,         // destination is aligned to the vector width
    STORE_ALIGNED_NOCACHE = 2  // aligned store; "NOCACHE" suggests a non-temporal
                               // (cache-bypassing) store — actual behavior is
                               // backend-dependent
};
}
// TODO FIXIT: Don't use "God" traits. Split on separate cases.
// Primary template is intentionally empty: only the specializations generated
// by CV_INTRIN_DEF_TYPE_TRAITS* below are usable; instantiating V_TypeTraits
// for any other type yields a compile-time error (no members).
template<typename _Tp> struct V_TypeTraits
{
};
// Generates a V_TypeTraits specialization for a lane type `type`:
//   int_type_  - same-sized signed integer type
//   uint_type_ - same-sized unsigned integer type
//   abs_type_  - type holding |x|
//   w_type_    - type with twice-wider lanes
//   q_type_    - type with 4x-wider lanes
//   sum_type_  - accumulator type for reductions
// reinterpret_int / reinterpret_from_int bit-cast between `type` and int_type
// through a union (avoids strict-aliasing issues of pointer casts).
// (Comments must stay outside the macro: a // inside would swallow the '\'.)
#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
template<> struct V_TypeTraits<type> \
{ \
    typedef type value_type; \
    typedef int_type_ int_type; \
    typedef abs_type_ abs_type; \
    typedef uint_type_ uint_type; \
    typedef w_type_ w_type; \
    typedef q_type_ q_type; \
    typedef sum_type_ sum_type; \
\
    static inline int_type reinterpret_int(type x) \
    { \
        union { type l; int_type i; } v; \
        v.l = x; \
        return v.i; \
    } \
\
    static inline type reinterpret_from_int(int_type x) \
    { \
        union { type l; int_type i; } v; \
        v.i = x; \
        return v.l; \
    } \
}
// Same as CV_INTRIN_DEF_TYPE_TRAITS, but for lane types that have no 4x-wider
// counterpart (32-bit and wider): the generated specialization omits q_type.
#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
template<> struct V_TypeTraits<type> \
{ \
    typedef type value_type; \
    typedef int_type_ int_type; \
    typedef abs_type_ abs_type; \
    typedef uint_type_ uint_type; \
    typedef w_type_ w_type; \
    typedef sum_type_ sum_type; \
\
    static inline int_type reinterpret_int(type x) \
    { \
        union { type l; int_type i; } v; \
        v.l = x; \
        return v.i; \
    } \
\
    static inline type reinterpret_from_int(int_type x) \
    { \
        union { type l; int_type i; } v; \
        v.i = x; \
        return v.l; \
    } \
}
// Trait table for every supported lane type.
// Arguments: (type, int_type, uint_type, abs_type, w_type[, q_type], sum_type).
// `void` marks a wider type that does not exist (e.g. nothing is wider than
// 64-bit lanes or double).
CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
  161. #ifndef CV_DOXYGEN
  162. #ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
  163. #ifdef CV_FORCE_SIMD128_CPP
  164. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
  165. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
  166. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  167. #elif defined(CV_CPU_DISPATCH_MODE)
  168. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
  169. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
  170. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  171. #else
  172. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
  173. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
  174. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  175. #endif
  176. #endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
  177. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  178. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  179. using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
  180. #endif
  181. }
  182. #ifdef CV_DOXYGEN
  183. # undef CV_AVX2
  184. # undef CV_SSE2
  185. # undef CV_NEON
  186. # undef CV_VSX
  187. # undef CV_FP16
  188. # undef CV_MSA
  189. # undef CV_RVV
  190. #endif
  191. #if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_LSX) && !defined(CV_FORCE_SIMD128_CPP)
  192. #define CV__SIMD_FORWARD 128
  193. #include "opencv2/core/hal/intrin_forward.hpp"
  194. #endif
  195. #if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
  196. #include "opencv2/core/hal/intrin_sse_em.hpp"
  197. #include "opencv2/core/hal/intrin_sse.hpp"
  198. #elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
  199. #include "opencv2/core/hal/intrin_neon.hpp"
  200. #elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
  201. #define CV_SIMD128_CPP 0
  202. #include "opencv2/core/hal/intrin_rvv071.hpp"
  203. #elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
  204. #include "opencv2/core/hal/intrin_vsx.hpp"
  205. #elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
  206. #include "opencv2/core/hal/intrin_msa.hpp"
  207. #elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
  208. #include "opencv2/core/hal/intrin_wasm.hpp"
  209. #elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
  210. #if defined(CV_RVV_SCALABLE)
  211. #include "opencv2/core/hal/intrin_rvv_scalable.hpp"
  212. #else
  213. #include "opencv2/core/hal/intrin_rvv.hpp"
  214. #endif
  215. #elif CV_LSX && !defined(CV_FORCE_SIMD128_CPP)
  216. #include "opencv2/core/hal/intrin_lsx.hpp"
  217. #else
  218. #include "opencv2/core/hal/intrin_cpp.hpp"
  219. #endif
  220. // AVX2 can be used together with SSE2, so
  221. // we define those two sets of intrinsics at once.
  222. // Most of the intrinsics do not conflict (the proper overloaded variant is
  223. // resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
  224. // but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
  225. // Correspondingly, the wide intrinsics (which are mapped to the "widest"
  226. // available instruction set) will get vx_ prefix
  227. // (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
  228. #if CV_AVX2
  229. #define CV__SIMD_FORWARD 256
  230. #include "opencv2/core/hal/intrin_forward.hpp"
  231. #include "opencv2/core/hal/intrin_avx.hpp"
  232. #endif
  233. // AVX512 can be used together with SSE2 and AVX2, so
  234. // we define those sets of intrinsics at once.
  235. // For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load().
  236. // Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load())
  237. #if CV_AVX512_SKX
  238. #define CV__SIMD_FORWARD 512
  239. #include "opencv2/core/hal/intrin_forward.hpp"
  240. #include "opencv2/core/hal/intrin_avx512.hpp"
  241. #endif
  242. #if CV_LASX
  243. #define CV__SIMD_FORWARD 256
  244. #include "opencv2/core/hal/intrin_forward.hpp"
  245. #include "opencv2/core/hal/intrin_lasx.hpp"
  246. #endif
  247. //! @cond IGNORED
  248. namespace cv {
  249. #ifndef CV_DOXYGEN
  250. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  251. #endif
  252. #ifndef CV_SIMD128
  253. #define CV_SIMD128 0
  254. #endif
  255. #ifndef CV_SIMD128_CPP
  256. #define CV_SIMD128_CPP 0
  257. #endif
  258. #ifndef CV_SIMD128_64F
  259. #define CV_SIMD128_64F 0
  260. #endif
  261. #ifndef CV_SIMD256
  262. #define CV_SIMD256 0
  263. #endif
  264. #ifndef CV_SIMD256_64F
  265. #define CV_SIMD256_64F 0
  266. #endif
  267. #ifndef CV_SIMD512
  268. #define CV_SIMD512 0
  269. #endif
  270. #ifndef CV_SIMD512_64F
  271. #define CV_SIMD512_64F 0
  272. #endif
  273. #ifndef CV_SIMD128_FP16
  274. #define CV_SIMD128_FP16 0
  275. #endif
  276. #ifndef CV_SIMD256_FP16
  277. #define CV_SIMD256_FP16 0
  278. #endif
  279. #ifndef CV_SIMD512_FP16
  280. #define CV_SIMD512_FP16 0
  281. #endif
  282. #ifndef CV_SIMD_SCALABLE
  283. #define CV_SIMD_SCALABLE 0
  284. #endif
  285. #ifndef CV_SIMD_SCALABLE_64F
  286. #define CV_SIMD_SCALABLE_64F 0
  287. #endif
  288. //==================================================================================================
// Primary template is intentionally empty: only the specializations generated
// by CV_DEF_REG_TRAITS below (one per vector register type) are usable.
template<typename _Tp> struct V_RegTraits
{
};
// Generates a V_RegTraits specialization mapping a vector register type to
// its related register types:
//   _u_reg     - unsigned counterpart with the same lane width
//   _w_reg     - register with twice-wider lanes (void if none)
//   _q_reg     - register with 4x-wider lanes (void if none)
//   _int_reg   - signed-integer register with the same lane width
//   _round_reg - integer register produced by float->int rounding (void for ints)
// Note: the prefix, lane_type and suffix parameters are currently unused by
// the expansion; they document the instantiation lines below.
#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
template<> struct V_RegTraits<_reg> \
{ \
    typedef _reg reg; \
    typedef _u_reg u_reg; \
    typedef _w_reg w_reg; \
    typedef _q_reg q_reg; \
    typedef _int_reg int_reg; \
    typedef _round_reg round_reg; \
}
#if CV_SIMD128 || CV_SIMD128_CPP
// --- 128-bit fixed-size register traits ---
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F || CV_SIMD128_CPP
// float's w_reg (v_float64x2) only exists when 64-bit float SIMD is available.
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif
#if CV_SIMD256
// --- 256-bit (AVX2 / LASX) register traits ---
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif
#if CV_SIMD512
// --- 512-bit (AVX-512) register traits ---
CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
#endif
#if CV_SIMD_SCALABLE
// --- scalable (length-agnostic, e.g. RVV) register traits ---
CV_DEF_REG_TRAITS(v, v_uint8, uchar, u8, v_uint8, v_uint16, v_uint32, v_int8, void);
CV_DEF_REG_TRAITS(v, v_int8, schar, s8, v_uint8, v_int16, v_int32, v_int8, void);
CV_DEF_REG_TRAITS(v, v_uint16, ushort, u16, v_uint16, v_uint32, v_uint64, v_int16, void);
CV_DEF_REG_TRAITS(v, v_int16, short, s16, v_uint16, v_int32, v_int64, v_int16, void);
CV_DEF_REG_TRAITS(v, v_uint32, unsigned, u32, v_uint32, v_uint64, void, v_int32, void);
CV_DEF_REG_TRAITS(v, v_int32, int, s32, v_uint32, v_int64, void, v_int32, void);
CV_DEF_REG_TRAITS(v, v_float32, float, f32, v_float32, v_float64, void, v_int32, v_int32);
CV_DEF_REG_TRAITS(v, v_uint64, uint64, u64, v_uint64, void, void, v_int64, void);
CV_DEF_REG_TRAITS(v, v_int64, int64, s64, v_uint64, void, void, v_int64, void);
CV_DEF_REG_TRAITS(v, v_float64, double, f64, v_float64, void, void, v_int64, v_int32);
#endif
  356. //! @endcond
  357. #if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
  358. #define CV__SIMD_NAMESPACE simd512
namespace CV__SIMD_NAMESPACE {
// 512-bit registers are the widest available: map the width-agnostic v_xxx
// aliases and the VXPREFIX used by the vx_* wrappers to the v512_* family.
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD512_64F
#define CV_SIMD_FP16 CV_SIMD512_FP16
#define CV_SIMD_WIDTH 64
//! @addtogroup core_hal_intrin
//! @{
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
typedef v_uint8x64 v_uint8;
//! @brief Maximum available vector register capacity 8-bit signed integer values
typedef v_int8x64 v_int8;
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
typedef v_uint16x32 v_uint16;
//! @brief Maximum available vector register capacity 16-bit signed integer values
typedef v_int16x32 v_int16;
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
typedef v_uint32x16 v_uint32;
//! @brief Maximum available vector register capacity 32-bit signed integer values
typedef v_int32x16 v_int32;
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
typedef v_uint64x8 v_uint64;
//! @brief Maximum available vector register capacity 64-bit signed integer values
typedef v_int64x8 v_int64;
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
typedef v_float32x16 v_float32;
#if CV_SIMD512_64F
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
typedef v_float64x8 v_float64;
#endif
//! @}
// VXPREFIX(_fn) expands to v512_fn.
#define VXPREFIX(func) v512##func
} // namespace
  391. using namespace CV__SIMD_NAMESPACE;
  392. #elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
  393. #define CV__SIMD_NAMESPACE simd256
namespace CV__SIMD_NAMESPACE {
// 256-bit registers are the widest available: map the width-agnostic v_xxx
// aliases and the VXPREFIX used by the vx_* wrappers to the v256_* family.
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD256_64F
#define CV_SIMD_FP16 CV_SIMD256_FP16
#define CV_SIMD_WIDTH 32
//! @addtogroup core_hal_intrin
//! @{
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
typedef v_uint8x32 v_uint8;
//! @brief Maximum available vector register capacity 8-bit signed integer values
typedef v_int8x32 v_int8;
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
typedef v_uint16x16 v_uint16;
//! @brief Maximum available vector register capacity 16-bit signed integer values
typedef v_int16x16 v_int16;
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
typedef v_uint32x8 v_uint32;
//! @brief Maximum available vector register capacity 32-bit signed integer values
typedef v_int32x8 v_int32;
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
typedef v_uint64x4 v_uint64;
//! @brief Maximum available vector register capacity 64-bit signed integer values
typedef v_int64x4 v_int64;
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
typedef v_float32x8 v_float32;
#if CV_SIMD256_64F
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
typedef v_float64x4 v_float64;
#endif
//! @}
// VXPREFIX(_fn) expands to v256_fn.
#define VXPREFIX(func) v256##func
} // namespace
  426. using namespace CV__SIMD_NAMESPACE;
  427. #elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
  428. #if defined CV_SIMD128_CPP
  429. #define CV__SIMD_NAMESPACE simd128_cpp
  430. #else
  431. #define CV__SIMD_NAMESPACE simd128
  432. #endif
  433. namespace CV__SIMD_NAMESPACE {
  434. #define CV_SIMD CV_SIMD128
  435. #define CV_SIMD_64F CV_SIMD128_64F
  436. #define CV_SIMD_WIDTH 16
  437. //! @addtogroup core_hal_intrin
  438. //! @{
  439. //! @brief Maximum available vector register capacity 8-bit unsigned integer values
  440. typedef v_uint8x16 v_uint8;
  441. //! @brief Maximum available vector register capacity 8-bit signed integer values
  442. typedef v_int8x16 v_int8;
  443. //! @brief Maximum available vector register capacity 16-bit unsigned integer values
  444. typedef v_uint16x8 v_uint16;
  445. //! @brief Maximum available vector register capacity 16-bit signed integer values
  446. typedef v_int16x8 v_int16;
  447. //! @brief Maximum available vector register capacity 32-bit unsigned integer values
  448. typedef v_uint32x4 v_uint32;
  449. //! @brief Maximum available vector register capacity 32-bit signed integer values
  450. typedef v_int32x4 v_int32;
  451. //! @brief Maximum available vector register capacity 64-bit unsigned integer values
  452. typedef v_uint64x2 v_uint64;
  453. //! @brief Maximum available vector register capacity 64-bit signed integer values
  454. typedef v_int64x2 v_int64;
  455. //! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
  456. typedef v_float32x4 v_float32;
  457. #if CV_SIMD128_64F
  458. //! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
  459. typedef v_float64x2 v_float64;
  460. #endif
  461. //! @}
  462. #define VXPREFIX(func) v##func
  463. } // namespace
  464. using namespace CV__SIMD_NAMESPACE;
  465. #elif CV_SIMD_SCALABLE
  466. #define CV__SIMD_NAMESPACE simd
namespace CV__SIMD_NAMESPACE {
// Scalable (length-agnostic) vectors: the fixed-width API is not available,
// so CV_SIMD stays 0; the v_xxx aliases come from the scalable backend.
#define CV_SIMD 0
// Nominal upper bound only (1024 bits / 8 = 128 bytes); the real vector
// length is determined at run time for scalable ISAs.
#define CV_SIMD_WIDTH 128 /* 1024/8 */
#define VXPREFIX(func) v##func
} // namespace
  472. using namespace CV__SIMD_NAMESPACE;
  473. #endif
  474. //! @cond IGNORED
  475. #ifndef CV_SIMD_64F
  476. #define CV_SIMD_64F 0
  477. #endif
  478. namespace CV__SIMD_NAMESPACE {
  479. //! @addtogroup core_hal_intrin
  480. //! @{
  481. //! @name Wide init with value
  482. //! @{
  483. //! @brief Create maximum available capacity vector with elements set to a specific value
// Each wrapper forwards to the widest enabled backend via VXPREFIX
// (v_ / v256_ / v512_ depending on the selected namespace above).
inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
// Double-precision variant exists only when 64-bit float SIMD is available.
inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
#endif
  496. //! @}
  497. //! @name Wide init with zero
  498. //! @{
  499. //! @brief Create maximum available capacity vector with elements set to zero
// Zero-initialized vectors; forwarded to the widest enabled backend via VXPREFIX.
inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
// Double-precision variant exists only when 64-bit float SIMD is available.
inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
#endif
  512. //! @}
  513. //! @name Wide load from memory
  514. //! @{
  515. //! @brief Load maximum available capacity register contents from memory
// Unaligned full-width load; the overload set resolves on the pointee type.
inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
// Double-precision overload exists only when 64-bit float SIMD is available.
inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
#endif
  528. //! @}
  529. //! @name Wide load from memory(aligned)
  530. //! @{
  531. //! @brief Load maximum available capacity register contents from memory(aligned)
// Aligned full-width load: `ptr` is assumed to be aligned to CV_SIMD_WIDTH —
// passing a misaligned pointer is the caller's error (backend-dependent fault).
inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
// Double-precision overload exists only when 64-bit float SIMD is available.
inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#endif
  544. //! @}
  545. //! @name Wide load lower half from memory
  546. //! @{
  547. //! @brief Load lower half of maximum available capacity register from memory
// Load only the lower half of the register from memory (upper half content is
// backend-defined; see the selected backend's _load_low).
inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
// Double-precision overload exists only when 64-bit float SIMD is available.
inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
#endif
  560. //! @}
//! @name Wide load halfs from memory
//! @{
//! @brief Load maximum available capacity register contents from two memory blocks
// Lower half of the register is loaded from `ptr0`, upper half from `ptr1`.
inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
// FP64 overload only exists when the backend supports 64-bit float lanes.
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#endif
//! @}
//! @name Wide LUT of elements
//! @{
//! @brief Load maximum available capacity register contents with array elements by provided indexes
// Gather-style load: lane i is read from ptr[idx[i]].
// NOTE(review): idx presumably must hold one valid index per destination lane — confirm.
inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
// FP64 overload only exists when the backend supports 64-bit float lanes.
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#endif
//! @}
//! @name Wide LUT of element pairs
//! @{
//! @brief Load maximum available capacity register contents with array element pairs by provided indexes
// Like vx_lut, but each index selects a pair of consecutive elements, so only
// half as many indexes are consumed.
inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
// FP64 overload only exists when the backend supports 64-bit float lanes.
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#endif
//! @}
//! @name Wide LUT of element quads
//! @{
//! @brief Load maximum available capacity register contents with array element quads by provided indexes
// Like vx_lut, but each index selects four consecutive elements. Unlike the
// other LUT groups, no 64-bit or double overloads are provided here.
inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
//! @}
//! @name Wide load with double expansion
//! @{
//! @brief Load maximum available capacity register contents from memory with double expand
// Each loaded element is widened to twice its bit width (e.g. uchar -> uint16),
// so only half a register's worth of source elements is read.
inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
// Half-precision floats expand to single-precision lanes.
inline v_float32 vx_load_expand(const hfloat * ptr) { return VXPREFIX(_load_expand)(ptr); }
//! @}
//! @name Wide load with quad expansion
//! @{
//! @brief Load maximum available capacity register contents from memory with quad expand
// Each loaded 8-bit element is widened to 32 bits (x4 expansion); only the
// 8-bit source types are supported.
inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
//! @}
/** @brief SIMD processing state cleanup call */
inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
// Function-style wrapper layer (v_add, v_shl, v_eq, ...) emulating the newer
// universal-intrinsic API on top of the operator-based one. It is compiled for
// fixed-size backends only, and skipped for NEON (unless the C++ fallback is
// forced) which has its own cleaned-up layer further below.
#if !CV_SIMD_SCALABLE && !(CV_NEON && !defined(CV_FORCE_SIMD128_CPP))
// Compatibility layer
  641. template<typename T> struct VTraits {
  642. static inline int vlanes() { return T::nlanes; }
  643. enum { nlanes = T::nlanes, max_nlanes = T::nlanes };
  644. using lane_type = typename T::lane_type;
  645. };
// Generates function-style v_add / v_sub wrappers over the backend's
// operator+ / operator- for one vector type, plus a variadic v_add that folds
// three or more vectors left-to-right via recursion.
#define OPENCV_HAL_WRAP_BIN_OP_ADDSUB(_Tpvec) \
inline _Tpvec v_add(const _Tpvec& a, const _Tpvec& b) \
{ \
return a + b; \
} \
inline _Tpvec v_sub(const _Tpvec& a, const _Tpvec& b) \
{ \
return a - b; \
} \
template<typename... Args> \
inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
return v_add(f1 + f2, vf...); \
}
// Generates v_shr / v_shl wrappers over operator>> / operator<< with a runtime
// shift amount `n`.
#define OPENCV_HAL_WRAP_SHIFT_OP(_Tpvec) \
inline _Tpvec v_shr(const _Tpvec& a, int n) \
{ \
return a >> n; \
} \
inline _Tpvec v_shl(const _Tpvec& a, int n) \
{ \
return a << n; \
}
// Shift wrappers exist only for 16/32/64-bit integer lanes (no 8-bit shifts).
OPENCV_HAL_WRAP_SHIFT_OP(v_uint16)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint32)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint64)
OPENCV_HAL_WRAP_SHIFT_OP(v_int16)
OPENCV_HAL_WRAP_SHIFT_OP(v_int32)
OPENCV_HAL_WRAP_SHIFT_OP(v_int64)
// Add/sub wrappers for all maximum-width vector types.
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64)
#endif
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
// when we use CV_SIMD128 with 256/512 bit SIMD (e.g. AVX2 or AVX512)
// the explicit 128-bit types are distinct from the maximum-width ones above
// and need their own wrapper instantiations.
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x4)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x2)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x4)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x2)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x4)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint16x8)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint32x4)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint64x2)
OPENCV_HAL_WRAP_SHIFT_OP(v_int16x8)
OPENCV_HAL_WRAP_SHIFT_OP(v_int32x4)
OPENCV_HAL_WRAP_SHIFT_OP(v_int64x2)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x2)
#endif
#endif
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
// when we use CV_SIMD256 with 512 bit SIMD (e.g. AVX512)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x32)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x4)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x32)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x4)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x8)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint16x16)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint32x8)
OPENCV_HAL_WRAP_SHIFT_OP(v_uint64x4)
OPENCV_HAL_WRAP_SHIFT_OP(v_int16x16)
OPENCV_HAL_WRAP_SHIFT_OP(v_int32x8)
OPENCV_HAL_WRAP_SHIFT_OP(v_int64x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x4)
#endif
#endif
// Generates bitwise v_and / v_or / v_xor wrappers over operator& / | / ^
// for one vector type.
#define OPENCV_HAL_WRAP_BIN_OP_LOGIC(_Tpvec) \
inline _Tpvec v_and(const _Tpvec& a, const _Tpvec& b) \
{ \
return a & b; \
} \
inline _Tpvec v_or(const _Tpvec& a, const _Tpvec& b) \
{ \
return a | b; \
} \
inline _Tpvec v_xor(const _Tpvec& a, const _Tpvec& b) \
{ \
return a ^ b; \
}
// Generates the unary bitwise-complement wrapper v_not over operator~.
#define OPENCV_HAL_WRAP_NOT_OP(_Tpvec) \
inline _Tpvec v_not(const _Tpvec& a) \
{ \
return ~a; \
}
// Bitwise and/or/xor for all integer types plus float32 (bit-pattern ops);
// v_not is integer-only.
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float32)
OPENCV_HAL_WRAP_NOT_OP(v_uint8)
OPENCV_HAL_WRAP_NOT_OP(v_uint16)
OPENCV_HAL_WRAP_NOT_OP(v_uint32)
OPENCV_HAL_WRAP_NOT_OP(v_uint64)
OPENCV_HAL_WRAP_NOT_OP(v_int8)
OPENCV_HAL_WRAP_NOT_OP(v_int16)
OPENCV_HAL_WRAP_NOT_OP(v_int32)
OPENCV_HAL_WRAP_NOT_OP(v_int64)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float64)
#endif
// Same wrappers for the explicit 128-bit types when they differ from the
// maximum-width types above.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8x16)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16x8)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32x4)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64x2)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8x16)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16x8)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32x4)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64x2)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float32x4)
OPENCV_HAL_WRAP_NOT_OP(v_uint8x16)
OPENCV_HAL_WRAP_NOT_OP(v_uint16x8)
OPENCV_HAL_WRAP_NOT_OP(v_uint32x4)
OPENCV_HAL_WRAP_NOT_OP(v_uint64x2)
OPENCV_HAL_WRAP_NOT_OP(v_int8x16)
OPENCV_HAL_WRAP_NOT_OP(v_int16x8)
OPENCV_HAL_WRAP_NOT_OP(v_int32x4)
OPENCV_HAL_WRAP_NOT_OP(v_int64x2)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float64x2)
#endif
#endif
// ... and for the explicit 256-bit types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8x32)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16x16)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32x8)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64x4)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8x32)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16x16)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32x8)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64x4)
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float32x8)
OPENCV_HAL_WRAP_NOT_OP(v_uint8x32)
OPENCV_HAL_WRAP_NOT_OP(v_uint16x16)
OPENCV_HAL_WRAP_NOT_OP(v_uint32x8)
OPENCV_HAL_WRAP_NOT_OP(v_uint64x4)
OPENCV_HAL_WRAP_NOT_OP(v_int8x32)
OPENCV_HAL_WRAP_NOT_OP(v_int16x16)
OPENCV_HAL_WRAP_NOT_OP(v_int32x8)
OPENCV_HAL_WRAP_NOT_OP(v_int64x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_float64x4)
#endif
#endif
// Generates the v_mul wrapper over operator*, plus a variadic v_mul that
// folds three or more vectors left-to-right via recursion.
#define OPENCV_HAL_WRAP_BIN_OP_MUL(_Tpvec) \
inline _Tpvec v_mul(const _Tpvec& a, const _Tpvec& b) \
{ \
return a * b; \
} \
template<typename... Args> \
inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
return v_mul(f1 * f2, vf...); \
}
// Multiplication wrappers: 8/16/32-bit integers and floats only (no 64-bit
// integer multiply here).
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64)
#endif
// Explicit 128-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x4)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x4)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x2)
#endif
#endif
// Explicit 256-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x32)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x32)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x8)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x4)
#endif
#endif
// Generates the v_div wrapper over operator/ — floating-point types only.
#define OPENCV_HAL_WRAP_BIN_OP_DIV(_Tpvec) \
inline _Tpvec v_div(const _Tpvec& a, const _Tpvec& b) \
{ \
return a / b; \
}
OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64)
#endif
// Explicit 128-bit float types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64x2)
#endif
#endif
// Explicit 256-bit float types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32x8)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64x4)
#endif
#endif
// Generates a single comparison wrapper v_<intrin> over the given operator;
// result is a per-lane mask vector of the same type.
#define OPENCV_HAL_WRAP_CMP_OP(_Tpvec, intrin, op) \
inline _Tpvec v_##intrin(const _Tpvec& a, const _Tpvec& b) \
{ \
return a op b; \
}
// Equality-only wrappers (v_eq / v_ne). Used for 64-bit integer types below,
// which do not get the ordered comparisons, so it never overlaps with
// OPENCV_HAL_WRAP_CMP on the same type.
#define OPENCV_HAL_WRAP_EQ_OP(_Tpvec) \
inline _Tpvec v_eq(const _Tpvec& a, const _Tpvec& b) \
{ \
return a == b; \
} \
inline _Tpvec v_ne(const _Tpvec& a, const _Tpvec& b) \
{ \
return a != b; \
}
// Full comparison set: eq, ne, lt, gt, le, ge.
#define OPENCV_HAL_WRAP_CMP(_Tpvec) \
OPENCV_HAL_WRAP_CMP_OP(_Tpvec, eq, ==) \
OPENCV_HAL_WRAP_CMP_OP(_Tpvec, ne, !=) \
OPENCV_HAL_WRAP_CMP_OP(_Tpvec, lt, <) \
OPENCV_HAL_WRAP_CMP_OP(_Tpvec, gt, >) \
OPENCV_HAL_WRAP_CMP_OP(_Tpvec, le, <=) \
OPENCV_HAL_WRAP_CMP_OP(_Tpvec, ge, >=)
// Full comparisons for 8/16/32-bit and float types; 64-bit integers only get
// equality/inequality (see OPENCV_HAL_WRAP_EQ_OP above).
OPENCV_HAL_WRAP_CMP(v_uint8)
OPENCV_HAL_WRAP_CMP(v_uint16)
OPENCV_HAL_WRAP_CMP(v_uint32)
OPENCV_HAL_WRAP_EQ_OP(v_uint64)
OPENCV_HAL_WRAP_CMP(v_int8)
OPENCV_HAL_WRAP_CMP(v_int16)
OPENCV_HAL_WRAP_CMP(v_int32)
OPENCV_HAL_WRAP_EQ_OP(v_int64)
OPENCV_HAL_WRAP_CMP(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_CMP(v_float64)
#endif
// Explicit 128-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_CMP(v_uint8x16)
OPENCV_HAL_WRAP_CMP(v_uint16x8)
OPENCV_HAL_WRAP_CMP(v_uint32x4)
OPENCV_HAL_WRAP_EQ_OP(v_uint64x2)
OPENCV_HAL_WRAP_CMP(v_int8x16)
OPENCV_HAL_WRAP_CMP(v_int16x8)
OPENCV_HAL_WRAP_CMP(v_int32x4)
OPENCV_HAL_WRAP_EQ_OP(v_int64x2)
OPENCV_HAL_WRAP_CMP(v_float32x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_CMP(v_float64x2)
#endif
#endif
// Explicit 256-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_CMP(v_uint8x32)
OPENCV_HAL_WRAP_CMP(v_uint16x16)
OPENCV_HAL_WRAP_CMP(v_uint32x8)
OPENCV_HAL_WRAP_EQ_OP(v_uint64x4)
OPENCV_HAL_WRAP_CMP(v_int8x32)
OPENCV_HAL_WRAP_CMP(v_int16x16)
OPENCV_HAL_WRAP_CMP(v_int32x8)
OPENCV_HAL_WRAP_EQ_OP(v_int64x4)
OPENCV_HAL_WRAP_CMP(v_float32x8)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_CMP(v_float64x4)
#endif
#endif
//////////// get0 ////////////
// Generates v_get0, returning the first (lowest) lane of the vector as a
// scalar, by forwarding to the member get0().
#define OPENCV_HAL_WRAP_GRT0(_Tpvec) \
inline typename VTraits<_Tpvec>::lane_type v_get0(const _Tpvec& v) \
{ \
return v.get0(); \
}
// v_get0 for every maximum-width vector type.
OPENCV_HAL_WRAP_GRT0(v_uint8)
OPENCV_HAL_WRAP_GRT0(v_int8)
OPENCV_HAL_WRAP_GRT0(v_uint16)
OPENCV_HAL_WRAP_GRT0(v_int16)
OPENCV_HAL_WRAP_GRT0(v_uint32)
OPENCV_HAL_WRAP_GRT0(v_int32)
OPENCV_HAL_WRAP_GRT0(v_uint64)
OPENCV_HAL_WRAP_GRT0(v_int64)
OPENCV_HAL_WRAP_GRT0(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_GRT0(v_float64)
#endif
// Explicit 128-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_GRT0(v_uint8x16)
OPENCV_HAL_WRAP_GRT0(v_uint16x8)
OPENCV_HAL_WRAP_GRT0(v_uint32x4)
OPENCV_HAL_WRAP_GRT0(v_uint64x2)
OPENCV_HAL_WRAP_GRT0(v_int8x16)
OPENCV_HAL_WRAP_GRT0(v_int16x8)
OPENCV_HAL_WRAP_GRT0(v_int32x4)
OPENCV_HAL_WRAP_GRT0(v_int64x2)
OPENCV_HAL_WRAP_GRT0(v_float32x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_GRT0(v_float64x2)
#endif
#endif
// Explicit 256-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_GRT0(v_uint8x32)
OPENCV_HAL_WRAP_GRT0(v_uint16x16)
OPENCV_HAL_WRAP_GRT0(v_uint32x8)
OPENCV_HAL_WRAP_GRT0(v_uint64x4)
OPENCV_HAL_WRAP_GRT0(v_int8x32)
OPENCV_HAL_WRAP_GRT0(v_int16x16)
OPENCV_HAL_WRAP_GRT0(v_int32x8)
OPENCV_HAL_WRAP_GRT0(v_int64x4)
OPENCV_HAL_WRAP_GRT0(v_float32x8)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_GRT0(v_float64x4)
#endif
#endif
// Generates v_extract_highest, returning the last lane of the vector as a
// scalar via v_extract_n with the compile-time lane count from VTraits.
#define OPENCV_HAL_WRAP_EXTRACT(_Tpvec) \
inline typename VTraits<_Tpvec>::lane_type v_extract_highest(const _Tpvec& v) \
{ \
return v_extract_n<VTraits<_Tpvec>::nlanes-1>(v); \
}
// v_extract_highest for every maximum-width vector type.
OPENCV_HAL_WRAP_EXTRACT(v_uint8)
OPENCV_HAL_WRAP_EXTRACT(v_int8)
OPENCV_HAL_WRAP_EXTRACT(v_uint16)
OPENCV_HAL_WRAP_EXTRACT(v_int16)
OPENCV_HAL_WRAP_EXTRACT(v_uint32)
OPENCV_HAL_WRAP_EXTRACT(v_int32)
OPENCV_HAL_WRAP_EXTRACT(v_uint64)
OPENCV_HAL_WRAP_EXTRACT(v_int64)
OPENCV_HAL_WRAP_EXTRACT(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_EXTRACT(v_float64)
#endif
// Explicit 128-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_EXTRACT(v_uint8x16)
OPENCV_HAL_WRAP_EXTRACT(v_uint16x8)
OPENCV_HAL_WRAP_EXTRACT(v_uint32x4)
OPENCV_HAL_WRAP_EXTRACT(v_uint64x2)
OPENCV_HAL_WRAP_EXTRACT(v_int8x16)
OPENCV_HAL_WRAP_EXTRACT(v_int16x8)
OPENCV_HAL_WRAP_EXTRACT(v_int32x4)
OPENCV_HAL_WRAP_EXTRACT(v_int64x2)
OPENCV_HAL_WRAP_EXTRACT(v_float32x4)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_EXTRACT(v_float64x2)
#endif
#endif
// Explicit 256-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_EXTRACT(v_uint8x32)
OPENCV_HAL_WRAP_EXTRACT(v_uint16x16)
OPENCV_HAL_WRAP_EXTRACT(v_uint32x8)
OPENCV_HAL_WRAP_EXTRACT(v_uint64x4)
OPENCV_HAL_WRAP_EXTRACT(v_int8x32)
OPENCV_HAL_WRAP_EXTRACT(v_int16x16)
OPENCV_HAL_WRAP_EXTRACT(v_int32x8)
OPENCV_HAL_WRAP_EXTRACT(v_int64x4)
OPENCV_HAL_WRAP_EXTRACT(v_float32x8)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_EXTRACT(v_float64x4)
#endif
#endif
// Generates v_broadcast_highest, splatting the last lane across the whole
// vector via v_broadcast_element. Instantiated only for 32-bit lane types.
#define OPENCV_HAL_WRAP_BROADCAST(_Tpvec) \
inline _Tpvec v_broadcast_highest(const _Tpvec& v) \
{ \
return v_broadcast_element<VTraits<_Tpvec>::nlanes-1>(v); \
}
OPENCV_HAL_WRAP_BROADCAST(v_uint32)
OPENCV_HAL_WRAP_BROADCAST(v_int32)
OPENCV_HAL_WRAP_BROADCAST(v_float32)
// Explicit 128-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
OPENCV_HAL_WRAP_BROADCAST(v_uint32x4)
OPENCV_HAL_WRAP_BROADCAST(v_int32x4)
OPENCV_HAL_WRAP_BROADCAST(v_float32x4)
#endif
// Explicit 256-bit types when distinct from the maximum-width types.
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
OPENCV_HAL_WRAP_BROADCAST(v_uint32x8)
OPENCV_HAL_WRAP_BROADCAST(v_int32x8)
OPENCV_HAL_WRAP_BROADCAST(v_float32x8)
#endif
#endif //!CV_SIMD_SCALABLE
#if (CV_NEON /* || CV_others */) && !defined(CV_FORCE_SIMD128_CPP)
// Compatibility layer for the backend that cleaned up.
// This #define re-uses the name from the fixed-size layer above without an
// #undef; that is safe because the two guards are mutually exclusive (the
// layer above is excluded exactly when CV_NEON && !CV_FORCE_SIMD128_CPP).
// Here only the variadic (3+ argument) v_add is generated — the binary
// v_add/v_sub already exist natively in this backend.
#define OPENCV_HAL_WRAP_BIN_OP_ADDSUB(_Tpvec) \
template<typename... Args> \
inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
return v_add(v_add(f1, f2), vf...); \
}
// Variadic v_add for all vector types of this backend.
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64)
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64)
#endif
// Variadic (3+ argument) v_mul, built on the backend's binary v_mul.
#define OPENCV_HAL_WRAP_BIN_OP_MUL(_Tpvec) \
template<typename... Args> \
inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const Args&... vf) { \
return v_mul(v_mul(f1, f2), vf...); \
}
// No 64-bit integer multiply, matching the fixed-size layer above.
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32)
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64)
#endif
// v_extract_highest: last lane as scalar (same shape as the fixed-size layer;
// safe redefinition — the guards are mutually exclusive).
#define OPENCV_HAL_WRAP_EXTRACT(_Tpvec) \
inline typename VTraits<_Tpvec>::lane_type v_extract_highest(const _Tpvec& v) \
{ \
return v_extract_n<VTraits<_Tpvec>::nlanes-1>(v); \
}
OPENCV_HAL_WRAP_EXTRACT(v_uint8)
OPENCV_HAL_WRAP_EXTRACT(v_int8)
OPENCV_HAL_WRAP_EXTRACT(v_uint16)
OPENCV_HAL_WRAP_EXTRACT(v_int16)
OPENCV_HAL_WRAP_EXTRACT(v_uint32)
OPENCV_HAL_WRAP_EXTRACT(v_int32)
OPENCV_HAL_WRAP_EXTRACT(v_uint64)
OPENCV_HAL_WRAP_EXTRACT(v_int64)
OPENCV_HAL_WRAP_EXTRACT(v_float32)
#if CV_SIMD_64F
OPENCV_HAL_WRAP_EXTRACT(v_float64)
#endif
// v_broadcast_highest: splat the last lane; 32-bit lane types only.
#define OPENCV_HAL_WRAP_BROADCAST(_Tpvec) \
inline _Tpvec v_broadcast_highest(const _Tpvec& v) \
{ \
return v_broadcast_element<VTraits<_Tpvec>::nlanes-1>(v) \
}
OPENCV_HAL_WRAP_BROADCAST(v_uint32)
OPENCV_HAL_WRAP_BROADCAST(v_int32)
OPENCV_HAL_WRAP_BROADCAST(v_float32)
#endif //CV_NEON
  1105. //! @cond IGNORED
  1106. // backward compatibility
  1107. template<typename _Tp, typename _Tvec> static inline
  1108. void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
  1109. // backward compatibility
  1110. template<typename _Tp, typename _Tvec> static inline
  1111. void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
  1112. //! @endcond
//! @}
// VXPREFIX is local to this wrapper section; remove it from the macro
// namespace once all vx_ helpers have been generated.
#undef VXPREFIX
} // namespace
// Provide safe zero defaults so user code may test these feature macros even
// when no SIMD backend defined them above.
#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
#endif
#ifndef CV_SIMD
#define CV_SIMD 0
#endif
#include "simd_utils.impl.hpp"
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
} // cv::
//! @endcond
// NOTE(review): presumably pairs with a `#pragma GCC diagnostic push` emitted
// near the top of this file for GCC 12 — confirm against the file header.
#if defined(__GNUC__) && __GNUC__ == 12
#pragma GCC diagnostic pop
#endif
#endif