fast_math.hpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_CORE_FAST_MATH_HPP
  45. #define OPENCV_CORE_FAST_MATH_HPP
  46. #include "opencv2/core/cvdef.h"
  47. //! @addtogroup core_utils
  48. //! @{
  49. /****************************************************************************************\
  50. * fast math *
  51. \****************************************************************************************/
  52. #ifdef __cplusplus
  53. # include <cmath>
  54. #else
  55. # ifdef __BORLANDC__
  56. # include <fastmath.h>
  57. # else
  58. # include <math.h>
  59. # endif
  60. #endif
  61. #if defined(__CUDACC__)
  62. // nothing, intrinsics/asm code is not supported
  63. #else
  64. #if ((defined _MSC_VER && defined _M_X64) \
  65. || (defined __GNUC__ && defined __SSE2__)) \
  66. && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
  67. #include <emmintrin.h>
  68. #endif
  69. #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
  70. && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
  71. #include <altivec.h>
  72. #undef vector
  73. #undef bool
  74. #undef pixel
  75. #endif
  // Selects a target-specific inline-asm body for cvRound(), unless the build already supplied
  // CV_INLINE_ROUND_FLT. Every CV_INLINE_ROUND_* macro defined here expands to declarations plus a
  // `return`, so it is only usable as the body of a function returning int.
  76. #if defined(CV_INLINE_ROUND_FLT)
  77. // user-specified version
  78. // CV_INLINE_ROUND_DBL should be defined too
  79. #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON) && !defined __SOFTFP__
  80. // 1. general scheme
  // ARM_ROUND declares `res` and `temp`, runs `_asm_string` with `_value` bound to the [value]
  // input operand, and returns `res`. The expansion ends WITHOUT a semicolon after `return res`;
  // the call site provides it. CV_UNUSED(temp) presumably silences unused-variable diagnostics.
  81. #define ARM_ROUND(_value, _asm_string) \
  82. int res; \
  83. float temp; \
  84. CV_UNUSED(temp); \
  85. __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
  86. return res
  87. // 2. version for double
  // The two variants differ only in how the input operand is spelled in the asm template:
  // `%[value]` under clang, `%P[value]` for other compilers.
  88. #ifdef __clang__
  89. #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
  90. #else
  91. #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
  92. #endif
  93. // 3. version for float
  94. #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
  95. #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
  96. // P8 and newer machines can convert fp32/64 to int quickly.
  // Unlike ARM_ROUND, this expansion already ends with `return out;` (semicolon included).
  // The input is explicitly cast to double before being handed to the asm statement.
  97. #define CV_INLINE_ROUND_DBL(value) \
  98. int out; \
  99. double temp; \
  100. __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
  101. return out;
  102. // FP32 also works with FP64 routine above
  // (the float argument is widened by the (double)(value) cast inside CV_INLINE_ROUND_DBL).
  103. #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
  104. #endif
  // Infinity test override for cvIsInf(): defined here only when the build did not supply
  // CV_INLINE_ISINF_FLT and the target is PPC64/PWR9 with the scalar_test_data_class macro
  // available. The expansion is a complete `return ...;` statement.
  105. #ifdef CV_INLINE_ISINF_FLT
  106. // user-specified version
  107. // CV_INLINE_ISINF_DBL should be defined too
  108. #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
  // NOTE(review): 0x30 is the class mask handed to scalar_test_data_class for the infinity
  // test; its bit meaning is defined by that builtin, not by this file -- confirm there.
  109. #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
  110. #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
  111. #endif
  // NaN test override for cvIsNaN(), selected under the same conditions as the infinity
  // override above but keyed on CV_INLINE_ISNAN_FLT.
  112. #ifdef CV_INLINE_ISNAN_FLT
  113. // user-specified version
  114. // CV_INLINE_ISNAN_DBL should be defined too
  115. #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
  // NOTE(review): 0x40 is the class mask used for the NaN test; confirm against the
  // scalar_test_data_class documentation.
  116. #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
  117. #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
  118. #endif
  // Default OPENCV_USE_FASTMATH_BUILTINS to 1 on x86-64, i686, ARM and PPC64 targets. A value
  // already defined by the build (including 0) is left untouched, because the guard only checks
  // whether the macro is defined.
  119. #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
  120. && ( \
  121. defined(__x86_64__) || defined(__i686__) \
  122. || defined(__arm__) \
  123. || defined(__PPC64__) \
  124. )
  125. /* Use builtin C math functions when available. Dedicated hardware is available to
  126. round and convert FP values. */
  127. #define OPENCV_USE_FASTMATH_BUILTINS 1
  128. #endif
  129. /* Enable builtin math functions if possible, desired, and available.
  130. Note, not all math functions inline equally. E.g lrint will not inline
  131. without the -fno-math-errno option. */
  // Nothing is enabled when CV_ICC is defined, or when OPENCV_USE_FASTMATH_BUILTINS is
  // undefined or evaluates to 0. Otherwise one compiler-specific branch below is taken.
  // Each CV_INLINE_ISNAN_* / CV_INLINE_ISINF_* macro is defined only if it is not already
  // defined, and expands to a complete `return ...;` statement.
  132. #if defined(CV_ICC)
  133. // nothing
  134. #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
  // clang: CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS makes cvRound/cvFloor/cvCeil use the
  // __builtin_* forms. The classification builtins are additionally gated on __has_builtin,
  // and the same type-generic builtin is used for both float and double.
  135. #if defined(__clang__)
  136. #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  137. #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
  138. #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
  139. #endif
  140. #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
  141. #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
  142. #endif
  143. #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
  144. #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
  145. #endif
  146. #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
  147. #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
  148. #endif
  // GCC (non-clang): CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS plays the same role as the clang
  // flag above. The float variants use the f-suffixed builtins (__builtin_isnanf / __builtin_isinff).
  149. #elif defined(__GNUC__)
  150. #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
  151. #if !defined(CV_INLINE_ISNAN_DBL)
  152. #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
  153. #endif
  154. #if !defined(CV_INLINE_ISNAN_FLT)
  155. #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
  156. #endif
  157. #if !defined(CV_INLINE_ISINF_DBL)
  158. #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
  159. #endif
  160. #if !defined(CV_INLINE_ISINF_FLT)
  161. #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
  162. #endif
  // MSVC: no CV__FASTMATH_ENABLE_* flag is defined in this branch; only the classification
  // macros are mapped, onto the unqualified isnan()/isinf().
  163. #elif defined(_MSC_VER)
  164. #if !defined(CV_INLINE_ISNAN_DBL)
  165. #define CV_INLINE_ISNAN_DBL(value) return isnan(value);
  166. #endif
  167. #if !defined(CV_INLINE_ISNAN_FLT)
  168. #define CV_INLINE_ISNAN_FLT(value) return isnan(value);
  169. #endif
  170. #if !defined(CV_INLINE_ISINF_DBL)
  171. #define CV_INLINE_ISINF_DBL(value) return isinf(value);
  172. #endif
  173. #if !defined(CV_INLINE_ISINF_FLT)
  174. #define CV_INLINE_ISINF_FLT(value) return isinf(value);
  175. #endif
  176. #endif
  177. #endif
  178. #endif // defined(__CUDACC__)
  179. /** @brief Rounds floating-point number to the nearest integer
  180. @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  181. result is not defined.
  182. */
  183. CV_INLINE int
  184. cvRound( double value )
  185. {
  186. #if defined CV_INLINE_ROUND_DBL
  187. CV_INLINE_ROUND_DBL(value);
  188. #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
  189. __m128d t = _mm_set_sd( value );
  190. return _mm_cvtsd_si32(t);
  191. #elif defined _MSC_VER && defined _M_IX86
  192. int t;
  193. __asm
  194. {
  195. fld value;
  196. fistp t;
  197. }
  198. return t;
  199. #elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
  200. defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  201. return (int)__builtin_lrint(value);
  202. #else
  203. return (int)lrint(value);
  204. #endif
  205. }
  206. /** @brief Rounds floating-point number to the nearest integer not larger than the original.
  207. The function computes an integer i such that:
  208. \f[i \le \texttt{value} < i+1\f]
  209. @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  210. result is not defined.
  211. */
  212. CV_INLINE int cvFloor( double value )
  213. {
  214. #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
  215. defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  216. return (int)__builtin_floor(value);
  217. #elif defined __loongarch64
  218. int i;
  219. double tmp;
  220. __asm__ ("ftintrm.l.d %[tmp], %[in] \n\t"
  221. "movfr2gr.d %[i], %[tmp] \n\t"
  222. : [i] "=r" (i), [tmp] "=f" (tmp)
  223. : [in] "f" (value)
  224. :);
  225. return i;
  226. #else
  227. int i = (int)value;
  228. return i - (i > value);
  229. #endif
  230. }
  231. /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
  232. The function computes an integer i such that:
  233. \f[i \le \texttt{value} < i+1\f]
  234. @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  235. result is not defined.
  236. */
  237. CV_INLINE int cvCeil( double value )
  238. {
  239. #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
  240. defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  241. return (int)__builtin_ceil(value);
  242. #elif defined __loongarch64
  243. int i;
  244. double tmp;
  245. __asm__ ("ftintrp.l.d %[tmp], %[in] \n\t"
  246. "movfr2gr.d %[i], %[tmp] \n\t"
  247. : [i] "=r" (i), [tmp] "=f" (tmp)
  248. : [in] "f" (value)
  249. :);
  250. return i;
  251. #else
  252. int i = (int)value;
  253. return i + (i < value);
  254. #endif
  255. }
  256. /** @brief Determines if the argument is Not A Number.
  257. @param value The input floating-point value
  258. The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
  259. otherwise. */
  260. CV_INLINE int cvIsNaN( double value )
  261. {
  262. #if defined CV_INLINE_ISNAN_DBL
  263. CV_INLINE_ISNAN_DBL(value);
  264. #else
  265. Cv64suf ieee754;
  266. ieee754.f = value;
  267. return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
  268. ((unsigned)ieee754.u != 0) > 0x7ff00000;
  269. #endif
  270. }
  271. /** @brief Determines if the argument is Infinity.
  272. @param value The input floating-point value
  273. The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
  274. and 0 otherwise. */
  275. CV_INLINE int cvIsInf( double value )
  276. {
  277. #if defined CV_INLINE_ISINF_DBL
  278. CV_INLINE_ISINF_DBL(value);
  279. #elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__) || defined(__loongarch64)
  280. Cv64suf ieee754;
  281. ieee754.f = value;
  282. return (ieee754.u & 0x7fffffffffffffff) ==
  283. 0x7ff0000000000000;
  284. #else
  285. Cv64suf ieee754;
  286. ieee754.f = value;
  287. return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
  288. (unsigned)ieee754.u == 0;
  289. #endif
  290. }
  291. #ifdef __cplusplus
  292. /** @overload */
  293. CV_INLINE int cvRound(float value)
  294. {
  295. #if defined CV_INLINE_ROUND_FLT
  296. CV_INLINE_ROUND_FLT(value);
  297. #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
  298. __m128 t = _mm_set_ss( value );
  299. return _mm_cvtss_si32(t);
  300. #elif defined _MSC_VER && defined _M_IX86
  301. int t;
  302. __asm
  303. {
  304. fld value;
  305. fistp t;
  306. }
  307. return t;
  308. #elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
  309. defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  310. return (int)__builtin_lrintf(value);
  311. #else
  312. return (int)lrintf(value);
  313. #endif
  314. }
  315. /** @overload */
  316. CV_INLINE int cvRound( int value )
  317. {
  318. return value;
  319. }
  320. /** @overload */
  321. CV_INLINE int cvFloor( float value )
  322. {
  323. #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
  324. defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  325. return (int)__builtin_floorf(value);
  326. #elif defined __loongarch__
  327. int i;
  328. float tmp;
  329. __asm__ ("ftintrm.w.s %[tmp], %[in] \n\t"
  330. "movfr2gr.s %[i], %[tmp] \n\t"
  331. : [i] "=r" (i), [tmp] "=f" (tmp)
  332. : [in] "f" (value)
  333. :);
  334. return i;
  335. #else
  336. int i = (int)value;
  337. return i - (i > value);
  338. #endif
  339. }
  340. /** @overload */
  341. CV_INLINE int cvFloor( int value )
  342. {
  343. return value;
  344. }
  345. /** @overload */
  346. CV_INLINE int cvCeil( float value )
  347. {
  348. #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
  349. defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
  350. return (int)__builtin_ceilf(value);
  351. #elif defined __loongarch__
  352. int i;
  353. float tmp;
  354. __asm__ ("ftintrp.w.s %[tmp], %[in] \n\t"
  355. "movfr2gr.s %[i], %[tmp] \n\t"
  356. : [i] "=r" (i), [tmp] "=f" (tmp)
  357. : [in] "f" (value)
  358. :);
  359. return i;
  360. #else
  361. int i = (int)value;
  362. return i + (i < value);
  363. #endif
  364. }
  365. /** @overload */
  366. CV_INLINE int cvCeil( int value )
  367. {
  368. return value;
  369. }
  370. /** @overload */
  371. CV_INLINE int cvIsNaN( float value )
  372. {
  373. #if defined CV_INLINE_ISNAN_FLT
  374. CV_INLINE_ISNAN_FLT(value);
  375. #else
  376. Cv32suf ieee754;
  377. ieee754.f = value;
  378. return (ieee754.u & 0x7fffffff) > 0x7f800000;
  379. #endif
  380. }
  381. /** @overload */
  382. CV_INLINE int cvIsInf( float value )
  383. {
  384. #if defined CV_INLINE_ISINF_FLT
  385. CV_INLINE_ISINF_FLT(value);
  386. #else
  387. Cv32suf ieee754;
  388. ieee754.f = value;
  389. return (ieee754.u & 0x7fffffff) == 0x7f800000;
  390. #endif
  391. }
  392. #endif // __cplusplus
  393. //! @} core_utils
  394. #endif