///////////////////////////////////////////////////////////////
//  Copyright 2020 Madhur Chauhan.
//  Copyright 2020 John Maddock. Distributed under the Boost
//  Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt

#ifndef BOOST_MP_ADD_UNSIGNED_ADDC_32_HPP
#define BOOST_MP_ADD_UNSIGNED_ADDC_32_HPP

#include <boost/multiprecision/cpp_int/intel_intrinsics.hpp>

namespace boost { namespace multiprecision { namespace backends {

template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned_constexpr(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   using ::boost::multiprecision::std_constexpr::swap;
   //
   // This is the generic, C++ only version of addition.
   // It's also used for all constexpr branches, hence the name.
   // Nothing fancy, just let uintmax_t take the strain:
   //
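   // For example, with 32-bit limbs the sum of two limbs plus a carry-in always
   // fits in the 64-bit double_limb_type: the low 32 bits are the result digit,
   // and bit 32 (extracted below by shifting right by limb_bits) is the carry
   // into the next position. (Illustrative widths - limb_bits is platform
   // dependent.)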
   double_limb_type carry = 0;
   unsigned         m(0), x(0);
   unsigned         as = a.size();
   unsigned         bs = b.size();
   minmax(as, bs, m, x);
   if (x == 1)
   {
      bool s = a.sign();
      result = static_cast<double_limb_type>(*a.limbs()) + static_cast<double_limb_type>(*b.limbs());
      result.sign(s);
      return;
   }
   result.resize(x, x);
   typename CppInt2::const_limb_pointer pa = a.limbs();
   typename CppInt3::const_limb_pointer pb = b.limbs();
   typename CppInt1::limb_pointer       pr = result.limbs();
   typename CppInt1::limb_pointer       pr_end = pr + m;

   if (as < bs)
      swap(pa, pb);

   // First where a and b overlap:
   while (pr != pr_end)
   {
      carry += static_cast<double_limb_type>(*pa) + static_cast<double_limb_type>(*pb);
#ifdef __MSVC_RUNTIME_CHECKS
      *pr = static_cast<limb_type>(carry & ~static_cast<limb_type>(0));
#else
      *pr = static_cast<limb_type>(carry);
#endif
      carry >>= CppInt1::limb_bits;
      ++pr, ++pa, ++pb;
   }
   pr_end += x - m;
   // Now where only a has digits:
   while (pr != pr_end)
   {
      if (!carry)
      {
         if (pa != pr)
            std_constexpr::copy(pa, pa + (pr_end - pr), pr);
         break;
      }
      carry += static_cast<double_limb_type>(*pa);
#ifdef __MSVC_RUNTIME_CHECKS
      *pr = static_cast<limb_type>(carry & ~static_cast<limb_type>(0));
#else
      *pr = static_cast<limb_type>(carry);
#endif
      carry >>= CppInt1::limb_bits;
      ++pr, ++pa;
   }
   if (carry)
   {
      // We overflowed, need to add one more limb:
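      // (e.g. with 32-bit limbs, 0xFFFFFFFF + 1 carries out of the top limb,
      // so the result needs x + 1 limbs with the new top limb equal to 1 -
      // illustrative values only):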
      result.resize(x + 1, x + 1);
      if (result.size() > x)
         result.limbs()[x] = static_cast<limb_type>(1u);
   }
   result.normalize();
   result.sign(a.sign());
}
//
// Core subtraction routine for all non-trivial cpp_int's:
//
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned_constexpr(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   using ::boost::multiprecision::std_constexpr::swap;
   //
   // This is the generic, C++ only version of subtraction.
   // It's also used for all constexpr branches, hence the name.
   // Nothing fancy, just let uintmax_t take the strain:
   //
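   // We always subtract the smaller magnitude from the larger, so each per-limb
   // step below is (larger_limb - smaller_limb - borrow) computed in the wider
   // double_limb_type; on underflow the value wraps, and the borrow bit is
   // recovered from bit limb_bits of the wrapped result.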
   double_limb_type borrow = 0;
   unsigned         m(0), x(0);
   minmax(a.size(), b.size(), m, x);
   //
   // special cases for small limb counts:
   //
   if (x == 1)
   {
      bool      s  = a.sign();
      limb_type al = *a.limbs();
      limb_type bl = *b.limbs();
      if (bl > al)
      {
         ::boost::multiprecision::std_constexpr::swap(al, bl);
         s = !s;
      }
      result = al - bl;
      result.sign(s);
      return;
   }
   // This isn't used till later, but comparison has to occur before we resize the result,
   // as that may also resize a or b if this is an inplace operation:
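   // (Consider "a -= b", where result and a are the same object: resizing result
   // first could reallocate the limb buffer out from under a before we compare.)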
   int c = a.compare_unsigned(b);
   // Set up the result vector:
   result.resize(x, x);
   // Now that a, b, and result are stable, get pointers to their limbs:
   typename CppInt2::const_limb_pointer pa = a.limbs();
   typename CppInt3::const_limb_pointer pb = b.limbs();
   typename CppInt1::limb_pointer       pr = result.limbs();
   bool                                 swapped = false;
   if (c < 0)
   {
      swap(pa, pb);
      swapped = true;
   }
   else if (c == 0)
   {
      result = static_cast<limb_type>(0);
      return;
   }
   unsigned i = 0;
   // First where a and b overlap:
   while (i < m)
   {
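      // Wide subtraction: the low limb_bits bits are the result digit, and
      // bit limb_bits of the (possibly wrapped) difference is the borrow
      // carried into the next limb: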
      borrow = static_cast<double_limb_type>(pa[i]) - static_cast<double_limb_type>(pb[i]) - borrow;
      pr[i] = static_cast<limb_type>(borrow);
      borrow = (borrow >> CppInt1::limb_bits) & 1u;
      ++i;
   }
   // Now where only a has digits, only as long as we've borrowed:
   while (borrow && (i < x))
   {
      borrow = static_cast<double_limb_type>(pa[i]) - borrow;
      pr[i] = static_cast<limb_type>(borrow);
      borrow = (borrow >> CppInt1::limb_bits) & 1u;
      ++i;
   }
   // Any remaining digits are the same as those in pa:
   if ((x != i) && (pa != pr))
      std_constexpr::copy(pa + i, pa + x, pr + i);
   BOOST_ASSERT(0 == borrow);
   //
   // We may have lost digits, if so update limb usage count:
   //
   result.normalize();
   result.sign(a.sign());
   if (swapped)
      result.negate();
}
#ifdef BOOST_MP_HAS_IMMINTRIN_H
//
// This is the key addition routine where all the argument types are non-trivial cpp_int's:
//
// This optimization is limited to: GCC, LLVM, ICC (Intel), and MSVC for x86_64 and i386.
// If your architecture and compiler support an ADC intrinsic that is not used here,
// please file a bug.
//
// As of May 2020, major compilers don't recognize the carry chain: even though ADC
// intrinsics are used to hint compilers towards ADC, they still don't unroll the
// loop efficiently (except LLVM), so the loop is unrolled manually.
//
// Also note that these intrinsics were only introduced by Intel as part of the
// ADX processor extensions, even though the adc instruction has been available
// on essentially all x86 processors. That means gcc-9, clang-9, msvc-14.2 and
// later are required to support these intrinsics.
//
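// addcarry_limb and subborrow_limb used below are thin wrappers (see
// intel_intrinsics.hpp) dispatching to the _addcarry_uXX/_subborrow_uXX
// intrinsic that matches limb_type's width.
//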
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
   if (BOOST_MP_IS_CONST_EVALUATED(a.size()))
   {
      add_unsigned_constexpr(result, a, b);
   }
   else
#endif
   {
      using std::swap;
      // Nothing fancy, just let uintmax_t take the strain:
      unsigned m(0), x(0);
      unsigned as = a.size();
      unsigned bs = b.size();
      minmax(as, bs, m, x);
      if (x == 1)
      {
         bool s = a.sign();
         result = static_cast<double_limb_type>(*a.limbs()) + static_cast<double_limb_type>(*b.limbs());
         result.sign(s);
         return;
      }
      result.resize(x, x);
      typename CppInt2::const_limb_pointer pa = a.limbs();
      typename CppInt3::const_limb_pointer pb = b.limbs();
      typename CppInt1::limb_pointer       pr = result.limbs();
      if (as < bs)
         swap(pa, pb);
      // First where a and b overlap:
      unsigned      i     = 0;
      unsigned char carry = 0;
#if defined(BOOST_MSVC) && !defined(BOOST_HAS_INT128) && defined(_M_X64)
      //
      // Special case for 32-bit limbs on 64-bit architecture - we can process
      // 2 limbs with each instruction.
      //
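      // The pointer casts below view each adjacent pair of 32-bit limbs as one
      // 64-bit value; this relies on x64 being little-endian, so that limb i is
      // the low half of the 64-bit lane.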
      for (; i + 8 <= m; i += 8)
      {
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i), *reinterpret_cast<const unsigned long long*>(pb + i), reinterpret_cast<unsigned long long*>(pr + i));
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 2), *reinterpret_cast<const unsigned long long*>(pb + i + 2), reinterpret_cast<unsigned long long*>(pr + i + 2));
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 4), *reinterpret_cast<const unsigned long long*>(pb + i + 4), reinterpret_cast<unsigned long long*>(pr + i + 4));
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 6), *reinterpret_cast<const unsigned long long*>(pb + i + 6), reinterpret_cast<unsigned long long*>(pr + i + 6));
      }
#else
      for (; i + 4 <= m; i += 4)
      {
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 0], pb[i + 0], pr + i);
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 1], pb[i + 1], pr + i + 1);
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 2], pb[i + 2], pr + i + 2);
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 3], pb[i + 3], pr + i + 3);
      }
#endif
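      // Process any leftover overlapping limbs one at a time, then propagate
      // the carry into a's remaining digits only for as long as it is set: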
      for (; i < m; ++i)
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i], pb[i], pr + i);
      for (; i < x && carry; ++i)
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i], 0, pr + i);
      if (i == x && carry)
      {
         // We overflowed, need to add one more limb:
         result.resize(x + 1, x + 1);
         if (result.size() > x)
            result.limbs()[x] = static_cast<limb_type>(1u);
      }
      else if (pa != pr)
         // Any remaining digits are the same as those in pa (the pa != pr guard
         // avoids a self-copy when result aliases a, matching the constexpr version):
         std::copy(pa + i, pa + x, pr + i);
      result.normalize();
      result.sign(a.sign());
   }
}
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
   if (BOOST_MP_IS_CONST_EVALUATED(a.size()))
   {
      subtract_unsigned_constexpr(result, a, b);
   }
   else
#endif
   {
      using std::swap;
      // Nothing fancy, just let uintmax_t take the strain:
      unsigned m(0), x(0);
      minmax(a.size(), b.size(), m, x);
      //
      // special cases for small limb counts:
      //
      if (x == 1)
      {
         bool      s  = a.sign();
         limb_type al = *a.limbs();
         limb_type bl = *b.limbs();
         if (bl > al)
         {
            ::boost::multiprecision::std_constexpr::swap(al, bl);
            s = !s;
         }
         result = al - bl;
         result.sign(s);
         return;
      }
      // This isn't used till later, but comparison has to occur before we resize the result,
      // as that may also resize a or b if this is an inplace operation:
      int c = a.compare_unsigned(b);
      // Set up the result vector:
      result.resize(x, x);
      // Now that a, b, and result are stable, get pointers to their limbs:
      typename CppInt2::const_limb_pointer pa = a.limbs();
      typename CppInt3::const_limb_pointer pb = b.limbs();
      typename CppInt1::limb_pointer       pr = result.limbs();
      bool                                 swapped = false;
      if (c < 0)
      {
         swap(pa, pb);
         swapped = true;
      }
      else if (c == 0)
      {
         result = static_cast<limb_type>(0);
         return;
      }
      unsigned      i      = 0;
      unsigned char borrow = 0;
      // First where a and b overlap:
#if defined(BOOST_MSVC) && !defined(BOOST_HAS_INT128) && defined(_M_X64)
      //
      // Special case for 32-bit limbs on 64-bit architecture - we can process
      // 2 limbs with each instruction.
      //
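      // Same little-endian pairing as in add_unsigned above: each pair of
      // adjacent 32-bit limbs is read and written as a single 64-bit lane.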
      for (; i + 8 <= m; i += 8)
      {
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i), *reinterpret_cast<const unsigned long long*>(pb + i), reinterpret_cast<unsigned long long*>(pr + i));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 2), *reinterpret_cast<const unsigned long long*>(pb + i + 2), reinterpret_cast<unsigned long long*>(pr + i + 2));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 4), *reinterpret_cast<const unsigned long long*>(pb + i + 4), reinterpret_cast<unsigned long long*>(pr + i + 4));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 6), *reinterpret_cast<const unsigned long long*>(pb + i + 6), reinterpret_cast<unsigned long long*>(pr + i + 6));
      }
#else
      for (; i + 4 <= m; i += 4)
      {
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], pb[i], pr + i);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 1], pb[i + 1], pr + i + 1);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 2], pb[i + 2], pr + i + 2);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 3], pb[i + 3], pr + i + 3);
      }
#endif
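      // Process any leftover overlapping limbs, then keep borrowing from a's
      // remaining digits only for as long as the borrow is set: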
      for (; i < m; ++i)
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], pb[i], pr + i);
      // Now where only a has digits, only as long as we've borrowed:
      while (borrow && (i < x))
      {
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], 0, pr + i);
         ++i;
      }
      // Any remaining digits are the same as those in pa:
      if ((x != i) && (pa != pr))
         std_constexpr::copy(pa + i, pa + x, pr + i);
      BOOST_ASSERT(0 == borrow);
      //
      // We may have lost digits, if so update limb usage count:
      //
      result.normalize();
      result.sign(a.sign());
      if (swapped)
         result.negate();
   } // constexpr.
}
#else

template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   add_unsigned_constexpr(result, a, b);
}

template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   subtract_unsigned_constexpr(result, a, b);
}

#endif

} } } // namespaces

#endif