ieee.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. // Copyright 2012 the V8 project authors. All rights reserved.
  2. // Redistribution and use in source and binary forms, with or without
  3. // modification, are permitted provided that the following conditions are
  4. // met:
  5. //
  6. // * Redistributions of source code must retain the above copyright
  7. // notice, this list of conditions and the following disclaimer.
  8. // * Redistributions in binary form must reproduce the above
  9. // copyright notice, this list of conditions and the following
  10. // disclaimer in the documentation and/or other materials provided
  11. // with the distribution.
  12. // * Neither the name of Google Inc. nor the names of its
  13. // contributors may be used to endorse or promote products derived
  14. // from this software without specific prior written permission.
  15. //
  16. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. #ifndef DOUBLE_CONVERSION_DOUBLE_H_
  28. #define DOUBLE_CONVERSION_DOUBLE_H_
  29. #include "diy-fp.h"
  30. namespace double_conversion {
  31. // We assume that doubles and uint64_t have the same endianness.
  32. static uint64_t double_to_uint64(double d) { return BitCast<uint64_t>(d); }
  33. static double uint64_to_double(uint64_t d64) { return BitCast<double>(d64); }
  34. static uint32_t float_to_uint32(float f) { return BitCast<uint32_t>(f); }
  35. static float uint32_to_float(uint32_t d32) { return BitCast<float>(d32); }
  36. // Helper functions for doubles.
  37. class Double {
  38. public:
  39. static const uint64_t kSignMask = DOUBLE_CONVERSION_UINT64_2PART_C(0x80000000, 00000000);
  40. static const uint64_t kExponentMask = DOUBLE_CONVERSION_UINT64_2PART_C(0x7FF00000, 00000000);
  41. static const uint64_t kSignificandMask = DOUBLE_CONVERSION_UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
  42. static const uint64_t kHiddenBit = DOUBLE_CONVERSION_UINT64_2PART_C(0x00100000, 00000000);
  43. static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit.
  44. static const int kSignificandSize = 53;
  45. static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
  46. static const int kMaxExponent = 0x7FF - kExponentBias;
  47. Double() : d64_(0) {}
  48. explicit Double(double d) : d64_(double_to_uint64(d)) {}
  49. explicit Double(uint64_t d64) : d64_(d64) {}
  50. explicit Double(DiyFp diy_fp)
  51. : d64_(DiyFpToUint64(diy_fp)) {}
  52. // The value encoded by this Double must be greater or equal to +0.0.
  53. // It must not be special (infinity, or NaN).
  54. DiyFp AsDiyFp() const {
  55. DOUBLE_CONVERSION_ASSERT(Sign() > 0);
  56. DOUBLE_CONVERSION_ASSERT(!IsSpecial());
  57. return DiyFp(Significand(), Exponent());
  58. }
  59. // The value encoded by this Double must be strictly greater than 0.
  60. DiyFp AsNormalizedDiyFp() const {
  61. DOUBLE_CONVERSION_ASSERT(value() > 0.0);
  62. uint64_t f = Significand();
  63. int e = Exponent();
  64. // The current double could be a denormal.
  65. while ((f & kHiddenBit) == 0) {
  66. f <<= 1;
  67. e--;
  68. }
  69. // Do the final shifts in one go.
  70. f <<= DiyFp::kSignificandSize - kSignificandSize;
  71. e -= DiyFp::kSignificandSize - kSignificandSize;
  72. return DiyFp(f, e);
  73. }
  74. // Returns the double's bit as uint64.
  75. uint64_t AsUint64() const {
  76. return d64_;
  77. }
  78. // Returns the next greater double. Returns +infinity on input +infinity.
  79. double NextDouble() const {
  80. if (d64_ == kInfinity) return Double(kInfinity).value();
  81. if (Sign() < 0 && Significand() == 0) {
  82. // -0.0
  83. return 0.0;
  84. }
  85. if (Sign() < 0) {
  86. return Double(d64_ - 1).value();
  87. } else {
  88. return Double(d64_ + 1).value();
  89. }
  90. }
  91. double PreviousDouble() const {
  92. if (d64_ == (kInfinity | kSignMask)) return -Infinity();
  93. if (Sign() < 0) {
  94. return Double(d64_ + 1).value();
  95. } else {
  96. if (Significand() == 0) return -0.0;
  97. return Double(d64_ - 1).value();
  98. }
  99. }
  100. int Exponent() const {
  101. if (IsDenormal()) return kDenormalExponent;
  102. uint64_t d64 = AsUint64();
  103. int biased_e =
  104. static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
  105. return biased_e - kExponentBias;
  106. }
  107. uint64_t Significand() const {
  108. uint64_t d64 = AsUint64();
  109. uint64_t significand = d64 & kSignificandMask;
  110. if (!IsDenormal()) {
  111. return significand + kHiddenBit;
  112. } else {
  113. return significand;
  114. }
  115. }
  116. // Returns true if the double is a denormal.
  117. bool IsDenormal() const {
  118. uint64_t d64 = AsUint64();
  119. return (d64 & kExponentMask) == 0;
  120. }
  121. // We consider denormals not to be special.
  122. // Hence only Infinity and NaN are special.
  123. bool IsSpecial() const {
  124. uint64_t d64 = AsUint64();
  125. return (d64 & kExponentMask) == kExponentMask;
  126. }
  127. bool IsNan() const {
  128. uint64_t d64 = AsUint64();
  129. return ((d64 & kExponentMask) == kExponentMask) &&
  130. ((d64 & kSignificandMask) != 0);
  131. }
  132. bool IsInfinite() const {
  133. uint64_t d64 = AsUint64();
  134. return ((d64 & kExponentMask) == kExponentMask) &&
  135. ((d64 & kSignificandMask) == 0);
  136. }
  137. int Sign() const {
  138. uint64_t d64 = AsUint64();
  139. return (d64 & kSignMask) == 0? 1: -1;
  140. }
  141. // Precondition: the value encoded by this Double must be greater or equal
  142. // than +0.0.
  143. DiyFp UpperBoundary() const {
  144. DOUBLE_CONVERSION_ASSERT(Sign() > 0);
  145. return DiyFp(Significand() * 2 + 1, Exponent() - 1);
  146. }
  147. // Computes the two boundaries of this.
  148. // The bigger boundary (m_plus) is normalized. The lower boundary has the same
  149. // exponent as m_plus.
  150. // Precondition: the value encoded by this Double must be greater than 0.
  151. void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
  152. DOUBLE_CONVERSION_ASSERT(value() > 0.0);
  153. DiyFp v = this->AsDiyFp();
  154. DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
  155. DiyFp m_minus;
  156. if (LowerBoundaryIsCloser()) {
  157. m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
  158. } else {
  159. m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
  160. }
  161. m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
  162. m_minus.set_e(m_plus.e());
  163. *out_m_plus = m_plus;
  164. *out_m_minus = m_minus;
  165. }
  166. bool LowerBoundaryIsCloser() const {
  167. // The boundary is closer if the significand is of the form f == 2^p-1 then
  168. // the lower boundary is closer.
  169. // Think of v = 1000e10 and v- = 9999e9.
  170. // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
  171. // at a distance of 1e8.
  172. // The only exception is for the smallest normal: the largest denormal is
  173. // at the same distance as its successor.
  174. // Note: denormals have the same exponent as the smallest normals.
  175. bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
  176. return physical_significand_is_zero && (Exponent() != kDenormalExponent);
  177. }
  178. double value() const { return uint64_to_double(d64_); }
  179. // Returns the significand size for a given order of magnitude.
  180. // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
  181. // This function returns the number of significant binary digits v will have
  182. // once it's encoded into a double. In almost all cases this is equal to
  183. // kSignificandSize. The only exceptions are denormals. They start with
  184. // leading zeroes and their effective significand-size is hence smaller.
  185. static int SignificandSizeForOrderOfMagnitude(int order) {
  186. if (order >= (kDenormalExponent + kSignificandSize)) {
  187. return kSignificandSize;
  188. }
  189. if (order <= kDenormalExponent) return 0;
  190. return order - kDenormalExponent;
  191. }
  192. static double Infinity() {
  193. return Double(kInfinity).value();
  194. }
  195. static double NaN() {
  196. return Double(kNaN).value();
  197. }
  198. private:
  199. static const int kDenormalExponent = -kExponentBias + 1;
  200. static const uint64_t kInfinity = DOUBLE_CONVERSION_UINT64_2PART_C(0x7FF00000, 00000000);
  201. static const uint64_t kNaN = DOUBLE_CONVERSION_UINT64_2PART_C(0x7FF80000, 00000000);
  202. const uint64_t d64_;
  203. static uint64_t DiyFpToUint64(DiyFp diy_fp) {
  204. uint64_t significand = diy_fp.f();
  205. int exponent = diy_fp.e();
  206. while (significand > kHiddenBit + kSignificandMask) {
  207. significand >>= 1;
  208. exponent++;
  209. }
  210. if (exponent >= kMaxExponent) {
  211. return kInfinity;
  212. }
  213. if (exponent < kDenormalExponent) {
  214. return 0;
  215. }
  216. while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
  217. significand <<= 1;
  218. exponent--;
  219. }
  220. uint64_t biased_exponent;
  221. if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
  222. biased_exponent = 0;
  223. } else {
  224. biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
  225. }
  226. return (significand & kSignificandMask) |
  227. (biased_exponent << kPhysicalSignificandSize);
  228. }
  229. DOUBLE_CONVERSION_DISALLOW_COPY_AND_ASSIGN(Double);
  230. };
  231. class Single {
  232. public:
  233. static const uint32_t kSignMask = 0x80000000;
  234. static const uint32_t kExponentMask = 0x7F800000;
  235. static const uint32_t kSignificandMask = 0x007FFFFF;
  236. static const uint32_t kHiddenBit = 0x00800000;
  237. static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit.
  238. static const int kSignificandSize = 24;
  239. Single() : d32_(0) {}
  240. explicit Single(float f) : d32_(float_to_uint32(f)) {}
  241. explicit Single(uint32_t d32) : d32_(d32) {}
  242. // The value encoded by this Single must be greater or equal to +0.0.
  243. // It must not be special (infinity, or NaN).
  244. DiyFp AsDiyFp() const {
  245. DOUBLE_CONVERSION_ASSERT(Sign() > 0);
  246. DOUBLE_CONVERSION_ASSERT(!IsSpecial());
  247. return DiyFp(Significand(), Exponent());
  248. }
  249. // Returns the single's bit as uint64.
  250. uint32_t AsUint32() const {
  251. return d32_;
  252. }
  253. int Exponent() const {
  254. if (IsDenormal()) return kDenormalExponent;
  255. uint32_t d32 = AsUint32();
  256. int biased_e =
  257. static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize);
  258. return biased_e - kExponentBias;
  259. }
  260. uint32_t Significand() const {
  261. uint32_t d32 = AsUint32();
  262. uint32_t significand = d32 & kSignificandMask;
  263. if (!IsDenormal()) {
  264. return significand + kHiddenBit;
  265. } else {
  266. return significand;
  267. }
  268. }
  269. // Returns true if the single is a denormal.
  270. bool IsDenormal() const {
  271. uint32_t d32 = AsUint32();
  272. return (d32 & kExponentMask) == 0;
  273. }
  274. // We consider denormals not to be special.
  275. // Hence only Infinity and NaN are special.
  276. bool IsSpecial() const {
  277. uint32_t d32 = AsUint32();
  278. return (d32 & kExponentMask) == kExponentMask;
  279. }
  280. bool IsNan() const {
  281. uint32_t d32 = AsUint32();
  282. return ((d32 & kExponentMask) == kExponentMask) &&
  283. ((d32 & kSignificandMask) != 0);
  284. }
  285. bool IsInfinite() const {
  286. uint32_t d32 = AsUint32();
  287. return ((d32 & kExponentMask) == kExponentMask) &&
  288. ((d32 & kSignificandMask) == 0);
  289. }
  290. int Sign() const {
  291. uint32_t d32 = AsUint32();
  292. return (d32 & kSignMask) == 0? 1: -1;
  293. }
  294. // Computes the two boundaries of this.
  295. // The bigger boundary (m_plus) is normalized. The lower boundary has the same
  296. // exponent as m_plus.
  297. // Precondition: the value encoded by this Single must be greater than 0.
  298. void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
  299. DOUBLE_CONVERSION_ASSERT(value() > 0.0);
  300. DiyFp v = this->AsDiyFp();
  301. DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
  302. DiyFp m_minus;
  303. if (LowerBoundaryIsCloser()) {
  304. m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
  305. } else {
  306. m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
  307. }
  308. m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
  309. m_minus.set_e(m_plus.e());
  310. *out_m_plus = m_plus;
  311. *out_m_minus = m_minus;
  312. }
  313. // Precondition: the value encoded by this Single must be greater or equal
  314. // than +0.0.
  315. DiyFp UpperBoundary() const {
  316. DOUBLE_CONVERSION_ASSERT(Sign() > 0);
  317. return DiyFp(Significand() * 2 + 1, Exponent() - 1);
  318. }
  319. bool LowerBoundaryIsCloser() const {
  320. // The boundary is closer if the significand is of the form f == 2^p-1 then
  321. // the lower boundary is closer.
  322. // Think of v = 1000e10 and v- = 9999e9.
  323. // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
  324. // at a distance of 1e8.
  325. // The only exception is for the smallest normal: the largest denormal is
  326. // at the same distance as its successor.
  327. // Note: denormals have the same exponent as the smallest normals.
  328. bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0);
  329. return physical_significand_is_zero && (Exponent() != kDenormalExponent);
  330. }
  331. float value() const { return uint32_to_float(d32_); }
  332. static float Infinity() {
  333. return Single(kInfinity).value();
  334. }
  335. static float NaN() {
  336. return Single(kNaN).value();
  337. }
  338. private:
  339. static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
  340. static const int kDenormalExponent = -kExponentBias + 1;
  341. static const int kMaxExponent = 0xFF - kExponentBias;
  342. static const uint32_t kInfinity = 0x7F800000;
  343. static const uint32_t kNaN = 0x7FC00000;
  344. const uint32_t d32_;
  345. DOUBLE_CONVERSION_DISALLOW_COPY_AND_ASSIGN(Single);
  346. };
  347. } // namespace double_conversion
  348. #endif // DOUBLE_CONVERSION_DOUBLE_H_