md5_constexpr_internal.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. // Copyright 2019 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #ifndef BASE_HASH_MD5_CONSTEXPR_INTERNAL_H_
  5. #define BASE_HASH_MD5_CONSTEXPR_INTERNAL_H_
  6. #include <array>
  7. #include <cstddef>
  8. #include <cstdint>
  9. #include "base/check.h"
  10. #include "base/hash/md5.h"
  11. namespace base {
  12. namespace internal {
  13. // The implementation here is based on the pseudocode provided by Wikipedia:
  14. // https://en.wikipedia.org/wiki/MD5#Pseudocode
  15. struct MD5CE {
  16. //////////////////////////////////////////////////////////////////////////////
  17. // DATA STRUCTURES
  18. // The data representation at each round is a 4-tuple of uint32_t.
  19. struct IntermediateData {
  20. uint32_t a;
  21. uint32_t b;
  22. uint32_t c;
  23. uint32_t d;
  24. };
  25. // The input data for a single round consists of 16 uint32_t (64 bytes).
  26. using RoundData = std::array<uint32_t, 16>;
  27. //////////////////////////////////////////////////////////////////////////////
  28. // CONSTANTS
  29. static constexpr std::array<uint32_t, 64> kConstants = {
  30. {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a,
  31. 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
  32. 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340,
  33. 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
  34. 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
  35. 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
  36. 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa,
  37. 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
  38. 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92,
  39. 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
  40. 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391}};
  41. static constexpr std::array<uint32_t, 16> kShifts = {
  42. {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21}};
  43. // The initial intermediate data.
  44. static constexpr IntermediateData kInitialIntermediateData{
  45. 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476};
  46. //////////////////////////////////////////////////////////////////////////////
  47. // PADDED MESSAGE GENERATION / EXTRACTION
  48. // Given the message length, calculates the padded message length. There has
  49. // to be room for the 1-byte end-of-message marker, plus 8 bytes for the
  50. // uint64_t encoded message length, all rounded up to a multiple of 64 bytes.
  51. static constexpr uint32_t GetPaddedMessageLength(const uint32_t n) {
  52. return (((n + 1 + 8) + 63) / 64) * 64;
  53. }
  54. // Extracts the |i|th byte of a uint64_t, where |i == 0| extracts the least
  55. // significant byte. It is expected that 0 <= i < 8.
  56. static constexpr uint8_t ExtractByte(const uint64_t value, const uint32_t i) {
  57. DCHECK(i < 8);
  58. return static_cast<uint8_t>((value >> (i * 8)) & 0xff);
  59. }
  60. // Extracts the |i|th byte of a message of length |n|.
  61. static constexpr uint8_t GetPaddedMessageByte(const char* data,
  62. const uint32_t n,
  63. const uint32_t m,
  64. const uint32_t i) {
  65. DCHECK(i < m);
  66. DCHECK(n < m);
  67. DCHECK(m % 64 == 0);
  68. if (i < n) {
  69. // Emit the message itself...
  70. return data[i];
  71. } else if (i == n) {
  72. // ...followed by the end of message marker.
  73. return 0x80;
  74. } else if (i >= m - 8) {
  75. // The last 8 bytes encode the original message length times 8.
  76. return ExtractByte(n * 8, i - (m - 8));
  77. } else {
  78. // And everything else is just empyt padding.
  79. return 0;
  80. }
  81. }
  82. // Extracts the uint32_t starting at position |i| from the padded message
  83. // generate by the provided input |data| of length |n|. The bytes are treated
  84. // in little endian order.
  85. static constexpr uint32_t GetPaddedMessageWord(const char* data,
  86. const uint32_t n,
  87. const uint32_t m,
  88. const uint32_t i) {
  89. DCHECK(i % 4 == 0);
  90. DCHECK(i < m);
  91. DCHECK(n < m);
  92. DCHECK(m % 64 == 0);
  93. return static_cast<uint32_t>(GetPaddedMessageByte(data, n, m, i)) |
  94. static_cast<uint32_t>((GetPaddedMessageByte(data, n, m, i + 1))
  95. << 8) |
  96. static_cast<uint32_t>((GetPaddedMessageByte(data, n, m, i + 2))
  97. << 16) |
  98. static_cast<uint32_t>((GetPaddedMessageByte(data, n, m, i + 3))
  99. << 24);
  100. }
  101. // Given an input buffer of length |n| bytes, extracts one round worth of data
  102. // starting at offset |i|.
  103. static constexpr RoundData GetRoundData(const char* data,
  104. const uint32_t n,
  105. const uint32_t m,
  106. const uint32_t i) {
  107. DCHECK(i % 64 == 0);
  108. DCHECK(i < m);
  109. DCHECK(n < m);
  110. DCHECK(m % 64 == 0);
  111. return RoundData{{GetPaddedMessageWord(data, n, m, i),
  112. GetPaddedMessageWord(data, n, m, i + 4),
  113. GetPaddedMessageWord(data, n, m, i + 8),
  114. GetPaddedMessageWord(data, n, m, i + 12),
  115. GetPaddedMessageWord(data, n, m, i + 16),
  116. GetPaddedMessageWord(data, n, m, i + 20),
  117. GetPaddedMessageWord(data, n, m, i + 24),
  118. GetPaddedMessageWord(data, n, m, i + 28),
  119. GetPaddedMessageWord(data, n, m, i + 32),
  120. GetPaddedMessageWord(data, n, m, i + 36),
  121. GetPaddedMessageWord(data, n, m, i + 40),
  122. GetPaddedMessageWord(data, n, m, i + 44),
  123. GetPaddedMessageWord(data, n, m, i + 48),
  124. GetPaddedMessageWord(data, n, m, i + 52),
  125. GetPaddedMessageWord(data, n, m, i + 56),
  126. GetPaddedMessageWord(data, n, m, i + 60)}};
  127. }
  128. //////////////////////////////////////////////////////////////////////////////
  129. // HASH IMPLEMENTATION
  130. // Mixes elements |b|, |c| and |d| at round |i| of the calculation.
  131. static constexpr uint32_t CalcF(const uint32_t i,
  132. const uint32_t b,
  133. const uint32_t c,
  134. const uint32_t d) {
  135. DCHECK(i < 64);
  136. if (i < 16) {
  137. return d ^ (b & (c ^ d));
  138. } else if (i < 32) {
  139. return c ^ (d & (b ^ c));
  140. } else if (i < 48) {
  141. return b ^ c ^ d;
  142. } else {
  143. return c ^ (b | (~d));
  144. }
  145. }
  146. static constexpr uint32_t CalcF(const uint32_t i,
  147. const IntermediateData& intermediate) {
  148. return CalcF(i, intermediate.b, intermediate.c, intermediate.d);
  149. }
  150. // Calculates the indexing function at round |i|.
  151. static constexpr uint32_t CalcG(const uint32_t i) {
  152. DCHECK(i < 64);
  153. if (i < 16) {
  154. return i;
  155. } else if (i < 32) {
  156. return (5 * i + 1) % 16;
  157. } else if (i < 48) {
  158. return (3 * i + 5) % 16;
  159. } else {
  160. return (7 * i) % 16;
  161. }
  162. }
  163. // Calculates the rotation to be applied at round |i|.
  164. static constexpr uint32_t GetShift(const uint32_t i) {
  165. DCHECK(i < 64);
  166. return kShifts[(i / 16) * 4 + (i % 4)];
  167. }
  168. // Rotates to the left the given |value| by the given |bits|.
  169. static constexpr uint32_t LeftRotate(const uint32_t value,
  170. const uint32_t bits) {
  171. DCHECK(bits < 32);
  172. return (value << bits) | (value >> (32 - bits));
  173. }
  174. // Applies the ith step of mixing.
  175. static constexpr IntermediateData ApplyStep(
  176. const uint32_t i,
  177. const RoundData& data,
  178. const IntermediateData& intermediate) {
  179. DCHECK(i < 64);
  180. const uint32_t g = CalcG(i);
  181. DCHECK(g < 16);
  182. const uint32_t f =
  183. CalcF(i, intermediate) + intermediate.a + kConstants[i] + data[g];
  184. const uint32_t s = GetShift(i);
  185. return IntermediateData{/* a */ intermediate.d,
  186. /* b */ intermediate.b + LeftRotate(f, s),
  187. /* c */ intermediate.b,
  188. /* d */ intermediate.c};
  189. }
  190. // Adds two IntermediateData together.
  191. static constexpr IntermediateData Add(const IntermediateData& intermediate1,
  192. const IntermediateData& intermediate2) {
  193. return IntermediateData{
  194. intermediate1.a + intermediate2.a, intermediate1.b + intermediate2.b,
  195. intermediate1.c + intermediate2.c, intermediate1.d + intermediate2.d};
  196. }
  197. // Processes an entire message.
  198. static constexpr IntermediateData ProcessMessage(const char* message,
  199. const uint32_t n) {
  200. const uint32_t m = GetPaddedMessageLength(n);
  201. IntermediateData intermediate0 = kInitialIntermediateData;
  202. for (uint32_t offset = 0; offset < m; offset += 64) {
  203. RoundData data = GetRoundData(message, n, m, offset);
  204. IntermediateData intermediate1 = intermediate0;
  205. for (uint32_t i = 0; i < 64; ++i)
  206. intermediate1 = ApplyStep(i, data, intermediate1);
  207. intermediate0 = Add(intermediate0, intermediate1);
  208. }
  209. return intermediate0;
  210. }
  211. //////////////////////////////////////////////////////////////////////////////
  212. // HELPER FUNCTIONS
  213. // Converts an IntermediateData to a final digest.
  214. static constexpr MD5Digest IntermediateDataToMD5Digest(
  215. const IntermediateData& intermediate) {
  216. return MD5Digest{{static_cast<uint8_t>((intermediate.a >> 0) & 0xff),
  217. static_cast<uint8_t>((intermediate.a >> 8) & 0xff),
  218. static_cast<uint8_t>((intermediate.a >> 16) & 0xff),
  219. static_cast<uint8_t>((intermediate.a >> 24) & 0xff),
  220. static_cast<uint8_t>((intermediate.b >> 0) & 0xff),
  221. static_cast<uint8_t>((intermediate.b >> 8) & 0xff),
  222. static_cast<uint8_t>((intermediate.b >> 16) & 0xff),
  223. static_cast<uint8_t>((intermediate.b >> 24) & 0xff),
  224. static_cast<uint8_t>((intermediate.c >> 0) & 0xff),
  225. static_cast<uint8_t>((intermediate.c >> 8) & 0xff),
  226. static_cast<uint8_t>((intermediate.c >> 16) & 0xff),
  227. static_cast<uint8_t>((intermediate.c >> 24) & 0xff),
  228. static_cast<uint8_t>((intermediate.d >> 0) & 0xff),
  229. static_cast<uint8_t>((intermediate.d >> 8) & 0xff),
  230. static_cast<uint8_t>((intermediate.d >> 16) & 0xff),
  231. static_cast<uint8_t>((intermediate.d >> 24) & 0xff)}};
  232. }
  233. static constexpr uint32_t StringLength(const char* string) {
  234. const char* end = string;
  235. while (*end != 0)
  236. ++end;
  237. // Double check that the precision losing conversion is safe.
  238. DCHECK(end >= string);
  239. DCHECK(static_cast<std::ptrdiff_t>(static_cast<uint32_t>(end - string)) ==
  240. (end - string));
  241. return static_cast<uint32_t>(end - string);
  242. }
  243. static constexpr uint32_t SwapEndian(uint32_t a) {
  244. return ((a & 0xff) << 24) | (((a >> 8) & 0xff) << 16) |
  245. (((a >> 16) & 0xff) << 8) | ((a >> 24) & 0xff);
  246. }
  247. //////////////////////////////////////////////////////////////////////////////
  248. // WRAPPER FUNCTIONS
  249. static constexpr MD5Digest Sum(const char* data, uint32_t n) {
  250. return IntermediateDataToMD5Digest(ProcessMessage(data, n));
  251. }
  252. static constexpr uint64_t Hash64(const char* data, uint32_t n) {
  253. IntermediateData intermediate = ProcessMessage(data, n);
  254. return (static_cast<uint64_t>(SwapEndian(intermediate.a)) << 32) |
  255. static_cast<uint64_t>(SwapEndian(intermediate.b));
  256. }
  257. static constexpr uint32_t Hash32(const char* data, uint32_t n) {
  258. IntermediateData intermediate = ProcessMessage(data, n);
  259. return SwapEndian(intermediate.a);
  260. }
  261. };
  262. } // namespace internal
  263. // Implementations of the functions exposed in the public header.
  264. constexpr MD5Digest MD5SumConstexpr(const char* string) {
  265. return internal::MD5CE::Sum(string, internal::MD5CE::StringLength(string));
  266. }
  267. constexpr MD5Digest MD5SumConstexpr(const char* string, uint32_t length) {
  268. return internal::MD5CE::Sum(string, length);
  269. }
  270. constexpr uint64_t MD5Hash64Constexpr(const char* string) {
  271. return internal::MD5CE::Hash64(string, internal::MD5CE::StringLength(string));
  272. }
  273. constexpr uint64_t MD5Hash64Constexpr(const char* string, uint32_t length) {
  274. return internal::MD5CE::Hash64(string, length);
  275. }
  276. constexpr uint32_t MD5Hash32Constexpr(const char* string) {
  277. return internal::MD5CE::Hash32(string, internal::MD5CE::StringLength(string));
  278. }
  279. constexpr uint32_t MD5Hash32Constexpr(const char* string, uint32_t length) {
  280. return internal::MD5CE::Hash32(string, length);
  281. }
  282. } // namespace base
  283. #endif // BASE_HASH_MD5_CONSTEXPR_INTERNAL_H_