sse2.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. //
  2. // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
  3. // Vinnie Falco (vinnie.falco@gmail.com)
  4. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  5. //
  6. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  7. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. //
  9. // Official repository: https://github.com/boostorg/json
  10. //
  11. #ifndef BOOST_JSON_DETAIL_SSE2_HPP
  12. #define BOOST_JSON_DETAIL_SSE2_HPP
  13. #include <boost/json/detail/config.hpp>
  14. #include <boost/json/detail/utf8.hpp>
  15. #include <cstddef>
  16. #include <cstring>
  17. #ifdef BOOST_JSON_USE_SSE2
  18. # include <emmintrin.h>
  19. # include <xmmintrin.h>
  20. # ifdef _MSC_VER
  21. # include <intrin.h>
  22. # endif
  23. #endif
  24. BOOST_JSON_NS_BEGIN
  25. namespace detail {
  26. #ifdef BOOST_JSON_USE_SSE2
  // Returns a pointer to the first byte in [p, end) that is a double
  // quote, a backslash, or a control character (below 0x20), or `end`
  // when there is no such byte. This primary template performs no
  // UTF-8 checks: bytes >= 0x80 are stepped over like any other byte.
  template<bool AllowBadUTF8>
  inline
  const char*
  count_valid(
      char const* p,
      const char* end) noexcept
  {
      __m128i const quote = _mm_set1_epi8( '\x22' ); // '"'
      __m128i const backslash = _mm_set1_epi8( '\\' );
      __m128i const ctrl_max = _mm_set1_epi8( 0x1F );

      // vector part: examine 16 bytes per iteration
      for( ; end - p >= 16; p += 16 )
      {
          __m128i const chunk = _mm_loadu_si128(
              (__m128i const*)p );
          // unsigned min(byte, 0x1F) == byte  <=>  byte <= 0x1F
          __m128i const is_ctrl = _mm_cmpeq_epi8(
              _mm_min_epu8( chunk, ctrl_max ), chunk );
          __m128i const is_delim = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, quote ),
              _mm_cmpeq_epi8( chunk, backslash ) );
          int const mask = _mm_movemask_epi8(
              _mm_or_si128( is_delim, is_ctrl ) );
          if( mask == 0 )
              continue;

          // lowest set bit == offset of the first stopping byte
  #if defined(__GNUC__) || defined(__clang__)
          int const offset = __builtin_ffs( mask ) - 1;
  #else
          unsigned long bit;
          _BitScanForward( &bit, mask );
          int const offset = bit;
  #endif
          return p + offset;
      }

      // scalar part: fewer than 16 bytes remain
      for( ; p != end; ++p )
      {
          unsigned char const ch = *p;
          if( ch < 0x20 || ch == '\x22' || ch == '\\' )
              return p;
      }
      return p;
  }
  70. template<>
  71. inline
  72. const char*
  73. count_valid<false>(
  74. char const* p,
  75. const char* end) noexcept
  76. {
  77. __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
  78. __m128i const q2 = _mm_set1_epi8( '\\' );
  79. __m128i const q3 = _mm_set1_epi8( 0x20 );
  80. while(end - p >= 16)
  81. {
  82. __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
  83. __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
  84. __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
  85. __m128i v4 = _mm_cmplt_epi8( v1, q3 );
  86. __m128i v5 = _mm_or_si128( v2, v3 );
  87. __m128i v6 = _mm_or_si128( v5, v4 );
  88. int w = _mm_movemask_epi8( v6 );
  89. if( w != 0 )
  90. {
  91. int m;
  92. #if defined(__GNUC__) || defined(__clang__)
  93. m = __builtin_ffs( w ) - 1;
  94. #else
  95. unsigned long index;
  96. _BitScanForward( &index, w );
  97. m = index;
  98. #endif
  99. p += m;
  100. break;
  101. }
  102. p += 16;
  103. }
  104. while(p != end)
  105. {
  106. const unsigned char c = *p;
  107. if(c == '\x22' || c == '\\' || c < 0x20)
  108. break;
  109. if(c < 0x80)
  110. {
  111. ++p;
  112. continue;
  113. }
  114. // validate utf-8
  115. uint16_t first = classify_utf8(c & 0x7F);
  116. uint8_t len = first & 0xFF;
  117. if(BOOST_JSON_UNLIKELY(end - p < len))
  118. break;
  119. if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
  120. break;
  121. p += len;
  122. }
  123. return p;
  124. }
  125. #else
  // Portable version: returns a pointer to the first byte in [p, end)
  // that is a double quote, a backslash, or a control character (below
  // 0x20), or `end` when there is no such byte. No UTF-8 checks are
  // performed by this primary template.
  template<bool AllowBadUTF8>
  char const*
  count_valid(
      char const* p,
      char const* end) noexcept
  {
      for( ; p != end; ++p )
      {
          unsigned char const ch = *p;
          bool const stop =
              ch < 0x20 || ch == '\x22' || ch == '\\';
          if( stop )
              return p;
      }
      return p;
  }
  141. template<>
  142. inline
  143. char const*
  144. count_valid<false>(
  145. char const* p,
  146. char const* end) noexcept
  147. {
  148. while(p != end)
  149. {
  150. const unsigned char c = *p;
  151. if(c == '\x22' || c == '\\' || c < 0x20)
  152. break;
  153. if(c < 0x80)
  154. {
  155. ++p;
  156. continue;
  157. }
  158. // validate utf-8
  159. uint16_t first = classify_utf8(c & 0x7F);
  160. uint8_t len = first & 0xFF;
  161. if(BOOST_JSON_UNLIKELY(end - p < len))
  162. break;
  163. if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
  164. break;
  165. p += len;
  166. }
  167. return p;
  168. }
  169. #endif
  170. // KRYSTIAN NOTE: does not stop to validate
  171. // count_unescaped
  172. #ifdef BOOST_JSON_USE_SSE2
  // Counts leading bytes of s[0..n) that are not a double quote, a
  // backslash, or a control character (<= 0x1F). Only whole 16-byte
  // chunks are examined: when fewer than 16 bytes remain the scan
  // stops, so the result can be smaller than the true count.
  inline
  size_t
  count_unescaped(
      char const* s,
      size_t n) noexcept
  {
      __m128i const quote = _mm_set1_epi8( '\x22' ); // '"'
      __m128i const backslash = _mm_set1_epi8( '\\' );
      __m128i const ctrl_max = _mm_set1_epi8( 0x1F );

      char const* cur = s;
      for( ; n >= 16; cur += 16, n -= 16 )
      {
          __m128i const chunk = _mm_loadu_si128(
              (__m128i const*)cur );
          // unsigned min(byte, 0x1F) == byte  <=>  byte <= 0x1F
          __m128i const is_ctrl = _mm_cmpeq_epi8(
              _mm_min_epu8( chunk, ctrl_max ), chunk );
          __m128i const is_delim = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, quote ),
              _mm_cmpeq_epi8( chunk, backslash ) );
          int const mask = _mm_movemask_epi8(
              _mm_or_si128( is_delim, is_ctrl ) );
          if( mask != 0 )
          {
              // lowest set bit == offset of the first stopping byte
  #if defined(__GNUC__) || defined(__clang__)
              int const offset = __builtin_ffs( mask ) - 1;
  #else
              unsigned long bit;
              _BitScanForward( &bit, mask );
              int const offset = bit;
  #endif
              cur += offset;
              break;
          }
      }
      return cur - s;
  }
  211. #else
  // Fallback for builds without BOOST_JSON_USE_SSE2: no bytes are
  // scanned, so the reported count is always zero.
  inline std::size_t
  count_unescaped( char const*, std::size_t ) noexcept
  {
      return 0;
  }
  220. #endif
  221. // count_digits
  222. #ifdef BOOST_JSON_USE_SSE2
  // Returns how many of the 16 bytes starting at p are leading decimal
  // digits (0..16). All 16 bytes p[0]..p[15] are loaded, so they must
  // be readable.
  inline int count_digits( char const* p ) noexcept
  {
      __m128i const chunk = _mm_loadu_si128( (__m128i const*)p );
      // '0'..'9' (48..57) plus 70 lands on 118..127; every other byte
      // ends up below 118 in a signed comparison (possibly after
      // wrapping), so the compare marks the non-digits
      __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
      __m128i const non_digit = _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );
      int const mask = _mm_movemask_epi8( non_digit );

      if( mask == 0 )
          return 16; // all sixteen bytes are digits

      // lowest set bit == offset of the first non-digit
  #if defined(__GNUC__) || defined(__clang__)
      return __builtin_ffs( mask ) - 1;
  #else
      unsigned long bit;
      _BitScanForward( &bit, mask );
      return static_cast<int>( bit );
  #endif
  }
  247. #else
  // Returns how many leading bytes at p are decimal digits, looking at
  // no more than 16 bytes. Reading stops at the first non-digit;
  // callers must make p[0]..p[15] readable.
  inline int count_digits( char const* p ) noexcept
  {
      int count = 0;
      while( count < 16 )
      {
          // subtracting '0' maps digits onto 0..9 and everything
          // else onto a value above 9 once reduced to unsigned char
          unsigned char const digit = p[count] - '0';
          if( digit > 9 )
              break;
          ++count;
      }
      return count;
  }
  259. #endif
  260. // parse_unsigned
  // Appends the n decimal digits at p to the running value r and
  // returns the result, i.e. r * 10^n + value(p[0..n)). The bytes are
  // not checked to be digits and overflow is not detected (the
  // arithmetic is plain uint64_t arithmetic).
  inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
  {
      // consume four digits per iteration
      for( ; n >= 4; p += 4, n -= 4 )
      {
  #ifdef __clang__
          // digit-by-digit form: faster on clang for x86,
          // slower on gcc
          r = r * 10 + p[0] - '0';
          r = r * 10 + p[1] - '0';
          r = r * 10 + p[2] - '0';
          r = r * 10 + p[3] - '0';
  #else
          // load four characters at once and strip the '0' bias from
          // all of them with a single subtraction
          uint32_t packed;
          std::memcpy( &packed, p, 4 );
          packed -= 0x30303030;

          // d0 is the most significant decimal digit, i.e. p[0]
  #ifdef BOOST_JSON_BIG_ENDIAN
          unsigned const d0 = (packed >> 24);
          unsigned const d1 = (packed >> 16) & 0xFF;
          unsigned const d2 = (packed >> 8) & 0xFF;
          unsigned const d3 = packed & 0xFF;
  #else
          unsigned const d0 = packed & 0xFF;
          unsigned const d1 = (packed >> 8) & 0xFF;
          unsigned const d2 = (packed >> 16) & 0xFF;
          unsigned const d3 = (packed >> 24);
  #endif
          r = (((r * 10 + d0) * 10 + d1) * 10 + d2) * 10 + d3;
  #endif
      }

      // zero to three digits are left over
      for( ; n != 0; --n, ++p )
      {
          r = r * 10 + p[0] - '0';
      }
      return r;
  }
  308. // KRYSTIAN: this function is unused
  309. // count_leading
  310. /*
  311. #ifdef BOOST_JSON_USE_SSE2
  312. // assumes p..p+15
  313. inline std::size_t count_leading( char const * p, char ch ) noexcept
  314. {
  315. __m128i const q1 = _mm_set1_epi8( ch );
  316. __m128i v = _mm_loadu_si128( (__m128i const*)p );
  317. __m128i w = _mm_cmpeq_epi8( v, q1 );
  318. int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
  319. std::size_t n;
  320. if( m == 0 )
  321. {
  322. n = 16;
  323. }
  324. else
  325. {
  326. #if defined(__GNUC__) || defined(__clang__)
  327. n = __builtin_ffs( m ) - 1;
  328. #else
  329. unsigned long index;
  330. _BitScanForward( &index, m );
  331. n = index;
  332. #endif
  333. }
  334. return n;
  335. }
  336. #else
  337. // assumes p..p+15
  338. inline std::size_t count_leading( char const * p, char ch ) noexcept
  339. {
  340. std::size_t n = 0;
  341. for( ; n < 16 && *p == ch; ++p, ++n );
  342. return n;
  343. }
  344. #endif
  345. */
  346. // count_whitespace
  347. #ifdef BOOST_JSON_USE_SSE2
  // Skips leading whitespace (space, tab, carriage return, line feed)
  // and returns a pointer to the first byte in [p, end) that is not
  // one of these, or `end` when the whole range is whitespace.
  inline const char* count_whitespace( char const* p, const char* end ) noexcept
  {
      // quick exit: empty range, or a first byte above 0x20, which
      // cannot be any of the four whitespace characters
      if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
          return p;

      __m128i const space = _mm_set1_epi8( ' ' );
      __m128i const newline = _mm_set1_epi8( '\n' );
      __m128i const bit4 = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
      __m128i const cr = _mm_set1_epi8( '\r' );

      // vector part: examine 16 bytes per iteration
      for( ; end - p >= 16; p += 16 )
      {
          __m128i const chunk = _mm_loadu_si128( (__m128i const*)p );
          __m128i const sp_or_nl = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, space ),
              _mm_cmpeq_epi8( chunk, newline ));
          // OR-ing in 4 turns '\t' into '\r', so a single compare
          // recognizes both
          __m128i const tab_or_cr = _mm_cmpeq_epi8(
              _mm_or_si128( chunk, bit4 ), cr );
          // invert the 16-bit mask: set bits now mark non-whitespace
          int const non_ws = _mm_movemask_epi8(
              _mm_or_si128( sp_or_nl, tab_or_cr ) ) ^ 0xFFFF;
          if( non_ws == 0 )
              continue;

  #if defined(__GNUC__) || defined(__clang__)
          std::size_t const offset = __builtin_ffs( non_ws ) - 1;
  #else
          unsigned long bit;
          _BitScanForward( &bit, non_ws );
          std::size_t const offset = bit;
  #endif
          return p + offset;
      }

      // scalar part: fewer than 16 bytes remain
      for( ; p != end; ++p )
      {
          char const ch = *p;
          bool const is_ws =
              ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
          if( ! is_ws )
              return p;
      }
      return p;
  }
  396. /*
  397. // slightly faster on msvc-14.2, slightly slower on clang-win
  398. inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
  399. {
  400. char const * p0 = p;
  401. while( n > 0 )
  402. {
  403. char ch = *p;
  404. if( ch == '\n' || ch == '\r' )
  405. {
  406. ++p;
  407. --n;
  408. continue;
  409. }
  410. if( ch != ' ' && ch != '\t' )
  411. {
  412. break;
  413. }
  414. ++p;
  415. --n;
  416. while( n >= 16 )
  417. {
  418. std::size_t n2 = count_leading( p, ch );
  419. p += n2;
  420. n -= n2;
  421. if( n2 < 16 )
  422. {
  423. break;
  424. }
  425. }
  426. }
  427. return p - p0;
  428. }
  429. */
  430. #else
  // Skips leading whitespace (space, tab, carriage return, line feed)
  // and returns a pointer to the first byte in [p, end) that is not
  // one of these, or `end` when the whole range is whitespace.
  inline const char* count_whitespace( char const* p, const char* end ) noexcept
  {
      while( p != end )
      {
          char const ch = *p;
          bool const is_ws =
              ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t';
          if( ! is_ws )
              return p;
          ++p;
      }
      return p;
  }
  440. #endif
  441. } // detail
  442. BOOST_JSON_NS_END
  443. #endif