123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- //
- // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
- // Vinnie Falco (vinnie.falco@gmail.com)
- // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- //
- // Official repository: https://github.com/boostorg/json
- //
- #ifndef BOOST_JSON_DETAIL_SSE2_HPP
- #define BOOST_JSON_DETAIL_SSE2_HPP
- #include <boost/json/detail/config.hpp>
- #include <boost/json/detail/utf8.hpp>
- #include <cstddef>
- #include <cstring>
- #ifdef BOOST_JSON_USE_SSE2
- # include <emmintrin.h>
- # include <xmmintrin.h>
- # ifdef _MSC_VER
- # include <intrin.h>
- # endif
- #endif
- BOOST_JSON_NS_BEGIN
- namespace detail {
- #ifdef BOOST_JSON_USE_SSE2
// Finds the first byte in [p, end) that cannot be copied verbatim out
// of a JSON string: a double quote, a backslash, or a control character
// (any byte whose unsigned value is below 0x20).
//
// Returns a pointer to that byte, or `end` if the range has none.
// This primary template does not inspect bytes >= 0x80 in any way.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quote     = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const backslash = _mm_set1_epi8( '\\' );
    __m128i const ctrl_max  = _mm_set1_epi8( 0x1F );

    // Vector part: examine 16 bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        // min(b, 0x1F) == b holds exactly when the unsigned byte b
        // is <= 0x1F, i.e. when b is a control character.
        __m128i const is_ctrl = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctrl_max ), chunk );
        __m128i const is_quote_or_bs = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quote ),
            _mm_cmpeq_epi8( chunk, backslash ) );

        // One mask bit per byte; the lowest set bit is the first hit.
        int const mask = _mm_movemask_epi8(
            _mm_or_si128( is_quote_or_bs, is_ctrl ) );
        if( mask == 0 )
            continue;

#if defined(__GNUC__) || defined(__clang__)
        int const offset = __builtin_ffs( mask ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, mask );
        int const offset = static_cast<int>( index );
#endif
        return p + offset;
    }

    // Scalar part: fewer than 16 bytes remain.
    for( ; p != end; ++p )
    {
        unsigned char const c = *p;
        if( c < 0x20 || c == '\x22' || c == '\\' )
            return p;
    }
    return p;
}
- template<>
- inline
- const char*
- count_valid<false>(
- char const* p,
- const char* end) noexcept
- {
- __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
- __m128i const q2 = _mm_set1_epi8( '\\' );
- __m128i const q3 = _mm_set1_epi8( 0x20 );
- while(end - p >= 16)
- {
- __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
- __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
- __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
- __m128i v4 = _mm_cmplt_epi8( v1, q3 );
- __m128i v5 = _mm_or_si128( v2, v3 );
- __m128i v6 = _mm_or_si128( v5, v4 );
- int w = _mm_movemask_epi8( v6 );
- if( w != 0 )
- {
- int m;
- #if defined(__GNUC__) || defined(__clang__)
- m = __builtin_ffs( w ) - 1;
- #else
- unsigned long index;
- _BitScanForward( &index, w );
- m = index;
- #endif
- p += m;
- break;
- }
- p += 16;
- }
- while(p != end)
- {
- const unsigned char c = *p;
- if(c == '\x22' || c == '\\' || c < 0x20)
- break;
- if(c < 0x80)
- {
- ++p;
- continue;
- }
- // validate utf-8
- uint16_t first = classify_utf8(c & 0x7F);
- uint8_t len = first & 0xFF;
- if(BOOST_JSON_UNLIKELY(end - p < len))
- break;
- if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
- break;
- p += len;
- }
- return p;
- }
- #else
// Portable (non-SSE2) scan: finds the first byte in [p, end) that is a
// double quote, a backslash, or a control character (unsigned value
// below 0x20).
//
// Returns a pointer to that byte, or `end` if the range has none.
// This primary template does not inspect bytes >= 0x80 in any way.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for( ; p != end; ++p )
    {
        unsigned char const c = *p;
        bool const special =
            c < 0x20 || c == '\x22' || c == '\\';
        if( special )
            return p;
    }
    return p;
}
- template<>
- inline
- char const*
- count_valid<false>(
- char const* p,
- char const* end) noexcept
- {
- while(p != end)
- {
- const unsigned char c = *p;
- if(c == '\x22' || c == '\\' || c < 0x20)
- break;
- if(c < 0x80)
- {
- ++p;
- continue;
- }
- // validate utf-8
- uint16_t first = classify_utf8(c & 0x7F);
- uint8_t len = first & 0xFF;
- if(BOOST_JSON_UNLIKELY(end - p < len))
- break;
- if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
- break;
- p += len;
- }
- return p;
- }
- #endif
- // KRYSTIAN NOTE: does not stop to validate
- // count_unescaped
- #ifdef BOOST_JSON_USE_SSE2
// Counts leading bytes of s[0, n) that are neither a double quote, a
// backslash, nor a control character (unsigned value <= 0x1F).
//
// Only whole 16-byte chunks are examined: a trailing remainder of
// fewer than 16 bytes is never looked at, so the result is a lower
// bound on the number of leading unescaped bytes, not an exact count.
// The function performs no UTF-8 validation.
//
// s  pointer to the first byte to examine
// n  number of readable bytes starting at s
//
// Returns the number of bytes accepted before the scan stopped.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const q2 = _mm_set1_epi8( '\\' );   // '\\'
    __m128i const q3 = _mm_set1_epi8( 0x1F );

    char const* const s0 = s;

    while( n >= 16 )
    {
        __m128i const v1 = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(s) );

        __m128i const v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
        __m128i const v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
        __m128i const v4 = _mm_or_si128( v2, v3 );   // quote or backslash

        // min(b, 0x1F) == b holds exactly when the unsigned byte b
        // is <= 0x1F, i.e. when b is a control character.
        __m128i const v5 = _mm_min_epu8( v1, q3 );
        __m128i const v6 = _mm_cmpeq_epi8( v5, v1 ); // controls

        __m128i const v7 = _mm_or_si128( v4, v6 );   // any stop byte

        // One mask bit per byte; the lowest set bit is the first hit.
        int const w = _mm_movemask_epi8( v7 );
        if( w != 0 )
        {
            int m;
#if defined(__GNUC__) || defined(__clang__)
            m = __builtin_ffs( w ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, w );
            m = static_cast<int>( index );
#endif
            s += m;
            break;
        }

        s += 16;
        n -= 16;
    }

    // s never moves backwards, so the difference is non-negative.
    return static_cast<std::size_t>( s - s0 );
}
- #else
// Portable (non-SSE2) stand-in for count_unescaped.
//
// Both arguments are ignored and zero is always returned, i.e. no
// leading bytes are ever reported as unescaped.
inline
std::size_t
count_unescaped(
    char const* /*s*/,
    std::size_t /*n*/) noexcept
{
    return 0;
}
- #endif
- // count_digits
- #ifdef BOOST_JSON_USE_SSE2
- // assumes p..p+15 are valid
// Returns how many of the 16 bytes starting at p form an unbroken run
// of ASCII digits ('0'..'9') from the front; the result is in [0, 16].
// All 16 bytes are loaded, so they must be readable.
inline int count_digits( char const* p ) noexcept
{
    // Adding 70 moves '0'..'9' (48..57) onto 118..127, the top of the
    // signed 8-bit range. Every other byte value ends up below 118
    // when compared as a signed char (the addition wraps modulo 256).
    __m128i const shifted = _mm_add_epi8(
        _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) ),
        _mm_set1_epi8( 70 ) );
    __m128i const non_digit =
        _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

    // One mask bit per byte, set for every non-digit.
    int const mask = _mm_movemask_epi8( non_digit );
    if( mask == 0 )
        return 16;

    // The lowest set bit is the position of the first non-digit.
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_ffs( mask ) - 1;
#else
    unsigned long index;
    _BitScanForward( &index, mask );
    return static_cast<int>( index );
#endif
}
- #else
- // assumes p..p+15 are valid
// Returns how many bytes at the front of p form an unbroken run of
// ASCII digits ('0'..'9'), looking at no more than 16 bytes; the
// result is in [0, 16]. Reading stops at the first non-digit.
inline int count_digits( char const* p ) noexcept
{
    int n = 0;
    while( n < 16 )
    {
        // Bytes below '0' wrap to a large unsigned value, so a single
        // comparison rejects everything outside '0'..'9'.
        unsigned char const d =
            static_cast<unsigned char>( p[n] - '0' );
        if( d > 9 )
            break;
        ++n;
    }
    return n;
}
- #endif
- // parse_unsigned
// Appends n decimal digits read from p to the running value r and
// returns the result, i.e. r * 10^n + (value of the digit string).
//
// The bytes p[0..n) are expected to be ASCII digits; no check is made
// and arithmetic wraps modulo 2^64.
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
{
    // Consume the input four digits at a time.
    for( ; n >= 4; p += 4, n -= 4 )
    {
        // The byte-at-a-time form is faster on clang for x86 but
        // slower on gcc, hence the two variants.
#ifdef __clang__
        r = r * 10 + p[0] - '0';
        r = r * 10 + p[1] - '0';
        r = r * 10 + p[2] - '0';
        r = r * 10 + p[3] - '0';
#else
        // Load four characters as one word and strip the '0' bias
        // from all of them with a single subtraction.
        uint32_t packed;
        std::memcpy( &packed, p, 4 );
        packed -= 0x30303030;

        unsigned const b0 = packed & 0xFF;
        unsigned const b1 = (packed >> 8) & 0xFF;
        unsigned const b2 = (packed >> 16) & 0xFF;
        unsigned const b3 = (packed >> 24);

        // Which byte holds the most significant digit depends on the
        // byte order of the target.
#ifdef BOOST_JSON_BIG_ENDIAN
        r = (((r * 10 + b3) * 10 + b2) * 10 + b1) * 10 + b0;
#else
        r = (((r * 10 + b0) * 10 + b1) * 10 + b2) * 10 + b3;
#endif
#endif
    }

    // Zero to three digits are left over.
    for( std::size_t i = 0; i < n; ++i )
    {
        r = r * 10 + p[i] - '0';
    }
    return r;
}
- // KRYSTIAN: this function is unused
- // count_leading
- /*
- #ifdef BOOST_JSON_USE_SSE2
- // assumes p..p+15
- inline std::size_t count_leading( char const * p, char ch ) noexcept
- {
- __m128i const q1 = _mm_set1_epi8( ch );
- __m128i v = _mm_loadu_si128( (__m128i const*)p );
- __m128i w = _mm_cmpeq_epi8( v, q1 );
- int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
- std::size_t n;
- if( m == 0 )
- {
- n = 16;
- }
- else
- {
- #if defined(__GNUC__) || defined(__clang__)
- n = __builtin_ffs( m ) - 1;
- #else
- unsigned long index;
- _BitScanForward( &index, m );
- n = index;
- #endif
- }
- return n;
- }
- #else
- // assumes p..p+15
- inline std::size_t count_leading( char const * p, char ch ) noexcept
- {
- std::size_t n = 0;
- for( ; n < 16 && *p == ch; ++p, ++n );
- return n;
- }
- #endif
- */
- // count_whitespace
- #ifdef BOOST_JSON_USE_SSE2
// Skips JSON whitespace (space, tab, carriage return, line feed) at
// the front of [p, end).
//
// Returns a pointer to the first byte that is not whitespace, or `end`
// if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    // Quick exit: empty range, or a first byte above 0x20, which
    // cannot be any of the four whitespace characters.
    if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
    {
        return p;
    }

    __m128i const space = _mm_set1_epi8( ' ' );
    __m128i const lf    = _mm_set1_epi8( '\n' );
    __m128i const bit2  = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
    __m128i const cr    = _mm_set1_epi8( '\r' );

    // Vector part: examine 16 bytes per iteration.
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const space_or_lf = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, space ),
            _mm_cmpeq_epi8( chunk, lf ) );
        // Setting bit 2 maps '\t' onto '\r', so a single comparison
        // matches both characters.
        __m128i const tab_or_cr = _mm_cmpeq_epi8(
            _mm_or_si128( chunk, bit2 ), cr );

        // Flip the 16 mask bits so that set bits mark non-whitespace.
        int const non_ws = _mm_movemask_epi8(
            _mm_or_si128( space_or_lf, tab_or_cr ) ) ^ 0xFFFF;
        if( non_ws == 0 )
            continue;

#if defined(__GNUC__) || defined(__clang__)
        std::size_t const skip = __builtin_ffs( non_ws ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, non_ws );
        std::size_t const skip = index;
#endif
        return p + skip;
    }

    // Scalar part: fewer than 16 bytes remain.
    for( ; p != end; ++p )
    {
        char const c = *p;
        if( !( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) )
        {
            return p;
        }
    }
    return p;
}
- /*
- // slightly faster on msvc-14.2, slightly slower on clang-win
- inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
- {
- char const * p0 = p;
- while( n > 0 )
- {
- char ch = *p;
- if( ch == '\n' || ch == '\r' )
- {
- ++p;
- --n;
- continue;
- }
- if( ch != ' ' && ch != '\t' )
- {
- break;
- }
- ++p;
- --n;
- while( n >= 16 )
- {
- std::size_t n2 = count_leading( p, ch );
- p += n2;
- n -= n2;
- if( n2 < 16 )
- {
- break;
- }
- }
- }
- return p - p0;
- }
- */
- #else
// Portable (non-SSE2) version: skips JSON whitespace (space, tab,
// carriage return, line feed) at the front of [p, end).
//
// Returns a pointer to the first byte that is not whitespace, or `end`
// if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        switch( *p )
        {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            ++p;
            continue; // next iteration of the enclosing while loop
        default:
            return p;
        }
    }
    return p;
}
- #endif
- } // detail
- BOOST_JSON_NS_END
- #endif
|