basic_parser.hpp 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_HPP
  11. #define BOOST_JSON_BASIC_PARSER_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/error.hpp>
  14. #include <boost/json/kind.hpp>
  15. #include <boost/json/parse_options.hpp>
  16. #include <boost/json/detail/stack.hpp>
  17. #include <boost/json/detail/stream.hpp>
  18. #include <boost/json/detail/utf8.hpp>
  19. /* VFALCO NOTE
  20. This header only declares basic_parser; the member
  21. function definitions are not in this file. Users who
  22. need them should include <boost/json/basic_parser_impl.hpp>
  23. instead, which provides the member function definitions.
  24. The source code is arranged this way to keep compile
  25. times down.
  26. */
  27. BOOST_JSON_NS_BEGIN
  28. /** An incremental SAX parser for serialized JSON.
  29. This implements a SAX-style parser, invoking a
  30. caller-supplied handler with each parsing event.
  31. To use, first declare a variable of type
  32. `basic_parser<T>` where `T` meets the handler
  33. requirements specified below. Then call
  34. @ref write_some one or more times with the input,
  35. setting `more = false` on the final buffer.
  36. The parsing events are realized through member
  37. function calls on the handler, which exists
  38. as a data member of the parser.
  39. \n
  40. The parser may dynamically allocate intermediate
  41. storage as needed to accommodate the nesting level
  42. of the input JSON. On subsequent invocations, the
  43. parser can cheaply re-use this memory, improving
  44. performance. This storage is freed when the
  45. parser is destroyed.
  46. @par Usage
  47. To get the declaration and function definitions
  48. for this class it is necessary to include this
  49. file instead:
  50. @code
  51. #include <boost/json/basic_parser_impl.hpp>
  52. @endcode
  53. Users who wish to parse JSON into the DOM container
  54. @ref value will not use this class directly; instead
  55. they will create an instance of @ref parser or
  56. @ref stream_parser and use that instead. Alternatively,
  57. they may call the function @ref parse. This class is
  58. designed for users who wish to perform custom actions
  59. instead of building a @ref value. For example, to
  60. produce a DOM from an external library.
  61. \n
  62. @note
  63. By default, only conforming JSON using UTF-8
  64. encoding is accepted. However, select non-compliant
  65. syntax can be allowed by construction using a
  66. @ref parse_options set to desired values.
  67. @par Handler
  68. The handler provided must be implemented as an
  69. object of class type which defines each of the
  70. required event member functions below. The event
  71. functions return a `bool` where `true` indicates
  72. success, and `false` indicates failure. If the
  73. member function returns `false`, it must set
  74. the error code to a suitable value. This error
  75. code will be returned by the write function to
  76. the caller.
  77. \n
  78. Handlers are required to declare the maximum
  79. limits on various elements. If these limits
  80. are exceeded during parsing, then parsing
  81. fails with an error.
  82. \n
  83. The following declaration meets the parser's
  84. handler requirements:
  85. @code
  86. struct handler
  87. {
  88. /// The maximum number of elements allowed in an array
  89. static constexpr std::size_t max_array_size = -1;
  90. /// The maximum number of elements allowed in an object
  91. static constexpr std::size_t max_object_size = -1;
  92. /// The maximum number of characters allowed in a string
  93. static constexpr std::size_t max_string_size = -1;
  94. /// The maximum number of characters allowed in a key
  95. static constexpr std::size_t max_key_size = -1;
  96. /// Called once when the JSON parsing begins.
  97. ///
  98. /// @return `true` on success.
  99. /// @param ec Set to the error, if any occurred.
  100. ///
  101. bool on_document_begin( error_code& ec );
  102. /// Called when the JSON parsing is done.
  103. ///
  104. /// @return `true` on success.
  105. /// @param ec Set to the error, if any occurred.
  106. ///
  107. bool on_document_end( error_code& ec );
  108. /// Called when the beginning of an array is encountered.
  109. ///
  110. /// @return `true` on success.
  111. /// @param ec Set to the error, if any occurred.
  112. ///
  113. bool on_array_begin( error_code& ec );
  114. /// Called when the end of the current array is encountered.
  115. ///
  116. /// @return `true` on success.
  117. /// @param n The number of elements in the array.
  118. /// @param ec Set to the error, if any occurred.
  119. ///
  120. bool on_array_end( std::size_t n, error_code& ec );
  121. /// Called when the beginning of an object is encountered.
  122. ///
  123. /// @return `true` on success.
  124. /// @param ec Set to the error, if any occurred.
  125. ///
  126. bool on_object_begin( error_code& ec );
  127. /// Called when the end of the current object is encountered.
  128. ///
  129. /// @return `true` on success.
  130. /// @param n The number of elements in the object.
  131. /// @param ec Set to the error, if any occurred.
  132. ///
  133. bool on_object_end( std::size_t n, error_code& ec );
  134. /// Called with characters corresponding to part of the current string.
  135. ///
  136. /// @return `true` on success.
  137. /// @param s The partial characters
  138. /// @param n The total size of the string thus far
  139. /// @param ec Set to the error, if any occurred.
  140. ///
  141. bool on_string_part( string_view s, std::size_t n, error_code& ec );
  142. /// Called with the last characters corresponding to the current string.
  143. ///
  144. /// @return `true` on success.
  145. /// @param s The remaining characters
  146. /// @param n The total size of the string
  147. /// @param ec Set to the error, if any occurred.
  148. ///
  149. bool on_string( string_view s, std::size_t n, error_code& ec );
  150. /// Called with characters corresponding to part of the current key.
  151. ///
  152. /// @return `true` on success.
  153. /// @param s The partial characters
  154. /// @param n The total size of the key thus far
  155. /// @param ec Set to the error, if any occurred.
  156. ///
  157. bool on_key_part( string_view s, std::size_t n, error_code& ec );
  158. /// Called with the last characters corresponding to the current key.
  159. ///
  160. /// @return `true` on success.
  161. /// @param s The remaining characters
  162. /// @param n The total size of the key
  163. /// @param ec Set to the error, if any occurred.
  164. ///
  165. bool on_key( string_view s, std::size_t n, error_code& ec );
  166. /// Called with the characters corresponding to part of the current number.
  167. ///
  168. /// @return `true` on success.
  169. /// @param s The partial characters
  170. /// @param ec Set to the error, if any occurred.
  171. ///
  172. bool on_number_part( string_view s, error_code& ec );
  173. /// Called when a signed integer is parsed.
  174. ///
  175. /// @return `true` on success.
  176. /// @param i The value
  177. /// @param s The remaining characters
  178. /// @param ec Set to the error, if any occurred.
  179. ///
  180. bool on_int64( int64_t i, string_view s, error_code& ec );
  181. /// Called when an unsigned integer is parsed.
  182. ///
  183. /// @return `true` on success.
  184. /// @param u The value
  185. /// @param s The remaining characters
  186. /// @param ec Set to the error, if any occurred.
  187. ///
  188. bool on_uint64( uint64_t u, string_view s, error_code& ec );
  189. /// Called when a double is parsed.
  190. ///
  191. /// @return `true` on success.
  192. /// @param d The value
  193. /// @param s The remaining characters
  194. /// @param ec Set to the error, if any occurred.
  195. ///
  196. bool on_double( double d, string_view s, error_code& ec );
  197. /// Called when a boolean is parsed.
  198. ///
  199. /// @return `true` on success.
  200. /// @param b The value
  201. ///
  202. /// @param ec Set to the error, if any occurred.
  203. ///
  204. bool on_bool( bool b, error_code& ec );
  205. /// Called when a null is parsed.
  206. ///
  207. /// @return `true` on success.
  208. /// @param ec Set to the error, if any occurred.
  209. ///
  210. bool on_null( error_code& ec );
  211. /// Called with characters corresponding to part of the current comment.
  212. ///
  213. /// @return `true` on success.
  214. /// @param s The partial characters.
  215. /// @param ec Set to the error, if any occurred.
  216. ///
  217. bool on_comment_part( string_view s, error_code& ec );
  218. /// Called with the last characters corresponding to the current comment.
  219. ///
  220. /// @return `true` on success.
  221. /// @param s The remaining characters
  222. /// @param ec Set to the error, if any occurred.
  223. ///
  224. bool on_comment( string_view s, error_code& ec );
  225. };
  226. @endcode
  227. @see
  228. @ref parse,
  229. @ref stream_parser.
  230. @headerfile <boost/json/basic_parser.hpp>
  231. */
  232. template<class Handler>
  233. class basic_parser // declarations only; member functions without a body here are defined elsewhere
  234. {
  235. enum class state : char // values are passed to the suspend helpers declared below; presumably parser resume points -- confirm in the implementation
  236. {
  237. doc1, doc2, doc3, doc4,
  238. com1, com2, com3, com4,
  239. nul1, nul2, nul3,
  240. tru1, tru2, tru3,
  241. fal1, fal2, fal3, fal4,
  242. str1, str2, str3, str4,
  243. str5, str6, str7, str8,
  244. sur1, sur2, sur3,
  245. sur4, sur5, sur6,
  246. obj1, obj2, obj3, obj4,
  247. obj5, obj6, obj7, obj8,
  248. obj9, obj10, obj11,
  249. arr1, arr2, arr3,
  250. arr4, arr5, arr6,
  251. num1, num2, num3, num4,
  252. num5, num6, num7, num8,
  253. exp1, exp2, exp3,
  254. val1, val2
  255. };
  256. struct number // passed by const reference to the maybe_suspend/suspend overloads below
  257. {
  258. uint64_t mant; // presumably the mantissa digits -- TODO confirm
  259. int bias; // NOTE(review): meaning is not visible in this header -- confirm in the implementation
  260. int exp; // presumably the exponent -- TODO confirm
  261. bool frac; // presumably set when a fractional part is present -- TODO confirm
  262. bool neg; // presumably set for a negative number -- TODO confirm
  263. };
  264. // optimization: must come first
  265. Handler h_; // the handler instance returned by handler()
  266. number num_;
  267. error_code ec_; // returned by last_error()
  268. detail::stack st_;
  269. detail::utf8_sequence seq_;
  270. unsigned u1_;
  271. unsigned u2_;
  272. bool more_; // false for final buffer
  273. bool done_ = false; // true on complete parse; returned by done()
  274. bool clean_ = true; // write_some exited cleanly
  275. const char* end_; // no default initializer here; presumably assigned before use -- confirm
  276. parse_options opt_; // must stay declared before depth_, whose default initializer reads it
  277. // how many levels deeper the parser can go
  278. std::size_t depth_ = opt_.max_depth;
  279. inline void reserve();
  280. inline const char* sentinel();
  281. inline bool incomplete(
  282. const detail::const_stream_wrapper& cs);
  283. #ifdef __INTEL_COMPILER // NOTE(review): warning 2196 is disabled around declarations marked both BOOST_NOINLINE and inline; presumably that combination triggers it -- confirm
  284. #pragma warning push
  285. #pragma warning disable 2196
  286. #endif
  287. BOOST_NOINLINE
  288. inline
  289. const char*
  290. suspend_or_fail(state st);
  291. BOOST_NOINLINE
  292. inline
  293. const char*
  294. suspend_or_fail(
  295. state st,
  296. std::size_t n);
  297. BOOST_NOINLINE
  298. inline
  299. const char*
  300. fail(const char* p) noexcept; // private overload; distinct from the public fail(error_code)
  301. BOOST_NOINLINE
  302. inline
  303. const char*
  304. fail(
  305. const char* p,
  306. error ev) noexcept; // private overload taking an explicit error value
  307. BOOST_NOINLINE
  308. inline
  309. const char*
  310. maybe_suspend(
  311. const char* p,
  312. state st);
  313. BOOST_NOINLINE
  314. inline
  315. const char*
  316. maybe_suspend(
  317. const char* p,
  318. state st,
  319. std::size_t n);
  320. BOOST_NOINLINE
  321. inline
  322. const char*
  323. maybe_suspend(
  324. const char* p,
  325. state st,
  326. const number& num);
  327. BOOST_NOINLINE
  328. inline
  329. const char*
  330. suspend(
  331. const char* p,
  332. state st);
  333. BOOST_NOINLINE
  334. inline
  335. const char*
  336. suspend(
  337. const char* p,
  338. state st,
  339. const number& num);
  340. #ifdef __INTEL_COMPILER
  341. #pragma warning pop
  342. #endif
  343. template<bool StackEmpty_/*, bool Terminal_*/> // in the parse_* declarations, a flag whose template parameter is commented out is taken as a runtime bool argument instead
  344. const char* parse_comment(const char* p,
  345. std::integral_constant<bool, StackEmpty_> stack_empty,
  346. /*std::integral_constant<bool, Terminal_>*/ bool terminal);
  347. template<bool StackEmpty_>
  348. const char* parse_document(const char* p,
  349. std::integral_constant<bool, StackEmpty_> stack_empty);
  350. template<bool StackEmpty_, bool AllowComments_/*,
  351. bool AllowTrailing_, bool AllowBadUTF8_*/>
  352. const char* parse_value(const char* p,
  353. std::integral_constant<bool, StackEmpty_> stack_empty,
  354. std::integral_constant<bool, AllowComments_> allow_comments,
  355. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  356. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  357. template<bool StackEmpty_, bool AllowComments_/*,
  358. bool AllowTrailing_, bool AllowBadUTF8_*/>
  359. const char* resume_value(const char* p,
  360. std::integral_constant<bool, StackEmpty_> stack_empty,
  361. std::integral_constant<bool, AllowComments_> allow_comments,
  362. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  363. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  364. template<bool StackEmpty_, bool AllowComments_/*,
  365. bool AllowTrailing_, bool AllowBadUTF8_*/>
  366. const char* parse_object(const char* p,
  367. std::integral_constant<bool, StackEmpty_> stack_empty,
  368. std::integral_constant<bool, AllowComments_> allow_comments,
  369. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  370. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  371. template<bool StackEmpty_, bool AllowComments_/*,
  372. bool AllowTrailing_, bool AllowBadUTF8_*/>
  373. const char* parse_array(const char* p,
  374. std::integral_constant<bool, StackEmpty_> stack_empty,
  375. std::integral_constant<bool, AllowComments_> allow_comments,
  376. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  377. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  378. template<bool StackEmpty_>
  379. const char* parse_null(const char* p,
  380. std::integral_constant<bool, StackEmpty_> stack_empty);
  381. template<bool StackEmpty_>
  382. const char* parse_true(const char* p,
  383. std::integral_constant<bool, StackEmpty_> stack_empty);
  384. template<bool StackEmpty_>
  385. const char* parse_false(const char* p,
  386. std::integral_constant<bool, StackEmpty_> stack_empty);
  387. template<bool StackEmpty_, bool IsKey_/*,
  388. bool AllowBadUTF8_*/>
  389. const char* parse_string(const char* p,
  390. std::integral_constant<bool, StackEmpty_> stack_empty,
  391. std::integral_constant<bool, IsKey_> is_key,
  392. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  393. template<bool StackEmpty_, char First_>
  394. const char* parse_number(const char* p,
  395. std::integral_constant<bool, StackEmpty_> stack_empty,
  396. std::integral_constant<char, First_> first);
  397. template<bool StackEmpty_, bool IsKey_/*,
  398. bool AllowBadUTF8_*/>
  399. const char* parse_unescaped(const char* p,
  400. std::integral_constant<bool, StackEmpty_> stack_empty,
  401. std::integral_constant<bool, IsKey_> is_key,
  402. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  403. template<bool StackEmpty_/*, bool IsKey_,
  404. bool AllowBadUTF8_*/>
  405. const char* parse_escaped(
  406. const char* p,
  407. std::size_t total,
  408. std::integral_constant<bool, StackEmpty_> stack_empty,
  409. /*std::integral_constant<bool, IsKey_>*/ bool is_key,
  410. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  411. // intentionally private
  412. std::size_t
  413. depth() const noexcept
  414. {
  415. return opt_.max_depth - depth_; // configured maximum minus the levels still available
  416. }
  417. public:
  418. /// Copy constructor (deleted)
  419. basic_parser(
  420. basic_parser const&) = delete;
  421. /// Copy assignment (deleted)
  422. basic_parser& operator=(
  423. basic_parser const&) = delete;
  424. /** Destructor.
  425. All dynamically allocated internal memory is freed.
  426. @par Effects
  427. @code
  428. this->handler().~Handler()
  429. @endcode
  430. @par Complexity
  431. Same as `~Handler()`.
  432. @par Exception Safety
  433. Same as `~Handler()`.
  434. */
  435. ~basic_parser() = default;
  436. /** Constructor.
  437. This function constructs the parser with
  438. the specified options, with any additional
  439. arguments forwarded to the handler's constructor.
  440. @par Complexity
  441. Same as `Handler( std::forward< Args >( args )... )`.
  442. @par Exception Safety
  443. Same as `Handler( std::forward< Args >( args )... )`.
  444. @param opt Configuration settings for the parser.
  445. If this structure is default constructed, the
  446. parser will accept only standard JSON.
  447. @param args Optional additional arguments
  448. forwarded to the handler's constructor.
  449. @see parse_options
  450. */
  451. template<class... Args>
  452. explicit
  453. basic_parser(
  454. parse_options const& opt,
  455. Args&&... args);
  456. /** Return a reference to the handler.
  457. This function provides access to the constructed
  458. instance of the handler owned by the parser.
  459. @par Complexity
  460. Constant.
  461. @par Exception Safety
  462. No-throw guarantee.
  463. */
  464. Handler&
  465. handler() noexcept
  466. {
  467. return h_;
  468. }
  469. /** Return a reference to the handler.
  470. This function provides access to the constructed
  471. instance of the handler owned by the parser.
  472. @par Complexity
  473. Constant.
  474. @par Exception Safety
  475. No-throw guarantee.
  476. */
  477. Handler const&
  478. handler() const noexcept
  479. {
  480. return h_;
  481. }
  482. /** Return the last error.
  483. This returns the last error code which
  484. was generated in the most recent call
  485. to @ref write_some.
  486. @par Complexity
  487. Constant.
  488. @par Exception Safety
  489. No-throw guarantee.
  490. */
  491. error_code
  492. last_error() const noexcept
  493. {
  494. return ec_;
  495. }
  496. /** Return true if a complete JSON has been parsed.
  497. This function returns `true` when all of these
  498. conditions are met:
  499. @li A complete serialized JSON has been
  500. presented to the parser, and
  501. @li No error or exception has occurred since the
  502. parser was constructed, or since the last call
  503. to @ref reset.
  504. @par Complexity
  505. Constant.
  506. @par Exception Safety
  507. No-throw guarantee.
  508. */
  509. bool
  510. done() const noexcept
  511. {
  512. return done_;
  513. }
  514. /** Reset the state, to parse a new document.
  515. This function discards the current parsing
  516. state, to prepare for parsing a new document.
  517. Dynamically allocated temporary memory used
  518. by the implementation is not deallocated.
  519. @par Complexity
  520. Constant.
  521. @par Exception Safety
  522. No-throw guarantee.
  523. */
  524. void
  525. reset() noexcept;
  526. /** Indicate a parsing failure.
  527. This changes the state of the parser to indicate
  528. that the parse has failed. A parser implementation
  529. can use this to fail the parser if needed due to
  530. external inputs.
  531. @note
  532. If `!ec`, the stored error code is unspecified.
  533. @par Complexity
  534. Constant.
  535. @par Exception Safety
  536. No-throw guarantee.
  537. @param ec The error code to set. If the code does
  538. not indicate failure, an implementation-defined
  539. error code that indicates failure will be stored
  540. instead.
  541. */
  542. void
  543. fail(error_code ec) noexcept;
  544. /** Parse some of an input string as JSON, incrementally.
  545. This function parses the JSON in the specified
  546. buffer, calling the handler to emit each SAX
  547. parsing event. The parse proceeds from the
  548. current state, which is at the beginning of a
  549. new JSON or in the middle of the current JSON
  550. if any characters were already parsed.
  551. \n
  552. The characters in the buffer are processed
  553. starting from the beginning, until one of the
  554. following conditions is met:
  555. @li All of the characters in the buffer
  556. have been parsed, or
  557. @li Some of the characters in the buffer
  558. have been parsed and the JSON is complete, or
  559. @li A parsing error occurs.
  560. The supplied buffer does not need to contain the
  561. entire JSON. Subsequent calls can provide more
  562. serialized data, allowing JSON to be processed
  563. incrementally. The end of the serialized JSON
  564. can be indicated by passing `more = false`.
  565. @par Complexity
  566. Linear in `size`.
  567. @par Exception Safety
  568. Basic guarantee.
  569. Calls to the handler may throw.
  570. Upon error or exception, subsequent calls will
  571. fail until @ref reset is called to parse a new JSON.
  572. @return The number of characters successfully
  573. parsed, which may be smaller than `size`.
  574. @param more `true` if there are possibly more
  575. buffers in the current JSON, otherwise `false`.
  576. @param data A pointer to a buffer of `size`
  577. characters to parse.
  578. @param size The number of characters pointed to
  579. by `data`.
  580. @param ec Set to the error, if any occurred.
  581. */
  582. std::size_t
  583. write_some(
  584. bool more,
  585. char const* data,
  586. std::size_t size,
  587. error_code& ec);
  588. };
  589. BOOST_JSON_NS_END
  590. #endif