parser.hpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/json
  8. //
  9. #ifndef BOOST_JSON_PARSER_HPP
  10. #define BOOST_JSON_PARSER_HPP
  11. #include <boost/json/detail/config.hpp>
  12. #include <boost/json/basic_parser.hpp>
  13. #include <boost/json/storage_ptr.hpp>
  14. #include <boost/json/value.hpp>
  15. #include <boost/json/detail/handler.hpp>
  16. #include <type_traits>
  17. #include <cstddef>
  18. BOOST_JSON_NS_BEGIN
  19. //----------------------------------------------------------
  20. /** A DOM parser for JSON contained in a single buffer.
  21. This class is used to parse a JSON contained in a
  22. single character buffer, into a @ref value container.
  23. @par Usage
  24. To use the parser first construct it, then optionally
  25. call @ref reset to specify a @ref storage_ptr to use
  26. for the resulting @ref value. Then call @ref write
  27. to parse a character buffer containing a complete
  28. JSON. If the parse is successful, call @ref release
  29. to take ownership of the value:
  30. @code
  31. parser p; // construct a parser
  32. size_t n = p.write( "[1,2,3]" ); // parse a complete JSON
  33. assert( n == 7 ); // all characters consumed
  34. value jv = p.release(); // take ownership of the value
  35. @endcode
  36. @par Extra Data
  37. When the character buffer provided as input contains
  38. additional data that is not part of the complete
  39. JSON, an error is returned. The @ref write_some
  40. function is an alternative which allows the parse
  41. to finish early, without consuming all the characters
  42. in the buffer. This allows parsing of a buffer
  43. containing multiple individual JSONs or containing
  44. different protocol data:
  45. @code
  46. parser p; // construct a parser
  47. size_t n = p.write_some( "[1,2,3] null" ); // parse a complete JSON
  48. assert( n == 8 ); // only some characters consumed
  49. value jv = p.release(); // take ownership of the value
  50. @endcode
  51. @par Temporary Storage
  52. The parser may dynamically allocate temporary
  53. storage as needed to accommodate the nesting level
  54. of the JSON being parsed. Temporary storage is
  55. first obtained from an optional, caller-owned
  56. buffer specified upon construction. When that
  57. is exhausted, the next allocation uses the
  58. @ref memory_resource passed to the constructor; if
  59. no such argument is specified, the default memory
  60. resource is used. Temporary storage is freed only
  61. when the parser is destroyed; The performance of
  62. parsing multiple JSONs may be improved by reusing
  63. the same parser instance.
  64. \n
  65. It is important to note that the @ref memory_resource
  66. supplied upon construction is used for temporary
  67. storage only, and not for allocating the elements
  68. which make up the parsed value. That other memory
  69. resource is optionally supplied in each call
  70. to @ref reset.
  71. @par Duplicate Keys
  72. If there are object elements with duplicate keys;
  73. that is, if multiple elements in an object have
  74. keys that compare equal, only the last equivalent
  75. element will be inserted.
  76. @par Non-Standard JSON
  77. The @ref parse_options structure optionally
  78. provided upon construction is used to customize
  79. some parameters of the parser, including which
  80. non-standard JSON extensions should be allowed.
  81. A default-constructed parse options allows only
  82. standard JSON.
  83. @par Thread Safety
  84. Distinct instances may be accessed concurrently.
  85. Non-const member functions of a shared instance
  86. may not be called concurrently with any other
  87. member functions of that instance.
  88. @see
  89. @ref parse,
  90. @ref parse_options,
  91. @ref stream_parser.
  92. */
  93. class parser
  94. {
  95. basic_parser<detail::handler> p_;
  96. public:
  97. /// Copy constructor (deleted)
  98. parser(
  99. parser const&) = delete;
  100. /// Copy assignment (deleted)
  101. parser& operator=(
  102. parser const&) = delete;
  103. /** Destructor.
  104. All dynamically allocated memory, including
  105. any incomplete parsing results, is freed.
  106. @par Complexity
  107. Linear in the size of partial results
  108. @par Exception Safety
  109. No-throw guarantee.
  110. */
  111. ~parser() = default;
  112. /** Constructor.
  113. This constructs a new parser which first uses
  114. the caller-owned storage pointed to by `buffer`
  115. for temporary storage, falling back to the memory
  116. resource `sp` if needed. The parser will use the
  117. specified parsing options.
  118. \n
  119. The parsed value will use the default memory
  120. resource for storage. To use a different resource,
  121. call @ref reset after construction.
  122. @par Complexity
  123. Constant.
  124. @par Exception Safety
  125. No-throw guarantee.
  126. @param sp The memory resource to use for
  127. temporary storage after `buffer` is exhausted.
  128. @param opt The parsing options to use.
  129. @param buffer A pointer to valid memory of at least
  130. `size` bytes for the parser to use for temporary storage.
  131. Ownership is not transferred, the caller is responsible
  132. for ensuring the lifetime of the memory pointed to by
  133. `buffer` extends until the parser is destroyed.
  134. @param size The number of valid bytes in `buffer`.
  135. */
  136. BOOST_JSON_DECL
  137. parser(
  138. storage_ptr sp,
  139. parse_options const& opt,
  140. unsigned char* buffer,
  141. std::size_t size) noexcept;
  142. /** Constructor.
  143. This constructs a new parser which uses the default
  144. memory resource for temporary storage, and accepts
  145. only strict JSON.
  146. \n
  147. The parsed value will use the default memory
  148. resource for storage. To use a different resource,
  149. call @ref reset after construction.
  150. @par Complexity
  151. Constant.
  152. @par Exception Safety
  153. No-throw guarantee.
  154. */
  155. parser() noexcept
  156. : parser({}, {})
  157. {
  158. }
  159. /** Constructor.
  160. This constructs a new parser which uses the
  161. specified memory resource for temporary storage,
  162. and is configured to use the specified parsing
  163. options.
  164. \n
  165. The parsed value will use the default memory
  166. resource for storage. To use a different resource,
  167. call @ref reset after construction.
  168. @par Complexity
  169. Constant.
  170. @par Exception Safety
  171. No-throw guarantee.
  172. @param sp The memory resource to use for temporary storage.
  173. @param opt The parsing options to use.
  174. */
  175. BOOST_JSON_DECL
  176. parser(
  177. storage_ptr sp,
  178. parse_options const& opt) noexcept;
  179. /** Constructor.
  180. This constructs a new parser which uses the
  181. specified memory resource for temporary storage,
  182. and accepts only strict JSON.
  183. \n
  184. The parsed value will use the default memory
  185. resource for storage. To use a different resource,
  186. call @ref reset after construction.
  187. @par Complexity
  188. Constant.
  189. @par Exception Safety
  190. No-throw guarantee.
  191. @param sp The memory resource to use for temporary storage.
  192. */
  193. explicit
  194. parser(storage_ptr sp) noexcept
  195. : parser(std::move(sp), {})
  196. {
  197. }
  198. /** Constructor.
  199. This constructs a new parser which first uses the
  200. caller-owned storage `buffer` for temporary storage,
  201. falling back to the memory resource `sp` if needed.
  202. The parser will use the specified parsing options.
  203. \n
  204. The parsed value will use the default memory
  205. resource for storage. To use a different resource,
  206. call @ref reset after construction.
  207. @par Complexity
  208. Constant.
  209. @par Exception Safety
  210. No-throw guarantee.
  211. @param sp The memory resource to use for
  212. temporary storage after `buffer` is exhausted.
  213. @param opt The parsing options to use.
  214. @param buffer A buffer for the parser to use for
  215. temporary storage. Ownership is not transferred,
  216. the caller is responsible for ensuring the lifetime
  217. of `buffer` extends until the parser is destroyed.
  218. */
  219. template<std::size_t N>
  220. parser(
  221. storage_ptr sp,
  222. parse_options const& opt,
  223. unsigned char(&buffer)[N]) noexcept
  224. : parser(std::move(sp),
  225. opt, &buffer[0], N)
  226. {
  227. }
  228. #if defined(__cpp_lib_byte) || defined(BOOST_JSON_DOCS)
  229. /** Constructor.
  230. This constructs a new parser which first uses
  231. the caller-owned storage pointed to by `buffer`
  232. for temporary storage, falling back to the memory
  233. resource `sp` if needed. The parser will use the
  234. specified parsing options.
  235. \n
  236. The parsed value will use the default memory
  237. resource for storage. To use a different resource,
  238. call @ref reset after construction.
  239. @par Complexity
  240. Constant.
  241. @par Exception Safety
  242. No-throw guarantee.
  243. @param sp The memory resource to use for
  244. temporary storage after `buffer` is exhausted.
  245. @param opt The parsing options to use.
  246. @param buffer A pointer to valid memory of at least
  247. `size` bytes for the parser to use for temporary storage.
  248. Ownership is not transferred, the caller is responsible
  249. for ensuring the lifetime of the memory pointed to by
  250. `buffer` extends until the parser is destroyed.
  251. @param size The number of valid bytes in `buffer`.
  252. */
  253. parser(
  254. storage_ptr sp,
  255. parse_options const& opt,
  256. std::byte* buffer,
  257. std::size_t size) noexcept
  258. : parser(sp, opt, reinterpret_cast<
  259. unsigned char*>(buffer), size)
  260. {
  261. }
  262. /** Constructor.
  263. This constructs a new parser which first uses the
  264. caller-owned storage `buffer` for temporary storage,
  265. falling back to the memory resource `sp` if needed.
  266. The parser will use the specified parsing options.
  267. \n
  268. The parsed value will use the default memory
  269. resource for storage. To use a different resource,
  270. call @ref reset after construction.
  271. @par Complexity
  272. Constant.
  273. @par Exception Safety
  274. No-throw guarantee.
  275. @param sp The memory resource to use for
  276. temporary storage after `buffer` is exhausted.
  277. @param opt The parsing options to use.
  278. @param buffer A buffer for the parser to use for
  279. temporary storage. Ownership is not transferred,
  280. the caller is responsible for ensuring the lifetime
  281. of `buffer` extends until the parser is destroyed.
  282. */
  283. template<std::size_t N>
  284. parser(
  285. storage_ptr sp,
  286. parse_options const& opt,
  287. std::byte(&buffer)[N]) noexcept
  288. : parser(std::move(sp),
  289. opt, &buffer[0], N)
  290. {
  291. }
  292. #endif
  293. #ifndef BOOST_JSON_DOCS
  294. // Safety net for accidental buffer overflows
  295. template<std::size_t N>
  296. parser(
  297. storage_ptr sp,
  298. parse_options const& opt,
  299. unsigned char(&buffer)[N],
  300. std::size_t n) noexcept
  301. : parser(std::move(sp),
  302. opt, &buffer[0], n)
  303. {
  304. // If this goes off, check your parameters
  305. // closely, chances are you passed an array
  306. // thinking it was a pointer.
  307. BOOST_ASSERT(n <= N);
  308. }
  309. #ifdef __cpp_lib_byte
  310. // Safety net for accidental buffer overflows
  311. template<std::size_t N>
  312. parser(
  313. storage_ptr sp,
  314. parse_options const& opt,
  315. std::byte(&buffer)[N], std::size_t n) noexcept
  316. : parser(std::move(sp),
  317. opt, &buffer[0], n)
  318. {
  319. // If this goes off, check your parameters
  320. // closely, chances are you passed an array
  321. // thinking it was a pointer.
  322. BOOST_ASSERT(n <= N);
  323. }
  324. #endif
  325. #endif
  326. /** Reset the parser for a new JSON.
  327. This function is used to reset the parser to
  328. prepare it for parsing a new complete JSON.
  329. Any previous partial results are destroyed.
  330. @par Complexity
  331. Constant or linear in the size of any previous
  332. partial parsing results.
  333. @par Exception Safety
  334. No-throw guarantee.
  335. @param sp A pointer to the @ref memory_resource
  336. to use for the resulting @ref value. The parser
  337. will acquire shared ownership.
  338. */
  339. BOOST_JSON_DECL
  340. void
  341. reset(storage_ptr sp = {}) noexcept;
  342. /** Parse a buffer containing a complete JSON.
  343. This function parses a complete JSON contained
  344. in the specified character buffer. Additional
  345. characters past the end of the complete JSON
  346. are ignored. The function returns the actual
  347. number of characters parsed, which may be less
  348. than the size of the input. This allows parsing
  349. of a buffer containing multiple individual JSONs
  350. or containing different protocol data:
  351. @par Example
  352. @code
  353. parser p; // construct a parser
  354. size_t n = p.write_some( "[1,2,3] null" ); // parse a complete JSON
  355. assert( n == 8 ); // only some characters consumed
  356. value jv = p.release(); // take ownership of the value
  357. @endcode
  358. @par Complexity
  359. Linear in `size`.
  360. @par Exception Safety
  361. Basic guarantee.
  362. Calls to `memory_resource::allocate` may throw.
  363. Upon error or exception, subsequent calls will
  364. fail until @ref reset is called to parse a new JSON.
  365. @return The number of characters consumed from
  366. the buffer.
  367. @param data A pointer to a buffer of `size`
  368. characters to parse.
  369. @param size The number of characters pointed to
  370. by `data`.
  371. @param ec Set to the error, if any occurred.
  372. */
  373. BOOST_JSON_DECL
  374. std::size_t
  375. write_some(
  376. char const* data,
  377. std::size_t size,
  378. error_code& ec);
  379. /** Parse a buffer containing a complete JSON.
  380. This function parses a complete JSON contained
  381. in the specified character buffer. Additional
  382. characters past the end of the complete JSON
  383. are ignored. The function returns the actual
  384. number of characters parsed, which may be less
  385. than the size of the input. This allows parsing
  386. of a buffer containing multiple individual JSONs
  387. or containing different protocol data:
  388. @par Example
  389. @code
  390. parser p; // construct a parser
  391. size_t n = p.write_some( "[1,2,3] null" ); // parse a complete JSON
  392. assert( n == 8 ); // only some characters consumed
  393. value jv = p.release(); // take ownership of the value
  394. @endcode
  395. @par Complexity
  396. Linear in `size`.
  397. @par Exception Safety
  398. Basic guarantee.
  399. Calls to `memory_resource::allocate` may throw.
  400. Upon error or exception, subsequent calls will
  401. fail until @ref reset is called to parse a new JSON.
  402. @return The number of characters consumed from
  403. the buffer.
  404. @param data A pointer to a buffer of `size`
  405. characters to parse.
  406. @param size The number of characters pointed to
  407. by `data`.
  408. @throw system_error Thrown on error.
  409. */
  410. BOOST_JSON_DECL
  411. std::size_t
  412. write_some(
  413. char const* data,
  414. std::size_t size);
  415. /** Parse a buffer containing a complete JSON.
  416. This function parses a complete JSON contained
  417. in the specified character buffer. Additional
  418. characters past the end of the complete JSON
  419. are ignored. The function returns the actual
  420. number of characters parsed, which may be less
  421. than the size of the input. This allows parsing
  422. of a buffer containing multiple individual JSONs
  423. or containing different protocol data:
  424. @par Example
  425. @code
  426. parser p; // construct a parser
  427. size_t n = p.write_some( "[1,2,3] null" ); // parse a complete JSON
  428. assert( n == 8 ); // only some characters consumed
  429. value jv = p.release(); // take ownership of the value
  430. @endcode
  431. @par Complexity
  432. Linear in `size`.
  433. @par Exception Safety
  434. Basic guarantee.
  435. Calls to `memory_resource::allocate` may throw.
  436. Upon error or exception, subsequent calls will
  437. fail until @ref reset is called to parse a new JSON.
  438. @return The number of characters consumed from
  439. the buffer.
  440. @param s The character string to parse.
  441. @param ec Set to the error, if any occurred.
  442. */
  443. std::size_t
  444. write_some(
  445. string_view s,
  446. error_code& ec)
  447. {
  448. return write_some(
  449. s.data(), s.size(), ec);
  450. }
  451. /** Parse a buffer containing a complete JSON.
  452. This function parses a complete JSON contained
  453. in the specified character buffer. Additional
  454. characters past the end of the complete JSON
  455. are ignored. The function returns the actual
  456. number of characters parsed, which may be less
  457. than the size of the input. This allows parsing
  458. of a buffer containing multiple individual JSONs
  459. or containing different protocol data:
  460. @par Example
  461. @code
  462. parser p; // construct a parser
  463. size_t n = p.write_some( "[1,2,3] null" ); // parse a complete JSON
  464. assert( n == 8 ); // only some characters consumed
  465. value jv = p.release(); // take ownership of the value
  466. @endcode
  467. @par Complexity
  468. Linear in `size`.
  469. @par Exception Safety
  470. Basic guarantee.
  471. Calls to `memory_resource::allocate` may throw.
  472. Upon error or exception, subsequent calls will
  473. fail until @ref reset is called to parse a new JSON.
  474. @return The number of characters consumed from
  475. the buffer.
  476. @param s The character string to parse.
  477. @throw system_error Thrown on error.
  478. */
  479. std::size_t
  480. write_some(
  481. string_view s)
  482. {
  483. return write_some(
  484. s.data(), s.size());
  485. }
  486. /** Parse a buffer containing a complete JSON.
  487. This function parses a complete JSON contained
  488. in the specified character buffer. The entire
  489. buffer must be consumed; if there are additional
  490. characters past the end of the complete JSON,
  491. the parse fails and an error is returned.
  492. @par Example
  493. @code
  494. parser p; // construct a parser
  495. size_t n = p.write( "[1,2,3]" ); // parse a complete JSON
  496. assert( n == 7 ); // all characters consumed
  497. value jv = p.release(); // take ownership of the value
  498. @endcode
  499. @par Complexity
  500. Linear in `size`.
  501. @par Exception Safety
  502. Basic guarantee.
  503. Calls to `memory_resource::allocate` may throw.
  504. Upon error or exception, subsequent calls will
  505. fail until @ref reset is called to parse a new JSON.
  506. @return The number of characters consumed from
  507. the buffer.
  508. @param data A pointer to a buffer of `size`
  509. characters to parse.
  510. @param size The number of characters pointed to
  511. by `data`.
  512. @param ec Set to the error, if any occurred.
  513. */
  514. BOOST_JSON_DECL
  515. std::size_t
  516. write(
  517. char const* data,
  518. std::size_t size,
  519. error_code& ec);
  520. /** Parse a buffer containing a complete JSON.
  521. This function parses a complete JSON contained
  522. in the specified character buffer. The entire
  523. buffer must be consumed; if there are additional
  524. characters past the end of the complete JSON,
  525. the parse fails and an error is returned.
  526. @par Example
  527. @code
  528. parser p; // construct a parser
  529. size_t n = p.write( "[1,2,3]" ); // parse a complete JSON
  530. assert( n == 7 ); // all characters consumed
  531. value jv = p.release(); // take ownership of the value
  532. @endcode
  533. @par Complexity
  534. Linear in `size`.
  535. @par Exception Safety
  536. Basic guarantee.
  537. Calls to `memory_resource::allocate` may throw.
  538. Upon error or exception, subsequent calls will
  539. fail until @ref reset is called to parse a new JSON.
  540. @return The number of characters consumed from
  541. the buffer.
  542. @param data A pointer to a buffer of `size`
  543. characters to parse.
  544. @param size The number of characters pointed to
  545. by `data`.
  546. @throw system_error Thrown on error.
  547. */
  548. BOOST_JSON_DECL
  549. std::size_t
  550. write(
  551. char const* data,
  552. std::size_t size);
  553. /** Parse a buffer containing a complete JSON.
  554. This function parses a complete JSON contained
  555. in the specified character buffer. The entire
  556. buffer must be consumed; if there are additional
  557. characters past the end of the complete JSON,
  558. the parse fails and an error is returned.
  559. @par Example
  560. @code
  561. parser p; // construct a parser
  562. size_t n = p.write( "[1,2,3]" ); // parse a complete JSON
  563. assert( n == 7 ); // all characters consumed
  564. value jv = p.release(); // take ownership of the value
  565. @endcode
  566. @par Complexity
  567. Linear in `size`.
  568. @par Exception Safety
  569. Basic guarantee.
  570. Calls to `memory_resource::allocate` may throw.
  571. Upon error or exception, subsequent calls will
  572. fail until @ref reset is called to parse a new JSON.
  573. @return The number of characters consumed from
  574. the buffer.
  575. @param s The character string to parse.
  576. @param ec Set to the error, if any occurred.
  577. */
  578. std::size_t
  579. write(
  580. string_view s,
  581. error_code& ec)
  582. {
  583. return write(
  584. s.data(), s.size(), ec);
  585. }
  586. /** Parse a buffer containing a complete JSON.
  587. This function parses a complete JSON contained
  588. in the specified character buffer. The entire
  589. buffer must be consumed; if there are additional
  590. characters past the end of the complete JSON,
  591. the parse fails and an error is returned.
  592. @par Example
  593. @code
  594. parser p; // construct a parser
  595. size_t n = p.write( "[1,2,3]" ); // parse a complete JSON
  596. assert( n == 7 ); // all characters consumed
  597. value jv = p.release(); // take ownership of the value
  598. @endcode
  599. @par Complexity
  600. Linear in `size`.
  601. @par Exception Safety
  602. Basic guarantee.
  603. Calls to `memory_resource::allocate` may throw.
  604. Upon error or exception, subsequent calls will
  605. fail until @ref reset is called to parse a new JSON.
  606. @return The number of characters consumed from
  607. the buffer.
  608. @param s The character string to parse.
  609. @throw system_error Thrown on error.
  610. */
  611. std::size_t
  612. write(
  613. string_view s)
  614. {
  615. return write(
  616. s.data(), s.size());
  617. }
  618. /** Return the parsed JSON as a @ref value.
  619. This returns the parsed value, or throws
  620. an exception if the parsing is incomplete or
  621. failed. It is necessary to call @ref reset
  622. after calling this function in order to parse
  623. another JSON.
  624. @par Complexity
  625. Constant.
  626. @return The parsed value. Ownership of this
  627. value is transferred to the caller.
  628. @throw system_error Thrown on failure.
  629. */
  630. BOOST_JSON_DECL
  631. value
  632. release();
  633. };
  634. BOOST_JSON_NS_END
  635. #endif