json_parser.h 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #ifndef BASE_JSON_JSON_PARSER_H_
  5. #define BASE_JSON_JSON_PARSER_H_
  6. #include <stddef.h>
  7. #include <stdint.h>
  8. #include <memory>
  9. #include <string>
  10. #include "base/base_export.h"
  11. #include "base/compiler_specific.h"
  12. #include "base/gtest_prod_util.h"
  13. #include "base/json/json_common.h"
  14. #include "base/json/json_reader.h"
  15. #include "base/macros.h"
  16. #include "base/optional.h"
  17. #include "base/strings/string_piece.h"
  18. namespace base {
  19. class Value;  // Forward declaration; this header only names Value in return types.
  20. namespace internal {
  21. class JSONParserTest;  // Test class, forward-declared so JSONParser can name it as a friend.
  22. // The implementation behind the JSONReader interface. This class is not meant
  23. // to be used directly; it encapsulates logic that need not be exposed publicly.
  24. //
  25. // This parser guarantees O(n) time through the input string. Iteration happens
  26. // on the byte level, with the functions ConsumeChars() and ConsumeChar(). The
  27. // conversion from byte to JSON token happens without advancing the parser in
  28. // GetNextToken/ParseToken, that is tokenization operates on the current parser
  29. // position without advancing.
  30. //
  31. // Built on top of these are a family of Consume functions that iterate
  32. // internally. Invariant: on entry of a Consume function, the parser is wound
  33. // to the first byte of a valid JSON token. On exit, it is on the first byte
  34. // after the token that was just consumed, which would likely be the first byte
  35. // of the next token.
  36. class BASE_EXPORT JSONParser {
  37. public:
      // |options| are the base::JSONParserOptions that control parsing.
      // |max_depth| is the maximum depth to parse; it defaults to
      // kAbsoluteMaxDepth.
  38. JSONParser(int options, size_t max_depth = kAbsoluteMaxDepth);
  39. ~JSONParser();
  40. // Parses the input string according to the set options and returns the
  41. // result as a Value.
  42. // Wrap this in base::FooValue::From() to check the Value is of type Foo and
  43. // convert to a FooValue at the same time.
      // NOTE(review): presumably returns nullopt on failure, with details
      // available through the error accessors below -- confirm against the
      // implementation.
  44. Optional<Value> Parse(StringPiece input);
  45. // Returns the error code.
  46. JSONReader::JsonParseError error_code() const;
  47. // Returns the human-friendly error message.
  48. std::string GetErrorMessage() const;
  49. // Returns the error line number if a parse error happened. Otherwise always
  50. // returns 0.
  51. int error_line() const;
  52. // Returns the error column number if a parse error happened. Otherwise
  53. // always returns 0.
  54. int error_column() const;
  55. private:
      // The kinds of token that GetNextToken() returns and ParseToken() accepts.
  56. enum Token {
  57. T_OBJECT_BEGIN, // {
  58. T_OBJECT_END, // }
  59. T_ARRAY_BEGIN, // [
  60. T_ARRAY_END, // ]
  61. T_STRING,
  62. T_NUMBER,
  63. T_BOOL_TRUE, // true
  64. T_BOOL_FALSE, // false
  65. T_NULL, // null
  66. T_LIST_SEPARATOR, // ,
  67. T_OBJECT_PAIR_SEPARATOR, // :
  68. T_END_OF_INPUT,
  69. T_INVALID_TOKEN,
  70. };
  71. // A helper class used for parsing strings. One optimization performed is to
  72. // create base::Value with a StringPiece to avoid unnecessary std::string
  73. // copies. This is not possible if the input string needs to be decoded from
  74. // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
  75. // This class centralizes that logic.
  76. class StringBuilder {
  77. public:
  78. // Empty constructor. Used for creating a builder that is assigned to later.
  79. StringBuilder();
  80. // |pos| is the beginning of an input string, excluding the |"|.
  81. explicit StringBuilder(const char* pos);
  82. ~StringBuilder();
      // Move assignment from |other|.
  83. StringBuilder& operator=(StringBuilder&& other);
  84. // Appends the Unicode code point |point| to the string, either by
  85. // increasing the |length_| of the string if the string has not been
  86. // converted, or by appending the UTF-8 bytes for the code point.
  87. void Append(uint32_t point);
  88. // Converts the builder from its default StringPiece to a full std::string,
  89. // performing a copy. Once a builder is converted, it cannot be made a
  90. // StringPiece again.
  91. void Convert();
  92. // Returns the builder as a string, invalidating all state. This allows
  93. // the internal string buffer representation to be destructively moved
  94. // in cases where the builder will not be needed any more.
  95. std::string DestructiveAsString();
  96. private:
  97. // The beginning of the input string.
  98. const char* pos_;
  99. // Number of bytes in |pos_| that make up the string being built.
  100. size_t length_;
  101. // The copied string representation. Will be unset until Convert() is
  102. // called.
  103. base::Optional<std::string> string_;
  104. };
  105. // Returns the next |count| bytes of the input stream, or nullopt if fewer
  106. // than |count| bytes remain. Does not advance the parser position.
  107. Optional<StringPiece> PeekChars(size_t count);
  108. // Calls PeekChars() with a |count| of 1.
  109. Optional<char> PeekChar();
  110. // Returns the next |count| bytes of the input stream, or nullopt if fewer
  111. // than |count| bytes remain, and advances the parser position by |count|.
  112. Optional<StringPiece> ConsumeChars(size_t count);
  113. // Calls ConsumeChars() with a |count| of 1.
  114. Optional<char> ConsumeChar();
  115. // Returns a pointer to the current character position.
  116. const char* pos();
  117. // Skips over whitespace and comments to find the next token in the stream.
  118. // This does not advance the parser for non-whitespace or comment chars.
  119. Token GetNextToken();
  120. // Consumes whitespace characters and comments until the next character that
  121. // is neither is encountered.
  122. void EatWhitespaceAndComments();
  123. // Helper function that consumes a comment, assuming that the parser is
  124. // currently wound to a '/'.
  125. bool EatComment();
  126. // Calls GetNextToken() and then ParseToken().
  127. Optional<Value> ParseNextToken();
  128. // Takes a token that represents the start of a Value ("a structural token"
  129. // in RFC terms) and consumes it, returning the result as a Value.
  130. Optional<Value> ParseToken(Token token);
  131. // Assuming that the parser is currently wound to '{', this parses a JSON
  132. // object into a Value.
  133. Optional<Value> ConsumeDictionary();
  134. // Assuming that the parser is wound to '[', this parses a JSON list into a
  135. // Value.
  136. Optional<Value> ConsumeList();
  137. // Calls through ConsumeStringRaw and wraps it in a value.
  138. Optional<Value> ConsumeString();
  139. // Assuming that the parser is wound to a double quote, this parses a string,
  140. // decoding any escape sequences and converting UTF-16 to UTF-8. Returns true
  141. // on success and places the result into |out|. Returns false on failure with
  142. // error information set.
  143. bool ConsumeStringRaw(StringBuilder* out);
  144. // Helper function for ConsumeStringRaw() that consumes the next four or 10
  145. // bytes (parser is wound to the first character of a HEX sequence, with the
  146. // potential for consuming another \uXXXX for a surrogate). Returns true on
  147. // success and places the code point in |out_code_point|, and false on failure.
  148. bool DecodeUTF16(uint32_t* out_code_point);
  149. // Assuming that the parser is wound to the start of a valid JSON number,
  150. // this parses and converts it to either an int or double value.
  151. Optional<Value> ConsumeNumber();
  152. // Helper that reads characters that are ints. Returns true if a number was
  153. // read and false on error.
  154. bool ReadInt(bool allow_leading_zeros);
  155. // Consumes the literal values of |true|, |false|, and |null|, assuming the
  156. // parser is wound to the first character of any of those.
  157. Optional<Value> ConsumeLiteral();
  158. // Helper function that returns true if the byte sequence |match| can be
  159. // consumed at the current parser position. Returns false if there are fewer
  160. // than |match|-length bytes or if the sequence does not match, and the
  161. // parser state is unchanged.
  162. bool ConsumeIfMatch(StringPiece match);
  163. // Sets the error information to |code| at the current column, based on
  164. // |index_| and |index_last_line_|, with an optional positive/negative
  165. // adjustment by |column_adjust|.
  166. void ReportError(JSONReader::JsonParseError code, int column_adjust);
  167. // Given the line and column number of an error, formats one of the error
  168. // message constants from json_reader.h for human display.
  169. static std::string FormatErrorMessage(int line, int column,
  170. const std::string& description);
  171. // base::JSONParserOptions that control parsing.
  172. const int options_;
  173. // Maximum depth to parse.
  174. const size_t max_depth_;
  175. // The input stream being parsed. Note: Not guaranteed to be NUL-terminated.
  176. StringPiece input_;
  177. // The index in the input stream to which the parser is wound.
  178. int index_;
  179. // The number of times the parser has recursed (current stack depth).
  180. size_t stack_depth_;
  181. // The line number that the parser is at currently.
  182. int line_number_;
  183. // The last value of |index_| on the previous line.
  184. int index_last_line_;
  185. // Error information.
  186. JSONReader::JsonParseError error_code_;
  187. int error_line_;
  188. int error_column_;
      // Test access: the test class and the individual test cases listed below
      // are friends so that they can exercise the private helpers above.
  189. friend class JSONParserTest;
  190. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
  191. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
  192. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
  193. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
  194. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
  195. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
  196. FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
  197. DISALLOW_COPY_AND_ASSIGN(JSONParser);
  198. };
  199. // Used by the decoder when an invalid UTF-8 sequence is encountered. Only
       // declared here (extern); the definition lives outside this header.
  200. BASE_EXPORT extern const char kUnicodeReplacementString[];
  201. } // namespace internal
  202. } // namespace base
  203. #endif // BASE_JSON_JSON_PARSER_H_