xml_reader.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  // Copyright 2019 The Chromium Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style license that can be
  // found in the LICENSE file.
  4. #ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
  5. #define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
  6. #include <map>
  7. #include <string>
  // Forward declaration of libxml's reader struct so this header does not need
  // to include any libxml headers.
  extern "C" {
  struct _xmlTextReader;
  }

  // XmlReader is a wrapper class around libxml's xmlReader,
  // providing a simplified C++ API.
  class XmlReader {
   public:
    XmlReader();
    ~XmlReader();

    // Not copyable: each instance holds a raw |reader_| handle and declares its
    // own destructor, so an implicitly generated copy would leave two objects
    // sharing one underlying libxml reader.
    XmlReader(const XmlReader&) = delete;
    XmlReader& operator=(const XmlReader&) = delete;

    // Load a document into the reader from memory. |input| must be UTF-8 and
    // exist for the lifetime of this object. Returns false on error.
    // TODO(evanm): handle encodings other than UTF-8?
    bool Load(const std::string& input);

    // Load a document into the reader from a file. Returns false on error.
    bool LoadFile(const std::string& file_path);

    // Wrappers around libxml functions -----------------------------------------

    // Read() advances to the next node. Returns false on EOF or error.
    bool Read();

    // Next(), when pointing at an opening tag, advances to the node after
    // the matching closing tag. Returns false on EOF or error.
    bool Next();

    // Return the depth in the tree of the current node.
    int Depth();

    // Returns the "local" name of the current node.
    // For a tag like <foo:bar>, this is the string "bar".
    std::string NodeName();

    // Returns the name of the current node.
    // For a tag like <foo:bar>, this is the string "foo:bar".
    std::string NodeFullName();

    // When pointing at a tag, retrieves the value of an attribute.
    // Returns false on failure.
    // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
    // returns true and |value| is set to "a".
    bool NodeAttribute(const char* name, std::string* value);

    // Populates |attributes| with all the attributes of the current tag and
    // returns true. Note that namespace declarations are not reported.
    // Returns false if there are no attributes in the current tag.
    bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);

    // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
    // in the current tag and returns true. Note that the default namespace, if
    // declared in the tag, is populated with an empty prefix.
    // Returns false if there are no namespaces declared in the current tag.
    bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);

    // Sets |content| to the content of the current node if it is a #text node.
    // Returns true if the current node is a #text node, false otherwise.
    bool GetTextIfTextElement(std::string* content);

    // Sets |content| to the content of the current node if it is a #cdata node.
    // Returns true if the current node is a #cdata node, false otherwise.
    bool GetTextIfCDataElement(std::string* content);

    // Returns true if the node is an element (e.g. <foo>). Note this returns
    // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
    // check for those.
    bool IsElement();

    // Returns true if the node is a closing element (e.g. </foo>).
    bool IsClosingElement();

    // Returns true if the current node is an empty (self-closing) element (e.g.
    // <foo/>).
    bool IsEmptyElement();

    // Helper functions not provided by libxml ----------------------------------

    // Return the string content within an element.
    // "<foo>bar</foo>" is a sequence of three nodes:
    // (1) open tag, (2) text, (3) close tag.
    // With the reader currently at (1), this returns the text of (2),
    // and advances past (3).
    // Returns false on error.
    bool ReadElementContent(std::string* content);

    // Skip to the next opening tag, returning false if we reach a closing
    // tag or EOF first.
    // If currently on an opening tag, doesn't advance at all.
    bool SkipToElement();

   private:
    // Returns the libxml node type of the current node.
    int NodeType();

    // The underlying libxml xmlTextReader.
    _xmlTextReader* reader_;
  };
  83. #endif // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_