123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- // Copyright 2019 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
- #ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
- #define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
- #include <map>
- #include <string>
- extern "C" {
- struct _xmlTextReader;
- }
- // XmlReader is a wrapper class around libxml's xmlReader,
- // providing a simplified C++ API.
- class XmlReader {
- public:
- XmlReader();
- ~XmlReader();
- // Load a document into the reader from memory. |input| must be UTF-8 and
- // exist for the lifetime of this object. Returns false on error.
- // TODO(evanm): handle encodings other than UTF-8?
- bool Load(const std::string& input);
- // Load a document into the reader from a file. Returns false on error.
- bool LoadFile(const std::string& file_path);
- // Wrappers around libxml functions -----------------------------------------
- // Read() advances to the next node. Returns false on EOF or error.
- bool Read();
- // Next(), when pointing at an opening tag, advances to the node after
- // the matching closing tag. Returns false on EOF or error.
- bool Next();
- // Return the depth in the tree of the current node.
- int Depth();
- // Returns the "local" name of the current node.
- // For a tag like <foo:bar>, this is the string "bar".
- std::string NodeName();
- // Returns the name of the current node.
- // For a tag like <foo:bar>, this is the string "foo:bar".
- std::string NodeFullName();
- // When pointing at a tag, retrieves the value of an attribute.
- // Returns false on failure.
- // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
- // returns true and |value| is set to "a".
- bool NodeAttribute(const char* name, std::string* value);
- // Populates |attributes| with all the attributes of the current tag and
- // returns true. Note that namespace declarations are not reported.
- // Returns false if there are no attributes in the current tag.
- bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);
- // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
- // in the current tag and returns true. Note that the default namespace, if
- // declared in the tag, is populated with an empty prefix.
- // Returns false if there are no namespaces declared in the current tag.
- bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);
- // Sets |content| to the content of the current node if it is a #text/#cdata
- // node.
- // Returns true if the current node is a #text/#cdata node, false otherwise.
- bool GetTextIfTextElement(std::string* content);
- bool GetTextIfCDataElement(std::string* content);
- // Returns true if the node is an element (e.g. <foo>). Note this returns
- // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
- // check for those.
- bool IsElement();
- // Returns true if the node is a closing element (e.g. </foo>).
- bool IsClosingElement();
- // Returns true if the current node is an empty (self-closing) element (e.g.
- // <foo/>).
- bool IsEmptyElement();
- // Helper functions not provided by libxml ----------------------------------
- // Return the string content within an element.
- // "<foo>bar</foo>" is a sequence of three nodes:
- // (1) open tag, (2) text, (3) close tag.
- // With the reader currently at (1), this returns the text of (2),
- // and advances past (3).
- // Returns false on error.
- bool ReadElementContent(std::string* content);
- // Skip to the next opening tag, returning false if we reach a closing
- // tag or EOF first.
- // If currently on an opening tag, doesn't advance at all.
- bool SkipToElement();
- private:
- // Returns the libxml node type of the current node.
- int NodeType();
- // The underlying libxml xmlTextReader.
- _xmlTextReader* reader_;
- };
- #endif // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
|