233 lines
6.8 KiB
C++
233 lines
6.8 KiB
C++
|
|
// Copyright 2019 The Chromium Authors
|
||
|
|
// Use of this source code is governed by a BSD-style license that can be
|
||
|
|
// found in the LICENSE file.
|
||
|
|
|
||
|
|
#include "third_party/libxml/chromium/xml_reader.h"
|
||
|
|
|
||
|
|
#include <libxml/xmlreader.h>
|
||
|
|
|
||
|
|
#include <vector>
|
||
|
|
|
||
|
|
#include "third_party/libxml/chromium/libxml_utils.h"
|
||
|
|
|
||
|
|
using internal::XmlStringToStdString;
|
||
|
|
|
||
|
|
namespace {
|
||
|
|
|
||
|
|
// Same as XmlStringToStdString but also frees |xmlstring|.
|
||
|
|
std::string XmlStringToStdStringWithDelete(xmlChar* xmlstring) {
|
||
|
|
std::string result = XmlStringToStdString(xmlstring);
|
||
|
|
xmlFree(xmlstring);
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Selects what GetNodeAttributeNames() collects from the current node.
enum GetAttributesQueryType {
  ATTRIBUTES,           // Attributes that are not namespace declarations.
  NAMESPACES_PREFIXES,  // Namespace declarations only.
};
|
||
|
|
|
||
|
|
// Populates |names| with the names of the attributes or prefix of namespaces
|
||
|
|
// (depending on |query_type|) for the current node in |reader|.
|
||
|
|
// Returns true if attribute names/namespace prefixes were retrieved, false
|
||
|
|
// otherwise.
|
||
|
|
// Note the strings in |names| are valid as long as |reader| is valid and should
|
||
|
|
// not be deleted.
|
||
|
|
bool GetNodeAttributeNames(xmlTextReaderPtr reader,
|
||
|
|
GetAttributesQueryType query_type,
|
||
|
|
std::vector<const xmlChar*>* names) {
|
||
|
|
if (xmlTextReaderHasAttributes(reader) <= 0)
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (!xmlTextReaderMoveToFirstAttribute(reader))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
do {
|
||
|
|
bool is_namespace = xmlTextReaderIsNamespaceDecl(reader) == 1;
|
||
|
|
if (query_type == NAMESPACES_PREFIXES && is_namespace) {
|
||
|
|
// Use the local name for namespaces so we don't include 'xmlns:".
|
||
|
|
names->push_back(xmlTextReaderConstLocalName(reader));
|
||
|
|
} else if (query_type == ATTRIBUTES && !is_namespace) {
|
||
|
|
// Use the fully qualified name for attributes.
|
||
|
|
names->push_back(xmlTextReaderConstName(reader));
|
||
|
|
}
|
||
|
|
} while (xmlTextReaderMoveToNextAttribute(reader) > 0);
|
||
|
|
|
||
|
|
// Move the reader from the attributes back to the containing element.
|
||
|
|
if (!xmlTextReaderMoveToElement(reader))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace
|
||
|
|
|
||
|
|
// Creates a reader with no document attached; |reader_| stays null until
// Load() or LoadFile() assigns it.
XmlReader::XmlReader() : reader_(nullptr) {}
|
||
|
|
|
||
|
|
// Frees the underlying libxml text reader, if one was ever created.
XmlReader::~XmlReader() {
  if (reader_ != nullptr) {
    xmlFreeTextReader(reader_);
  }
}
|
||
|
|
|
||
|
|
bool XmlReader::Load(const std::string& input) {
|
||
|
|
const int kParseOptions = XML_PARSE_NONET; // forbid network access
|
||
|
|
// TODO(evanm): Verify it's OK to pass nullptr for the URL and encoding.
|
||
|
|
// The libxml code allows for these, but it's unclear what effect is has.
|
||
|
|
reader_ = xmlReaderForMemory(input.data(), static_cast<int>(input.size()),
|
||
|
|
nullptr, nullptr, kParseOptions);
|
||
|
|
return reader_ != nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::LoadFile(const std::string& file_path) {
|
||
|
|
const int kParseOptions = XML_PARSE_NONET; // forbid network access
|
||
|
|
reader_ = xmlReaderForFile(file_path.c_str(), nullptr, kParseOptions);
|
||
|
|
return reader_ != nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::Read() {
|
||
|
|
return xmlTextReaderRead(reader_) == 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Next(), when pointing at an opening tag, advances to the node after
|
||
|
|
// the matching closing tag. Returns false on EOF or error.
|
||
|
|
bool XmlReader::Next() {
|
||
|
|
return xmlTextReaderNext(reader_) == 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Return the depth in the tree of the current node.
|
||
|
|
int XmlReader::Depth() {
|
||
|
|
return xmlTextReaderDepth(reader_);
|
||
|
|
}
|
||
|
|
|
||
|
|
std::string XmlReader::NodeName() {
|
||
|
|
return XmlStringToStdString(xmlTextReaderConstLocalName(reader_));
|
||
|
|
}
|
||
|
|
|
||
|
|
std::string XmlReader::NodeFullName() {
|
||
|
|
return XmlStringToStdString(xmlTextReaderConstName(reader_));
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::NodeAttribute(const char* name, std::string* out) {
|
||
|
|
xmlChar* value = xmlTextReaderGetAttribute(reader_, BAD_CAST name);
|
||
|
|
if (!value)
|
||
|
|
return false;
|
||
|
|
*out = XmlStringToStdStringWithDelete(value);
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::GetAllNodeAttributes(
|
||
|
|
std::map<std::string, std::string>* attributes) {
|
||
|
|
std::vector<const xmlChar*> attribute_names;
|
||
|
|
if (!GetNodeAttributeNames(reader_, ATTRIBUTES, &attribute_names))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
// Retrieve the attribute values.
|
||
|
|
for (const auto* name : attribute_names) {
|
||
|
|
(*attributes)[XmlStringToStdString(name)] = XmlStringToStdStringWithDelete(
|
||
|
|
xmlTextReaderGetAttribute(reader_, name));
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::GetAllDeclaredNamespaces(
|
||
|
|
std::map<std::string, std::string>* namespaces) {
|
||
|
|
std::vector<const xmlChar*> prefixes;
|
||
|
|
if (!GetNodeAttributeNames(reader_, NAMESPACES_PREFIXES, &prefixes))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
// Retrieve the namespace URIs.
|
||
|
|
for (const auto* prefix : prefixes) {
|
||
|
|
bool default_namespace = xmlStrcmp(prefix, BAD_CAST "xmlns") == 0;
|
||
|
|
|
||
|
|
std::string value = XmlStringToStdStringWithDelete(
|
||
|
|
xmlTextReaderLookupNamespace(reader_, prefix));
|
||
|
|
if (value.empty() && default_namespace) {
|
||
|
|
// Default namespace is treated as an attribute for some reason.
|
||
|
|
value = XmlStringToStdStringWithDelete(
|
||
|
|
xmlTextReaderGetAttribute(reader_, prefix));
|
||
|
|
}
|
||
|
|
(*namespaces)[default_namespace ? "" : XmlStringToStdString(prefix)] =
|
||
|
|
value;
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::GetTextIfTextElement(std::string* content) {
|
||
|
|
return GetTextFromNodeIfType(XML_READER_TYPE_TEXT, content);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::GetTextIfCDataElement(std::string* content) {
|
||
|
|
return GetTextFromNodeIfType(XML_READER_TYPE_CDATA, content);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::GetTextIfSignificantWhitespaceElement(std::string* content) {
|
||
|
|
return GetTextFromNodeIfType(XML_READER_TYPE_SIGNIFICANT_WHITESPACE, content);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::IsElement() {
|
||
|
|
return NodeType() == XML_READER_TYPE_ELEMENT;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::IsClosingElement() {
|
||
|
|
return NodeType() == XML_READER_TYPE_END_ELEMENT;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::IsEmptyElement() {
|
||
|
|
return xmlTextReaderIsEmptyElement(reader_);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::ReadElementContent(std::string* content) {
|
||
|
|
const int start_depth = Depth();
|
||
|
|
|
||
|
|
if (xmlTextReaderIsEmptyElement(reader_)) {
|
||
|
|
// Empty tag. We succesfully read the content, but it's
|
||
|
|
// empty.
|
||
|
|
*content = "";
|
||
|
|
// Advance past this empty tag.
|
||
|
|
if (!Read())
|
||
|
|
return false;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Advance past opening element tag.
|
||
|
|
if (!Read())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
// Read the content. We read up until we hit a closing tag at the
|
||
|
|
// same level as our starting point.
|
||
|
|
while (NodeType() != XML_READER_TYPE_END_ELEMENT || Depth() != start_depth) {
|
||
|
|
*content += XmlStringToStdString(xmlTextReaderConstValue(reader_));
|
||
|
|
if (!Read())
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Advance past ending element tag.
|
||
|
|
if (!Read())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::SkipToElement() {
|
||
|
|
do {
|
||
|
|
switch (NodeType()) {
|
||
|
|
case XML_READER_TYPE_ELEMENT:
|
||
|
|
return true;
|
||
|
|
case XML_READER_TYPE_END_ELEMENT:
|
||
|
|
return false;
|
||
|
|
default:
|
||
|
|
// Skip all other node types.
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
} while (Read());
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
int XmlReader::NodeType() {
|
||
|
|
return xmlTextReaderNodeType(reader_);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool XmlReader::GetTextFromNodeIfType(int node_type, std::string* content) {
|
||
|
|
if (NodeType() != node_type)
|
||
|
|
return false;
|
||
|
|
|
||
|
|
*content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
|
||
|
|
return true;
|
||
|
|
}
|