Source code for COT.xml_file

#!/usr/bin/env python
#
# xml_file.py - class for reading/editing/writing XML-based data
#
# August 2013, Glenn F. Matthews
# Copyright (c) 2013-2016 the COT project developers.
# See the COPYRIGHT.txt file at the top-level directory of this distribution
# and at https://github.com/glennmatthews/cot/blob/master/COPYRIGHT.txt.
#
# This file is part of the Common OVF Tool (COT) project.
# It is subject to the license terms in the LICENSE.txt file found in the
# top-level directory of this distribution and at
# https://github.com/glennmatthews/cot/blob/master/LICENSE.txt. No part
# of COT, including this file, may be copied, modified, propagated, or
# distributed except according to the terms contained in the LICENSE.txt file.

"""Reading, editing, and writing XML files."""

import xml.etree.ElementTree as ET
import logging
import re
import sys

logger = logging.getLogger(__name__)


def register_namespace(prefix, uri):
    """Associate a namespace prefix with a namespace URI for serialization.

    Args:
      prefix (str): Namespace prefix such as "ovf"
      uri (str): Namespace URI such as
          "http://schemas.dmtf.org/ovf/envelope/1"
    """
    try:
        registrar = ET.register_namespace
    except AttributeError:
        # Python 2.6 has no register_namespace() API, so update the
        # module's private prefix table directly.
        ET._namespace_map[uri] = prefix  # pylint: disable=protected-access
    else:
        registrar(prefix, uri)
class XML(object):
    """Class capable of reading, editing, and writing XML files."""

    @staticmethod
    def get_ns(text):
        """Get the namespace from a qualified XML element or attribute name.

        The value returned is the text between the leading ``{`` and ``}``
        of the name, i.e. the namespace URI rather than a short prefix
        such as "ovf".

        Args:
          text (str): Element name or attribute name, such as
              "{http://schemas.dmtf.org/ovf/envelope/1}Element".

        Returns:
          str: "" if no namespace is present (an error is also logged),
          or the namespace, such as
          "http://schemas.dmtf.org/ovf/envelope/1".
        """
        match = re.match(r"\{(.*)\}", str(text))
        if not match:
            logger.error("No namespace prefix on %s??", text)
            return ""
        return match.group(1)

    @staticmethod
    def strip_ns(text):
        """Remove the namespace from an XML element or attribute name.

        Args:
          text (str): Element name or attribute name, such as
              "{http://schemas.dmtf.org/ovf/envelope/1}Element".

        Returns:
          str: Bare name, such as "Element". If ``text`` carries no
          ``{...}`` namespace, an error is logged and ``text`` is returned
          unchanged.
        """
        match = re.match(r"\{.*\}(.*)", str(text))
        if match is None:
            logger.error("No namespace prefix on %s??", text)
            return text
        return match.group(1)

    def __init__(self, xml_file):
        """Read the given XML file and store it in memory.

        The memory representation is available as properties
        :attr:`tree` and :attr:`root`.

        Args:
          xml_file (str): File path to read.

        Raises:
          xml.etree.ElementTree.ParseError: if parsing fails under Python
              2.7 or later
          xml.parsers.expat.ExpatError: if parsing fails under Python 2.6
        """
        # Parse the XML into memory
        self.tree = ET.parse(xml_file)
        """:class:`xml.etree.ElementTree.ElementTree` describing this file."""
        self.root = self.tree.getroot()
        """Root :class:`xml.etree.ElementTree.Element` instance of the tree."""

    def write_xml(self, xml_file):
        """Write pretty XML out to the given file.

        The in-memory tree is re-indented in place (see
        :meth:`xml_reindent`) before being written as UTF-8.

        Args:
          xml_file (str): Filename to write to
        """
        logger.debug("Writing XML to %s", xml_file)

        # Pretty-print the XML for readability
        self.xml_reindent(self.root, 0)

        # We could make cleaner XML by passing "default_namespace=NSM['ovf']",
        # which will leave off the "ovf:" prefix on elements and attributes in
        # the main OVF namespace, but unfortunately, this cleaner XML is not
        # recognized as valid by ElementTree, resulting in a "write-once" OVF -
        # subsequent attempts to read and re-write the XML will give the error:
        #
        # ValueError: cannot use non-qualified names with default_namespace
        # option
        #
        # This is a bug - see http://bugs.python.org/issue17088
        if sys.hexversion >= 0x02070000:
            self.tree.write(xml_file, xml_declaration=True, encoding='utf-8')
        else:
            # 2.6 doesn't have the xml_declaration parameter. Sigh.
            self.tree.write(xml_file, encoding='utf-8')

    def xml_reindent(self, parent, depth):
        """Recursively add indentation to XML to make it look nice.

        Rewrites the ``text`` of every element that has children and the
        ``tail`` of every child element, so any existing whitespace (or
        other character data) in those positions is replaced.

        Args:
          parent (xml.etree.ElementTree.Element): Current parent element
          depth (int): How far down the rabbit hole we have recursed.
              Increments by 2 for each successive level of nesting.
              Callers start the recursion with 0 for the root element.
        """
        # Decide this up front: a childless root must still get its
        # trailing newline, which a check made after adjusting the depth
        # would miss.
        is_top_level = (depth == 0)
        child_indent = "\n" + (" " * (depth + 2))
        own_indent = "\n" + (" " * depth)

        last = None
        for elem in list(parent):
            elem.tail = child_indent
            self.xml_reindent(elem, depth + 2)
            last = elem

        if last is not None:
            # Parent indents to first child
            parent.text = child_indent
            # Last element indents back to parent
            last.tail = own_indent

        if is_top_level:
            # Add newline at end of file
            parent.tail = "\n"

    @classmethod
    def find_child(cls, parent, tag, attrib=None, required=False):
        """Find the unique child element under the specified parent element.

        Args:
          parent (xml.etree.ElementTree.Element): Parent element
          tag (str): Child tag to match on
          attrib (dict): Child attributes to match on
          required (boolean): Whether to raise an error if no child exists

        Raises:
          LookupError: if more than one matching child is found
          KeyError: if no matching child is found and :attr:`required`
              is True

        Returns:
          xml.etree.ElementTree.Element: Child element found, or None
        """
        matches = cls.find_all_children(parent, tag, attrib)
        if len(matches) > 1:
            raise LookupError(
                "Found multiple matching <{0}> children (each with "
                "attributes '{1}') under <{2}>:\n{3}"
                .format(XML.strip_ns(tag),
                        attrib,
                        XML.strip_ns(parent.tag),
                        "\n".join([ET.tostring(e).decode()
                                   for e in matches])))
        if not matches:
            if required:
                raise KeyError("Mandatory element <{0}> not found under <{1}>"
                               .format(XML.strip_ns(tag),
                                       XML.strip_ns(parent.tag)))
            return None
        return matches[0]

    @classmethod
    def find_all_children(cls, parent, tag, attrib=None):
        """Find all matching child elements under the specified parent element.

        Args:
          parent (xml.etree.ElementTree.Element): Parent element
          tag (iterable): Child tag string (or list of tags) to match on
          attrib (dict): Child attributes to match on

        Returns:
          list: (Possibly empty) list of matching child Elements
        """
        assert parent is not None
        if isinstance(tag, str):
            elements = parent.findall(tag)
            label = tag
        else:
            # Several candidate tags: gather matches for each, in the
            # order the tags were given.
            elements = []
            for t in tag:
                elements.extend(parent.findall(t))
            label = [XML.strip_ns(t) for t in tag]
        logger.debug("Examining %s %s elements under %s",
                     len(elements), label, XML.strip_ns(parent.tag))

        child_list = []
        for e in elements:
            found = True

            if attrib:
                for key, expected in attrib.items():
                    if e.get(key, None) != expected:
                        logger.debug("Attribute '%s' (%s) does not match "
                                     "expected value (%s)",
                                     XML.strip_ns(key), e.get(key, ""),
                                     expected)
                        found = False
                        # One mismatch is enough to reject this element
                        break

            if found:
                child_list.append(e)
        logger.debug("Found %s matching %s elements", len(child_list), label)
        return child_list

    @classmethod
    def add_child(cls, parent, new_child, ordering=None,
                  known_namespaces=None):
        """Add the given child element under the given parent element.

        Args:
          parent (xml.etree.ElementTree.Element): Parent element
          new_child (xml.etree.ElementTree.Element): Child element to attach
          ordering (list): (Optional) List describing the expected ordering
              of child tags under the parent; if a new child element is
              created, its placement under the parent will respect this
              sequence.
          known_namespaces (list): (Optional) List of well-understood XML
              namespaces. If a new child is created, and ``ordering`` is
              given, any tag (new or existing) that is encountered but not
              accounted for in ``ordering`` will result in COT logging a
              warning **if and only if** the unaccounted-for tag is in a
              known namespace.
        """
        if ordering and new_child.tag not in ordering:
            if (known_namespaces and
                    (XML.get_ns(new_child.tag) in known_namespaces)):
                logger.warning("New child '%s' is not in the list of "
                               "expected children under '%s': %s",
                               new_child.tag,
                               XML.strip_ns(parent.tag),
                               ordering)
            # Assume this is some sort of custom element, which
            # implicitly goes at the end of the list.
            ordering = None

        if not ordering:
            parent.append(new_child)
            return

        new_index = ordering.index(new_child.tag)
        for position, child in enumerate(list(parent)):
            try:
                child_index = ordering.index(child.tag)
            except ValueError:
                if (known_namespaces and
                        (XML.get_ns(child.tag) in known_namespaces)):
                    logger.warning(
                        "Existing child element '%s' is not in expected "
                        "list of children under '%s': \n%s",
                        child.tag,
                        XML.strip_ns(parent.tag),
                        ordering)
                # Assume this is some sort of custom element - all known
                # elements should implicitly come before it.
                break
            if child_index > new_index:
                # First existing child that must come after the new one
                break
        else:
            # No existing child needs to follow the new one
            parent.append(new_child)
            return

        parent.insert(position, new_child)

    @classmethod
    def set_or_make_child(cls, parent, tag, text=None, attrib=None,
                          ordering=None, known_namespaces=None):
        """Update or create a child element under the specified parent element.

        Args:
          parent (xml.etree.ElementTree.Element): Parent element
          tag (str): Child element text tag to find or create
          text (str): Value to set the child's text attribute to
          attrib (dict): Dict of child attributes to match on
              while searching and set in the final child element
          ordering (list): See :meth:`add_child`
          known_namespaces (list): See :meth:`add_child`

        Raises:
          LookupError: if more than one existing child matches (raised by
              :meth:`find_child`)

        Returns:
          xml.etree.ElementTree.Element: New or updated child Element.
        """
        assert parent is not None
        if attrib is None:
            attrib = {}
        element = cls.find_child(parent, tag, attrib=attrib)
        if element is None:
            logger.debug("Creating new %s under %s",
                         XML.strip_ns(tag), XML.strip_ns(parent.tag))
            element = ET.Element(tag)
            XML.add_child(parent, element, ordering, known_namespaces)
        if text is not None:
            element.text = str(text)
        for name, value in attrib.items():
            element.set(name, value)
        return element