# Source code for COT.data_validation

#!/usr/bin/env python
#
# data_validation.py - Helper libraries to validate data sanity
#
# September 2013, Glenn F. Matthews
# Copyright (c) 2013-2016 the COT project developers.
# See the COPYRIGHT.txt file at the top-level directory of this distribution
# and at https://github.com/glennmatthews/cot/blob/master/COPYRIGHT.txt.
#
# This file is part of the Common OVF Tool (COT) project.
# It is subject to the license terms in the LICENSE.txt file found in the
# top-level directory of this distribution and at
# https://github.com/glennmatthews/cot/blob/master/LICENSE.txt. No part
# of COT, including this file, may be copied, modified, propagated, or
# distributed except according to the terms contained in the LICENSE.txt file.

"""Various helpers for data sanity checks.

**Exceptions**

.. autosummary::
  :nosignatures:

  InvalidInputError
  ValueMismatchError
  ValueUnsupportedError
  ValueTooLowError
  ValueTooHighError

**Functions**

.. autosummary::
  :nosignatures:

  alphanum_split
  canonicalize_helper
  canonicalize_ide_subtype
  canonicalize_nic_subtype
  canonicalize_scsi_subtype
  check_for_conflict
  device_address
  file_checksum
  mac_address
  match_or_die
  natural_sort
  no_whitespace
  non_negative_int
  positive_int
  to_string
  validate_int
  truth_value

**Constants**

.. autosummary::
  NIC_TYPES
"""

import xml.etree.ElementTree as ET
import hashlib
import re
from distutils.util import strtobool


def to_string(obj):
    """Get string representation of an object, special-case for XML Element.

    Args:
      obj (object): Object to represent as a string.

    Returns:
      str: string representation. For an XML element this is its
      serialized markup as text (never a ``bytes`` object).
    """
    if not ET.iselement(obj):
        return str(obj)

    serialized = ET.tostring(obj)
    # On Python 3, ET.tostring() returns bytes by default (us-ascii encoded,
    # with non-ASCII characters emitted as character references), which would
    # render as "b'<tag />'" when interpolated into a message. Decode it so
    # the documented "str" return type holds on both Python 2 and Python 3.
    if not isinstance(serialized, str):
        serialized = serialized.decode('us-ascii')
    return serialized
def alphanum_split(key):
    """Split the key into a list of [text, int, text, int, ...].

    Args:
      key (str): String to split.

    Returns:
      list: List of tokens, alternating text (possibly empty) and int.
      The first and last tokens are always text.
    """
    tokens = re.split('([0-9]+)', key)
    # re.split() with a single capturing group always alternates between
    # non-matching text (even positions) and the captured digit run (odd
    # positions). Converting by position, rather than by str.isdigit(),
    # ensures only ASCII digit runs become ints: isdigit() is also true for
    # characters such as superscripts that int() rejects, and for non-ASCII
    # digits that would otherwise put an int where text is expected.
    return [int(token) if index % 2 else token
            for index, token in enumerate(tokens)]
def natural_sort(l):
    """Return a copy of the given list in "natural" (human) order.

    Embedded numbers are compared by value, so "10" sorts after "9"
    instead of between "1" and "2" as it would in plain ASCII order.

    See also http://nedbatchelder.com/blog/200712/human_sorting.html

    Args:
      l (list): List to sort

    Returns:
      list: Sorted list
    """
    ordered = list(l)
    # alphanum_split() turns each entry into its text/number tokens, which
    # is what makes the comparison numeric where it matters.
    ordered.sort(key=alphanum_split)
    return ordered
def match_or_die(first_label, first, second_label, second):
    """Raise an error unless "first" and "second" are equal.

    Args:
      first_label (str): Descriptive label for :attr:`first`
      first (object): First object to compare
      second_label (str): Descriptive label for :attr:`second`
      second (object): Second object to compare

    Raises:
      ValueMismatchError: if ``first != second``
    """
    if first != second:
        message = "{0} {1} does not match {2} {3}".format(
            first_label, to_string(first),
            second_label, to_string(second))
        raise ValueMismatchError(message)
def canonicalize_helper(label, user_input, mappings, re_flags=0):
    """Map a user-provided string onto its canonical equivalent.

    Each ``expr`` in :attr:`mappings` is tried in order with
    :func:`re.match` (i.e., anchored at the start of the input only), and
    the ``canonical`` value paired with the first matching ``expr`` wins.

    Args:
      label (str): Label to use in any error raised
      user_input (str): User-provided string
      mappings (list): List of ``(expr, canonical)`` pairs for mapping.
      re_flags (int): ``re.IGNORECASE``, etc. if desired

    Returns:
      str: The canonical string, or ``None`` if :attr:`user_input` is
      ``None`` or the empty string.

    Raises:
      ValueUnsupportedError: If no ``expr`` in ``mappings`` matches
        the given ``user_input``.
    """
    # Nothing provided means nothing to canonicalize.
    if user_input is None or user_input == "":
        return None

    for pattern, canonical_form in mappings:
        if re.match(pattern, user_input, flags=re_flags):
            return canonical_form

    # No pattern matched: report every canonical value as the valid options.
    supported = [canonical_form for (_, canonical_form) in mappings]
    raise ValueUnsupportedError(label, user_input, supported)
def canonicalize_ide_subtype(subtype):
    """Convert the given IDE controller string to its canonical form.

    Matching is case-insensitive.

    Args:
      subtype (str): User-provided string

    Returns:
      str: The canonical string, one of:

      - ``PIIX4``
      - ``virtio``

    Raises:
      ValueUnsupportedError: If the canonical string cannot be determined
    """
    ide_mappings = [
        ("piix4", 'PIIX4'),
        ("virtio", 'virtio'),
    ]
    return canonicalize_helper("IDE controller subtype", subtype,
                               ide_mappings, re.IGNORECASE)
# Ordered (regular expression, canonical name) pairs for NIC subtypes.
# "e1000e" is listed ahead of "e1000" because the patterns are tried in
# order with a prefix match, and "e1000" would otherwise match "e1000e" too.
_NIC_MAPPINGS = [
    ("e1000e", 'E1000e'),
    ("e1000", 'E1000'),
    ("pcnet32", 'PCNet32'),
    ("virtio", 'virtio'),
    ("vmxnet *3", 'VMXNET3'),
]

NIC_TYPES = [canonical for (_, canonical) in _NIC_MAPPINGS]
"""List of NIC type strings recognized as canonical."""
def canonicalize_nic_subtype(subtype):
    """Convert the given NIC subtype string to its canonical form.

    Matching is case-insensitive.

    Args:
      subtype (str): User-provided string

    Returns:
      str: The canonical string, one of :data:`NIC_TYPES`

    Raises:
      ValueUnsupportedError: If the canonical string cannot be determined

    .. seealso::
       :meth:`COT.platforms.GenericPlatform.validate_nic_type`
    """
    return canonicalize_helper(
        "NIC subtype",
        subtype,
        _NIC_MAPPINGS,
        re_flags=re.IGNORECASE,
    )
def canonicalize_scsi_subtype(subtype):
    """Convert the given SCSI controller string to its canonical form.

    Matching is case-insensitive and tolerates optional spaces between
    words (e.g. "lsi logic sas").

    Args:
      subtype (str): User-provided string

    Returns:
      str: The canonical string, one of:

      - ``buslogic``
      - ``lsilogic``
      - ``lsilogicsas``
      - ``virtio``
      - ``VirtualSCSI``

    Raises:
      ValueUnsupportedError: If the canonical string cannot be determined
    """
    # "lsi *logic *sas" must precede "lsi *logic": patterns are tried in
    # order and the shorter one is a prefix of the longer one.
    scsi_mappings = [
        ("bus *logic", 'buslogic'),
        ("lsi *logic *sas", 'lsilogicsas'),
        ("lsi *logic", 'lsilogic'),
        ("virtio", 'virtio'),
        ("virtual *scsi", 'VirtualSCSI'),
    ]
    return canonicalize_helper("SCSI controller subtype", subtype,
                               scsi_mappings, re.IGNORECASE)
def check_for_conflict(label, li):
    """Verify that the list refers to at most one distinct object.

    Every non-``None`` entry is compared against every later non-``None``
    entry; the first unequal pair found aborts the check.

    Args:
      label (str): Descriptive label to be used if an error is raised
      li (list): List of object references (which may include ``None``)

    Raises:
      ValueMismatchError: if references differ

    Returns:
      object: the object (the last non-``None`` entry) or ``None``
    """
    found = None
    for index, candidate in enumerate(li):
        if candidate is None:
            continue
        for other in li[index + 1:]:
            if other is None:
                continue
            if candidate != other:
                raise ValueMismatchError(
                    "Found multiple candidates for the {0}: "
                    "\n{1}\n...and...\n{2}\nPlease correct or clarify "
                    "your search parameters."
                    .format(label, to_string(candidate), to_string(other)))
        found = candidate
    return found
def file_checksum(path_or_obj, checksum_type):
    """Get the checksum of the given file.

    Args:
      path_or_obj (str): File path to checksum OR an opened file object
        (which must yield bytes when read). A file object passed in is
        read from its current position and is left open.
      checksum_type (str): Supported values are 'md5', 'sha1', 'sha224',
        'sha256', 'sha384', and 'sha512'.

    Returns:
      str: Hexadecimal file checksum

    Raises:
      NotImplementedError: if :attr:`checksum_type` is not supported
    """
    # These constructors are always present in hashlib.
    supported_types = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')
    if checksum_type not in supported_types:
        raise NotImplementedError(
            "No support for generating checksum type {0}"
            .format(checksum_type))
    digest = getattr(hashlib, checksum_type)()

    # Is it a file or do we need to open it?
    try:
        path_or_obj.read(0)
    except AttributeError:
        file_obj = open(path_or_obj, 'rb')
        opened_here = True
    else:
        file_obj = path_or_obj
        opened_here = False

    blocksize = 65536
    try:
        while True:
            buf = file_obj.read(blocksize)
            if not buf:
                break
            digest.update(buf)
    finally:
        # Only close what we opened; the caller owns any object passed in.
        if opened_here:
            file_obj.close()

    return digest.hexdigest()
def mac_address(string):
    """Parser helper function for MAC address arguments.

    Checks that the string (ignoring surrounding whitespace) is a MAC
    address in one of the recognized formats:

    * xx:xx:xx:xx:xx:xx
    * xx-xx-xx-xx-xx-xx
    * xxxx.xxxx.xxxx

    Args:
      string (str): String to validate

    Raises:
      InvalidInputError: if string is not a valid MAC address

    Returns:
      str: Validated string (with leading/trailing whitespace stripped)
    """
    string = string.strip()
    accepted_formats = (
        r"([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$",
        r"([0-9a-fA-F]{2}-){5}[0-9a-fA-F]{2}$",
        r"([0-9a-fA-F]{4}\.){2}[0-9a-fA-F]{4}$",
    )
    if any(re.match(pattern, string) for pattern in accepted_formats):
        # TODO - reformat string to a consistent output style?
        return string
    raise InvalidInputError("'{0}' is not a valid MAC address"
                            .format(string))
def device_address(string):
    """Parser helper function for device address arguments.

    Checks that the string (ignoring surrounding whitespace) has the form
    ``<digits>:<digits>``, such as '1:0'.

    Args:
      string (str): String to validate

    Raises:
      InvalidInputError: if string is not a well-formatted device address

    Returns:
      str: Validated string (with leading/trailing whitespace stripped)
    """
    candidate = string.strip()
    if re.match(r"\d+:\d+$", candidate):
        return candidate
    raise InvalidInputError("'{0}' is not a valid device address"
                            .format(candidate))
def no_whitespace(string):
    """Parser helper function for arguments not allowed to contain whitespace.

    Leading and trailing whitespace is tolerated (and removed); whitespace
    anywhere else in the string is rejected.

    Args:
      string (str): String to validate

    Raises:
      InvalidInputError: if string contains internal whitespace

    Returns:
      str: Validated string (with leading/trailing whitespace stripped)
    """
    trimmed = string.strip()
    # A single split is enough: two pieces means whitespace was found.
    pieces = trimmed.split(None, 1)
    if len(pieces) > 1:
        raise InvalidInputError("'{0}' contains invalid whitespace"
                                .format(trimmed))
    return trimmed
def validate_int(string, minimum=None, maximum=None, label="input"):
    """Parser helper function for validating integer arguments in a range.

    Args:
      string (str): String to convert to an integer and validate
      minimum (int): Minimum valid value (optional)
      maximum (int): Maximum valid value (optional)
      label (str): Label to include in any errors raised

    Returns:
      int: Validated integer value

    Raises:
      ValueUnsupportedError: if :attr:`string` can't be converted to int
      ValueTooLowError: if value is less than :attr:`minimum`
      ValueTooHighError: if value is more than :attr:`maximum`
    """
    try:
        i = int(string)
    except (TypeError, ValueError):
        # int() raises TypeError (not ValueError) for inputs such as None
        # or a list; report those as an unsupported value as well, as the
        # documented contract promises.
        raise ValueUnsupportedError(label, string, "integer")
    if minimum is not None and i < minimum:
        raise ValueTooLowError(label, i, minimum)
    if maximum is not None and i > maximum:
        raise ValueTooHighError(label, i, maximum)
    return i
def non_negative_int(string):
    """Parser helper function for integer arguments that must be 0 or more.

    Shorthand for :func:`validate_int` with :attr:`minimum` set to 0.

    Args:
      string (str): String to validate.

    Returns:
      int: Validated integer value

    Raises:
      ValueUnsupportedError: if :attr:`string` can't be converted to int
      ValueTooLowError: if value is less than 0
    """
    value = validate_int(string, minimum=0)
    return value
def positive_int(string):
    """Parser helper function for integer arguments that must be 1 or more.

    Shorthand for :func:`validate_int` with :attr:`minimum` set to 1.

    Args:
      string (str): String to validate.

    Returns:
      int: Validated integer value

    Raises:
      ValueUnsupportedError: if :attr:`string` can't be converted to int
      ValueTooLowError: if value is less than 1
    """
    value = validate_int(string, minimum=1)
    return value
def truth_value(value):
    """Parser helper function for truth values like '0', 'y', or 'false'.

    Accepts the same case-insensitive spellings as
    :func:`distutils.util.strtobool` ('y', 'yes', 't', 'true', 'on', '1'
    and 'n', 'no', 'f', 'false', 'off', '0'), but is implemented locally
    so that it returns a real ``bool``, accepts the integers ``1`` and
    ``0``, and does not depend on the deprecated :mod:`distutils` package.

    Args:
      value (str): String to parse/validate

    Returns:
      bool: True or False

    Raises:
      ValueUnsupportedError: if the value can't be parsed to a boolean.
    """
    if isinstance(value, bool):
        return value

    try:
        text = value.lower()
    except AttributeError:
        # Not a string (e.g. the integers 1 and 0, which the error message
        # below advertises as valid): fall back to its string form.
        text = str(value).lower()

    if text in ('y', 'yes', 't', 'true', 'on', '1'):
        return True
    if text in ('n', 'no', 'f', 'false', 'off', '0'):
        return False

    raise ValueUnsupportedError(
        "truth value",
        value,
        ['y', 'yes', 't', 'true', 'on', 1,
         'n', 'no', 'f', 'false', 'off', 0]
    )
# Some handy exception and error types we can throw
class ValueMismatchError(ValueError):
    """Two values that were required to be equal turned out to differ."""
class InvalidInputError(ValueError):
    """Generic error raised when user input fails validation."""
class ValueUnsupportedError(InvalidInputError):
    """A value was provided that is not among the supported ones.

    Args:
      value_type (str): descriptive string
      actual_value (str): invalid value that was provided
      expected_value (object): expected/valid value(s) (item or list)
    """

    def __init__(self, value_type, actual_value, expected_value):
        """Store the details and initialize the base error message."""
        self.value_type = value_type
        self.actual_value = actual_value
        self.expected_value = expected_value
        # The attributes above must be assigned first: __str__ (including
        # any subclass override) reads them to build the message.
        message = str(self)
        super(ValueUnsupportedError, self).__init__(message)

    def __str__(self):
        """Human-readable string representation."""
        template = "Unsupported value '{0}' for {1} - expected {2}"
        return template.format(self.actual_value,
                               self.value_type,
                               self.expected_value)
class ValueTooLowError(ValueUnsupportedError):
    """A numerical input was below the lowest supported value.

    Args:
      value_type (str): descriptive string
      actual_value (int): invalid value that was provided
      expected_value (int): minimum supported value
    """

    def __str__(self):
        """Human-readable string representation."""
        template = "Value '{0}' for {1} is too low - must be at least {2}"
        return template.format(self.actual_value,
                               self.value_type,
                               self.expected_value)
class ValueTooHighError(ValueUnsupportedError):
    """A numerical input was above the highest supported value.

    Args:
      value_type (str): descriptive string
      actual_value (int): invalid value that was provided
      expected_value (int): maximum supported value
    """

    def __str__(self):
        """Human-readable string representation."""
        template = "Value '{0}' for {1} is too high - must be at most {2}"
        return template.format(self.actual_value,
                               self.value_type,
                               self.expected_value)