#!/usr/bin/env python
#
# data_validation.py - Helper libraries to validate data sanity
#
# September 2013, Glenn F. Matthews
# Copyright (c) 2013-2016 the COT project developers.
# See the COPYRIGHT.txt file at the top-level directory of this distribution
# and at https://github.com/glennmatthews/cot/blob/master/COPYRIGHT.txt.
#
# This file is part of the Common OVF Tool (COT) project.
# It is subject to the license terms in the LICENSE.txt file found in the
# top-level directory of this distribution and at
# https://github.com/glennmatthews/cot/blob/master/LICENSE.txt. No part
# of COT, including this file, may be copied, modified, propagated, or
# distributed except according to the terms contained in the LICENSE.txt file.
"""Various helpers for data sanity checks.

**Exceptions**

.. autosummary::
  :nosignatures:

  InvalidInputError
  ValueMismatchError
  ValueUnsupportedError
  ValueTooLowError
  ValueTooHighError

**Functions**

.. autosummary::
  :nosignatures:

  alphanum_split
  canonicalize_helper
  canonicalize_ide_subtype
  canonicalize_nic_subtype
  canonicalize_scsi_subtype
  check_for_conflict
  device_address
  file_checksum
  mac_address
  match_or_die
  natural_sort
  no_whitespace
  non_negative_int
  positive_int
  to_string
  validate_int
  truth_value

**Constants**

.. autosummary::

  NIC_TYPES
"""
import xml.etree.ElementTree as ET
import hashlib
import re
from distutils.util import strtobool
def to_string(obj):
    """Get string representation of an object, special-case for XML Element.

    Args:
      obj (object): Object to represent as a string.

    Returns:
      str: string representation. For an XML element this is its serialized
      markup; for anything else it is ``str(obj)``.
    """
    if not ET.iselement(obj):
        return str(obj)
    serialized = ET.tostring(obj)
    # On Python 3, ET.tostring() returns bytes by default. Decode it so that
    # callers always get a native string (and error messages built from it
    # do not render as "b'<...>'"). The default serialization is US-ASCII
    # with character references, so an ASCII decode is sufficient.
    if not isinstance(serialized, str):
        serialized = serialized.decode('ascii')
    return serialized
def alphanum_split(key):
    """Split the key into a list of [text, int, text, int, ...].

    The key is split on runs of ASCII digits. Because the split pattern
    uses a capture group, the resulting tokens strictly alternate: even
    positions hold text (possibly the empty string) and odd positions hold
    the digit runs, which are converted to ``int``.

    Args:
      key (str): String to split.

    Returns:
      list: List of tokens
    """
    tokens = re.split('([0-9]+)', key)
    # Convert by position rather than by ``str.isdigit()``: ``isdigit()`` is
    # also true for non-ASCII digits (e.g. superscript two) that ``int()``
    # either rejects or would turn into an int at a "text" position, which
    # breaks comparison of the resulting lists when sorting.
    return [int(token) if position % 2 else token
            for position, token in enumerate(tokens)]
def natural_sort(l):
    """Return a new list sorted in "natural" (human) order.

    Numeric runs inside the strings are compared by value, so "10" sorts
    after "9" instead of between "1" and "2".
    See also http://nedbatchelder.com/blog/200712/human_sorting.html

    Args:
      l (list): List to sort

    Returns:
      list: Sorted list
    """
    ordered = list(l)
    # The sort key is the token list produced by alphanum_split
    ordered.sort(key=alphanum_split)
    return ordered
def match_or_die(first_label, first, second_label, second):
    """Raise an error unless "first" and "second" compare equal.

    Args:
      first_label (str): Descriptive label for :attr:`first`
      first (object): First object to compare
      second_label (str): Descriptive label for :attr:`second`
      second (object): Second object to compare

    Raises:
      ValueMismatchError: if ``first != second``
    """
    if first != second:
        message = "{0} {1} does not match {2} {3}".format(
            first_label, to_string(first),
            second_label, to_string(second))
        raise ValueMismatchError(message)
def canonicalize_helper(label, user_input, mappings, re_flags=0):
    """Map a user-provided string onto its canonical form.

    Each ``expr`` in ``mappings`` is tried in order with :func:`re.match`
    (i.e. anchored at the start of the input only); the canonical value
    paired with the first matching expression is returned.

    Args:
      label (str): Label to use in any error raised
      user_input (str): User-provided string
      mappings (list): List of ``(expr, canonical)`` pairs for mapping.
      re_flags (int): ``re.IGNORECASE``, etc. if desired

    Returns:
      str: The canonical string, or ``None`` if ``user_input`` is ``None``
      or the empty string.

    Raises:
      ValueUnsupportedError: If no ``expr`` in ``mappings`` matches the given
        ``user_input``.
    """
    # Nothing to canonicalize
    if user_input is None:
        return None
    if user_input == "":
        return None

    for pattern, canonical_name in mappings:
        if re.match(pattern, user_input, flags=re_flags) is not None:
            return canonical_name

    supported = [canonical_name for (_, canonical_name) in mappings]
    raise ValueUnsupportedError(label, user_input, supported)
def canonicalize_ide_subtype(subtype):
    """Convert an IDE controller subtype string to its canonical spelling.

    Matching is case-insensitive.

    Args:
      subtype (str): User-provided string

    Returns:
      str: The canonical string, one of:

      - ``PIIX4``
      - ``virtio``

    Raises:
      ValueUnsupportedError: If the canonical string cannot be determined
    """
    ide_mappings = [
        ("piix4", 'PIIX4'),
        ("virtio", 'virtio'),
    ]
    return canonicalize_helper("IDE controller subtype", subtype,
                               ide_mappings, re.IGNORECASE)
# (regular expression, canonical name) pairs for NIC subtypes.
# Order matters: canonicalize_helper() returns the first expression that
# matches at the start of the input, so "e1000e" must precede "e1000".
_NIC_MAPPINGS = [
    ("e1000e", 'E1000e'),
    ("e1000", 'E1000'),
    ("pcnet32", 'PCNet32'),
    ("virtio", 'virtio'),
    ("vmxnet *3", 'VMXNET3'),
]

NIC_TYPES = [canonical for _, canonical in _NIC_MAPPINGS]
"""List of NIC type strings recognized as canonical."""
def canonicalize_nic_subtype(subtype):
    """Convert a NIC subtype string to its canonical spelling.

    Matching is case-insensitive.

    Args:
      subtype (str): User-provided string

    Returns:
      str: The canonical string, one of :data:`NIC_TYPES`

    Raises:
      ValueUnsupportedError: If the canonical string cannot be determined

    .. seealso::
      :meth:`COT.platforms.GenericPlatform.validate_nic_type`
    """
    canonical = canonicalize_helper("NIC subtype", subtype,
                                    _NIC_MAPPINGS, re.IGNORECASE)
    return canonical
def canonicalize_scsi_subtype(subtype):
    """Convert a SCSI controller subtype string to its canonical spelling.

    Matching is case-insensitive and tolerates optional spaces between the
    words of a name (e.g. "lsi logic").

    Args:
      subtype (str): User-provided string

    Returns:
      str: The canonical string, one of:

      - ``buslogic``
      - ``lsilogic``
      - ``lsilogicsas``
      - ``virtio``
      - ``VirtualSCSI``

    Raises:
      ValueUnsupportedError: If the canonical string cannot be determined
    """
    # The "sas" variant is listed before plain "lsi logic" because the
    # helper returns the first expression matching the start of the input.
    scsi_mappings = [
        ("bus *logic", 'buslogic'),
        ("lsi *logic *sas", 'lsilogicsas'),
        ("lsi *logic", 'lsilogic'),
        ("virtio", 'virtio'),
        ("virtual *scsi", 'VirtualSCSI'),
    ]
    return canonicalize_helper("SCSI controller subtype", subtype,
                               scsi_mappings, re.IGNORECASE)
def check_for_conflict(label, li):
    """Verify that all non-``None`` entries of the list are equal.

    Every pair of non-``None`` entries is compared; the first unequal pair
    found triggers an error.

    Args:
      label (str): Descriptive label to be used if an error is raised
      li (list): List of object references (which may include ``None``)

    Raises:
      ValueMismatchError: if references differ

    Returns:
      object: the last non-``None`` entry, or ``None`` if there is none
    """
    candidates = [entry for entry in li if entry is not None]
    for index, current in enumerate(candidates):
        for other in candidates[index + 1:]:
            if current != other:
                raise ValueMismatchError(
                    "Found multiple candidates for the {0}: "
                    "\n{1}\n...and...\n{2}\nPlease correct or clarify "
                    "your search parameters."
                    .format(label, to_string(current), to_string(other)))
    if not candidates:
        return None
    return candidates[-1]
def file_checksum(path_or_obj, checksum_type):
    """Compute the hexadecimal checksum of a file's contents.

    If ``path_or_obj`` is an already-open file object it is read from its
    current position and left open; if it is a path, the file is opened in
    binary mode and closed again before returning.

    Args:
      path_or_obj (str): File path to checksum OR an opened file object
      checksum_type (str): Supported values are 'md5' and 'sha1'.

    Returns:
      str: Hexadecimal file checksum

    Raises:
      NotImplementedError: if ``checksum_type`` is not supported
    """
    if checksum_type not in ('md5', 'sha1'):
        raise NotImplementedError(
            "No support for generating checksum type {0}"
            .format(checksum_type))
    if checksum_type == 'md5':
        hasher = hashlib.md5()
    else:
        hasher = hashlib.sha1()

    # Probe for a file-like object; anything without read() is a path.
    try:
        path_or_obj.read(0)
    except AttributeError:
        stream = open(path_or_obj, 'rb')
        opened_here = True
    else:
        stream = path_or_obj
        opened_here = False

    chunk_size = 65536
    try:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:
                break
            hasher.update(chunk)
    finally:
        # Only close what this function opened
        if opened_here:
            stream.close()

    return hasher.hexdigest()
def mac_address(string):
    """Parser helper function for MAC address arguments.

    Check that the string (after stripping surrounding whitespace) is a
    MAC address in one of the recognized formats:

    * xx:xx:xx:xx:xx:xx
    * xx-xx-xx-xx-xx-xx
    * xxxx.xxxx.xxxx

    Args:
      string (str): String to validate

    Raises:
      InvalidInputError: if string is not a valid MAC address

    Returns:
      str: Validated string (with leading/trailing whitespace stripped)
    """
    string = string.strip()
    accepted_formats = (
        r"([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$",
        r"([0-9a-fA-F]{2}-){5}[0-9a-fA-F]{2}$",
        r"([0-9a-fA-F]{4}\.){2}[0-9a-fA-F]{4}$",
    )
    if not any(re.match(pattern, string) for pattern in accepted_formats):
        raise InvalidInputError("'{0}' is not a valid MAC address"
                                .format(string))
    # TODO - reformat string to a consistent output style?
    return string
def device_address(string):
    """Parser helper function for device address arguments.

    Check that the string (after stripping surrounding whitespace) has the
    form ``<digits>:<digits>``, such as '1:0'.

    Args:
      string (str): String to validate

    Raises:
      InvalidInputError: if string is not a well-formatted device address

    Returns:
      str: Validated string (with leading/trailing whitespace stripped)
    """
    string = string.strip()
    if re.match(r"\d+:\d+$", string) is None:
        message = "'{0}' is not a valid device address".format(string)
        raise InvalidInputError(message)
    return string
def no_whitespace(string):
    """Parser helper function for arguments not allowed to contain whitespace.

    Leading and trailing whitespace is stripped and tolerated; whitespace
    anywhere inside the remaining text is rejected.

    Args:
      string (str): String to validate

    Raises:
      InvalidInputError: if string contains internal whitespace

    Returns:
      str: Validated string (with leading/trailing whitespace stripped)
    """
    string = string.strip()
    # More than one whitespace-separated word means internal whitespace
    words = string.split()
    if len(words) > 1:
        message = "'{0}' contains invalid whitespace".format(string)
        raise InvalidInputError(message)
    return string
def validate_int(string,
                 minimum=None, maximum=None,
                 label="input"):
    """Parser helper function for validating integer arguments in a range.

    Args:
      string (str): String to convert to an integer and validate
      minimum (int): Minimum valid value (optional)
      maximum (int): Maximum valid value (optional)
      label (str): Label to include in any errors raised

    Returns:
      int: Validated integer value

    Raises:
      ValueUnsupportedError: if :attr:`string` can't be converted to int
      ValueTooLowError: if value is less than :attr:`minimum`
      ValueTooHighError: if value is more than :attr:`maximum`
    """
    try:
        value = int(string)
    except (TypeError, ValueError):
        # int() raises TypeError (not ValueError) for inputs such as None;
        # report those the same way as any other unconvertible input rather
        # than letting a raw TypeError escape from a validation helper.
        raise ValueUnsupportedError(label, string, "integer")
    if minimum is not None and value < minimum:
        raise ValueTooLowError(label, value, minimum)
    if maximum is not None and value > maximum:
        raise ValueTooHighError(label, value, maximum)
    return value
def non_negative_int(string):
    """Parser helper function for integer arguments that must be 0 or more.

    Shorthand for calling :func:`validate_int` with :attr:`minimum` set to 0.

    Args:
      string (str): String to validate.

    Returns:
      int: Validated integer value

    Raises:
      ValueUnsupportedError: if :attr:`string` can't be converted to int
      ValueTooLowError: if value is less than 0
    """
    lowest_allowed = 0
    return validate_int(string, minimum=lowest_allowed)
def positive_int(string):
    """Parser helper function for integer arguments that must be 1 or more.

    Shorthand for calling :func:`validate_int` with :attr:`minimum` set to 1.

    Args:
      string (str): String to validate.

    Returns:
      int: Validated integer value

    Raises:
      ValueUnsupportedError: if :attr:`string` can't be converted to int
      ValueTooLowError: if value is less than 1
    """
    lowest_allowed = 1
    return validate_int(string, minimum=lowest_allowed)
def truth_value(value):
    """Parser helper function for truth values like '0', 'y', or 'false'.

    Accepts the same (case-insensitive) strings as
    :func:`distutils.util.strtobool`, but is implemented locally so that it
    always returns a real ``bool`` and also accepts non-string input such
    as the integers ``0`` and ``1``.

    Args:
      value (str): String to parse/validate

    Returns:
      bool: True or False

    Raises:
      ValueUnsupportedError: if the value can't be parsed to a boolean.
    """
    if isinstance(value, bool):
        return value
    try:
        text = value.lower()
    except AttributeError:
        # Not a string (e.g. the int 1 or 0) - compare its string form
        text = str(value).lower()
    if text in ('y', 'yes', 't', 'true', 'on', '1'):
        return True
    if text in ('n', 'no', 'f', 'false', 'off', '0'):
        return False
    raise ValueUnsupportedError(
        "truth value",
        value,
        ['y', 'yes', 't', 'true', 'on', 1,
         'n', 'no', 'f', 'false', 'off', 0]
    )
# Some handy exception and error types we can throw
class ValueMismatchError(ValueError):
    """Raised when two values that should have been equal are not."""
class ValueUnsupportedError(InvalidInputError):
    """Raised when a provided value is not among the supported ones.

    Args:
      value_type (str): descriptive string
      actual_value (str): invalid value that was provided
      expected_value (object): expected/valid value(s) (item or list)
    """

    def __init__(self, value_type, actual_value, expected_value):
        """Store the details and initialize the base error with the message."""
        # The attributes must be set first: __str__ reads them below.
        self.expected_value = expected_value
        self.actual_value = actual_value
        self.value_type = value_type
        super(ValueUnsupportedError, self).__init__(str(self))

    def __str__(self):
        """Human-readable string representation."""
        template = "Unsupported value '{0}' for {1} - expected {2}"
        return template.format(self.actual_value, self.value_type,
                               self.expected_value)
class ValueTooHighError(ValueUnsupportedError):
    """Raised when a numerical input exceeds the highest supported value.

    Args:
      value_type (str): descriptive string
      actual_value (int): invalid value that was provided
      expected_value (int): maximum supported value
    """

    def __str__(self):
        """Human-readable string representation."""
        template = "Value '{0}' for {1} is too high - must be at most {2}"
        return template.format(self.actual_value, self.value_type,
                               self.expected_value)