Source code for keystone.openstack.common.strutils

# Copyright 2011 OpenStack Foundation.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""
System-level utilities and helper functions.
"""

import math
import re
import sys
import unicodedata

import six

from keystone.openstack.common.gettextutils import _


UNIT_PREFIX_EXPONENT = {
    'k': 1,
    'K': 1,
    'Ki': 1,
    'M': 2,
    'Mi': 2,
    'G': 3,
    'Gi': 3,
    'T': 4,
    'Ti': 4,
}
UNIT_SYSTEM_INFO = {
    'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')),
    'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')),
}

TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes')
FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no')

SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")


# NOTE(flaper87): The following 3 globals are used by `mask_password`
_SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password']

# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS = []
_FORMAT_PATTERNS = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
                    r'(<%(key)s>).*?(</%(key)s>)',
                    r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
                    r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
                    r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?[\'"])'
                    '.*?([\'"])',
                    r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']

for key in _SANITIZE_KEYS:
    for pattern in _FORMAT_PATTERNS:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS.append(reg_ex)


[docs]def int_from_bool_as_string(subject): """Interpret a string as a boolean and return either 1 or 0. Any string value in: ('True', 'true', 'On', 'on', '1') is interpreted as a boolean True. Useful for JSON-decoded stuff and config file parsing """ return bool_from_string(subject) and 1 or 0
[docs]def bool_from_string(subject, strict=False, default=False): """Interpret a string as a boolean. A case-insensitive match is performed such that strings matching 't', 'true', 'on', 'y', 'yes', or '1' are considered True and, when `strict=False`, anything else returns the value specified by 'default'. Useful for JSON-decoded stuff and config file parsing. If `strict=True`, unrecognized values, including None, will raise a ValueError which is useful when parsing values passed in from an API call. Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'. """ if not isinstance(subject, six.string_types): subject = six.text_type(subject) lowered = subject.strip().lower() if lowered in TRUE_STRINGS: return True elif lowered in FALSE_STRINGS: return False elif strict: acceptable = ', '.join( "'%s'" % s for s in sorted(TRUE_STRINGS + FALSE_STRINGS)) msg = _("Unrecognized value '%(val)s', acceptable values are:" " %(acceptable)s") % {'val': subject, 'acceptable': acceptable} raise ValueError(msg) else: return default
[docs]def safe_decode(text, incoming=None, errors='strict'): """Decodes incoming text/bytes string using `incoming` if they're not already unicode. :param incoming: Text's current encoding :param errors: Errors handling policy. See here for valid values http://docs.python.org/2/library/codecs.html :returns: text or a unicode `incoming` encoded representation of it. :raises TypeError: If text is not an instance of str """ if not isinstance(text, (six.string_types, six.binary_type)): raise TypeError("%s can't be decoded" % type(text)) if isinstance(text, six.text_type): return text if not incoming: incoming = (sys.stdin.encoding or sys.getdefaultencoding()) try: return text.decode(incoming, errors) except UnicodeDecodeError: # Note(flaper87) If we get here, it means that # sys.stdin.encoding / sys.getdefaultencoding # didn't return a suitable encoding to decode # text. This happens mostly when global LANG # var is not set correctly and there's no # default encoding. In this case, most likely # python will use ASCII or ANSI encoders as # default encodings but they won't be capable # of decoding non-ASCII characters. # # Also, UTF-8 is being used since it's an ASCII # extension. return text.decode('utf-8', errors)
[docs]def safe_encode(text, incoming=None, encoding='utf-8', errors='strict'): """Encodes incoming text/bytes string using `encoding`. If incoming is not specified, text is expected to be encoded with current python's default encoding. (`sys.getdefaultencoding`) :param incoming: Text's current encoding :param encoding: Expected encoding for text (Default UTF-8) :param errors: Errors handling policy. See here for valid values http://docs.python.org/2/library/codecs.html :returns: text or a bytestring `encoding` encoded representation of it. :raises TypeError: If text is not an instance of str """ if not isinstance(text, (six.string_types, six.binary_type)): raise TypeError("%s can't be encoded" % type(text)) if not incoming: incoming = (sys.stdin.encoding or sys.getdefaultencoding()) if isinstance(text, six.text_type): return text.encode(encoding, errors) elif text and encoding != incoming: # Decode text before encoding it with `encoding` text = safe_decode(text, incoming, errors) return text.encode(encoding, errors) else: return text
[docs]def string_to_bytes(text, unit_system='IEC', return_int=False): """Converts a string into an float representation of bytes. The units supported for IEC :: Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it) KB, KiB, MB, MiB, GB, GiB, TB, TiB The units supported for SI :: kb(it), Mb(it), Gb(it), Tb(it) kB, MB, GB, TB Note that the SI unit system does not support capital letter 'K' :param text: String input for bytes size conversion. :param unit_system: Unit system for byte size conversion. :param return_int: If True, returns integer representation of text in bytes. (default: decimal) :returns: Numerical representation of text in bytes. :raises ValueError: If text has an invalid value. """ try: base, reg_ex = UNIT_SYSTEM_INFO[unit_system] except KeyError: msg = _('Invalid unit system: "%s"') % unit_system raise ValueError(msg) match = reg_ex.match(text) if match: magnitude = float(match.group(1)) unit_prefix = match.group(2) if match.group(3) in ['b', 'bit']: magnitude /= 8 else: msg = _('Invalid string format: %s') % text raise ValueError(msg) if not unit_prefix: res = magnitude else: res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix]) if return_int: return int(math.ceil(res)) return res
[docs]def to_slug(value, incoming=None, errors="strict"): """Normalize string. Convert to lowercase, remove non-word characters, and convert spaces to hyphens. Inspired by Django's `slugify` filter. :param value: Text to slugify :param incoming: Text's current encoding :param errors: Errors handling policy. See here for valid values http://docs.python.org/2/library/codecs.html :returns: slugified unicode representation of `value` :raises TypeError: If text is not an instance of str """ value = safe_decode(value, incoming, errors) # NOTE(aababilov): no need to use safe_(encode|decode) here: # encodings are always "ascii", error handling is always "ignore" # and types are always known (first: unicode; second: str) value = unicodedata.normalize("NFKD", value).encode( "ascii", "ignore").decode("ascii") value = SLUGIFY_STRIP_RE.sub("", value).strip().lower() return SLUGIFY_HYPHENATE_RE.sub("-", value)
[docs]def mask_password(message, secret="***"): """Replace password with 'secret' in message. :param message: The string which includes security information. :param secret: value with which to replace passwords. :returns: The unicode value of message with the password fields masked. For example: >>> mask_password("'adminPass' : 'aaaaa'") "'adminPass' : '***'" >>> mask_password("'admin_pass' : 'aaaaa'") "'admin_pass' : '***'" >>> mask_password('"password" : "aaaaa"') '"password" : "***"' >>> mask_password("'original_password' : 'aaaaa'") "'original_password' : '***'" >>> mask_password("u'original_password' : u'aaaaa'") "u'original_password' : u'***'" """ message = six.text_type(message) # NOTE(ldbragst): Check to see if anything in message contains any key # specified in _SANITIZE_KEYS, if not then just return the message since # we don't have to mask any passwords. if not any(key in message for key in _SANITIZE_KEYS): return message secret = r'\g<1>' + secret + r'\g<2>' for pattern in _SANITIZE_PATTERNS: message = re.sub(pattern, secret, message) return message