From 5bb0ae0ceb2acd9a37e8ea9c500e49500afa1d47 Mon Sep 17 00:00:00 2001 From: Andris Raugulis Date: Wed, 2 Nov 2016 18:23:55 +0200 Subject: [PATCH] Rework is/to ASCII and implement printable ASCII is/to functions. Add Utils tests. --- ssh-audit.py | 64 ++++++++++--- test/test_utils.py | 218 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 269 insertions(+), 13 deletions(-) create mode 100644 test/test_utils.py diff --git a/ssh-audit.py b/ssh-audit.py index 0dc8bb1..9100a31 100755 --- a/ssh-audit.py +++ b/ssh-audit.py @@ -1914,28 +1914,58 @@ class Utils(object): if isinstance(v, str): return v elif isinstance(v, text_type): - return v.encode(enc) + return v.encode(enc) # PY2 only elif isinstance(v, binary_type): - return v.decode(enc) + return v.decode(enc) # PY3 only raise cls._type_err(v, 'native text') + @classmethod + def _is_ascii(cls, v, char_filter=lambda x: x <= 127): + # type: (Union[text_type, str], Callable[[int], bool]) -> bool + r = False + if isinstance(v, (text_type, str)): + for c in v: + i = cls.ctoi(c) + if not char_filter(i): + return r + r = True + return r + + @classmethod + def _to_ascii(cls, v, char_filter=lambda x: x <= 127, errors='replace'): + # type: (Union[text_type, str], Callable[[int], bool], str) -> str + if isinstance(v, (text_type, str)): + r = bytearray() + for c in v: + i = cls.ctoi(c) + if char_filter(i): + r.append(i) + else: + if errors == 'ignore': + continue + r.append(63) + return cls.to_ntext(r.decode('ascii')) + raise cls._type_err(v, 'ascii') + @classmethod def is_ascii(cls, v): # type: (Union[text_type, str]) -> bool - try: - if isinstance(v, (text_type, str)): - v.encode('ascii') - return True - except UnicodeEncodeError: - pass - return False + return cls._is_ascii(v) @classmethod def to_ascii(cls, v, errors='replace'): # type: (Union[text_type, str], str) -> str - if isinstance(v, (text_type, str)): - return cls.to_ntext(v.encode('ascii', errors)) - raise cls._type_err(v, 'ascii') + return cls._to_ascii(v, errors=errors) + + @classmethod + def is_print_ascii(cls, v): + # type: (Union[text_type, str]) -> bool + return cls._is_ascii(v, lambda x: x >= 32 and x <= 126) + + @classmethod + def to_print_ascii(cls, v, errors='replace'): + # type: (Union[text_type, str], str) -> str + return cls._to_ascii(v, lambda x: x >= 32 and x <= 126, errors) @classmethod def unique_seq(cls, seq): @@ -1951,7 +1981,15 @@ class Utils(object): return tuple(x for x in seq if x not in seen and not _seen_add(x)) else: return [x for x in seq if x not in seen and not _seen_add(x)] - + + @classmethod + def ctoi(cls, c): + # type: (Union[text_type, str, int]) -> int + if isinstance(c, (text_type, str)): + return ord(c[0]) + else: + return c + @staticmethod def parse_int(v): # type: (Any) -> int diff --git a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 0000000..64cb07b --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +import pytest + + +# pylint: disable=attribute-defined-outside-init +class TestUtils(object): + @pytest.fixture(autouse=True) + def init(self, ssh_audit): + self.utils = ssh_audit.Utils + self.PY3 = sys.version_info >= (3,) + + def test_to_bytes_py2(self): + if self.PY3: + return + # binary_type (native str, bytes as str) + assert self.utils.to_bytes('fran\xc3\xa7ais') == 'fran\xc3\xa7ais' + assert self.utils.to_bytes(b'fran\xc3\xa7ais') == 'fran\xc3\xa7ais' + # text_type (unicode) + assert self.utils.to_bytes(u'fran\xe7ais') == 'fran\xc3\xa7ais' + # other + with pytest.raises(TypeError): + self.utils.to_bytes(123) + + def test_to_bytes_py3(self): + if not self.PY3: + return + # binary_type (bytes) + assert self.utils.to_bytes(b'fran\xc3\xa7ais') == b'fran\xc3\xa7ais' + # text_type (native str as unicode, unicode) + assert self.utils.to_bytes('fran\xe7ais') == b'fran\xc3\xa7ais' + assert self.utils.to_bytes(u'fran\xe7ais') == b'fran\xc3\xa7ais' + # other + with pytest.raises(TypeError): + self.utils.to_bytes(123) + + def test_to_utext_py2(self): + if self.PY3: + return + # binary_type (native str, bytes as str) + assert self.utils.to_utext('fran\xc3\xa7ais') == u'fran\xe7ais' + assert self.utils.to_utext(b'fran\xc3\xa7ais') == u'fran\xe7ais' + # text_type (unicode) + assert self.utils.to_utext(u'fran\xe7ais') == u'fran\xe7ais' + # other + with pytest.raises(TypeError): + self.utils.to_utext(123) + + def test_to_utext_py3(self): + if not self.PY3: + return + # binary_type (bytes) + assert self.utils.to_utext(b'fran\xc3\xa7ais') == u'fran\xe7ais' + # text_type (native str as unicode, unicode) + assert self.utils.to_utext('fran\xe7ais') == 'fran\xe7ais' + assert self.utils.to_utext(u'fran\xe7ais') == u'fran\xe7ais' + # other + with pytest.raises(TypeError): + self.utils.to_utext(123) + + def test_to_ntext_py2(self): + if self.PY3: + return + # str (native str, bytes as str) + assert self.utils.to_ntext('fran\xc3\xa7ais') == 'fran\xc3\xa7ais' + assert self.utils.to_ntext(b'fran\xc3\xa7ais') == 'fran\xc3\xa7ais' + # text_type (unicode) + assert self.utils.to_ntext(u'fran\xe7ais') == 'fran\xc3\xa7ais' + # other + with pytest.raises(TypeError): + self.utils.to_ntext(123) + + def test_to_ntext_py3(self): + if not self.PY3: + return + # str (native str) + assert self.utils.to_ntext('fran\xc3\xa7ais') == 'fran\xc3\xa7ais' + assert self.utils.to_ntext(u'fran\xe7ais') == 'fran\xe7ais' + # binary_type (bytes) + assert self.utils.to_utext(b'fran\xc3\xa7ais') == 'fran\xe7ais' + # other + with pytest.raises(TypeError): + self.utils.to_ntext(123) + + def test_is_ascii_py2(self): + if self.PY3: + return + # text_type (unicode) + assert self.utils.is_ascii(u'francais') is True + assert self.utils.is_ascii(u'fran\xe7ais') is False + # str + assert self.utils.is_ascii('francais') is True + assert self.utils.is_ascii('fran\xc3\xa7ais') is False + # other + assert self.utils.is_ascii(123) is False + + def test_is_ascii_py3(self): + if not self.PY3: + return + # text_type (str) + assert self.utils.is_ascii('francais') is True + assert self.utils.is_ascii(u'francais') is True + assert self.utils.is_ascii('fran\xe7ais') is False + assert self.utils.is_ascii(u'fran\xe7ais') is False + # other + assert self.utils.is_ascii(123) is False + + def test_to_ascii_py2(self): + if self.PY3: + return + # text_type (unicode) + assert self.utils.to_ascii(u'francais') == 'francais' + assert self.utils.to_ascii(u'fran\xe7ais') == 'fran?ais' + assert self.utils.to_ascii(u'fran\xe7ais', 'ignore') == 'franais' + # str + assert self.utils.to_ascii('francais') == 'francais' + assert self.utils.to_ascii('fran\xc3\xa7ais') == 'fran??ais' + assert self.utils.to_ascii('fran\xc3\xa7ais', 'ignore') == 'franais' + with pytest.raises(TypeError): + self.utils.to_ascii(123) + + def test_to_ascii_py3(self): + if not self.PY3: + return + # text_type (str) + assert self.utils.to_ascii('francais') == 'francais' + assert self.utils.to_ascii(u'francais') == 'francais' + assert self.utils.to_ascii('fran\xe7ais') == 'fran?ais' + assert self.utils.to_ascii('fran\xe7ais', 'ignore') == 'franais' + assert self.utils.to_ascii(u'fran\xe7ais') == 'fran?ais' + assert self.utils.to_ascii(u'fran\xe7ais', 'ignore') == 'franais' + with pytest.raises(TypeError): + self.utils.to_ascii(123) + + def test_is_print_ascii_py2(self): + if self.PY3: + return + # text_type (unicode) + assert self.utils.is_print_ascii(u'francais') is True + assert self.utils.is_print_ascii(u'francais\n') is False + assert self.utils.is_print_ascii(u'fran\xe7ais') is False + assert self.utils.is_print_ascii(u'fran\xe7ais\n') is False + # str + assert self.utils.is_print_ascii('francais') is True + assert self.utils.is_print_ascii('francais\n') is False + assert self.utils.is_print_ascii('fran\xc3\xa7ais') is False + # other + assert self.utils.is_print_ascii(123) is False + + def test_is_print_ascii_py3(self): + if not self.PY3: + return + # text_type (str) + assert self.utils.is_print_ascii('francais') is True + assert self.utils.is_print_ascii('francais\n') is False + assert self.utils.is_print_ascii(u'francais') is True + assert self.utils.is_print_ascii(u'francais\n') is False + assert self.utils.is_print_ascii('fran\xe7ais') is False + assert self.utils.is_print_ascii(u'fran\xe7ais') is False + # other + assert self.utils.is_print_ascii(123) is False + + def test_to_print_ascii_py2(self): + if self.PY3: + return + # text_type (unicode) + assert self.utils.to_print_ascii(u'francais') == 'francais' + assert self.utils.to_print_ascii(u'francais\n') == 'francais?' + assert self.utils.to_print_ascii(u'fran\xe7ais') == 'fran?ais' + assert self.utils.to_print_ascii(u'fran\xe7ais\n') == 'fran?ais?' + assert self.utils.to_print_ascii(u'fran\xe7ais', 'ignore') == 'franais' + assert self.utils.to_print_ascii(u'fran\xe7ais\n', 'ignore') == 'franais' + # str + assert self.utils.to_print_ascii('francais') == 'francais' + assert self.utils.to_print_ascii('francais\n') == 'francais?' + assert self.utils.to_print_ascii('fran\xc3\xa7ais') == 'fran??ais' + assert self.utils.to_print_ascii('fran\xc3\xa7ais\n') == 'fran??ais?' + assert self.utils.to_print_ascii('fran\xc3\xa7ais', 'ignore') == 'franais' + assert self.utils.to_print_ascii('fran\xc3\xa7ais\n', 'ignore') == 'franais' + with pytest.raises(TypeError): + self.utils.to_print_ascii(123) + + def test_to_print_ascii_py3(self): + if not self.PY3: + return + # text_type (str) + assert self.utils.to_print_ascii('francais') == 'francais' + assert self.utils.to_print_ascii('francais\n') == 'francais?' + assert self.utils.to_print_ascii(u'francais') == 'francais' + assert self.utils.to_print_ascii(u'francais\n') == 'francais?' + assert self.utils.to_print_ascii('fran\xe7ais') == 'fran?ais' + assert self.utils.to_print_ascii('fran\xe7ais\n') == 'fran?ais?' + assert self.utils.to_print_ascii('fran\xe7ais', 'ignore') == 'franais' + assert self.utils.to_print_ascii('fran\xe7ais\n', 'ignore') == 'franais' + assert self.utils.to_print_ascii(u'fran\xe7ais') == 'fran?ais' + assert self.utils.to_print_ascii(u'fran\xe7ais\n') == 'fran?ais?' + assert self.utils.to_print_ascii(u'fran\xe7ais', 'ignore') == 'franais' + assert self.utils.to_print_ascii(u'fran\xe7ais\n', 'ignore') == 'franais' + with pytest.raises(TypeError): + self.utils.to_print_ascii(123) + + def test_ctoi(self): + assert self.utils.ctoi(123) == 123 + assert self.utils.ctoi('ABC') == 65 + + def test_parse_int(self): + assert self.utils.parse_int(123) == 123 + assert self.utils.parse_int('123') == 123 + assert self.utils.parse_int(-123) == -123 + assert self.utils.parse_int('-123') == -123 + assert self.utils.parse_int('abc') == 0 + + def test_unique_seq(self): + assert self.utils.unique_seq((1, 2, 2, 3, 3, 3)) == (1, 2, 3) + assert self.utils.unique_seq((3, 3, 3, 2, 2, 1)) == (3, 2, 1) + assert self.utils.unique_seq([1, 2, 2, 3, 3, 3]) == [1, 2, 3] + assert self.utils.unique_seq([3, 3, 3, 2, 2, 1]) == [3, 2, 1]