OpenHands: chardet

Pytest Summary for test `.`

status	count
failed	367
xfailed	7
passed	9
total	383
collected	383

Failed pytests:

plane1-utf-32be.html-utf-32be]

plane1-utf-32be.html-utf-32be]

file_name = 'tests/UTF-32BE/plane1-utf-32be.html', encoding = 'utf-32be'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

nobom-utf32be.txt-utf-32be]

nobom-utf32be.txt-utf-32be]

file_name = 'tests/UTF-32BE/nobom-utf32be.txt', encoding = 'utf-32be'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
            result = chardet.detect(input_bytes)
            try:
                expected_unicode = input_bytes.decode(encoding)
            except LookupError:
                expected_unicode = ""
            try:
                detected_unicode = input_bytes.decode(result["encoding"])
            except (LookupError, UnicodeDecodeError, TypeError):
                detected_unicode = ""
        if result:
            encoding_match = (result["encoding"] or "").lower() == encoding
        else:
            encoding_match = False
        # Only care about mismatches that would actually result in different
        # behavior when decoding
        if not encoding_match and expected_unicode != detected_unicode:
            wrapped_expected = "\n".join(textwrap.wrap(expected_unicode, 100)) + "\n"
            wrapped_detected = "\n".join(textwrap.wrap(detected_unicode, 100)) + "\n"
            diff = "".join(
                list(
                    ndiff(
                        wrapped_expected.splitlines(True), wrapped_detected.splitlines(True)
                    )
                )[:20]
            )
            all_encodings = chardet.detect_all(input_bytes, ignore_threshold=True)
        else:
            diff = ""
            encoding_match = True
            all_encodings = [result]
>       assert encoding_match, (
            f"Expected {encoding}, but got {result} for {file_name}.  First 20 "
            f"lines of character differences: \n{diff}\n"
            f"All encodings: {pformat(all_encodings)}"
        )
E       AssertionError: Expected utf-32be, but got {'encoding': 'ascii', 'confidence': 1.0, 'language': ''} for tests/UTF-32BE/nobom-utf32be.txt.  First 20 lines of character differences: 
E         - DateTime,Bid,Ask  07/19/2015 21:00:15.469,1.083,1.08332  07/19/2015 21:00:16.949,1.08311,1.08332
E         - 07/19/2015 21:00:16.955,1.08311,1.08338  07/19/2015 21:00:17.120,1.08313,1.08338  07/19/2015
E         - 21:00:17.166,1.08313,1.0834  07/19/2015 21:00:17.205,1.08313,1.08339  07/19/2015
E         - 21:00:17.218,1.08312,1.08339  07/19/2015 21:00:17.469,1.08316,1.08339  07/19/2015
E         - 21:00:17.476,1.08316,1.08347  07/19/2015 21:00:17.505,1.08316,1.08344  07/19/2015
E         - 21:00:17.584,1.08316,1.08348  07/19/2015 21:00:18.905,1.08316,1.08351  07/19/2015
E         - 21:00:19.005,1.08336,1.08351  07/19/2015 21:00:19.011,1.08336,1.08403  07/19/2015
E         - 21:00:19.019,1.08334,1.08403  07/19/2015 21:00:19.025,1.08334,1.08406  07/19/2015
E         - 21:00:20.310,1.08307,1.08353  07/19/2015 21:00:20.317,1.08278,1.08353  07/19/2015
E         - 21:00:20.319,1.08307,1.08353
E         + DateTime,Bid,Ask 
E         + 07/19/2015 21:00:15.469,1
E         + .083,1.08332  07/19/2015 
E         + 21:00:16.949,1.08311,1.08
E         + 332  07/19/2015 21:00:16.
E         + 955,1.08311,1.08338 
E         + 07/19/2015 21:00:17.120,1
E         + .08313,1.08338 
E         + 07/19/2015 21:00:17.166,1
E         + .08313,1.0834 
E         
E         All encodings: [{'confidence': 1.0, 'encoding': 'ascii', 'language': ''}]
E       assert False

test.py:110: AssertionError

_ude_2.txt-iso-8859-9]

_ude_2.txt-iso-8859-9]

file_name = 'tests/iso-8859-9-turkish/_ude_2.txt', encoding = 'iso-8859-9'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

wikitop_tr_ISO-8859-9.txt-iso-8859-9]

wikitop_tr_ISO-8859-9.txt-iso-8859-9]

file_name = 'tests/iso-8859-9-turkish/wikitop_tr_ISO-8859-9.txt'
encoding = 'iso-8859-9'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

divxplanet.com.xml-iso-8859-9]

divxplanet.com.xml-iso-8859-9]

file_name = 'tests/iso-8859-9-turkish/divxplanet.com.xml'
encoding = 'iso-8859-9'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

subtitle.srt-iso-8859-9]

subtitle.srt-iso-8859-9]

file_name = 'tests/iso-8859-9-turkish/subtitle.srt', encoding = 'iso-8859-9'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-iso-8859-9]

_ude_1.txt-iso-8859-9]

file_name = 'tests/iso-8859-9-turkish/_ude_1.txt', encoding = 'iso-8859-9'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.9.xml-ibm866]

forum.template-toolkit.ru.9.xml-ibm866]

file_name = 'tests/IBM866/forum.template-toolkit.ru.9.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-ibm866]

_ude_1.txt-ibm866]

file_name = 'tests/IBM866/_ude_1.txt', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

money.rin.ru.xml-ibm866]

money.rin.ru.xml-ibm866]

file_name = 'tests/IBM866/money.rin.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

music.peeps.ru.xml-ibm866]

music.peeps.ru.xml-ibm866]

file_name = 'tests/IBM866/music.peeps.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.6.xml-ibm866]

forum.template-toolkit.ru.6.xml-ibm866]

file_name = 'tests/IBM866/forum.template-toolkit.ru.6.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.4.xml-ibm866]

forum.template-toolkit.ru.4.xml-ibm866]

file_name = 'tests/IBM866/forum.template-toolkit.ru.4.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

intertat.ru.xml-ibm866]

intertat.ru.xml-ibm866]

file_name = 'tests/IBM866/intertat.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

janulalife.blogspot.com.xml-ibm866]

janulalife.blogspot.com.xml-ibm866]

file_name = 'tests/IBM866/janulalife.blogspot.com.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.1.xml-ibm866]

forum.template-toolkit.ru.1.xml-ibm866]

file_name = 'tests/IBM866/forum.template-toolkit.ru.1.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.mlmaster.com.xml-ibm866]

blog.mlmaster.com.xml-ibm866]

file_name = 'tests/IBM866/blog.mlmaster.com.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

newsru.com.xml-ibm866]

newsru.com.xml-ibm866]

file_name = 'tests/IBM866/newsru.com.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

greek.ru.xml-ibm866]

greek.ru.xml-ibm866]

file_name = 'tests/IBM866/greek.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kapranoff.ru.xml-ibm866]

kapranoff.ru.xml-ibm866]

file_name = 'tests/IBM866/kapranoff.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aif.ru.health.xml-ibm866]

aif.ru.health.xml-ibm866]

file_name = 'tests/IBM866/aif.ru.health.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.8.xml-ibm866]

forum.template-toolkit.ru.8.xml-ibm866]

file_name = 'tests/IBM866/forum.template-toolkit.ru.8.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xDFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aug32.hole.ru.xml-ibm866]

aug32.hole.ru.xml-ibm866]

file_name = 'tests/IBM866/aug32.hole.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aviaport.ru.xml-ibm866]

aviaport.ru.xml-ibm866]

file_name = 'tests/IBM866/aviaport.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

susu.ac.ru.xml-ibm866]

susu.ac.ru.xml-ibm866]

file_name = 'tests/IBM866/susu.ac.ru.xml', encoding = 'ibm866'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

hardsoft.at.webry.info.xml-cp932]

hardsoft.at.webry.info.xml-cp932]

file_name = 'tests/CP932/hardsoft.at.webry.info.xml', encoding = 'cp932'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

y-moto.com.xml-cp932]

y-moto.com.xml-cp932]

file_name = 'tests/CP932/y-moto.com.xml', encoding = 'cp932'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

www2.chuo-u.ac.jp-suishin.xml-cp932]

www2.chuo-u.ac.jp-suishin.xml-cp932]

file_name = 'tests/CP932/www2.chuo-u.ac.jp-suishin.xml', encoding = 'cp932'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

balatonblog.typepad.com.xml-utf-8]

balatonblog.typepad.com.xml-utf-8]

file_name = 'tests/utf-8/balatonblog.typepad.com.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_2.txt-utf-8]

_ude_2.txt-utf-8]

file_name = 'tests/utf-8/_ude_2.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_greek.txt-utf-8]

_ude_greek.txt-utf-8]

file_name = 'tests/utf-8/_ude_greek.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_5.txt-utf-8]

_ude_5.txt-utf-8]

file_name = 'tests/utf-8/_ude_5.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_he1.txt-utf-8]

_ude_he1.txt-utf-8]

file_name = 'tests/utf-8/_ude_he1.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug426271_text-utf-8.html-utf-8]

_mozilla_bug426271_text-utf-8.html-utf-8]

file_name = 'tests/utf-8/_mozilla_bug426271_text-utf-8.html', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_he3.txt-utf-8]

_ude_he3.txt-utf-8]

file_name = 'tests/utf-8/_ude_he3.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_UTF-8_with_no_encoding_specified.html-utf-8]

_chromium_UTF-8_with_no_encoding_specified.html-utf-8]

file_name = 'tests/utf-8/_chromium_UTF-8_with_no_encoding_specified.html'
encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug306272_text.html-utf-8]

_mozilla_bug306272_text.html-utf-8]

file_name = 'tests/utf-8/_mozilla_bug306272_text.html', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

weblabor.hu.2.xml-utf-8]

weblabor.hu.2.xml-utf-8]

file_name = 'tests/utf-8/weblabor.hu.2.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-utf-8]

_ude_1.txt-utf-8]

file_name = 'tests/utf-8/_ude_1.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

boobooo.blogspot.com.xml-utf-8]

boobooo.blogspot.com.xml-utf-8]

file_name = 'tests/utf-8/boobooo.blogspot.com.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

weblabor.hu.xml-utf-8]

weblabor.hu.xml-utf-8]

file_name = 'tests/utf-8/weblabor.hu.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

anitabee.blogspot.com.xml-utf-8]

anitabee.blogspot.com.xml-utf-8]

file_name = 'tests/utf-8/anitabee.blogspot.com.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

pihgy.hu.xml-utf-8]

pihgy.hu.xml-utf-8]

file_name = 'tests/utf-8/pihgy.hu.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_3.txt-utf-8]

_ude_3.txt-utf-8]

file_name = 'tests/utf-8/_ude_3.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

linuxbox.hu.xml-utf-8]

linuxbox.hu.xml-utf-8]

file_name = 'tests/utf-8/linuxbox.hu.xml', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_russian.txt-utf-8]

_ude_russian.txt-utf-8]

file_name = 'tests/utf-8/_ude_russian.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_he2.txt-utf-8]

_ude_he2.txt-utf-8]

file_name = 'tests/utf-8/_ude_he2.txt', encoding = 'utf-8'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_iso1.txt-iso-2022-kr]

_ude_iso1.txt-iso-2022-kr]

file_name = 'tests/iso-2022-kr/_ude_iso1.txt', encoding = 'iso-2022-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:137: in feed
    self._esc_charset_prober = EscCharSetProber()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = 
lang_filter = None

    def __init__(self, lang_filter=None):
        super().__init__(lang_filter=lang_filter)
        self.coding_sm = []
>       if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
E       TypeError: unsupported operand type(s) for &: 'NoneType' and 'int'

chardet/escprober.py:16: TypeError

_ude_iso2.txt-iso-2022-kr]

_ude_iso2.txt-iso-2022-kr]

file_name = 'tests/iso-2022-kr/_ude_iso2.txt', encoding = 'iso-2022-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:137: in feed
    self._esc_charset_prober = EscCharSetProber()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = 
lang_filter = None

    def __init__(self, lang_filter=None):
        super().__init__(lang_filter=lang_filter)
        self.coding_sm = []
>       if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
E       TypeError: unsupported operand type(s) for &: 'NoneType' and 'int'

chardet/escprober.py:16: TypeError

contents-factory.com.xml-euc-jp]

contents-factory.com.xml-euc-jp]

file_name = 'tests/EUC-JP/contents-factory.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

artifact-jp.com.xml-euc-jp]

artifact-jp.com.xml-euc-jp]

file_name = 'tests/EUC-JP/artifact-jp.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

siesta.co.jp.aozora.xml-euc-jp]

siesta.co.jp.aozora.xml-euc-jp]

file_name = 'tests/EUC-JP/siesta.co.jp.aozora.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

tls.org.xml-euc-jp]

tls.org.xml-euc-jp]

file_name = 'tests/EUC-JP/tls.org.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug431054_text.html-euc-jp]

_mozilla_bug431054_text.html-euc-jp]

file_name = 'tests/EUC-JP/_mozilla_bug431054_text.html', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

azoz.org.xml-euc-jp]

azoz.org.xml-euc-jp]

file_name = 'tests/EUC-JP/azoz.org.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

atom.ycf.nanet.co.jp.xml-euc-jp]

atom.ycf.nanet.co.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/atom.ycf.nanet.co.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bphrs.net.xml-euc-jp]

bphrs.net.xml-euc-jp]

file_name = 'tests/EUC-JP/bphrs.net.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ch.kitaguni.tv.xml-euc-jp]

ch.kitaguni.tv.xml-euc-jp]

file_name = 'tests/EUC-JP/ch.kitaguni.tv.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

rdf.ycf.nanet.co.jp.xml-euc-jp]

rdf.ycf.nanet.co.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/rdf.ycf.nanet.co.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

manana.moo.jp.xml-euc-jp]

manana.moo.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/manana.moo.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

furusatonoeki.cutegirl.jp.xml-euc-jp]

furusatonoeki.cutegirl.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/furusatonoeki.cutegirl.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

overcube.com.xml-euc-jp]

overcube.com.xml-euc-jp]

file_name = 'tests/EUC-JP/overcube.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

pinkupa.com.xml-euc-jp]

pinkupa.com.xml-euc-jp]

file_name = 'tests/EUC-JP/pinkupa.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

mimizun.com.xml-euc-jp]

mimizun.com.xml-euc-jp]

file_name = 'tests/EUC-JP/mimizun.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

club.h14m.org.xml-euc-jp]

club.h14m.org.xml-euc-jp]

file_name = 'tests/EUC-JP/club.h14m.org.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aristrist.s57.xrea.com.xml-euc-jp]

aristrist.s57.xrea.com.xml-euc-jp]

file_name = 'tests/EUC-JP/aristrist.s57.xrea.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

azito.under.jp.xml-euc-jp]

azito.under.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/azito.under.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug620106_text.html-euc-jp]

_mozilla_bug620106_text.html-euc-jp]

file_name = 'tests/EUC-JP/_mozilla_bug620106_text.html', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

yukiboh.moo.jp.xml-euc-jp]

yukiboh.moo.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/yukiboh.moo.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.kabu-navi.com.atom.xml-euc-jp]

blog.kabu-navi.com.atom.xml-euc-jp]

file_name = 'tests/EUC-JP/blog.kabu-navi.com.atom.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

misuzilla.org.xml-euc-jp]

misuzilla.org.xml-euc-jp]

file_name = 'tests/EUC-JP/misuzilla.org.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

overcube.com.atom.xml-euc-jp]

overcube.com.atom.xml-euc-jp]

file_name = 'tests/EUC-JP/overcube.com.atom.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

arclamp.jp.xml-euc-jp]

arclamp.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/arclamp.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aivy.co.jp.xml-euc-jp]

aivy.co.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/aivy.co.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-euc-jp]

_ude_1.txt-euc-jp]

file_name = 'tests/EUC-JP/_ude_1.txt', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug426271_text-euc-jp.html-euc-jp]

_mozilla_bug426271_text-euc-jp.html-euc-jp]

file_name = 'tests/EUC-JP/_mozilla_bug426271_text-euc-jp.html'
encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

akaname.main.jp.xml-euc-jp]

akaname.main.jp.xml-euc-jp]

file_name = 'tests/EUC-JP/akaname.main.jp.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.kabu-navi.com.xml-euc-jp]

blog.kabu-navi.com.xml-euc-jp]

file_name = 'tests/EUC-JP/blog.kabu-navi.com.xml', encoding = 'euc-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

informator.org.xml-windows-1251]

informator.org.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/informator.org.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.9.xml-windows-1251]

bpm.cult.bg.9.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bpm.cult.bg.9.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

debian.gabrovo.com.news.xml-windows-1251]

debian.gabrovo.com.news.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/debian.gabrovo.com.news.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

linux-bg.org.xml-windows-1251]

linux-bg.org.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/linux-bg.org.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

debian.gabrovo.com.xml-windows-1251]

debian.gabrovo.com.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/debian.gabrovo.com.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ecloga.cult.bg.xml-windows-1251]

ecloga.cult.bg.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/ecloga.cult.bg.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.4.xml-windows-1251]

bpm.cult.bg.4.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bpm.cult.bg.4.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

doncho.net.comments.xml-windows-1251]

doncho.net.comments.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/doncho.net.comments.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

doncho.net.xml-windows-1251]

doncho.net.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/doncho.net.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.medusa.4.xml-windows-1251]

bpm.cult.bg.medusa.4.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bpm.cult.bg.medusa.4.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ide.li.xml-windows-1251]

ide.li.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/ide.li.xml', encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.3.xml-windows-1251]

bpm.cult.bg.3.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bpm.cult.bg.3.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bbc.co.uk.popshow.xml-windows-1251]

bbc.co.uk.popshow.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bbc.co.uk.popshow.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.xml-windows-1251]

bpm.cult.bg.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bpm.cult.bg.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

rinennor.org.xml-windows-1251]

rinennor.org.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/rinennor.org.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.2.xml-windows-1251]

bpm.cult.bg.2.xml-windows-1251]

file_name = 'tests/windows-1251-bulgarian/bpm.cult.bg.2.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

nobom-utf32le.txt-utf-32le]

nobom-utf32le.txt-utf-32le]

file_name = 'tests/UTF-32LE/nobom-utf32le.txt', encoding = 'utf-32le'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
            result = chardet.detect(input_bytes)
            try:
                expected_unicode = input_bytes.decode(encoding)
            except LookupError:
                expected_unicode = ""
            try:
                detected_unicode = input_bytes.decode(result["encoding"])
            except (LookupError, UnicodeDecodeError, TypeError):
                detected_unicode = ""
        if result:
            encoding_match = (result["encoding"] or "").lower() == encoding
        else:
            encoding_match = False
        # Only care about mismatches that would actually result in different
        # behavior when decoding
        if not encoding_match and expected_unicode != detected_unicode:
            wrapped_expected = "\n".join(textwrap.wrap(expected_unicode, 100)) + "\n"
            wrapped_detected = "\n".join(textwrap.wrap(detected_unicode, 100)) + "\n"
            diff = "".join(
                list(
                    ndiff(
                        wrapped_expected.splitlines(True), wrapped_detected.splitlines(True)
                    )
                )[:20]
            )
            all_encodings = chardet.detect_all(input_bytes, ignore_threshold=True)
        else:
            diff = ""
            encoding_match = True
            all_encodings = [result]
>       assert encoding_match, (
            f"Expected {encoding}, but got {result} for {file_name}.  First 20 "
            f"lines of character differences: \n{diff}\n"
            f"All encodings: {pformat(all_encodings)}"
        )
E       AssertionError: Expected utf-32le, but got {'encoding': 'ascii', 'confidence': 1.0, 'language': ''} for tests/UTF-32LE/nobom-utf32le.txt.  First 20 lines of character differences: 
E         - DateTime,Bid,Ask  07/19/2015 21:00:15.469,1.083,1.08332  07/19/2015 21:00:16.949,1.08311,1.08332
E         - 07/19/2015 21:00:16.955,1.08311,1.08338  07/19/2015 21:00:17.120,1.08313,1.08338  07/19/2015
E         - 21:00:17.166,1.08313,1.0834  07/19/2015 21:00:17.205,1.08313,1.08339  07/19/2015
E         - 21:00:17.218,1.08312,1.08339  07/19/2015 21:00:17.469,1.08316,1.08339  07/19/2015
E         - 21:00:17.476,1.08316,1.08347  07/19/2015 21:00:17.505,1.08316,1.08344  07/19/2015
E         - 21:00:17.584,1.08316,1.08348  07/19/2015 21:00:18.905,1.08316,1.08351  07/19/2015
E         - 21:00:19.005,1.08336,1.08351  07/19/2015 21:00:19.011,1.08336,1.08403  07/19/2015
E         - 21:00:19.019,1.08334,1.08403  07/19/2015 21:00:19.025,1.08334,1.08406  07/19/2015
E         - 21:00:20.310,1.08307,1.08353  07/19/2015 21:00:20.317,1.08278,1.08353  07/19/2015
E         - 21:00:20.319,1.08307,1.08353
E         + DateTime,Bid,Ask 
E         + 07/19/2015 21:00:15.469,1
E         + .083,1.08332  07/19/2015 
E         + 21:00:16.949,1.08311,1.08
E         + 332  07/19/2015 21:00:16.
E         + 955,1.08311,1.08338 
E         + 07/19/2015 21:00:17.120,1
E         + .08313,1.08338 
E         + 07/19/2015 21:00:17.166,1
E         + .08313,1.0834 
E         
E         All encodings: [{'confidence': 1.0, 'encoding': 'ascii', 'language': ''}]
E       assert False

test.py:110: AssertionError

plane1-utf-32le.html-utf-32le]

plane1-utf-32le.html-utf-32le]

file_name = 'tests/UTF-32LE/plane1-utf-32le.html', encoding = 'utf-32le'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

pcplus.co.il.xml-windows-1255]

pcplus.co.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/pcplus.co.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

sharks.co.il.xml-windows-1255]

sharks.co.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/sharks.co.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

whatsup.org.il.xml-windows-1255]

whatsup.org.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/whatsup.org.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

info.org.il.xml-windows-1255]

info.org.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/info.org.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

notes.co.il.50.xml-windows-1255]

notes.co.il.50.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/notes.co.il.50.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

notes.co.il.8.xml-windows-1255]

notes.co.il.8.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/notes.co.il.8.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

law.co.il.xml-windows-1255]

law.co.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/law.co.il.xml', encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

neviim.net.xml-windows-1255]

neviim.net.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/neviim.net.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

notes.co.il.6.xml-windows-1255]

notes.co.il.6.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/notes.co.il.6.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

maakav.org.xml-windows-1255]

maakav.org.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/maakav.org.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

exego.net.2.xml-windows-1255]

exego.net.2.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/exego.net.2.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_ISO-8859-8_with_no_encoding_specified.html-windows-1255]

_chromium_ISO-8859-8_with_no_encoding_specified.html-windows-1255]

file_name = 'tests/windows-1255-hebrew/_chromium_ISO-8859-8_with_no_encoding_specified.html'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

hagada.org.il.xml-windows-1255]

hagada.org.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/hagada.org.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

hevra.org.il.xml-windows-1255]

hevra.org.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/hevra.org.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_he2.txt-windows-1255]

_ude_he2.txt-windows-1255]

file_name = 'tests/windows-1255-hebrew/_ude_he2.txt', encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

carshops.co.il.xml-windows-1255]

carshops.co.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/carshops.co.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

notes.co.il.7.xml-windows-1255]

notes.co.il.7.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/notes.co.il.7.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

infomed.co.il.xml-windows-1255]

infomed.co.il.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/infomed.co.il.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_he1.txt-windows-1255]

_ude_he1.txt-windows-1255]

file_name = 'tests/windows-1255-hebrew/_ude_he1.txt', encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

hydepark.hevre.co.il.7957.xml-windows-1255]

hydepark.hevre.co.il.7957.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/hydepark.hevre.co.il.7957.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_he3.txt-windows-1255]

_ude_he3.txt-windows-1255]

file_name = 'tests/windows-1255-hebrew/_ude_he3.txt', encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

halemo.net.edoar.xml-windows-1255]

halemo.net.edoar.xml-windows-1255]

file_name = 'tests/windows-1255-hebrew/halemo.net.edoar.xml'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_windows-1255_with_no_encoding_specified.html-windows-1255]

_chromium_windows-1255_with_no_encoding_specified.html-windows-1255]

file_name = 'tests/windows-1255-hebrew/_chromium_windows-1255_with_no_encoding_specified.html'
encoding = 'windows-1255'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_euc-tw1.txt-euc-tw]

_ude_euc-tw1.txt-euc-tw]

file_name = 'tests/EUC-TW/_ude_euc-tw1.txt', encoding = 'euc-tw'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

intertat.ru.xml-windows-1251]

intertat.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/intertat.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

money.rin.ru.xml-windows-1251]

money.rin.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/money.rin.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aif.ru.health.xml-windows-1251]

aif.ru.health.xml-windows-1251]

file_name = 'tests/windows-1251-russian/aif.ru.health.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aviaport.ru.xml-windows-1251]

aviaport.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/aviaport.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

newsru.com.xml-windows-1251]

newsru.com.xml-windows-1251]

file_name = 'tests/windows-1251-russian/newsru.com.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

music.peeps.ru.xml-windows-1251]

music.peeps.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/music.peeps.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

janulalife.blogspot.com.xml-windows-1251]

janulalife.blogspot.com.xml-windows-1251]

file_name = 'tests/windows-1251-russian/janulalife.blogspot.com.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_windows-1251_with_no_encoding_specified.html-windows-1251]

_chromium_windows-1251_with_no_encoding_specified.html-windows-1251]

file_name = 'tests/windows-1251-russian/_chromium_windows-1251_with_no_encoding_specified.html'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

greek.ru.xml-windows-1251]

greek.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/greek.ru.xml', encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

anthropology.ru.xml-windows-1251]

anthropology.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/anthropology.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kapranoff.ru.xml-windows-1251]

kapranoff.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/kapranoff.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.4.xml-windows-1251]

forum.template-toolkit.ru.4.xml-windows-1251]

file_name = 'tests/windows-1251-russian/forum.template-toolkit.ru.4.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.6.xml-windows-1251]

forum.template-toolkit.ru.6.xml-windows-1251]

file_name = 'tests/windows-1251-russian/forum.template-toolkit.ru.6.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-windows-1251]

_ude_1.txt-windows-1251]

file_name = 'tests/windows-1251-russian/_ude_1.txt', encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.9.xml-windows-1251]

forum.template-toolkit.ru.9.xml-windows-1251]

file_name = 'tests/windows-1251-russian/forum.template-toolkit.ru.9.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.1.xml-windows-1251]

forum.template-toolkit.ru.1.xml-windows-1251]

file_name = 'tests/windows-1251-russian/forum.template-toolkit.ru.1.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.8.xml-windows-1251]

forum.template-toolkit.ru.8.xml-windows-1251]

file_name = 'tests/windows-1251-russian/forum.template-toolkit.ru.8.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aug32.hole.ru.xml-windows-1251]

aug32.hole.ru.xml-windows-1251]

file_name = 'tests/windows-1251-russian/aug32.hole.ru.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.mlmaster.com.xml-windows-1251]

blog.mlmaster.com.xml-windows-1251]

file_name = 'tests/windows-1251-russian/blog.mlmaster.com.xml'
encoding = 'windows-1251'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-windows-1252]

_ude_1.txt-windows-1252]

file_name = 'tests/windows-1252/_ude_1.txt', encoding = 'windows-1252'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

github_bug_9.txt-windows-1252]

github_bug_9.txt-windows-1252]

file_name = 'tests/windows-1252/github_bug_9.txt', encoding = 'windows-1252'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug421271_text.html-windows-1252]

_mozilla_bug421271_text.html-windows-1252]

file_name = 'tests/windows-1252/_mozilla_bug421271_text.html'
encoding = 'windows-1252'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_2.txt-windows-1252]

_ude_2.txt-windows-1252]

file_name = 'tests/windows-1252/_ude_2.txt', encoding = 'windows-1252'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_4.txt-iso-8859-1]

_ude_4.txt-iso-8859-1]

file_name = 'tests/iso-8859-1/_ude_4.txt', encoding = 'iso-8859-1'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_2.txt-iso-8859-1]

_ude_2.txt-iso-8859-1]

file_name = 'tests/iso-8859-1/_ude_2.txt', encoding = 'iso-8859-1'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_3.txt-iso-8859-1]

_ude_3.txt-iso-8859-1]

file_name = 'tests/iso-8859-1/_ude_3.txt', encoding = 'iso-8859-1'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_6.txt-iso-8859-1]

_ude_6.txt-iso-8859-1]

file_name = 'tests/iso-8859-1/_ude_6.txt', encoding = 'iso-8859-1'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_5.txt-iso-8859-1]

_ude_5.txt-iso-8859-1]

file_name = 'tests/iso-8859-1/_ude_5.txt', encoding = 'iso-8859-1'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-iso-8859-1]

_ude_1.txt-iso-8859-1]

file_name = 'tests/iso-8859-1/_ude_1.txt', encoding = 'iso-8859-1'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

greek.ru.xml-koi8-r]

greek.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/greek.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.mlmaster.com.xml-koi8-r]

blog.mlmaster.com.xml-koi8-r]

file_name = 'tests/KOI8-R/blog.mlmaster.com.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aviaport.ru.xml-koi8-r]

aviaport.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/aviaport.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-koi8-r]

_ude_1.txt-koi8-r]

file_name = 'tests/KOI8-R/_ude_1.txt', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.1.xml-koi8-r]

forum.template-toolkit.ru.1.xml-koi8-r]

file_name = 'tests/KOI8-R/forum.template-toolkit.ru.1.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aug32.hole.ru.xml-koi8-r]

aug32.hole.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/aug32.hole.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aif.ru.health.xml-koi8-r]

aif.ru.health.xml-koi8-r]

file_name = 'tests/KOI8-R/aif.ru.health.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_KOI8-R_with_no_encoding_specified.html-koi8-r]

_chromium_KOI8-R_with_no_encoding_specified.html-koi8-r]

file_name = 'tests/KOI8-R/_chromium_KOI8-R_with_no_encoding_specified.html'
encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

janulalife.blogspot.com.xml-koi8-r]

janulalife.blogspot.com.xml-koi8-r]

file_name = 'tests/KOI8-R/janulalife.blogspot.com.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.9.xml-koi8-r]

forum.template-toolkit.ru.9.xml-koi8-r]

file_name = 'tests/KOI8-R/forum.template-toolkit.ru.9.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

newsru.com.xml-koi8-r]

newsru.com.xml-koi8-r]

file_name = 'tests/KOI8-R/newsru.com.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

koi.kinder.ru.xml-koi8-r]

koi.kinder.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/koi.kinder.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.4.xml-koi8-r]

forum.template-toolkit.ru.4.xml-koi8-r]

file_name = 'tests/KOI8-R/forum.template-toolkit.ru.4.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kapranoff.ru.xml-koi8-r]

kapranoff.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/kapranoff.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.6.xml-koi8-r]

forum.template-toolkit.ru.6.xml-koi8-r]

file_name = 'tests/KOI8-R/forum.template-toolkit.ru.6.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

music.peeps.ru.xml-koi8-r]

music.peeps.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/music.peeps.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.8.xml-koi8-r]

forum.template-toolkit.ru.8.xml-koi8-r]

file_name = 'tests/KOI8-R/forum.template-toolkit.ru.8.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

susu.ac.ru.xml-koi8-r]

susu.ac.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/susu.ac.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

intertat.ru.xml-koi8-r]

intertat.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/intertat.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

money.rin.ru.xml-koi8-r]

money.rin.ru.xml-koi8-r]

file_name = 'tests/KOI8-R/money.rin.ru.xml', encoding = 'koi8-r'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.mrt.xml-iso-8859-7]

naftemporiki.gr.mrt.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.mrt.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.wld.xml-iso-8859-7]

naftemporiki.gr.wld.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.wld.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

disabled.gr.xml-iso-8859-7]

disabled.gr.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/disabled.gr.xml', encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

hotstation.gr.xml-iso-8859-7]

hotstation.gr.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/hotstation.gr.xml', encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.bus.xml-iso-8859-7]

naftemporiki.gr.bus.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.bus.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.cmm.xml-iso-8859-7]

naftemporiki.gr.cmm.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.cmm.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.spo.xml-iso-8859-7]

naftemporiki.gr.spo.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.spo.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_3.txt-iso-8859-7]

_ude_3.txt-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/_ude_3.txt', encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_2.txt-iso-8859-7]

_ude_2.txt-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/_ude_2.txt', encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.mrk.xml-iso-8859-7]

naftemporiki.gr.mrk.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.mrk.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

naftemporiki.gr.fin.xml-iso-8859-7]

naftemporiki.gr.fin.xml-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/naftemporiki.gr.fin.xml'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_ISO-8859-7_with_no_encoding_specified.html-iso-8859-7]

_chromium_ISO-8859-7_with_no_encoding_specified.html-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/_chromium_ISO-8859-7_with_no_encoding_specified.html'
encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-iso-8859-7]

_ude_1.txt-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/_ude_1.txt', encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_greek.txt-iso-8859-7]

_ude_greek.txt-iso-8859-7]

file_name = 'tests/iso-8859-7-greek/_ude_greek.txt', encoding = 'iso-8859-7'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

andore.com.money.xml-shift_jis]

andore.com.money.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/andore.com.money.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

10e.org.xml-shift_jis]

10e.org.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/10e.org.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

setsuzei119.jp.xml-shift_jis]

setsuzei119.jp.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/setsuzei119.jp.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

nextbeaut.com.xml-shift_jis]

nextbeaut.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/nextbeaut.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bloglelife.com.xml-shift_jis]

bloglelife.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/bloglelife.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

accessories-brand.com.xml-shift_jis]

accessories-brand.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/accessories-brand.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_2.txt-shift_jis]

_ude_2.txt-shift_jis]

file_name = 'tests/SHIFT_JIS/_ude_2.txt', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_4.txt-shift_jis]

_ude_4.txt-shift_jis]

file_name = 'tests/SHIFT_JIS/_ude_4.txt', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

perth-on.net.xml-shift_jis]

perth-on.net.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/perth-on.net.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ooganemochi.com.xml-shift_jis]

ooganemochi.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/ooganemochi.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

sakusaka-silk.net.xml-shift_jis]

sakusaka-silk.net.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/sakusaka-silk.net.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

amefoot.net.xml-shift_jis]

amefoot.net.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/amefoot.net.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_3.txt-shift_jis]

_ude_3.txt-shift_jis]

file_name = 'tests/SHIFT_JIS/_ude_3.txt', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

brag.zaka.to.xml-shift_jis]

brag.zaka.to.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/brag.zaka.to.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-shift_jis]

_ude_1.txt-shift_jis]

file_name = 'tests/SHIFT_JIS/_ude_1.txt', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

grebeweb.net.xml-shift_jis]

grebeweb.net.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/grebeweb.net.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

andore.com.xml-shift_jis]

andore.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/andore.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

tamuyou.haun.org.xml-shift_jis]

tamuyou.haun.org.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/tamuyou.haun.org.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

andore.com.inami.xml-shift_jis]

andore.com.inami.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/andore.com.inami.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.paseri.ne.jp.xml-shift_jis]

blog.paseri.ne.jp.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/blog.paseri.ne.jp.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

1affliate.com.xml-shift_jis]

1affliate.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/1affliate.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

moon-light.ne.jp.xml-shift_jis]

moon-light.ne.jp.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/moon-light.ne.jp.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_Shift-JIS_with_no_encoding_specified.html-shift_jis]

_chromium_Shift-JIS_with_no_encoding_specified.html-shift_jis]

file_name = 'tests/SHIFT_JIS/_chromium_Shift-JIS_with_no_encoding_specified.html'
encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

do.beginnersrack.com.xml-shift_jis]

do.beginnersrack.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/do.beginnersrack.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.inkase.net.xml-shift_jis]

blog.inkase.net.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/blog.inkase.net.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

milliontimes.jp.xml-shift_jis]

milliontimes.jp.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/milliontimes.jp.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

celeb.lalalu.com.xml-shift_jis]

celeb.lalalu.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/celeb.lalalu.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

yasuhisa.com.xml-shift_jis]

yasuhisa.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/yasuhisa.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

clickablewords.com.xml-shift_jis]

clickablewords.com.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/clickablewords.com.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

dogsinn.jp.xml-shift_jis]

dogsinn.jp.xml-shift_jis]

file_name = 'tests/SHIFT_JIS/dogsinn.jp.xml', encoding = 'shift_jis'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

iyagi-readme.txt-johab]

iyagi-readme.txt-johab]

file_name = 'tests/Johab/iyagi-readme.txt', encoding = 'johab'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

hlpro-readme.txt-johab]

hlpro-readme.txt-johab]

file_name = 'tests/Johab/hlpro-readme.txt', encoding = 'johab'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

mdir-doc.txt-johab]

mdir-doc.txt-johab]

file_name = 'tests/Johab/mdir-doc.txt', encoding = 'johab'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.9.xml-iso-8859-5]

bpm.cult.bg.9.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/bpm.cult.bg.9.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bbc.co.uk.popshow.xml-iso-8859-5]

bbc.co.uk.popshow.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/bbc.co.uk.popshow.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.4.xml-iso-8859-5]

bpm.cult.bg.4.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/bpm.cult.bg.4.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ide.li.xml-iso-8859-5]

ide.li.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/ide.li.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.medusa.4.xml-iso-8859-5]

bpm.cult.bg.medusa.4.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/bpm.cult.bg.medusa.4.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

debian.gabrovo.com.xml-iso-8859-5]

debian.gabrovo.com.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/debian.gabrovo.com.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ecloga.cult.bg.xml-iso-8859-5]

ecloga.cult.bg.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/ecloga.cult.bg.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.xml-iso-8859-5]

bpm.cult.bg.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/bpm.cult.bg.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

linux-bg.org.xml-iso-8859-5]

linux-bg.org.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/linux-bg.org.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

debian.gabrovo.com.news.xml-iso-8859-5]

debian.gabrovo.com.news.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/debian.gabrovo.com.news.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bpm.cult.bg.2.xml-iso-8859-5]

bpm.cult.bg.2.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/bpm.cult.bg.2.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aero-bg.com.xml-iso-8859-5]

aero-bg.com.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/aero-bg.com.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

doncho.net.comments.xml-iso-8859-5]

doncho.net.comments.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-bulgarian/doncho.net.comments.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ricanet.com.xml-cp949]

ricanet.com.xml-cp949]

file_name = 'tests/CP949/ricanet.com.xml', encoding = 'cp949'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

catshadow.blogspot.com.xml-big5]

catshadow.blogspot.com.xml-big5]

file_name = 'tests/Big5/catshadow.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

letterlego.blogspot.com.xml-big5]

letterlego.blogspot.com.xml-big5]

file_name = 'tests/Big5/letterlego.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

0804.blogspot.com.xml-big5]

0804.blogspot.com.xml-big5]

file_name = 'tests/Big5/0804.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

oui-design.com.xml-big5]

oui-design.com.xml-big5]

file_name = 'tests/Big5/oui-design.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ytc.blogspot.com.xml-big5]

ytc.blogspot.com.xml-big5]

file_name = 'tests/Big5/ytc.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kafkatseng.blogspot.com.xml-big5]

kafkatseng.blogspot.com.xml-big5]

file_name = 'tests/Big5/kafkatseng.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-big5]

_ude_1.txt-big5]

file_name = 'tests/Big5/_ude_1.txt', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ebao.us.xml-big5]

ebao.us.xml-big5]

file_name = 'tests/Big5/ebao.us.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

fudesign.blogspot.com.xml-big5]

fudesign.blogspot.com.xml-big5]

file_name = 'tests/Big5/fudesign.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

leavesth.blogspot.com.xml-big5]

leavesth.blogspot.com.xml-big5]

file_name = 'tests/Big5/leavesth.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

linyijen.blogspot.com.xml-big5]

linyijen.blogspot.com.xml-big5]

file_name = 'tests/Big5/linyijen.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

sanwenji.blogspot.com.xml-big5]

sanwenji.blogspot.com.xml-big5]

file_name = 'tests/Big5/sanwenji.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

sinica.edu.tw.xml-big5]

sinica.edu.tw.xml-big5]

file_name = 'tests/Big5/sinica.edu.tw.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

coolloud.org.tw.xml-big5]

coolloud.org.tw.xml-big5]

file_name = 'tests/Big5/coolloud.org.tw.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

sylvia1976.blogspot.com.xml-big5]

sylvia1976.blogspot.com.xml-big5]

file_name = 'tests/Big5/sylvia1976.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

unoriginalblog.com.xml-big5]

unoriginalblog.com.xml-big5]

file_name = 'tests/Big5/unoriginalblog.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

myblog.pchome.com.tw.xml-big5]

myblog.pchome.com.tw.xml-big5]

file_name = 'tests/Big5/myblog.pchome.com.tw.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

tlkkuo.blogspot.com.xml-big5]

tlkkuo.blogspot.com.xml-big5]

file_name = 'tests/Big5/tlkkuo.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

digitalwall.com.xml-big5]

digitalwall.com.xml-big5]

file_name = 'tests/Big5/digitalwall.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_Big5_with_no_encoding_specified.html-big5]

_chromium_Big5_with_no_encoding_specified.html-big5]

file_name = 'tests/Big5/_chromium_Big5_with_no_encoding_specified.html'
encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.worren.net.xml-big5]

blog.worren.net.xml-big5]

file_name = 'tests/Big5/blog.worren.net.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

carbonxiv.blogspot.com.xml-big5]

carbonxiv.blogspot.com.xml-big5]

file_name = 'tests/Big5/carbonxiv.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ke207.blogspot.com.xml-big5]

ke207.blogspot.com.xml-big5]

file_name = 'tests/Big5/ke207.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

upsaid.com.xml-big5]

upsaid.com.xml-big5]

file_name = 'tests/Big5/upsaid.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

willythecop.blogspot.com.xml-big5]

willythecop.blogspot.com.xml-big5]

file_name = 'tests/Big5/willythecop.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

marilynwu.blogspot.com.xml-big5]

marilynwu.blogspot.com.xml-big5]

file_name = 'tests/Big5/marilynwu.blogspot.com.xml', encoding = 'big5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

plane1-utf-16le.html-utf-16le]

plane1-utf-16le.html-utf-16le]

file_name = 'tests/UTF-16LE/plane1-utf-16le.html', encoding = 'utf-16le'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

nobom-utf16le.txt-utf-16le]

nobom-utf16le.txt-utf-16le]

file_name = 'tests/UTF-16LE/nobom-utf16le.txt', encoding = 'utf-16le'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
            result = chardet.detect(input_bytes)
            try:
                expected_unicode = input_bytes.decode(encoding)
            except LookupError:
                expected_unicode = ""
            try:
                detected_unicode = input_bytes.decode(result["encoding"])
            except (LookupError, UnicodeDecodeError, TypeError):
                detected_unicode = ""
        if result:
            encoding_match = (result["encoding"] or "").lower() == encoding
        else:
            encoding_match = False
        # Only care about mismatches that would actually result in different
        # behavior when decoding
        if not encoding_match and expected_unicode != detected_unicode:
            wrapped_expected = "\n".join(textwrap.wrap(expected_unicode, 100)) + "\n"
            wrapped_detected = "\n".join(textwrap.wrap(detected_unicode, 100)) + "\n"
            diff = "".join(
                list(
                    ndiff(
                        wrapped_expected.splitlines(True), wrapped_detected.splitlines(True)
                    )
                )[:20]
            )
            all_encodings = chardet.detect_all(input_bytes, ignore_threshold=True)
        else:
            diff = ""
            encoding_match = True
            all_encodings = [result]
>       assert encoding_match, (
            f"Expected {encoding}, but got {result} for {file_name}.  First 20 "
            f"lines of character differences: \n{diff}\n"
            f"All encodings: {pformat(all_encodings)}"
        )
E       AssertionError: Expected utf-16le, but got {'encoding': 'ascii', 'confidence': 1.0, 'language': ''} for tests/UTF-16LE/nobom-utf16le.txt.  First 20 lines of character differences: 
E         - DateTime,Bid,Ask  07/19/2015 21:00:15.469,1.083,1.08332  07/19/2015 21:00:16.949,1.08311,1.08332
E         - 07/19/2015 21:00:16.955,1.08311,1.08338  07/19/2015 21:00:17.120,1.08313,1.08338  07/19/2015
E         - 21:00:17.166,1.08313,1.0834  07/19/2015 21:00:17.205,1.08313,1.08339  07/19/2015
E         - 21:00:17.218,1.08312,1.08339  07/19/2015 21:00:17.469,1.08316,1.08339  07/19/2015
E         - 21:00:17.476,1.08316,1.08347  07/19/2015 21:00:17.505,1.08316,1.08344  07/19/2015
E         - 21:00:17.584,1.08316,1.08348  07/19/2015 21:00:18.905,1.08316,1.08351  07/19/2015
E         - 21:00:19.005,1.08336,1.08351  07/19/2015 21:00:19.011,1.08336,1.08403  07/19/2015
E         - 21:00:19.019,1.08334,1.08403  07/19/2015 21:00:19.025,1.08334,1.08406  07/19/2015
E         - 21:00:20.310,1.08307,1.08353  07/19/2015 21:00:20.317,1.08278,1.08353  07/19/2015
E         - 21:00:20.319,1.08307,1.08353
E         + DateTime,Bid,Ask  07/19/2015
E         + 21:00:15.469,1.083,1.08332  07/19/2015
E         + 21:00:16.949,1.08311,1.08332  07/19/2015
E         + 21:00:16.955,1.08311,1.08338  07/19/2015
E         + 21:00:17.120,1.08313,1.08338  07/19/2015
E         + 21:00:17.166,1.08313,1.0834  07/19/2015
E         + 21:00:17.205,1.08313,1.08339  07/19/2015
E         + 21:00:17.218,1.08312,1.08339  07/19/2015
E         + 21:00:17.469,1.08316,1.08339  07/19/2015
E         + 21:00:17.476,1.08316,1.08347  07/19/2015
E         
E         All encodings: [{'confidence': 1.0, 'encoding': 'ascii', 'language': ''}]
E       assert False

test.py:110: AssertionError

intertat.ru.xml-maccyrillic]

intertat.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/intertat.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

music.peeps.ru.xml-maccyrillic]

music.peeps.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/music.peeps.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.4.xml-maccyrillic]

forum.template-toolkit.ru.4.xml-maccyrillic]

file_name = 'tests/MacCyrillic/forum.template-toolkit.ru.4.xml'
encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aviaport.ru.xml-maccyrillic]

aviaport.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/aviaport.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aif.ru.health.xml-maccyrillic]

aif.ru.health.xml-maccyrillic]

file_name = 'tests/MacCyrillic/aif.ru.health.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

susu.ac.ru.xml-maccyrillic]

susu.ac.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/susu.ac.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kapranoff.ru.xml-maccyrillic]

kapranoff.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/kapranoff.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.mlmaster.com.xml-maccyrillic]

blog.mlmaster.com.xml-maccyrillic]

file_name = 'tests/MacCyrillic/blog.mlmaster.com.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.8.xml-maccyrillic]

forum.template-toolkit.ru.8.xml-maccyrillic]

file_name = 'tests/MacCyrillic/forum.template-toolkit.ru.8.xml'
encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.9.xml-maccyrillic]

forum.template-toolkit.ru.9.xml-maccyrillic]

file_name = 'tests/MacCyrillic/forum.template-toolkit.ru.9.xml'
encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

newsru.com.xml-maccyrillic]

newsru.com.xml-maccyrillic]

file_name = 'tests/MacCyrillic/newsru.com.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aug32.hole.ru.xml-maccyrillic]

aug32.hole.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/aug32.hole.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

greek.ru.xml-maccyrillic]

greek.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/greek.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.6.xml-maccyrillic]

forum.template-toolkit.ru.6.xml-maccyrillic]

file_name = 'tests/MacCyrillic/forum.template-toolkit.ru.6.xml'
encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

money.rin.ru.xml-maccyrillic]

money.rin.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/money.rin.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-maccyrillic]

_ude_1.txt-maccyrillic]

file_name = 'tests/MacCyrillic/_ude_1.txt', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

koi.kinder.ru.xml-maccyrillic]

koi.kinder.ru.xml-maccyrillic]

file_name = 'tests/MacCyrillic/koi.kinder.ru.xml', encoding = 'maccyrillic'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aviaport.ru.xml-iso-8859-5]

aviaport.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/aviaport.ru.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kapranoff.ru.xml-iso-8859-5]

kapranoff.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/kapranoff.ru.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

janulalife.blogspot.com.xml-iso-8859-5]

janulalife.blogspot.com.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/janulalife.blogspot.com.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

newsru.com.xml-iso-8859-5]

newsru.com.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/newsru.com.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aug32.hole.ru.xml-iso-8859-5]

aug32.hole.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/aug32.hole.ru.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

greek.ru.xml-iso-8859-5]

greek.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/greek.ru.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.9.xml-iso-8859-5]

forum.template-toolkit.ru.9.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/forum.template-toolkit.ru.9.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.1.xml-iso-8859-5]

forum.template-toolkit.ru.1.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/forum.template-toolkit.ru.1.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.4.xml-iso-8859-5]

forum.template-toolkit.ru.4.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/forum.template-toolkit.ru.4.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.8.xml-iso-8859-5]

forum.template-toolkit.ru.8.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/forum.template-toolkit.ru.8.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.6.xml-iso-8859-5]

forum.template-toolkit.ru.6.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/forum.template-toolkit.ru.6.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aif.ru.health.xml-iso-8859-5]

aif.ru.health.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/aif.ru.health.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_ISO-8859-5_with_no_encoding_specified.html-iso-8859-5]

_chromium_ISO-8859-5_with_no_encoding_specified.html-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/_chromium_ISO-8859-5_with_no_encoding_specified.html'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

money.rin.ru.xml-iso-8859-5]

money.rin.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/money.rin.ru.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

susu.ac.ru.xml-iso-8859-5]

susu.ac.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/susu.ac.ru.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

music.peeps.ru.xml-iso-8859-5]

music.peeps.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/music.peeps.ru.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

intertat.ru.xml-iso-8859-5]

intertat.ru.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/intertat.ru.xml', encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.mlmaster.com.xml-iso-8859-5]

blog.mlmaster.com.xml-iso-8859-5]

file_name = 'tests/iso-8859-5-russian/blog.mlmaster.com.xml'
encoding = 'iso-8859-5'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug171813_text.html-gb2312]

_mozilla_bug171813_text.html-gb2312]

file_name = 'tests/GB2312/_mozilla_bug171813_text.html', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

xy15400.blogcn.com.xml-gb2312]

xy15400.blogcn.com.xml-gb2312]

file_name = 'tests/GB2312/xy15400.blogcn.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

acnnewswire.net.xml-gb2312]

acnnewswire.net.xml-gb2312]

file_name = 'tests/GB2312/acnnewswire.net.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

2.blog.westca.com.xml-gb2312]

2.blog.westca.com.xml-gb2312]

file_name = 'tests/GB2312/2.blog.westca.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

eighthday.blogspot.com.xml-gb2312]

eighthday.blogspot.com.xml-gb2312]

file_name = 'tests/GB2312/eighthday.blogspot.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_gb18030_with_no_encoding_specified.html.xml-gb2312]

_chromium_gb18030_with_no_encoding_specified.html.xml-gb2312]

file_name = 'tests/GB2312/_chromium_gb18030_with_no_encoding_specified.html.xml'
encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

cindychen.com.xml-gb2312]

cindychen.com.xml-gb2312]

file_name = 'tests/GB2312/cindychen.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

chen56.blogcn.com.xml-gb2312]

chen56.blogcn.com.xml-gb2312]

file_name = 'tests/GB2312/chen56.blogcn.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

lily.blogsome.com.xml-gb2312]

lily.blogsome.com.xml-gb2312]

file_name = 'tests/GB2312/lily.blogsome.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

w3cn.org.xml-gb2312]

w3cn.org.xml-gb2312]

file_name = 'tests/GB2312/w3cn.org.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

jjgod.3322.org.xml-gb2312]

jjgod.3322.org.xml-gb2312]

file_name = 'tests/GB2312/jjgod.3322.org.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

coverer.com.xml-gb2312]

coverer.com.xml-gb2312]

file_name = 'tests/GB2312/coverer.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

14.blog.westca.com.xml-gb2312]

14.blog.westca.com.xml-gb2312]

file_name = 'tests/GB2312/14.blog.westca.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

bbs.blogsome.com.xml-gb2312]

bbs.blogsome.com.xml-gb2312]

file_name = 'tests/GB2312/bbs.blogsome.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

softsea.net.xml-gb2312]

softsea.net.xml-gb2312]

file_name = 'tests/GB2312/softsea.net.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

luciferwang.blogcn.com.xml-gb2312]

luciferwang.blogcn.com.xml-gb2312]

file_name = 'tests/GB2312/luciferwang.blogcn.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

pda.blogsome.com.xml-gb2312]

pda.blogsome.com.xml-gb2312]

file_name = 'tests/GB2312/pda.blogsome.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

cappuccinos.3322.org.xml-gb2312]

cappuccinos.3322.org.xml-gb2312]

file_name = 'tests/GB2312/cappuccinos.3322.org.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

cnblog.org.xml-gb2312]

cnblog.org.xml-gb2312]

file_name = 'tests/GB2312/cnblog.org.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

godthink.blogsome.com.xml-gb2312]

godthink.blogsome.com.xml-gb2312]

file_name = 'tests/GB2312/godthink.blogsome.com.xml', encoding = 'gb2312'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

plane1-utf-16be.html-utf-16be]

plane1-utf-16be.html-utf-16be]

file_name = 'tests/UTF-16BE/plane1-utf-16be.html', encoding = 'utf-16be'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

nobom-utf16be.txt-utf-16be]

nobom-utf16be.txt-utf-16be]

file_name = 'tests/UTF-16BE/nobom-utf16be.txt', encoding = 'utf-16be'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
            result = chardet.detect(input_bytes)
            try:
                expected_unicode = input_bytes.decode(encoding)
            except LookupError:
                expected_unicode = ""
            try:
                detected_unicode = input_bytes.decode(result["encoding"])
            except (LookupError, UnicodeDecodeError, TypeError):
                detected_unicode = ""
        if result:
            encoding_match = (result["encoding"] or "").lower() == encoding
        else:
            encoding_match = False
        # Only care about mismatches that would actually result in different
        # behavior when decoding
        if not encoding_match and expected_unicode != detected_unicode:
            wrapped_expected = "\n".join(textwrap.wrap(expected_unicode, 100)) + "\n"
            wrapped_detected = "\n".join(textwrap.wrap(detected_unicode, 100)) + "\n"
            diff = "".join(
                list(
                    ndiff(
                        wrapped_expected.splitlines(True), wrapped_detected.splitlines(True)
                    )
                )[:20]
            )
            all_encodings = chardet.detect_all(input_bytes, ignore_threshold=True)
        else:
            diff = ""
            encoding_match = True
            all_encodings = [result]
>       assert encoding_match, (
            f"Expected {encoding}, but got {result} for {file_name}.  First 20 "
            f"lines of character differences: \n{diff}\n"
            f"All encodings: {pformat(all_encodings)}"
        )
E       AssertionError: Expected utf-16be, but got {'encoding': 'ascii', 'confidence': 1.0, 'language': ''} for tests/UTF-16BE/nobom-utf16be.txt.  First 20 lines of character differences: 
E         - DateTime,Bid,Ask  07/19/2015 21:00:15.469,1.083,1.08332  07/19/2015 21:00:16.949,1.08311,1.08332
E         - 07/19/2015 21:00:16.955,1.08311,1.08338  07/19/2015 21:00:17.120,1.08313,1.08338  07/19/2015
E         - 21:00:17.166,1.08313,1.0834  07/19/2015 21:00:17.205,1.08313,1.08339  07/19/2015
E         - 21:00:17.218,1.08312,1.08339  07/19/2015 21:00:17.469,1.08316,1.08339  07/19/2015
E         - 21:00:17.476,1.08316,1.08347  07/19/2015 21:00:17.505,1.08316,1.08344  07/19/2015
E         - 21:00:17.584,1.08316,1.08348  07/19/2015 21:00:18.905,1.08316,1.08351  07/19/2015
E         - 21:00:19.005,1.08336,1.08351  07/19/2015 21:00:19.011,1.08336,1.08403  07/19/2015
E         - 21:00:19.019,1.08334,1.08403  07/19/2015 21:00:19.025,1.08334,1.08406  07/19/2015
E         - 21:00:20.310,1.08307,1.08353  07/19/2015 21:00:20.317,1.08278,1.08353  07/19/2015
E         - 21:00:20.319,1.08307,1.08353
E         + DateTime,Bid,Ask  07/19/2015
E         + 21:00:15.469,1.083,1.08332  07/19/2015
E         + 21:00:16.949,1.08311,1.08332  07/19/2015
E         + 21:00:16.955,1.08311,1.08338  07/19/2015
E         + 21:00:17.120,1.08313,1.08338  07/19/2015
E         + 21:00:17.166,1.08313,1.0834  07/19/2015
E         + 21:00:17.205,1.08313,1.08339  07/19/2015
E         + 21:00:17.218,1.08312,1.08339  07/19/2015
E         + 21:00:17.469,1.08316,1.08339  07/19/2015
E         + 21:00:17.476,1.08316,1.08347  07/19/2015
E         
E         All encodings: [{'confidence': 1.0, 'encoding': 'ascii', 'language': ''}]
E       assert False

test.py:110: AssertionError

_ude_1.txt-iso-2022-jp]

_ude_1.txt-iso-2022-jp]

file_name = 'tests/iso-2022-jp/_ude_1.txt', encoding = 'iso-2022-jp'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:137: in feed
    self._esc_charset_prober = EscCharSetProber()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = 
lang_filter = None

    def __init__(self, lang_filter=None):
        super().__init__(lang_filter=lang_filter)
        self.coding_sm = []
>       if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
E       TypeError: unsupported operand type(s) for &: 'NoneType' and 'int'

chardet/escprober.py:16: TypeError

pharmacy.kku.ac.th.centerlab.xml-tis-620]

pharmacy.kku.ac.th.centerlab.xml-tis-620]

file_name = 'tests/TIS-620/pharmacy.kku.ac.th.centerlab.xml'
encoding = 'tis-620'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

trickspot.boxchart.com.xml-tis-620]

trickspot.boxchart.com.xml-tis-620]

file_name = 'tests/TIS-620/trickspot.boxchart.com.xml', encoding = 'tis-620'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

pharmacy.kku.ac.th.analyse1.xml-tis-620]

pharmacy.kku.ac.th.analyse1.xml-tis-620]

file_name = 'tests/TIS-620/pharmacy.kku.ac.th.analyse1.xml'
encoding = 'tis-620'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug488426_text.html-tis-620]

_mozilla_bug488426_text.html-tis-620]

file_name = 'tests/TIS-620/_mozilla_bug488426_text.html', encoding = 'tis-620'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

opentle.org.xml-tis-620]

opentle.org.xml-tis-620]

file_name = 'tests/TIS-620/opentle.org.xml', encoding = 'tis-620'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

pharmacy.kku.ac.th.healthinfo-ne.xml-tis-620]

pharmacy.kku.ac.th.healthinfo-ne.xml-tis-620]

file_name = 'tests/TIS-620/pharmacy.kku.ac.th.healthinfo-ne.xml'
encoding = 'tis-620'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

money.rin.ru.xml-ibm855]

money.rin.ru.xml-ibm855]

file_name = 'tests/IBM855/money.rin.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.4.xml-ibm855]

forum.template-toolkit.ru.4.xml-ibm855]

file_name = 'tests/IBM855/forum.template-toolkit.ru.4.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.mlmaster.com.xml-ibm855]

blog.mlmaster.com.xml-ibm855]

file_name = 'tests/IBM855/blog.mlmaster.com.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aug32.hole.ru.xml-ibm855]

aug32.hole.ru.xml-ibm855]

file_name = 'tests/IBM855/aug32.hole.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

intertat.ru.xml-ibm855]

intertat.ru.xml-ibm855]

file_name = 'tests/IBM855/intertat.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aviaport.ru.xml-ibm855]

aviaport.ru.xml-ibm855]

file_name = 'tests/IBM855/aviaport.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

music.peeps.ru.xml-ibm855]

music.peeps.ru.xml-ibm855]

file_name = 'tests/IBM855/music.peeps.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

greek.ru.xml-ibm855]

greek.ru.xml-ibm855]

file_name = 'tests/IBM855/greek.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_1.txt-ibm855]

_ude_1.txt-ibm855]

file_name = 'tests/IBM855/_ude_1.txt', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

susu.ac.ru.xml-ibm855]

susu.ac.ru.xml-ibm855]

file_name = 'tests/IBM855/susu.ac.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.8.xml-ibm855]

forum.template-toolkit.ru.8.xml-ibm855]

file_name = 'tests/IBM855/forum.template-toolkit.ru.8.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.9.xml-ibm855]

forum.template-toolkit.ru.9.xml-ibm855]

file_name = 'tests/IBM855/forum.template-toolkit.ru.9.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.6.xml-ibm855]

forum.template-toolkit.ru.6.xml-ibm855]

file_name = 'tests/IBM855/forum.template-toolkit.ru.6.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

forum.template-toolkit.ru.1.xml-ibm855]

forum.template-toolkit.ru.1.xml-ibm855]

file_name = 'tests/IBM855/forum.template-toolkit.ru.1.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kapranoff.ru.xml-ibm855]

kapranoff.ru.xml-ibm855]

file_name = 'tests/IBM855/kapranoff.ru.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

janulalife.blogspot.com.xml-ibm855]

janulalife.blogspot.com.xml-ibm855]

file_name = 'tests/IBM855/janulalife.blogspot.com.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

aif.ru.health.xml-ibm855]

aif.ru.health.xml-ibm855]

file_name = 'tests/IBM855/aif.ru.health.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

newsru.com.xml-ibm855]

newsru.com.xml-ibm855]

file_name = 'tests/IBM855/newsru.com.xml', encoding = 'ibm855'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

arts.egloos.com.xml-euc-kr]

arts.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/arts.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

scarletkh2.egloos.com.xml-euc-kr]

scarletkh2.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/scarletkh2.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_euc2.txt-euc-kr]

_ude_euc2.txt-euc-kr]

file_name = 'tests/EUC-KR/_ude_euc2.txt', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_mozilla_bug9357_text.html-euc-kr]

_mozilla_bug9357_text.html-euc-kr]

file_name = 'tests/EUC-KR/_mozilla_bug9357_text.html', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

jely.pe.kr.xml-euc-kr]

jely.pe.kr.xml-euc-kr]

file_name = 'tests/EUC-KR/jely.pe.kr.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_ude_euc1.txt-euc-kr]

_ude_euc1.txt-euc-kr]

file_name = 'tests/EUC-KR/_ude_euc1.txt', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

willis.egloos.com.xml-euc-kr]

willis.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/willis.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.bd-lab.com.xml-euc-kr]

blog.bd-lab.com.xml-euc-kr]

file_name = 'tests/EUC-KR/blog.bd-lab.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.empas.com.xml-euc-kr]

blog.empas.com.xml-euc-kr]

file_name = 'tests/EUC-KR/blog.empas.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

_chromium_windows-949_with_no_encoding_specified.html-euc-kr]

_chromium_windows-949_with_no_encoding_specified.html-euc-kr]

file_name = 'tests/EUC-KR/_chromium_windows-949_with_no_encoding_specified.html'
encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

chisato.info.xml-euc-kr]

chisato.info.xml-euc-kr]

file_name = 'tests/EUC-KR/chisato.info.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

tori02.egloos.com.xml-euc-kr]

tori02.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/tori02.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

acnnewswire.net.xml-euc-kr]

acnnewswire.net.xml-euc-kr]

file_name = 'tests/EUC-KR/acnnewswire.net.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

epitaph.egloos.com.xml-euc-kr]

epitaph.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/epitaph.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

blog.rss.naver.com.xml-euc-kr]

blog.rss.naver.com.xml-euc-kr]

file_name = 'tests/EUC-KR/blog.rss.naver.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

xenix.egloos.com.xml-euc-kr]

xenix.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/xenix.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

kina.egloos.com.xml-euc-kr]

kina.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/kina.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

birder.egloos.com.xml-euc-kr]

birder.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/birder.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

siwoo.org.xml-euc-kr]

siwoo.org.xml-euc-kr]

file_name = 'tests/EUC-KR/siwoo.org.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

jowchung.oolim.net.xml-euc-kr]

jowchung.oolim.net.xml-euc-kr]

file_name = 'tests/EUC-KR/jowchung.oolim.net.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

yunho.egloos.com.xml-euc-kr]

yunho.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/yunho.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

zangsalang.egloos.com.xml-euc-kr]

zangsalang.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/zangsalang.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

ittrend.egloos.com.xml-euc-kr]

ittrend.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/ittrend.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

poliplus.egloos.com.xml-euc-kr]

poliplus.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/poliplus.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

console.linuxstudy.pe.kr.xml-euc-kr]

console.linuxstudy.pe.kr.xml-euc-kr]

file_name = 'tests/EUC-KR/console.linuxstudy.pe.kr.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

oroll.egloos.com.xml-euc-kr]

oroll.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/oroll.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

calmguy.egloos.com.xml-euc-kr]

calmguy.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/calmguy.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

critique.or.kr.xml-euc-kr]

critique.or.kr.xml-euc-kr]

file_name = 'tests/EUC-KR/critique.or.kr.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

jely.egloos.com.xml-euc-kr]

jely.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/jely.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

sparcs.kaist.ac.kr.xml-euc-kr]

sparcs.kaist.ac.kr.xml-euc-kr]

file_name = 'tests/EUC-KR/sparcs.kaist.ac.kr.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

alogblog.com.xml-euc-kr]

alogblog.com.xml-euc-kr]

file_name = 'tests/EUC-KR/alogblog.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

lennon81.egloos.com.xml-euc-kr]

lennon81.egloos.com.xml-euc-kr]

file_name = 'tests/EUC-KR/lennon81.egloos.com.xml', encoding = 'euc-kr'

    @pytest.mark.parametrize("file_name, encoding", gen_test_params())
    def test_encoding_detection(file_name, encoding):
        with open(file_name, "rb") as f:
            input_bytes = f.read()
>           result = chardet.detect(input_bytes)

test.py:80: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
chardet/__init__.py:39: in detect
    detector.feed(byte_str)
chardet/universaldetector.py:154: in feed
    if prober.feed(byte_str) == ProbingState.FOUND_IT:
chardet/utf1632prober.py:113: in feed
    if not self.validate_utf16_characters(pair_be):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , pair = []

    def validate_utf16_characters(self, pair):
        """
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        """
>       value = (pair[0] << 8) | pair[1]
E       IndexError: list index out of range

chardet/utf1632prober.py:56: IndexError

test.py::test_never_fails_to_detect_if_there_is_a_valid_encoding

test.py::test_never_fails_to_detect_if_there_is_a_valid_encoding

+ Exception Group Traceback (most recent call last):
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/runner.py", line 341, in from_call
  |     result: TResult | None = func()
  |                              ^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/runner.py", line 242, in 
  |     lambda: runtest_hook(item=item, **kwds), when=when, reraise=reraise
  |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_hooks.py", line 513, in __call__
  |     return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_manager.py", line 120, in _hookexec
  |     return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 182, in _multicall
  |     return outcome.get_result()
  |            ^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_result.py", line 100, in get_result
  |     raise exc.with_traceback(exc.__traceback__)
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/threadexception.py", line 92, in pytest_runtest_call
  |     yield from thread_exception_runtest_hook()
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/threadexception.py", line 68, in thread_exception_runtest_hook
  |     yield
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/unraisableexception.py", line 95, in pytest_runtest_call
  |     yield from unraisable_exception_runtest_hook()
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/unraisableexception.py", line 70, in unraisable_exception_runtest_hook
  |     yield
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/logging.py", line 846, in pytest_runtest_call
  |     yield from self._runtest_for(item, "call")
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/logging.py", line 829, in _runtest_for
  |     yield
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/capture.py", line 880, in pytest_runtest_call
  |     return (yield)
  |             ^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/skipping.py", line 257, in pytest_runtest_call
  |     return (yield)
  |             ^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 103, in _multicall
  |     res = hook_impl.function(*args)
  |           ^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/runner.py", line 174, in pytest_runtest_call
  |     item.runtest()
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/python.py", line 1627, in runtest
  |     self.ihook.pytest_pyfunc_call(pyfuncitem=self)
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_hooks.py", line 513, in __call__
  |     return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_manager.py", line 120, in _hookexec
  |     return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 139, in _multicall
  |     raise exception.with_traceback(exception.__traceback__)
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 103, in _multicall
  |     res = hook_impl.function(*args)
  |           ^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/python.py", line 159, in pytest_pyfunc_call
  |     result = testfunction(**testargs)
  |              ^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/test.py", line 123, in test_never_fails_to_detect_if_there_is_a_valid_encoding
  |     @given(
  |             
  |   File "/testbed/.venv/lib/python3.12/site-packages/hypothesis/core.py", line 1722, in wrapped_test
  |     raise the_error_hypothesis_found
  | ExceptionGroup: Hypothesis found 2 distinct failures. (2 sub-exceptions)
  +-+---------------- 1 ----------------
    | Traceback (most recent call last):
    |   File "/testbed/test.py", line 144, in test_never_fails_to_detect_if_there_is_a_valid_encoding
    |     detected = chardet.detect(data)["encoding"]
    |                ^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/__init__.py", line 39, in detect
    |     detector.feed(byte_str)
    |   File "/testbed/chardet/universaldetector.py", line 154, in feed
    |     if prober.feed(byte_str) == ProbingState.FOUND_IT:
    |        ^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/utf1632prober.py", line 113, in feed
    |     if not self.validate_utf16_characters(pair_be):
    |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/utf1632prober.py", line 56, in validate_utf16_characters
    |     value = (pair[0] << 8) | pair[1]
    |              ~~~~^^^
    | IndexError: list index out of range
    | Falsifying example: test_never_fails_to_detect_if_there_is_a_valid_encoding(
    |     txt='𐀀',
    |     enc='utf-8',
    |     rnd=HypothesisRandom(generated data),
    | )
    | Explanation:
    |     These lines were always and only run by failing examples:
    |         /testbed/chardet/charsetprober.py:13
    |         /testbed/chardet/codingstatemachine.py:33
    |         /testbed/chardet/universaldetector.py:129
    +---------------- 2 ----------------
    | Traceback (most recent call last):
    |   File "/testbed/test.py", line 144, in test_never_fails_to_detect_if_there_is_a_valid_encoding
    |     detected = chardet.detect(data)["encoding"]
    |                ^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/__init__.py", line 39, in detect
    |     detector.feed(byte_str)
    |   File "/testbed/chardet/universaldetector.py", line 154, in feed
    |     if prober.feed(byte_str) == ProbingState.FOUND_IT:
    |        ^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/charsetgroupprober.py", line 35, in feed
    |     state = prober.feed(byte_str)
    |             ^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/sbcharsetprober.py", line 52, in feed
    |     byte_str = self.filter_with_english_letters(byte_str)
    |                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    | AttributeError: 'SingleByteCharSetProber' object has no attribute 'filter_with_english_letters'
    | Falsifying example: test_never_fails_to_detect_if_there_is_a_valid_encoding(
    |     txt='\x80',
    |     enc='utf-8',
    |     rnd=HypothesisRandom(generated data),
    | )
    | Explanation:
    |     These lines were always and only run by failing examples:
    |         /testbed/chardet/charsetprober.py:13
    |         /testbed/chardet/codingstatemachine.py:33
    |         /testbed/chardet/universaldetector.py:129
    +------------------------------------

test.py::test_detect_all_and_detect_one_should_agree

test.py::test_detect_all_and_detect_one_should_agree

+ Exception Group Traceback (most recent call last):
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/runner.py", line 341, in from_call
  |     result: TResult | None = func()
  |                              ^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/runner.py", line 242, in 
  |     lambda: runtest_hook(item=item, **kwds), when=when, reraise=reraise
  |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_hooks.py", line 513, in __call__
  |     return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_manager.py", line 120, in _hookexec
  |     return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 182, in _multicall
  |     return outcome.get_result()
  |            ^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_result.py", line 100, in get_result
  |     raise exc.with_traceback(exc.__traceback__)
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/threadexception.py", line 92, in pytest_runtest_call
  |     yield from thread_exception_runtest_hook()
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/threadexception.py", line 68, in thread_exception_runtest_hook
  |     yield
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/unraisableexception.py", line 95, in pytest_runtest_call
  |     yield from unraisable_exception_runtest_hook()
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/unraisableexception.py", line 70, in unraisable_exception_runtest_hook
  |     yield
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/logging.py", line 846, in pytest_runtest_call
  |     yield from self._runtest_for(item, "call")
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/logging.py", line 829, in _runtest_for
  |     yield
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/capture.py", line 880, in pytest_runtest_call
  |     return (yield)
  |             ^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 167, in _multicall
  |     teardown.throw(outcome._exception)
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/skipping.py", line 257, in pytest_runtest_call
  |     return (yield)
  |             ^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 103, in _multicall
  |     res = hook_impl.function(*args)
  |           ^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/runner.py", line 174, in pytest_runtest_call
  |     item.runtest()
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/python.py", line 1627, in runtest
  |     self.ihook.pytest_pyfunc_call(pyfuncitem=self)
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_hooks.py", line 513, in __call__
  |     return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_manager.py", line 120, in _hookexec
  |     return self._inner_hookexec(hook_name, methods, kwargs, firstresult)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 139, in _multicall
  |     raise exception.with_traceback(exception.__traceback__)
  |   File "/testbed/.venv/lib/python3.12/site-packages/pluggy/_callers.py", line 103, in _multicall
  |     res = hook_impl.function(*args)
  |           ^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/_pytest/python.py", line 159, in pytest_pyfunc_call
  |     result = testfunction(**testargs)
  |              ^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/testbed/test.py", line 160, in test_detect_all_and_detect_one_should_agree
  |     st.text(min_size=1),
  |            ^^^
  |   File "/testbed/.venv/lib/python3.12/site-packages/hypothesis/core.py", line 1722, in wrapped_test
  |     raise the_error_hypothesis_found
  | ExceptionGroup: Hypothesis found 2 distinct failures. (2 sub-exceptions)
  +-+---------------- 1 ----------------
    | Traceback (most recent call last):
    |   File "/testbed/test.py", line 181, in test_detect_all_and_detect_one_should_agree
    |     result = chardet.detect(data)
    |              ^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/__init__.py", line 39, in detect
    |     detector.feed(byte_str)
    |   File "/testbed/chardet/universaldetector.py", line 154, in feed
    |     if prober.feed(byte_str) == ProbingState.FOUND_IT:
    |        ^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/utf1632prober.py", line 113, in feed
    |     if not self.validate_utf16_characters(pair_be):
    |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/utf1632prober.py", line 56, in validate_utf16_characters
    |     value = (pair[0] << 8) | pair[1]
    |              ~~~~^^^
    | IndexError: list index out of range
    | 
    | During handling of the above exception, another exception occurred:
    | 
    | Traceback (most recent call last):
    |   File "/testbed/test.py", line 185, in test_detect_all_and_detect_one_should_agree
    |     raise RuntimeError(f"{result} != {results}") from exc
    |                           ^^^^^^
    | UnboundLocalError: cannot access local variable 'result' where it is not associated with a value
    | Falsifying example: test_detect_all_and_detect_one_should_agree(
    |     txt='𐀀',
    |     enc='utf-8',
    |     _=HypothesisRandom(generated data),
    | )
    | Explanation:
    |     These lines were always and only run by failing examples:
    |         /testbed/chardet/charsetprober.py:13
    |         /testbed/chardet/codingstatemachine.py:33
    |         /testbed/chardet/universaldetector.py:129
    +---------------- 2 ----------------
    | Traceback (most recent call last):
    |   File "/testbed/test.py", line 181, in test_detect_all_and_detect_one_should_agree
    |     result = chardet.detect(data)
    |              ^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/__init__.py", line 39, in detect
    |     detector.feed(byte_str)
    |   File "/testbed/chardet/universaldetector.py", line 154, in feed
    |     if prober.feed(byte_str) == ProbingState.FOUND_IT:
    |        ^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/charsetgroupprober.py", line 35, in feed
    |     state = prober.feed(byte_str)
    |             ^^^^^^^^^^^^^^^^^^^^^
    |   File "/testbed/chardet/sbcharsetprober.py", line 52, in feed
    |     byte_str = self.filter_with_english_letters(byte_str)
    |                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    | AttributeError: 'SingleByteCharSetProber' object has no attribute 'filter_with_english_letters'
    | 
    | During handling of the above exception, another exception occurred:
    | 
    | Traceback (most recent call last):
    |   File "/testbed/test.py", line 185, in test_detect_all_and_detect_one_should_agree
    |     raise RuntimeError(f"{result} != {results}") from exc
    |                           ^^^^^^
    | UnboundLocalError: cannot access local variable 'result' where it is not associated with a value
    | Falsifying example: test_detect_all_and_detect_one_should_agree(
    |     txt='\x80',
    |     enc='utf-8',
    |     _=HypothesisRandom(generated data),
    | )
    | Explanation:
    |     These lines were always and only run by failing examples:
    |         /testbed/chardet/charsetprober.py:13
    |         /testbed/chardet/codingstatemachine.py:33
    |         /testbed/chardet/universaldetector.py:129
    +------------------------------------

Patch diff

diff --git a/chardet/big5prober.py b/chardet/big5prober.py
index 51ab8fb..7c27716 100644
--- a/chardet/big5prober.py
+++ b/chardet/big5prober.py
@@ -1,5 +1,6 @@
 from .chardistribution import Big5DistributionAnalysis
 from .codingstatemachine import CodingStateMachine
+from .enums import ProbingState
 from .mbcharsetprober import MultiByteCharSetProber
 from .mbcssm import BIG5_SM_MODEL

@@ -9,4 +10,16 @@ class Big5Prober(MultiByteCharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
         self.distribution_analyzer = Big5DistributionAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return "Big5"
+
+    @property
+    def language(self):
+        return "Traditional Chinese"
\ No newline at end of file
diff --git a/chardet/chardistribution.py b/chardet/chardistribution.py
index 1b51bcd..9043d80 100644
--- a/chardet/chardistribution.py
+++ b/chardet/chardistribution.py
@@ -22,15 +22,39 @@ class CharDistributionAnalysis:

     def reset(self):
         """reset analyser, clear any state"""
-        pass
+        self._done = False
+        self._total_chars = 0
+        self._freq_chars = 0

     def feed(self, char, char_len):
         """feed a character with known length"""
-        pass
+        if char_len == 2:
+            # we only care about 2-bytes character in our distribution analysis
+            order = -1
+            if char[0] in self._char_to_freq_order:
+                order = self._char_to_freq_order[char[0]]
+            if order != -1 and order < self._table_size:
+                self._total_chars += 1
+                if order < 512:
+                    self._freq_chars += 1
+
+    def got_enough_data(self):
+        # It is not necessary to receive all data to draw conclusion.
+        # For charset probers, certain amount of data is enough
+        return self._total_chars > self.ENOUGH_DATA_THRESHOLD

     def get_confidence(self):
         """return confidence based on existing data"""
-        pass
+        if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
+            return self.SURE_NO
+
+        if self._total_chars != self._freq_chars:
+            r = self._freq_chars / ((self._total_chars - self._freq_chars) * self.typical_distribution_ratio)
+            if r < self.SURE_YES:
+                return r
+
+        # normalize confidence, (we don't want to be 100% sure)
+        return self.SURE_YES

 class EUCTWDistributionAnalysis(CharDistributionAnalysis):

diff --git a/chardet/charsetgroupprober.py b/chardet/charsetgroupprober.py
index db44415..f89bbbc 100644
--- a/chardet/charsetgroupprober.py
+++ b/chardet/charsetgroupprober.py
@@ -7,4 +7,57 @@ class CharSetGroupProber(CharSetProber):
         super().__init__(lang_filter=lang_filter)
         self._active_num = 0
         self.probers = []
-        self._best_guess_prober = None
\ No newline at end of file
+        self._best_guess_prober = None
+
+    def reset(self):
+        super().reset()
+        self._active_num = 0
+        for prober in self.probers:
+            if prober:
+                prober.reset()
+                prober.active = True
+                self._active_num += 1
+        self._best_guess_prober = None
+
+    def get_charset_name(self):
+        if not self._best_guess_prober:
+            self.get_confidence()
+            if not self._best_guess_prober:
+                return None
+        return self._best_guess_prober.get_charset_name()
+
+    def feed(self, byte_str):
+        for prober in self.probers:
+            if not prober:
+                continue
+            if not prober.active:
+                continue
+            state = prober.feed(byte_str)
+            if not state:
+                continue
+            if state == ProbingState.FOUND_IT:
+                self._best_guess_prober = prober
+                return self.state
+            elif state == ProbingState.NOT_ME:
+                prober.active = False
+                self._active_num -= 1
+                if self._active_num <= 0:
+                    self._state = ProbingState.NOT_ME
+                    return self.state
+        return self.state
+
+    def get_confidence(self):
+        st = 0.0
+        if not self._best_guess_prober:
+            for prober in self.probers:
+                if not prober:
+                    continue
+                if not prober.active:
+                    continue
+                cf = prober.get_confidence()
+                if cf > st:
+                    st = cf
+                    self._best_guess_prober = prober
+        if not self._best_guess_prober:
+            return 0.0
+        return st
\ No newline at end of file
diff --git a/chardet/charsetprober.py b/chardet/charsetprober.py
index 7f492d7..4b6f6ea 100644
--- a/chardet/charsetprober.py
+++ b/chardet/charsetprober.py
@@ -10,6 +10,7 @@ class CharSetProber:
         self._state = None
         self.lang_filter = lang_filter
         self.logger = logging.getLogger(__name__)
+        self.active = True

     @staticmethod
     def filter_international_words(buf):
@@ -24,7 +25,33 @@ class CharSetProber:
         are replaced by a single space ascii character.
         This filter applies to all scripts which do not use English characters.
         """
-        pass
+        filtered = bytearray()
+        in_word = False
+        prev_marker = True
+        for byte in buf:
+            # Get the byte value as an integer
+            byte_int = byte if isinstance(byte, int) else ord(byte)
+            
+            # Check if it's an alphabet character
+            is_alpha = (byte_int >= 65 and byte_int <= 90) or (byte_int >= 97 and byte_int <= 122)
+            # Check if it's an international character
+            is_international = byte_int >= 0x80 and byte_int <= 0xFF
+            
+            if is_alpha or is_international:
+                if prev_marker and not in_word:
+                    in_word = True
+                if in_word:
+                    filtered.append(byte_int)
+            else:  # it's a marker
+                if in_word:
+                    in_word = False
+                    if not prev_marker:
+                        filtered.append(32)  # ASCII space
+                prev_marker = True
+                continue
+            prev_marker = False
+            
+        return bytes(filtered)

     @staticmethod
     def remove_xml_tags(buf):
@@ -35,4 +62,58 @@ class CharSetProber:
         characters and extended ASCII characters, but is currently only used by
         ``Latin1Prober``.
         """
-        pass
\ No newline at end of file
+        filtered = bytearray()
+        in_tag = False
+        for byte in buf:
+            byte_int = byte if isinstance(byte, int) else ord(byte)
+            
+            if byte_int == ord('<'):
+                in_tag = True
+                continue
+            elif byte_int == ord('>'):
+                in_tag = False
+                continue
+            
+            if not in_tag:
+                filtered.append(byte_int)
+                
+        return bytes(filtered)
+
+    def reset(self):
+        """
+        Reset the prober state to its initial value.
+        """
+        self._state = ProbingState.DETECTING
+
+    def feed(self, buf):
+        """
+        Feed a chunk of bytes to the prober and update its state.
+        """
+        raise NotImplementedError
+
+    def get_confidence(self):
+        """
+        Return confidence level of the prober.
+        """
+        raise NotImplementedError
+
+    @property
+    def charset_name(self):
+        """
+        Return the charset name detected by the prober.
+        """
+        raise NotImplementedError
+
+    @property
+    def state(self):
+        """
+        Return the state of the prober.
+        """
+        return self._state
+
+    @property
+    def language(self):
+        """
+        Return the language detected by the prober.
+        """
+        raise NotImplementedError
\ No newline at end of file
diff --git a/chardet/codingstatemachine.py b/chardet/codingstatemachine.py
index 14d1fa4..4aa404e 100644
--- a/chardet/codingstatemachine.py
+++ b/chardet/codingstatemachine.py
@@ -30,4 +30,42 @@ class CodingStateMachine:
         self._curr_char_len = 0
         self._curr_state = None
         self.logger = logging.getLogger(__name__)
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        """
+        Reset the state machine to its initial state.
+        """
+        self._curr_state = MachineState.START
+        self._curr_byte_pos = 0
+        self._curr_char_len = 0
+
+    def next_state(self, c):
+        """
+        Process one byte at a time and return the new state.
+        """
+        # for each byte we get its class
+        byte_class = self._model['class_table'][c]
+        if byte_class == 'eError':  # we represent error class as None
+            self._curr_state = MachineState.ERROR
+            return self._curr_state
+
+        # for each byte class we get a state transition table
+        if self._curr_state == MachineState.START:
+            self._curr_byte_pos = 0
+            self._curr_char_len = self._model['char_len_table'][byte_class]
+
+        # from byte's class and state_table, we get its next state
+        curr_state = self._curr_state * self._model['class_factor'] + byte_class
+        self._curr_state = self._model['state_table'][curr_state]
+
+        # we increment the byte position counter
+        self._curr_byte_pos += 1
+
+        return self._curr_state
+
+    def get_current_charlen(self):
+        """
+        Return the length of the current character being detected.
+        """
+        return self._curr_char_len
\ No newline at end of file
diff --git a/chardet/cp949prober.py b/chardet/cp949prober.py
index 1b272ad..0c4f482 100644
--- a/chardet/cp949prober.py
+++ b/chardet/cp949prober.py
@@ -1,5 +1,6 @@
 from .chardistribution import EUCKRDistributionAnalysis
 from .codingstatemachine import CodingStateMachine
+from .enums import ProbingState
 from .mbcharsetprober import MultiByteCharSetProber
 from .mbcssm import CP949_SM_MODEL

@@ -9,4 +10,16 @@ class CP949Prober(MultiByteCharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
         self.distribution_analyzer = EUCKRDistributionAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return "CP949"
+
+    @property
+    def language(self):
+        return "Korean"
\ No newline at end of file
diff --git a/chardet/enums.py b/chardet/enums.py
index 0b0e575..1858080 100644
--- a/chardet/enums.py
+++ b/chardet/enums.py
@@ -54,7 +54,7 @@ class SequenceLikelihood:
     @classmethod
     def get_num_categories(cls):
         """:returns: The number of likelihood categories in the enum."""
-        pass
+        return 4  # NEGATIVE through POSITIVE

 class CharacterCategory:
     """
diff --git a/chardet/eucjpprober.py b/chardet/eucjpprober.py
index cdbbf2f..7306a5f 100644
--- a/chardet/eucjpprober.py
+++ b/chardet/eucjpprober.py
@@ -12,4 +12,50 @@ class EUCJPProber(MultiByteCharSetProber):
         self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
         self.distribution_analyzer = EUCJPDistributionAnalysis()
         self.context_analyzer = EUCJPContextAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self.context_analyzer.reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return self.context_analyzer.charset_name
+
+    @property
+    def language(self):
+        return "Japanese"
+
+    def feed(self, byte_str):
+        for i in range(len(byte_str)):
+            coding_state = self.coding_sm.next_state(byte_str[i])
+            if coding_state == MachineState.ERROR:
+                self._state = ProbingState.NOT_ME
+                break
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
+                break
+            elif coding_state == MachineState.START:
+                char_len = self.coding_sm.get_current_charlen()
+                if i == 0:
+                    self._last_char[1] = byte_str[0]
+                    self.context_analyzer.feed(self._last_char, char_len)
+                    self.distribution_analyzer.feed(self._last_char, char_len)
+                else:
+                    self.context_analyzer.feed(byte_str[i-1:i+1], char_len)
+                    self.distribution_analyzer.feed(byte_str[i-1:i+1], char_len)
+
+        self._last_char[0] = byte_str[-1]
+
+        if self.state == ProbingState.DETECTING:
+            if self.context_analyzer.got_enough_data() and (
+                    self.get_confidence() > self.SHORTCUT_THRESHOLD):
+                self._state = ProbingState.FOUND_IT
+
+        return self.state
+
+    def get_confidence(self):
+        context_conf = self.context_analyzer.get_confidence()
+        distrib_conf = self.distribution_analyzer.get_confidence()
+        return max(context_conf, distrib_conf)
\ No newline at end of file
diff --git a/chardet/euckrprober.py b/chardet/euckrprober.py
index 067b9e8..646aa7a 100644
--- a/chardet/euckrprober.py
+++ b/chardet/euckrprober.py
@@ -1,5 +1,6 @@
 from .chardistribution import EUCKRDistributionAnalysis
 from .codingstatemachine import CodingStateMachine
+from .enums import ProbingState
 from .mbcharsetprober import MultiByteCharSetProber
 from .mbcssm import EUCKR_SM_MODEL

@@ -9,4 +10,16 @@ class EUCKRProber(MultiByteCharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
         self.distribution_analyzer = EUCKRDistributionAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return "EUC-KR"
+
+    @property
+    def language(self):
+        return "Korean"
\ No newline at end of file
diff --git a/chardet/euctwprober.py b/chardet/euctwprober.py
index 8fa06d8..7417fd1 100644
--- a/chardet/euctwprober.py
+++ b/chardet/euctwprober.py
@@ -1,5 +1,6 @@
 from .chardistribution import EUCTWDistributionAnalysis
 from .codingstatemachine import CodingStateMachine
+from .enums import ProbingState
 from .mbcharsetprober import MultiByteCharSetProber
 from .mbcssm import EUCTW_SM_MODEL

@@ -9,4 +10,16 @@ class EUCTWProber(MultiByteCharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
         self.distribution_analyzer = EUCTWDistributionAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return "EUC-TW"
+
+    @property
+    def language(self):
+        return "Traditional Chinese"
\ No newline at end of file
diff --git a/chardet/gb2312prober.py b/chardet/gb2312prober.py
index a897d30..69373da 100644
--- a/chardet/gb2312prober.py
+++ b/chardet/gb2312prober.py
@@ -1,5 +1,6 @@
 from .chardistribution import GB2312DistributionAnalysis
 from .codingstatemachine import CodingStateMachine
+from .enums import ProbingState
 from .mbcharsetprober import MultiByteCharSetProber
 from .mbcssm import GB2312_SM_MODEL

@@ -9,4 +10,16 @@ class GB2312Prober(MultiByteCharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
         self.distribution_analyzer = GB2312DistributionAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return "GB2312"
+
+    @property
+    def language(self):
+        return "Chinese"
\ No newline at end of file
diff --git a/chardet/hebrewprober.py b/chardet/hebrewprober.py
index b6262c5..57c7bd8 100644
--- a/chardet/hebrewprober.py
+++ b/chardet/hebrewprober.py
@@ -25,4 +25,107 @@ class HebrewProber(CharSetProber):
         self._before_prev = None
         self._logical_prober = None
         self._visual_prober = None
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        self._final_char_logical_score = 0
+        self._final_char_visual_score = 0
+        self._prev = ' '
+        self._before_prev = ' '
+        self._logical_prober = None
+        self._visual_prober = None
+
+    def set_model_probers(self, logical_prober, visual_prober):
+        self._logical_prober = logical_prober
+        self._visual_prober = visual_prober
+
+    def is_final(self, c):
+        return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
+                    self.FINAL_PE, self.FINAL_TSADI]
+
+    def is_non_final(self, c):
+        return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN,
+                    self.NORMAL_PE, self.NORMAL_TSADI]
+
+    def feed(self, byte_str):
+        if self._state == ProbingState.NOT_ME:
+            return self._state
+
+        for c in byte_str:
+            if c >= 128:
+                # If we got a non-ascii character, check if it's a final or non-final letter
+                if self.is_final(c):
+                    # If the previous character was a non-final letter, this is logical
+                    if self._prev == ' ':
+                        self._final_char_logical_score += 0
+                        self._final_char_visual_score += 0
+                    elif self.is_non_final(self._prev):
+                        self._final_char_logical_score += 1
+                        self._final_char_visual_score -= 1
+                    else:
+                        self._final_char_logical_score += 0
+                        self._final_char_visual_score += 0
+                elif self.is_non_final(c):
+                    # If the previous character was a final letter, this is visual
+                    if self._prev == ' ':
+                        self._final_char_logical_score += 0
+                        self._final_char_visual_score += 0
+                    elif self.is_final(self._prev):
+                        self._final_char_logical_score -= 1
+                        self._final_char_visual_score += 1
+                    else:
+                        self._final_char_logical_score += 0
+                        self._final_char_visual_score += 0
+
+            self._before_prev = self._prev
+            self._prev = c
+
+        return self._state
+
+    def get_charset_name(self):
+        # If we have both probers and one is significantly more confident,
+        # use its charset name
+        finalsub = abs(self._final_char_logical_score - self._final_char_visual_score)
+        if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
+            if self._final_char_logical_score > self._final_char_visual_score:
+                return self.LOGICAL_HEBREW_NAME
+            return self.VISUAL_HEBREW_NAME
+
+        # If we don't have a clear winner, use the one with higher confidence
+        if self._logical_prober and self._visual_prober:
+            logical_conf = self._logical_prober.get_confidence()
+            visual_conf = self._visual_prober.get_confidence()
+            diff = abs(logical_conf - visual_conf)
+            if diff >= self.MIN_MODEL_DISTANCE:
+                if logical_conf > visual_conf:
+                    return self.LOGICAL_HEBREW_NAME
+                return self.VISUAL_HEBREW_NAME
+
+        # Still no clear winner, return logical Hebrew by default
+        return self.LOGICAL_HEBREW_NAME
+
+    def get_state(self):
+        # Assume we're good unless both model probers say otherwise
+        if (self._logical_prober and self._visual_prober and
+            self._logical_prober.get_state() == ProbingState.NOT_ME and
+            self._visual_prober.get_state() == ProbingState.NOT_ME):
+            return ProbingState.NOT_ME
+        return ProbingState.DETECTING
+
+    def get_confidence(self):
+        # If we have a clear winner from final letters analysis, use that
+        finalsub = abs(self._final_char_logical_score - self._final_char_visual_score)
+        if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
+            return 0.95
+
+        # If we have both probers and one is significantly more confident,
+        # use its confidence
+        if self._logical_prober and self._visual_prober:
+            logical_conf = self._logical_prober.get_confidence()
+            visual_conf = self._visual_prober.get_confidence()
+            diff = abs(logical_conf - visual_conf)
+            if diff >= self.MIN_MODEL_DISTANCE:
+                return max(logical_conf, visual_conf)
+
+        # No clear winner, return a moderate confidence
+        return 0.5
\ No newline at end of file
diff --git a/chardet/johabprober.py b/chardet/johabprober.py
index caeafe1..1696da3 100644
--- a/chardet/johabprober.py
+++ b/chardet/johabprober.py
@@ -1,5 +1,6 @@
 from .chardistribution import JOHABDistributionAnalysis
 from .codingstatemachine import CodingStateMachine
+from .enums import ProbingState
 from .mbcharsetprober import MultiByteCharSetProber
 from .mbcssm import JOHAB_SM_MODEL

@@ -9,4 +10,16 @@ class JOHABProber(MultiByteCharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL)
         self.distribution_analyzer = JOHABDistributionAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return "JOHAB"
+
+    @property
+    def language(self):
+        return "Korean"
\ No newline at end of file
diff --git a/chardet/jpcntx.py b/chardet/jpcntx.py
index 0652b56..7933219 100644
--- a/chardet/jpcntx.py
+++ b/chardet/jpcntx.py
@@ -15,11 +15,101 @@ class JapaneseContextAnalysis:
         self._done = None
         self.reset()

+    def reset(self):
+        """Reset the context analysis."""
+        self._total_rel = 0  # Total relative order
+        self._rel_sample = [0] * self.NUM_OF_CATEGORY  # Category counters
+        self._need_to_skip_char_num = 0  # Number of characters to skip
+        self._last_char_order = self.DONT_KNOW  # Last character's relative order
+        self._done = False  # Done analyzing
+
+    def get_order(self, byte_str):
+        """Get the order of the byte string."""
+        return -1
+
+    def get_confidence(self):
+        """Return confidence based on existing data."""
+        if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
+            return 0.99
+        elif self._total_rel > 0:
+            return 0.75
+        return 0.0
+
+    def got_enough_data(self):
+        """Return true if we've received enough data."""
+        return self._done
+
+    def feed(self, byte_str, num_bytes):
+        """Feed a character with its byte length."""
+        if self._done:
+            return
+
+        # We only care about 2-bytes characters in our analysis
+        if num_bytes != 2:
+            return
+
+        # Skip half the input of less than 512 bytes
+        if self._total_rel < 512:
+            self._need_to_skip_char_num += 1
+            if self._need_to_skip_char_num % 2:
+                return
+
+        order = self.get_order(byte_str)
+        if order != self.DONT_KNOW:
+            self._total_rel += 1
+            if self._last_char_order != self.DONT_KNOW:
+                if self._total_rel > self.MAX_REL_THRESHOLD:
+                    self._done = True
+                    return
+                if order < self.NUM_OF_CATEGORY:
+                    self._rel_sample[order] += 1
+            self._last_char_order = order
+
 class SJISContextAnalysis(JapaneseContextAnalysis):

     def __init__(self):
         super().__init__()
         self._charset_name = 'SHIFT_JIS'

+    def get_order(self, byte_str):
+        if not byte_str:
+            return -1
+        # find out current char's byte length
+        first_char = byte_str[0]
+        if (0x81 <= first_char <= 0x9F or 0xE0 <= first_char <= 0xFC):
+            char_len = 2
+            if len(byte_str) < char_len:
+                return -1
+            order = jp2_char_context[first_char - 0x81]
+        else:
+            char_len = 1
+            if first_char < 0x80:
+                return -1
+            order = jp2_char_context[first_char - 0xA1]
+        return order
+
 class EUCJPContextAnalysis(JapaneseContextAnalysis):
-    pass
\ No newline at end of file
+    def __init__(self):
+        super().__init__()
+        self._charset_name = 'EUC-JP'
+
+    def get_order(self, byte_str):
+        if not byte_str:
+            return -1
+        # find out current char's byte length
+        first_char = byte_str[0]
+        if first_char == 0x8E or first_char == 0x8F:
+            char_len = 2
+            if len(byte_str) < char_len:
+                return -1
+            if first_char == 0x8F:
+                char_len = 3
+                if len(byte_str) < char_len:
+                    return -1
+            return -1
+        else:
+            char_len = 1
+            if first_char < 0xA1:
+                return -1
+            order = jp2_char_context[first_char - 0xA1]
+        return order
\ No newline at end of file
diff --git a/chardet/mbcharsetprober.py b/chardet/mbcharsetprober.py
index 28d62fe..9a98f22 100644
--- a/chardet/mbcharsetprober.py
+++ b/chardet/mbcharsetprober.py
@@ -10,4 +10,42 @@ class MultiByteCharSetProber(CharSetProber):
         super().__init__(lang_filter=lang_filter)
         self.distribution_analyzer = None
         self.coding_sm = None
-        self._last_char = [0, 0]
\ No newline at end of file
+        self._last_char = [0, 0]
+
+    def reset(self):
+        super().reset()
+        if self.coding_sm:
+            self.coding_sm.reset()
+        if self.distribution_analyzer:
+            self.distribution_analyzer.reset()
+        self._last_char = [0, 0]
+        self._state = ProbingState.DETECTING
+
+    def feed(self, byte_str):
+        for i in range(len(byte_str)):
+            coding_state = self.coding_sm.next_state(byte_str[i])
+            if coding_state == MachineState.ERROR:
+                self._state = ProbingState.NOT_ME
+                break
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
+                break
+            elif coding_state == MachineState.START:
+                char_len = self.coding_sm.get_current_charlen()
+                if i == 0:
+                    self._last_char[1] = byte_str[0]
+                    self.distribution_analyzer.feed(self._last_char, char_len)
+                else:
+                    self.distribution_analyzer.feed(byte_str[i-1:i+1], char_len)
+
+        self._last_char[0] = byte_str[-1]
+
+        if self.state == ProbingState.DETECTING:
+            if self.distribution_analyzer.got_enough_data() and (
+                    self.get_confidence() > self.SHORTCUT_THRESHOLD):
+                self._state = ProbingState.FOUND_IT
+
+        return self.state
+
+    def get_confidence(self):
+        return self.distribution_analyzer.get_confidence()
\ No newline at end of file
diff --git a/chardet/sbcharsetprober.py b/chardet/sbcharsetprober.py
index 003f325..5068a27 100644
--- a/chardet/sbcharsetprober.py
+++ b/chardet/sbcharsetprober.py
@@ -20,4 +20,81 @@ class SingleByteCharSetProber(CharSetProber):
         self._total_char = None
         self._control_char = None
         self._freq_char = None
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self._last_order = 255
+        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
+        self._total_seqs = 0
+        self._total_char = 0
+        self._control_char = 0
+        self._freq_char = 0
+
+    def get_charset_name(self):
+        if self._name_prober:
+            return self._name_prober.get_charset_name()
+        return self._model.charset_name
+
+    @property
+    def charset_name(self):
+        return self._model.charset_name
+
+    @property
+    def language(self):
+        return self._model.language
+
+    def feed(self, byte_str):
+        if not self._model.keep_ascii_letters:
+            byte_str = self.filter_international_words(byte_str)
+            if not byte_str:
+                return self.state
+        byte_str = self.filter_with_english_letters(byte_str)
+        if not byte_str:
+            return self.state
+
+        char_len = len(byte_str)
+        if char_len > 0:
+            if not self._model.char_to_order_map or not self._model.language_model:
+                self._state = ProbingState.NOT_ME
+                return self.state
+
+            for i, c in enumerate(byte_str):
+                order = self._model.char_to_order_map.get(c, CharacterCategory.UNDEFINED)
+                if order < CharacterCategory.CONTROL:
+                    self._control_char += 1
+                elif order == CharacterCategory.SAME_CLASS_WORD:
+                    self._freq_char += 1
+
+                if order < len(self._model.language_model):
+                    if i > 0:
+                        last_order = self._last_order
+                        if last_order < len(self._model.language_model):
+                            self._total_seqs += 1
+                            if not self._reversed:
+                                lm_cat = self._model.language_model[last_order][order]
+                                self._seq_counters[lm_cat] += 1
+                            else:
+                                lm_cat = self._model.language_model[order][last_order]
+                                self._seq_counters[lm_cat] += 1
+                    self._last_order = order
+
+            charset_name = self.charset_name
+            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
+                cf = self.get_confidence()
+                if cf > self.POSITIVE_SHORTCUT_THRESHOLD:
+                    self._state = ProbingState.FOUND_IT
+                elif cf < self.NEGATIVE_SHORTCUT_THRESHOLD:
+                    self._state = ProbingState.NOT_ME
+
+        return self.state
+
+    def get_confidence(self):
+        r = 0.01
+        if self._total_seqs > 0:
+            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) / self._total_seqs
+                 / self._model.typical_positive_ratio)
+            r = r * (self._total_seqs / self.SAMPLE_SIZE)
+            if r >= 1.0:
+                r = 0.99
+        return r
\ No newline at end of file
diff --git a/chardet/sjisprober.py b/chardet/sjisprober.py
index fe26d49..3e8f3a4 100644
--- a/chardet/sjisprober.py
+++ b/chardet/sjisprober.py
@@ -12,4 +12,50 @@ class SJISProber(MultiByteCharSetProber):
         self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
         self.distribution_analyzer = SJISDistributionAnalysis()
         self.context_analyzer = SJISContextAnalysis()
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self.context_analyzer.reset()
+        self._state = ProbingState.DETECTING
+
+    @property
+    def charset_name(self):
+        return self.context_analyzer.charset_name
+
+    @property
+    def language(self):
+        return "Japanese"
+
+    def feed(self, byte_str):
+        for i in range(len(byte_str)):
+            coding_state = self.coding_sm.next_state(byte_str[i])
+            if coding_state == MachineState.ERROR:
+                self._state = ProbingState.NOT_ME
+                break
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
+                break
+            elif coding_state == MachineState.START:
+                char_len = self.coding_sm.get_current_charlen()
+                if i == 0:
+                    self._last_char[1] = byte_str[0]
+                    self.context_analyzer.feed(self._last_char, char_len)
+                    self.distribution_analyzer.feed(self._last_char, char_len)
+                else:
+                    self.context_analyzer.feed(byte_str[i-1:i+1], char_len)
+                    self.distribution_analyzer.feed(byte_str[i-1:i+1], char_len)
+
+        self._last_char[0] = byte_str[-1]
+
+        if self.state == ProbingState.DETECTING:
+            if self.context_analyzer.got_enough_data() and (
+                    self.get_confidence() > self.SHORTCUT_THRESHOLD):
+                self._state = ProbingState.FOUND_IT
+
+        return self.state
+
+    def get_confidence(self):
+        context_conf = self.context_analyzer.get_confidence()
+        distrib_conf = self.distribution_analyzer.get_confidence()
+        return max(context_conf, distrib_conf)
\ No newline at end of file
diff --git a/chardet/universaldetector.py b/chardet/universaldetector.py
index a0351fc..18d7631 100644
--- a/chardet/universaldetector.py
+++ b/chardet/universaldetector.py
@@ -54,13 +54,31 @@ class UniversalDetector:
         self._has_win_bytes = None
         self.reset()

+    @property
+    def input_state(self):
+        return self._input_state
+
     def reset(self):
         """
         Reset the UniversalDetector and all of its probers back to their
         initial states.  This is called by ``__init__``, so you only need to
         call this directly in between analyses of different documents.
         """
-        pass
+        self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
+        self.done = False
+        self._got_data = False
+        self._has_win_bytes = False
+        self._input_state = InputState.PURE_ASCII
+        self._last_char = None
+        if self._esc_charset_prober:
+            self._esc_charset_prober.reset()
+        if self._utf1632_prober:
+            self._utf1632_prober.reset()
+        for prober in self._charset_probers:
+            prober.reset()
+        self._esc_charset_prober = None
+        self._utf1632_prober = None
+        self._charset_probers = []

     def feed(self, byte_str):
         """
@@ -76,7 +94,74 @@ class UniversalDetector:
            You should always call ``close`` when you're done feeding in your
            document if ``done`` is not already ``True``.
         """
-        pass
+        if self.done:
+            return
+
+        if not len(byte_str):
+            return
+
+        if not self._got_data:
+            self._got_data = True
+            if byte_str.startswith(codecs.BOM_UTF8):
+                self.result = {'encoding': 'UTF-8-SIG', 'confidence': 1.0, 'language': ''}
+                self.done = True
+                return
+            if byte_str.startswith(codecs.BOM_UTF32_LE):
+                self.result = {'encoding': 'UTF-32', 'confidence': 1.0, 'language': ''}
+                self.done = True
+                return
+            if byte_str.startswith(codecs.BOM_UTF32_BE):
+                self.result = {'encoding': 'UTF-32', 'confidence': 1.0, 'language': ''}
+                self.done = True
+                return
+            if byte_str.startswith(codecs.BOM_UTF16_LE):
+                self.result = {'encoding': 'UTF-16', 'confidence': 1.0, 'language': ''}
+                self.done = True
+                return
+            if byte_str.startswith(codecs.BOM_UTF16_BE):
+                self.result = {'encoding': 'UTF-16', 'confidence': 1.0, 'language': ''}
+                self.done = True
+                return
+
+        # If none of the above BOMs matched and we see a high byte
+        if self._input_state == InputState.PURE_ASCII:
+            if self.HIGH_BYTE_DETECTOR.search(byte_str):
+                self._input_state = InputState.HIGH_BYTE
+            elif self.ESC_DETECTOR.search(byte_str):
+                self._input_state = InputState.ESC_ASCII
+
+        self._last_char = byte_str[-1:]
+
+        if self._input_state == InputState.ESC_ASCII:
+            if not self._esc_charset_prober:
+                self._esc_charset_prober = EscCharSetProber()
+            if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
+                self.result = {'encoding': self._esc_charset_prober.charset_name,
+                             'confidence': self._esc_charset_prober.get_confidence(),
+                             'language': self._esc_charset_prober.language}
+                self.done = True
+        elif self._input_state == InputState.HIGH_BYTE:
+            if not self._utf1632_prober:
+                self._utf1632_prober = UTF1632Prober()
+            if not self._charset_probers:
+                self._charset_probers = [MBCSGroupProber(self.lang_filter),
+                                       SBCSGroupProber(),
+                                       Latin1Prober()]
+            if self.WIN_BYTE_DETECTOR.search(byte_str):
+                self._has_win_bytes = True
+
+            for prober in [self._utf1632_prober] + self._charset_probers:
+                if prober.feed(byte_str) == ProbingState.FOUND_IT:
+                    charset_name = prober.charset_name
+                    if charset_name.startswith('UTF-16'):
+                        charset_name = 'UTF-16'
+                    elif charset_name.startswith('UTF-32'):
+                        charset_name = 'UTF-32'
+                    self.result = {'encoding': charset_name,
+                                 'confidence': prober.get_confidence(),
+                                 'language': prober.language}
+                    self.done = True
+                    break

     def close(self):
         """
@@ -86,4 +171,56 @@ class UniversalDetector:
         :returns:  The ``result`` attribute, a ``dict`` with the keys
                    `encoding`, `confidence`, and `language`.
         """
-        pass
\ No newline at end of file
+        if self.done:
+            return self.result
+
+        if not self._got_data:
+            self.logger.debug('no data received!')
+            return self.result
+
+        if self._input_state == InputState.PURE_ASCII:
+            self.result = {'encoding': 'ascii',
+                          'confidence': 1.0,
+                          'language': ''}
+            return self.result
+
+        if self._input_state == InputState.HIGH_BYTE:
+            probers = [self._utf1632_prober] if self._utf1632_prober else []
+            probers.extend(self._charset_probers)
+            max_prober = None
+            max_confidence = 0.0
+            for prober in probers:
+                if not prober:
+                    continue
+                prober.close()
+                confidence = prober.get_confidence()
+                if confidence > max_confidence:
+                    max_confidence = confidence
+                    max_prober = prober
+
+            if max_prober and max_confidence > self.MINIMUM_THRESHOLD:
+                charset_name = max_prober.charset_name
+                lower_charset_name = charset_name.lower()
+                confidence = max_prober.get_confidence()
+                # Use Windows encoding name instead of ISO
+                if lower_charset_name in self.ISO_WIN_MAP and self._has_win_bytes:
+                    charset_name = self.ISO_WIN_MAP[lower_charset_name]
+                    confidence = confidence * 0.9  # Penalize for using Windows charset
+                # Normalize UTF-16/32 names
+                if lower_charset_name.startswith('utf-16'):
+                    charset_name = 'UTF-16'
+                elif lower_charset_name.startswith('utf-32'):
+                    charset_name = 'UTF-32'
+                self.result = {'encoding': charset_name,
+                             'confidence': confidence,
+                             'language': max_prober.language}
+
+        if self._input_state == InputState.ESC_ASCII and self._esc_charset_prober:
+            self._esc_charset_prober.close()
+            confidence = self._esc_charset_prober.get_confidence()
+            if confidence > self.MINIMUM_THRESHOLD:
+                self.result = {'encoding': self._esc_charset_prober.charset_name,
+                             'confidence': confidence,
+                             'language': self._esc_charset_prober.language}
+
+        return self.result
\ No newline at end of file
diff --git a/chardet/utf1632prober.py b/chardet/utf1632prober.py
index be3cac6..8716059 100644
--- a/chardet/utf1632prober.py
+++ b/chardet/utf1632prober.py
@@ -36,7 +36,12 @@ class UTF1632Prober(CharSetProber):

         https://en.wikipedia.org/wiki/UTF-32
         """
-        pass
+        value = (quad[0] << 24) | (quad[1] << 16) | (quad[2] << 8) | quad[3]
+        if value > 0x0010FFFF:
+            return False
+        if 0xD800 <= value <= 0xDFFF:
+            return False
+        return True

     def validate_utf16_characters(self, pair):
         """
@@ -48,4 +53,121 @@ class UTF1632Prober(CharSetProber):

         https://en.wikipedia.org/wiki/UTF-16
         """
-        pass
\ No newline at end of file
+        value = (pair[0] << 8) | pair[1]
+        if 0xD800 <= value <= 0xDBFF:
+            return True  # First half of surrogate pair
+        if 0xDC00 <= value <= 0xDFFF:
+            return True  # Second half of surrogate pair
+        if value >= 0xD800 and value <= 0xDFFF:
+            return False  # Invalid surrogate value
+        return True
+
+    def reset(self):
+        """
+        Reset the prober to its initial state.
+        """
+        super().reset()
+        self.position = 0
+        self.zeros_at_mod = [0] * 4
+        self.nonzeros_at_mod = [0] * 4
+        self._state = ProbingState.DETECTING
+        self.quad = [0, 0, 0, 0]
+        self.invalid_utf16be = False
+        self.invalid_utf16le = False
+        self.invalid_utf32be = False
+        self.invalid_utf32le = False
+        self.first_half_surrogate_pair_detected_16be = False
+        self.first_half_surrogate_pair_detected_16le = False
+        self._charset_name = None
+
+    def feed(self, byte_str):
+        """
+        Feed a chunk of bytes to the prober and update its state.
+        """
+        if self._state == ProbingState.NOT_ME:
+            return self._state
+
+        for byte in byte_str:
+            self.quad[self.position % 4] = byte
+            if byte == 0:
+                self.zeros_at_mod[self.position % 4] += 1
+            else:
+                self.nonzeros_at_mod[self.position % 4] += 1
+
+            if self.position % 4 == 3:  # We have a complete quad
+                # Check UTF-32BE
+                if not self.invalid_utf32be:
+                    if not self.validate_utf32_characters(self.quad):
+                        self.invalid_utf32be = True
+
+                # Check UTF-32LE
+                quad_le = self.quad[::-1]  # Reverse the quad for LE
+                if not self.invalid_utf32le:
+                    if not self.validate_utf32_characters(quad_le):
+                        self.invalid_utf32le = True
+
+            if self.position % 2 == 1:  # We have a complete pair
+                # Check UTF-16BE
+                if not self.invalid_utf16be:
+                    pair_be = self.quad[(self.position - 1) % 4:(self.position + 1) % 4]
+                    if not self.validate_utf16_characters(pair_be):
+                        self.invalid_utf16be = True
+
+                # Check UTF-16LE
+                if not self.invalid_utf16le:
+                    pair_le = self.quad[(self.position - 1) % 4:(self.position + 1) % 4][::-1]
+                    if not self.validate_utf16_characters(pair_le):
+                        self.invalid_utf16le = True
+
+            self.position += 1
+
+            # Early detection if we have enough data
+            if self.position >= self.MIN_CHARS_FOR_DETECTION:
+                # Check UTF-32BE pattern
+                if (self.zeros_at_mod[0] > 0 and self.zeros_at_mod[1] > 0 and
+                    self.zeros_at_mod[2] > 0 and not self.invalid_utf32be):
+                    ratio = min(self.zeros_at_mod[0:3]) / (self.position / 4)
+                    if ratio > self.EXPECTED_RATIO:
+                        self._charset_name = "UTF-32BE"
+                        self._state = ProbingState.FOUND_IT
+                        return self._state
+
+                # Check UTF-32LE pattern
+                if (self.zeros_at_mod[1] > 0 and self.zeros_at_mod[2] > 0 and
+                    self.zeros_at_mod[3] > 0 and not self.invalid_utf32le):
+                    ratio = min(self.zeros_at_mod[1:4]) / (self.position / 4)
+                    if ratio > self.EXPECTED_RATIO:
+                        self._charset_name = "UTF-32LE"
+                        self._state = ProbingState.FOUND_IT
+                        return self._state
+
+                # Check UTF-16BE pattern
+                if self.zeros_at_mod[0] > 0 and not self.invalid_utf16be:
+                    ratio = self.zeros_at_mod[0] / (self.position / 2)
+                    if ratio > self.EXPECTED_RATIO:
+                        self._charset_name = "UTF-16BE"
+                        self._state = ProbingState.FOUND_IT
+                        return self._state
+
+                # Check UTF-16LE pattern
+                if self.zeros_at_mod[1] > 0 and not self.invalid_utf16le:
+                    ratio = self.zeros_at_mod[1] / (self.position / 2)
+                    if ratio > self.EXPECTED_RATIO:
+                        self._charset_name = "UTF-16LE"
+                        self._state = ProbingState.FOUND_IT
+                        return self._state
+
+        return self._state
+
+    @property
+    def charset_name(self):
+        return self._charset_name
+
+    @property
+    def language(self):
+        return ""
+
+    def get_confidence(self):
+        if self._state == ProbingState.FOUND_IT:
+            return 0.99
+        return 0.0
\ No newline at end of file
diff --git a/chardet/utf8prober.py b/chardet/utf8prober.py
index fb6f22f..41e3bc3 100644
--- a/chardet/utf8prober.py
+++ b/chardet/utf8prober.py
@@ -10,4 +10,45 @@ class UTF8Prober(CharSetProber):
         super().__init__()
         self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
         self._num_mb_chars = None
-        self.reset()
\ No newline at end of file
+        self.reset()
+
+    def reset(self):
+        super().reset()
+        self.coding_sm.reset()
+        self._num_mb_chars = 0
+
+    @property
+    def charset_name(self):
+        return "utf-8"
+
+    @property
+    def language(self):
+        return ""
+
+    def feed(self, byte_str):
+        for c in byte_str:
+            coding_state = self.coding_sm.next_state(c)
+            if coding_state == MachineState.ERROR:
+                self._state = ProbingState.NOT_ME
+                break
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
+                break
+            elif coding_state == MachineState.START:
+                char_len = self.coding_sm.get_current_charlen()
+                if char_len >= 2:
+                    self._num_mb_chars += 1
+
+        if self.state == ProbingState.DETECTING:
+            if self.get_confidence() > self.SHORTCUT_THRESHOLD:
+                self._state = ProbingState.FOUND_IT
+
+        return self.state
+
+    def get_confidence(self):
+        unlike = 0.99
+        if self._num_mb_chars < 6:
+            for i in range(0, self._num_mb_chars):
+                unlike = unlike * self.ONE_CHAR_PROB
+            return 1.0 - unlike
+        return unlike
\ No newline at end of file

OpenHands: chardet

Pytest Summary for test .

Failed pytests:

plane1-utf-32be.html-utf-32be]

nobom-utf32be.txt-utf-32be]

_ude_2.txt-iso-8859-9]

wikitop_tr_ISO-8859-9.txt-iso-8859-9]

divxplanet.com.xml-iso-8859-9]

subtitle.srt-iso-8859-9]

_ude_1.txt-iso-8859-9]

forum.template-toolkit.ru.9.xml-ibm866]

_ude_1.txt-ibm866]

money.rin.ru.xml-ibm866]

music.peeps.ru.xml-ibm866]

forum.template-toolkit.ru.6.xml-ibm866]

forum.template-toolkit.ru.4.xml-ibm866]

intertat.ru.xml-ibm866]

janulalife.blogspot.com.xml-ibm866]

forum.template-toolkit.ru.1.xml-ibm866]

blog.mlmaster.com.xml-ibm866]

newsru.com.xml-ibm866]

greek.ru.xml-ibm866]

kapranoff.ru.xml-ibm866]

aif.ru.health.xml-ibm866]

forum.template-toolkit.ru.8.xml-ibm866]

aug32.hole.ru.xml-ibm866]

aviaport.ru.xml-ibm866]

susu.ac.ru.xml-ibm866]

hardsoft.at.webry.info.xml-cp932]

y-moto.com.xml-cp932]

www2.chuo-u.ac.jp-suishin.xml-cp932]

balatonblog.typepad.com.xml-utf-8]

_ude_2.txt-utf-8]

_ude_greek.txt-utf-8]

_ude_5.txt-utf-8]

_ude_he1.txt-utf-8]

_mozilla_bug426271_text-utf-8.html-utf-8]

_ude_he3.txt-utf-8]

_chromium_UTF-8_with_no_encoding_specified.html-utf-8]

_mozilla_bug306272_text.html-utf-8]

weblabor.hu.2.xml-utf-8]

_ude_1.txt-utf-8]

boobooo.blogspot.com.xml-utf-8]

weblabor.hu.xml-utf-8]

anitabee.blogspot.com.xml-utf-8]

pihgy.hu.xml-utf-8]

_ude_3.txt-utf-8]

linuxbox.hu.xml-utf-8]

_ude_russian.txt-utf-8]

_ude_he2.txt-utf-8]

_ude_iso1.txt-iso-2022-kr]

_ude_iso2.txt-iso-2022-kr]

contents-factory.com.xml-euc-jp]

artifact-jp.com.xml-euc-jp]

siesta.co.jp.aozora.xml-euc-jp]

tls.org.xml-euc-jp]

_mozilla_bug431054_text.html-euc-jp]

azoz.org.xml-euc-jp]

atom.ycf.nanet.co.jp.xml-euc-jp]

bphrs.net.xml-euc-jp]

ch.kitaguni.tv.xml-euc-jp]

rdf.ycf.nanet.co.jp.xml-euc-jp]

manana.moo.jp.xml-euc-jp]

furusatonoeki.cutegirl.jp.xml-euc-jp]

overcube.com.xml-euc-jp]

pinkupa.com.xml-euc-jp]

mimizun.com.xml-euc-jp]

club.h14m.org.xml-euc-jp]

aristrist.s57.xrea.com.xml-euc-jp]

azito.under.jp.xml-euc-jp]

_mozilla_bug620106_text.html-euc-jp]

yukiboh.moo.jp.xml-euc-jp]

blog.kabu-navi.com.atom.xml-euc-jp]

misuzilla.org.xml-euc-jp]

overcube.com.atom.xml-euc-jp]

arclamp.jp.xml-euc-jp]

aivy.co.jp.xml-euc-jp]

_ude_1.txt-euc-jp]

_mozilla_bug426271_text-euc-jp.html-euc-jp]

akaname.main.jp.xml-euc-jp]

Pytest Summary for test `.`