Skip to content

back to OpenHands summary

OpenHands: parsel

Pytest Summary for test tests

status count
passed 109
failed 97
skipped 2
total 208
collected 208

Failed pytests:

test_selector.py::SelectorTestCase::test_boolean_result

test_selector.py::SelectorTestCase::test_boolean_result
self = 

    def test_boolean_result(self) -> None:
        body = "

"
        xs = self.sscls(text=body)
>       self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ["1"])
E       AssertionError: Lists differ: ['True'] != ['1']
E       
E       First differing element 0:
E       'True'
E       '1'
E       
E       - ['True']
E       + ['1']

tests/test_selector.py:369: AssertionError

test_selector.py::SelectorTestCase::test_deep_nesting

test_selector.py::SelectorTestCase::test_deep_nesting
self = 

    def test_deep_nesting(self) -> None:
        lxml_version = Version(etree.__version__)
        lxml_huge_tree_version = Version("4.2")

        content = """
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        hello world
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
some test
""" # If lxml doesn't support huge trees expect wrong results and a warning if lxml_version < lxml_huge_tree_version: with warnings.catch_warnings(record=True) as w: sel = Selector(text=content) self.assertIn("huge_tree", str(w[0].message)) self.assertLessEqual(len(sel.css("span")), 256) self.assertEqual(len(sel.css("td")), 0) return # Same goes for explicitly disabling huge trees with warnings.catch_warnings(record=True) as w: sel = Selector(text=content, huge_tree=False) > self.assertIn("huge_tree", str(w[0].message)) E IndexError: list index out of range tests/test_selector.py:1113: IndexError

test_selector.py::SelectorTestCase::test_differences_parsing_xml_vs_html

test_selector.py::SelectorTestCase::test_differences_parsing_xml_vs_html
self = 

    def test_differences_parsing_xml_vs_html(self) -> None:
        """Test that XML and HTML Selector's behave differently"""
        # some text which is parsed differently by XML and HTML flavors
        text = '

Hello

' hs = self.sscls(text=text, type="html") > self.assertEqual( hs.xpath("//div").extract(), ['

Hello

'], ) E AssertionError: Lists differ: ['

Hello

'] != ['

Hello

'] E E First differing element 0: E '

Hello

' E '

Hello

' E E - ['

Hello

'] E ? - E E + ['

Hello

'] tests/test_selector.py:377: AssertionError

test_selector.py::SelectorTestCase::test_etree_root_invalid_type

test_selector.py::SelectorTestCase::test_etree_root_invalid_type
self = 

    def test_etree_root_invalid_type(self) -> None:
        selector = Selector("")
>       self.assertRaisesRegex(
            ValueError,
            "object as root",
            Selector,
            root=selector.root,
            type="text",
        )
E       AssertionError: ValueError not raised by Selector

tests/test_selector.py:1175: AssertionError

test_selector.py::SelectorTestCase::test_http_header_encoding_precedence

test_selector.py::SelectorTestCase::test_http_header_encoding_precedence
self = 

    def test_http_header_encoding_precedence(self) -> None:
        # '\xa3'     = pound symbol in unicode
        # '\xc2\xa3' = pound symbol in utf-8
        # '\xa3'     = pound symbol in latin-1 (iso-8859-1)

        text = """
        
        \xa3"""
        x = self.sscls(text=text)
>       self.assertEqual(x.xpath("//span[@id='blank']/text()").extract(), ["\xa3"])
E       AssertionError: Lists differ: ['Â£'] != ['£']
E       
E       First differing element 0:
E       'Â£'
E       '£'
E       
E       - ['Â£']
E       ?   -
E       
E       + ['£']

tests/test_selector.py:790: AssertionError

test_selector.py::SelectorTestCase::test_invalid_json

test_selector.py::SelectorTestCase::test_invalid_json
text = '', input_type = 'json', base_url = None, huge_tree = True

    def _get_root_and_type_from_text(text: str, input_type: Optional[str]=None, base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE) -> Tuple[Any, str]:
        """Get root node and type from text input."""
        if input_type == 'json':
            try:
>               return json.loads(text), 'json'

parsel/selector.py:59: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/root/.local/share/uv/python/cpython-3.12.6-linux-x86_64-gnu/lib/python3.12/json/__init__.py:346: in loads
    return _default_decoder.decode(s)
/root/.local/share/uv/python/cpython-3.12.6-linux-x86_64-gnu/lib/python3.12/json/decoder.py:337: in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = , s = ''
idx = 0

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
>           raise JSONDecodeError("Expecting value", s, err.value) from None
E           json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

/root/.local/share/uv/python/cpython-3.12.6-linux-x86_64-gnu/lib/python3.12/json/decoder.py:355: JSONDecodeError

During handling of the above exception, another exception occurred:

self = 

    def test_invalid_json(self) -> None:
        text = ""
>       selector = self.sscls(text, type="json")

tests/test_selector.py:1164: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:281: in __init__
    root, type = _get_root_and_type_from_text(text, input_type=type, base_url=base_url, huge_tree=huge_tree)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

text = '', input_type = 'json', base_url = None, huge_tree = True

    def _get_root_and_type_from_text(text: str, input_type: Optional[str]=None, base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE) -> Tuple[Any, str]:
        """Get root node and type from text input."""
        if input_type == 'json':
            try:
                return json.loads(text), 'json'
            except json.JSONDecodeError as e:
>               raise ValueError(f"Invalid JSON: {str(e)}")
E               ValueError: Invalid JSON: Expecting value: line 1 column 1 (char 0)

parsel/selector.py:61: ValueError

test_selector.py::SelectorTestCase::test_invalid_xpath

test_selector.py::SelectorTestCase::test_invalid_xpath
self = 

    def test_invalid_xpath(self) -> None:
        "Test invalid xpath raises ValueError with the invalid xpath"
        x = self.sscls(text="")
        xpath = "//test[@foo='bar]"
>       self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath)

tests/test_selector.py:773: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:357: in xpath
    result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
    ???
src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E   lxml.etree.XPathEvalError: Unfinished literal

src/lxml/xpath.pxi:210: XPathEvalError

test_selector.py::SelectorTestCase::test_invalid_xpath_unicode

test_selector.py::SelectorTestCase::test_invalid_xpath_unicode
self = 

    def test_invalid_xpath_unicode(self) -> None:
        "Test *Unicode* invalid xpath raises ValueError with the invalid xpath"
        x = self.sscls(text="")
        xpath = "//test[@foo='\\u0431ar]"
>       self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath)

tests/test_selector.py:779: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:357: in xpath
    result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
    ???
src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E   lxml.etree.XPathEvalError: Unfinished literal

src/lxml/xpath.pxi:210: XPathEvalError

test_selector.py::SelectorTestCase::test_json_css

test_selector.py::SelectorTestCase::test_json_css
self = 

    def test_json_css(self) -> None:
        obj = 1
        selector = self.sscls(root=obj)
>       with self.assertRaises(ValueError):
E       AssertionError: ValueError not raised

tests/test_selector.py:1159: AssertionError

test_selector.py::SelectorTestCase::test_json_root

test_selector.py::SelectorTestCase::test_json_root
self = 

    def test_json_root(self) -> None:
        obj = 1
        selector = self.sscls(root=obj)
        self.assertEqual(selector.root, obj)
>       self.assertEqual(selector.type, "json")
E       AssertionError: 'html' != 'json'
E       - html
E       + json

tests/test_selector.py:1148: AssertionError

test_selector.py::SelectorTestCase::test_json_selector_representation

test_selector.py::SelectorTestCase::test_json_selector_representation
self = 

    def test_json_selector_representation(self) -> None:
        selector = Selector(text="true")
>       assert repr(selector) == ""
E       AssertionError

tests/test_selector.py:1192: AssertionError

test_selector.py::SelectorTestCase::test_json_xpath

test_selector.py::SelectorTestCase::test_json_xpath
self = 

    def test_json_xpath(self) -> None:
        obj = 1
        selector = self.sscls(root=obj)
>       with self.assertRaises(ValueError):
E       AssertionError: ValueError not raised

tests/test_selector.py:1153: AssertionError

test_selector.py::SelectorTestCase::test_mixed_nested_selectors

test_selector.py::SelectorTestCase::test_mixed_nested_selectors
self = 

    def test_mixed_nested_selectors(self) -> None:
        body = """
                    
notme

text

foo
"""
        sel = self.sscls(text=body)
        self.assertEqual(
>           sel.xpath('//div[@id="1"]').css("span::text").extract(), ["me"]
        )

tests/test_selector.py:498: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:163: in css
    return self.__class__(flatten([x.css(query) for x in self]))
parsel/selector.py:375: in css
    xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
.venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
    return " | ".join(
.venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in <genexpr>
    self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
.venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
    xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = 
xpath = XPathExpr[span], pseudo_element = 'text'

    def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
        if isinstance(pseudo_element, FunctionalPseudoElement):
            method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
            if not hasattr(self, method):
                raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
            method = getattr(self, method)
            return method(xpath, pseudo_element)
>       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
E       AttributeError: 'str' object has no attribute 'name'

parsel/csstranslator.py:81: AttributeError

test_selector.py::SelectorTestCase::test_namespaces_adhoc

test_selector.py::SelectorTestCase::test_namespaces_adhoc
self = 

    def test_namespaces_adhoc(self) -> None:
        body = """
        
           take this
           found
        
        """

>       x = self.sscls(text=body, type="xml")

tests/test_selector.py:527: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:281: in __init__
    root, type = _get_root_and_type_from_text(text, input_type=type, base_url=base_url, huge_tree=huge_tree)
parsel/selector.py:66: in _get_root_and_type_from_text
    root = create_root_node(text, parser_cls, base_url=base_url, huge_tree=huge_tree)
parsel/selector.py:92: in create_root_node
    root = etree.fromstring(text.encode(encoding), parser=parser, base_url=base_url)
src/lxml/etree.pyx:3306: in lxml.etree.fromstring
    ???
src/lxml/parser.pxi:1995: in lxml.etree._parseMemoryDocument
    ???
src/lxml/parser.pxi:1882: in lxml.etree._parseDoc
    ???
src/lxml/parser.pxi:1164: in lxml.etree._BaseParser._parseDoc
    ???
src/lxml/parser.pxi:633: in lxml.etree._ParserContext._handleParseResultDoc
    ???
src/lxml/parser.pxi:743: in lxml.etree._handleParseResult
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E     File "", line 3
E   lxml.etree.XMLSyntaxError: Opening and ending tag mismatch: a line 3 and a, line 3, column 44

src/lxml/parser.pxi:672: XMLSyntaxError

test_selector.py::SelectorTestCase::test_namespaces_adhoc_variables

test_selector.py::SelectorTestCase::test_namespaces_adhoc_variables
self = 

    def test_namespaces_adhoc_variables(self) -> None:
        body = """
        
           take this
           found
        
        """

>       x = self.sscls(text=body, type="xml")

tests/test_selector.py:545: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:281: in __init__
    root, type = _get_root_and_type_from_text(text, input_type=type, base_url=base_url, huge_tree=huge_tree)
parsel/selector.py:66: in _get_root_and_type_from_text
    root = create_root_node(text, parser_cls, base_url=base_url, huge_tree=huge_tree)
parsel/selector.py:92: in create_root_node
    root = etree.fromstring(text.encode(encoding), parser=parser, base_url=base_url)
src/lxml/etree.pyx:3306: in lxml.etree.fromstring
    ???
src/lxml/parser.pxi:1995: in lxml.etree._parseMemoryDocument
    ???
src/lxml/parser.pxi:1882: in lxml.etree._parseDoc
    ???
src/lxml/parser.pxi:1164: in lxml.etree._BaseParser._parseDoc
    ???
src/lxml/parser.pxi:633: in lxml.etree._ParserContext._handleParseResultDoc
    ???
src/lxml/parser.pxi:743: in lxml.etree._handleParseResult
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E     File "", line 3
E   lxml.etree.XMLSyntaxError: Opening and ending tag mismatch: a line 3 and a, line 3, column 44

src/lxml/parser.pxi:672: XMLSyntaxError

test_selector.py::SelectorTestCase::test_namespaces_multiple

test_selector.py::SelectorTestCase::test_namespaces_multiple
self = 

        def test_namespaces_multiple(self) -> None:
            body = """
    
        hello
        value
        iron90Dried Rose
    
            """
            x = self.sscls(text=body, type="xml")
            x.register_namespace(
                "xmlns",
                "http://webservices.amazon.com/AWSECommerceService/2005-10-05",
            )
            x.register_namespace("p", "http://www.scrapy.org/product")
            x.register_namespace("b", "http://somens.com")
            self.assertEqual(len(x.xpath("//xmlns:TestTag")), 1)
            self.assertEqual(x.xpath("//b:Operation/text()").extract()[0], "hello")
            self.assertEqual(x.xpath("//xmlns:TestTag/@b:att").extract()[0], "value")
            self.assertEqual(
                x.xpath("//p:SecondTestTag/xmlns:price/text()").extract()[0], "90"
            )
            self.assertEqual(
>               x.xpath("//p:SecondTestTag").xpath("./xmlns:price/text()")[0].extract(),
                "90",
            )

tests/test_selector.py:580: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:154: in xpath
    return self.__class__(flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]))
parsel/selector.py:357: in xpath
    result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
    ???
src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E   lxml.etree.XPathEvalError: Undefined namespace prefix

src/lxml/xpath.pxi:210: XPathEvalError

test_selector.py::SelectorTestCase::test_namespaces_multiple_adhoc

test_selector.py::SelectorTestCase::test_namespaces_multiple_adhoc
self = 

        def test_namespaces_multiple_adhoc(self) -> None:
            body = """
    
        hello
        value
        iron90Dried Rose
    
            """
            x = self.sscls(text=body, type="xml")
            x.register_namespace(
                "xmlns",
                "http://webservices.amazon.com/AWSECommerceService/2005-10-05",
            )
            self.assertEqual(len(x.xpath("//xmlns:TestTag")), 1)

            # "b" namespace is not declared yet
>           self.assertRaises(ValueError, x.xpath, "//xmlns:TestTag/@b:att")

tests/test_selector.py:606: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:357: in xpath
    result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
    ???
src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E   lxml.etree.XPathEvalError: Undefined namespace prefix

src/lxml/xpath.pxi:210: XPathEvalError

test_selector.py::SelectorTestCase::test_namespaces_simple

test_selector.py::SelectorTestCase::test_namespaces_simple
self = 

    def test_namespaces_simple(self) -> None:
        body = """
        
           take this
           found
        
        """

>       x = self.sscls(text=body, type="xml")

tests/test_selector.py:514: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
parsel/selector.py:281: in __init__
    root, type = _get_root_and_type_from_text(text, input_type=type, base_url=base_url, huge_tree=huge_tree)
parsel/selector.py:66: in _get_root_and_type_from_text
    root = create_root_node(text, parser_cls, base_url=base_url, huge_tree=huge_tree)
parsel/selector.py:92: in create_root_node
    root = etree.fromstring(text.encode(encoding), parser=parser, base_url=base_url)
src/lxml/etree.pyx:3306: in lxml.etree.fromstring
    ???
src/lxml/parser.pxi:1995: in lxml.etree._parseMemoryDocument
    ???
src/lxml/parser.pxi:1882: in lxml.etree._parseDoc
    ???
src/lxml/parser.pxi:1164: in lxml.etree._BaseParser._parseDoc
    ???
src/lxml/parser.pxi:633: in lxml.etree._ParserContext._handleParseResultDoc
    ???
src/lxml/parser.pxi:743: in lxml.etree._handleParseResult
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E     File "", line 3
E   lxml.etree.XMLSyntaxError: Opening and ending tag mismatch: a line 3 and a, line 3, column 44

src/lxml/parser.pxi:672: XMLSyntaxError

test_selector.py::SelectorTestCase::test_re

test_selector.py::SelectorTestCase::test_re
self = 

    def test_re(self) -> None:
        body = """
Name: Mary
  • Name: John
  • Age: 10
  • Name: Paul
  • Age: 20
Age: 20
""" x = self.sscls(text=body) name_re = re.compile(r"Name: (\w+)") > self.assertEqual(x.xpath("//ul/li").re(name_re), ["John", "Paul"]) tests/test_selector.py:707: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:175: in re return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self])) parsel/selector.py:391: in re return extract_regex(regex, self.get(), replace_entities=replace_entities) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ regex = re.compile('Name: (\\w+)'), text = '
  • Name: John
  • ' replace_entities = True def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]: """Extract a list of strings from the given text/encoding using the following policies: * if the regex contains a named group called "extract" that will be returned * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ if not text: return [] if replace_entities: > text = w3lib_replace_entities(text, keep_entities=True) E TypeError: replace_entities() got an unexpected keyword argument 'keep_entities' parsel/utils.py:63: TypeError

    test_selector.py::SelectorTestCase::test_re_first

    test_selector.py::SelectorTestCase::test_re_first
    self = 
    
        def test_re_first(self) -> None:
            """Test if re_first() returns first matched element"""
            body = '
    • 1
    • 2
    ' sel = self.sscls(text=body) self.assertEqual( > sel.xpath("//ul/li/text()").re_first(r"\d"), sel.xpath("//ul/li/text()").re(r"\d")[0], ) tests/test_selector.py:311: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:189: in re_first for el in iflatten(x.re(regex, replace_entities=replace_entities) for x in self): parsel/utils.py:25: in iflatten for el in x: parsel/selector.py:189: in for el in iflatten(x.re(regex, replace_entities=replace_entities) for x in self): parsel/selector.py:391: in re return extract_regex(regex, self.get(), replace_entities=replace_entities) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ regex = '\\d', text = '1', replace_entities = True def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]: """Extract a list of strings from the given text/encoding using the following policies: * if the regex contains a named group called "extract" that will be returned * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ if not text: return [] if replace_entities: > text = w3lib_replace_entities(text, keep_entities=True) E TypeError: replace_entities() got an unexpected keyword argument 'keep_entities' parsel/utils.py:63: TypeError

    test_selector.py::SelectorTestCase::test_re_intl

    test_selector.py::SelectorTestCase::test_re_intl
    self = 
    
        def test_re_intl(self) -> None:
            body = "
    Evento: cumplea\xf1os
    " x = self.sscls(text=body) > self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), ["cumplea\xf1os"]) tests/test_selector.py:760: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:175: in re return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self])) parsel/selector.py:391: in re return extract_regex(regex, self.get(), replace_entities=replace_entities) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ regex = 'Evento: (\\w+)', text = '
    Evento: cumpleaños
    ' replace_entities = True def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]: """Extract a list of strings from the given text/encoding using the following policies: * if the regex contains a named group called "extract" that will be returned * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ if not text: return [] if replace_entities: > text = w3lib_replace_entities(text, keep_entities=True) E TypeError: replace_entities() got an unexpected keyword argument 'keep_entities' parsel/utils.py:63: TypeError

    test_selector.py::SelectorTestCase::test_re_replace_entities

    test_selector.py::SelectorTestCase::test_re_replace_entities
    self = 
    
        def test_re_replace_entities(self) -> None:
            body = """"""
            x = self.sscls(text=body)
    
            name_re = re.compile('{"foo":(.*)}')
    
            # by default, only & and < are preserved ;
            # other entities are converted
            expected = '"bar & "baz""'
    >       self.assertEqual(x.xpath("//script/text()").re(name_re), [expected])
    
    tests/test_selector.py:728: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:175: in re
        return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self]))
    parsel/selector.py:391: in re
        return extract_regex(regex, self.get(), replace_entities=replace_entities)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = re.compile('{"foo":(.*)}'), text = '{"foo":"bar & "baz""}'
    replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_selector.py::SelectorTestCase::test_remove_namespaces

    test_selector.py::SelectorTestCase::test_remove_namespaces
    self = 
    
            def test_remove_namespaces(self) -> None:
                xml = """
        
          
          
            
          
          
        
        """
                sel = self.sscls(text=xml, type="xml")
                self.assertEqual(len(sel.xpath("//link")), 0)
                self.assertEqual(len(sel.xpath("./namespace::*")), 3)
                sel.remove_namespaces()
                self.assertEqual(len(sel.xpath("//link")), 3)
    >           self.assertEqual(len(sel.xpath("./namespace::*")), 1)
    E           AssertionError: 3 != 1
    
    tests/test_selector.py:864: AssertionError
    

    test_selector.py::SelectorTestCase::test_remove_namespaces_embedded

    test_selector.py::SelectorTestCase::test_remove_namespaces_embedded
    self = 
    
        def test_remove_namespaces_embedded(self) -> None:
            xml = """
            
              
              
                
              
              
                
                  
                  
                
                
              
            
            """
            sel = self.sscls(text=xml, type="xml")
            self.assertEqual(len(sel.xpath("//link")), 0)
            self.assertEqual(len(sel.xpath("//stop")), 0)
            self.assertEqual(len(sel.xpath("./namespace::*")), 2)
            self.assertEqual(
                len(
                    sel.xpath(
                        "//f:link",
                        namespaces={"f": "http://www.w3.org/2005/Atom"},
                    )
                ),
                2,
            )
            self.assertEqual(
                len(sel.xpath("//s:stop", namespaces={"s": "http://www.w3.org/2000/svg"})),
                2,
            )
            sel.remove_namespaces()
            self.assertEqual(len(sel.xpath("//link")), 2)
            self.assertEqual(len(sel.xpath("//stop")), 2)
    >       self.assertEqual(len(sel.xpath("./namespace::*")), 1)
    E       AssertionError: 2 != 1
    
    tests/test_selector.py:902: AssertionError
    

    test_selector.py::SelectorTestCase::test_remove_pseudo_element_selector

    test_selector.py::SelectorTestCase::test_remove_pseudo_element_selector
    self = 
    
        def test_remove_pseudo_element_selector(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) > sel_list = sel.css("li::text") tests/test_selector.py:1026: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCase::test_remove_pseudo_element_selector_list

    test_selector.py::SelectorTestCase::test_remove_pseudo_element_selector_list
    self = 
    
        def test_remove_pseudo_element_selector_list(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) > sel_list = sel.css("li::text") tests/test_selector.py:1014: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCase::test_remove_root_element_selector

    test_selector.py::SelectorTestCase::test_remove_root_element_selector
    self = 
    
        def test_remove_root_element_selector(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) > sel_list = sel.css("li::text") tests/test_selector.py:1038: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCase::test_remove_selector

    test_selector.py::SelectorTestCase::test_remove_selector
    self = 
    
        def test_remove_selector(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) sel_list = sel.css("li") sel_list[0].drop() self.assertIsSelectorList(sel.css("li")) > self.assertEqual(sel.css("li::text").getall(), ["2", "3"]) tests/test_selector.py:1008: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCase::test_replacement_null_char_from_body

    test_selector.py::SelectorTestCase::test_replacement_null_char_from_body
    self = 
    
        def test_replacement_null_char_from_body(self) -> None:
            text = "\x00

    Grainy

    " > self.assertEqual( "

    Grainy

    ", self.sscls(text).extract(), ) E AssertionError: '

    Grainy

    ' != '' E -

    Grainy

    E + tests/test_selector.py:987: AssertionError

    test_selector.py::SelectorTestCase::test_representation_unicode_query

    test_selector.py::SelectorTestCase::test_representation_unicode_query
    self = 
    
        def test_representation_unicode_query(self) -> None:
            body = f"

    " representation = "" sel = self.sscls(text=body) > self.assertEqual( [repr(it) for it in sel.xpath('//input[@value="\xa9"]/@value')], [representation], ) E AssertionError: Lists differ: [] != [''] E E Second list contains 1 additional elements. E First extra element 0: E '' E E - [] E + [''] tests/test_selector.py:226: AssertionError

    test_selector.py::SelectorTestCase::test_select_unicode_query

    test_selector.py::SelectorTestCase::test_select_unicode_query
    self = 
    
        def test_select_unicode_query(self) -> None:
            body = "

    " sel = self.sscls(text=body) > self.assertEqual(sel.xpath('//input[@name="\xa9"]/@value').extract(), ["1"]) E AssertionError: Lists differ: [] != ['1'] E E Second list contains 1 additional elements. E First extra element 0: E '1' E E - [] E + ['1'] tests/test_selector.py:352: AssertionError

    test_selector.py::SelectorTestCase::test_simple_selection_with_variables

    test_selector.py::SelectorTestCase::test_simple_selection_with_variables
    self = 
    
        def test_simple_selection_with_variables(self) -> None:
            """Using XPath variables"""
            body = "

    " sel = self.sscls(text=body) self.assertEqual( [x.extract() for x in sel.xpath("//input[@value=$number]/@name", number=1)], ["a"], ) self.assertEqual( [ x.extract() for x in sel.xpath("//input[@name=$letter]/@value", letter="b") ], ["2"], ) self.assertEqual( sel.xpath( "count(//input[@value=$number or @name=$letter])", number=2, letter="a", ).extract(), ["2.0"], ) # you can also pass booleans > self.assertEqual( sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=2, test=True).extract(), ["1"], ) E AssertionError: Lists differ: ['True'] != ['1'] E E First differing element 0: E 'True' E '1' E E - ['True'] E + ['1'] tests/test_selector.py:118: AssertionError

    test_selector.py::SelectorTestCase::test_simple_selection_with_variables_escape_friendly

    test_selector.py::SelectorTestCase::test_simple_selection_with_variables_escape_friendly
    self = 
    
        def test_simple_selection_with_variables_escape_friendly(self) -> None:
            """Using XPath variables with quotes that would need escaping with string formatting"""
            body = """

    I'm mixing single and "double quotes" and I don't care :)

    """ sel = self.sscls(text=body) t = 'I say "Yeah!"' # naive string formatting with give something like: # ValueError: XPath error: Invalid predicate in //input[@value="I say "Yeah!""]/@name > self.assertRaises(ValueError, sel.xpath, f'//input[@value="{t}"]/@name') tests/test_selector.py:153: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:357: in xpath result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs) src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath ??? src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__ ??? _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > ??? E lxml.etree.XPathEvalError: Invalid predicate src/lxml/xpath.pxi:210: XPathEvalError

    test_selector.py::SelectorTestCaseBytes::test_boolean_result

    test_selector.py::SelectorTestCaseBytes::test_boolean_result
    self = 
    
        def test_boolean_result(self) -> None:
            body = "

    " xs = self.sscls(text=body) > self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ["1"]) E AssertionError: Lists differ: ['True'] != ['1'] E E First differing element 0: E 'True' E '1' E E - ['True'] E + ['1'] tests/test_selector.py:369: AssertionError

    test_selector.py::SelectorTestCaseBytes::test_deep_nesting

    test_selector.py::SelectorTestCaseBytes::test_deep_nesting
    self = 
    
        def test_deep_nesting(self) -> None:
            lxml_version = Version(etree.__version__)
            lxml_huge_tree_version = Version("4.2")
    
            content = """
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            hello world
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
            
    some test
    """ # If lxml doesn't support huge trees expect wrong results and a warning if lxml_version < lxml_huge_tree_version: with warnings.catch_warnings(record=True) as w: sel = Selector(text=content) self.assertIn("huge_tree", str(w[0].message)) self.assertLessEqual(len(sel.css("span")), 256) self.assertEqual(len(sel.css("td")), 0) return # Same goes for explicitly disabling huge trees with warnings.catch_warnings(record=True) as w: sel = Selector(text=content, huge_tree=False) > self.assertIn("huge_tree", str(w[0].message)) E IndexError: list index out of range tests/test_selector.py:1113: IndexError

    test_selector.py::SelectorTestCaseBytes::test_differences_parsing_xml_vs_html

    test_selector.py::SelectorTestCaseBytes::test_differences_parsing_xml_vs_html
    self = 
    
        def test_differences_parsing_xml_vs_html(self) -> None:
            """Test that XML and HTML Selector's behave differently"""
            # some text which is parsed differently by XML and HTML flavors
            text = '

    Hello

    ' hs = self.sscls(text=text, type="html") > self.assertEqual( hs.xpath("//div").extract(), ['

    Hello

    '], ) E AssertionError: Lists differ: ['

    Hello

    '] != ['

    Hello

    '] E E First differing element 0: E '

    Hello

    ' E '

    Hello

    ' E E - ['

    Hello

    '] E ? - E E + ['

    Hello

    '] tests/test_selector.py:377: AssertionError

    test_selector.py::SelectorTestCaseBytes::test_etree_root_invalid_type

    test_selector.py::SelectorTestCaseBytes::test_etree_root_invalid_type
    self = 
    
        def test_etree_root_invalid_type(self) -> None:
            selector = Selector("")
    >       self.assertRaisesRegex(
                ValueError,
                "object as root",
                Selector,
                root=selector.root,
                type="text",
            )
    E       AssertionError: ValueError not raised by Selector
    
    tests/test_selector.py:1175: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_http_header_encoding_precedence

    test_selector.py::SelectorTestCaseBytes::test_http_header_encoding_precedence
    self = 
    
        def test_http_header_encoding_precedence(self) -> None:
            # '\xa3'     = pound symbol in unicode
            # '\xc2\xa3' = pound symbol in utf-8
            # '\xa3'     = pound symbol in latin-1 (iso-8859-1)
    
            text = """
            
            \xa3"""
            x = self.sscls(text=text)
    >       self.assertEqual(x.xpath("//span[@id='blank']/text()").extract(), ["\xa3"])
    E       AssertionError: Lists differ: ['£'] != ['£']
    E       
    E       First differing element 0:
    E       '£'
    E       '£'
    E       
    E       - ['£']
    E       ?   -
    E       
    E       + ['£']
    
    tests/test_selector.py:790: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_invalid_json

    test_selector.py::SelectorTestCaseBytes::test_invalid_json
    body = b'', encoding = 'utf8', input_type = 'json', base_url = None
    huge_tree = True
    
        def _get_root_and_type_from_bytes(body: bytes, encoding: str='utf8', input_type: Optional[str]=None, base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE) -> Tuple[Any, str]:
            """Get root node and type from bytes input."""
            if input_type == 'json':
                try:
    >               return json.loads(body.decode(encoding)), 'json'
    
    parsel/selector.py:73: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    /root/.local/share/uv/python/cpython-3.12.6-linux-x86_64-gnu/lib/python3.12/json/__init__.py:346: in loads
        return _default_decoder.decode(s)
    /root/.local/share/uv/python/cpython-3.12.6-linux-x86_64-gnu/lib/python3.12/json/decoder.py:337: in decode
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = , s = ''
    idx = 0
    
        def raw_decode(self, s, idx=0):
            """Decode a JSON document from ``s`` (a ``str`` beginning with
            a JSON document) and return a 2-tuple of the Python
            representation and the index in ``s`` where the document ended.
    
            This can be used to decode a JSON document from a string that may
            have extraneous data at the end.
    
            """
            try:
                obj, end = self.scan_once(s, idx)
            except StopIteration as err:
    >           raise JSONDecodeError("Expecting value", s, err.value) from None
    E           json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
    
    /root/.local/share/uv/python/cpython-3.12.6-linux-x86_64-gnu/lib/python3.12/json/decoder.py:355: JSONDecodeError
    
    During handling of the above exception, another exception occurred:
    
    self = 
    
        def test_invalid_json(self) -> None:
            text = ""
    >       selector = self.sscls(text, type="json")
    
    tests/test_selector.py:1164: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector.py:1359: in __init__
        super().__init__(
    parsel/selector.py:288: in __init__
        root, type = _get_root_and_type_from_bytes(body=body, encoding=encoding, input_type=type, base_url=base_url, huge_tree=huge_tree)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    body = b'', encoding = 'utf8', input_type = 'json', base_url = None
    huge_tree = True
    
        def _get_root_and_type_from_bytes(body: bytes, encoding: str='utf8', input_type: Optional[str]=None, base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE) -> Tuple[Any, str]:
            """Get root node and type from bytes input."""
            if input_type == 'json':
                try:
                    return json.loads(body.decode(encoding)), 'json'
                except json.JSONDecodeError as e:
    >               raise ValueError(f"Invalid JSON: {str(e)}")
    E               ValueError: Invalid JSON: Expecting value: line 1 column 1 (char 0)
    
    parsel/selector.py:75: ValueError
    

    test_selector.py::SelectorTestCaseBytes::test_invalid_xpath

    test_selector.py::SelectorTestCaseBytes::test_invalid_xpath
    self = 
    
        def test_invalid_xpath(self) -> None:
            "Test invalid xpath raises ValueError with the invalid xpath"
            x = self.sscls(text="")
            xpath = "//test[@foo='bar]"
    >       self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath)
    
    tests/test_selector.py:773: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:357: in xpath
        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
        ???
    src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E   lxml.etree.XPathEvalError: Unfinished literal
    
    src/lxml/xpath.pxi:210: XPathEvalError
    

    test_selector.py::SelectorTestCaseBytes::test_invalid_xpath_unicode

    test_selector.py::SelectorTestCaseBytes::test_invalid_xpath_unicode
    self = 
    
        def test_invalid_xpath_unicode(self) -> None:
            "Test *Unicode* invalid xpath raises ValueError with the invalid xpath"
            x = self.sscls(text="")
            xpath = "//test[@foo='\\u0431ar]"
    >       self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath)
    
    tests/test_selector.py:779: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:357: in xpath
        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
        ???
    src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E   lxml.etree.XPathEvalError: Unfinished literal
    
    src/lxml/xpath.pxi:210: XPathEvalError
    

    test_selector.py::SelectorTestCaseBytes::test_json_css

    test_selector.py::SelectorTestCaseBytes::test_json_css
    self = 
    
        def test_json_css(self) -> None:
            obj = 1
            selector = self.sscls(root=obj)
    >       with self.assertRaises(ValueError):
    E       AssertionError: ValueError not raised
    
    tests/test_selector.py:1159: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_json_root

    test_selector.py::SelectorTestCaseBytes::test_json_root
    self = 
    
        def test_json_root(self) -> None:
            obj = 1
            selector = self.sscls(root=obj)
            self.assertEqual(selector.root, obj)
    >       self.assertEqual(selector.type, "json")
    E       AssertionError: 'html' != 'json'
    E       - html
    E       + json
    
    tests/test_selector.py:1148: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_json_selector_representation

    test_selector.py::SelectorTestCaseBytes::test_json_selector_representation
    self = 
    
        def test_json_selector_representation(self) -> None:
            selector = Selector(text="true")
    >       assert repr(selector) == ""
    E       AssertionError
    
    tests/test_selector.py:1192: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_json_xpath

    test_selector.py::SelectorTestCaseBytes::test_json_xpath
    self = 
    
        def test_json_xpath(self) -> None:
            obj = 1
            selector = self.sscls(root=obj)
    >       with self.assertRaises(ValueError):
    E       AssertionError: ValueError not raised
    
    tests/test_selector.py:1153: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_mixed_nested_selectors

    test_selector.py::SelectorTestCaseBytes::test_mixed_nested_selectors
    self = 
    
        def test_mixed_nested_selectors(self) -> None:
            body = """
                        
    notme

    text

    foo
    """ sel = self.sscls(text=body) self.assertEqual( > sel.xpath('//div[@id="1"]').css("span::text").extract(), ["me"] ) tests/test_selector.py:498: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:163: in css return self.__class__(flatten([x.css(query) for x in self])) parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[span], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:81: AttributeError

    test_selector.py::SelectorTestCaseBytes::test_namespaces_adhoc

    test_selector.py::SelectorTestCaseBytes::test_namespaces_adhoc
    self = 
    
        def test_namespaces_adhoc(self) -> None:
            body = """
            
               take this
               found
            
            """
    
    >       x = self.sscls(text=body, type="xml")
    
    tests/test_selector.py:527: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector.py:1359: in __init__
        super().__init__(
    parsel/selector.py:288: in __init__
        root, type = _get_root_and_type_from_bytes(body=body, encoding=encoding, input_type=type, base_url=base_url, huge_tree=huge_tree)
    parsel/selector.py:80: in _get_root_and_type_from_bytes
        root = create_root_node('', parser_cls, base_url=base_url, huge_tree=huge_tree, body=body, encoding=encoding)
    parsel/selector.py:90: in create_root_node
        root = etree.fromstring(body, parser=parser, base_url=base_url)
    src/lxml/etree.pyx:3306: in lxml.etree.fromstring
        ???
    src/lxml/parser.pxi:1995: in lxml.etree._parseMemoryDocument
        ???
    src/lxml/parser.pxi:1882: in lxml.etree._parseDoc
        ???
    src/lxml/parser.pxi:1164: in lxml.etree._BaseParser._parseDoc
        ???
    src/lxml/parser.pxi:633: in lxml.etree._ParserContext._handleParseResultDoc
        ???
    src/lxml/parser.pxi:743: in lxml.etree._handleParseResult
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E     File "", line 3
    E   lxml.etree.XMLSyntaxError: Opening and ending tag mismatch: a line 3 and a, line 3, column 44
    
    src/lxml/parser.pxi:672: XMLSyntaxError
    

    test_selector.py::SelectorTestCaseBytes::test_namespaces_adhoc_variables

    test_selector.py::SelectorTestCaseBytes::test_namespaces_adhoc_variables
    self = 
    
        def test_namespaces_adhoc_variables(self) -> None:
            body = """
            
               take this
               found
            
            """
    
    >       x = self.sscls(text=body, type="xml")
    
    tests/test_selector.py:545: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector.py:1359: in __init__
        super().__init__(
    parsel/selector.py:288: in __init__
        root, type = _get_root_and_type_from_bytes(body=body, encoding=encoding, input_type=type, base_url=base_url, huge_tree=huge_tree)
    parsel/selector.py:80: in _get_root_and_type_from_bytes
        root = create_root_node('', parser_cls, base_url=base_url, huge_tree=huge_tree, body=body, encoding=encoding)
    parsel/selector.py:90: in create_root_node
        root = etree.fromstring(body, parser=parser, base_url=base_url)
    src/lxml/etree.pyx:3306: in lxml.etree.fromstring
        ???
    src/lxml/parser.pxi:1995: in lxml.etree._parseMemoryDocument
        ???
    src/lxml/parser.pxi:1882: in lxml.etree._parseDoc
        ???
    src/lxml/parser.pxi:1164: in lxml.etree._BaseParser._parseDoc
        ???
    src/lxml/parser.pxi:633: in lxml.etree._ParserContext._handleParseResultDoc
        ???
    src/lxml/parser.pxi:743: in lxml.etree._handleParseResult
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E     File "", line 3
    E   lxml.etree.XMLSyntaxError: Opening and ending tag mismatch: a line 3 and a, line 3, column 44
    
    src/lxml/parser.pxi:672: XMLSyntaxError
    

    test_selector.py::SelectorTestCaseBytes::test_namespaces_multiple

    test_selector.py::SelectorTestCaseBytes::test_namespaces_multiple
    self = 
    
            def test_namespaces_multiple(self) -> None:
                body = """
        
            hello
            value
            iron90Dried Rose
        
                """
                x = self.sscls(text=body, type="xml")
                x.register_namespace(
                    "xmlns",
                    "http://webservices.amazon.com/AWSECommerceService/2005-10-05",
                )
                x.register_namespace("p", "http://www.scrapy.org/product")
                x.register_namespace("b", "http://somens.com")
                self.assertEqual(len(x.xpath("//xmlns:TestTag")), 1)
                self.assertEqual(x.xpath("//b:Operation/text()").extract()[0], "hello")
                self.assertEqual(x.xpath("//xmlns:TestTag/@b:att").extract()[0], "value")
                self.assertEqual(
                    x.xpath("//p:SecondTestTag/xmlns:price/text()").extract()[0], "90"
                )
                self.assertEqual(
    >               x.xpath("//p:SecondTestTag").xpath("./xmlns:price/text()")[0].extract(),
                    "90",
                )
    
    tests/test_selector.py:580: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:154: in xpath
        return self.__class__(flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]))
    parsel/selector.py:357: in xpath
        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
        ???
    src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E   lxml.etree.XPathEvalError: Undefined namespace prefix
    
    src/lxml/xpath.pxi:210: XPathEvalError
    

    test_selector.py::SelectorTestCaseBytes::test_namespaces_multiple_adhoc

    test_selector.py::SelectorTestCaseBytes::test_namespaces_multiple_adhoc
    self = 
    
            def test_namespaces_multiple_adhoc(self) -> None:
                body = """
        
            hello
            value
            iron90Dried Rose
        
                """
                x = self.sscls(text=body, type="xml")
                x.register_namespace(
                    "xmlns",
                    "http://webservices.amazon.com/AWSECommerceService/2005-10-05",
                )
                self.assertEqual(len(x.xpath("//xmlns:TestTag")), 1)
    
                # "b" namespace is not declared yet
    >           self.assertRaises(ValueError, x.xpath, "//xmlns:TestTag/@b:att")
    
    tests/test_selector.py:606: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:357: in xpath
        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
        ???
    src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E   lxml.etree.XPathEvalError: Undefined namespace prefix
    
    src/lxml/xpath.pxi:210: XPathEvalError
    

    test_selector.py::SelectorTestCaseBytes::test_namespaces_simple

    test_selector.py::SelectorTestCaseBytes::test_namespaces_simple
    self = 
    
        def test_namespaces_simple(self) -> None:
            body = """
            
               take this
               found
            
            """
    
    >       x = self.sscls(text=body, type="xml")
    
    tests/test_selector.py:514: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector.py:1359: in __init__
        super().__init__(
    parsel/selector.py:288: in __init__
        root, type = _get_root_and_type_from_bytes(body=body, encoding=encoding, input_type=type, base_url=base_url, huge_tree=huge_tree)
    parsel/selector.py:80: in _get_root_and_type_from_bytes
        root = create_root_node('', parser_cls, base_url=base_url, huge_tree=huge_tree, body=body, encoding=encoding)
    parsel/selector.py:90: in create_root_node
        root = etree.fromstring(body, parser=parser, base_url=base_url)
    src/lxml/etree.pyx:3306: in lxml.etree.fromstring
        ???
    src/lxml/parser.pxi:1995: in lxml.etree._parseMemoryDocument
        ???
    src/lxml/parser.pxi:1882: in lxml.etree._parseDoc
        ???
    src/lxml/parser.pxi:1164: in lxml.etree._BaseParser._parseDoc
        ???
    src/lxml/parser.pxi:633: in lxml.etree._ParserContext._handleParseResultDoc
        ???
    src/lxml/parser.pxi:743: in lxml.etree._handleParseResult
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E     File "", line 3
    E   lxml.etree.XMLSyntaxError: Opening and ending tag mismatch: a line 3 and a, line 3, column 44
    
    src/lxml/parser.pxi:672: XMLSyntaxError
    

    test_selector.py::SelectorTestCaseBytes::test_re

    test_selector.py::SelectorTestCaseBytes::test_re
    self = 
    
        def test_re(self) -> None:
            body = """
    Name: Mary
    • Name: John
    • Age: 10
    • Name: Paul
    • Age: 20
    Age: 20
    """ x = self.sscls(text=body) name_re = re.compile(r"Name: (\w+)") > self.assertEqual(x.xpath("//ul/li").re(name_re), ["John", "Paul"]) tests/test_selector.py:707: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:175: in re return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self])) parsel/selector.py:391: in re return extract_regex(regex, self.get(), replace_entities=replace_entities) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ regex = re.compile('Name: (\\w+)'), text = '
  • Name: John
  • ' replace_entities = True def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]: """Extract a list of strings from the given text/encoding using the following policies: * if the regex contains a named group called "extract" that will be returned * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ if not text: return [] if replace_entities: > text = w3lib_replace_entities(text, keep_entities=True) E TypeError: replace_entities() got an unexpected keyword argument 'keep_entities' parsel/utils.py:63: TypeError

    test_selector.py::SelectorTestCaseBytes::test_re_first

    test_selector.py::SelectorTestCaseBytes::test_re_first
    self = 
    
        def test_re_first(self) -> None:
            """Test if re_first() returns first matched element"""
            body = '
    • 1
    • 2
    ' sel = self.sscls(text=body) self.assertEqual( > sel.xpath("//ul/li/text()").re_first(r"\d"), sel.xpath("//ul/li/text()").re(r"\d")[0], ) tests/test_selector.py:311: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:189: in re_first for el in iflatten(x.re(regex, replace_entities=replace_entities) for x in self): parsel/utils.py:25: in iflatten for el in x: parsel/selector.py:189: in for el in iflatten(x.re(regex, replace_entities=replace_entities) for x in self): parsel/selector.py:391: in re return extract_regex(regex, self.get(), replace_entities=replace_entities) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ regex = '\\d', text = '1', replace_entities = True def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]: """Extract a list of strings from the given text/encoding using the following policies: * if the regex contains a named group called "extract" that will be returned * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ if not text: return [] if replace_entities: > text = w3lib_replace_entities(text, keep_entities=True) E TypeError: replace_entities() got an unexpected keyword argument 'keep_entities' parsel/utils.py:63: TypeError

    test_selector.py::SelectorTestCaseBytes::test_re_intl

    test_selector.py::SelectorTestCaseBytes::test_re_intl
    self = 
    
        def test_re_intl(self) -> None:
            body = "
    Evento: cumplea\xf1os
    " x = self.sscls(text=body) > self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), ["cumplea\xf1os"]) tests/test_selector.py:760: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:175: in re return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self])) parsel/selector.py:391: in re return extract_regex(regex, self.get(), replace_entities=replace_entities) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ regex = 'Evento: (\\w+)', text = '
    Evento: cumpleaños
    ' replace_entities = True def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]: """Extract a list of strings from the given text/encoding using the following policies: * if the regex contains a named group called "extract" that will be returned * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ if not text: return [] if replace_entities: > text = w3lib_replace_entities(text, keep_entities=True) E TypeError: replace_entities() got an unexpected keyword argument 'keep_entities' parsel/utils.py:63: TypeError

    test_selector.py::SelectorTestCaseBytes::test_re_replace_entities

    test_selector.py::SelectorTestCaseBytes::test_re_replace_entities
    self = 
    
        def test_re_replace_entities(self) -> None:
            body = """<script>{"foo":"bar &amp; &quot;baz&quot;"}</script>"""
            x = self.sscls(text=body)
    
            name_re = re.compile('{"foo":(.*)}')
    
            # by default, only &amp; and &lt; are preserved ;
            # other entities are converted
            expected = '"bar &amp; "baz""'
    >       self.assertEqual(x.xpath("//script/text()").re(name_re), [expected])
    
    tests/test_selector.py:728: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:175: in re
        return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self]))
    parsel/selector.py:391: in re
        return extract_regex(regex, self.get(), replace_entities=replace_entities)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = re.compile('{"foo":(.*)}'), text = '{"foo":"bar &amp; &quot;baz&quot;"}'
    replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_selector.py::SelectorTestCaseBytes::test_remove_namespaces

    test_selector.py::SelectorTestCaseBytes::test_remove_namespaces
    self = 
    
            def test_remove_namespaces(self) -> None:
                xml = """
        
          
          
            
          
          
        
        """
                sel = self.sscls(text=xml, type="xml")
                self.assertEqual(len(sel.xpath("//link")), 0)
                self.assertEqual(len(sel.xpath("./namespace::*")), 3)
                sel.remove_namespaces()
                self.assertEqual(len(sel.xpath("//link")), 3)
    >           self.assertEqual(len(sel.xpath("./namespace::*")), 1)
    E           AssertionError: 3 != 1
    
    tests/test_selector.py:864: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_remove_namespaces_embedded

    test_selector.py::SelectorTestCaseBytes::test_remove_namespaces_embedded
    self = 
    
        def test_remove_namespaces_embedded(self) -> None:
            xml = """
            
              
              
                
              
              
                
                  
                  
                
                
              
            
            """
            sel = self.sscls(text=xml, type="xml")
            self.assertEqual(len(sel.xpath("//link")), 0)
            self.assertEqual(len(sel.xpath("//stop")), 0)
            self.assertEqual(len(sel.xpath("./namespace::*")), 2)
            self.assertEqual(
                len(
                    sel.xpath(
                        "//f:link",
                        namespaces={"f": "http://www.w3.org/2005/Atom"},
                    )
                ),
                2,
            )
            self.assertEqual(
                len(sel.xpath("//s:stop", namespaces={"s": "http://www.w3.org/2000/svg"})),
                2,
            )
            sel.remove_namespaces()
            self.assertEqual(len(sel.xpath("//link")), 2)
            self.assertEqual(len(sel.xpath("//stop")), 2)
    >       self.assertEqual(len(sel.xpath("./namespace::*")), 1)
    E       AssertionError: 2 != 1
    
    tests/test_selector.py:902: AssertionError
    

    test_selector.py::SelectorTestCaseBytes::test_remove_pseudo_element_selector

    test_selector.py::SelectorTestCaseBytes::test_remove_pseudo_element_selector
    self = 
    
        def test_remove_pseudo_element_selector(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) > sel_list = sel.css("li::text") tests/test_selector.py:1026: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCaseBytes::test_remove_pseudo_element_selector_list

    test_selector.py::SelectorTestCaseBytes::test_remove_pseudo_element_selector_list
    self = 
    
        def test_remove_pseudo_element_selector_list(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) > sel_list = sel.css("li::text") tests/test_selector.py:1014: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCaseBytes::test_remove_root_element_selector

    test_selector.py::SelectorTestCaseBytes::test_remove_root_element_selector
    self = 
    
        def test_remove_root_element_selector(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) > sel_list = sel.css("li::text") tests/test_selector.py:1038: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCaseBytes::test_remove_selector

    test_selector.py::SelectorTestCaseBytes::test_remove_selector
    self = 
    
        def test_remove_selector(self) -> None:
            sel = self.sscls(
                text="
    • 1
    • 2
    • 3
    " ) sel_list = sel.css("li") sel_list[0].drop() self.assertIsSelectorList(sel.css("li")) > self.assertEqual(sel.css("li::text").getall(), ["2", "3"]) tests/test_selector.py:1008: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:375: in css xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query) .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath return " | ".join( .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True) .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = xpath = XPathExpr[li], pseudo_element = 'text' def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr: if isinstance(pseudo_element, FunctionalPseudoElement): method = f'xpath_{pseudo_element.name}_functional_pseudo_element' if not hasattr(self, method): raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()') method = getattr(self, method) return method(xpath, pseudo_element) > method = f'xpath_{pseudo_element.name}_simple_pseudo_element' E AttributeError: 'str' object has no attribute 'name' parsel/csstranslator.py:95: AttributeError

    test_selector.py::SelectorTestCaseBytes::test_replacement_null_char_from_body

    test_selector.py::SelectorTestCaseBytes::test_replacement_null_char_from_body
    self = 
    
        def test_replacement_null_char_from_body(self) -> None:
            text = "\x00

    Grainy

    " > self.assertEqual( "

    Grainy

    ", self.sscls(text).extract(), ) E AssertionError: '

    Grainy

    ' != '' E -

    Grainy

    E + tests/test_selector.py:987: AssertionError

    test_selector.py::SelectorTestCaseBytes::test_select_unicode_query

    test_selector.py::SelectorTestCaseBytes::test_select_unicode_query
    self = 
    
        def test_select_unicode_query(self) -> None:
            body = "

    " sel = self.sscls(text=body) > self.assertEqual(sel.xpath('//input[@name="\xa9"]/@value').extract(), ["1"]) E AssertionError: Lists differ: [] != ['1'] E E Second list contains 1 additional elements. E First extra element 0: E '1' E E - [] E + ['1'] tests/test_selector.py:352: AssertionError

    test_selector.py::SelectorTestCaseBytes::test_simple_selection_with_variables

    test_selector.py::SelectorTestCaseBytes::test_simple_selection_with_variables
    self = 
    
        def test_simple_selection_with_variables(self) -> None:
            """Using XPath variables"""
            body = "

    " sel = self.sscls(text=body) self.assertEqual( [x.extract() for x in sel.xpath("//input[@value=$number]/@name", number=1)], ["a"], ) self.assertEqual( [ x.extract() for x in sel.xpath("//input[@name=$letter]/@value", letter="b") ], ["2"], ) self.assertEqual( sel.xpath( "count(//input[@value=$number or @name=$letter])", number=2, letter="a", ).extract(), ["2.0"], ) # you can also pass booleans > self.assertEqual( sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=2, test=True).extract(), ["1"], ) E AssertionError: Lists differ: ['True'] != ['1'] E E First differing element 0: E 'True' E '1' E E - ['True'] E + ['1'] tests/test_selector.py:118: AssertionError

    test_selector.py::SelectorTestCaseBytes::test_simple_selection_with_variables_escape_friendly

    test_selector.py::SelectorTestCaseBytes::test_simple_selection_with_variables_escape_friendly
    self = 
    
        def test_simple_selection_with_variables_escape_friendly(self) -> None:
            """Using XPath variables with quotes that would need escaping with string formatting"""
            body = """

    I'm mixing single and "double quotes" and I don't care :)

    """ sel = self.sscls(text=body) t = 'I say "Yeah!"' # naive string formatting with give something like: # ValueError: XPath error: Invalid predicate in //input[@value="I say "Yeah!""]/@name > self.assertRaises(ValueError, sel.xpath, f'//input[@value="{t}"]/@name') tests/test_selector.py:153: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:357: in xpath result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs) src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath ??? src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__ ??? _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > ??? E lxml.etree.XPathEvalError: Invalid predicate src/lxml/xpath.pxi:210: XPathEvalError

    test_selector_csstranslator.py::HTMLTranslatorTest::test_attr_function

    test_selector_csstranslator.py::HTMLTranslatorTest::test_attr_function
    self = 
    
        def test_attr_function(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::attr(name)", "descendant-or-self::*/@name"),
                ("a::attr(href)", "descendant-or-self::a/@href"),
                (
                    "a ::attr(img)",
                    "descendant-or-self::a/descendant-or-self::*/@img",
                ),
                ("a > ::attr(class)", "descendant-or-self::a/*/@class"),
            ]
            for css, xpath in cases:
    >           self.assertEqual(self.c2x(css), xpath, css)
    
    tests/test_selector_csstranslator.py:89: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    parsel/csstranslator.py:94: in xpath_pseudo_element
        return method(xpath, pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*], function = FunctionalPseudoElement[::attr(['name'])]
    
        def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
            """Support selecting attribute values using ::attr() pseudo-element"""
            if not function.arguments:
                raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
            if not isinstance(function.arguments[0], str):
    >           raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    E           TypeError: not all arguments converted during string formatting
    
    parsel/csstranslator.py:61: TypeError
    

    test_selector_csstranslator.py::HTMLTranslatorTest::test_attr_function_exception

    test_selector_csstranslator.py::HTMLTranslatorTest::test_attr_function_exception
    self = 
    
        def test_attr_function_exception(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::attr(12)", ExpressionError),
                ("::attr(34test)", ExpressionError),
                ("::attr(@href)", SelectorSyntaxError),
            ]
            for css, exc in cases:
    >           self.assertRaises(exc, self.c2x, css)
    
    tests/test_selector_csstranslator.py:98: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    parsel/csstranslator.py:94: in xpath_pseudo_element
        return method(xpath, pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
        def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
            """Support selecting attribute values using ::attr() pseudo-element"""
            if not function.arguments:
                raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
            if not isinstance(function.arguments[0], str):
    >           raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    E           TypeError: not all arguments converted during string formatting
    
    parsel/csstranslator.py:61: TypeError
    

    test_selector_csstranslator.py::HTMLTranslatorTest::test_text_pseudo_element

    test_selector_csstranslator.py::HTMLTranslatorTest::test_text_pseudo_element
    self = 
    
        def test_text_pseudo_element(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::text", "descendant-or-self::text()"),
                ("p::text", "descendant-or-self::p/text()"),
                ("p ::text", "descendant-or-self::p/descendant-or-self::text()"),
                ("#id::text", "descendant-or-self::*[@id = 'id']/text()"),
                ("p#id::text", "descendant-or-self::p[@id = 'id']/text()"),
                (
                    "p#id ::text",
                    "descendant-or-self::p[@id = 'id']/descendant-or-self::text()",
                ),
                ("p#id > ::text", "descendant-or-self::p[@id = 'id']/*/text()"),
                (
                    "p#id ~ ::text",
                    "descendant-or-self::p[@id = 'id']/following-sibling::*/text()",
                ),
                ("a[href]::text", "descendant-or-self::a[@href]/text()"),
                (
                    "a[href] ::text",
                    "descendant-or-self::a[@href]/descendant-or-self::text()",
                ),
                (
                    "p::text, a::text",
                    "descendant-or-self::p/text() | descendant-or-self::a/text()",
                ),
            ]
            for css, xpath in cases:
    >           self.assertEqual(self.c2x(css), xpath, css)
    
    tests/test_selector_csstranslator.py:127: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_selector_csstranslator.py::HTMLTranslatorTest::test_unknown_pseudo_element

    test_selector_csstranslator.py::HTMLTranslatorTest::test_unknown_pseudo_element
    self = 
    
        def test_unknown_pseudo_element(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::text-node", ExpressionError),
            ]
            for css, exc in cases:
    >           self.assertRaises(exc, self.c2x, css)
    
    tests/test_selector_csstranslator.py:143: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_selector_csstranslator.py::GenericTranslatorTest::test_attr_function

    test_selector_csstranslator.py::GenericTranslatorTest::test_attr_function
    self = 
    
        def test_attr_function(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::attr(name)", "descendant-or-self::*/@name"),
                ("a::attr(href)", "descendant-or-self::a/@href"),
                (
                    "a ::attr(img)",
                    "descendant-or-self::a/descendant-or-self::*/@img",
                ),
                ("a > ::attr(class)", "descendant-or-self::a/*/@class"),
            ]
            for css, xpath in cases:
    >           self.assertEqual(self.c2x(css), xpath, css)
    
    tests/test_selector_csstranslator.py:89: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    parsel/csstranslator.py:80: in xpath_pseudo_element
        return method(xpath, pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*], function = FunctionalPseudoElement[::attr(['name'])]
    
        def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
            """Support selecting attribute values using ::attr() pseudo-element"""
            if not function.arguments:
                raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
            if not isinstance(function.arguments[0], str):
    >           raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    E           TypeError: not all arguments converted during string formatting
    
    parsel/csstranslator.py:61: TypeError
    

    test_selector_csstranslator.py::GenericTranslatorTest::test_attr_function_exception

    test_selector_csstranslator.py::GenericTranslatorTest::test_attr_function_exception
    self = 
    
        def test_attr_function_exception(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::attr(12)", ExpressionError),
                ("::attr(34test)", ExpressionError),
                ("::attr(@href)", SelectorSyntaxError),
            ]
            for css, exc in cases:
    >           self.assertRaises(exc, self.c2x, css)
    
    tests/test_selector_csstranslator.py:98: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    parsel/csstranslator.py:80: in xpath_pseudo_element
        return method(xpath, pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
        def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
            """Support selecting attribute values using ::attr() pseudo-element"""
            if not function.arguments:
                raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
            if not isinstance(function.arguments[0], str):
    >           raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    E           TypeError: not all arguments converted during string formatting
    
    parsel/csstranslator.py:61: TypeError
    

    test_selector_csstranslator.py::GenericTranslatorTest::test_text_pseudo_element

    test_selector_csstranslator.py::GenericTranslatorTest::test_text_pseudo_element
    self = 
    
        def test_text_pseudo_element(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::text", "descendant-or-self::text()"),
                ("p::text", "descendant-or-self::p/text()"),
                ("p ::text", "descendant-or-self::p/descendant-or-self::text()"),
                ("#id::text", "descendant-or-self::*[@id = 'id']/text()"),
                ("p#id::text", "descendant-or-self::p[@id = 'id']/text()"),
                (
                    "p#id ::text",
                    "descendant-or-self::p[@id = 'id']/descendant-or-self::text()",
                ),
                ("p#id > ::text", "descendant-or-self::p[@id = 'id']/*/text()"),
                (
                    "p#id ~ ::text",
                    "descendant-or-self::p[@id = 'id']/following-sibling::*/text()",
                ),
                ("a[href]::text", "descendant-or-self::a[@href]/text()"),
                (
                    "a[href] ::text",
                    "descendant-or-self::a[@href]/descendant-or-self::text()",
                ),
                (
                    "p::text, a::text",
                    "descendant-or-self::p/text() | descendant-or-self::a/text()",
                ),
            ]
            for css, xpath in cases:
    >           self.assertEqual(self.c2x(css), xpath, css)
    
    tests/test_selector_csstranslator.py:127: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:81: AttributeError
    

    test_selector_csstranslator.py::GenericTranslatorTest::test_unknown_pseudo_element

    test_selector_csstranslator.py::GenericTranslatorTest::test_unknown_pseudo_element
    self = 
    
        def test_unknown_pseudo_element(self: TranslatorTestProtocol) -> None:
            cases = [
                ("::text-node", ExpressionError),
            ]
            for css, exc in cases:
    >           self.assertRaises(exc, self.c2x, css)
    
    tests/test_selector_csstranslator.py:143: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:81: AttributeError
    

    test_selector_csstranslator.py::CSSSelectorTest::test_attribute_function

    test_selector_csstranslator.py::CSSSelectorTest::test_attribute_function
    self = 
    
        def test_attribute_function(self) -> None:
    >       self.assertEqual(self.x("#p-b2::attr(id)"), ["p-b2"])
    
    tests/test_selector_csstranslator.py:204: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector_csstranslator.py:181: in x
        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    parsel/csstranslator.py:94: in xpath_pseudo_element
        return method(xpath, pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*[@id = 'p-b2']]
    function = FunctionalPseudoElement[::attr(['id'])]
    
        def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
            """Support selecting attribute values using ::attr() pseudo-element"""
            if not function.arguments:
                raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
            if not isinstance(function.arguments[0], str):
    >           raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    E           TypeError: not all arguments converted during string formatting
    
    parsel/csstranslator.py:61: TypeError
    

    test_selector_csstranslator.py::CSSSelectorTest::test_nested_selector

    test_selector_csstranslator.py::CSSSelectorTest::test_nested_selector
    self = 
    
        def test_nested_selector(self) -> None:
    >       self.assertEqual(self.sel.css("p").css("b::text").extract(), ["hi", "guy"])
    
    tests/test_selector_csstranslator.py:214: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:163: in css
        return self.__class__(flatten([x.css(query) for x in self]))
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[b], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:81: AttributeError
    

    test_selector_csstranslator.py::CSSSelectorTest::test_pseudoclass_has

    test_selector_csstranslator.py::CSSSelectorTest::test_pseudoclass_has
    self = 
    
        @pytest.mark.xfail(
            Version(cssselect.__version__) < Version("1.2.0"),
            reason="Support added in cssselect 1.2.0",
        )
        def test_pseudoclass_has(self) -> None:
    >       self.assertEqual(self.x("p:has(b)::text"), ["lorem ipsum text"])
    
    tests/test_selector_csstranslator.py:225: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector_csstranslator.py:181: in x
        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[p[descendant::b]], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_selector_csstranslator.py::CSSSelectorTest::test_text_pseudo_element

    test_selector_csstranslator.py::CSSSelectorTest::test_text_pseudo_element
    self = 
    
        def test_text_pseudo_element(self) -> None:
            self.assertEqual(self.x("#p-b2"), ['guy'])
    >       self.assertEqual(self.x("#p-b2::text"), ["guy"])
    
    tests/test_selector_csstranslator.py:193: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector_csstranslator.py:181: in x
        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*[@id = 'p-b2']], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_attribute_function

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_attribute_function
    self = 
    
        def test_attribute_function(self) -> None:
    >       self.assertEqual(self.x("#p-b2::attr(id)"), ["p-b2"])
    
    tests/test_selector_csstranslator.py:204: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector_csstranslator.py:181: in x
        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    parsel/csstranslator.py:94: in xpath_pseudo_element
        return method(xpath, pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*[@id = 'p-b2']]
    function = FunctionalPseudoElement[::attr(['id'])]
    
        def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
            """Support selecting attribute values using ::attr() pseudo-element"""
            if not function.arguments:
                raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
            if not isinstance(function.arguments[0], str):
    >           raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    E           TypeError: not all arguments converted during string formatting
    
    parsel/csstranslator.py:61: TypeError
    

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_nested_selector

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_nested_selector
    self = 
    
        def test_nested_selector(self) -> None:
    >       self.assertEqual(self.sel.css("p").css("b::text").extract(), ["hi", "guy"])
    
    tests/test_selector_csstranslator.py:214: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:163: in css
        return self.__class__(flatten([x.css(query) for x in self]))
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[b], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:81: AttributeError
    

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_pseudoclass_has

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_pseudoclass_has
    self = 
    
        @pytest.mark.xfail(
            Version(cssselect.__version__) < Version("1.2.0"),
            reason="Support added in cssselect 1.2.0",
        )
        def test_pseudoclass_has(self) -> None:
    >       self.assertEqual(self.x("p:has(b)::text"), ["lorem ipsum text"])
    
    tests/test_selector_csstranslator.py:225: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector_csstranslator.py:181: in x
        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[p[descendant::b]], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_text_pseudo_element

    test_selector_csstranslator.py::CSSSelectorTestBytes::test_text_pseudo_element
    self = 
    
        def test_text_pseudo_element(self) -> None:
            self.assertEqual(self.x("#p-b2"), ['guy'])
    >       self.assertEqual(self.x("#p-b2::text"), ["guy"])
    
    tests/test_selector_csstranslator.py:193: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    tests/test_selector_csstranslator.py:181: in x
        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in 
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[*[@id = 'p-b2']], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_selector_jmespath.py::JMESPathTestCase::test_html_has_json

    test_selector_jmespath.py::JMESPathTestCase::test_html_has_json
    self = 
    
        def test_html_has_json(self) -> None:
            html_text = """
            

    Information

    { "user": [ { "name": "A", "age": 18 }, { "name": "B", "age": 32 }, { "name": "C", "age": 22 }, { "name": "D", "age": 25 } ], "total": 4, "status": "ok" }
    """ sel = Selector(text=html_text) self.assertEqual( > sel.xpath("//div/content/text()").jmespath("user[*].name").getall(), ["A", "B", "C", "D"], ) tests/test_selector_jmespath.py:81: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:135: in jmespath return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self])) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = query = 'user[*].name', kwargs = {} def jmespath(self: _SelectorType, query: str, **kwargs: Any) -> SelectorList[_SelectorType]: """ Find objects matching the JMESPath ``query`` and return the result as a :class:`SelectorList` instance with all elements flattened. List elements implement :class:`Selector` interface too. ``query`` is a string containing the `JMESPath `_ query to apply. Any additional named arguments are passed to the underlying ``jmespath.search`` call, e.g.:: selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict)) """ if self.type != 'json': > raise ValueError('JMESPath expressions can only be applied to JSON data') E ValueError: JMESPath expressions can only be applied to JSON data parsel/selector.py:321: ValueError

    test_selector_jmespath.py::JMESPathTestCase::test_jmestpath_with_re

    test_selector_jmespath.py::JMESPathTestCase::test_jmestpath_with_re
    self = 
    
        def test_jmestpath_with_re(self) -> None:
            html_text = """
                

    Information

    { "user": [ { "name": "A", "age": 18 }, { "name": "B", "age": 32 }, { "name": "C", "age": 22 }, { "name": "D", "age": 25 } ], "total": 4, "status": "ok" }
    """ sel = Selector(text=html_text) self.assertEqual( > sel.xpath("//div/content/text()").jmespath("user[*].name").re(r"(\w+)"), ["A", "B", "C", "D"], ) tests/test_selector_jmespath.py:122: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ parsel/selector.py:135: in jmespath return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self])) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = query = 'user[*].name', kwargs = {} def jmespath(self: _SelectorType, query: str, **kwargs: Any) -> SelectorList[_SelectorType]: """ Find objects matching the JMESPath ``query`` and return the result as a :class:`SelectorList` instance with all elements flattened. List elements implement :class:`Selector` interface too. ``query`` is a string containing the `JMESPath `_ query to apply. Any additional named arguments are passed to the underlying ``jmespath.search`` call, e.g.:: selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict)) """ if self.type != 'json': > raise ValueError('JMESPath expressions can only be applied to JSON data') E ValueError: JMESPath expressions can only be applied to JSON data parsel/selector.py:321: ValueError

    test_selector_jmespath.py::JMESPathTestCase::test_json_has_html

    test_selector_jmespath.py::JMESPathTestCase::test_json_has_html
    self = 
    
        def test_json_has_html(self) -> None:
            """Sometimes the information is returned in a json wrapper"""
            data = """
            {
                "content": [
                    {
                        "name": "A",
                        "value": "a"
                    },
                    {
                        "name": {
                            "age": 18
                        },
                        "value": "b"
                    },
                    {
                        "name": "C",
                        "value": "c"
                    },
                    {
                        "name": "D",
                        "value": "
    d
    " } ], "html": "
    def
    " } """ sel = Selector(text=data) self.assertEqual( > sel.jmespath("html").get(), "
    def
    ", ) tests/test_selector_jmespath.py:39: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = query = 'html', kwargs = {} def jmespath(self: _SelectorType, query: str, **kwargs: Any) -> SelectorList[_SelectorType]: """ Find objects matching the JMESPath ``query`` and return the result as a :class:`SelectorList` instance with all elements flattened. List elements implement :class:`Selector` interface too. ``query`` is a string containing the `JMESPath `_ query to apply. Any additional named arguments are passed to the underlying ``jmespath.search`` call, e.g.:: selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict)) """ if self.type != 'json': > raise ValueError('JMESPath expressions can only be applied to JSON data') E ValueError: JMESPath expressions can only be applied to JSON data parsel/selector.py:321: ValueError

    test_selector_jmespath.py::JMESPathTestCase::test_json_types

    test_selector_jmespath.py::JMESPathTestCase::test_json_types
    self = 
    
        def test_json_types(self) -> None:
            for text, root in (
                ("{}", {}),
                ('{"a": "b"}', {"a": "b"}),
                ("[]", []),
                ('["a"]', ["a"]),
                ('""', ""),
                ("0", 0),
                ("1", 1),
                ("true", True),
                ("false", False),
                ("null", None),
            ):
                selector = Selector(text=text, root=_NOT_SET)
    >           self.assertEqual(selector.type, "json")
    E           AssertionError: 'html' != 'json'
    E           - html
    E           + json
    
    tests/test_selector_jmespath.py:163: AssertionError
    

    test_utils.py::test_shorten[0-]

    test_utils.py::test_shorten[0-]
    width = 0, expected = ''
    
        @mark.parametrize(
            "width,expected",
            (
                (-1, ValueError),
                (0, ""),
                (1, "."),
                (2, ".."),
                (3, "..."),
                (4, "f..."),
                (5, "fo..."),
                (6, "foobar"),
                (7, "foobar"),
            ),
        )
        def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
            if isinstance(expected, str):
    >           assert shorten("foobar", width) == expected
    
    tests/test_utils.py:24: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    text = 'foobar', width = 0, suffix = '...'
    
        def shorten(text: str, width: int, suffix: str='...') -> str:
            """Truncate the given text to fit in the given width."""
            if width <= 0:
    >           raise ValueError('Width must be greater than 0')
    E           ValueError: Width must be greater than 0
    
    parsel/utils.py:79: ValueError
    

    test_utils.py::test_shorten[1-.]

    test_utils.py::test_shorten[1-.]
    width = 1, expected = '.'
    
        @mark.parametrize(
            "width,expected",
            (
                (-1, ValueError),
                (0, ""),
                (1, "."),
                (2, ".."),
                (3, "..."),
                (4, "f..."),
                (5, "fo..."),
                (6, "foobar"),
                (7, "foobar"),
            ),
        )
        def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
            if isinstance(expected, str):
    >           assert shorten("foobar", width) == expected
    E           AssertionError
    
    tests/test_utils.py:24: AssertionError
    

    test_utils.py::test_shorten[2-..]

    test_utils.py::test_shorten[2-..]
    width = 2, expected = '..'
    
        @mark.parametrize(
            "width,expected",
            (
                (-1, ValueError),
                (0, ""),
                (1, "."),
                (2, ".."),
                (3, "..."),
                (4, "f..."),
                (5, "fo..."),
                (6, "foobar"),
                (7, "foobar"),
            ),
        )
        def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
            if isinstance(expected, str):
    >           assert shorten("foobar", width) == expected
    E           AssertionError
    
    tests/test_utils.py:24: AssertionError
    

    test_utils.py::test_shorten[3-...]

    test_utils.py::test_shorten[3-...]
    width = 3, expected = '...'
    
        @mark.parametrize(
            "width,expected",
            (
                (-1, ValueError),
                (0, ""),
                (1, "."),
                (2, ".."),
                (3, "..."),
                (4, "f..."),
                (5, "fo..."),
                (6, "foobar"),
                (7, "foobar"),
            ),
        )
        def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
            if isinstance(expected, str):
    >           assert shorten("foobar", width) == expected
    E           AssertionError
    
    tests/test_utils.py:24: AssertionError
    

    test_utils.py::test_extract_regex[(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)-October  25, 2019-True-expected0]

    test_utils.py::test_extract_regex[(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)-October  25, 2019-True-expected0]
    regex = '(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)'
    text = 'October  25, 2019', replace_entities = True
    expected = ['October', '25', '2019']
    
        @mark.parametrize(
            "regex, text, replace_entities, expected",
            (
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25, 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October"],
                ],
                [
                    r"\w+\s*\d+\s*\,?\s*\d+",
                    "October  25 2019",
                    True,
                    ["October  25 2019"],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    True,
                    ['"sometext" &amp; "moretext"'],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    False,
                    ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
                ],
            ),
        )
        def test_extract_regex(
            regex: Union[str, Pattern[str]],
            text: str,
            replace_entities: bool,
            expected: List[str],
        ) -> None:
    >       assert extract_regex(regex, text, replace_entities) == expected
    
    tests/test_utils.py:77: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = '(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)'
    text = 'October  25, 2019', replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_utils.py::test_extract_regex[(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)-October  25 2019-True-expected1]

    test_utils.py::test_extract_regex[(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)-October  25 2019-True-expected1]
    regex = '(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)'
    text = 'October  25 2019', replace_entities = True
    expected = ['October', '25', '2019']
    
        @mark.parametrize(
            "regex, text, replace_entities, expected",
            (
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25, 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October"],
                ],
                [
                    r"\w+\s*\d+\s*\,?\s*\d+",
                    "October  25 2019",
                    True,
                    ["October  25 2019"],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    True,
                    ['"sometext" &amp; "moretext"'],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    False,
                    ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
                ],
            ),
        )
        def test_extract_regex(
            regex: Union[str, Pattern[str]],
            text: str,
            replace_entities: bool,
            expected: List[str],
        ) -> None:
    >       assert extract_regex(regex, text, replace_entities) == expected
    
    tests/test_utils.py:77: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = '(?P<month>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)'
    text = 'October  25 2019', replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_utils.py::test_extract_regex[(?P<extract>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)-October  25 2019-True-expected2]

    test_utils.py::test_extract_regex[(?P<extract>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)-October  25 2019-True-expected2]
    regex = '(?P<extract>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)'
    text = 'October  25 2019', replace_entities = True, expected = ['October']
    
        @mark.parametrize(
            "regex, text, replace_entities, expected",
            (
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25, 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October"],
                ],
                [
                    r"\w+\s*\d+\s*\,?\s*\d+",
                    "October  25 2019",
                    True,
                    ["October  25 2019"],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    True,
                    ['"sometext" &amp; "moretext"'],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    False,
                    ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
                ],
            ),
        )
        def test_extract_regex(
            regex: Union[str, Pattern[str]],
            text: str,
            replace_entities: bool,
            expected: List[str],
        ) -> None:
    >       assert extract_regex(regex, text, replace_entities) == expected
    
    tests/test_utils.py:77: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = '(?P<extract>\\w+)\\s*(?P<day>\\d+)\\s*\\,?\\s*(?P<year>\\d+)'
    text = 'October  25 2019', replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_utils.py::test_extract_regex[\\w+\\s*\\d+\\s*\\,?\\s*\\d+-October  25 2019-True-expected3]

    test_utils.py::test_extract_regex[\\w+\\s*\\d+\\s*\\,?\\s*\\d+-October  25 2019-True-expected3]
    regex = '\\w+\\s*\\d+\\s*\\,?\\s*\\d+', text = 'October  25 2019'
    replace_entities = True, expected = ['October  25 2019']
    
        @mark.parametrize(
            "regex, text, replace_entities, expected",
            (
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25, 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October"],
                ],
                [
                    r"\w+\s*\d+\s*\,?\s*\d+",
                    "October  25 2019",
                    True,
                    ["October  25 2019"],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    True,
                    ['"sometext" &amp; "moretext"'],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    False,
                    ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
                ],
            ),
        )
        def test_extract_regex(
            regex: Union[str, Pattern[str]],
            text: str,
            replace_entities: bool,
            expected: List[str],
        ) -> None:
    >       assert extract_regex(regex, text, replace_entities) == expected
    
    tests/test_utils.py:77: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = '\\w+\\s*\\d+\\s*\\,?\\s*\\d+', text = 'October  25 2019'
    replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_utils.py::test_extract_regex[^.*$-&quot;sometext&quot; &amp; &quot;moretext&quot;-True-expected4]

    test_utils.py::test_extract_regex[^.*$-&quot;sometext&quot; &amp; &quot;moretext&quot;-True-expected4]
    regex = '^.*$', text = '&quot;sometext&quot; &amp; &quot;moretext&quot;'
    replace_entities = True, expected = ['"sometext" &amp; "moretext"']
    
        @mark.parametrize(
            "regex, text, replace_entities, expected",
            (
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25, 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October", "25", "2019"],
                ],
                [
                    r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                    "October  25 2019",
                    True,
                    ["October"],
                ],
                [
                    r"\w+\s*\d+\s*\,?\s*\d+",
                    "October  25 2019",
                    True,
                    ["October  25 2019"],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    True,
                    ['"sometext" &amp; "moretext"'],
                ],
                [
                    r"^.*$",
                    "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                    False,
                    ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
                ],
            ),
        )
        def test_extract_regex(
            regex: Union[str, Pattern[str]],
            text: str,
            replace_entities: bool,
            expected: List[str],
        ) -> None:
    >       assert extract_regex(regex, text, replace_entities) == expected
    
    tests/test_utils.py:77: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    regex = '^.*$', text = '&quot;sometext&quot; &amp; &quot;moretext&quot;'
    replace_entities = True
    
        def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
            """Extract a list of strings from the given text/encoding using the following policies:
            * if the regex contains a named group called "extract" that will be returned
            * if the regex contains multiple numbered groups, all those will be returned (flattened)
            * if the regex doesn't contain any group the entire regex matching is returned
            """
            if not text:
                return []
            if replace_entities:
    >           text = w3lib_replace_entities(text, keep_entities=True)
    E           TypeError: replace_entities() got an unexpected keyword argument 'keep_entities'
    
    parsel/utils.py:63: TypeError
    

    test_xml_attacks.py::XMLAttackTestCase::test_billion_laughs

    test_xml_attacks.py::XMLAttackTestCase::test_billion_laughs
    self = 
    
        def test_billion_laughs(self) -> None:
            process = Process()
            memory_usage_before = process.memory_info().rss
            selector = Selector(text=_load("billion_laughs"))
    >       lolz = selector.css("lolz::text").get()
    
    tests/test_xml_attacks.py:27: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:375: in css
        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:222: in css_to_xpath
        return " | ".join(
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:223: in <genexpr>
        self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
    .venv/lib/python3.12/site-packages/cssselect/xpath.py:259: in selector_to_xpath
        xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    self = 
    xpath = XPathExpr[lolz], pseudo_element = 'text'
    
        def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
                if not hasattr(self, method):
                    raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
                method = getattr(self, method)
                return method(xpath, pseudo_element)
    >       method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    E       AttributeError: 'str' object has no attribute 'name'
    
    parsel/csstranslator.py:95: AttributeError
    

    test_xpathfuncs.py::XPathFuncsTestCase::test_has_class_unicode

    test_xpathfuncs.py::XPathFuncsTestCase::test_has_class_unicode
    context = 
    classes = ('fóó',), class_ = 'fóó'
    
        def has_class(context: Any, *classes: str) -> bool:
            """has-class function.
    
            Return True if all ``classes`` are present in element's class attr.
    
            """
            if not classes:
                raise ValueError("has-class must have at least 1 argument")
    
            for class_ in classes:
                if not isinstance(class_, str):
                    raise ValueError("has-class arguments must be strings")
                try:
    >               class_.encode('ascii')
    E               UnicodeEncodeError: 'ascii' codec can't encode characters in position 1-2: ordinal not in range(128)
    
    parsel/xpathfuncs.py:45: UnicodeEncodeError
    
    During handling of the above exception, another exception occurred:
    
    self = 
    
        def test_has_class_unicode(self) -> None:
            body = """
            

    First

            """
            sel = Selector(text=body)
            self.assertEqual(
    >           [x.extract() for x in sel.xpath('//p[has-class("fóó")]/text()')],
                ["First"],
            )
    
    tests/test_xpathfuncs.py:76: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:357: in xpath
        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
        ???
    src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
        ???
    src/lxml/xpath.pxi:206: in lxml.etree._XPathEvaluatorBase._handle_result
        ???
    src/lxml/etree.pyx:351: in lxml.etree._ExceptionContext._raise_if_stored
        ???
    src/lxml/extensions.pxi:801: in lxml.etree._extension_function_call
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    context = 
    classes = ('fóó',), class_ = 'fóó'
    
        def has_class(context: Any, *classes: str) -> bool:
            """has-class function.
    
            Return True if all ``classes`` are present in element's class attr.
    
            """
            if not classes:
                raise ValueError("has-class must have at least 1 argument")
    
            for class_ in classes:
                if not isinstance(class_, str):
                    raise ValueError("has-class arguments must be strings")
                try:
                    class_.encode('ascii')
                except UnicodeEncodeError:
    >               raise ValueError("All strings must be XML compatible")
    E               ValueError: All strings must be XML compatible
    
    parsel/xpathfuncs.py:47: ValueError

    test_xpathfuncs.py::XPathFuncsTestCase::test_set_xpathfunc

    test_xpathfuncs.py::XPathFuncsTestCase::test_set_xpathfunc
    self = 
    
        def test_set_xpathfunc(self) -> None:
            def myfunc(ctx: Any) -> None:
                myfunc.call_count += 1  # type: ignore[attr-defined]
    
            myfunc.call_count = 0  # type: ignore[attr-defined]
    
            body = """
            

    First

            """
            sel = Selector(text=body)
    >       self.assertRaisesRegex(
                ValueError,
                "Unregistered function in myfunc",
                sel.xpath,
                "myfunc()",
            )
    
    tests/test_xpathfuncs.py:121: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    parsel/selector.py:357: in xpath
        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    src/lxml/etree.pyx:1623: in lxml.etree._Element.xpath
        ???
    src/lxml/xpath.pxi:290: in lxml.etree.XPathElementEvaluator.__call__
        ???
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    
    >   ???
    E   lxml.etree.XPathEvalError: Unregistered function
    
    src/lxml/xpath.pxi:210: XPathEvalError

    Patch diff

    diff --git a/parsel/__init__.py b/parsel/__init__.py
    index 5fdbf35..22c7938 100644
    --- a/parsel/__init__.py
    +++ b/parsel/__init__.py
    @@ -13,8 +13,8 @@ __all__ = [
         "xpathfuncs",
     ]
    
    -from parsel import xpathfuncs  # NOQA
    +from parsel.xpathfuncs import setup  # NOQA
     from parsel.csstranslator import css2xpath  # NOQA
     from parsel.selector import Selector, SelectorList  # NOQA
    
    -xpathfuncs.setup()
    +setup()
    diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py
    index b617836..d574226 100644
    --- a/parsel/csstranslator.py
    +++ b/parsel/csstranslator.py
    @@ -28,7 +28,8 @@ class XPathExpr(OriginalXPathExpr):
             return path
    
     class TranslatorProtocol(Protocol):
    -    pass
    +    def css_to_xpath(self, css: str) -> str:
    +        ...
    
     class TranslatorMixin:
         """This mixin adds support to CSS pseudo elements via dynamic dispatch.
    @@ -40,23 +41,65 @@ class TranslatorMixin:
             """
             Dispatch method that transforms XPath to support pseudo-element
             """
    -        pass
    +        if isinstance(pseudo_element, FunctionalPseudoElement):
    +            method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
    +            if not hasattr(self, method):
    +                raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
    +            method = getattr(self, method)
    +            return method(xpath, pseudo_element)
    +        method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    +        if not hasattr(self, method):
    +            raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}')
    +        method = getattr(self, method)
    +        return method(xpath)
    
         def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr, function: FunctionalPseudoElement) -> XPathExpr:
             """Support selecting attribute values using ::attr() pseudo-element"""
    -        pass
    +        if not function.arguments:
    +            raise ExpressionError("Expected at least 1 argument for ::attr(), got 0")
    +        if not isinstance(function.arguments[0], str):
    +            raise ExpressionError("Expected a string value for ::attr(), got %r" % function.arguments[0])
    +        xpath = XPathExpr.from_xpath(xpath)
    +        xpath.attribute = function.arguments[0]
    +        xpath.textnode = False
    +        return xpath
    
         def xpath_text_simple_pseudo_element(self, xpath: OriginalXPathExpr) -> XPathExpr:
             """Support selecting text nodes using ::text pseudo-element"""
    -        pass
    +        xpath = XPathExpr.from_xpath(xpath)
    +        xpath.textnode = True
    +        return xpath
    
     class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
    -    pass
    +    def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
    +        if isinstance(pseudo_element, FunctionalPseudoElement):
    +            method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
    +            if not hasattr(self, method):
    +                raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
    +            method = getattr(self, method)
    +            return method(xpath, pseudo_element)
    +        method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    +        if not hasattr(self, method):
    +            raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}')
    +        method = getattr(self, method)
    +        return method(xpath)
    
     class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
    -    pass
    +    def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_element: PseudoElement) -> OriginalXPathExpr:
    +        if isinstance(pseudo_element, FunctionalPseudoElement):
    +            method = f'xpath_{pseudo_element.name}_functional_pseudo_element'
    +            if not hasattr(self, method):
    +                raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}()')
    +            method = getattr(self, method)
    +            return method(xpath, pseudo_element)
    +        method = f'xpath_{pseudo_element.name}_simple_pseudo_element'
    +        if not hasattr(self, method):
    +            raise ExpressionError(f'Unknown pseudo-element ::{pseudo_element.name}')
    +        method = getattr(self, method)
    +        return method(xpath)
     _translator = HTMLTranslator()
    
    +@lru_cache(maxsize=5000)
     def css2xpath(query: str) -> str:
         """Return translated XPath version of a given CSS query"""
    -    pass
    \ No newline at end of file
    +    return _translator.css_to_xpath(query)
    \ No newline at end of file
    diff --git a/parsel/selector.py b/parsel/selector.py
    index a36cfa5..75ca173 100644
    --- a/parsel/selector.py
    +++ b/parsel/selector.py
    @@ -39,9 +39,62 @@ class CTGroupValue(TypedDict):
         _tostring_method: str
     _ctgroup: Dict[str, CTGroupValue] = {'html': {'_parser': html.HTMLParser, '_csstranslator': HTMLTranslator(), '_tostring_method': 'html'}, 'xml': {'_parser': SafeXMLParser, '_csstranslator': GenericTranslator(), '_tostring_method': 'xml'}}
    
    +def _get_root_type(root: Any, input_type: Optional[str]=None) -> str:
    +    """Get root type based on root object and input type."""
    +    if input_type is not None:
    +        return input_type
    +    elif isinstance(root, (dict, list)):
    +        return 'json'
    +    elif isinstance(root, (etree._Element, etree._ElementTree)):
    +        if isinstance(root, etree._Element) and root.tag == 'html':
    +            return 'html'
    +        return 'xml'
    +    else:
    +        return 'html'
    +
    +def _get_root_and_type_from_text(text: str, input_type: Optional[str]=None, base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE) -> Tuple[Any, str]:
    +    """Get root node and type from text input."""
    +    if input_type == 'json':
    +        try:
    +            return json.loads(text), 'json'
    +        except json.JSONDecodeError as e:
    +            raise ValueError(f"Invalid JSON: {str(e)}")
    +    elif input_type == 'text':
    +        return text, 'text'
    +    else:
    +        parser_cls = _ctgroup[input_type or 'html']['_parser']
    +        root = create_root_node(text, parser_cls, base_url=base_url, huge_tree=huge_tree)
    +        return root, input_type or 'html'
    +
    +def _get_root_and_type_from_bytes(body: bytes, encoding: str='utf8', input_type: Optional[str]=None, base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE) -> Tuple[Any, str]:
    +    """Get root node and type from bytes input."""
    +    if input_type == 'json':
    +        try:
    +            return json.loads(body.decode(encoding)), 'json'
    +        except json.JSONDecodeError as e:
    +            raise ValueError(f"Invalid JSON: {str(e)}")
    +    elif input_type == 'text':
    +        return body.decode(encoding), 'text'
    +    else:
    +        parser_cls = _ctgroup[input_type or 'html']['_parser']
    +        root = create_root_node('', parser_cls, base_url=base_url, huge_tree=huge_tree, body=body, encoding=encoding)
    +        return root, input_type or 'html'
    +
     def create_root_node(text: str, parser_cls: Type[_ParserType], base_url: Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE, body: bytes=b'', encoding: str='utf8') -> etree._Element:
         """Create root node for text using given parser class."""
    -    pass
    +    parser_kwargs = {}
    +    if huge_tree and LXML_SUPPORTS_HUGE_TREE:
    +        parser_kwargs['huge_tree'] = True
    +    parser = parser_cls(**parser_kwargs)
    +    if body:
    +        root = etree.fromstring(body, parser=parser, base_url=base_url)
    +    else:
    +        root = etree.fromstring(text.encode(encoding), parser=parser, base_url=base_url)
    +    if root is None:
    +        root = etree.Element('html')
    +    if base_url is not None:
    +        root.base = base_url
    +    return root
    
     class SelectorList(List[_SelectorType]):
         """
    @@ -79,7 +132,7 @@ class SelectorList(List[_SelectorType]):
    
                 selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict))
             """
    -        pass
    +        return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self]))
    
         def xpath(self, xpath: str, namespaces: Optional[Mapping[str, str]]=None, **kwargs: Any) -> 'SelectorList[_SelectorType]':
             """
    @@ -98,7 +151,7 @@ class SelectorList(List[_SelectorType]):
    
                 selector.xpath('//a[href=$url]', url="http://www.example.com")
             """
    -        pass
    +        return self.__class__(flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]))
    
         def css(self, query: str) -> 'SelectorList[_SelectorType]':
             """
    @@ -107,7 +160,7 @@ class SelectorList(List[_SelectorType]):
    
             ``query`` is the same argument as the one in :meth:`Selector.css`
             """
    -        pass
    +        return self.__class__(flatten([x.css(query) for x in self]))
    
         def re(self, regex: Union[str, Pattern[str]], replace_entities: bool=True) -> List[str]:
             """
    @@ -119,7 +172,7 @@ class SelectorList(List[_SelectorType]):
             Passing ``replace_entities`` as ``False`` switches off these
             replacements.
             """
    -        pass
    +        return list(flatten([x.re(regex, replace_entities=replace_entities) for x in self]))
    
         def re_first(self, regex: Union[str, Pattern[str]], default: Optional[str]=None, replace_entities: bool=True) -> Optional[str]:
             """
    @@ -133,14 +186,16 @@ class SelectorList(List[_SelectorType]):
             Passing ``replace_entities`` as ``False`` switches off these
             replacements.
             """
    -        pass
    +        for el in iflatten(x.re(regex, replace_entities=replace_entities) for x in self):
    +            return el
    +        return default
    
         def getall(self) -> List[str]:
             """
             Call the ``.get()`` method for each element is this list and return
             their results flattened, as a list of strings.
             """
    -        pass
    +        return [x.get() for x in self]
         extract = getall
    
         def get(self, default: Optional[str]=None) -> Any:
    @@ -148,7 +203,9 @@ class SelectorList(List[_SelectorType]):
             Return the result of ``.get()`` for the first element in this list.
             If the list is empty, return the default value.
             """
    -        pass
    +        for x in self:
    +            return x.get()
    +        return default
         extract_first = get
    
         @property
    @@ -156,19 +213,23 @@ class SelectorList(List[_SelectorType]):
             """Return the attributes dictionary for the first element.
             If the list is empty, return an empty dict.
             """
    -        pass
    +        for x in self:
    +            return x.attrib
    +        return {}
    
         def remove(self) -> None:
             """
             Remove matched nodes from the parent for each element in this list.
             """
    -        pass
    +        for x in self:
    +            x.remove()
    
         def drop(self) -> None:
             """
             Drop matched nodes from the parent for each element in this list.
             """
    -        pass
    +        for x in self:
    +            x.drop()
     _NOT_SET = object()
    
     class Selector:
    @@ -256,7 +317,14 @@ class Selector:
    
                 selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict))
             """
    -        pass
    +        if self.type != 'json':
    +            raise ValueError('JMESPath expressions can only be applied to JSON data')
    +        result = jmespath.search(query, self.root, **kwargs)
    +        if result is None:
    +            return self.selectorlist_cls([])
    +        if not isinstance(result, list):
    +            result = [result]
    +        return self.selectorlist_cls([type(self)(root=r, _expr=query) for r in result])
    
         def xpath(self: _SelectorType, query: str, namespaces: Optional[Mapping[str, str]]=None, **kwargs: Any) -> SelectorList[_SelectorType]:
             """
    @@ -276,7 +344,20 @@ class Selector:
    
                 selector.xpath('//a[href=$url]', url="http://www.example.com")
             """
    -        pass
    +        if self.type == 'json':
    +            raise ValueError('XPath expressions cannot be applied to JSON data')
    +        if namespaces is not None:
    +            namespaces = dict(self.namespaces, **namespaces)
    +        else:
    +            namespaces = self.namespaces
    +        try:
    +            xpathev = self.root.xpath
    +        except AttributeError:
    +            return self.selectorlist_cls([])
    +        result = xpathev(query, namespaces=namespaces, smart_strings=self._lxml_smart_strings, **kwargs)
    +        if not isinstance(result, list):
    +            result = [result]
    +        return self.selectorlist_cls([type(self)(root=r, _expr=query) for r in result])
    
         def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
             """
    @@ -289,7 +370,10 @@ class Selector:
    
             .. _cssselect: https://pypi.python.org/pypi/cssselect/
             """
    -        pass
    +        if self.type == 'json':
    +            raise ValueError('CSS expressions cannot be applied to JSON data')
    +        xpath = _ctgroup[self.type or 'html']['_csstranslator'].css_to_xpath(query)
    +        return self.xpath(xpath)
    
         def re(self, regex: Union[str, Pattern[str]], replace_entities: bool=True) -> List[str]:
             """
    @@ -304,7 +388,7 @@ class Selector:
             Passing ``replace_entities`` as ``False`` switches off these
             replacements.
             """
    -        pass
    +        return extract_regex(regex, self.get(), replace_entities=replace_entities)
    
         def re_first(self, regex: Union[str, Pattern[str]], default: Optional[str]=None, replace_entities: bool=True) -> Optional[str]:
             """
    @@ -317,7 +401,10 @@ class Selector:
             Passing ``replace_entities`` as ``False`` switches off these
             replacements.
             """
    -        pass
    +        matches = self.re(regex, replace_entities=replace_entities)
    +        if matches:
    +            return matches[0]
    +        return default
    
         def get(self) -> Any:
             """
    @@ -326,14 +413,36 @@ class Selector:
             For HTML and XML, the result is always a string, and percent-encoded
             content is unquoted.
             """
    -        pass
    +        if self.type == 'json':
    +            return self.root
    +        elif self.type == 'text':
    +            return str(self.root)
    +        else:
    +            try:
    +                method = _ctgroup[self.type or 'html']['_tostring_method']
    +                if isinstance(self.root, etree._Element):
    +                    return etree.tostring(self.root, method=method, encoding='unicode', with_tail=False)
    +                elif isinstance(self.root, bool):
    +                    return str(self.root)
    +                elif isinstance(self.root, str):
    +                    return self.root
    +                elif self.root is None:
    +                    return ''
    +                else:
    +                    return str(self.root)
    +            except (AttributeError, TypeError):
    +                if self.root is True or self.root is False:
    +                    return str(self.root)
    +                if isinstance(self.root, str):
    +                    return self.root
    +                return ''
         extract = get
    
         def getall(self) -> List[str]:
             """
             Serialize and return the matched node in a 1-element list of strings.
             """
    -        pass
    +        return [self.get()]
    
         def register_namespace(self, prefix: str, uri: str) -> None:
             """
    @@ -341,31 +450,76 @@ class Selector:
             Without registering namespaces you can't select or extract data from
             non-standard namespaces. See :ref:`selector-examples-xml`.
             """
    -        pass
    +        self.namespaces[prefix] = uri
    
         def remove_namespaces(self) -> None:
             """
             Remove all namespaces, allowing to traverse the document using
             namespace-less xpaths. See :ref:`removing-namespaces`.
             """
    -        pass
    +        if self.type == 'json':
    +            raise ValueError('Namespaces cannot be removed from JSON data')
    +        if self.type == 'text':
    +            raise ValueError('Namespaces cannot be removed from text data')
    +        if not isinstance(self.root, etree._Element):
    +            raise ValueError('Cannot remove namespaces from non-XML/HTML data')
    +        for el in self.root.iter('*'):
    +            if el.tag.startswith('{'):
    +                el.tag = el.tag.split('}', 1)[1]
    +            for at in list(el.attrib):
    +                if at.startswith('{'):
    +                    new_at = at.split('}', 1)[1]
    +                    el.attrib[new_at] = el.attrib.pop(at)
    
         def remove(self) -> None:
             """
             Remove matched nodes from the parent element.
             """
    -        pass
    +        if self.type == 'json':
    +            raise ValueError('Cannot remove nodes from JSON data')
    +        if self.type == 'text':
    +            raise ValueError('Cannot remove nodes from text data')
    +        if self.root is None:
    +            raise CannotRemoveElementWithoutRoot('Element has no root')
    +        if not isinstance(self.root, etree._Element):
    +            raise CannotRemoveElementWithoutRoot('Element has no root')
    +        parent = self.root.getparent()
    +        if parent is None:
    +            raise CannotRemoveElementWithoutParent('Element has no parent')
    +        parent.remove(self.root)
    
         def drop(self) -> None:
             """
             Drop matched nodes from the parent element.
             """
    -        pass
    +        if self.type == 'json':
    +            raise ValueError('Cannot drop nodes from JSON data')
    +        if self.type == 'text':
    +            raise ValueError('Cannot drop nodes from text data')
    +        if self.root is None:
    +            raise CannotDropElementWithoutParent('Element has no root')
    +        if not isinstance(self.root, etree._Element):
    +            raise CannotDropElementWithoutRoot('Element has no root')
    +        parent = self.root.getparent()
    +        if parent is None:
    +            raise CannotDropElementWithoutParent('Element has no parent')
    +        if self.root.tail is not None:
    +            prev = self.root.getprevious()
    +            if prev is None:
    +                parent.text = (parent.text or '') + self.root.tail
    +            else:
    +                prev.tail = (prev.tail or '') + self.root.tail
    +        parent.remove(self.root)
    
         @property
         def attrib(self) -> Dict[str, str]:
             """Return the attributes dictionary for underlying element."""
    -        pass
    +        if self.type == 'json':
    +            raise ValueError('JSON objects do not have attributes')
    +        try:
    +            return dict(self.root.attrib)
    +        except (AttributeError, TypeError):
    +            return {}
    
         def __bool__(self) -> bool:
             """
    diff --git a/parsel/utils.py b/parsel/utils.py
    index e0c96da..142e11b 100644
    --- a/parsel/utils.py
    +++ b/parsel/utils.py
    @@ -17,12 +17,16 @@ def flatten(x: Iterable[Any]) -> List[Any]:
         >>> flatten(["foo", ["baz", 42], "bar"])
         ['foo', 'baz', 42, 'bar']
         """
    -    pass
    +    return list(iflatten(x))
    
     def iflatten(x: Iterable[Any]) -> Iterator[Any]:
         """iflatten(sequence) -> Iterator
         Similar to ``.flatten()``, but returns iterator instead"""
    -    pass
    +    for el in x:
    +        if _is_listlike(el):
    +            yield from iflatten(el)
    +        else:
    +            yield el
    
     def _is_listlike(x: Any) -> bool:
         """
    @@ -45,7 +49,7 @@ def _is_listlike(x: Any) -> bool:
         >>> _is_listlike(range(5))
         True
         """
    -    pass
    +    return hasattr(x, '__iter__') and not isinstance(x, (str, bytes))
    
     def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities: bool=True) -> List[str]:
         """Extract a list of strings from the given text/encoding using the following policies:
    @@ -53,8 +57,34 @@ def extract_regex(regex: Union[str, Pattern[str]], text: str, replace_entities:
         * if the regex contains multiple numbered groups, all those will be returned (flattened)
         * if the regex doesn't contain any group the entire regex matching is returned
         """
    -    pass
    +    if not text:
    +        return []
    +    if replace_entities:
    +        text = w3lib_replace_entities(text, keep_entities=True)
    +    if isinstance(regex, str):
    +        regex = re.compile(regex)
    +    ret: List[str] = []
    +    for match in regex.finditer(text):
    +        if 'extract' in match.groupdict():
    +            ret.append(cast(str, match.group('extract')))
    +        elif len(match.groups()) > 0:
    +            ret.extend(filter(None, match.groups()))
    +        else:
    +            ret.append(match.group())
    +    return ret
    
     def shorten(text: str, width: int, suffix: str='...') -> str:
         """Truncate the given text to fit in the given width."""
    -    pass
    \ No newline at end of file
    +    if width <= 0:
    +        raise ValueError('Width must be greater than 0')
    +    if len(text) <= width:
    +        return text
    +    if width <= len(suffix):
    +        return text[:width]
    +    if width == 1:
    +        return '.'
    +    if width == 2:
    +        return '..'
    +    if width == 3:
    +        return '...'
    +    return text[:width - len(suffix)] + suffix
    \ No newline at end of file
    diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
    index 9f5c742..da420c2 100644
    --- a/parsel/xpathfuncs.py
    +++ b/parsel/xpathfuncs.py
    @@ -5,6 +5,10 @@ from w3lib.html import HTML5_WHITESPACE
     regex = f'[{HTML5_WHITESPACE}]+'
     replace_html5_whitespaces = re.compile(regex).sub
    
    +def setup() -> None:
    +    """Register built-in XPath extension functions."""
    +    set_xpathfunc("has-class", has_class)
    +
     def set_xpathfunc(fname: str, func: Optional[Callable]) -> None:
         """Register a custom extension function to use in XPath expressions.
    
    @@ -19,7 +23,11 @@ def set_xpathfunc(fname: str, func: Optional[Callable]) -> None:
         .. _`in lxml documentation`: https://lxml.de/extensions.html#xpath-extension-functions
    
         """
    -    pass
    +    ns = etree.FunctionNamespace(None)
    +    if func is None:
    +        del ns[fname]
    +    else:
    +        ns[fname] = func
    
     def has_class(context: Any, *classes: str) -> bool:
         """has-class function.
    @@ -27,4 +35,21 @@ def has_class(context: Any, *classes: str) -> bool:
         Return True if all ``classes`` are present in element's class attr.
    
         """
    -    pass
    \ No newline at end of file
    +    if not classes:
    +        raise ValueError("has-class must have at least 1 argument")
    +
    +    for class_ in classes:
    +        if not isinstance(class_, str):
    +            raise ValueError("has-class arguments must be strings")
    +        try:
    +            class_.encode('ascii')
    +        except UnicodeEncodeError:
    +            raise ValueError("All strings must be XML compatible")
    +
    +    element = context.context_node
    +    class_attr = element.get('class', '').strip()
    +    if not class_attr:
    +        return False
    +
    +    element_classes = set(replace_html5_whitespaces(' ', class_attr).split())
    +    return all(class_ in element_classes for class_ in classes)
    \ No newline at end of file