Skip to content

back to Claude Sonnet 3.5 - Fill-in summary

Claude Sonnet 3.5 - Fill-in: parsel

Pytest Summary for test tests

status count
failed 5
passed 10
total 15
collected 15

Failed pytests:

test_utils.py::test_shorten[-1-ValueError]

test_utils.py::test_shorten[-1-ValueError]
width = -1, expected = <class 'ValueError'>

    @mark.parametrize(
        "width,expected",
        (
            (-1, ValueError),
            (0, ""),
            (1, "."),
            (2, ".."),
            (3, "..."),
            (4, "f..."),
            (5, "fo..."),
            (6, "foobar"),
            (7, "foobar"),
        ),
    )
    def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
        if isinstance(expected, str):
            assert shorten("foobar", width) == expected
        else:
>           with raises(expected):
E           Failed: DID NOT RAISE <class 'ValueError'>

tests/test_utils.py:26: Failed

test_utils.py::test_shorten[0-]

test_utils.py::test_shorten[0-]
width = 0, expected = ''

    @mark.parametrize(
        "width,expected",
        (
            (-1, ValueError),
            (0, ""),
            (1, "."),
            (2, ".."),
            (3, "..."),
            (4, "f..."),
            (5, "fo..."),
            (6, "foobar"),
            (7, "foobar"),
        ),
    )
    def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
        if isinstance(expected, str):
>           assert shorten("foobar", width) == expected
E           AssertionError

tests/test_utils.py:24: AssertionError

test_utils.py::test_shorten[1-.]

test_utils.py::test_shorten[1-.]
width = 1, expected = '.'

    @mark.parametrize(
        "width,expected",
        (
            (-1, ValueError),
            (0, ""),
            (1, "."),
            (2, ".."),
            (3, "..."),
            (4, "f..."),
            (5, "fo..."),
            (6, "foobar"),
            (7, "foobar"),
        ),
    )
    def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
        if isinstance(expected, str):
>           assert shorten("foobar", width) == expected
E           AssertionError

tests/test_utils.py:24: AssertionError

test_utils.py::test_shorten[2-..]

test_utils.py::test_shorten[2-..]
width = 2, expected = '..'

    @mark.parametrize(
        "width,expected",
        (
            (-1, ValueError),
            (0, ""),
            (1, "."),
            (2, ".."),
            (3, "..."),
            (4, "f..."),
            (5, "fo..."),
            (6, "foobar"),
            (7, "foobar"),
        ),
    )
    def test_shorten(width: int, expected: Union[str, Type[Exception]]) -> None:
        if isinstance(expected, str):
>           assert shorten("foobar", width) == expected
E           AssertionError

tests/test_utils.py:24: AssertionError

test_utils.py::test_extract_regex[^.*$-&quot;sometext&quot; &amp; &quot;moretext&quot;-True-expected4]

test_utils.py::test_extract_regex[^.*$-&quot;sometext&quot; &amp; &quot;moretext&quot;-True-expected4]
regex = '^.*$', text = '&quot;sometext&quot; &amp; &quot;moretext&quot;'
replace_entities = True, expected = ['"sometext" &amp; "moretext"']

    @mark.parametrize(
        "regex, text, replace_entities, expected",
        (
            [
                r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                "October  25, 2019",
                True,
                ["October", "25", "2019"],
            ],
            [
                r"(?P<month>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                "October  25 2019",
                True,
                ["October", "25", "2019"],
            ],
            [
                r"(?P<extract>\w+)\s*(?P<day>\d+)\s*\,?\s*(?P<year>\d+)",
                "October  25 2019",
                True,
                ["October"],
            ],
            [
                r"\w+\s*\d+\s*\,?\s*\d+",
                "October  25 2019",
                True,
                ["October  25 2019"],
            ],
            [
                r"^.*$",
                "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                True,
                ['"sometext" &amp; "moretext"'],
            ],
            [
                r"^.*$",
                "&quot;sometext&quot; &amp; &quot;moretext&quot;",
                False,
                ["&quot;sometext&quot; &amp; &quot;moretext&quot;"],
            ],
        ),
    )
    def test_extract_regex(
        regex: Union[str, Pattern[str]],
        text: str,
        replace_entities: bool,
        expected: List[str],
    ) -> None:
>       assert extract_regex(regex, text, replace_entities) == expected
E       AssertionError

tests/test_utils.py:77: AssertionError

Patch diff

diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py
index 2bf8cc7..dbf9a2e 100644
--- a/parsel/csstranslator.py
+++ b/parsel/csstranslator.py
@@ -44,17 +44,30 @@ class TranslatorMixin:
         """
         Dispatch method that transforms XPath to support pseudo-element
         """
-        pass
+        if isinstance(pseudo_element, FunctionalPseudoElement):
+            if pseudo_element.name == 'attr':
+                return self.xpath_attr_functional_pseudo_element(xpath, pseudo_element)
+        elif isinstance(pseudo_element, PseudoElement):
+            if pseudo_element.name == 'text':
+                return self.xpath_text_simple_pseudo_element(xpath)
+        return xpath

     def xpath_attr_functional_pseudo_element(self, xpath: OriginalXPathExpr,
         function: FunctionalPseudoElement) ->XPathExpr:
         """Support selecting attribute values using ::attr() pseudo-element"""
-        pass
+        if not function.arguments:
+            raise ExpressionError("The ::attr() pseudo-element requires an argument.")
+        attribute = function.arguments[0]
+        xpath = XPathExpr(xpath.path, xpath.element)
+        xpath.attribute = attribute
+        return xpath

     def xpath_text_simple_pseudo_element(self, xpath: OriginalXPathExpr
         ) ->XPathExpr:
         """Support selecting text nodes using ::text pseudo-element"""
-        pass
+        xpath = XPathExpr(xpath.path, xpath.element)
+        xpath.textnode = True
+        return xpath


 class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
@@ -68,6 +81,7 @@ class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
 _translator = HTMLTranslator()


+@lru_cache(maxsize=256)
 def css2xpath(query: str) ->str:
     """Return translated XPath version of a given CSS query"""
-    pass
+    return _translator.css_to_xpath(query)
diff --git a/parsel/selector.py b/parsel/selector.py
index 6aa73da..b1288f3 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -54,7 +54,15 @@ def create_root_node(text: str, parser_cls: Type[_ParserType], base_url:
     Optional[str]=None, huge_tree: bool=LXML_SUPPORTS_HUGE_TREE, body:
     bytes=b'', encoding: str='utf8') ->etree._Element:
     """Create root node for text using given parser class."""
-    pass
+    parser = parser_cls(recover=True, encoding=encoding)
+    if LXML_SUPPORTS_HUGE_TREE:
+        parser.set_option(etree.XML_PARSE_HUGE, huge_tree)
+    if body:
+        text = body.decode(encoding)
+    root = etree.fromstring(text.encode('utf-8'), parser=parser, base_url=base_url)
+    if root is None:
+        root = etree.fromstring('<html/>', parser=parser, base_url=base_url)
+    return root


 class SelectorList(List[_SelectorType]):
@@ -96,7 +104,7 @@ class SelectorList(List[_SelectorType]):

             selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict))
         """
-        pass
+        return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self]))

     def xpath(self, xpath: str, namespaces: Optional[Mapping[str, str]]=
         None, **kwargs: Any) ->'SelectorList[_SelectorType]':
@@ -116,7 +124,7 @@ class SelectorList(List[_SelectorType]):

             selector.xpath('//a[href=$url]', url="http://www.example.com")
         """
-        pass
+        return self.__class__(flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]))

     def css(self, query: str) ->'SelectorList[_SelectorType]':
         """
@@ -125,7 +133,7 @@ class SelectorList(List[_SelectorType]):

         ``query`` is the same argument as the one in :meth:`Selector.css`
         """
-        pass
+        return self.__class__(flatten([x.css(query) for x in self]))

     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool=True
         ) ->List[str]:
@@ -138,7 +146,7 @@ class SelectorList(List[_SelectorType]):
         Passing ``replace_entities`` as ``False`` switches off these
         replacements.
         """
-        pass
+        return flatten([x.re(regex, replace_entities=replace_entities) for x in self])

     def re_first(self, regex: Union[str, Pattern[str]], default: Optional[
         str]=None, replace_entities: bool=True) ->Optional[str]:
@@ -153,14 +161,18 @@ class SelectorList(List[_SelectorType]):
         Passing ``replace_entities`` as ``False`` switches off these
         replacements.
         """
-        pass
+        for el in iflatten(self):
+            m = el.re(regex, replace_entities=replace_entities)
+            if m:
+                return m[0]
+        return default

     def getall(self) ->List[str]:
         """
         Call the ``.get()`` method for each element is this list and return
         their results flattened, as a list of strings.
         """
-        pass
+        return [x.get() for x in self]
     extract = getall

     def get(self, default: Optional[str]=None) ->Any:
@@ -168,7 +180,9 @@ class SelectorList(List[_SelectorType]):
         Return the result of ``.get()`` for the first element in this list.
         If the list is empty, return the default value.
         """
-        pass
+        for x in self:
+            return x.get()
+        return default
     extract_first = get

     @property
@@ -176,19 +190,23 @@ class SelectorList(List[_SelectorType]):
         """Return the attributes dictionary for the first element.
         If the list is empty, return an empty dict.
         """
-        pass
+        for x in self:
+            return x.attrib
+        return {}

     def remove(self) ->None:
         """
         Remove matched nodes from the parent for each element in this list.
         """
-        pass
+        for x in self:
+            x.remove()

     def drop(self) ->None:
         """
         Drop matched nodes from the parent for each element in this list.
         """
-        pass
+        for x in self:
+            x.drop()


 _NOT_SET = object()
@@ -295,7 +313,11 @@ class Selector:

             selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict))
         """
-        pass
+        result = jmespath.search(query, self.get(), **kwargs)
+        if isinstance(result, list):
+            return self.selectorlist_cls(Selector(root=r, _expr=query) for r in result)
+        else:
+            return self.selectorlist_cls([Selector(root=result, _expr=query)])

     def xpath(self: _SelectorType, query: str, namespaces: Optional[Mapping
         [str, str]]=None, **kwargs: Any) ->SelectorList[_SelectorType]:
@@ -316,7 +338,25 @@ class Selector:

             selector.xpath('//a[href=$url]', url="http://www.example.com")
         """
-        pass
+        try:
+            xpathev = self.root.xpath
+        except AttributeError:
+            return self.selectorlist_cls([])
+
+        nsp = dict(self.namespaces)
+        if namespaces is not None:
+            nsp.update(namespaces)
+        try:
+            result = xpathev(query, namespaces=nsp, smart_strings=self._lxml_smart_strings, **kwargs)
+        except etree.XPathError as exc:
+            msg = f"XPath error: {exc} in {query}"
+            raise ValueError(msg)
+
+        if type(result) is not list:
+            result = [result]
+
+        return self.selectorlist_cls(self.__class__(root=x, _expr=query)
+                                     for x in result)

     def css(self: _SelectorType, query: str) ->SelectorList[_SelectorType]:
         """
@@ -329,7 +369,7 @@ class Selector:

         .. _cssselect: https://pypi.python.org/pypi/cssselect/
         """
-        pass
+        return self.xpath(_ctgroup[self.type]['_csstranslator'].css_to_xpath(query))

     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool=True
         ) ->List[str]:
@@ -345,7 +385,7 @@ class Selector:
         Passing ``replace_entities`` as ``False`` switches off these
         replacements.
         """
-        pass
+        return extract_regex(regex, self.get(), replace_entities=replace_entities)

     def re_first(self, regex: Union[str, Pattern[str]], default: Optional[
         str]=None, replace_entities: bool=True) ->Optional[str]:
@@ -359,7 +399,7 @@ class Selector:
         Passing ``replace_entities`` as ``False`` switches off these
         replacements.
         """
-        pass
+        return next(iflatten(self.re(regex, replace_entities)), default)

     def get(self) ->Any:
         """
@@ -368,14 +408,25 @@ class Selector:
         For HTML and XML, the result is always a string, and percent-encoded
         content is unquoted.
         """
-        pass
+        try:
+            return etree.tostring(self.root,
+                                  method=_ctgroup[self.type]['_tostring_method'],
+                                  encoding='unicode',
+                                  with_tail=False)
+        except (AttributeError, TypeError):
+            if self.root is True:
+                return '1'
+            elif self.root is False:
+                return '0'
+            else:
+                return str(self.root)
     extract = get

     def getall(self) ->List[str]:
         """
         Serialize and return the matched node in a 1-element list of strings.
         """
-        pass
+        return [self.get()]

     def register_namespace(self, prefix: str, uri: str) ->None:
         """
@@ -383,26 +434,37 @@ class Selector:
         Without registering namespaces you can't select or extract data from
         non-standard namespaces. See :ref:`selector-examples-xml`.
         """
-        pass
+        self.namespaces[prefix] = uri

     def remove_namespaces(self) ->None:
         """
         Remove all namespaces, allowing to traverse the document using
         namespace-less xpaths. See :ref:`removing-namespaces`.
         """
-        pass
+        for el in self.root.iter('*'):
+            if el.tag.startswith('{'):
+                el.tag = el.tag.split('}', 1)[1]
+            for an in list(el.attrib.keys()):
+                if an.startswith('{'):
+                    el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an)

     def remove(self) ->None:
         """
         Remove matched nodes from the parent element.
         """
-        pass
+        parent = self.root.getparent()
+        if parent is None:
+            raise CannotRemoveElementWithoutRoot("Element has no parent")
+        parent.remove(self.root)

     def drop(self) ->None:
         """
         Drop matched nodes from the parent element.
         """
-        pass
+        parent = self.root.getparent()
+        if parent is None:
+            raise CannotDropElementWithoutParent("Element has no parent")
+        parent.drop(self.root)

     @property
     def attrib(self) ->Dict[str, str]:
diff --git a/parsel/utils.py b/parsel/utils.py
index 361f99c..36c006f 100644
--- a/parsel/utils.py
+++ b/parsel/utils.py
@@ -18,13 +18,23 @@ def flatten(x: Iterable[Any]) ->List[Any]:
     >>> flatten(["foo", ["baz", 42], "bar"])
     ['foo', 'baz', 42, 'bar']
     """
-    pass
+    result = []
+    for el in x:
+        if _is_listlike(el):
+            result.extend(flatten(el))
+        else:
+            result.append(el)
+    return result


 def iflatten(x: Iterable[Any]) ->Iterator[Any]:
     """iflatten(sequence) -> Iterator
     Similar to ``.flatten()``, but returns iterator instead"""
-    pass
+    for el in x:
+        if _is_listlike(el):
+            yield from iflatten(el)
+        else:
+            yield el


 def _is_listlike(x: Any) ->bool:
@@ -48,7 +58,10 @@ def _is_listlike(x: Any) ->bool:
     >>> _is_listlike(range(5))
     True
     """
-    pass
+    return (
+        hasattr(x, '__iter__') and
+        not isinstance(x, (str, bytes))
+    )


 def extract_regex(regex: Union[str, Pattern[str]], text: str,
@@ -58,9 +71,25 @@ def extract_regex(regex: Union[str, Pattern[str]], text: str,
     * if the regex contains multiple numbered groups, all those will be returned (flattened)
     * if the regex doesn't contain any group the entire regex matching is returned
     """
-    pass
+    if isinstance(regex, str):
+        regex = re.compile(regex)
+
+    if replace_entities:
+        text = w3lib_replace_entities(text)
+
+    if 'extract' in regex.groupindex:
+        extracted = [match.group('extract') for match in regex.finditer(text)]
+    elif regex.groups > 0:
+        extracted = [cast(Match[str], match).groups() for match in regex.finditer(text)]
+        extracted = flatten(extracted)
+    else:
+        extracted = regex.findall(text)
+
+    return [str(s) for s in extracted]


 def shorten(text: str, width: int, suffix: str='...') ->str:
     """Truncate the given text to fit in the given width."""
-    pass
+    if len(text) <= width:
+        return text
+    return text[:width - len(suffix)] + suffix
diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
index 55d8f4a..0de3274 100644
--- a/parsel/xpathfuncs.py
+++ b/parsel/xpathfuncs.py
@@ -20,7 +20,12 @@ def set_xpathfunc(fname: str, func: Optional[Callable]) ->None:
     .. _`in lxml documentation`: https://lxml.de/extensions.html#xpath-extension-functions

     """
-    pass
+    ns = etree.FunctionNamespace(None)
+    if func is None:
+        if fname in ns:
+            del ns[fname]
+    else:
+        ns[fname] = func


 def has_class(context: Any, *classes: str) ->bool:
@@ -29,4 +34,7 @@ def has_class(context: Any, *classes: str) ->bool:
     Return True if all ``classes`` are present in element's class attr.

     """
-    pass
+    if not context.context_node.get('class'):
+        return False
+    node_classes = set(replace_html5_whitespaces(' ', context.context_node.get('class')).split())
+    return all(cls in node_classes for cls in classes)