
Reference (Gold): scrapy

Pytest Summary for tests

status      count
passed       2858
failed         33
skipped       371
xfailed        21
error          13
total        3296
collected    3296

Failed pytests:

test_commands.py::GenspiderCommandTest::test_template

test_commands.py::GenspiderCommandTest::test_template
self = 
tplname = 'crawl'

    def test_template(self, tplname="crawl"):
        args = [f"--template={tplname}"] if tplname else []
        spname = "test_spider"
        spmodule = f"{self.project_name}.spiders.{spname}"
        p, out, err = self.proc("genspider", spname, "test.com", *args)
        self.assertIn(
            f"Created spider {spname!r} using template {tplname!r} in module:{os.linesep}  {spmodule}",
            out,
        )
        self.assertTrue(Path(self.proj_mod_path, "spiders", "test_spider.py").exists())
        modify_time_before = (
            Path(self.proj_mod_path, "spiders", "test_spider.py").stat().st_mtime
        )
        p, out, err = self.proc("genspider", spname, "test.com", *args)
        self.assertIn(f"Spider {spname!r} already exists in module", out)
        modify_time_after = (
            Path(self.proj_mod_path, "spiders", "test_spider.py").stat().st_mtime
        )
>       self.assertEqual(modify_time_after, modify_time_before)

/testbed/tests/test_commands.py:472: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:444: in assertEqual
    super().assertEqual(first, second, msg)
E   twisted.trial.unittest.FailTest: 1727385204.784021 != 1727385204.785175
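
This failure, the three template variants below (basic, csvfeed, xmlfeed), and GenspiderStandaloneCommandTest::test_same_name_as_existing_file all show the same symptom: the second genspider run reports that the spider already exists, yet the spider file's mtime still changes between the two stat() calls. A minimal sketch of the guard the tests expect, using hypothetical names (generate_spider, render_template) purely for illustration:

    from pathlib import Path

    def generate_spider(spider_path: Path, render_template, force: bool = False) -> bool:
        # If the module already exists and --force was not given, return
        # before writing anything so the file's mtime stays unchanged.
        if spider_path.exists() and not force:
            print(f"Spider {spider_path.stem!r} already exists in module")
            return False
        spider_path.write_text(render_template(), encoding="utf-8")
        return True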

test_commands.py::GenspiderCommandTest::test_template_basic

test_commands.py::GenspiderCommandTest::test_template_basic
self = 

    def test_template_basic(self):
>       self.test_template("basic")

/testbed/tests/test_commands.py:475: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/tests/test_commands.py:472: in test_template
    self.assertEqual(modify_time_after, modify_time_before)
/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:444: in assertEqual
    super().assertEqual(first, second, msg)
E   twisted.trial.unittest.FailTest: 1727385206.4120243 != 1727385206.4133556

test_commands.py::GenspiderCommandTest::test_template_csvfeed

test_commands.py::GenspiderCommandTest::test_template_csvfeed
self = 

    def test_template_csvfeed(self):
>       self.test_template("csvfeed")

/testbed/tests/test_commands.py:478: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/tests/test_commands.py:472: in test_template
    self.assertEqual(modify_time_after, modify_time_before)
/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:444: in assertEqual
    super().assertEqual(first, second, msg)
E   twisted.trial.unittest.FailTest: 1727385208.0210276 != 1727385208.0219028

test_commands.py::GenspiderCommandTest::test_template_xmlfeed

test_commands.py::GenspiderCommandTest::test_template_xmlfeed
self = 

    def test_template_xmlfeed(self):
>       self.test_template("xmlfeed")

/testbed/tests/test_commands.py:481: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/tests/test_commands.py:472: in test_template
    self.assertEqual(modify_time_after, modify_time_before)
/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:444: in assertEqual
    super().assertEqual(first, second, msg)
E   twisted.trial.unittest.FailTest: 1727385210.770033 != 1727385210.7708642

test_commands.py::GenspiderStandaloneCommandTest::test_same_name_as_existing_file

test_commands.py::GenspiderStandaloneCommandTest::test_same_name_as_existing_file
self = 
force = False

    def test_same_name_as_existing_file(self, force=False):
        file_name = "example"
        file_path = Path(self.temp_path, file_name + ".py")
        p, out, err = self.proc("genspider", file_name, "example.com")
        self.assertIn(f"Created spider {file_name!r} using template 'basic' ", out)
        assert file_path.exists()
        modify_time_before = file_path.stat().st_mtime
        file_contents_before = file_path.read_text(encoding="utf-8")

        if force:
            # use different template to ensure contents were changed
            p, out, err = self.proc(
                "genspider", "--force", "-t", "crawl", file_name, "example.com"
            )
            self.assertIn(f"Created spider {file_name!r} using template 'crawl' ", out)
            modify_time_after = file_path.stat().st_mtime
            self.assertNotEqual(modify_time_after, modify_time_before)
            file_contents_after = file_path.read_text(encoding="utf-8")
            self.assertNotEqual(file_contents_after, file_contents_before)
        else:
            p, out, err = self.proc("genspider", file_name, "example.com")
            self.assertIn(
                f"{Path(self.temp_path, file_name + '.py').resolve()} already exists",
                out,
            )
            modify_time_after = file_path.stat().st_mtime
>           self.assertEqual(modify_time_after, modify_time_before)

/testbed/tests/test_commands.py:641: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:444: in assertEqual
    super().assertEqual(first, second, msg)
E   twisted.trial.unittest.FailTest: 1727385214.8590412 != 1727385214.8595073

test_crawl.py::CrawlSpiderTestCase::test_response_ssl_certificate_empty_response

test_crawl.py::CrawlSpiderTestCase::test_response_ssl_certificate_empty_response
self = 

    @mark.xfail(reason="Responses with no body return early and contain no certificate")
    @defer.inlineCallbacks
    def test_response_ssl_certificate_empty_response(self):
        crawler = get_crawler(SingleRequestSpider)
        url = self.mockserver.url("/status?n=200", is_secure=True)
        yield crawler.crawl(seed=url, mockserver=self.mockserver)
        cert = crawler.spider.meta["responses"][0].certificate
>       self.assertIsInstance(cert, Certificate)

/testbed/tests/test_crawl.py:629: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:666: in assertIsInstance
    self.fail(f"{instance!r} is not an instance of {classOrTuple}{suffix}")
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = 
msg = "None is not an instance of "

    def fail(self, msg: Optional[object] = None) -> NoReturn:
        """
        Absolutely fail the test.  Do not pass go, do not collect $200.

        @param msg: the message that will be displayed as the reason for the
        failure
        """
>       raise self.failureException(msg)
E       twisted.trial.unittest.FailTest: None is not an instance of <class 'twisted.internet.ssl.Certificate'>

/testbed/.venv/lib/python3.12/site-packages/twisted/trial/_synctest.py:381: FailTest
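
For reference, Response.certificate is exposed to spider callbacks as a twisted.internet.ssl.Certificate, or None, which is exactly the case this xfail documents for bodyless responses. A minimal, hypothetical spider showing the None guard:

    import scrapy

    class CertLoggingSpider(scrapy.Spider):
        name = "cert_logging"  # hypothetical spider, for illustration only
        start_urls = ["https://example.com/"]

        def parse(self, response):
            cert = response.certificate
            if cert is not None:
                self.logger.info("Certificate issuer: %s", cert.getIssuer())
            else:
                # Bodyless responses may return early without a certificate.
                self.logger.info("No certificate attached to %s", response.url)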

test_downloader_handlers.py::HttpTestCase::test_download_head

test_downloader_handlers.py::HttpTestCase::test_download_head
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:
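
The DirtyReactorAggregateError entries below all follow the same pattern: trial turns a test into an error when selectables or delayed calls are still registered after the test finishes. A minimal sketch (not taken from the Scrapy suite) of the cleanup discipline trial expects:

    from twisted.internet import reactor
    from twisted.internet.protocol import Factory, Protocol
    from twisted.trial import unittest

    class ExampleCleanupTest(unittest.TestCase):
        def test_listen_and_clean_up(self):
            port = reactor.listenTCP(0, Factory.forProtocol(Protocol))
            # stopListening() returns a Deferred; trial waits for cleanups,
            # so no selectable is left behind when the test ends.
            self.addCleanup(port.stopListening)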

test_downloader_handlers.py::HttpTestCase::test_get_duplicate_header

test_downloader_handlers.py::HttpTestCase::test_get_duplicate_header
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::HttpTestCase::test_host_header_not_in_request_headers

test_downloader_handlers.py::HttpTestCase::test_host_header_not_in_request_headers
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::HttpTestCase::test_payload

test_downloader_handlers.py::HttpTestCase::test_payload
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::HttpTestCase::test_response_header_content_length

test_downloader_handlers.py::HttpTestCase::test_response_header_content_length
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_content_length_zero_bodyless_post_request_headers

test_downloader_handlers.py::Https10TestCase::test_content_length_zero_bodyless_post_request_headers
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_payload

test_downloader_handlers.py::Https10TestCase::test_payload
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_protocol

test_downloader_handlers.py::Https10TestCase::test_protocol
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_redirect_status

test_downloader_handlers.py::Https10TestCase::test_redirect_status
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_redirect_status_head

test_downloader_handlers.py::Https10TestCase::test_redirect_status_head
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_response_class_from_body

test_downloader_handlers.py::Https10TestCase::test_response_class_from_body
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https10TestCase::test_response_header_content_length

test_downloader_handlers.py::Https10TestCase::test_response_header_content_length
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_content_length_zero_bodyless_post_only_one

test_downloader_handlers.py::Http11TestCase::test_content_length_zero_bodyless_post_only_one
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_download_head

test_downloader_handlers.py::Http11TestCase::test_download_head
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_download_with_maxsize

test_downloader_handlers.py::Http11TestCase::test_download_with_maxsize
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_download_with_maxsize_per_req

test_downloader_handlers.py::Http11TestCase::test_download_with_maxsize_per_req
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_download_with_small_maxsize_per_spider

test_downloader_handlers.py::Http11TestCase::test_download_with_small_maxsize_per_spider
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_host_header_not_in_request_headers

test_downloader_handlers.py::Http11TestCase::test_host_header_not_in_request_headers
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_response_class_choosing_request

test_downloader_handlers.py::Http11TestCase::test_response_class_choosing_request
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11TestCase::test_response_header_content_length

test_downloader_handlers.py::Http11TestCase::test_response_header_content_length
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https11WrongHostnameTestCase::test_download_broken_chunked_content_cause_data_loss

test_downloader_handlers.py::Https11WrongHostnameTestCase::test_download_broken_chunked_content_cause_data_loss
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Https11InvalidDNSPattern::test_timeout_download_from_spider_nodata_rcvd

test_downloader_handlers.py::Https11InvalidDNSPattern::test_timeout_download_from_spider_nodata_rcvd
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::Http11ProxyTestCase::test_download_with_proxy_without_http_scheme

test_downloader_handlers.py::Http11ProxyTestCase::test_download_with_proxy_without_http_scheme
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers.py::BaseFTPTestCase::test_ftp_download_nonexistent

test_downloader_handlers.py::BaseFTPTestCase::test_ftp_download_nonexistent
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::BaseFTPTestCase::test_ftp_download_success

test_downloader_handlers.py::BaseFTPTestCase::test_ftp_download_success
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::BaseFTPTestCase::test_ftp_local_filename

test_downloader_handlers.py::BaseFTPTestCase::test_ftp_local_filename
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::BaseFTPTestCase::test_response_class_from_body

test_downloader_handlers.py::BaseFTPTestCase::test_response_class_from_body
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::FTPTestCase::test_ftp_download_path_with_spaces

test_downloader_handlers.py::FTPTestCase::test_ftp_download_path_with_spaces
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::FTPTestCase::test_ftp_download_success

test_downloader_handlers.py::FTPTestCase::test_ftp_download_success
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::FTPTestCase::test_response_class_from_url

test_downloader_handlers.py::FTPTestCase::test_response_class_from_url
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::AnonymousFTPTestCase::test_ftp_download_nonexistent

test_downloader_handlers.py::AnonymousFTPTestCase::test_ftp_download_nonexistent
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers.py::AnonymousFTPTestCase::test_response_class_from_url

test_downloader_handlers.py::AnonymousFTPTestCase::test_response_class_from_url
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_downloader_handlers_http2.py::Http11ProxyTestCase::test_download_with_proxy

test_downloader_handlers_http2.py::Http11ProxyTestCase::test_download_with_proxy
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers_http2.py::Http11ProxyTestCase::test_download_without_proxy

test_downloader_handlers_http2.py::Http11ProxyTestCase::test_download_without_proxy
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers_http2.py::Https11TestCase::test_download_broken_chunked_content_cause_data_loss

test_downloader_handlers_http2.py::Https11TestCase::test_download_broken_chunked_content_cause_data_loss
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloader_handlers_http2.py::Https11TestCase::test_timeout_download_from_spider_nodata_rcvd

test_downloader_handlers_http2.py::Https11TestCase::test_timeout_download_from_spider_nodata_rcvd
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
Selectables:

test_downloadermiddleware_cookies.py::CookiesMiddlewareTest::test_keep_cookie_from_default_request_headers_middleware
self = 

    @pytest.mark.xfail(reason="Cookie header is not currently being processed")
    def test_keep_cookie_from_default_request_headers_middleware(self):
        DEFAULT_REQUEST_HEADERS = dict(Cookie="default=value; asdf=qwerty")
        mw_default_headers = DefaultHeadersMiddleware(DEFAULT_REQUEST_HEADERS.items())
        # overwrite with values from 'cookies' request argument
        req1 = Request("http://example.org", cookies={"default": "something"})
        assert mw_default_headers.process_request(req1, self.spider) is None
        assert self.mw.process_request(req1, self.spider) is None
>       self.assertCookieValEqual(
            req1.headers["Cookie"], b"default=something; asdf=qwerty"
        )

/testbed/tests/test_downloadermiddleware_cookies.py:329: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/tests/test_downloadermiddleware_cookies.py:59: in assertCookieValEqual
    return self.assertEqual(split_cookies(first), split_cookies(second), msg=msg)
E   AssertionError: Lists differ: [b'default=something'] != [b'asdf=qwerty', b'default=something']
E   
E   First differing element 0:
E   b'default=something'
E   b'asdf=qwerty'
E   
E   Second list contains 1 additional elements.
E   First extra element 1:
E   b'default=something'
E   
E   - [b'default=something']
E   + [b'asdf=qwerty', b'default=something']
test_downloadermiddleware_cookies.py::CookiesMiddlewareTest::test_keep_cookie_header
self = 

    @pytest.mark.xfail(reason="Cookie header is not currently being processed")
    def test_keep_cookie_header(self):
        # keep only cookies from 'Cookie' request header
        req1 = Request("http://scrapytest.org", headers={"Cookie": "a=b; c=d"})
        assert self.mw.process_request(req1, self.spider) is None
>       self.assertCookieValEqual(req1.headers["Cookie"], "a=b; c=d")

/testbed/tests/test_downloadermiddleware_cookies.py:345: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/scrapy/http/headers.py:49: in __getitem__
    return super().__getitem__(key)[-1]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = {}, key = 'Cookie'

    def __getitem__(self, key):
>       return dict.__getitem__(self, self.normkey(key))
E       KeyError: b'Cookie'

/testbed/scrapy/utils/datatypes.py:41: KeyError
test_downloadermiddleware_cookies.py::CookiesMiddlewareTest::test_request_headers_cookie_encoding
self = 

    @pytest.mark.xfail(reason="Cookie header is not currently being processed")
    def test_request_headers_cookie_encoding(self):
        # 1) UTF8-encoded bytes
        req1 = Request("http://example.org", headers={"Cookie": "a=รก".encode("utf8")})
        assert self.mw.process_request(req1, self.spider) is None
>       self.assertCookieValEqual(req1.headers["Cookie"], b"a=\xc3\xa1")

/testbed/tests/test_downloadermiddleware_cookies.py:382: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/scrapy/http/headers.py:49: in __getitem__
    return super().__getitem__(key)[-1]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = {}, key = 'Cookie'

    def __getitem__(self, key):
>       return dict.__getitem__(self, self.normkey(key))
E       KeyError: b'Cookie'

/testbed/scrapy/utils/datatypes.py:41: KeyError
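
The three cookie xfails above describe the same gap: values that arrive only in the Cookie request header are dropped instead of being parsed and merged with other cookies. As an illustration of the parsing step involved (not Scrapy's CookiesMiddleware API), the standard library can already split such a header into name/value pairs:

    from http.cookies import SimpleCookie

    def cookies_from_header(header_value: str) -> dict:
        # Parse "a=b; c=d" into {"a": "b", "c": "d"} so the values could be
        # fed into a cookie jar instead of being discarded.
        jar = SimpleCookie()
        jar.load(header_value)
        return {name: morsel.value for name, morsel in jar.items()}

    assert cookies_from_header("a=b; c=d") == {"a": "b", "c": "d"}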

test_engine.py::EngineTest::test_crawler_dupefilter

test_engine.py::EngineTest::test_crawler_dupefilter
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_engine_stop_download_bytes.py::EngineTest::test_crawler_change_close_reason_on_idle

test_engine_stop_download_bytes.py::EngineTest::test_crawler_change_close_reason_on_idle
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_engine_stop_download_bytes.py::EngineTest::test_crawler_itemerror

test_engine_stop_download_bytes.py::EngineTest::test_crawler_itemerror
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_engine_stop_download_bytes.py::BytesReceivedEngineTest::test_crawler_itemerror

test_engine_stop_download_bytes.py::BytesReceivedEngineTest::test_crawler_itemerror
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

test_engine_stop_download_headers.py::EngineTest::test_crawler_change_close_reason_on_idle

test_engine_stop_download_headers.py::EngineTest::test_crawler_change_close_reason_on_idle
'NoneType' object is not iterable

During handling of the above exception, another exception occurred:
NOTE: Incompatible Exception Representation, displaying natively:

twisted.trial.util.DirtyReactorAggregateError: Reactor was unclean.
DelayedCalls: (set twisted.internet.base.DelayedCall.debug = True to debug)

__init__.py::BaseSettingsTest::test_update_iterable

__init__.py::BaseSettingsTest::test_update_iterable
self = 

    @pytest.mark.xfail(
        raises=AttributeError,
        reason="BaseSettings.update doesn't support iterable input",
    )
    def test_update_iterable(self):
        settings = BaseSettings({"key": 0})
>       settings.update([("key", 1)])

/testbed/tests/test_settings/__init__.py:217: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = 
values = [('key', 1)], priority = 'project'

    def update(self, values: _SettingsInputT, priority: Union[int, str] = "project") -> None:  # type: ignore[override]
        """
        Store key/value pairs with a given priority.

        This is a helper function that calls
        :meth:`~scrapy.settings.BaseSettings.set` for every item of ``values``
        with the provided ``priority``.

        If ``values`` is a string, it is assumed to be JSON-encoded and parsed
        into a dict with ``json.loads()`` first. If it is a
        :class:`~scrapy.settings.BaseSettings` instance, the per-key priorities
        will be used and the ``priority`` parameter ignored. This allows
        inserting/updating settings with different priorities with a single
        command.

        :param values: the settings names and values
        :type values: dict or string or :class:`~scrapy.settings.BaseSettings`

        :param priority: the priority of the settings. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: str or int
        """
        self._assert_mutability()
        if isinstance(values, str):
            values = cast(dict, json.loads(values))
        if values is not None:
            if isinstance(values, BaseSettings):
                for name, value in values.items():
                    self.set(name, value, cast(int, values.getpriority(name)))
            else:
>               for name, value in values.items():
E               AttributeError: 'list' object has no attribute 'items'

/testbed/scrapy/settings/__init__.py:421: AttributeError
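
The traceback shows why the xfail holds: update() calls values.items() on anything that is not a string or a BaseSettings, so a plain list of pairs raises AttributeError. A hypothetical sketch (not the Scrapy implementation) of the fallback the test asks for:

    from collections.abc import Mapping

    def iter_setting_items(values):
        # Mappings keep the existing .items() path; anything else is assumed
        # to be an iterable of (name, value) pairs, e.g. [("key", 1)].
        if isinstance(values, Mapping):
            return values.items()
        return iter(values)

    for name, value in iter_setting_items([("key", 1)]):
        print(name, value)  # -> key 1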

__init__.py::BaseSettingsTest::test_update_kwargs

__init__.py::BaseSettingsTest::test_update_kwargs
self = 

    @pytest.mark.xfail(
        raises=TypeError, reason="BaseSettings.update doesn't support kwargs input"
    )
    def test_update_kwargs(self):
        settings = BaseSettings({"key": 0})
>       settings.update(key=1)  # pylint: disable=unexpected-keyword-arg
E       TypeError: BaseSettings.update() got an unexpected keyword argument 'key'

/testbed/tests/test_settings/__init__.py:209: TypeError

test_squeues.py::MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError
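
These test_squeues xfails are inherited from queuelib's own test suite: queuelib's raw disk queues accept only bytes, while Scrapy's squeues wrap them with a serializer, so pushing an int such as 0 no longer raises TypeError. A rough sketch of that wrapping idea, assuming pickle as the serializer (hypothetical class, not Scrapy's squeues code):

    import pickle
    from queuelib import FifoDiskQueue

    class SerializingFifoDiskQueue(FifoDiskQueue):
        def push(self, obj):
            # Serialize first, so any picklable object (including 0) is accepted.
            super().push(pickle.dumps(obj))

        def pop(self):
            data = super().pop()
            return pickle.loads(data) if data is not None else None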

test_squeues.py::ChunkSize1MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize1MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize2MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize2MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize3MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize3MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize4MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize4MarshalFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize1PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize1PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize2PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize2PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize3PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize3PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::ChunkSize4PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::ChunkSize4PickleFifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::MarshalLifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::MarshalLifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_squeues.py::PickleLifoDiskQueueTest::test_non_bytes_raises_typeerror

test_squeues.py::PickleLifoDiskQueueTest::test_non_bytes_raises_typeerror
self = 

    @pytest.mark.xfail(
        reason="Reenable once Scrapy.squeues stops extending from this testsuite"
    )
    def test_non_bytes_raises_typeerror(self):
        q = self.queue()
>       self.assertRaises(TypeError, q.push, 0)
E       AssertionError: TypeError not raised by push

/testbed/.venv/lib/python3.12/site-packages/queuelib/tests/test_queue.py:223: AssertionError

test_utils_defer.py::AsyncDefTestsuiteTest::test_deferred_f_from_coro_f_xfail

test_utils_defer.py::AsyncDefTestsuiteTest::test_deferred_f_from_coro_f_xfail
self = 

    @mark.xfail(reason="Checks that the test is actually executed", strict=True)
    @deferred_f_from_coro_f
    async def test_deferred_f_from_coro_f_xfail(self):
>       raise Exception("This is expected to be raised")
E       Exception: This is expected to be raised

/testbed/tests/test_utils_defer.py:171: Exception

test_utils_request.py::RequestFingerprintTest::test_part_separation

test_utils_request.py::RequestFingerprintTest::test_part_separation
self = 

    @pytest.mark.xfail(reason="known bug kept for backward compatibility", strict=True)
    def test_part_separation(self):
>       super().test_part_separation()

/testbed/tests/test_utils_request.py:325: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/tests/test_utils_request.py:224: in test_part_separation
    self.assertNotEqual(fp1, fp2)
E   AssertionError: '4e38b5ad81c4739738db8a4e3573c22aba5c5c28' == '4e38b5ad81c4739738db8a4e3573c22aba5c5c28'

test_utils_request.py::RequestFingerprintAsBytesTest::test_part_separation

test_utils_request.py::RequestFingerprintAsBytesTest::test_part_separation
self = 

    @pytest.mark.xfail(reason="known bug kept for backward compatibility", strict=True)
    def test_part_separation(self):
>       super().test_part_separation()

/testbed/tests/test_utils_request.py:361: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/testbed/tests/test_utils_request.py:224: in test_part_separation
    self.assertNotEqual(fp1, fp2)
E   AssertionError: b'N8\xb5\xad\x81\xc4s\x978\xdb\x8aN5s\xc2*\xba\\\\(' == b'N8\xb5\xad\x81\xc4s\x978\xdb\x8aN5s\xc2*\xba\\\\('
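
The "known bug kept for backward compatibility" is the lack of separation between the hashed parts of a request fingerprint, so different splits of the same byte stream collide. A small illustration of the effect (not Scrapy's fingerprint code):

    from hashlib import sha1

    def naive_fingerprint(*parts: bytes) -> bytes:
        h = sha1()
        for part in parts:
            h.update(part)  # no separator or length prefix between parts
        return h.digest()

    # Different (method, url, body, ...) splits can hash identically:
    assert naive_fingerprint(b"ab", b"c") == naive_fingerprint(b"a", b"bc")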

Patch diff

diff --git a/scrapy/addons.py b/scrapy/addons.py
index b20d143a9..9060d4f3f 100644
--- a/scrapy/addons.py
+++ b/scrapy/addons.py
@@ -1,28 +1,53 @@
 import logging
 from typing import TYPE_CHECKING, Any, List
+
 from scrapy.exceptions import NotConfigured
 from scrapy.settings import Settings
 from scrapy.utils.conf import build_component_list
 from scrapy.utils.misc import create_instance, load_object
+
 if TYPE_CHECKING:
     from scrapy.crawler import Crawler
+
 logger = logging.getLogger(__name__)


 class AddonManager:
     """This class facilitates loading and storing :ref:`topics-addons`."""

-    def __init__(self, crawler: 'Crawler') ->None:
-        self.crawler: 'Crawler' = crawler
+    def __init__(self, crawler: "Crawler") -> None:
+        self.crawler: "Crawler" = crawler
         self.addons: List[Any] = []

-    def load_settings(self, settings: Settings) ->None:
+    def load_settings(self, settings: Settings) -> None:
         """Load add-ons and configurations from a settings object and apply them.

         This will load the add-on for every add-on path in the
         ``ADDONS`` setting and execute their ``update_settings`` methods.

-        :param settings: The :class:`~scrapy.settings.Settings` object from             which to read the add-on configuration
+        :param settings: The :class:`~scrapy.settings.Settings` object from \
+            which to read the add-on configuration
         :type settings: :class:`~scrapy.settings.Settings`
         """
-        pass
+        for clspath in build_component_list(settings["ADDONS"]):
+            try:
+                addoncls = load_object(clspath)
+                addon = create_instance(
+                    addoncls, settings=settings, crawler=self.crawler
+                )
+                addon.update_settings(settings)
+                self.addons.append(addon)
+            except NotConfigured as e:
+                if e.args:
+                    logger.warning(
+                        "Disabled %(clspath)s: %(eargs)s",
+                        {"clspath": clspath, "eargs": e.args[0]},
+                        extra={"crawler": self.crawler},
+                    )
+        logger.info(
+            "Enabled addons:\n%(addons)s",
+            {
+                "addons": self.addons,
+            },
+            extra={"crawler": self.crawler},
+        )
diff --git a/scrapy/cmdline.py b/scrapy/cmdline.py
index bf63f266a..6580ba9ce 100644
--- a/scrapy/cmdline.py
+++ b/scrapy/cmdline.py
@@ -4,6 +4,7 @@ import inspect
 import os
 import sys
 from importlib.metadata import entry_points
+
 import scrapy
 from scrapy.commands import BaseRunSpiderCommand, ScrapyCommand, ScrapyHelpFormatter
 from scrapy.crawler import CrawlerProcess
@@ -14,11 +15,175 @@ from scrapy.utils.python import garbage_collect


 class ScrapyArgumentParser(argparse.ArgumentParser):
-    pass
+    def _parse_optional(self, arg_string):
+        # if starts with -: it means that is a parameter not a argument
+        if arg_string[:2] == "-:":
+            return None
+
+        return super()._parse_optional(arg_string)
+
+
+def _iter_command_classes(module_name):
+    # TODO: add `name` attribute to commands and merge this function with
+    # scrapy.utils.spider.iter_spider_classes
+    for module in walk_modules(module_name):
+        for obj in vars(module).values():
+            if (
+                inspect.isclass(obj)
+                and issubclass(obj, ScrapyCommand)
+                and obj.__module__ == module.__name__
+                and obj not in (ScrapyCommand, BaseRunSpiderCommand)
+            ):
+                yield obj
+
+
+def _get_commands_from_module(module, inproject):
+    d = {}
+    for cmd in _iter_command_classes(module):
+        if inproject or not cmd.requires_project:
+            cmdname = cmd.__module__.split(".")[-1]
+            d[cmdname] = cmd()
+    return d
+
+
+def _get_commands_from_entry_points(inproject, group="scrapy.commands"):
+    cmds = {}
+    if sys.version_info >= (3, 10):
+        eps = entry_points(group=group)
+    else:
+        eps = entry_points().get(group, ())
+    for entry_point in eps:
+        obj = entry_point.load()
+        if inspect.isclass(obj):
+            cmds[entry_point.name] = obj()
+        else:
+            raise Exception(f"Invalid entry point {entry_point.name}")
+    return cmds
+
+
+def _get_commands_dict(settings, inproject):
+    cmds = _get_commands_from_module("scrapy.commands", inproject)
+    cmds.update(_get_commands_from_entry_points(inproject))
+    cmds_module = settings["COMMANDS_MODULE"]
+    if cmds_module:
+        cmds.update(_get_commands_from_module(cmds_module, inproject))
+    return cmds
+
+
+def _pop_command_name(argv):
+    i = 0
+    for arg in argv[1:]:
+        if not arg.startswith("-"):
+            del argv[i]
+            return arg
+        i += 1
+
+
+def _print_header(settings, inproject):
+    version = scrapy.__version__
+    if inproject:
+        print(f"Scrapy {version} - active project: {settings['BOT_NAME']}\n")
+
+    else:
+        print(f"Scrapy {version} - no active project\n")
+
+
+def _print_commands(settings, inproject):
+    _print_header(settings, inproject)
+    print("Usage:")
+    print("  scrapy <command> [options] [args]\n")
+    print("Available commands:")
+    cmds = _get_commands_dict(settings, inproject)
+    for cmdname, cmdclass in sorted(cmds.items()):
+        print(f"  {cmdname:<13} {cmdclass.short_desc()}")
+    if not inproject:
+        print()
+        print("  [ more ]      More commands available when run from project directory")
+    print()
+    print('Use "scrapy <command> -h" to see more info about a command')
+
+
+def _print_unknown_command(settings, cmdname, inproject):
+    _print_header(settings, inproject)
+    print(f"Unknown command: {cmdname}\n")
+    print('Use "scrapy" to see available commands')
+
+
+def _run_print_help(parser, func, *a, **kw):
+    try:
+        func(*a, **kw)
+    except UsageError as e:
+        if str(e):
+            parser.error(str(e))
+        if e.print_help:
+            parser.print_help()
+        sys.exit(2)
+
+
+def execute(argv=None, settings=None):
+    if argv is None:
+        argv = sys.argv
+
+    if settings is None:
+        settings = get_project_settings()
+        # set EDITOR from environment if available
+        try:
+            editor = os.environ["EDITOR"]
+        except KeyError:
+            pass
+        else:
+            settings["EDITOR"] = editor
+
+    inproject = inside_project()
+    cmds = _get_commands_dict(settings, inproject)
+    cmdname = _pop_command_name(argv)
+    if not cmdname:
+        _print_commands(settings, inproject)
+        sys.exit(0)
+    elif cmdname not in cmds:
+        _print_unknown_command(settings, cmdname, inproject)
+        sys.exit(2)
+
+    cmd = cmds[cmdname]
+    parser = ScrapyArgumentParser(
+        formatter_class=ScrapyHelpFormatter,
+        usage=f"scrapy {cmdname} {cmd.syntax()}",
+        conflict_handler="resolve",
+        description=cmd.long_desc(),
+    )
+    settings.setdict(cmd.default_settings, priority="command")
+    cmd.settings = settings
+    cmd.add_options(parser)
+    opts, args = parser.parse_known_args(args=argv[1:])
+    _run_print_help(parser, cmd.process_options, args, opts)
+
+    cmd.crawler_process = CrawlerProcess(settings)
+    _run_print_help(parser, _run_command, cmd, args, opts)
+    sys.exit(cmd.exitcode)
+
+
+def _run_command(cmd, args, opts):
+    if opts.profile:
+        _run_command_profiled(cmd, args, opts)
+    else:
+        cmd.run(args, opts)
+
+
+def _run_command_profiled(cmd, args, opts):
+    if opts.profile:
+        sys.stderr.write(f"scrapy: writing cProfile stats to {opts.profile!r}\n")
+    loc = locals()
+    p = cProfile.Profile()
+    p.runctx("cmd.run(args, opts)", globals(), loc)
+    if opts.profile:
+        p.dump_stats(opts.profile)


-if __name__ == '__main__':
+if __name__ == "__main__":
     try:
         execute()
     finally:
+        # Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit:
+        # http://doc.pypy.org/en/latest/cpython_differences.html
+        # ?highlight=gc.collect#differences-related-to-garbage-collection-strategies
         garbage_collect()
diff --git a/scrapy/commands/bench.py b/scrapy/commands/bench.py
index 1c049d02b..e1ccdc451 100644
--- a/scrapy/commands/bench.py
+++ b/scrapy/commands/bench.py
@@ -2,23 +2,34 @@ import subprocess
 import sys
 import time
 from urllib.parse import urlencode
+
 import scrapy
 from scrapy.commands import ScrapyCommand
 from scrapy.linkextractors import LinkExtractor


 class Command(ScrapyCommand):
-    default_settings = {'LOG_LEVEL': 'INFO', 'LOGSTATS_INTERVAL': 1,
-        'CLOSESPIDER_TIMEOUT': 10}
+    default_settings = {
+        "LOG_LEVEL": "INFO",
+        "LOGSTATS_INTERVAL": 1,
+        "CLOSESPIDER_TIMEOUT": 10,
+    }

+    def short_desc(self):
+        return "Run quick benchmark test"

-class _BenchServer:
+    def run(self, args, opts):
+        with _BenchServer():
+            self.crawler_process.crawl(_BenchSpider, total=100000)
+            self.crawler_process.start()

+
+class _BenchServer:
     def __enter__(self):
         from scrapy.utils.test import get_testenv
-        pargs = [sys.executable, '-u', '-m', 'scrapy.utils.benchserver']
-        self.proc = subprocess.Popen(pargs, stdout=subprocess.PIPE, env=
-            get_testenv())
+
+        pargs = [sys.executable, "-u", "-m", "scrapy.utils.benchserver"]
+        self.proc = subprocess.Popen(pargs, stdout=subprocess.PIPE, env=get_testenv())
         self.proc.stdout.readline()

     def __exit__(self, exc_type, exc_value, traceback):
@@ -29,8 +40,18 @@ class _BenchServer:

 class _BenchSpider(scrapy.Spider):
     """A spider that follows all links"""
-    name = 'follow'
+
+    name = "follow"
     total = 10000
     show = 20
-    baseurl = 'http://localhost:8998'
+    baseurl = "http://localhost:8998"
     link_extractor = LinkExtractor()
+
+    def start_requests(self):
+        qargs = {"total": self.total, "show": self.show}
+        url = f"{self.baseurl}?{urlencode(qargs, doseq=True)}"
+        return [scrapy.Request(url, dont_filter=True)]
+
+    def parse(self, response):
+        for link in self.link_extractor.extract_links(response):
+            yield scrapy.Request(link.url, callback=self.parse)
diff --git a/scrapy/commands/check.py b/scrapy/commands/check.py
index 7d6b7e3ed..de54ca4d3 100644
--- a/scrapy/commands/check.py
+++ b/scrapy/commands/check.py
@@ -2,6 +2,7 @@ import time
 from collections import defaultdict
 from unittest import TextTestResult as _TextTestResult
 from unittest import TextTestRunner
+
 from scrapy.commands import ScrapyCommand
 from scrapy.contracts import ContractsManager
 from scrapy.utils.conf import build_component_list
@@ -9,9 +10,99 @@ from scrapy.utils.misc import load_object, set_environ


 class TextTestResult(_TextTestResult):
-    pass
+    def printSummary(self, start, stop):
+        write = self.stream.write
+        writeln = self.stream.writeln
+
+        run = self.testsRun
+        plural = "s" if run != 1 else ""
+
+        writeln(self.separator2)
+        writeln(f"Ran {run} contract{plural} in {stop - start:.3f}s")
+        writeln()
+
+        infos = []
+        if not self.wasSuccessful():
+            write("FAILED")
+            failed, errored = map(len, (self.failures, self.errors))
+            if failed:
+                infos.append(f"failures={failed}")
+            if errored:
+                infos.append(f"errors={errored}")
+        else:
+            write("OK")
+
+        if infos:
+            writeln(f" ({', '.join(infos)})")
+        else:
+            write("\n")


 class Command(ScrapyCommand):
     requires_project = True
-    default_settings = {'LOG_ENABLED': False}
+    default_settings = {"LOG_ENABLED": False}
+
+    def syntax(self):
+        return "[options] <spider>"
+
+    def short_desc(self):
+        return "Check spider contracts"
+
+    def add_options(self, parser):
+        ScrapyCommand.add_options(self, parser)
+        parser.add_argument(
+            "-l",
+            "--list",
+            dest="list",
+            action="store_true",
+            help="only list contracts, without checking them",
+        )
+        parser.add_argument(
+            "-v",
+            "--verbose",
+            dest="verbose",
+            default=False,
+            action="store_true",
+            help="print contract tests for all spiders",
+        )
+
+    def run(self, args, opts):
+        # load contracts
+        contracts = build_component_list(self.settings.getwithbase("SPIDER_CONTRACTS"))
+        conman = ContractsManager(load_object(c) for c in contracts)
+        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
+        result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)
+
+        # contract requests
+        contract_reqs = defaultdict(list)
+
+        spider_loader = self.crawler_process.spider_loader
+
+        with set_environ(SCRAPY_CHECK="true"):
+            for spidername in args or spider_loader.list():
+                spidercls = spider_loader.load(spidername)
+                spidercls.start_requests = lambda s: conman.from_spider(s, result)
+
+                tested_methods = conman.tested_methods_from_spidercls(spidercls)
+                if opts.list:
+                    for method in tested_methods:
+                        contract_reqs[spidercls.name].append(method)
+                elif tested_methods:
+                    self.crawler_process.crawl(spidercls)
+
+            # start checks
+            if opts.list:
+                for spider, methods in sorted(contract_reqs.items()):
+                    if not methods and not opts.verbose:
+                        continue
+                    print(spider)
+                    for method in sorted(methods):
+                        print(f"  * {method}")
+            else:
+                start = time.time()
+                self.crawler_process.start()
+                stop = time.time()
+
+                result.printErrors()
+                result.printSummary(start, stop)
+                self.exitcode = int(not result.wasSuccessful())
diff --git a/scrapy/commands/crawl.py b/scrapy/commands/crawl.py
index 2348fd64d..2f0f1c7b9 100644
--- a/scrapy/commands/crawl.py
+++ b/scrapy/commands/crawl.py
@@ -4,3 +4,34 @@ from scrapy.exceptions import UsageError

 class Command(BaseRunSpiderCommand):
     requires_project = True
+
+    def syntax(self):
+        return "[options] <spider>"
+
+    def short_desc(self):
+        return "Run a spider"
+
+    def run(self, args, opts):
+        if len(args) < 1:
+            raise UsageError()
+        elif len(args) > 1:
+            raise UsageError(
+                "running 'scrapy crawl' with more than one spider is not supported"
+            )
+        spname = args[0]
+
+        crawl_defer = self.crawler_process.crawl(spname, **opts.spargs)
+
+        if getattr(crawl_defer, "result", None) is not None and issubclass(
+            crawl_defer.result.type, Exception
+        ):
+            self.exitcode = 1
+        else:
+            self.crawler_process.start()
+
+            if (
+                self.crawler_process.bootstrap_failed
+                or hasattr(self.crawler_process, "has_exception")
+                and self.crawler_process.has_exception
+            ):
+                self.exitcode = 1
diff --git a/scrapy/commands/edit.py b/scrapy/commands/edit.py
index ce7b67cc7..03a8ed5c7 100644
--- a/scrapy/commands/edit.py
+++ b/scrapy/commands/edit.py
@@ -1,9 +1,40 @@
 import os
 import sys
+
 from scrapy.commands import ScrapyCommand
 from scrapy.exceptions import UsageError


 class Command(ScrapyCommand):
     requires_project = True
-    default_settings = {'LOG_ENABLED': False}
+    default_settings = {"LOG_ENABLED": False}
+
+    def syntax(self):
+        return "<spider>"
+
+    def short_desc(self):
+        return "Edit spider"
+
+    def long_desc(self):
+        return (
+            "Edit a spider using the editor defined in the EDITOR environment"
+            " variable or else the EDITOR setting"
+        )
+
+    def _err(self, msg):
+        sys.stderr.write(msg + os.linesep)
+        self.exitcode = 1
+
+    def run(self, args, opts):
+        if len(args) != 1:
+            raise UsageError()
+
+        editor = self.settings["EDITOR"]
+        try:
+            spidercls = self.crawler_process.spider_loader.load(args[0])
+        except KeyError:
+            return self._err(f"Spider not found: {args[0]}")
+
+        sfile = sys.modules[spidercls.__module__].__file__
+        sfile = sfile.replace(".pyc", ".py")
+        self.exitcode = os.system(f'{editor} "{sfile}"')
diff --git a/scrapy/commands/fetch.py b/scrapy/commands/fetch.py
index 59dcdb771..cdb7ad4ae 100644
--- a/scrapy/commands/fetch.py
+++ b/scrapy/commands/fetch.py
@@ -1,7 +1,9 @@
 import sys
 from argparse import Namespace
 from typing import List, Type
+
 from w3lib.url import is_url
+
 from scrapy import Spider
 from scrapy.commands import ScrapyCommand
 from scrapy.exceptions import UsageError
@@ -12,3 +14,74 @@ from scrapy.utils.spider import DefaultSpider, spidercls_for_request

 class Command(ScrapyCommand):
     requires_project = False
+
+    def syntax(self):
+        return "[options] <url>"
+
+    def short_desc(self):
+        return "Fetch a URL using the Scrapy downloader"
+
+    def long_desc(self):
+        return (
+            "Fetch a URL using the Scrapy downloader and print its content"
+            " to stdout. You may want to use --nolog to disable logging"
+        )
+
+    def add_options(self, parser):
+        ScrapyCommand.add_options(self, parser)
+        parser.add_argument("--spider", dest="spider", help="use this spider")
+        parser.add_argument(
+            "--headers",
+            dest="headers",
+            action="store_true",
+            help="print response HTTP headers instead of body",
+        )
+        parser.add_argument(
+            "--no-redirect",
+            dest="no_redirect",
+            action="store_true",
+            default=False,
+            help="do not handle HTTP 3xx status codes and print response as-is",
+        )
+
+    def _print_headers(self, headers, prefix):
+        for key, values in headers.items():
+            for value in values:
+                self._print_bytes(prefix + b" " + key + b": " + value)
+
+    def _print_response(self, response, opts):
+        if opts.headers:
+            self._print_headers(response.request.headers, b">")
+            print(">")
+            self._print_headers(response.headers, b"<")
+        else:
+            self._print_bytes(response.body)
+
+    def _print_bytes(self, bytes_):
+        sys.stdout.buffer.write(bytes_ + b"\n")
+
+    def run(self, args: List[str], opts: Namespace) -> None:
+        if len(args) != 1 or not is_url(args[0]):
+            raise UsageError()
+        request = Request(
+            args[0],
+            callback=self._print_response,
+            cb_kwargs={"opts": opts},
+            dont_filter=True,
+        )
+        # by default, let the framework handle redirects,
+        # i.e. the command handles all codes except 3xx
+        if not opts.no_redirect:
+            request.meta["handle_httpstatus_list"] = SequenceExclude(range(300, 400))
+        else:
+            request.meta["handle_httpstatus_all"] = True
+
+        spidercls: Type[Spider] = DefaultSpider
+        assert self.crawler_process
+        spider_loader = self.crawler_process.spider_loader
+        if opts.spider:
+            spidercls = spider_loader.load(opts.spider)
+        else:
+            spidercls = spidercls_for_request(spider_loader, request, spidercls)
+        self.crawler_process.crawl(spidercls, start_requests=lambda: [request])
+        self.crawler_process.start()
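
Aside: a minimal sketch (not part of the patch) of what the handle_httpstatus_list value set above amounts to, assuming SequenceExclude from scrapy.utils.datatypes keeps its inverted-membership semantics:

from scrapy.utils.datatypes import SequenceExclude

allowed = SequenceExclude(range(300, 400))
print(302 in allowed)  # False -> 3xx codes stay with the redirect middleware
print(404 in allowed)  # True  -> every non-3xx status reaches the callback
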
diff --git a/scrapy/commands/genspider.py b/scrapy/commands/genspider.py
index 5c5068083..68cbe8ff6 100644
--- a/scrapy/commands/genspider.py
+++ b/scrapy/commands/genspider.py
@@ -5,6 +5,7 @@ from importlib import import_module
 from pathlib import Path
 from typing import Optional, cast
 from urllib.parse import urlparse
+
 import scrapy
 from scrapy.commands import ScrapyCommand
 from scrapy.exceptions import UsageError
@@ -16,23 +17,186 @@ def sanitize_module_name(module_name):
     with underscores and prefixing it with a letter if it doesn't start
     with one
     """
-    pass
+    module_name = module_name.replace("-", "_").replace(".", "_")
+    if module_name[0] not in string.ascii_letters:
+        module_name = "a" + module_name
+    return module_name


 def extract_domain(url):
     """Extract domain name from URL string"""
-    pass
+    o = urlparse(url)
+    if o.scheme == "" and o.netloc == "":
+        o = urlparse("//" + url.lstrip("/"))
+    return o.netloc


 def verify_url_scheme(url):
     """Check url for scheme and insert https if none found."""
-    pass
+    parsed = urlparse(url)
+    if parsed.scheme == "" and parsed.netloc == "":
+        parsed = urlparse("//" + url)._replace(scheme="https")
+    return parsed.geturl()


 class Command(ScrapyCommand):
     requires_project = False
-    default_settings = {'LOG_ENABLED': False}
+    default_settings = {"LOG_ENABLED": False}
+
+    def syntax(self):
+        return "[options] <name> <domain>"
+
+    def short_desc(self):
+        return "Generate new spider using pre-defined templates"
+
+    def add_options(self, parser):
+        ScrapyCommand.add_options(self, parser)
+        parser.add_argument(
+            "-l",
+            "--list",
+            dest="list",
+            action="store_true",
+            help="List available templates",
+        )
+        parser.add_argument(
+            "-e",
+            "--edit",
+            dest="edit",
+            action="store_true",
+            help="Edit spider after creating it",
+        )
+        parser.add_argument(
+            "-d",
+            "--dump",
+            dest="dump",
+            metavar="TEMPLATE",
+            help="Dump template to standard output",
+        )
+        parser.add_argument(
+            "-t",
+            "--template",
+            dest="template",
+            default="basic",
+            help="Uses a custom template.",
+        )
+        parser.add_argument(
+            "--force",
+            dest="force",
+            action="store_true",
+            help="If the spider already exists, overwrite it with the template",
+        )
+
+    def run(self, args, opts):
+        if opts.list:
+            self._list_templates()
+            return
+        if opts.dump:
+            template_file = self._find_template(opts.dump)
+            if template_file:
+                print(template_file.read_text(encoding="utf-8"))
+            return
+        if len(args) != 2:
+            raise UsageError()
+
+        name, url = args[0:2]
+        url = verify_url_scheme(url)
+        module = sanitize_module_name(name)
+
+        if self.settings.get("BOT_NAME") == module:
+            print("Cannot create a spider with the same name as your project")
+            return
+
+        if not opts.force and self._spider_exists(name):
+            return
+
+        template_file = self._find_template(opts.template)
+        if template_file:
+            self._genspider(module, name, url, opts.template, template_file)
+            if opts.edit:
+                self.exitcode = os.system(f'scrapy edit "{name}"')

     def _genspider(self, module, name, url, template_name, template_file):
         """Generate the spider module, based on the given template"""
-        pass
+        capitalized_module = "".join(s.capitalize() for s in module.split("_"))
+        domain = extract_domain(url)
+        tvars = {
+            "project_name": self.settings.get("BOT_NAME"),
+            "ProjectName": string_camelcase(self.settings.get("BOT_NAME")),
+            "module": module,
+            "name": name,
+            "url": url,
+            "domain": domain,
+            "classname": f"{capitalized_module}Spider",
+        }
+        if self.settings.get("NEWSPIDER_MODULE"):
+            spiders_module = import_module(self.settings["NEWSPIDER_MODULE"])
+            spiders_dir = Path(spiders_module.__file__).parent.resolve()
+        else:
+            spiders_module = None
+            spiders_dir = Path(".")
+        spider_file = f"{spiders_dir / module}.py"
+        shutil.copyfile(template_file, spider_file)
+        render_templatefile(spider_file, **tvars)
+        print(
+            f"Created spider {name!r} using template {template_name!r} ",
+            end=("" if spiders_module else "\n"),
+        )
+        if spiders_module:
+            print(f"in module:\n  {spiders_module.__name__}.{module}")
+
+    def _find_template(self, template: str) -> Optional[Path]:
+        template_file = Path(self.templates_dir, f"{template}.tmpl")
+        if template_file.exists():
+            return template_file
+        print(f"Unable to find template: {template}\n")
+        print('Use "scrapy genspider --list" to see all available templates.')
+        return None
+
+    def _list_templates(self):
+        print("Available templates:")
+        for file in sorted(Path(self.templates_dir).iterdir()):
+            if file.suffix == ".tmpl":
+                print(f"  {file.stem}")
+
+    def _spider_exists(self, name: str) -> bool:
+        if not self.settings.get("NEWSPIDER_MODULE"):
+            # if run as a standalone command and a file with the same name already exists
+            path = Path(name + ".py")
+            if path.exists():
+                print(f"{path.resolve()} already exists")
+                return True
+            return False
+
+        assert (
+            self.crawler_process is not None
+        ), "crawler_process must be set before calling run"
+
+        try:
+            spidercls = self.crawler_process.spider_loader.load(name)
+        except KeyError:
+            pass
+        else:
+            # a spider with the same name already exists
+            print(f"Spider {name!r} already exists in module:")
+            print(f"  {spidercls.__module__}")
+            return True
+
+        # a file with the same name exists in the target directory
+        spiders_module = import_module(self.settings["NEWSPIDER_MODULE"])
+        spiders_dir = Path(cast(str, spiders_module.__file__)).parent
+        spiders_dir_abs = spiders_dir.resolve()
+        path = spiders_dir_abs / (name + ".py")
+        if path.exists():
+            print(f"{path} already exists")
+            return True
+
+        return False
+
+    @property
+    def templates_dir(self) -> str:
+        return str(
+            Path(
+                self.settings["TEMPLATES_DIR"] or Path(scrapy.__path__[0], "templates"),
+                "spiders",
+            )
+        )
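
As a quick illustration of the three helpers implemented above (a sketch, not part of the patch; the outputs follow directly from the code shown):

from scrapy.commands.genspider import (
    extract_domain,
    sanitize_module_name,
    verify_url_scheme,
)

print(verify_url_scheme("example.com"))            # https://example.com
print(extract_domain("https://example.com/shop"))  # example.com
print(sanitize_module_name("2nd-site.v1"))         # a2nd_site_v1
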
diff --git a/scrapy/commands/list.py b/scrapy/commands/list.py
index 59078bd88..2f5032360 100644
--- a/scrapy/commands/list.py
+++ b/scrapy/commands/list.py
@@ -3,4 +3,11 @@ from scrapy.commands import ScrapyCommand

 class Command(ScrapyCommand):
     requires_project = True
-    default_settings = {'LOG_ENABLED': False}
+    default_settings = {"LOG_ENABLED": False}
+
+    def short_desc(self):
+        return "List available spiders"
+
+    def run(self, args, opts):
+        for s in sorted(self.crawler_process.spider_loader.list()):
+            print(s)
diff --git a/scrapy/commands/parse.py b/scrapy/commands/parse.py
index 63e47a92b..c9f8586d3 100644
--- a/scrapy/commands/parse.py
+++ b/scrapy/commands/parse.py
@@ -3,9 +3,11 @@ import inspect
 import json
 import logging
 from typing import Dict
+
 from itemadapter import ItemAdapter, is_item
 from twisted.internet.defer import maybeDeferred
 from w3lib.url import is_url
+
 from scrapy.commands import BaseRunSpiderCommand
 from scrapy.exceptions import UsageError
 from scrapy.http import Request
@@ -15,12 +17,338 @@ from scrapy.utils.defer import aiter_errback, deferred_from_coro
 from scrapy.utils.log import failure_to_exc_info
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.spider import spidercls_for_request
+
 logger = logging.getLogger(__name__)


 class Command(BaseRunSpiderCommand):
     requires_project = True
+
     spider = None
     items: Dict[int, list] = {}
     requests: Dict[int, list] = {}
+
     first_response = None
+
+    def syntax(self):
+        return "[options] <url>"
+
+    def short_desc(self):
+        return "Parse URL (using its spider) and print the results"
+
+    def add_options(self, parser):
+        BaseRunSpiderCommand.add_options(self, parser)
+        parser.add_argument(
+            "--spider",
+            dest="spider",
+            default=None,
+            help="use this spider without looking for one",
+        )
+        parser.add_argument(
+            "--pipelines", action="store_true", help="process items through pipelines"
+        )
+        parser.add_argument(
+            "--nolinks",
+            dest="nolinks",
+            action="store_true",
+            help="don't show links to follow (extracted requests)",
+        )
+        parser.add_argument(
+            "--noitems",
+            dest="noitems",
+            action="store_true",
+            help="don't show scraped items",
+        )
+        parser.add_argument(
+            "--nocolour",
+            dest="nocolour",
+            action="store_true",
+            help="avoid using pygments to colorize the output",
+        )
+        parser.add_argument(
+            "-r",
+            "--rules",
+            dest="rules",
+            action="store_true",
+            help="use CrawlSpider rules to discover the callback",
+        )
+        parser.add_argument(
+            "-c",
+            "--callback",
+            dest="callback",
+            help="use this callback for parsing, instead looking for a callback",
+        )
+        parser.add_argument(
+            "-m",
+            "--meta",
+            dest="meta",
+            help="inject extra meta into the Request, it must be a valid raw json string",
+        )
+        parser.add_argument(
+            "--cbkwargs",
+            dest="cbkwargs",
+            help="inject extra callback kwargs into the Request, it must be a valid raw json string",
+        )
+        parser.add_argument(
+            "-d",
+            "--depth",
+            dest="depth",
+            type=int,
+            default=1,
+            help="maximum depth for parsing requests [default: %(default)s]",
+        )
+        parser.add_argument(
+            "-v",
+            "--verbose",
+            dest="verbose",
+            action="store_true",
+            help="print each depth level one by one",
+        )
+
+    @property
+    def max_level(self):
+        max_items, max_requests = 0, 0
+        if self.items:
+            max_items = max(self.items)
+        if self.requests:
+            max_requests = max(self.requests)
+        return max(max_items, max_requests)
+
+    def handle_exception(self, _failure):
+        logger.error(
+            "An error is caught while iterating the async iterable",
+            exc_info=failure_to_exc_info(_failure),
+        )
+
+    def iterate_spider_output(self, result):
+        if inspect.isasyncgen(result):
+            d = deferred_from_coro(
+                collect_asyncgen(aiter_errback(result, self.handle_exception))
+            )
+            d.addCallback(self.iterate_spider_output)
+            return d
+        if inspect.iscoroutine(result):
+            d = deferred_from_coro(result)
+            d.addCallback(self.iterate_spider_output)
+            return d
+        return arg_to_iter(deferred_from_coro(result))
+
+    def add_items(self, lvl, new_items):
+        old_items = self.items.get(lvl, [])
+        self.items[lvl] = old_items + new_items
+
+    def add_requests(self, lvl, new_reqs):
+        old_reqs = self.requests.get(lvl, [])
+        self.requests[lvl] = old_reqs + new_reqs
+
+    def print_items(self, lvl=None, colour=True):
+        if lvl is None:
+            items = [item for lst in self.items.values() for item in lst]
+        else:
+            items = self.items.get(lvl, [])
+
+        print("# Scraped Items ", "-" * 60)
+        display.pprint([ItemAdapter(x).asdict() for x in items], colorize=colour)
+
+    def print_requests(self, lvl=None, colour=True):
+        if lvl is None:
+            if self.requests:
+                requests = self.requests[max(self.requests)]
+            else:
+                requests = []
+        else:
+            requests = self.requests.get(lvl, [])
+
+        print("# Requests ", "-" * 65)
+        display.pprint(requests, colorize=colour)
+
+    def print_results(self, opts):
+        colour = not opts.nocolour
+
+        if opts.verbose:
+            for level in range(1, self.max_level + 1):
+                print(f"\n>>> DEPTH LEVEL: {level} <<<")
+                if not opts.noitems:
+                    self.print_items(level, colour)
+                if not opts.nolinks:
+                    self.print_requests(level, colour)
+        else:
+            print(f"\n>>> STATUS DEPTH LEVEL {self.max_level} <<<")
+            if not opts.noitems:
+                self.print_items(colour=colour)
+            if not opts.nolinks:
+                self.print_requests(colour=colour)
+
+    def _get_items_and_requests(self, spider_output, opts, depth, spider, callback):
+        items, requests = [], []
+        for x in spider_output:
+            if is_item(x):
+                items.append(x)
+            elif isinstance(x, Request):
+                requests.append(x)
+        return items, requests, opts, depth, spider, callback
+
+    def run_callback(self, response, callback, cb_kwargs=None):
+        cb_kwargs = cb_kwargs or {}
+        d = maybeDeferred(self.iterate_spider_output, callback(response, **cb_kwargs))
+        return d
+
+    def get_callback_from_rules(self, spider, response):
+        if getattr(spider, "rules", None):
+            for rule in spider.rules:
+                if rule.link_extractor.matches(response.url):
+                    return rule.callback or "parse"
+        else:
+            logger.error(
+                "No CrawlSpider rules found in spider %(spider)r, "
+                "please specify a callback to use for parsing",
+                {"spider": spider.name},
+            )
+
+    def set_spidercls(self, url, opts):
+        spider_loader = self.crawler_process.spider_loader
+        if opts.spider:
+            try:
+                self.spidercls = spider_loader.load(opts.spider)
+            except KeyError:
+                logger.error(
+                    "Unable to find spider: %(spider)s", {"spider": opts.spider}
+                )
+        else:
+            self.spidercls = spidercls_for_request(spider_loader, Request(url))
+            if not self.spidercls:
+                logger.error("Unable to find spider for: %(url)s", {"url": url})
+
+        def _start_requests(spider):
+            yield self.prepare_request(spider, Request(url), opts)
+
+        if self.spidercls:
+            self.spidercls.start_requests = _start_requests
+
+    def start_parsing(self, url, opts):
+        self.crawler_process.crawl(self.spidercls, **opts.spargs)
+        self.pcrawler = list(self.crawler_process.crawlers)[0]
+        self.crawler_process.start()
+
+        if not self.first_response:
+            logger.error("No response downloaded for: %(url)s", {"url": url})
+
+    def scraped_data(self, args):
+        items, requests, opts, depth, spider, callback = args
+        if opts.pipelines:
+            itemproc = self.pcrawler.engine.scraper.itemproc
+            for item in items:
+                itemproc.process_item(item, spider)
+        self.add_items(depth, items)
+        self.add_requests(depth, requests)
+
+        scraped_data = items if opts.output else []
+        if depth < opts.depth:
+            for req in requests:
+                req.meta["_depth"] = depth + 1
+                req.meta["_callback"] = req.callback
+                req.callback = callback
+            scraped_data += requests
+
+        return scraped_data
+
+    def _get_callback(self, *, spider, opts, response=None):
+        cb = None
+        if response:
+            cb = response.meta["_callback"]
+        if not cb:
+            if opts.callback:
+                cb = opts.callback
+            elif response and opts.rules and self.first_response == response:
+                cb = self.get_callback_from_rules(spider, response)
+                if not cb:
+                    raise ValueError(
+                        f"Cannot find a rule that matches {response.url!r} in spider: "
+                        f"{spider.name}"
+                    )
+            else:
+                cb = "parse"
+
+        if not callable(cb):
+            cb_method = getattr(spider, cb, None)
+            if callable(cb_method):
+                cb = cb_method
+            else:
+                raise ValueError(
+                    f"Cannot find callback {cb!r} in spider: {spider.name}"
+                )
+        return cb
+
+    def prepare_request(self, spider, request, opts):
+        def callback(response, **cb_kwargs):
+            # memorize the first response
+            if not self.first_response:
+                self.first_response = response
+
+            cb = self._get_callback(spider=spider, opts=opts, response=response)
+
+            # parse items and requests
+            depth = response.meta["_depth"]
+
+            d = self.run_callback(response, cb, cb_kwargs)
+            d.addCallback(self._get_items_and_requests, opts, depth, spider, callback)
+            d.addCallback(self.scraped_data)
+            return d
+
+        # update request meta if any extra meta was passed through the --meta/-m opts.
+        if opts.meta:
+            request.meta.update(opts.meta)
+
+        # update cb_kwargs if any extra values were passed through the --cbkwargs option.
+        if opts.cbkwargs:
+            request.cb_kwargs.update(opts.cbkwargs)
+
+        request.meta["_depth"] = 1
+        request.meta["_callback"] = request.callback
+        if not request.callback and not opts.rules:
+            cb = self._get_callback(spider=spider, opts=opts)
+            functools.update_wrapper(callback, cb)
+        request.callback = callback
+        return request
+
+    def process_options(self, args, opts):
+        BaseRunSpiderCommand.process_options(self, args, opts)
+
+        self.process_request_meta(opts)
+        self.process_request_cb_kwargs(opts)
+
+    def process_request_meta(self, opts):
+        if opts.meta:
+            try:
+                opts.meta = json.loads(opts.meta)
+            except ValueError:
+                raise UsageError(
+                    "Invalid -m/--meta value, pass a valid json string to -m or --meta. "
+                    'Example: --meta=\'{"foo" : "bar"}\'',
+                    print_help=False,
+                )
+
+    def process_request_cb_kwargs(self, opts):
+        if opts.cbkwargs:
+            try:
+                opts.cbkwargs = json.loads(opts.cbkwargs)
+            except ValueError:
+                raise UsageError(
+                    "Invalid --cbkwargs value, pass a valid json string to --cbkwargs. "
+                    'Example: --cbkwargs=\'{"foo" : "bar"}\'',
+                    print_help=False,
+                )
+
+    def run(self, args, opts):
+        # parse arguments
+        if not len(args) == 1 or not is_url(args[0]):
+            raise UsageError()
+        else:
+            url = args[0]
+
+        # prepare spidercls
+        self.set_spidercls(url, opts)
+
+        if self.spidercls and opts.depth > 0:
+            self.start_parsing(url, opts)
+            self.print_results(opts)
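
The -m/--meta and --cbkwargs options above expect a raw JSON object string; a small sketch of the parsing step performed by process_request_meta / process_request_cb_kwargs:

import json

print(json.loads('{"foo": "bar"}'))      # {'foo': 'bar'} -> merged into request.meta
try:
    json.loads("{'single': 'quotes'}")   # not valid JSON
except ValueError as exc:
    print("rejected:", exc)              # the command turns this into a UsageError
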
diff --git a/scrapy/commands/runspider.py b/scrapy/commands/runspider.py
index d6b20ae5f..58ed89a81 100644
--- a/scrapy/commands/runspider.py
+++ b/scrapy/commands/runspider.py
@@ -4,11 +4,55 @@ from os import PathLike
 from pathlib import Path
 from types import ModuleType
 from typing import Union
+
 from scrapy.commands import BaseRunSpiderCommand
 from scrapy.exceptions import UsageError
 from scrapy.utils.spider import iter_spider_classes


+def _import_file(filepath: Union[str, PathLike]) -> ModuleType:
+    abspath = Path(filepath).resolve()
+    if abspath.suffix not in (".py", ".pyw"):
+        raise ValueError(f"Not a Python source file: {abspath}")
+    dirname = str(abspath.parent)
+    sys.path = [dirname] + sys.path
+    try:
+        module = import_module(abspath.stem)
+    finally:
+        sys.path.pop(0)
+    return module
+
+
 class Command(BaseRunSpiderCommand):
     requires_project = False
-    default_settings = {'SPIDER_LOADER_WARN_ONLY': True}
+    default_settings = {"SPIDER_LOADER_WARN_ONLY": True}
+
+    def syntax(self):
+        return "[options] <spider_file>"
+
+    def short_desc(self):
+        return "Run a self-contained spider (without creating a project)"
+
+    def long_desc(self):
+        return "Run the spider defined in the given file"
+
+    def run(self, args, opts):
+        if len(args) != 1:
+            raise UsageError()
+        filename = Path(args[0])
+        if not filename.exists():
+            raise UsageError(f"File not found: {filename}\n")
+        try:
+            module = _import_file(filename)
+        except (ImportError, ValueError) as e:
+            raise UsageError(f"Unable to load {str(filename)!r}: {e}\n")
+        spclasses = list(iter_spider_classes(module))
+        if not spclasses:
+            raise UsageError(f"No spider found in file: {filename}\n")
+        spidercls = spclasses.pop()
+
+        self.crawler_process.crawl(spidercls, **opts.spargs)
+        self.crawler_process.start()
+
+        if self.crawler_process.bootstrap_failed:
+            self.exitcode = 1
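
A hypothetical usage of the _import_file helper added above, assuming a local file spiders/quotes.py that defines a Spider subclass:

from pathlib import Path

from scrapy.commands.runspider import _import_file
from scrapy.utils.spider import iter_spider_classes

module = _import_file(Path("spiders/quotes.py"))  # hypothetical spider file
for spidercls in iter_spider_classes(module):
    print(spidercls.name)
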
diff --git a/scrapy/commands/settings.py b/scrapy/commands/settings.py
index 017f56138..318187204 100644
--- a/scrapy/commands/settings.py
+++ b/scrapy/commands/settings.py
@@ -1,8 +1,62 @@
 import json
+
 from scrapy.commands import ScrapyCommand
 from scrapy.settings import BaseSettings


 class Command(ScrapyCommand):
     requires_project = False
-    default_settings = {'LOG_ENABLED': False, 'SPIDER_LOADER_WARN_ONLY': True}
+    default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}
+
+    def syntax(self):
+        return "[options]"
+
+    def short_desc(self):
+        return "Get settings values"
+
+    def add_options(self, parser):
+        ScrapyCommand.add_options(self, parser)
+        parser.add_argument(
+            "--get", dest="get", metavar="SETTING", help="print raw setting value"
+        )
+        parser.add_argument(
+            "--getbool",
+            dest="getbool",
+            metavar="SETTING",
+            help="print setting value, interpreted as a boolean",
+        )
+        parser.add_argument(
+            "--getint",
+            dest="getint",
+            metavar="SETTING",
+            help="print setting value, interpreted as an integer",
+        )
+        parser.add_argument(
+            "--getfloat",
+            dest="getfloat",
+            metavar="SETTING",
+            help="print setting value, interpreted as a float",
+        )
+        parser.add_argument(
+            "--getlist",
+            dest="getlist",
+            metavar="SETTING",
+            help="print setting value, interpreted as a list",
+        )
+
+    def run(self, args, opts):
+        settings = self.crawler_process.settings
+        if opts.get:
+            s = settings.get(opts.get)
+            if isinstance(s, BaseSettings):
+                print(json.dumps(s.copy_to_dict()))
+            else:
+                print(s)
+        elif opts.getbool:
+            print(settings.getbool(opts.getbool))
+        elif opts.getint:
+            print(settings.getint(opts.getint))
+        elif opts.getfloat:
+            print(settings.getfloat(opts.getfloat))
+        elif opts.getlist:
+            print(settings.getlist(opts.getlist))
diff --git a/scrapy/commands/shell.py b/scrapy/commands/shell.py
index fadb2c519..12e37babc 100644
--- a/scrapy/commands/shell.py
+++ b/scrapy/commands/shell.py
@@ -6,6 +6,7 @@ See documentation in docs/topics/shell.rst
 from argparse import Namespace
 from threading import Thread
 from typing import List, Type
+
 from scrapy import Spider
 from scrapy.commands import ScrapyCommand
 from scrapy.http import Request
@@ -16,11 +17,80 @@ from scrapy.utils.url import guess_scheme

 class Command(ScrapyCommand):
     requires_project = False
-    default_settings = {'KEEP_ALIVE': True, 'LOGSTATS_INTERVAL': 0,
-        'DUPEFILTER_CLASS': 'scrapy.dupefilters.BaseDupeFilter'}
+    default_settings = {
+        "KEEP_ALIVE": True,
+        "LOGSTATS_INTERVAL": 0,
+        "DUPEFILTER_CLASS": "scrapy.dupefilters.BaseDupeFilter",
+    }
+
+    def syntax(self):
+        return "[url|file]"
+
+    def short_desc(self):
+        return "Interactive scraping console"
+
+    def long_desc(self):
+        return (
+            "Interactive console for scraping the given url or file. "
+            "Use ./file.html syntax or full path for local file."
+        )
+
+    def add_options(self, parser):
+        ScrapyCommand.add_options(self, parser)
+        parser.add_argument(
+            "-c",
+            dest="code",
+            help="evaluate the code in the shell, print the result and exit",
+        )
+        parser.add_argument("--spider", dest="spider", help="use this spider")
+        parser.add_argument(
+            "--no-redirect",
+            dest="no_redirect",
+            action="store_true",
+            default=False,
+            help="do not handle HTTP 3xx status codes and print response as-is",
+        )

     def update_vars(self, vars):
         """You can use this function to update the Scrapy objects that will be
         available in the shell
         """
         pass
+
+    def run(self, args: List[str], opts: Namespace) -> None:
+        url = args[0] if args else None
+        if url:
+            # first argument may be a local file
+            url = guess_scheme(url)
+
+        assert self.crawler_process
+        spider_loader = self.crawler_process.spider_loader
+
+        spidercls: Type[Spider] = DefaultSpider
+        if opts.spider:
+            spidercls = spider_loader.load(opts.spider)
+        elif url:
+            spidercls = spidercls_for_request(
+                spider_loader, Request(url), spidercls, log_multiple=True
+            )
+
+        # The crawler is created this way since the Shell manually handles the
+        # crawling engine, so the setup in the crawl method won't work
+        crawler = self.crawler_process._create_crawler(spidercls)
+        crawler._apply_settings()
+        # The Shell class needs a persistent engine in the crawler
+        crawler.engine = crawler._create_engine()
+        crawler.engine.start()
+
+        self._start_crawler_thread()
+
+        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
+        shell.start(url=url, redirect=not opts.no_redirect)
+
+    def _start_crawler_thread(self):
+        t = Thread(
+            target=self.crawler_process.start,
+            kwargs={"stop_after_crawl": False, "install_signal_handlers": False},
+        )
+        t.daemon = True
+        t.start()
diff --git a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py
index c9783c28e..fde609c6f 100644
--- a/scrapy/commands/startproject.py
+++ b/scrapy/commands/startproject.py
@@ -5,19 +5,53 @@ from importlib.util import find_spec
 from pathlib import Path
 from shutil import copy2, copystat, ignore_patterns, move
 from stat import S_IWUSR as OWNER_WRITE_PERMISSION
+
 import scrapy
 from scrapy.commands import ScrapyCommand
 from scrapy.exceptions import UsageError
 from scrapy.utils.template import render_templatefile, string_camelcase
-TEMPLATES_TO_RENDER = ('scrapy.cfg',), ('${project_name}', 'settings.py.tmpl'
-    ), ('${project_name}', 'items.py.tmpl'), ('${project_name}',
-    'pipelines.py.tmpl'), ('${project_name}', 'middlewares.py.tmpl')
-IGNORE = ignore_patterns('*.pyc', '__pycache__', '.svn')
+
+TEMPLATES_TO_RENDER = (
+    ("scrapy.cfg",),
+    ("${project_name}", "settings.py.tmpl"),
+    ("${project_name}", "items.py.tmpl"),
+    ("${project_name}", "pipelines.py.tmpl"),
+    ("${project_name}", "middlewares.py.tmpl"),
+)
+
+IGNORE = ignore_patterns("*.pyc", "__pycache__", ".svn")
+
+
+def _make_writable(path):
+    current_permissions = os.stat(path).st_mode
+    os.chmod(path, current_permissions | OWNER_WRITE_PERMISSION)


 class Command(ScrapyCommand):
     requires_project = False
-    default_settings = {'LOG_ENABLED': False, 'SPIDER_LOADER_WARN_ONLY': True}
+    default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}
+
+    def syntax(self):
+        return "<project_name> [project_dir]"
+
+    def short_desc(self):
+        return "Create new project"
+
+    def _is_valid_name(self, project_name):
+        def _module_exists(module_name):
+            spec = find_spec(module_name)
+            return spec is not None and spec.loader is not None
+
+        if not re.search(r"^[_a-zA-Z]\w*$", project_name):
+            print(
+                "Error: Project names must begin with a letter and contain"
+                " only\nletters, numbers and underscores"
+            )
+        elif _module_exists(project_name):
+            print(f"Error: Module {project_name!r} already exists")
+        else:
+            return True
+        return False

     def _copytree(self, src: Path, dst: Path):
         """
@@ -28,4 +62,77 @@ class Command(ScrapyCommand):
         More info at:
         https://github.com/scrapy/scrapy/pull/2005
         """
-        pass
+        ignore = IGNORE
+        names = [x.name for x in src.iterdir()]
+        ignored_names = ignore(src, names)
+
+        if not dst.exists():
+            dst.mkdir(parents=True)
+
+        for name in names:
+            if name in ignored_names:
+                continue
+
+            srcname = src / name
+            dstname = dst / name
+            if srcname.is_dir():
+                self._copytree(srcname, dstname)
+            else:
+                copy2(srcname, dstname)
+                _make_writable(dstname)
+
+        copystat(src, dst)
+        _make_writable(dst)
+
+    def run(self, args, opts):
+        if len(args) not in (1, 2):
+            raise UsageError()
+
+        project_name = args[0]
+
+        if len(args) == 2:
+            project_dir = Path(args[1])
+        else:
+            project_dir = Path(args[0])
+
+        if (project_dir / "scrapy.cfg").exists():
+            self.exitcode = 1
+            print(f"Error: scrapy.cfg already exists in {project_dir.resolve()}")
+            return
+
+        if not self._is_valid_name(project_name):
+            self.exitcode = 1
+            return
+
+        self._copytree(Path(self.templates_dir), project_dir.resolve())
+        move(project_dir / "module", project_dir / project_name)
+        for paths in TEMPLATES_TO_RENDER:
+            tplfile = Path(
+                project_dir,
+                *(
+                    string.Template(s).substitute(project_name=project_name)
+                    for s in paths
+                ),
+            )
+            render_templatefile(
+                tplfile,
+                project_name=project_name,
+                ProjectName=string_camelcase(project_name),
+            )
+        print(
+            f"New Scrapy project '{project_name}', using template directory "
+            f"'{self.templates_dir}', created in:"
+        )
+        print(f"    {project_dir.resolve()}\n")
+        print("You can start your first spider with:")
+        print(f"    cd {project_dir}")
+        print("    scrapy genspider example example.com")
+
+    @property
+    def templates_dir(self) -> str:
+        return str(
+            Path(
+                self.settings["TEMPLATES_DIR"] or Path(scrapy.__path__[0], "templates"),
+                "project",
+            )
+        )
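
A sketch of how the ${project_name} placeholders in TEMPLATES_TO_RENDER are expanded before rendering; the project name "mybot" is hypothetical:

import string
from pathlib import Path

project_name = "mybot"
paths = ("${project_name}", "settings.py.tmpl")
tplfile = Path(*(string.Template(s).substitute(project_name=project_name) for s in paths))
print(tplfile)  # mybot/settings.py.tmpl
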
diff --git a/scrapy/commands/version.py b/scrapy/commands/version.py
index 409f6ebb2..47582866b 100644
--- a/scrapy/commands/version.py
+++ b/scrapy/commands/version.py
@@ -4,4 +4,29 @@ from scrapy.utils.versions import scrapy_components_versions


 class Command(ScrapyCommand):
-    default_settings = {'LOG_ENABLED': False, 'SPIDER_LOADER_WARN_ONLY': True}
+    default_settings = {"LOG_ENABLED": False, "SPIDER_LOADER_WARN_ONLY": True}
+
+    def syntax(self):
+        return "[-v]"
+
+    def short_desc(self):
+        return "Print Scrapy version"
+
+    def add_options(self, parser):
+        ScrapyCommand.add_options(self, parser)
+        parser.add_argument(
+            "--verbose",
+            "-v",
+            dest="verbose",
+            action="store_true",
+            help="also display twisted/python/platform info (useful for bug reports)",
+        )
+
+    def run(self, args, opts):
+        if opts.verbose:
+            versions = scrapy_components_versions()
+            width = max(len(n) for (n, _) in versions)
+            for name, version in versions:
+                print(f"{name:<{width}} : {version}")
+        else:
+            print(f"Scrapy {scrapy.__version__}")
diff --git a/scrapy/commands/view.py b/scrapy/commands/view.py
index 4b95dcfbd..ebdfa10a8 100644
--- a/scrapy/commands/view.py
+++ b/scrapy/commands/view.py
@@ -1,7 +1,21 @@
 import argparse
+
 from scrapy.commands import fetch
 from scrapy.utils.response import open_in_browser


 class Command(fetch.Command):
-    pass
+    def short_desc(self):
+        return "Open URL in browser, as seen by Scrapy"
+
+    def long_desc(self):
+        return (
+            "Fetch a URL using the Scrapy downloader and show its contents in a browser"
+        )
+
+    def add_options(self, parser):
+        super().add_options(parser)
+        parser.add_argument("--headers", help=argparse.SUPPRESS)
+
+    def _print_response(self, response, opts):
+        open_in_browser(response)
diff --git a/scrapy/contracts/default.py b/scrapy/contracts/default.py
index 63c140a96..eac702cef 100644
--- a/scrapy/contracts/default.py
+++ b/scrapy/contracts/default.py
@@ -1,15 +1,23 @@
 import json
+
 from itemadapter import ItemAdapter, is_item
+
 from scrapy.contracts import Contract
 from scrapy.exceptions import ContractFail
 from scrapy.http import Request


+# contracts
 class UrlContract(Contract):
     """Contract to set the url of the request (mandatory)
     @url http://scrapy.org
     """
-    name = 'url'
+
+    name = "url"
+
+    def adjust_request_args(self, args):
+        args["url"] = self.args[0]
+        return args


 class CallbackKeywordArgumentsContract(Contract):
@@ -18,7 +26,12 @@ class CallbackKeywordArgumentsContract(Contract):

     @cb_kwargs {"arg1": "some value"}
     """
-    name = 'cb_kwargs'
+
+    name = "cb_kwargs"
+
+    def adjust_request_args(self, args):
+        args["cb_kwargs"] = json.loads(" ".join(self.args))
+        return args


 class ReturnsContract(Contract):
@@ -33,31 +46,65 @@ class ReturnsContract(Contract):
     @returns request 2 10
     @returns request 0 10
     """
-    name = 'returns'
-    object_type_verifiers = {'request': lambda x: isinstance(x, Request),
-        'requests': lambda x: isinstance(x, Request), 'item': is_item,
-        'items': is_item}
+
+    name = "returns"
+    object_type_verifiers = {
+        "request": lambda x: isinstance(x, Request),
+        "requests": lambda x: isinstance(x, Request),
+        "item": is_item,
+        "items": is_item,
+    }

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+
         if len(self.args) not in [1, 2, 3]:
             raise ValueError(
-                f'Incorrect argument quantity: expected 1, 2 or 3, got {len(self.args)}'
-                )
+                f"Incorrect argument quantity: expected 1, 2 or 3, got {len(self.args)}"
+            )
         self.obj_name = self.args[0] or None
         self.obj_type_verifier = self.object_type_verifiers[self.obj_name]
+
         try:
             self.min_bound = int(self.args[1])
         except IndexError:
             self.min_bound = 1
+
         try:
             self.max_bound = int(self.args[2])
         except IndexError:
-            self.max_bound = float('inf')
+            self.max_bound = float("inf")
+
+    def post_process(self, output):
+        occurrences = 0
+        for x in output:
+            if self.obj_type_verifier(x):
+                occurrences += 1
+
+        assertion = self.min_bound <= occurrences <= self.max_bound
+
+        if not assertion:
+            if self.min_bound == self.max_bound:
+                expected = self.min_bound
+            else:
+                expected = f"{self.min_bound}..{self.max_bound}"
+
+            raise ContractFail(
+                f"Returned {occurrences} {self.obj_name}, expected {expected}"
+            )


 class ScrapesContract(Contract):
     """Contract to check presence of fields in scraped items
     @scrapes page_name page_body
     """
-    name = 'scrapes'
+
+    name = "scrapes"
+
+    def post_process(self, output):
+        for x in output:
+            if is_item(x):
+                missing = [arg for arg in self.args if arg not in ItemAdapter(x)]
+                if missing:
+                    missing_fields = ", ".join(missing)
+                    raise ContractFail(f"Missing fields: {missing_fields}")
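
A minimal sketch of the field check performed by ScrapesContract.post_process above, using a plain dict as the scraped item:

from itemadapter import ItemAdapter

item = {"page_name": "Home"}            # hypothetical scraped item
required = ("page_name", "page_body")   # as in "@scrapes page_name page_body"
missing = [arg for arg in required if arg not in ItemAdapter(item)]
print(missing)  # ['page_body'] -> ContractFail("Missing fields: page_body")
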
diff --git a/scrapy/core/downloader/contextfactory.py b/scrapy/core/downloader/contextfactory.py
index fe70014d1..909cc273f 100644
--- a/scrapy/core/downloader/contextfactory.py
+++ b/scrapy/core/downloader/contextfactory.py
@@ -1,15 +1,27 @@
 import warnings
 from typing import TYPE_CHECKING, Any, List, Optional
+
 from OpenSSL import SSL
 from twisted.internet._sslverify import _setAcceptableProtocols
-from twisted.internet.ssl import AcceptableCiphers, CertificateOptions, optionsForClientTLS, platformTrust
+from twisted.internet.ssl import (
+    AcceptableCiphers,
+    CertificateOptions,
+    optionsForClientTLS,
+    platformTrust,
+)
 from twisted.web.client import BrowserLikePolicyForHTTPS
 from twisted.web.iweb import IPolicyForHTTPS
 from zope.interface.declarations import implementer
 from zope.interface.verify import verifyObject
-from scrapy.core.downloader.tls import DEFAULT_CIPHERS, ScrapyClientTLSOptions, openssl_methods
+
+from scrapy.core.downloader.tls import (
+    DEFAULT_CIPHERS,
+    ScrapyClientTLSOptions,
+    openssl_methods,
+)
 from scrapy.settings import BaseSettings
 from scrapy.utils.misc import create_instance, load_object
+
 if TYPE_CHECKING:
     from twisted.internet._sslverify import ClientTLSOptions

@@ -26,19 +38,76 @@ class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
      understand the TLSv1, TLSv1.1 and TLSv1.2 protocols.'
     """

-    def __init__(self, method: int=SSL.SSLv23_METHOD, tls_verbose_logging:
-        bool=False, tls_ciphers: Optional[str]=None, *args: Any, **kwargs: Any
-        ):
+    def __init__(
+        self,
+        method: int = SSL.SSLv23_METHOD,
+        tls_verbose_logging: bool = False,
+        tls_ciphers: Optional[str] = None,
+        *args: Any,
+        **kwargs: Any,
+    ):
         super().__init__(*args, **kwargs)
         self._ssl_method: int = method
         self.tls_verbose_logging: bool = tls_verbose_logging
         self.tls_ciphers: AcceptableCiphers
         if tls_ciphers:
-            self.tls_ciphers = AcceptableCiphers.fromOpenSSLCipherString(
-                tls_ciphers)
+            self.tls_ciphers = AcceptableCiphers.fromOpenSSLCipherString(tls_ciphers)
         else:
             self.tls_ciphers = DEFAULT_CIPHERS

+    @classmethod
+    def from_settings(
+        cls,
+        settings: BaseSettings,
+        method: int = SSL.SSLv23_METHOD,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        tls_verbose_logging: bool = settings.getbool(
+            "DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING"
+        )
+        tls_ciphers: Optional[str] = settings["DOWNLOADER_CLIENT_TLS_CIPHERS"]
+        return cls(  # type: ignore[misc]
+            method=method,
+            tls_verbose_logging=tls_verbose_logging,
+            tls_ciphers=tls_ciphers,
+            *args,
+            **kwargs,
+        )
+
+    def getCertificateOptions(self) -> CertificateOptions:
+        # setting verify=True will require you to provide CAs
+        # to verify against; in other words: it's not that simple
+
+        # backward-compatible SSL/TLS method:
+        #
+        # * this will respect the `method` attribute in the often recommended
+        #   `ScrapyClientContextFactory` subclass
+        #   (https://github.com/scrapy/scrapy/issues/1429#issuecomment-131782133)
+        #
+        # * getattr() for `_ssl_method` attribute for context factories
+        #   not calling super().__init__
+        return CertificateOptions(
+            verify=False,
+            method=getattr(self, "method", getattr(self, "_ssl_method", None)),
+            fixBrokenPeers=True,
+            acceptableCiphers=self.tls_ciphers,
+        )
+
+    # kept for old-style HTTP/1.0 downloader context twisted calls,
+    # e.g. connectSSL()
+    def getContext(self, hostname: Any = None, port: Any = None) -> SSL.Context:
+        ctx = self.getCertificateOptions().getContext()
+        ctx.set_options(0x4)  # OP_LEGACY_SERVER_CONNECT
+        return ctx
+
+    def creatorForNetloc(self, hostname: bytes, port: int) -> "ClientTLSOptions":
+        return ScrapyClientTLSOptions(
+            hostname.decode("ascii"),
+            self.getContext(),
+            verbose_logging=self.tls_verbose_logging,
+        )
+

 @implementer(IPolicyForHTTPS)
 class BrowserLikeContextFactory(ScrapyClientContextFactory):
@@ -59,6 +128,17 @@ class BrowserLikeContextFactory(ScrapyClientContextFactory):
     ``SSLv23_METHOD``) which allows TLS protocol negotiation.
     """

+    def creatorForNetloc(self, hostname: bytes, port: int) -> "ClientTLSOptions":
+        # trustRoot set to platformTrust() will use the platform's root CAs.
+        #
+        # This means that a website like https://www.cacert.org will be rejected
+        # by default, since CAcert.org CA certificate is seldom shipped.
+        return optionsForClientTLS(
+            hostname=hostname.decode("ascii"),
+            trustRoot=platformTrust(),
+            extraCertificateOptions={"method": self._ssl_method},
+        )
+

 @implementer(IPolicyForHTTPS)
 class AcceptableProtocolsContextFactory:
@@ -67,8 +147,44 @@ class AcceptableProtocolsContextFactory:
     negotiation.
     """

-    def __init__(self, context_factory: Any, acceptable_protocols: List[bytes]
-        ):
+    def __init__(self, context_factory: Any, acceptable_protocols: List[bytes]):
         verifyObject(IPolicyForHTTPS, context_factory)
         self._wrapped_context_factory: Any = context_factory
         self._acceptable_protocols: List[bytes] = acceptable_protocols
+
+    def creatorForNetloc(self, hostname: bytes, port: int) -> "ClientTLSOptions":
+        options: "ClientTLSOptions" = self._wrapped_context_factory.creatorForNetloc(
+            hostname, port
+        )
+        _setAcceptableProtocols(options._ctx, self._acceptable_protocols)
+        return options
+
+
+def load_context_factory_from_settings(settings, crawler):
+    ssl_method = openssl_methods[settings.get("DOWNLOADER_CLIENT_TLS_METHOD")]
+    context_factory_cls = load_object(settings["DOWNLOADER_CLIENTCONTEXTFACTORY"])
+    # try method-aware context factory
+    try:
+        context_factory = create_instance(
+            objcls=context_factory_cls,
+            settings=settings,
+            crawler=crawler,
+            method=ssl_method,
+        )
+    except TypeError:
+        # use context factory defaults
+        context_factory = create_instance(
+            objcls=context_factory_cls,
+            settings=settings,
+            crawler=crawler,
+        )
+        msg = (
+            f"{settings['DOWNLOADER_CLIENTCONTEXTFACTORY']} does not accept "
+            "a `method` argument (type OpenSSL.SSL method, e.g. "
+            "OpenSSL.SSL.SSLv23_METHOD) and/or a `tls_verbose_logging` "
+            "argument and/or a `tls_ciphers` argument. Please, upgrade your "
+            "context factory class to handle them or ignore them."
+        )
+        warnings.warn(msg)
+
+    return context_factory
diff --git a/scrapy/core/downloader/handlers/datauri.py b/scrapy/core/downloader/handlers/datauri.py
index 25a176778..8b78c53c1 100644
--- a/scrapy/core/downloader/handlers/datauri.py
+++ b/scrapy/core/downloader/handlers/datauri.py
@@ -1,4 +1,5 @@
 from w3lib.url import parse_data_uri
+
 from scrapy.http import TextResponse
 from scrapy.responsetypes import responsetypes
 from scrapy.utils.decorators import defers
@@ -6,3 +7,15 @@ from scrapy.utils.decorators import defers

 class DataURIDownloadHandler:
     lazy = False
+
+    @defers
+    def download_request(self, request, spider):
+        uri = parse_data_uri(request.url)
+        respcls = responsetypes.from_mimetype(uri.media_type)
+
+        resp_kwargs = {}
+        if issubclass(respcls, TextResponse) and uri.media_type.split("/")[0] == "text":
+            charset = uri.media_type_parameters.get("charset")
+            resp_kwargs["encoding"] = charset
+
+        return respcls(url=request.url, body=uri.data, **resp_kwargs)
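
For context, a sketch of the values parse_data_uri yields for a simple data URI, matching the attributes used above (output assumes w3lib's documented behaviour):

from w3lib.url import parse_data_uri

uri = parse_data_uri("data:text/plain;charset=utf-8,Hello")
print(uri.media_type)             # text/plain
print(uri.media_type_parameters)  # {'charset': 'utf-8'}
print(uri.data)                   # b'Hello'
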
diff --git a/scrapy/core/downloader/handlers/file.py b/scrapy/core/downloader/handlers/file.py
index 8fa3d2938..4824167da 100644
--- a/scrapy/core/downloader/handlers/file.py
+++ b/scrapy/core/downloader/handlers/file.py
@@ -1,8 +1,17 @@
 from pathlib import Path
+
 from w3lib.url import file_uri_to_path
+
 from scrapy.responsetypes import responsetypes
 from scrapy.utils.decorators import defers


 class FileDownloadHandler:
     lazy = False
+
+    @defers
+    def download_request(self, request, spider):
+        filepath = file_uri_to_path(request.url)
+        body = Path(filepath).read_bytes()
+        respcls = responsetypes.from_args(filename=filepath, body=body)
+        return respcls(url=request.url, body=body)
diff --git a/scrapy/core/downloader/handlers/ftp.py b/scrapy/core/downloader/handlers/ftp.py
index 78ad52f12..4081545ce 100644
--- a/scrapy/core/downloader/handlers/ftp.py
+++ b/scrapy/core/downloader/handlers/ftp.py
@@ -27,11 +27,14 @@ In case of status 200 request, response.headers will come with two keys:
     'Local Filename' - with the value of the local filename if given
     'Size' - with size of the downloaded data
 """
+
 import re
 from io import BytesIO
 from urllib.parse import unquote
+
 from twisted.internet.protocol import ClientCreator, Protocol
 from twisted.protocols.ftp import CommandFailed, FTPClient
+
 from scrapy.http import Response
 from scrapy.responsetypes import responsetypes
 from scrapy.utils.httpobj import urlparse_cached
@@ -39,21 +42,84 @@ from scrapy.utils.python import to_bytes


 class ReceivedDataProtocol(Protocol):
-
     def __init__(self, filename=None):
         self.__filename = filename
-        self.body = open(filename, 'wb') if filename else BytesIO()
+        self.body = open(filename, "wb") if filename else BytesIO()
         self.size = 0

+    def dataReceived(self, data):
+        self.body.write(data)
+        self.size += len(data)
+
+    @property
+    def filename(self):
+        return self.__filename
+
+    def close(self):
+        self.body.close() if self.filename else self.body.seek(0)
+

-_CODE_RE = re.compile('\\d+')
+_CODE_RE = re.compile(r"\d+")


 class FTPDownloadHandler:
     lazy = False
-    CODE_MAPPING = {'550': 404, 'default': 503}
+
+    CODE_MAPPING = {
+        "550": 404,
+        "default": 503,
+    }

     def __init__(self, settings):
-        self.default_user = settings['FTP_USER']
-        self.default_password = settings['FTP_PASSWORD']
-        self.passive_mode = settings['FTP_PASSIVE_MODE']
+        self.default_user = settings["FTP_USER"]
+        self.default_password = settings["FTP_PASSWORD"]
+        self.passive_mode = settings["FTP_PASSIVE_MODE"]
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings)
+
+    def download_request(self, request, spider):
+        from twisted.internet import reactor
+
+        parsed_url = urlparse_cached(request)
+        user = request.meta.get("ftp_user", self.default_user)
+        password = request.meta.get("ftp_password", self.default_password)
+        passive_mode = (
+            1 if bool(request.meta.get("ftp_passive", self.passive_mode)) else 0
+        )
+        creator = ClientCreator(
+            reactor, FTPClient, user, password, passive=passive_mode
+        )
+        dfd = creator.connectTCP(parsed_url.hostname, parsed_url.port or 21)
+        return dfd.addCallback(self.gotClient, request, unquote(parsed_url.path))
+
+    def gotClient(self, client, request, filepath):
+        self.client = client
+        protocol = ReceivedDataProtocol(request.meta.get("ftp_local_filename"))
+        return client.retrieveFile(filepath, protocol).addCallbacks(
+            callback=self._build_response,
+            callbackArgs=(request, protocol),
+            errback=self._failed,
+            errbackArgs=(request,),
+        )
+
+    def _build_response(self, result, request, protocol):
+        self.result = result
+        protocol.close()
+        headers = {"local filename": protocol.filename or "", "size": protocol.size}
+        body = to_bytes(protocol.filename or protocol.body.read())
+        respcls = responsetypes.from_args(url=request.url, body=body)
+        return respcls(url=request.url, status=200, body=body, headers=headers)
+
+    def _failed(self, result, request):
+        message = result.getErrorMessage()
+        if result.type == CommandFailed:
+            m = _CODE_RE.search(message)
+            if m:
+                ftpcode = m.group()
+                httpcode = self.CODE_MAPPING.get(ftpcode, self.CODE_MAPPING["default"])
+                return Response(
+                    url=request.url, status=httpcode, body=to_bytes(message)
+                )
+        raise result.type(result.value)
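
A sketch of the status translation done in _failed above, with a hypothetical FTP error message:

import re

_CODE_RE = re.compile(r"\d+")
CODE_MAPPING = {"550": 404, "default": 503}

message = "550 /missing.txt: No such file or directory"  # hypothetical CommandFailed text
ftpcode = _CODE_RE.search(message).group()
print(CODE_MAPPING.get(ftpcode, CODE_MAPPING["default"]))  # 404
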
diff --git a/scrapy/core/downloader/handlers/http.py b/scrapy/core/downloader/handlers/http.py
index a62ecadc7..52535bd8b 100644
--- a/scrapy/core/downloader/handlers/http.py
+++ b/scrapy/core/downloader/handlers/http.py
@@ -1,2 +1,4 @@
 from scrapy.core.downloader.handlers.http10 import HTTP10DownloadHandler
-from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler as HTTPDownloadHandler
+from scrapy.core.downloader.handlers.http11 import (
+    HTTP11DownloadHandler as HTTPDownloadHandler,
+)
diff --git a/scrapy/core/downloader/handlers/http10.py b/scrapy/core/downloader/handlers/http10.py
index 6f9e8f618..6c1dac4a5 100644
--- a/scrapy/core/downloader/handlers/http10.py
+++ b/scrapy/core/downloader/handlers/http10.py
@@ -8,13 +8,32 @@ class HTTP10DownloadHandler:
     lazy = False

     def __init__(self, settings, crawler=None):
-        self.HTTPClientFactory = load_object(settings[
-            'DOWNLOADER_HTTPCLIENTFACTORY'])
-        self.ClientContextFactory = load_object(settings[
-            'DOWNLOADER_CLIENTCONTEXTFACTORY'])
+        self.HTTPClientFactory = load_object(settings["DOWNLOADER_HTTPCLIENTFACTORY"])
+        self.ClientContextFactory = load_object(
+            settings["DOWNLOADER_CLIENTCONTEXTFACTORY"]
+        )
         self._settings = settings
         self._crawler = crawler

+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings, crawler)
+
     def download_request(self, request, spider):
         """Return a deferred for the HTTP download"""
-        pass
+        factory = self.HTTPClientFactory(request)
+        self._connect(factory)
+        return factory.deferred
+
+    def _connect(self, factory):
+        from twisted.internet import reactor
+
+        host, port = to_unicode(factory.host), factory.port
+        if factory.scheme == b"https":
+            client_context_factory = create_instance(
+                objcls=self.ClientContextFactory,
+                settings=self._settings,
+                crawler=self._crawler,
+            )
+            return reactor.connectSSL(host, port, factory, client_context_factory)
+        return reactor.connectTCP(host, port, factory)
diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py
index 2a58c6f22..c3704de3d 100644
--- a/scrapy/core/downloader/handlers/http11.py
+++ b/scrapy/core/downloader/handlers/http11.py
@@ -1,4 +1,5 @@
 """Download handlers for http and https schemes"""
+
 import ipaddress
 import logging
 import re
@@ -6,15 +7,23 @@ from contextlib import suppress
 from io import BytesIO
 from time import time
 from urllib.parse import urldefrag, urlunparse
+
 from twisted.internet import defer, protocol, ssl
 from twisted.internet.endpoints import TCP4ClientEndpoint
 from twisted.internet.error import TimeoutError
 from twisted.python.failure import Failure
-from twisted.web.client import URI, Agent, HTTPConnectionPool, ResponseDone, ResponseFailed
+from twisted.web.client import (
+    URI,
+    Agent,
+    HTTPConnectionPool,
+    ResponseDone,
+    ResponseFailed,
+)
 from twisted.web.http import PotentialDataLoss, _DataLoss
 from twisted.web.http_headers import Headers as TxHeaders
 from twisted.web.iweb import UNKNOWN_LENGTH, IBodyProducer
 from zope.interface import implementer
+
 from scrapy import signals
 from scrapy.core.downloader.contextfactory import load_context_factory_from_settings
 from scrapy.core.downloader.webclient import _parse
@@ -22,6 +31,7 @@ from scrapy.exceptions import StopDownload
 from scrapy.http import Headers
 from scrapy.responsetypes import responsetypes
 from scrapy.utils.python import to_bytes, to_unicode
+
 logger = logging.getLogger(__name__)


@@ -30,21 +40,58 @@ class HTTP11DownloadHandler:

     def __init__(self, settings, crawler=None):
         self._crawler = crawler
+
         from twisted.internet import reactor
+
         self._pool = HTTPConnectionPool(reactor, persistent=True)
         self._pool.maxPersistentPerHost = settings.getint(
-            'CONCURRENT_REQUESTS_PER_DOMAIN')
+            "CONCURRENT_REQUESTS_PER_DOMAIN"
+        )
         self._pool._factory.noisy = False
-        self._contextFactory = load_context_factory_from_settings(settings,
-            crawler)
-        self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
-        self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
-        self._fail_on_dataloss = settings.getbool('DOWNLOAD_FAIL_ON_DATALOSS')
+
+        self._contextFactory = load_context_factory_from_settings(settings, crawler)
+        self._default_maxsize = settings.getint("DOWNLOAD_MAXSIZE")
+        self._default_warnsize = settings.getint("DOWNLOAD_WARNSIZE")
+        self._fail_on_dataloss = settings.getbool("DOWNLOAD_FAIL_ON_DATALOSS")
         self._disconnect_timeout = 1

+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings, crawler)
+
     def download_request(self, request, spider):
         """Return a deferred for the HTTP download"""
-        pass
+        agent = ScrapyAgent(
+            contextFactory=self._contextFactory,
+            pool=self._pool,
+            maxsize=getattr(spider, "download_maxsize", self._default_maxsize),
+            warnsize=getattr(spider, "download_warnsize", self._default_warnsize),
+            fail_on_dataloss=self._fail_on_dataloss,
+            crawler=self._crawler,
+        )
+        return agent.download_request(request)
+
+    def close(self):
+        from twisted.internet import reactor
+
+        d = self._pool.closeCachedConnections()
+        # closeCachedConnections will hang on network or server issues, so
+        # we'll manually timeout the deferred.
+        #
+        # Twisted issue addressing this problem can be found here:
+        # https://twistedmatrix.com/trac/ticket/7738.
+        #
+        # closeCachedConnections doesn't handle external errbacks, so we'll
+        # issue a callback after `_disconnect_timeout` seconds.
+        delayed_call = reactor.callLater(self._disconnect_timeout, d.callback, [])
+
+        def cancel_delayed_call(result):
+            if delayed_call.active():
+                delayed_call.cancel()
+            return result
+
+        d.addBoth(cancel_delayed_call)
+        return d


 class TunnelError(Exception):
@@ -59,13 +106,23 @@ class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
     with this endpoint comes from the pool and a CONNECT has already been issued
     for it.
     """
+
     _truncatedLength = 1000
-    _responseAnswer = 'HTTP/1\\.. (?P<status>\\d{3})(?P<reason>.{,' + str(
-        _truncatedLength) + '})'
+    _responseAnswer = (
+        r"HTTP/1\.. (?P<status>\d{3})(?P<reason>.{," + str(_truncatedLength) + r"})"
+    )
     _responseMatcher = re.compile(_responseAnswer.encode())

-    def __init__(self, reactor, host, port, proxyConf, contextFactory,
-        timeout=30, bindAddress=None):
+    def __init__(
+        self,
+        reactor,
+        host,
+        port,
+        proxyConf,
+        contextFactory,
+        timeout=30,
+        bindAddress=None,
+    ):
         proxyHost, proxyPort, self._proxyAuthHeader = proxyConf
         super().__init__(reactor, proxyHost, proxyPort, timeout, bindAddress)
         self._tunnelReadyDeferred = defer.Deferred()
@@ -76,33 +133,83 @@ class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):

     def requestTunnel(self, protocol):
         """Asks the proxy to open a tunnel."""
-        pass
+        tunnelReq = tunnel_request_data(
+            self._tunneledHost, self._tunneledPort, self._proxyAuthHeader
+        )
+        protocol.transport.write(tunnelReq)
+        self._protocolDataReceived = protocol.dataReceived
+        protocol.dataReceived = self.processProxyResponse
+        self._protocol = protocol
+        return protocol

     def processProxyResponse(self, rcvd_bytes):
         """Processes the response from the proxy. If the tunnel is successfully
         created, notifies the client that we are ready to send requests. If not
         raises a TunnelError.
         """
-        pass
+        self._connectBuffer += rcvd_bytes
+        # make sure that enough (all) bytes are consumed
+        # and that we've got all HTTP headers (ending with a blank line)
+        # from the proxy so that we don't send those bytes to the TLS layer
+        #
+        # see https://github.com/scrapy/scrapy/issues/2491
+        if b"\r\n\r\n" not in self._connectBuffer:
+            return
+        self._protocol.dataReceived = self._protocolDataReceived
+        respm = TunnelingTCP4ClientEndpoint._responseMatcher.match(self._connectBuffer)
+        if respm and int(respm.group("status")) == 200:
+            # set proper Server Name Indication extension
+            sslOptions = self._contextFactory.creatorForNetloc(
+                self._tunneledHost, self._tunneledPort
+            )
+            self._protocol.transport.startTLS(sslOptions, self._protocolFactory)
+            self._tunnelReadyDeferred.callback(self._protocol)
+        else:
+            if respm:
+                extra = {
+                    "status": int(respm.group("status")),
+                    "reason": respm.group("reason").strip(),
+                }
+            else:
+                extra = rcvd_bytes[: self._truncatedLength]
+            self._tunnelReadyDeferred.errback(
+                TunnelError(
+                    "Could not open CONNECT tunnel with proxy "
+                    f"{self._host}:{self._port} [{extra!r}]"
+                )
+            )

     def connectFailed(self, reason):
         """Propagates the errback to the appropriate deferred."""
-        pass
+        self._tunnelReadyDeferred.errback(reason)
+
+    def connect(self, protocolFactory):
+        self._protocolFactory = protocolFactory
+        connectDeferred = super().connect(protocolFactory)
+        connectDeferred.addCallback(self.requestTunnel)
+        connectDeferred.addErrback(self.connectFailed)
+        return self._tunnelReadyDeferred


 def tunnel_request_data(host, port, proxy_auth_header=None):
-    """
+    r"""
     Return binary content of a CONNECT request.

     >>> from scrapy.utils.python import to_unicode as s
     >>> s(tunnel_request_data("example.com", 8080))
-    'CONNECT example.com:8080 HTTP/1.1\\r\\nHost: example.com:8080\\r\\n\\r\\n'
+    'CONNECT example.com:8080 HTTP/1.1\r\nHost: example.com:8080\r\n\r\n'
     >>> s(tunnel_request_data("example.com", 8080, b"123"))
-    'CONNECT example.com:8080 HTTP/1.1\\r\\nHost: example.com:8080\\r\\nProxy-Authorization: 123\\r\\n\\r\\n'
+    'CONNECT example.com:8080 HTTP/1.1\r\nHost: example.com:8080\r\nProxy-Authorization: 123\r\n\r\n'
     >>> s(tunnel_request_data(b"example.com", "8090"))
-    'CONNECT example.com:8090 HTTP/1.1\\r\\nHost: example.com:8090\\r\\n\\r\\n'
+    'CONNECT example.com:8090 HTTP/1.1\r\nHost: example.com:8090\r\n\r\n'
     """
-    pass
+    host_value = to_bytes(host, encoding="ascii") + b":" + to_bytes(str(port))
+    tunnel_req = b"CONNECT " + host_value + b" HTTP/1.1\r\n"
+    tunnel_req += b"Host: " + host_value + b"\r\n"
+    if proxy_auth_header:
+        tunnel_req += b"Proxy-Authorization: " + proxy_auth_header + b"\r\n"
+    tunnel_req += b"\r\n"
+    return tunnel_req


 class TunnelingAgent(Agent):
@@ -113,27 +220,75 @@ class TunnelingAgent(Agent):
     proxy involved.
     """

-    def __init__(self, reactor, proxyConf, contextFactory=None,
-        connectTimeout=None, bindAddress=None, pool=None):
-        super().__init__(reactor, contextFactory, connectTimeout,
-            bindAddress, pool)
+    def __init__(
+        self,
+        reactor,
+        proxyConf,
+        contextFactory=None,
+        connectTimeout=None,
+        bindAddress=None,
+        pool=None,
+    ):
+        super().__init__(reactor, contextFactory, connectTimeout, bindAddress, pool)
         self._proxyConf = proxyConf
         self._contextFactory = contextFactory

+    def _getEndpoint(self, uri):
+        return TunnelingTCP4ClientEndpoint(
+            reactor=self._reactor,
+            host=uri.host,
+            port=uri.port,
+            proxyConf=self._proxyConf,
+            contextFactory=self._contextFactory,
+            timeout=self._endpointFactory._connectTimeout,
+            bindAddress=self._endpointFactory._bindAddress,
+        )

-class ScrapyProxyAgent(Agent):
+    def _requestWithEndpoint(
+        self, key, endpoint, method, parsedURI, headers, bodyProducer, requestPath
+    ):
+        # proxy host and port are required for HTTP pool `key`
+        # otherwise, same remote host connection request could reuse
+        # a cached tunneled connection to a different proxy
+        key += self._proxyConf
+        return super()._requestWithEndpoint(
+            key=key,
+            endpoint=endpoint,
+            method=method,
+            parsedURI=parsedURI,
+            headers=headers,
+            bodyProducer=bodyProducer,
+            requestPath=requestPath,
+        )

-    def __init__(self, reactor, proxyURI, connectTimeout=None, bindAddress=
-        None, pool=None):
-        super().__init__(reactor=reactor, connectTimeout=connectTimeout,
-            bindAddress=bindAddress, pool=pool)
+
+class ScrapyProxyAgent(Agent):
+    def __init__(
+        self, reactor, proxyURI, connectTimeout=None, bindAddress=None, pool=None
+    ):
+        super().__init__(
+            reactor=reactor,
+            connectTimeout=connectTimeout,
+            bindAddress=bindAddress,
+            pool=pool,
+        )
         self._proxyURI = URI.fromBytes(proxyURI)

     def request(self, method, uri, headers=None, bodyProducer=None):
         """
         Issue a new request via the configured proxy.
         """
-        pass
+        # Cache *all* connections under the same key, since we are only
+        # connecting to a single destination, the proxy:
+        return self._requestWithEndpoint(
+            key=("http-proxy", self._proxyURI.host, self._proxyURI.port),
+            endpoint=self._getEndpoint(self._proxyURI),
+            method=method,
+            parsedURI=URI.fromBytes(uri),
+            headers=headers,
+            bodyProducer=bodyProducer,
+            requestPath=uri,
+        )


 class ScrapyAgent:
@@ -141,9 +296,17 @@ class ScrapyAgent:
     _ProxyAgent = ScrapyProxyAgent
     _TunnelingAgent = TunnelingAgent

-    def __init__(self, contextFactory=None, connectTimeout=10, bindAddress=
-        None, pool=None, maxsize=0, warnsize=0, fail_on_dataloss=True,
-        crawler=None):
+    def __init__(
+        self,
+        contextFactory=None,
+        connectTimeout=10,
+        bindAddress=None,
+        pool=None,
+        maxsize=0,
+        warnsize=0,
+        fail_on_dataloss=True,
+        crawler=None,
+    ):
         self._contextFactory = contextFactory
         self._connectTimeout = connectTimeout
         self._bindAddress = bindAddress
@@ -154,19 +317,236 @@ class ScrapyAgent:
         self._txresponse = None
         self._crawler = crawler

+    def _get_agent(self, request, timeout):
+        from twisted.internet import reactor
+
+        bindaddress = request.meta.get("bindaddress") or self._bindAddress
+        proxy = request.meta.get("proxy")
+        if proxy:
+            proxyScheme, proxyNetloc, proxyHost, proxyPort, proxyParams = _parse(proxy)
+            scheme = _parse(request.url)[0]
+            proxyHost = to_unicode(proxyHost)
+            if scheme == b"https":
+                proxyAuth = request.headers.get(b"Proxy-Authorization", None)
+                proxyConf = (proxyHost, proxyPort, proxyAuth)
+                return self._TunnelingAgent(
+                    reactor=reactor,
+                    proxyConf=proxyConf,
+                    contextFactory=self._contextFactory,
+                    connectTimeout=timeout,
+                    bindAddress=bindaddress,
+                    pool=self._pool,
+                )
+            proxyScheme = proxyScheme or b"http"
+            proxyURI = urlunparse((proxyScheme, proxyNetloc, proxyParams, "", "", ""))
+            return self._ProxyAgent(
+                reactor=reactor,
+                proxyURI=to_bytes(proxyURI, encoding="ascii"),
+                connectTimeout=timeout,
+                bindAddress=bindaddress,
+                pool=self._pool,
+            )
+
+        return self._Agent(
+            reactor=reactor,
+            contextFactory=self._contextFactory,
+            connectTimeout=timeout,
+            bindAddress=bindaddress,
+            pool=self._pool,
+        )
+
+    def download_request(self, request):
+        from twisted.internet import reactor
+
+        timeout = request.meta.get("download_timeout") or self._connectTimeout
+        agent = self._get_agent(request, timeout)
+
+        # request details
+        url = urldefrag(request.url)[0]
+        method = to_bytes(request.method)
+        headers = TxHeaders(request.headers)
+        if isinstance(agent, self._TunnelingAgent):
+            headers.removeHeader(b"Proxy-Authorization")
+        if request.body:
+            bodyproducer = _RequestBodyProducer(request.body)
+        else:
+            bodyproducer = None
+        start_time = time()
+        d = agent.request(
+            method, to_bytes(url, encoding="ascii"), headers, bodyproducer
+        )
+        # set download latency
+        d.addCallback(self._cb_latency, request, start_time)
+        # response body is ready to be consumed
+        d.addCallback(self._cb_bodyready, request)
+        d.addCallback(self._cb_bodydone, request, url)
+        # check download timeout
+        self._timeout_cl = reactor.callLater(timeout, d.cancel)
+        d.addBoth(self._cb_timeout, request, url, timeout)
+        return d
+
+    def _cb_timeout(self, result, request, url, timeout):
+        if self._timeout_cl.active():
+            self._timeout_cl.cancel()
+            return result
+        # needed for HTTPS requests, otherwise _ResponseReader doesn't
+        # receive connectionLost()
+        if self._txresponse:
+            self._txresponse._transport.stopProducing()
+
+        raise TimeoutError(f"Getting {url} took longer than {timeout} seconds.")
+
+    def _cb_latency(self, result, request, start_time):
+        request.meta["download_latency"] = time() - start_time
+        return result
+
+    @staticmethod
+    def _headers_from_twisted_response(response):
+        headers = Headers()
+        if response.length != UNKNOWN_LENGTH:
+            headers[b"Content-Length"] = str(response.length).encode()
+        headers.update(response.headers.getAllRawHeaders())
+        return headers
+
+    def _cb_bodyready(self, txresponse, request):
+        headers_received_result = self._crawler.signals.send_catch_log(
+            signal=signals.headers_received,
+            headers=self._headers_from_twisted_response(txresponse),
+            body_length=txresponse.length,
+            request=request,
+            spider=self._crawler.spider,
+        )
+        for handler, result in headers_received_result:
+            if isinstance(result, Failure) and isinstance(result.value, StopDownload):
+                logger.debug(
+                    "Download stopped for %(request)s from signal handler %(handler)s",
+                    {"request": request, "handler": handler.__qualname__},
+                )
+                txresponse._transport.stopProducing()
+                txresponse._transport.loseConnection()
+                return {
+                    "txresponse": txresponse,
+                    "body": b"",
+                    "flags": ["download_stopped"],
+                    "certificate": None,
+                    "ip_address": None,
+                    "failure": result if result.value.fail else None,
+                }
+
+        # deliverBody hangs for responses without body
+        if txresponse.length == 0:
+            return {
+                "txresponse": txresponse,
+                "body": b"",
+                "flags": None,
+                "certificate": None,
+                "ip_address": None,
+            }
+
+        maxsize = request.meta.get("download_maxsize", self._maxsize)
+        warnsize = request.meta.get("download_warnsize", self._warnsize)
+        expected_size = txresponse.length if txresponse.length != UNKNOWN_LENGTH else -1
+        fail_on_dataloss = request.meta.get(
+            "download_fail_on_dataloss", self._fail_on_dataloss
+        )
+
+        if maxsize and expected_size > maxsize:
+            warning_msg = (
+                "Cancelling download of %(url)s: expected response "
+                "size (%(size)s) larger than download max size (%(maxsize)s)."
+            )
+            warning_args = {
+                "url": request.url,
+                "size": expected_size,
+                "maxsize": maxsize,
+            }
+
+            logger.warning(warning_msg, warning_args)
+
+            txresponse._transport.loseConnection()
+            raise defer.CancelledError(warning_msg % warning_args)
+
+        if warnsize and expected_size > warnsize:
+            logger.warning(
+                "Expected response size (%(size)s) larger than "
+                "download warn size (%(warnsize)s) in request %(request)s.",
+                {"size": expected_size, "warnsize": warnsize, "request": request},
+            )
+
+        def _cancel(_):
+            # Abort connection immediately.
+            txresponse._transport._producer.abortConnection()
+
+        d = defer.Deferred(_cancel)
+        txresponse.deliverBody(
+            _ResponseReader(
+                finished=d,
+                txresponse=txresponse,
+                request=request,
+                maxsize=maxsize,
+                warnsize=warnsize,
+                fail_on_dataloss=fail_on_dataloss,
+                crawler=self._crawler,
+            )
+        )
+
+        # save response for timeouts
+        self._txresponse = txresponse
+
+        return d
+
+    def _cb_bodydone(self, result, request, url):
+        headers = self._headers_from_twisted_response(result["txresponse"])
+        respcls = responsetypes.from_args(headers=headers, url=url, body=result["body"])
+        try:
+            version = result["txresponse"].version
+            protocol = f"{to_unicode(version[0])}/{version[1]}.{version[2]}"
+        except (AttributeError, TypeError, IndexError):
+            protocol = None
+        response = respcls(
+            url=url,
+            status=int(result["txresponse"].code),
+            headers=headers,
+            body=result["body"],
+            flags=result["flags"],
+            certificate=result["certificate"],
+            ip_address=result["ip_address"],
+            protocol=protocol,
+        )
+        if result.get("failure"):
+            result["failure"].value.response = response
+            return result["failure"]
+        return response
+

 @implementer(IBodyProducer)
 class _RequestBodyProducer:
-
     def __init__(self, body):
         self.body = body
         self.length = len(body)

+    def startProducing(self, consumer):
+        consumer.write(self.body)
+        return defer.succeed(None)

-class _ResponseReader(protocol.Protocol):
+    def pauseProducing(self):
+        pass

-    def __init__(self, finished, txresponse, request, maxsize, warnsize,
-        fail_on_dataloss, crawler):
+    def stopProducing(self):
+        pass
+
+
+class _ResponseReader(protocol.Protocol):
+    def __init__(
+        self,
+        finished,
+        txresponse,
+        request,
+        maxsize,
+        warnsize,
+        fail_on_dataloss,
+        crawler,
+    ):
         self._finished = finished
         self._txresponse = txresponse
         self._request = request
@@ -180,3 +560,108 @@ class _ResponseReader(protocol.Protocol):
         self._certificate = None
         self._ip_address = None
         self._crawler = crawler
+
+    def _finish_response(self, flags=None, failure=None):
+        self._finished.callback(
+            {
+                "txresponse": self._txresponse,
+                "body": self._bodybuf.getvalue(),
+                "flags": flags,
+                "certificate": self._certificate,
+                "ip_address": self._ip_address,
+                "failure": failure,
+            }
+        )
+
+    def connectionMade(self):
+        if self._certificate is None:
+            with suppress(AttributeError):
+                self._certificate = ssl.Certificate(
+                    self.transport._producer.getPeerCertificate()
+                )
+
+        if self._ip_address is None:
+            self._ip_address = ipaddress.ip_address(
+                self.transport._producer.getPeer().host
+            )
+
+    def dataReceived(self, bodyBytes):
+        # This may be called several times after cancel was called with buffered data.
+        if self._finished.called:
+            return
+
+        self._bodybuf.write(bodyBytes)
+        self._bytes_received += len(bodyBytes)
+
+        bytes_received_result = self._crawler.signals.send_catch_log(
+            signal=signals.bytes_received,
+            data=bodyBytes,
+            request=self._request,
+            spider=self._crawler.spider,
+        )
+        for handler, result in bytes_received_result:
+            if isinstance(result, Failure) and isinstance(result.value, StopDownload):
+                logger.debug(
+                    "Download stopped for %(request)s from signal handler %(handler)s",
+                    {"request": self._request, "handler": handler.__qualname__},
+                )
+                self.transport.stopProducing()
+                self.transport.loseConnection()
+                failure = result if result.value.fail else None
+                self._finish_response(flags=["download_stopped"], failure=failure)
+
+        if self._maxsize and self._bytes_received > self._maxsize:
+            logger.warning(
+                "Received (%(bytes)s) bytes larger than download "
+                "max size (%(maxsize)s) in request %(request)s.",
+                {
+                    "bytes": self._bytes_received,
+                    "maxsize": self._maxsize,
+                    "request": self._request,
+                },
+            )
+            # Clear buffer earlier to avoid keeping data in memory for a long time.
+            self._bodybuf.truncate(0)
+            self._finished.cancel()
+
+        if (
+            self._warnsize
+            and self._bytes_received > self._warnsize
+            and not self._reached_warnsize
+        ):
+            self._reached_warnsize = True
+            logger.warning(
+                "Received more bytes than download "
+                "warn size (%(warnsize)s) in request %(request)s.",
+                {"warnsize": self._warnsize, "request": self._request},
+            )
+
+    def connectionLost(self, reason):
+        if self._finished.called:
+            return
+
+        if reason.check(ResponseDone):
+            self._finish_response()
+            return
+
+        if reason.check(PotentialDataLoss):
+            self._finish_response(flags=["partial"])
+            return
+
+        if reason.check(ResponseFailed) and any(
+            r.check(_DataLoss) for r in reason.value.reasons
+        ):
+            if not self._fail_on_dataloss:
+                self._finish_response(flags=["dataloss"])
+                return
+
+            if not self._fail_on_dataloss_warned:
+                logger.warning(
+                    "Got data loss in %s. If you want to process broken "
+                    "responses set the setting DOWNLOAD_FAIL_ON_DATALOSS = False"
+                    " -- This message won't be shown in further requests",
+                    self._txresponse.request.absoluteURI.decode(),
+                )
+                self._fail_on_dataloss_warned = True
+
+        self._finished.errback(reason)
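
ScrapyAgent._cb_bodyready above honours per-request meta keys (download_maxsize, download_warnsize, download_fail_on_dataloss) over the handler-wide defaults taken from settings. A short sketch of a spider exercising those overrides (the URL is a placeholder):

    import scrapy

    class SizeLimitedSpider(scrapy.Spider):
        name = "size_limited"

        def start_requests(self):
            # These meta keys are read in _cb_bodyready/_ResponseReader above
            # and take precedence over DOWNLOAD_MAXSIZE / DOWNLOAD_WARNSIZE.
            yield scrapy.Request(
                "https://example.com/large-file",
                meta={
                    "download_maxsize": 10 * 1024 * 1024,  # cancel above 10 MiB
                    "download_warnsize": 1024 * 1024,      # warn above 1 MiB
                    "download_fail_on_dataloss": False,    # keep truncated bodies
                },
                callback=self.parse,
            )

        def parse(self, response):
            self.logger.info("Got %d bytes", len(response.body))
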
diff --git a/scrapy/core/downloader/handlers/http2.py b/scrapy/core/downloader/handlers/http2.py
index 37c42a70f..b2579362c 100644
--- a/scrapy/core/downloader/handlers/http2.py
+++ b/scrapy/core/downloader/handlers/http2.py
@@ -1,10 +1,12 @@
 from time import time
 from typing import Optional, Type, TypeVar
 from urllib.parse import urldefrag
+
 from twisted.internet.base import DelayedCall
 from twisted.internet.defer import Deferred
 from twisted.internet.error import TimeoutError
 from twisted.web.client import URI
+
 from scrapy.core.downloader.contextfactory import load_context_factory_from_settings
 from scrapy.core.downloader.webclient import _parse
 from scrapy.core.http2.agent import H2Agent, H2ConnectionPool, ScrapyProxyH2Agent
@@ -13,29 +15,116 @@ from scrapy.http import Request, Response
 from scrapy.settings import Settings
 from scrapy.spiders import Spider
 from scrapy.utils.python import to_bytes
-H2DownloadHandlerOrSubclass = TypeVar('H2DownloadHandlerOrSubclass', bound=
-    'H2DownloadHandler')

+H2DownloadHandlerOrSubclass = TypeVar(
+    "H2DownloadHandlerOrSubclass", bound="H2DownloadHandler"
+)

-class H2DownloadHandler:

-    def __init__(self, settings: Settings, crawler: Optional[Crawler]=None):
+class H2DownloadHandler:
+    def __init__(self, settings: Settings, crawler: Optional[Crawler] = None):
         self._crawler = crawler
+
         from twisted.internet import reactor
+
         self._pool = H2ConnectionPool(reactor, settings)
-        self._context_factory = load_context_factory_from_settings(settings,
-            crawler)
+        self._context_factory = load_context_factory_from_settings(settings, crawler)
+
+    @classmethod
+    def from_crawler(
+        cls: Type[H2DownloadHandlerOrSubclass], crawler: Crawler
+    ) -> H2DownloadHandlerOrSubclass:
+        return cls(crawler.settings, crawler)
+
+    def download_request(self, request: Request, spider: Spider) -> Deferred:
+        agent = ScrapyH2Agent(
+            context_factory=self._context_factory,
+            pool=self._pool,
+            crawler=self._crawler,
+        )
+        return agent.download_request(request, spider)
+
+    def close(self) -> None:
+        self._pool.close_connections()


 class ScrapyH2Agent:
     _Agent = H2Agent
     _ProxyAgent = ScrapyProxyH2Agent

-    def __init__(self, context_factory, pool: H2ConnectionPool,
-        connect_timeout: int=10, bind_address: Optional[bytes]=None,
-        crawler: Optional[Crawler]=None) ->None:
+    def __init__(
+        self,
+        context_factory,
+        pool: H2ConnectionPool,
+        connect_timeout: int = 10,
+        bind_address: Optional[bytes] = None,
+        crawler: Optional[Crawler] = None,
+    ) -> None:
         self._context_factory = context_factory
         self._connect_timeout = connect_timeout
         self._bind_address = bind_address
         self._pool = pool
         self._crawler = crawler
+
+    def _get_agent(self, request: Request, timeout: Optional[float]) -> H2Agent:
+        from twisted.internet import reactor
+
+        bind_address = request.meta.get("bindaddress") or self._bind_address
+        proxy = request.meta.get("proxy")
+        if proxy:
+            _, _, proxy_host, proxy_port, proxy_params = _parse(proxy)
+            scheme = _parse(request.url)[0]
+
+            if scheme == b"https":
+                # ToDo
+                raise NotImplementedError(
+                    "Tunneling via CONNECT method using HTTP/2.0 is not yet supported"
+                )
+            return self._ProxyAgent(
+                reactor=reactor,
+                context_factory=self._context_factory,
+                proxy_uri=URI.fromBytes(to_bytes(proxy, encoding="ascii")),
+                connect_timeout=timeout,
+                bind_address=bind_address,
+                pool=self._pool,
+            )
+
+        return self._Agent(
+            reactor=reactor,
+            context_factory=self._context_factory,
+            connect_timeout=timeout,
+            bind_address=bind_address,
+            pool=self._pool,
+        )
+
+    def download_request(self, request: Request, spider: Spider) -> Deferred:
+        from twisted.internet import reactor
+
+        timeout = request.meta.get("download_timeout") or self._connect_timeout
+        agent = self._get_agent(request, timeout)
+
+        start_time = time()
+        d = agent.request(request, spider)
+        d.addCallback(self._cb_latency, request, start_time)
+
+        timeout_cl = reactor.callLater(timeout, d.cancel)
+        d.addBoth(self._cb_timeout, request, timeout, timeout_cl)
+        return d
+
+    @staticmethod
+    def _cb_latency(
+        response: Response, request: Request, start_time: float
+    ) -> Response:
+        request.meta["download_latency"] = time() - start_time
+        return response
+
+    @staticmethod
+    def _cb_timeout(
+        response: Response, request: Request, timeout: float, timeout_cl: DelayedCall
+    ) -> Response:
+        if timeout_cl.active():
+            timeout_cl.cancel()
+            return response
+
+        url = urldefrag(request.url)[0]
+        raise TimeoutError(f"Getting {url} took longer than {timeout} seconds.")
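
H2DownloadHandler above is not wired in by default; enabling it means pointing the https scheme at it through the DOWNLOAD_HANDLERS setting. A minimal settings sketch:

    # settings.py -- route https downloads through the HTTP/2 handler above.
    # Note that it raises NotImplementedError for CONNECT-tunnelled (https) proxies.
    DOWNLOAD_HANDLERS = {
        "https": "scrapy.core.downloader.handlers.http2.H2DownloadHandler",
    }
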
diff --git a/scrapy/core/downloader/handlers/s3.py b/scrapy/core/downloader/handlers/s3.py
index 6f341f7e2..81d8e8115 100644
--- a/scrapy/core/downloader/handlers/s3.py
+++ b/scrapy/core/downloader/handlers/s3.py
@@ -6,33 +6,78 @@ from scrapy.utils.misc import create_instance


 class S3DownloadHandler:
-
-    def __init__(self, settings, *, crawler=None, aws_access_key_id=None,
-        aws_secret_access_key=None, aws_session_token=None,
-        httpdownloadhandler=HTTPDownloadHandler, **kw):
+    def __init__(
+        self,
+        settings,
+        *,
+        crawler=None,
+        aws_access_key_id=None,
+        aws_secret_access_key=None,
+        aws_session_token=None,
+        httpdownloadhandler=HTTPDownloadHandler,
+        **kw,
+    ):
         if not is_botocore_available():
-            raise NotConfigured('missing botocore library')
+            raise NotConfigured("missing botocore library")
+
         if not aws_access_key_id:
-            aws_access_key_id = settings['AWS_ACCESS_KEY_ID']
+            aws_access_key_id = settings["AWS_ACCESS_KEY_ID"]
         if not aws_secret_access_key:
-            aws_secret_access_key = settings['AWS_SECRET_ACCESS_KEY']
+            aws_secret_access_key = settings["AWS_SECRET_ACCESS_KEY"]
         if not aws_session_token:
-            aws_session_token = settings['AWS_SESSION_TOKEN']
-        anon = kw.get('anon')
-        if (anon is None and not aws_access_key_id and not
-            aws_secret_access_key):
-            kw['anon'] = True
-        self.anon = kw.get('anon')
+            aws_session_token = settings["AWS_SESSION_TOKEN"]
+
+        # If no credentials could be found anywhere,
+        # consider this an anonymous connection request by default;
+        # unless 'anon' was set explicitly (True/False).
+        anon = kw.get("anon")
+        if anon is None and not aws_access_key_id and not aws_secret_access_key:
+            kw["anon"] = True
+        self.anon = kw.get("anon")
+
         self._signer = None
         import botocore.auth
         import botocore.credentials
-        kw.pop('anon', None)
+
+        kw.pop("anon", None)
         if kw:
-            raise TypeError(f'Unexpected keyword arguments: {kw}')
+            raise TypeError(f"Unexpected keyword arguments: {kw}")
         if not self.anon:
-            SignerCls = botocore.auth.AUTH_TYPE_MAPS['s3']
-            self._signer = SignerCls(botocore.credentials.Credentials(
-                aws_access_key_id, aws_secret_access_key, aws_session_token))
-        _http_handler = create_instance(objcls=httpdownloadhandler,
-            settings=settings, crawler=crawler)
+            SignerCls = botocore.auth.AUTH_TYPE_MAPS["s3"]
+            self._signer = SignerCls(
+                botocore.credentials.Credentials(
+                    aws_access_key_id, aws_secret_access_key, aws_session_token
+                )
+            )
+
+        _http_handler = create_instance(
+            objcls=httpdownloadhandler,
+            settings=settings,
+            crawler=crawler,
+        )
         self._download_http = _http_handler.download_request
+
+    @classmethod
+    def from_crawler(cls, crawler, **kwargs):
+        return cls(crawler.settings, crawler=crawler, **kwargs)
+
+    def download_request(self, request, spider):
+        p = urlparse_cached(request)
+        scheme = "https" if request.meta.get("is_secure") else "http"
+        bucket = p.hostname
+        path = p.path + "?" + p.query if p.query else p.path
+        url = f"{scheme}://{bucket}.s3.amazonaws.com{path}"
+        if self.anon:
+            request = request.replace(url=url)
+        else:
+            import botocore.awsrequest
+
+            awsrequest = botocore.awsrequest.AWSRequest(
+                method=request.method,
+                url=f"{scheme}://s3.amazonaws.com/{bucket}{path}",
+                headers=request.headers.to_unicode_dict(),
+                data=request.body,
+            )
+            self._signer.add_auth(awsrequest)
+            request = request.replace(url=url, headers=awsrequest.headers.items())
+        return self._download_http(request, spider)
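
S3DownloadHandler above rewrites s3:// URLs to the bucket's amazonaws.com endpoint and, unless running anonymously, signs them with botocore credentials taken from the AWS_* settings. A hedged usage sketch with placeholder bucket, key and credentials:

    import scrapy

    class S3Spider(scrapy.Spider):
        name = "s3_example"
        custom_settings = {
            # Leave these unset to fall back to an anonymous request
            # (the handler sets anon=True when no credentials are found).
            "AWS_ACCESS_KEY_ID": "YOUR_KEY_ID",          # placeholder
            "AWS_SECRET_ACCESS_KEY": "YOUR_SECRET_KEY",  # placeholder
        }

        def start_requests(self):
            # s3:// requests reach download_request() above, which rewrites this to
            # http(s)://my-bucket.s3.amazonaws.com/path/to/object (scheme chosen by
            # the is_secure meta key) and signs it if credentials were provided.
            yield scrapy.Request("s3://my-bucket/path/to/object", callback=self.parse)

        def parse(self, response):
            self.logger.info("Fetched %d bytes from S3", len(response.body))
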
diff --git a/scrapy/core/downloader/middleware.py b/scrapy/core/downloader/middleware.py
index 377040b14..dca13c01e 100644
--- a/scrapy/core/downloader/middleware.py
+++ b/scrapy/core/downloader/middleware.py
@@ -4,8 +4,10 @@ Downloader Middleware manager
 See documentation in docs/topics/downloader-middleware.rst
 """
 from typing import Any, Callable, Generator, List, Union, cast
+
 from twisted.internet.defer import Deferred, inlineCallbacks
 from twisted.python.failure import Failure
+
 from scrapy import Spider
 from scrapy.exceptions import _InvalidOutput
 from scrapy.http import Request, Response
@@ -16,4 +18,86 @@ from scrapy.utils.defer import deferred_from_coro, mustbe_deferred


 class DownloaderMiddlewareManager(MiddlewareManager):
-    component_name = 'downloader middleware'
+    component_name = "downloader middleware"
+
+    @classmethod
+    def _get_mwlist_from_settings(cls, settings: BaseSettings) -> List[Any]:
+        return build_component_list(settings.getwithbase("DOWNLOADER_MIDDLEWARES"))
+
+    def _add_middleware(self, mw: Any) -> None:
+        if hasattr(mw, "process_request"):
+            self.methods["process_request"].append(mw.process_request)
+        if hasattr(mw, "process_response"):
+            self.methods["process_response"].appendleft(mw.process_response)
+        if hasattr(mw, "process_exception"):
+            self.methods["process_exception"].appendleft(mw.process_exception)
+
+    def download(
+        self, download_func: Callable, request: Request, spider: Spider
+    ) -> Deferred:
+        @inlineCallbacks
+        def process_request(request: Request) -> Generator[Deferred, Any, Any]:
+            for method in self.methods["process_request"]:
+                method = cast(Callable, method)
+                response = yield deferred_from_coro(
+                    method(request=request, spider=spider)
+                )
+                if response is not None and not isinstance(
+                    response, (Response, Request)
+                ):
+                    raise _InvalidOutput(
+                        f"Middleware {method.__qualname__} must return None, Response or "
+                        f"Request, got {response.__class__.__name__}"
+                    )
+                if response:
+                    return response
+            return (yield download_func(request=request, spider=spider))
+
+        @inlineCallbacks
+        def process_response(
+            response: Union[Response, Request]
+        ) -> Generator[Deferred, Any, Union[Response, Request]]:
+            if response is None:
+                raise TypeError("Received None in process_response")
+            elif isinstance(response, Request):
+                return response
+
+            for method in self.methods["process_response"]:
+                method = cast(Callable, method)
+                response = yield deferred_from_coro(
+                    method(request=request, response=response, spider=spider)
+                )
+                if not isinstance(response, (Response, Request)):
+                    raise _InvalidOutput(
+                        f"Middleware {method.__qualname__} must return Response or Request, "
+                        f"got {type(response)}"
+                    )
+                if isinstance(response, Request):
+                    return response
+            return response
+
+        @inlineCallbacks
+        def process_exception(
+            failure: Failure,
+        ) -> Generator[Deferred, Any, Union[Failure, Response, Request]]:
+            exception = failure.value
+            for method in self.methods["process_exception"]:
+                method = cast(Callable, method)
+                response = yield deferred_from_coro(
+                    method(request=request, exception=exception, spider=spider)
+                )
+                if response is not None and not isinstance(
+                    response, (Response, Request)
+                ):
+                    raise _InvalidOutput(
+                        f"Middleware {method.__qualname__} must return None, Response or "
+                        f"Request, got {type(response)}"
+                    )
+                if response:
+                    return response
+            return failure
+
+        deferred = mustbe_deferred(process_request, request)
+        deferred.addErrback(process_exception)
+        deferred.addCallback(process_response)
+        return deferred
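
DownloaderMiddlewareManager above chains three optional hooks per middleware: process_request in configuration order, process_response and process_exception in reverse order (appendleft). A minimal sketch of a middleware implementing all three (class, module path and header name are arbitrary):

    class StampingMiddleware:
        def process_request(self, request, spider):
            # Returning None continues down the chain; returning a Response or
            # Request short-circuits it, as enforced by process_request() above.
            request.headers.setdefault(b"X-Example-Stamp", b"1")
            return None

        def process_response(self, request, response, spider):
            # Must return a Response or a Request; anything else raises _InvalidOutput.
            return response

        def process_exception(self, request, exception, spider):
            # Returning None leaves the failure to later handlers/errbacks.
            spider.logger.warning("Download failed for %s: %r", request.url, exception)
            return None

It would then be enabled through DOWNLOADER_MIDDLEWARES, e.g. {"myproject.middlewares.StampingMiddleware": 543}, which is the setting _get_mwlist_from_settings reads via build_component_list.
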
diff --git a/scrapy/core/downloader/tls.py b/scrapy/core/downloader/tls.py
index 1295d26c4..33cea7263 100644
--- a/scrapy/core/downloader/tls.py
+++ b/scrapy/core/downloader/tls.py
@@ -1,18 +1,32 @@
 import logging
 from typing import Any, Dict
+
 from OpenSSL import SSL
 from service_identity.exceptions import CertificateError
-from twisted.internet._sslverify import ClientTLSOptions, VerificationError, verifyHostname
+from twisted.internet._sslverify import (
+    ClientTLSOptions,
+    VerificationError,
+    verifyHostname,
+)
 from twisted.internet.ssl import AcceptableCiphers
+
 from scrapy.utils.ssl import get_temp_key_info, x509name_to_string
+
 logger = logging.getLogger(__name__)
-METHOD_TLS = 'TLS'
-METHOD_TLSv10 = 'TLSv1.0'
-METHOD_TLSv11 = 'TLSv1.1'
-METHOD_TLSv12 = 'TLSv1.2'
-openssl_methods: Dict[str, int] = {METHOD_TLS: SSL.SSLv23_METHOD,
-    METHOD_TLSv10: SSL.TLSv1_METHOD, METHOD_TLSv11: SSL.TLSv1_1_METHOD,
-    METHOD_TLSv12: SSL.TLSv1_2_METHOD}
+
+
+METHOD_TLS = "TLS"
+METHOD_TLSv10 = "TLSv1.0"
+METHOD_TLSv11 = "TLSv1.1"
+METHOD_TLSv12 = "TLSv1.2"
+
+
+openssl_methods: Dict[str, int] = {
+    METHOD_TLS: SSL.SSLv23_METHOD,  # protocol negotiation (recommended)
+    METHOD_TLSv10: SSL.TLSv1_METHOD,  # TLS 1.0 only
+    METHOD_TLSv11: SSL.TLSv1_1_METHOD,  # TLS 1.1 only
+    METHOD_TLSv12: SSL.TLSv1_2_METHOD,  # TLS 1.2 only
+}


 class ScrapyClientTLSOptions(ClientTLSOptions):
@@ -26,11 +40,52 @@ class ScrapyClientTLSOptions(ClientTLSOptions):
     logging warnings. Also, HTTPS connection parameters logging is added.
     """

-    def __init__(self, hostname: str, ctx: SSL.Context, verbose_logging:
-        bool=False):
+    def __init__(self, hostname: str, ctx: SSL.Context, verbose_logging: bool = False):
         super().__init__(hostname, ctx)
         self.verbose_logging: bool = verbose_logging

+    def _identityVerifyingInfoCallback(
+        self, connection: SSL.Connection, where: int, ret: Any
+    ) -> None:
+        if where & SSL.SSL_CB_HANDSHAKE_START:
+            connection.set_tlsext_host_name(self._hostnameBytes)
+        elif where & SSL.SSL_CB_HANDSHAKE_DONE:
+            if self.verbose_logging:
+                logger.debug(
+                    "SSL connection to %s using protocol %s, cipher %s",
+                    self._hostnameASCII,
+                    connection.get_protocol_version_name(),
+                    connection.get_cipher_name(),
+                )
+                server_cert = connection.get_peer_certificate()
+                if server_cert:
+                    logger.debug(
+                        'SSL connection certificate: issuer "%s", subject "%s"',
+                        x509name_to_string(server_cert.get_issuer()),
+                        x509name_to_string(server_cert.get_subject()),
+                    )
+                key_info = get_temp_key_info(connection._ssl)
+                if key_info:
+                    logger.debug("SSL temp key: %s", key_info)
+
+            try:
+                verifyHostname(connection, self._hostnameASCII)
+            except (CertificateError, VerificationError) as e:
+                logger.warning(
+                    'Remote certificate is not valid for hostname "%s"; %s',
+                    self._hostnameASCII,
+                    e,
+                )
+
+            except ValueError as e:
+                logger.warning(
+                    "Ignoring error while verifying certificate "
+                    'from host "%s" (exception: %r)',
+                    self._hostnameASCII,
+                    e,
+                )
+

 DEFAULT_CIPHERS: AcceptableCiphers = AcceptableCiphers.fromOpenSSLCipherString(
-    'DEFAULT')
+    "DEFAULT"
+)
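
openssl_methods above maps the values accepted by the DOWNLOADER_CLIENT_TLS_METHOD setting to pyOpenSSL method constants, and the verbose_logging flag of ScrapyClientTLSOptions corresponds to DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING. A minimal settings sketch:

    # settings.py -- keys must match the METHOD_* strings defined above.
    DOWNLOADER_CLIENT_TLS_METHOD = "TLS"          # negotiate (SSLv23_METHOD), the default
    DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = True  # log protocol, cipher and certificate details
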
diff --git a/scrapy/core/downloader/webclient.py b/scrapy/core/downloader/webclient.py
index 96778332a..bb1f73805 100644
--- a/scrapy/core/downloader/webclient.py
+++ b/scrapy/core/downloader/webclient.py
@@ -2,9 +2,11 @@ import re
 from time import time
 from typing import Optional, Tuple
 from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
+
 from twisted.internet import defer
 from twisted.internet.protocol import ClientFactory
 from twisted.web.http import HTTPClient
+
 from scrapy import Request
 from scrapy.http import Headers
 from scrapy.responsetypes import responsetypes
@@ -12,48 +14,185 @@ from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.python import to_bytes, to_unicode


-def _parse(url: str) ->Tuple[bytes, bytes, bytes, int, bytes]:
+def _parsed_url_args(parsed: ParseResult) -> Tuple[bytes, bytes, bytes, int, bytes]:
+    # Assume parsed is urlparse-d from Request.url,
+    # which was passed via safe_url_string and is ascii-only.
+    path_str = urlunparse(("", "", parsed.path or "/", parsed.params, parsed.query, ""))
+    path = to_bytes(path_str, encoding="ascii")
+    assert parsed.hostname is not None
+    host = to_bytes(parsed.hostname, encoding="ascii")
+    port = parsed.port
+    scheme = to_bytes(parsed.scheme, encoding="ascii")
+    netloc = to_bytes(parsed.netloc, encoding="ascii")
+    if port is None:
+        port = 443 if scheme == b"https" else 80
+    return scheme, netloc, host, port, path
+
+
+def _parse(url: str) -> Tuple[bytes, bytes, bytes, int, bytes]:
     """Return tuple of (scheme, netloc, host, port, path),
     all in bytes except for port which is int.
     Assume url is from Request.url, which was passed via safe_url_string
     and is ascii-only.
     """
-    pass
+    url = url.strip()
+    if not re.match(r"^\w+://", url):
+        url = "//" + url
+    parsed = urlparse(url)
+    return _parsed_url_args(parsed)


 class ScrapyHTTPPageGetter(HTTPClient):
-    delimiter = b'\n'
+    delimiter = b"\n"
+
+    def connectionMade(self):
+        self.headers = Headers()  # bucket for response headers
+
+        # Method command
+        self.sendCommand(self.factory.method, self.factory.path)
+        # Headers
+        for key, values in self.factory.headers.items():
+            for value in values:
+                self.sendHeader(key, value)
+        self.endHeaders()
+        # Body
+        if self.factory.body is not None:
+            self.transport.write(self.factory.body)
+
+    def lineReceived(self, line):
+        return HTTPClient.lineReceived(self, line.rstrip())
+
+    def handleHeader(self, key, value):
+        self.headers.appendlist(key, value)
+
+    def handleStatus(self, version, status, message):
+        self.factory.gotStatus(version, status, message)
+
+    def handleEndHeaders(self):
+        self.factory.gotHeaders(self.headers)
+
+    def connectionLost(self, reason):
+        self._connection_lost_reason = reason
+        HTTPClient.connectionLost(self, reason)
+        self.factory.noPage(reason)
+
+    def handleResponse(self, response):
+        if self.factory.method.upper() == b"HEAD":
+            self.factory.page(b"")
+        elif self.length is not None and self.length > 0:
+            self.factory.noPage(self._connection_lost_reason)
+        else:
+            self.factory.page(response)
+        self.transport.loseConnection()
+
+    def timeout(self):
+        self.transport.loseConnection()
+
+        # transport cleanup needed for HTTPS connections
+        if self.factory.url.startswith(b"https"):
+            self.transport.stopProducing()

+        self.factory.noPage(
+            defer.TimeoutError(
+                f"Getting {self.factory.url} took longer "
+                f"than {self.factory.timeout} seconds."
+            )
+        )

+
+# This class used to inherit from Twisted's
+# twisted.web.client.HTTPClientFactory. When that class was deprecated in
+# Twisted (https://github.com/twisted/twisted/pull/643), we merged its
+# non-overridden code into this class.
 class ScrapyHTTPClientFactory(ClientFactory):
     protocol = ScrapyHTTPPageGetter
+
     waiting = 1
     noisy = False
     followRedirect = False
     afterFoundGet = False

-    def __init__(self, request: Request, timeout: float=180):
+    def _build_response(self, body, request):
+        request.meta["download_latency"] = self.headers_time - self.start_time
+        status = int(self.status)
+        headers = Headers(self.response_headers)
+        respcls = responsetypes.from_args(headers=headers, url=self._url, body=body)
+        return respcls(
+            url=self._url,
+            status=status,
+            headers=headers,
+            body=body,
+            protocol=to_unicode(self.version),
+        )
+
+    def _set_connection_attributes(self, request):
+        parsed = urlparse_cached(request)
+        self.scheme, self.netloc, self.host, self.port, self.path = _parsed_url_args(
+            parsed
+        )
+        proxy = request.meta.get("proxy")
+        if proxy:
+            self.scheme, _, self.host, self.port, _ = _parse(proxy)
+            self.path = self.url
+
+    def __init__(self, request: Request, timeout: float = 180):
         self._url: str = urldefrag(request.url)[0]
-        self.url: bytes = to_bytes(self._url, encoding='ascii')
-        self.method: bytes = to_bytes(request.method, encoding='ascii')
+        # converting to bytes to comply to Twisted interface
+        self.url: bytes = to_bytes(self._url, encoding="ascii")
+        self.method: bytes = to_bytes(request.method, encoding="ascii")
         self.body: Optional[bytes] = request.body or None
         self.headers: Headers = Headers(request.headers)
         self.response_headers: Optional[Headers] = None
-        self.timeout: float = request.meta.get('download_timeout') or timeout
+        self.timeout: float = request.meta.get("download_timeout") or timeout
         self.start_time: float = time()
-        self.deferred: defer.Deferred = defer.Deferred().addCallback(self.
-            _build_response, request)
+        self.deferred: defer.Deferred = defer.Deferred().addCallback(
+            self._build_response, request
+        )
+
+        # Fixes Twisted 11.1.0+ support as HTTPClientFactory is expected
+        # to have _disconnectedDeferred. See Twisted r32329.
+        # As Scrapy implements its own logic to handle redirects, there is
+        # no need to add the callback _waitForDisconnect.
+        # Specifically this avoids the AttributeError exception when
+        # clientConnectionFailed method is called.
         self._disconnectedDeferred: defer.Deferred = defer.Deferred()
+
         self._set_connection_attributes(request)
-        self.headers.setdefault('Host', self.netloc)
+
+        # set Host header based on url
+        self.headers.setdefault("Host", self.netloc)
+
+        # set Content-Length based on the length of the body
         if self.body is not None:
-            self.headers['Content-Length'] = len(self.body)
-            self.headers.setdefault('Connection', 'close')
-        elif self.method == b'POST':
-            self.headers['Content-Length'] = 0
+            self.headers["Content-Length"] = len(self.body)
+            # just in case a broken HTTP/1.1 server decides to keep the connection alive
+            self.headers.setdefault("Connection", "close")
+        # Content-Length must be specified in POST method even with no body
+        elif self.method == b"POST":
+            self.headers["Content-Length"] = 0
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__}: {self._url}>"
+
+    def _cancelTimeout(self, result, timeoutCall):
+        if timeoutCall.active():
+            timeoutCall.cancel()
+        return result
+
+    def buildProtocol(self, addr):
+        p = ClientFactory.buildProtocol(self, addr)
+        p.followRedirect = self.followRedirect
+        p.afterFoundGet = self.afterFoundGet
+        if self.timeout:
+            from twisted.internet import reactor

-    def __repr__(self) ->str:
-        return f'<{self.__class__.__name__}: {self._url}>'
+            timeoutCall = reactor.callLater(self.timeout, p.timeout)
+            self.deferred.addBoth(self._cancelTimeout, timeoutCall)
+        return p
+
+    def gotHeaders(self, headers):
+        self.headers_time = time()
+        self.response_headers = headers

     def gotStatus(self, version, status, message):
         """
@@ -66,7 +205,17 @@ class ScrapyHTTPClientFactory(ClientFactory):
         @param message: The HTTP status message.
         @type message: L{bytes}
         """
-        pass
+        self.version, self.status, self.message = version, status, message
+
+    def page(self, page):
+        if self.waiting:
+            self.waiting = 0
+            self.deferred.callback(page)
+
+    def noPage(self, reason):
+        if self.waiting:
+            self.waiting = 0
+            self.deferred.errback(reason)

     def clientConnectionFailed(self, _, reason):
         """
@@ -74,4 +223,9 @@ class ScrapyHTTPClientFactory(ClientFactory):
         result has yet been provided to the result Deferred, provide the
         connection failure reason as an error result.
         """
-        pass
+        if self.waiting:
+            self.waiting = 0
+            # If the connection attempt failed, there is nothing more to
+            # disconnect, so just fire that Deferred now.
+            self._disconnectedDeferred.callback(None)
+            self.deferred.errback(reason)
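
_parse above splits a request URL into (scheme, netloc, host, port, path), all bytes except the int port, defaulting the port from the scheme and the path to "/". A small illustration of the expected output, inferred from the code above:

    from scrapy.core.downloader.webclient import _parse

    scheme, netloc, host, port, path = _parse("https://example.com/a/b?x=1")
    assert scheme == b"https"
    assert netloc == b"example.com"
    assert host == b"example.com"
    assert port == 443           # defaulted, since the URL carries no explicit port
    assert path == b"/a/b?x=1"   # path plus query; a bare host would yield b"/"
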
diff --git a/scrapy/core/engine.py b/scrapy/core/engine.py
index e7403fb61..92967ffc8 100644
--- a/scrapy/core/engine.py
+++ b/scrapy/core/engine.py
@@ -6,10 +6,24 @@ For more information see docs/topics/architecture.rst
 """
 import logging
 from time import time
-from typing import TYPE_CHECKING, Any, Callable, Generator, Iterable, Iterator, Optional, Set, Type, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Generator,
+    Iterable,
+    Iterator,
+    Optional,
+    Set,
+    Type,
+    Union,
+    cast,
+)
+
 from twisted.internet.defer import Deferred, inlineCallbacks, succeed
 from twisted.internet.task import LoopingCall
 from twisted.python.failure import Failure
+
 from scrapy import signals
 from scrapy.core.downloader import Downloader
 from scrapy.core.scraper import Scraper
@@ -23,30 +37,54 @@ from scrapy.utils.log import failure_to_exc_info, logformatter_adapter
 from scrapy.utils.misc import create_instance, load_object
 from scrapy.utils.python import global_object_name
 from scrapy.utils.reactor import CallLaterOnce
+
 if TYPE_CHECKING:
     from scrapy.core.scheduler import BaseScheduler
     from scrapy.crawler import Crawler
+
 logger = logging.getLogger(__name__)


 class Slot:
-
-    def __init__(self, start_requests: Iterable[Request], close_if_idle:
-        bool, nextcall: CallLaterOnce, scheduler: 'BaseScheduler') ->None:
+    def __init__(
+        self,
+        start_requests: Iterable[Request],
+        close_if_idle: bool,
+        nextcall: CallLaterOnce,
+        scheduler: "BaseScheduler",
+    ) -> None:
         self.closing: Optional[Deferred] = None
         self.inprogress: Set[Request] = set()
         self.start_requests: Optional[Iterator[Request]] = iter(start_requests)
         self.close_if_idle: bool = close_if_idle
         self.nextcall: CallLaterOnce = nextcall
-        self.scheduler: 'BaseScheduler' = scheduler
+        self.scheduler: "BaseScheduler" = scheduler
         self.heartbeat: LoopingCall = LoopingCall(nextcall.schedule)

+    def add_request(self, request: Request) -> None:
+        self.inprogress.add(request)

-class ExecutionEngine:
+    def remove_request(self, request: Request) -> None:
+        self.inprogress.remove(request)
+        self._maybe_fire_closing()
+
+    def close(self) -> Deferred:
+        self.closing = Deferred()
+        self._maybe_fire_closing()
+        return self.closing
+
+    def _maybe_fire_closing(self) -> None:
+        if self.closing is not None and not self.inprogress:
+            if self.nextcall:
+                self.nextcall.cancel()
+                if self.heartbeat.running:
+                    self.heartbeat.stop()
+            self.closing.callback(None)

-    def __init__(self, crawler: 'Crawler', spider_closed_callback: Callable
-        ) ->None:
-        self.crawler: 'Crawler' = crawler
+
+class ExecutionEngine:
+    def __init__(self, crawler: "Crawler", spider_closed_callback: Callable) -> None:
+        self.crawler: "Crawler" = crawler
         self.settings: Settings = crawler.settings
         self.signals: SignalManager = crawler.signals
         assert crawler.logformatter
@@ -55,43 +93,386 @@ class ExecutionEngine:
         self.spider: Optional[Spider] = None
         self.running: bool = False
         self.paused: bool = False
-        self.scheduler_cls: Type['BaseScheduler'] = self._get_scheduler_class(
-            crawler.settings)
-        downloader_cls: Type[Downloader] = load_object(self.settings[
-            'DOWNLOADER'])
+        self.scheduler_cls: Type["BaseScheduler"] = self._get_scheduler_class(
+            crawler.settings
+        )
+        downloader_cls: Type[Downloader] = load_object(self.settings["DOWNLOADER"])
         self.downloader: Downloader = downloader_cls(crawler)
         self.scraper = Scraper(crawler)
         self._spider_closed_callback: Callable = spider_closed_callback
         self.start_time: Optional[float] = None

-    def stop(self) ->Deferred:
+    def _get_scheduler_class(self, settings: BaseSettings) -> Type["BaseScheduler"]:
+        from scrapy.core.scheduler import BaseScheduler
+
+        scheduler_cls: Type = load_object(settings["SCHEDULER"])
+        if not issubclass(scheduler_cls, BaseScheduler):
+            raise TypeError(
+                f"The provided scheduler class ({settings['SCHEDULER']})"
+                " does not fully implement the scheduler interface"
+            )
+        return scheduler_cls
+
+    @inlineCallbacks
+    def start(self) -> Generator[Deferred, Any, None]:
+        if self.running:
+            raise RuntimeError("Engine already running")
+        self.start_time = time()
+        yield self.signals.send_catch_log_deferred(signal=signals.engine_started)
+        self.running = True
+        self._closewait: Deferred = Deferred()
+        yield self._closewait
+
+    def stop(self) -> Deferred:
         """Gracefully stop the execution engine"""
-        pass

-    def close(self) ->Deferred:
+        @inlineCallbacks
+        def _finish_stopping_engine(_: Any) -> Generator[Deferred, Any, None]:
+            yield self.signals.send_catch_log_deferred(signal=signals.engine_stopped)
+            self._closewait.callback(None)
+
+        if not self.running:
+            raise RuntimeError("Engine not running")
+
+        self.running = False
+        dfd = (
+            self.close_spider(self.spider, reason="shutdown")
+            if self.spider is not None
+            else succeed(None)
+        )
+        return dfd.addBoth(_finish_stopping_engine)
+
+    def close(self) -> Deferred:
         """
         Gracefully close the execution engine.
         If it has already been started, stop it. In all cases, close the spider and the downloader.
         """
-        pass
+        if self.running:
+            return self.stop()  # will also close spider and downloader
+        if self.spider is not None:
+            return self.close_spider(
+                self.spider, reason="shutdown"
+            )  # will also close downloader
+        self.downloader.close()
+        return succeed(None)
+
+    def pause(self) -> None:
+        self.paused = True
+
+    def unpause(self) -> None:
+        self.paused = False
+
+    def _next_request(self) -> None:
+        if self.slot is None:
+            return
+
+        assert self.spider is not None  # typing
+
+        if self.paused:
+            return None
+
+        while (
+            not self._needs_backout()
+            and self._next_request_from_scheduler() is not None
+        ):
+            pass
+
+        if self.slot.start_requests is not None and not self._needs_backout():
+            try:
+                request = next(self.slot.start_requests)
+            except StopIteration:
+                self.slot.start_requests = None
+            except Exception:
+                self.slot.start_requests = None
+                logger.error(
+                    "Error while obtaining start requests",
+                    exc_info=True,
+                    extra={"spider": self.spider},
+                )
+            else:
+                self.crawl(request)
+
+        if self.spider_is_idle() and self.slot.close_if_idle:
+            self._spider_idle()
+
+    def _needs_backout(self) -> bool:
+        assert self.slot is not None  # typing
+        assert self.scraper.slot is not None  # typing
+        return (
+            not self.running
+            or bool(self.slot.closing)
+            or self.downloader.needs_backout()
+            or self.scraper.slot.needs_backout()
+        )
+
+    def _next_request_from_scheduler(self) -> Optional[Deferred]:
+        assert self.slot is not None  # typing
+        assert self.spider is not None  # typing
+
+        request = self.slot.scheduler.next_request()
+        if request is None:
+            return None
+
+        d = self._download(request)
+        d.addBoth(self._handle_downloader_output, request)
+        d.addErrback(
+            lambda f: logger.info(
+                "Error while handling downloader output",
+                exc_info=failure_to_exc_info(f),
+                extra={"spider": self.spider},
+            )
+        )
+
+        def _remove_request(_: Any) -> None:
+            assert self.slot
+            self.slot.remove_request(request)
+
+        d.addBoth(_remove_request)
+        d.addErrback(
+            lambda f: logger.info(
+                "Error while removing request from slot",
+                exc_info=failure_to_exc_info(f),
+                extra={"spider": self.spider},
+            )
+        )
+        slot = self.slot
+        d.addBoth(lambda _: slot.nextcall.schedule())
+        d.addErrback(
+            lambda f: logger.info(
+                "Error while scheduling new request",
+                exc_info=failure_to_exc_info(f),
+                extra={"spider": self.spider},
+            )
+        )
+        return d

-    def crawl(self, request: Request) ->None:
+    def _handle_downloader_output(
+        self, result: Union[Request, Response, Failure], request: Request
+    ) -> Optional[Deferred]:
+        assert self.spider is not None  # typing
+
+        if not isinstance(result, (Request, Response, Failure)):
+            raise TypeError(
+                f"Incorrect type: expected Request, Response or Failure, got {type(result)}: {result!r}"
+            )
+
+        # downloader middleware can return requests (for example, redirects)
+        if isinstance(result, Request):
+            self.crawl(result)
+            return None
+
+        d = self.scraper.enqueue_scrape(result, request, self.spider)
+        d.addErrback(
+            lambda f: logger.error(
+                "Error while enqueuing downloader output",
+                exc_info=failure_to_exc_info(f),
+                extra={"spider": self.spider},
+            )
+        )
+        return d
+
+    def spider_is_idle(self) -> bool:
+        if self.slot is None:
+            raise RuntimeError("Engine slot not assigned")
+        if not self.scraper.slot.is_idle():  # type: ignore[union-attr]
+            return False
+        if self.downloader.active:  # downloader has pending requests
+            return False
+        if self.slot.start_requests is not None:  # not all start requests are handled
+            return False
+        if self.slot.scheduler.has_pending_requests():
+            return False
+        return True
+
+    def crawl(self, request: Request) -> None:
         """Inject the request into the spider <-> downloader pipeline"""
-        pass
+        if self.spider is None:
+            raise RuntimeError(f"No open spider to crawl: {request}")
+        self._schedule_request(request, self.spider)
+        self.slot.nextcall.schedule()  # type: ignore[union-attr]
+
+    def _schedule_request(self, request: Request, spider: Spider) -> None:
+        request_scheduled_result = self.signals.send_catch_log(
+            signals.request_scheduled,
+            request=request,
+            spider=spider,
+            dont_log=IgnoreRequest,
+        )
+        for handler, result in request_scheduled_result:
+            if isinstance(result, Failure) and isinstance(result.value, IgnoreRequest):
+                logger.debug(
+                    f"Signal handler {global_object_name(handler)} dropped "
+                    f"request {request} before it reached the scheduler."
+                )
+                return
+        if not self.slot.scheduler.enqueue_request(request):  # type: ignore[union-attr]
+            self.signals.send_catch_log(
+                signals.request_dropped, request=request, spider=spider
+            )

-    def download(self, request: Request) ->Deferred:
+    def download(self, request: Request) -> Deferred:
         """Return a Deferred which fires with a Response as result, only downloader middlewares are applied"""
-        pass
+        if self.spider is None:
+            raise RuntimeError(f"No open spider to crawl: {request}")
+        return self._download(request).addBoth(self._downloaded, request)
+
+    def _downloaded(
+        self, result: Union[Response, Request, Failure], request: Request
+    ) -> Union[Deferred, Response, Failure]:
+        assert self.slot is not None  # typing
+        self.slot.remove_request(request)
+        return self.download(result) if isinstance(result, Request) else result
+
+    def _download(self, request: Request) -> Deferred:
+        assert self.slot is not None  # typing

-    def _spider_idle(self) ->None:
+        self.slot.add_request(request)
+
+        def _on_success(result: Union[Response, Request]) -> Union[Response, Request]:
+            if not isinstance(result, (Response, Request)):
+                raise TypeError(
+                    f"Incorrect type: expected Response or Request, got {type(result)}: {result!r}"
+                )
+            if isinstance(result, Response):
+                if result.request is None:
+                    result.request = request
+                assert self.spider is not None
+                logkws = self.logformatter.crawled(result.request, result, self.spider)
+                if logkws is not None:
+                    logger.log(
+                        *logformatter_adapter(logkws), extra={"spider": self.spider}
+                    )
+                self.signals.send_catch_log(
+                    signal=signals.response_received,
+                    response=result,
+                    request=result.request,
+                    spider=self.spider,
+                )
+            return result
+
+        def _on_complete(_: Any) -> Any:
+            assert self.slot is not None
+            self.slot.nextcall.schedule()
+            return _
+
+        assert self.spider is not None
+        dwld = self.downloader.fetch(request, self.spider)
+        dwld.addCallbacks(_on_success)
+        dwld.addBoth(_on_complete)
+        return dwld
+
+    @inlineCallbacks
+    def open_spider(
+        self, spider: Spider, start_requests: Iterable = (), close_if_idle: bool = True
+    ) -> Generator[Deferred, Any, None]:
+        if self.slot is not None:
+            raise RuntimeError(f"No free spider slot when opening {spider.name!r}")
+        logger.info("Spider opened", extra={"spider": spider})
+        nextcall = CallLaterOnce(self._next_request)
+        scheduler = create_instance(
+            self.scheduler_cls, settings=None, crawler=self.crawler
+        )
+        start_requests = yield self.scraper.spidermw.process_start_requests(
+            start_requests, spider
+        )
+        self.slot = Slot(start_requests, close_if_idle, nextcall, scheduler)
+        self.spider = spider
+        if hasattr(scheduler, "open"):
+            yield scheduler.open(spider)
+        yield self.scraper.open_spider(spider)
+        assert self.crawler.stats
+        self.crawler.stats.open_spider(spider)
+        yield self.signals.send_catch_log_deferred(signals.spider_opened, spider=spider)
+        self.slot.nextcall.schedule()
+        self.slot.heartbeat.start(5)
+
+    def _spider_idle(self) -> None:
         """
         Called when a spider gets idle, i.e. when there are no remaining requests to download or schedule.
         It can be called multiple times. If a handler for the spider_idle signal raises a DontCloseSpider
         exception, the spider is not closed until the next loop and this function is guaranteed to be called
         (at least) once again. A handler can raise CloseSpider to provide a custom closing reason.
         """
-        pass
+        assert self.spider is not None  # typing
+        expected_ex = (DontCloseSpider, CloseSpider)
+        res = self.signals.send_catch_log(
+            signals.spider_idle, spider=self.spider, dont_log=expected_ex
+        )
+        detected_ex = {
+            ex: x.value
+            for _, x in res
+            for ex in expected_ex
+            if isinstance(x, Failure) and isinstance(x.value, ex)
+        }
+        if DontCloseSpider in detected_ex:
+            return None
+        if self.spider_is_idle():
+            ex = detected_ex.get(CloseSpider, CloseSpider(reason="finished"))
+            assert isinstance(ex, CloseSpider)  # typing
+            self.close_spider(self.spider, reason=ex.reason)

-    def close_spider(self, spider: Spider, reason: str='cancelled') ->Deferred:
+    def close_spider(self, spider: Spider, reason: str = "cancelled") -> Deferred:
         """Close (cancel) spider and clear all its outstanding requests"""
-        pass
+        if self.slot is None:
+            raise RuntimeError("Engine slot not assigned")
+
+        if self.slot.closing is not None:
+            return self.slot.closing
+
+        logger.info(
+            "Closing spider (%(reason)s)", {"reason": reason}, extra={"spider": spider}
+        )
+
+        dfd = self.slot.close()
+
+        def log_failure(msg: str) -> Callable:
+            def errback(failure: Failure) -> None:
+                logger.error(
+                    msg, exc_info=failure_to_exc_info(failure), extra={"spider": spider}
+                )
+
+            return errback
+
+        dfd.addBoth(lambda _: self.downloader.close())
+        dfd.addErrback(log_failure("Downloader close failure"))
+
+        dfd.addBoth(lambda _: self.scraper.close_spider(spider))
+        dfd.addErrback(log_failure("Scraper close failure"))
+
+        if hasattr(self.slot.scheduler, "close"):
+            dfd.addBoth(lambda _: cast(Slot, self.slot).scheduler.close(reason))
+            dfd.addErrback(log_failure("Scheduler close failure"))
+
+        dfd.addBoth(
+            lambda _: self.signals.send_catch_log_deferred(
+                signal=signals.spider_closed,
+                spider=spider,
+                reason=reason,
+            )
+        )
+        dfd.addErrback(log_failure("Error while sending spider_close signal"))
+
+        def close_stats(_: Any) -> None:
+            assert self.crawler.stats
+            self.crawler.stats.close_spider(spider, reason=reason)
+
+        dfd.addBoth(close_stats)
+        dfd.addErrback(log_failure("Stats close failure"))
+
+        dfd.addBoth(
+            lambda _: logger.info(
+                "Spider closed (%(reason)s)",
+                {"reason": reason},
+                extra={"spider": spider},
+            )
+        )
+
+        dfd.addBoth(lambda _: setattr(self, "slot", None))
+        dfd.addErrback(log_failure("Error while unassigning slot"))
+
+        dfd.addBoth(lambda _: setattr(self, "spider", None))
+        dfd.addErrback(log_failure("Error while unassigning spider"))
+
+        dfd.addBoth(lambda _: self._spider_closed_callback(spider))
+
+        return dfd
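
The close_spider() chain above leans on a Twisted pattern worth spelling out: every cleanup step is attached with addBoth() so it runs whether or not the previous step failed, and each step is followed by an addErrback() that only logs the failure instead of aborting the chain. A minimal sketch of that pattern in plain Twisted (the step names are illustrative, not Scrapy APIs):

    # Sketch of the addBoth/addErrback chaining used by close_spider above:
    # every step runs even if an earlier one failed, and failures are only logged.
    from twisted.internet.defer import succeed
    from twisted.python.failure import Failure

    def log_failure(msg):
        def errback(failure: Failure) -> None:
            print(f"{msg}: {failure.value!r}")
        return errback

    def close_downloader(_):
        raise RuntimeError("simulated downloader failure")

    def close_scraper(_):
        print("scraper closed")

    dfd = succeed(None)                 # stands in for Slot.close()
    dfd.addBoth(close_downloader)
    dfd.addErrback(log_failure("Downloader close failure"))
    dfd.addBoth(close_scraper)          # still runs; the errback consumed the failure
    dfd.addErrback(log_failure("Scraper close failure"))
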
diff --git a/scrapy/core/http2/agent.py b/scrapy/core/http2/agent.py
index caf067cd1..215ea9716 100644
--- a/scrapy/core/http2/agent.py
+++ b/scrapy/core/http2/agent.py
@@ -1,12 +1,19 @@
 from collections import deque
 from typing import Deque, Dict, List, Optional, Tuple
+
 from twisted.internet import defer
 from twisted.internet.base import ReactorBase
 from twisted.internet.defer import Deferred
 from twisted.internet.endpoints import HostnameEndpoint
 from twisted.python.failure import Failure
-from twisted.web.client import URI, BrowserLikePolicyForHTTPS, ResponseFailed, _StandardEndpointFactory
+from twisted.web.client import (
+    URI,
+    BrowserLikePolicyForHTTPS,
+    ResponseFailed,
+    _StandardEndpointFactory,
+)
 from twisted.web.error import SchemeNotSupported
+
 from scrapy.core.downloader.contextfactory import AcceptableProtocolsContextFactory
 from scrapy.core.http2.protocol import H2ClientFactory, H2ClientProtocol
 from scrapy.http.request import Request
@@ -15,54 +22,148 @@ from scrapy.spiders import Spider


 class H2ConnectionPool:
-
-    def __init__(self, reactor: ReactorBase, settings: Settings) ->None:
+    def __init__(self, reactor: ReactorBase, settings: Settings) -> None:
         self._reactor = reactor
         self.settings = settings
+
+        # Store a dictionary which maps a (scheme, hostname, port) tuple
+        # to the respective H2ClientProtocol instance
         self._connections: Dict[Tuple, H2ClientProtocol] = {}
+
+        # Save all requests that arrive before the connection is established
         self._pending_requests: Dict[Tuple, Deque[Deferred]] = {}

-    def close_connections(self) ->None:
+    def get_connection(
+        self, key: Tuple, uri: URI, endpoint: HostnameEndpoint
+    ) -> Deferred:
+        if key in self._pending_requests:
+            # Received a request while connecting to remote
+            # Create a deferred which will fire with the H2ClientProtocol
+            # instance
+            d: Deferred = Deferred()
+            self._pending_requests[key].append(d)
+            return d
+
+        # Check if we already have a connection to the remote
+        conn = self._connections.get(key, None)
+        if conn:
+            # Return this connection instance wrapped inside a deferred
+            return defer.succeed(conn)
+
+        # No connection is established for the given URI
+        return self._new_connection(key, uri, endpoint)
+
+    def _new_connection(
+        self, key: Tuple, uri: URI, endpoint: HostnameEndpoint
+    ) -> Deferred:
+        self._pending_requests[key] = deque()
+
+        conn_lost_deferred: Deferred = Deferred()
+        conn_lost_deferred.addCallback(self._remove_connection, key)
+
+        factory = H2ClientFactory(uri, self.settings, conn_lost_deferred)
+        conn_d = endpoint.connect(factory)
+        conn_d.addCallback(self.put_connection, key)
+
+        d: Deferred = Deferred()
+        self._pending_requests[key].append(d)
+        return d
+
+    def put_connection(self, conn: H2ClientProtocol, key: Tuple) -> H2ClientProtocol:
+        self._connections[key] = conn
+
+        # Now that we have established a proper HTTP/2 connection
+        # we fire all the pending deferreds with the connection instance
+        pending_requests = self._pending_requests.pop(key, None)
+        while pending_requests:
+            d = pending_requests.popleft()
+            d.callback(conn)
+
+        return conn
+
+    def _remove_connection(self, errors: List[BaseException], key: Tuple) -> None:
+        self._connections.pop(key)
+
+        # Call the errback of all the pending requests for this connection
+        pending_requests = self._pending_requests.pop(key, None)
+        while pending_requests:
+            d = pending_requests.popleft()
+            d.errback(ResponseFailed(errors))
+
+    def close_connections(self) -> None:
         """Close all the HTTP/2 connections and remove them from pool

         Returns:
             Deferred that fires when all connections have been closed
         """
-        pass
+        for conn in self._connections.values():
+            assert conn.transport is not None  # typing
+            conn.transport.abortConnection()


 class H2Agent:
-
-    def __init__(self, reactor: ReactorBase, pool: H2ConnectionPool,
-        context_factory: BrowserLikePolicyForHTTPS=
-        BrowserLikePolicyForHTTPS(), connect_timeout: Optional[float]=None,
-        bind_address: Optional[bytes]=None) ->None:
+    def __init__(
+        self,
+        reactor: ReactorBase,
+        pool: H2ConnectionPool,
+        context_factory: BrowserLikePolicyForHTTPS = BrowserLikePolicyForHTTPS(),
+        connect_timeout: Optional[float] = None,
+        bind_address: Optional[bytes] = None,
+    ) -> None:
         self._reactor = reactor
         self._pool = pool
         self._context_factory = AcceptableProtocolsContextFactory(
-            context_factory, acceptable_protocols=[b'h2'])
-        self.endpoint_factory = _StandardEndpointFactory(self._reactor,
-            self._context_factory, connect_timeout, bind_address)
+            context_factory, acceptable_protocols=[b"h2"]
+        )
+        self.endpoint_factory = _StandardEndpointFactory(
+            self._reactor, self._context_factory, connect_timeout, bind_address
+        )

-    def get_key(self, uri: URI) ->Tuple:
+    def get_endpoint(self, uri: URI):
+        return self.endpoint_factory.endpointForURI(uri)
+
+    def get_key(self, uri: URI) -> Tuple:
         """
         Arguments:
             uri - URI obtained directly from request URL
         """
-        pass
+        return uri.scheme, uri.host, uri.port

+    def request(self, request: Request, spider: Spider) -> Deferred:
+        uri = URI.fromBytes(bytes(request.url, encoding="utf-8"))
+        try:
+            endpoint = self.get_endpoint(uri)
+        except SchemeNotSupported:
+            return defer.fail(Failure())
+
+        key = self.get_key(uri)
+        d = self._pool.get_connection(key, uri, endpoint)
+        d.addCallback(lambda conn: conn.request(request, spider))
+        return d

-class ScrapyProxyH2Agent(H2Agent):

-    def __init__(self, reactor: ReactorBase, proxy_uri: URI, pool:
-        H2ConnectionPool, context_factory: BrowserLikePolicyForHTTPS=
-        BrowserLikePolicyForHTTPS(), connect_timeout: Optional[float]=None,
-        bind_address: Optional[bytes]=None) ->None:
-        super().__init__(reactor=reactor, pool=pool, context_factory=
-            context_factory, connect_timeout=connect_timeout, bind_address=
-            bind_address)
+class ScrapyProxyH2Agent(H2Agent):
+    def __init__(
+        self,
+        reactor: ReactorBase,
+        proxy_uri: URI,
+        pool: H2ConnectionPool,
+        context_factory: BrowserLikePolicyForHTTPS = BrowserLikePolicyForHTTPS(),
+        connect_timeout: Optional[float] = None,
+        bind_address: Optional[bytes] = None,
+    ) -> None:
+        super().__init__(
+            reactor=reactor,
+            pool=pool,
+            context_factory=context_factory,
+            connect_timeout=connect_timeout,
+            bind_address=bind_address,
+        )
         self._proxy_uri = proxy_uri

-    def get_key(self, uri: URI) ->Tuple:
+    def get_endpoint(self, uri: URI):
+        return self.endpoint_factory.endpointForURI(self._proxy_uri)
+
+    def get_key(self, uri: URI) -> Tuple:
         """We use the proxy uri instead of uri obtained from request url"""
-        pass
+        return "http-proxy", self._proxy_uri.host, self._proxy_uri.port
diff --git a/scrapy/core/http2/protocol.py b/scrapy/core/http2/protocol.py
index 7f0ed1e12..bc8da50d7 100644
--- a/scrapy/core/http2/protocol.py
+++ b/scrapy/core/http2/protocol.py
@@ -4,10 +4,21 @@ import logging
 from collections import deque
 from ipaddress import IPv4Address, IPv6Address
 from typing import Dict, List, Optional, Union
+
 from h2.config import H2Configuration
 from h2.connection import H2Connection
 from h2.errors import ErrorCodes
-from h2.events import ConnectionTerminated, DataReceived, Event, ResponseReceived, SettingsAcknowledged, StreamEnded, StreamReset, UnknownFrameReceived, WindowUpdated
+from h2.events import (
+    ConnectionTerminated,
+    DataReceived,
+    Event,
+    ResponseReceived,
+    SettingsAcknowledged,
+    StreamEnded,
+    StreamReset,
+    UnknownFrameReceived,
+    WindowUpdated,
+)
 from h2.exceptions import FrameTooLargeError, H2Error
 from twisted.internet.defer import Deferred
 from twisted.internet.error import TimeoutError
@@ -18,54 +29,56 @@ from twisted.protocols.policies import TimeoutMixin
 from twisted.python.failure import Failure
 from twisted.web.client import URI
 from zope.interface import implementer
+
 from scrapy.core.http2.stream import Stream, StreamCloseReason
 from scrapy.http import Request
 from scrapy.settings import Settings
 from scrapy.spiders import Spider
+
 logger = logging.getLogger(__name__)
-PROTOCOL_NAME = b'h2'


-class InvalidNegotiatedProtocol(H2Error):
+PROTOCOL_NAME = b"h2"
+

-    def __init__(self, negotiated_protocol: bytes) ->None:
+class InvalidNegotiatedProtocol(H2Error):
+    def __init__(self, negotiated_protocol: bytes) -> None:
         self.negotiated_protocol = negotiated_protocol

-    def __str__(self) ->str:
-        return (
-            f'Expected {PROTOCOL_NAME!r}, received {self.negotiated_protocol!r}'
-            )
+    def __str__(self) -> str:
+        return f"Expected {PROTOCOL_NAME!r}, received {self.negotiated_protocol!r}"


 class RemoteTerminatedConnection(H2Error):
-
-    def __init__(self, remote_ip_address: Optional[Union[IPv4Address,
-        IPv6Address]], event: ConnectionTerminated) ->None:
+    def __init__(
+        self,
+        remote_ip_address: Optional[Union[IPv4Address, IPv6Address]],
+        event: ConnectionTerminated,
+    ) -> None:
         self.remote_ip_address = remote_ip_address
         self.terminate_event = event

-    def __str__(self) ->str:
-        return f'Received GOAWAY frame from {self.remote_ip_address!r}'
+    def __str__(self) -> str:
+        return f"Received GOAWAY frame from {self.remote_ip_address!r}"


 class MethodNotAllowed405(H2Error):
-
-    def __init__(self, remote_ip_address: Optional[Union[IPv4Address,
-        IPv6Address]]) ->None:
+    def __init__(
+        self, remote_ip_address: Optional[Union[IPv4Address, IPv6Address]]
+    ) -> None:
         self.remote_ip_address = remote_ip_address

-    def __str__(self) ->str:
-        return (
-            f"Received 'HTTP/2.0 405 Method Not Allowed' from {self.remote_ip_address!r}"
-            )
+    def __str__(self) -> str:
+        return f"Received 'HTTP/2.0 405 Method Not Allowed' from {self.remote_ip_address!r}"


 @implementer(IHandshakeListener)
 class H2ClientProtocol(Protocol, TimeoutMixin):
     IDLE_TIMEOUT = 240

-    def __init__(self, uri: URI, settings: Settings, conn_lost_deferred:
-        Deferred) ->None:
+    def __init__(
+        self, uri: URI, settings: Settings, conn_lost_deferred: Deferred
+    ) -> None:
         """
         Arguments:
             uri -- URI of the base url to which HTTP/2 Connection will be made.
@@ -76,108 +89,350 @@ class H2ClientProtocol(Protocol, TimeoutMixin):
                 that connection was lost
         """
         self._conn_lost_deferred = conn_lost_deferred
-        config = H2Configuration(client_side=True, header_encoding='utf-8')
+
+        config = H2Configuration(client_side=True, header_encoding="utf-8")
         self.conn = H2Connection(config=config)
+
+        # ID of the next request stream
+        # Following the convention - 'Streams initiated by a client MUST
+        # use odd-numbered stream identifiers' (RFC 7540 - Section 5.1.1)
         self._stream_id_generator = itertools.count(start=1, step=2)
+
+        # Streams are stored in a dictionary keyed off their stream IDs
         self.streams: Dict[int, Stream] = {}
+
+        # If requests are received before connection is made we keep
+        # all requests in a pool and send them as the connection is made
         self._pending_request_stream_pool: deque = deque()
+
+        # Save the errors raised which led to losing the connection
+        # We pass these instances to the streams' ResponseFailed() failure
         self._conn_lost_errors: List[BaseException] = []
-        self.metadata: Dict = {'certificate': None, 'ip_address': None,
-            'uri': uri, 'default_download_maxsize': settings.getint(
-            'DOWNLOAD_MAXSIZE'), 'default_download_warnsize': settings.
-            getint('DOWNLOAD_WARNSIZE'), 'active_streams': 0,
-            'settings_acknowledged': False}
+
+        # Some meta data of this connection
+        # initialized when connection is successfully made
+        self.metadata: Dict = {
+            # Peer certificate instance
+            "certificate": None,
+            # Address of the server we are connected to, which
+            # is updated when the HTTP/2 connection is made successfully
+            "ip_address": None,
+            # URI of the peer to which the HTTP/2 connection is made
+            "uri": uri,
+            # Both ip_address and uri are used by the Stream before
+            # initiating the request, to verify that the request targets
+            # the connected base address
+            # Download size limits taken from the project settings
+            "default_download_maxsize": settings.getint("DOWNLOAD_MAXSIZE"),
+            "default_download_warnsize": settings.getint("DOWNLOAD_WARNSIZE"),
+            # Counter to keep track of opened streams. This counter
+            # is used to make sure that no more than MAX_CONCURRENT_STREAMS
+            # streams are opened, which would lead to a ProtocolError
+            # We use a simple FIFO policy to handle pending requests
+            "active_streams": 0,
+            # Flag to keep track of whether settings were acknowledged by the
+            # remote, which ensures that we have established an HTTP/2 connection
+            "settings_acknowledged": False,
+        }

     @property
-    def h2_connected(self) ->bool:
+    def h2_connected(self) -> bool:
         """Boolean to keep track of the connection status.
         This is used while initiating pending streams to make sure
         that we initiate stream only during active HTTP/2 Connection
         """
-        pass
+        assert self.transport is not None  # typing
+        return bool(self.transport.connected) and self.metadata["settings_acknowledged"]

     @property
-    def allowed_max_concurrent_streams(self) ->int:
+    def allowed_max_concurrent_streams(self) -> int:
         """We keep total two streams for client (sending data) and
         server side (receiving data) for a single request. To be safe
         we choose the minimum. Since this value can change in event
         RemoteSettingsChanged we make variable a property.
         """
-        pass
+        return min(
+            self.conn.local_settings.max_concurrent_streams,
+            self.conn.remote_settings.max_concurrent_streams,
+        )

-    def _send_pending_requests(self) ->None:
+    def _send_pending_requests(self) -> None:
         """Initiate all pending requests from the deque following FIFO
         We make sure that at any time {allowed_max_concurrent_streams}
         streams are active.
         """
-        pass
-
-    def pop_stream(self, stream_id: int) ->Stream:
+        while (
+            self._pending_request_stream_pool
+            and self.metadata["active_streams"] < self.allowed_max_concurrent_streams
+            and self.h2_connected
+        ):
+            self.metadata["active_streams"] += 1
+            stream = self._pending_request_stream_pool.popleft()
+            stream.initiate_request()
+            self._write_to_transport()
+
+    def pop_stream(self, stream_id: int) -> Stream:
         """Perform cleanup when a stream is closed"""
-        pass
+        stream = self.streams.pop(stream_id)
+        self.metadata["active_streams"] -= 1
+        self._send_pending_requests()
+        return stream

-    def _new_stream(self, request: Request, spider: Spider) ->Stream:
+    def _new_stream(self, request: Request, spider: Spider) -> Stream:
         """Instantiates a new Stream object"""
-        pass
-
-    def _write_to_transport(self) ->None:
+        stream = Stream(
+            stream_id=next(self._stream_id_generator),
+            request=request,
+            protocol=self,
+            download_maxsize=getattr(
+                spider, "download_maxsize", self.metadata["default_download_maxsize"]
+            ),
+            download_warnsize=getattr(
+                spider, "download_warnsize", self.metadata["default_download_warnsize"]
+            ),
+        )
+        self.streams[stream.stream_id] = stream
+        return stream
+
+    def _write_to_transport(self) -> None:
         """Write data to the underlying transport connection
         from the HTTP2 connection instance if any
         """
-        pass
+        assert self.transport is not None  # typing
+        # Reset the idle timeout as connection is still actively sending data
+        self.resetTimeout()

-    def connectionMade(self) ->None:
+        data = self.conn.data_to_send()
+        self.transport.write(data)
+
+    def request(self, request: Request, spider: Spider) -> Deferred:
+        if not isinstance(request, Request):
+            raise TypeError(
+                f"Expected scrapy.http.Request, received {request.__class__.__qualname__}"
+            )
+
+        stream = self._new_stream(request, spider)
+        d = stream.get_response()
+
+        # Add the stream to the request pool
+        self._pending_request_stream_pool.append(stream)
+
+        # If we receive a request while the connection is idle
+        # we need to initiate the pending requests
+        self._send_pending_requests()
+        return d
+
+    def connectionMade(self) -> None:
         """Called by Twisted when the connection is established. We can start
         sending some data now: we should open with the connection preamble.
         """
-        pass
+        # Initialize the timeout
+        self.setTimeout(self.IDLE_TIMEOUT)
+
+        assert self.transport is not None  # typing
+        destination = self.transport.getPeer()
+        self.metadata["ip_address"] = ipaddress.ip_address(destination.host)

-    def _lose_connection_with_error(self, errors: List[BaseException]) ->None:
+        # Initiate H2 Connection
+        self.conn.initiate_connection()
+        self._write_to_transport()
+
+    def _lose_connection_with_error(self, errors: List[BaseException]) -> None:
         """Helper function to lose the connection with the error sent as a
         reason"""
-        pass
+        self._conn_lost_errors += errors
+        assert self.transport is not None  # typing
+        self.transport.loseConnection()

-    def handshakeCompleted(self) ->None:
+    def handshakeCompleted(self) -> None:
         """
         Close the connection if it's not made via the expected protocol
         """
-        pass
+        assert self.transport is not None  # typing
+        if (
+            self.transport.negotiatedProtocol is not None
+            and self.transport.negotiatedProtocol != PROTOCOL_NAME
+        ):
+            # we have not initiated the connection yet, no need to send a GOAWAY frame to the remote peer
+            self._lose_connection_with_error(
+                [InvalidNegotiatedProtocol(self.transport.negotiatedProtocol)]
+            )

-    def _check_received_data(self, data: bytes) ->None:
+    def _check_received_data(self, data: bytes) -> None:
         """Checks for edge cases where the connection to remote fails
         without raising an appropriate H2Error

         Arguments:
             data -- Data received from the remote
         """
-        pass
-
-    def timeoutConnection(self) ->None:
+        if data.startswith(b"HTTP/2.0 405 Method Not Allowed"):
+            raise MethodNotAllowed405(self.metadata["ip_address"])
+
+    def dataReceived(self, data: bytes) -> None:
+        # Reset the idle timeout as connection is still actively receiving data
+        self.resetTimeout()
+
+        try:
+            self._check_received_data(data)
+            events = self.conn.receive_data(data)
+            self._handle_events(events)
+        except H2Error as e:
+            if isinstance(e, FrameTooLargeError):
+                # hyper-h2 does not drop the connection in this scenario, we
+                # need to abort the connection manually.
+                self._conn_lost_errors += [e]
+                assert self.transport is not None  # typing
+                self.transport.abortConnection()
+                return
+
+            # Save this error as ultimately the connection will be dropped
+            # internally by hyper-h2. Saved error will be passed to all the streams
+            # closed with the connection.
+            self._lose_connection_with_error([e])
+        finally:
+            self._write_to_transport()
+
+    def timeoutConnection(self) -> None:
         """Called when the connection times out.
         We lose the connection with TimeoutError"""
-        pass

-    def connectionLost(self, reason: Failure=connectionDone) ->None:
+        # Check whether there are open streams. If there are, we're going to
+        # want to use the error code PROTOCOL_ERROR. If there aren't, use
+        # NO_ERROR.
+        if (
+            self.conn.open_outbound_streams > 0
+            or self.conn.open_inbound_streams > 0
+            or self.metadata["active_streams"] > 0
+        ):
+            error_code = ErrorCodes.PROTOCOL_ERROR
+        else:
+            error_code = ErrorCodes.NO_ERROR
+        self.conn.close_connection(error_code=error_code)
+        self._write_to_transport()
+
+        self._lose_connection_with_error(
+            [TimeoutError(f"Connection was IDLE for more than {self.IDLE_TIMEOUT}s")]
+        )
+
+    def connectionLost(self, reason: Failure = connectionDone) -> None:
         """Called by Twisted when the transport connection is lost.
         No need to write anything to transport here.
         """
-        pass
+        # Cancel the timeout if not done yet
+        self.setTimeout(None)
+
+        # Notify the connection pool instance such that no new requests are
+        # sent over current connection
+        if not reason.check(connectionDone):
+            self._conn_lost_errors.append(reason)
+
+        self._conn_lost_deferred.callback(self._conn_lost_errors)
+
+        for stream in self.streams.values():
+            if stream.metadata["request_sent"]:
+                close_reason = StreamCloseReason.CONNECTION_LOST
+            else:
+                close_reason = StreamCloseReason.INACTIVE
+            stream.close(close_reason, self._conn_lost_errors, from_protocol=True)

-    def _handle_events(self, events: List[Event]) ->None:
+        self.metadata["active_streams"] -= len(self.streams)
+        self.streams.clear()
+        self._pending_request_stream_pool.clear()
+        self.conn.close_connection()
+
+    def _handle_events(self, events: List[Event]) -> None:
         """Private method which acts as a bridge between the events
         received from the HTTP/2 data and IH2EventsHandler

         Arguments:
             events -- A list of events that the remote peer triggered by sending data
         """
-        pass
+        for event in events:
+            if isinstance(event, ConnectionTerminated):
+                self.connection_terminated(event)
+            elif isinstance(event, DataReceived):
+                self.data_received(event)
+            elif isinstance(event, ResponseReceived):
+                self.response_received(event)
+            elif isinstance(event, StreamEnded):
+                self.stream_ended(event)
+            elif isinstance(event, StreamReset):
+                self.stream_reset(event)
+            elif isinstance(event, WindowUpdated):
+                self.window_updated(event)
+            elif isinstance(event, SettingsAcknowledged):
+                self.settings_acknowledged(event)
+            elif isinstance(event, UnknownFrameReceived):
+                logger.warning("Unknown frame received: %s", event.frame)
+
+    # Event handler functions start here
+    def connection_terminated(self, event: ConnectionTerminated) -> None:
+        self._lose_connection_with_error(
+            [RemoteTerminatedConnection(self.metadata["ip_address"], event)]
+        )
+
+    def data_received(self, event: DataReceived) -> None:
+        try:
+            stream = self.streams[event.stream_id]
+        except KeyError:
+            pass  # We ignore server-initiated events
+        else:
+            stream.receive_data(event.data, event.flow_controlled_length)
+
+    def response_received(self, event: ResponseReceived) -> None:
+        try:
+            stream = self.streams[event.stream_id]
+        except KeyError:
+            pass  # We ignore server-initiated events
+        else:
+            stream.receive_headers(event.headers)
+
+    def settings_acknowledged(self, event: SettingsAcknowledged) -> None:
+        self.metadata["settings_acknowledged"] = True
+
+        # Send off all the pending requests as now we have
+        # established a proper HTTP/2 connection
+        self._send_pending_requests()
+
+        # Update certificate when our HTTP/2 connection is established
+        assert self.transport is not None  # typing
+        self.metadata["certificate"] = Certificate(self.transport.getPeerCertificate())
+
+    def stream_ended(self, event: StreamEnded) -> None:
+        try:
+            stream = self.pop_stream(event.stream_id)
+        except KeyError:
+            pass  # We ignore server-initiated events
+        else:
+            stream.close(StreamCloseReason.ENDED, from_protocol=True)
+
+    def stream_reset(self, event: StreamReset) -> None:
+        try:
+            stream = self.pop_stream(event.stream_id)
+        except KeyError:
+            pass  # We ignore server-initiated events
+        else:
+            stream.close(StreamCloseReason.RESET, from_protocol=True)
+
+    def window_updated(self, event: WindowUpdated) -> None:
+        if event.stream_id != 0:
+            self.streams[event.stream_id].receive_window_update()
+        else:
+            # Send leftover data for all the streams
+            for stream in self.streams.values():
+                stream.receive_window_update()


 @implementer(IProtocolNegotiationFactory)
 class H2ClientFactory(Factory):
-
-    def __init__(self, uri: URI, settings: Settings, conn_lost_deferred:
-        Deferred) ->None:
+    def __init__(
+        self, uri: URI, settings: Settings, conn_lost_deferred: Deferred
+    ) -> None:
         self.uri = uri
         self.settings = settings
         self.conn_lost_deferred = conn_lost_deferred
+
+    def buildProtocol(self, addr) -> H2ClientProtocol:
+        return H2ClientProtocol(self.uri, self.settings, self.conn_lost_deferred)
+
+    def acceptableProtocols(self) -> List[bytes]:
+        return [PROTOCOL_NAME]
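
H2ClientProtocol above queues incoming requests in _pending_request_stream_pool and only initiates them while the number of active streams stays below min(local, remote) max_concurrent_streams. A standalone sketch of that FIFO admission rule, with plain-Python stand-ins for the negotiated h2 settings:

    # FIFO admission sketch: start queued streams only while under the
    # negotiated concurrency limit (the limit values are illustrative).
    from collections import deque

    local_max, remote_max = 100, 128    # stand-ins for h2 max_concurrent_streams
    limit = min(local_max, remote_max)

    pending = deque(f"stream-{i}" for i in range(3))
    active = 0

    while pending and active < limit:
        stream = pending.popleft()
        active += 1
        print(f"initiating {stream} (active={active}/{limit})")
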
diff --git a/scrapy/core/http2/stream.py b/scrapy/core/http2/stream.py
index dcbe8e224..6c6ed6f9b 100644
--- a/scrapy/core/http2/stream.py
+++ b/scrapy/core/http2/stream.py
@@ -3,6 +3,7 @@ from enum import Enum
 from io import BytesIO
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 from urllib.parse import urlparse
+
 from h2.errors import ErrorCodes
 from h2.exceptions import H2Error, ProtocolError, StreamClosedError
 from hpack import HeaderTuple
@@ -10,11 +11,15 @@ from twisted.internet.defer import CancelledError, Deferred
 from twisted.internet.error import ConnectionClosed
 from twisted.python.failure import Failure
 from twisted.web.client import ResponseFailed
+
 from scrapy.http import Request
 from scrapy.http.headers import Headers
 from scrapy.responsetypes import responsetypes
+
 if TYPE_CHECKING:
     from scrapy.core.http2.protocol import H2ClientProtocol
+
+
 logger = logging.getLogger(__name__)


@@ -23,36 +28,47 @@ class InactiveStreamClosed(ConnectionClosed):
     of the stream. This happens when a stream is waiting for other
     streams to close and connection is lost."""

-    def __init__(self, request: Request) ->None:
+    def __init__(self, request: Request) -> None:
         self.request = request

-    def __str__(self) ->str:
-        return (
-            f'InactiveStreamClosed: Connection was closed without sending the request {self.request!r}'
-            )
+    def __str__(self) -> str:
+        return f"InactiveStreamClosed: Connection was closed without sending the request {self.request!r}"


 class InvalidHostname(H2Error):
-
-    def __init__(self, request: Request, expected_hostname: str,
-        expected_netloc: str) ->None:
+    def __init__(
+        self, request: Request, expected_hostname: str, expected_netloc: str
+    ) -> None:
         self.request = request
         self.expected_hostname = expected_hostname
         self.expected_netloc = expected_netloc

-    def __str__(self) ->str:
-        return (
-            f'InvalidHostname: Expected {self.expected_hostname} or {self.expected_netloc} in {self.request}'
-            )
+    def __str__(self) -> str:
+        return f"InvalidHostname: Expected {self.expected_hostname} or {self.expected_netloc} in {self.request}"


 class StreamCloseReason(Enum):
+    # Received a StreamEnded event from the remote
     ENDED = 1
+
+    # Received a StreamReset event -- ended abruptly
     RESET = 2
+
+    # Transport connection was lost
     CONNECTION_LOST = 3
+
+    # Expected response body size is more than the allowed limit
     MAXSIZE_EXCEEDED = 4
+
+    # Response deferred is cancelled by the client
+    # (happens when client called response_deferred.cancel())
     CANCELLED = 5
+
+    # Connection lost and the stream was not initiated
     INACTIVE = 6
+
+    # The hostname of the request is not the same as the connected peer's hostname
+    # As a result, sending this request would end the connection
     INVALID_HOSTNAME = 7


@@ -67,9 +83,14 @@ class Stream:
     1. Combine all the data frames
     """

-    def __init__(self, stream_id: int, request: Request, protocol:
-        'H2ClientProtocol', download_maxsize: int=0, download_warnsize: int=0
-        ) ->None:
+    def __init__(
+        self,
+        stream_id: int,
+        request: Request,
+        protocol: "H2ClientProtocol",
+        download_maxsize: int = 0,
+        download_warnsize: int = 0,
+    ) -> None:
         """
         Arguments:
             stream_id -- Unique identifier for the stream within a single HTTP/2 connection
@@ -78,31 +99,65 @@ class Stream:
         """
         self.stream_id: int = stream_id
         self._request: Request = request
-        self._protocol: 'H2ClientProtocol' = protocol
-        self._download_maxsize = self._request.meta.get('download_maxsize',
-            download_maxsize)
-        self._download_warnsize = self._request.meta.get('download_warnsize',
-            download_warnsize)
-        self.metadata: Dict = {'request_content_length': 0 if self._request
-            .body is None else len(self._request.body), 'request_sent': 
-            False, 'reached_warnsize': False, 'remaining_content_length': 0 if
-            self._request.body is None else len(self._request.body),
-            'stream_closed_local': False, 'stream_closed_server': False}
-        self._response: Dict = {'body': BytesIO(), 'flow_controlled_size': 
-            0, 'headers': Headers({})}
-
-        def _cancel(_) ->None:
-            if self.metadata['request_sent']:
+        self._protocol: "H2ClientProtocol" = protocol
+
+        self._download_maxsize = self._request.meta.get(
+            "download_maxsize", download_maxsize
+        )
+        self._download_warnsize = self._request.meta.get(
+            "download_warnsize", download_warnsize
+        )
+
+        # Metadata of an HTTP/2 connection stream
+        # initialized when stream is instantiated
+        self.metadata: Dict = {
+            "request_content_length": 0
+            if self._request.body is None
+            else len(self._request.body),
+            # Flag to keep track of whether the stream has initiated the request
+            "request_sent": False,
+            # Flag to track whether we have logged about exceeding download warnsize
+            "reached_warnsize": False,
+            # Each time we send a data frame, we decrease this value by the amount sent.
+            "remaining_content_length": 0
+            if self._request.body is None
+            else len(self._request.body),
+            # Flag to keep track of whether the client (self) has closed this stream
+            "stream_closed_local": False,
+            # Flag to keep track of whether the server has closed the stream
+            "stream_closed_server": False,
+        }
+
+        # Private variable used to build the response
+        # this response is then converted to the appropriate Response class
+        # and passed to the response deferred callback
+        self._response: Dict = {
+            # Data received frame by frame from the server is appended
+            # and passed to the response Deferred when completely received.
+            "body": BytesIO(),
+            # The amount of data received that counts against the
+            # flow control window
+            "flow_controlled_size": 0,
+            # Headers received after sending the request
+            "headers": Headers({}),
+        }
+
+        def _cancel(_) -> None:
+            # Close this stream as gracefully as possible
+            # If the associated request has been initiated we reset this stream,
+            # otherwise we call the close() method directly
+            if self.metadata["request_sent"]:
                 self.reset_stream(StreamCloseReason.CANCELLED)
             else:
                 self.close(StreamCloseReason.CANCELLED)
+
         self._deferred_response: Deferred = Deferred(_cancel)

-    def __repr__(self) ->str:
-        return f'Stream(id={self.stream_id!r})'
+    def __repr__(self) -> str:
+        return f"Stream(id={self.stream_id!r})"

     @property
-    def _log_warnsize(self) ->bool:
+    def _log_warnsize(self) -> bool:
         """Checks if we have received data which exceeds the download warnsize
         and whether we have not already logged about it.

@@ -110,15 +165,97 @@ class Stream:
             True if both the above conditions hold true
             False if any of the conditions is false
         """
-        pass
+        content_length_header = int(
+            self._response["headers"].get(b"Content-Length", -1)
+        )
+        return (
+            self._download_warnsize
+            and (
+                self._response["flow_controlled_size"] > self._download_warnsize
+                or content_length_header > self._download_warnsize
+            )
+            and not self.metadata["reached_warnsize"]
+        )

-    def get_response(self) ->Deferred:
+    def get_response(self) -> Deferred:
         """Simply return a Deferred which fires when response
         from the asynchronous request is available
         """
-        pass
+        return self._deferred_response
+
+    def check_request_url(self) -> bool:
+        # Make sure that we are sending the request to the correct URL
+        url = urlparse(self._request.url)
+        return (
+            url.netloc == str(self._protocol.metadata["uri"].host, "utf-8")
+            or url.netloc == str(self._protocol.metadata["uri"].netloc, "utf-8")
+            or url.netloc
+            == f'{self._protocol.metadata["ip_address"]}:{self._protocol.metadata["uri"].port}'
+        )
+
+    def _get_request_headers(self) -> List[Tuple[str, str]]:
+        url = urlparse(self._request.url)
+
+        path = url.path
+        if url.query:
+            path += "?" + url.query
+
+        # This pseudo-header field MUST NOT be empty for "http" or "https"
+        # URIs; "http" or "https" URIs that do not contain a path component
+        # MUST include a value of '/'. The exception to this rule is an
+        # OPTIONS request for an "http" or "https" URI that does not include
+        # a path component; these MUST include a ":path" pseudo-header field
+        # with a value of '*' (refer RFC 7540 - Section 8.1.2.3)
+        if not path:
+            path = "*" if self._request.method == "OPTIONS" else "/"
+
+        # Make sure pseudo-headers come before all the other headers
+        headers = [
+            (":method", self._request.method),
+            (":authority", url.netloc),
+        ]
+
+        # The ":scheme" and ":path" pseudo-header fields MUST
+        # be omitted for CONNECT method (refer RFC 7540 - Section 8.3)
+        if self._request.method != "CONNECT":
+            headers += [
+                (":scheme", self._protocol.metadata["uri"].scheme),
+                (":path", path),
+            ]

-    def send_data(self) ->None:
+        content_length = str(len(self._request.body))
+        headers.append(("Content-Length", content_length))
+
+        content_length_name = self._request.headers.normkey(b"Content-Length")
+        for name, values in self._request.headers.items():
+            for value in values:
+                value = str(value, "utf-8")
+                if name == content_length_name:
+                    if value != content_length:
+                        logger.warning(
+                            "Ignoring bad Content-Length header %r of request %r, "
+                            "sending %r instead",
+                            value,
+                            self._request,
+                            content_length,
+                        )
+                    continue
+                headers.append((str(name, "utf-8"), value))
+
+        return headers
+
+    def initiate_request(self) -> None:
+        if self.check_request_url():
+            headers = self._get_request_headers()
+            self._protocol.conn.send_headers(self.stream_id, headers, end_stream=False)
+            self.metadata["request_sent"] = True
+            self.send_data()
+        else:
+            # Close this stream calling the response errback
+            # Note that we have not sent any headers
+            self.close(StreamCloseReason.INVALID_HOSTNAME)
+
+    def send_data(self) -> None:
         """Called immediately after the headers are sent. Here we send all the
         data as part of the request.

@@ -129,27 +266,227 @@ class Stream:
            and has initiated request already by sending HEADER frame. If not then
            stream will raise ProtocolError (raise by h2 state machine).
         """
-        pass
+        if self.metadata["stream_closed_local"]:
+            raise StreamClosedError(self.stream_id)
+
+        # Firstly, check what the flow control window is for current stream.
+        window_size = self._protocol.conn.local_flow_control_window(
+            stream_id=self.stream_id
+        )
+
+        # Next, check what the maximum frame size is.
+        max_frame_size = self._protocol.conn.max_outbound_frame_size
+
+        # We will send no more than the window size or the remaining body size
+        # of data in this call, whichever is smaller.
+        bytes_to_send_size = min(window_size, self.metadata["remaining_content_length"])
+
+        # We now need to send a number of data frames.
+        while bytes_to_send_size > 0:
+            chunk_size = min(bytes_to_send_size, max_frame_size)
+
+            data_chunk_start_id = (
+                self.metadata["request_content_length"]
+                - self.metadata["remaining_content_length"]
+            )
+            data_chunk = self._request.body[
+                data_chunk_start_id : data_chunk_start_id + chunk_size
+            ]
+
+            self._protocol.conn.send_data(self.stream_id, data_chunk, end_stream=False)
+
+            bytes_to_send_size -= chunk_size
+            self.metadata["remaining_content_length"] -= chunk_size
+
+        self.metadata["remaining_content_length"] = max(
+            0, self.metadata["remaining_content_length"]
+        )
+
+        # End the stream if no more data needs to be sent
+        if self.metadata["remaining_content_length"] == 0:
+            self._protocol.conn.end_stream(self.stream_id)

-    def receive_window_update(self) ->None:
+        # Q. What about the rest of the data?
+        # Ans: Remaining Data frames will be sent when we get a WindowUpdate frame
+
+    def receive_window_update(self) -> None:
         """Flow control window size was changed.
         Send data that earlier could not be sent as we were
         blocked behind the flow control.
         """
-        pass
+        if (
+            self.metadata["remaining_content_length"]
+            and not self.metadata["stream_closed_server"]
+            and self.metadata["request_sent"]
+        ):
+            self.send_data()
+
+    def receive_data(self, data: bytes, flow_controlled_length: int) -> None:
+        self._response["body"].write(data)
+        self._response["flow_controlled_size"] += flow_controlled_length
+
+        # We check maxsize here in case the Content-Length header was not received
+        if (
+            self._download_maxsize
+            and self._response["flow_controlled_size"] > self._download_maxsize
+        ):
+            self.reset_stream(StreamCloseReason.MAXSIZE_EXCEEDED)
+            return
+
+        if self._log_warnsize:
+            self.metadata["reached_warnsize"] = True
+            warning_msg = (
+                f'Received more ({self._response["flow_controlled_size"]}) bytes than download '
+                f"warn size ({self._download_warnsize}) in request {self._request}"
+            )
+            logger.warning(warning_msg)

-    def reset_stream(self, reason: StreamCloseReason=StreamCloseReason.RESET
-        ) ->None:
+        # Acknowledge the data received
+        self._protocol.conn.acknowledge_received_data(
+            self._response["flow_controlled_size"], self.stream_id
+        )
+
+    def receive_headers(self, headers: List[HeaderTuple]) -> None:
+        for name, value in headers:
+            self._response["headers"].appendlist(name, value)
+
+        # Check if we exceed the allowed max data size which can be received
+        expected_size = int(self._response["headers"].get(b"Content-Length", -1))
+        if self._download_maxsize and expected_size > self._download_maxsize:
+            self.reset_stream(StreamCloseReason.MAXSIZE_EXCEEDED)
+            return
+
+        if self._log_warnsize:
+            self.metadata["reached_warnsize"] = True
+            warning_msg = (
+                f"Expected response size ({expected_size}) larger than "
+                f"download warn size ({self._download_warnsize}) in request {self._request}"
+            )
+            logger.warning(warning_msg)
+
+    def reset_stream(self, reason: StreamCloseReason = StreamCloseReason.RESET) -> None:
         """Close this stream by sending a RST_FRAME to the remote peer"""
-        pass
+        if self.metadata["stream_closed_local"]:
+            raise StreamClosedError(self.stream_id)

-    def close(self, reason: StreamCloseReason, errors: Optional[List[
-        BaseException]]=None, from_protocol: bool=False) ->None:
+        # Clear buffer earlier to avoid keeping data in memory for a long time
+        self._response["body"].truncate(0)
+
+        self.metadata["stream_closed_local"] = True
+        self._protocol.conn.reset_stream(self.stream_id, ErrorCodes.REFUSED_STREAM)
+        self.close(reason)
+
+    def close(
+        self,
+        reason: StreamCloseReason,
+        errors: Optional[List[BaseException]] = None,
+        from_protocol: bool = False,
+    ) -> None:
         """Based on the reason sent we will handle each case."""
-        pass
+        if self.metadata["stream_closed_server"]:
+            raise StreamClosedError(self.stream_id)
+
+        if not isinstance(reason, StreamCloseReason):
+            raise TypeError(
+                f"Expected StreamCloseReason, received {reason.__class__.__qualname__}"
+            )
+
+        # Default errors to an empty list, as some of the
+        # cases below add exceptions to it
+        errors = errors or []

-    def _fire_response_deferred(self) ->None:
+        if not from_protocol:
+            self._protocol.pop_stream(self.stream_id)
+
+        self.metadata["stream_closed_server"] = True
+
+        # Unlike HTTP/1.1, we do not check for Content-Length or Transfer-Encoding in
+        # the response headers and do not add the `partial` flag, as 'A request or
+        # response that includes a payload body can include a content-length header
+        # field' (RFC 7540 - Section 8.1.2.6)
+
+        # NOTE: The order in which the events are handled is important here,
+        # as we immediately cancel the request when maxsize is exceeded while
+        # receiving DATA frames, i.e. when the headers did not include a
+        # Content-Length
+        if reason is StreamCloseReason.MAXSIZE_EXCEEDED:
+            expected_size = int(
+                self._response["headers"].get(
+                    b"Content-Length", self._response["flow_controlled_size"]
+                )
+            )
+            error_msg = (
+                f"Cancelling download of {self._request.url}: received response "
+                f"size ({expected_size}) larger than download max size ({self._download_maxsize})"
+            )
+            logger.error(error_msg)
+            self._deferred_response.errback(CancelledError(error_msg))
+
+        elif reason is StreamCloseReason.ENDED:
+            self._fire_response_deferred()
+
+        # Stream was abruptly ended here
+        elif reason is StreamCloseReason.CANCELLED:
+            # Client has cancelled the request. Remove all the data
+            # received and fire the response deferred with no flags set
+
+            # NOTE: The data is already flushed in Stream.reset_stream() called
+            # immediately when the stream needs to be cancelled
+
+            # There may be no :status in the headers, so we set
+            # HTTP Status Code: 499 - Client Closed Request
+            self._response["headers"][":status"] = "499"
+            self._fire_response_deferred()
+
+        elif reason is StreamCloseReason.RESET:
+            self._deferred_response.errback(
+                ResponseFailed(
+                    [
+                        Failure(
+                            f'Remote peer {self._protocol.metadata["ip_address"]} sent RST_STREAM',
+                            ProtocolError,
+                        )
+                    ]
+                )
+            )
+
+        elif reason is StreamCloseReason.CONNECTION_LOST:
+            self._deferred_response.errback(ResponseFailed(errors))
+
+        elif reason is StreamCloseReason.INACTIVE:
+            errors.insert(0, InactiveStreamClosed(self._request))
+            self._deferred_response.errback(ResponseFailed(errors))
+
+        else:
+            assert reason is StreamCloseReason.INVALID_HOSTNAME
+            self._deferred_response.errback(
+                InvalidHostname(
+                    self._request,
+                    str(self._protocol.metadata["uri"].host, "utf-8"),
+                    f'{self._protocol.metadata["ip_address"]}:{self._protocol.metadata["uri"].port}',
+                )
+            )
+
+    def _fire_response_deferred(self) -> None:
         """Builds response from the self._response dict
         and fires the response deferred callback with the
         generated response instance"""
-        pass
+
+        body = self._response["body"].getvalue()
+        response_cls = responsetypes.from_args(
+            headers=self._response["headers"],
+            url=self._request.url,
+            body=body,
+        )
+
+        response = response_cls(
+            url=self._request.url,
+            status=int(self._response["headers"][":status"]),
+            headers=self._response["headers"],
+            body=body,
+            request=self._request,
+            certificate=self._protocol.metadata["certificate"],
+            ip_address=self._protocol.metadata["ip_address"],
+            protocol="h2",
+        )
+
+        self._deferred_response.callback(response)
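
The send_data() implementation above splits the request body into DATA frames bounded by two limits: the local flow-control window caps how much can be sent in one call, and max_outbound_frame_size caps each individual frame. A minimal sketch of that chunking arithmetic, with a made-up helper name and example numbers (the real values come from h2's local_flow_control_window() and max_outbound_frame_size):

    def chunk_sizes(window_size: int, max_frame_size: int, remaining: int) -> list:
        """Sizes of the DATA frames a single send_data() call would emit."""
        to_send = min(window_size, remaining)
        chunks = []
        while to_send > 0:
            chunk = min(to_send, max_frame_size)
            chunks.append(chunk)
            to_send -= chunk
        return chunks

    # With the default 65535-byte window, 16384-byte frames and 40000 bytes of
    # body left, one call emits two full frames and one partial frame; the rest
    # waits for a WINDOW_UPDATE, which triggers receive_window_update() above.
    assert chunk_sizes(65535, 16384, 40000) == [16384, 16384, 7232]
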
diff --git a/scrapy/core/scheduler.py b/scrapy/core/scheduler.py
index 39a4bb6a3..17c95f1ea 100644
--- a/scrapy/core/scheduler.py
+++ b/scrapy/core/scheduler.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
+
 import json
 import logging
 from abc import abstractmethod
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional, Type, TypeVar, cast
+
 from twisted.internet.defer import Deferred
+
 from scrapy.crawler import Crawler
 from scrapy.dupefilters import BaseDupeFilter
 from scrapy.http.request import Request
@@ -12,8 +15,12 @@ from scrapy.spiders import Spider
 from scrapy.statscollectors import StatsCollector
 from scrapy.utils.job import job_dir
 from scrapy.utils.misc import create_instance, load_object
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self
+
+
 logger = logging.getLogger(__name__)


@@ -22,14 +29,18 @@ class BaseSchedulerMeta(type):
     Metaclass to check scheduler classes against the necessary interface
     """

-    def __instancecheck__(cls, instance: Any) ->bool:
+    def __instancecheck__(cls, instance: Any) -> bool:
         return cls.__subclasscheck__(type(instance))

-    def __subclasscheck__(cls, subclass: type) ->bool:
-        return hasattr(subclass, 'has_pending_requests') and callable(subclass
-            .has_pending_requests) and hasattr(subclass, 'enqueue_request'
-            ) and callable(subclass.enqueue_request) and hasattr(subclass,
-            'next_request') and callable(subclass.next_request)
+    def __subclasscheck__(cls, subclass: type) -> bool:
+        return (
+            hasattr(subclass, "has_pending_requests")
+            and callable(subclass.has_pending_requests)
+            and hasattr(subclass, "enqueue_request")
+            and callable(subclass.enqueue_request)
+            and hasattr(subclass, "next_request")
+            and callable(subclass.next_request)
+        )


 class BaseScheduler(metaclass=BaseSchedulerMeta):
@@ -50,13 +61,13 @@ class BaseScheduler(metaclass=BaseSchedulerMeta):
     """

     @classmethod
-    def from_crawler(cls, crawler: Crawler) ->Self:
+    def from_crawler(cls, crawler: Crawler) -> Self:
         """
         Factory method which receives the current :class:`~scrapy.crawler.Crawler` object as argument.
         """
-        pass
+        return cls()

-    def open(self, spider: Spider) ->Optional[Deferred]:
+    def open(self, spider: Spider) -> Optional[Deferred]:
         """
         Called when the spider is opened by the engine. It receives the spider
         instance as argument and it's useful to execute initialization code.
@@ -66,7 +77,7 @@ class BaseScheduler(metaclass=BaseSchedulerMeta):
         """
         pass

-    def close(self, reason: str) ->Optional[Deferred]:
+    def close(self, reason: str) -> Optional[Deferred]:
         """
         Called when the spider is closed by the engine. It receives the reason why the crawl
         finished as argument and it's useful to execute cleaning code.
@@ -77,14 +88,14 @@ class BaseScheduler(metaclass=BaseSchedulerMeta):
         pass

     @abstractmethod
-    def has_pending_requests(self) ->bool:
+    def has_pending_requests(self) -> bool:
         """
         ``True`` if the scheduler has enqueued requests, ``False`` otherwise
         """
-        pass
+        raise NotImplementedError()

     @abstractmethod
-    def enqueue_request(self, request: Request) ->bool:
+    def enqueue_request(self, request: Request) -> bool:
         """
         Process a request received by the engine.

@@ -95,10 +106,10 @@ class BaseScheduler(metaclass=BaseSchedulerMeta):
         For reference, the default Scrapy scheduler returns ``False`` when the
         request is rejected by the dupefilter.
         """
-        pass
+        raise NotImplementedError()

     @abstractmethod
-    def next_request(self) ->Optional[Request]:
+    def next_request(self) -> Optional[Request]:
         """
         Return the next :class:`~scrapy.http.Request` to be processed, or ``None``
         to indicate that there are no requests to be considered ready at the moment.
@@ -107,10 +118,10 @@ class BaseScheduler(metaclass=BaseSchedulerMeta):
         to the downloader in the current reactor cycle. The engine will continue
         calling ``next_request`` until ``has_pending_requests`` is ``False``.
         """
-        pass
+        raise NotImplementedError()


-SchedulerTV = TypeVar('SchedulerTV', bound='Scheduler')
+SchedulerTV = TypeVar("SchedulerTV", bound="Scheduler")


 class Scheduler(BaseScheduler):
@@ -164,10 +175,17 @@ class Scheduler(BaseScheduler):
     :type crawler: :class:`scrapy.crawler.Crawler`
     """

-    def __init__(self, dupefilter: BaseDupeFilter, jobdir: Optional[str]=
-        None, dqclass=None, mqclass=None, logunser: bool=False, stats:
-        Optional[StatsCollector]=None, pqclass=None, crawler: Optional[
-        Crawler]=None):
+    def __init__(
+        self,
+        dupefilter: BaseDupeFilter,
+        jobdir: Optional[str] = None,
+        dqclass=None,
+        mqclass=None,
+        logunser: bool = False,
+        stats: Optional[StatsCollector] = None,
+        pqclass=None,
+        crawler: Optional[Crawler] = None,
+    ):
         self.df: BaseDupeFilter = dupefilter
         self.dqdir: Optional[str] = self._dqdir(jobdir)
         self.pqclass = pqclass
@@ -178,28 +196,48 @@ class Scheduler(BaseScheduler):
         self.crawler: Optional[Crawler] = crawler

     @classmethod
-    def from_crawler(cls: Type[SchedulerTV], crawler: Crawler) ->SchedulerTV:
+    def from_crawler(cls: Type[SchedulerTV], crawler: Crawler) -> SchedulerTV:
         """
         Factory method, initializes the scheduler with arguments taken from the crawl settings
         """
-        pass
-
-    def open(self, spider: Spider) ->Optional[Deferred]:
+        dupefilter_cls = load_object(crawler.settings["DUPEFILTER_CLASS"])
+        return cls(
+            dupefilter=create_instance(dupefilter_cls, crawler.settings, crawler),
+            jobdir=job_dir(crawler.settings),
+            dqclass=load_object(crawler.settings["SCHEDULER_DISK_QUEUE"]),
+            mqclass=load_object(crawler.settings["SCHEDULER_MEMORY_QUEUE"]),
+            logunser=crawler.settings.getbool("SCHEDULER_DEBUG"),
+            stats=crawler.stats,
+            pqclass=load_object(crawler.settings["SCHEDULER_PRIORITY_QUEUE"]),
+            crawler=crawler,
+        )
+
+    def has_pending_requests(self) -> bool:
+        return len(self) > 0
+
+    def open(self, spider: Spider) -> Optional[Deferred]:
         """
         (1) initialize the memory queue
         (2) initialize the disk queue if the ``jobdir`` attribute is a valid directory
         (3) return the result of the dupefilter's ``open`` method
         """
-        pass
+        self.spider = spider
+        self.mqs = self._mq()
+        self.dqs = self._dq() if self.dqdir else None
+        return self.df.open()

-    def close(self, reason: str) ->Optional[Deferred]:
+    def close(self, reason: str) -> Optional[Deferred]:
         """
         (1) dump pending requests to disk if there is a disk queue
         (2) return the result of the dupefilter's ``close`` method
         """
-        pass
+        if self.dqs is not None:
+            state = self.dqs.close()
+            assert isinstance(self.dqdir, str)
+            self._write_dqs_state(self.dqdir, state)
+        return self.df.close(reason)

-    def enqueue_request(self, request: Request) ->bool:
+    def enqueue_request(self, request: Request) -> bool:
         """
         Unless the received request is filtered out by the Dupefilter, attempt to push
         it into the disk queue, falling back to pushing it into the memory queue.
@@ -209,9 +247,20 @@ class Scheduler(BaseScheduler):

         Return ``True`` if the request was stored successfully, ``False`` otherwise.
         """
-        pass
-
-    def next_request(self) ->Optional[Request]:
+        if not request.dont_filter and self.df.request_seen(request):
+            self.df.log(request, self.spider)
+            return False
+        dqok = self._dqpush(request)
+        assert self.stats is not None
+        if dqok:
+            self.stats.inc_value("scheduler/enqueued/disk", spider=self.spider)
+        else:
+            self._mqpush(request)
+            self.stats.inc_value("scheduler/enqueued/memory", spider=self.spider)
+        self.stats.inc_value("scheduler/enqueued", spider=self.spider)
+        return True
+
+    def next_request(self) -> Optional[Request]:
         """
         Return a :class:`~scrapy.http.Request` object from the memory queue,
         falling back to the disk queue if the memory queue is empty.
@@ -220,23 +269,103 @@ class Scheduler(BaseScheduler):
         Increment the appropriate stats, such as: ``scheduler/dequeued``,
         ``scheduler/dequeued/disk``, ``scheduler/dequeued/memory``.
         """
-        pass
-
-    def __len__(self) ->int:
+        request: Optional[Request] = self.mqs.pop()
+        assert self.stats is not None
+        if request is not None:
+            self.stats.inc_value("scheduler/dequeued/memory", spider=self.spider)
+        else:
+            request = self._dqpop()
+            if request is not None:
+                self.stats.inc_value("scheduler/dequeued/disk", spider=self.spider)
+        if request is not None:
+            self.stats.inc_value("scheduler/dequeued", spider=self.spider)
+        return request
+
+    def __len__(self) -> int:
         """
         Return the total number of enqueued requests
         """
-        return len(self.dqs) + len(self.mqs) if self.dqs is not None else len(
-            self.mqs)
+        return len(self.dqs) + len(self.mqs) if self.dqs is not None else len(self.mqs)
+
+    def _dqpush(self, request: Request) -> bool:
+        if self.dqs is None:
+            return False
+        try:
+            self.dqs.push(request)
+        except ValueError as e:  # non serializable request
+            if self.logunser:
+                msg = (
+                    "Unable to serialize request: %(request)s - reason:"
+                    " %(reason)s - no more unserializable requests will be"
+                    " logged (stats being collected)"
+                )
+                logger.warning(
+                    msg,
+                    {"request": request, "reason": e},
+                    exc_info=True,
+                    extra={"spider": self.spider},
+                )
+                self.logunser = False
+            assert self.stats is not None
+            self.stats.inc_value("scheduler/unserializable", spider=self.spider)
+            return False
+        else:
+            return True
+
+    def _mqpush(self, request: Request) -> None:
+        self.mqs.push(request)
+
+    def _dqpop(self) -> Optional[Request]:
+        if self.dqs is not None:
+            return self.dqs.pop()
+        return None

     def _mq(self):
         """Create a new priority queue instance, with in-memory storage"""
-        pass
+        return create_instance(
+            self.pqclass,
+            settings=None,
+            crawler=self.crawler,
+            downstream_queue_cls=self.mqclass,
+            key="",
+        )

     def _dq(self):
         """Create a new priority queue instance, with disk storage"""
-        pass
-
-    def _dqdir(self, jobdir: Optional[str]) ->Optional[str]:
+        assert self.dqdir
+        state = self._read_dqs_state(self.dqdir)
+        q = create_instance(
+            self.pqclass,
+            settings=None,
+            crawler=self.crawler,
+            downstream_queue_cls=self.dqclass,
+            key=self.dqdir,
+            startprios=state,
+        )
+        if q:
+            logger.info(
+                "Resuming crawl (%(queuesize)d requests scheduled)",
+                {"queuesize": len(q)},
+                extra={"spider": self.spider},
+            )
+        return q
+
+    def _dqdir(self, jobdir: Optional[str]) -> Optional[str]:
         """Return a folder name to keep disk queue state at"""
-        pass
+        if jobdir:
+            dqdir = Path(jobdir, "requests.queue")
+            if not dqdir.exists():
+                dqdir.mkdir(parents=True)
+            return str(dqdir)
+        return None
+
+    def _read_dqs_state(self, dqdir: str) -> list:
+        path = Path(dqdir, "active.json")
+        if not path.exists():
+            return []
+        with path.open(encoding="utf-8") as f:
+            return cast(list, json.load(f))
+
+    def _write_dqs_state(self, dqdir: str, state: list) -> None:
+        with Path(dqdir, "active.json").open("w", encoding="utf-8") as f:
+            json.dump(state, f)
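
BaseSchedulerMeta above turns scheduler compliance into a duck-typing check: issubclass()/isinstance() against BaseScheduler only require callable has_pending_requests, enqueue_request and next_request attributes. A small sketch illustrating this (TinyScheduler is a hypothetical class, not part of Scrapy):

    from collections import deque
    from typing import Optional

    from scrapy.core.scheduler import BaseScheduler
    from scrapy.http.request import Request

    class TinyScheduler:
        """Hypothetical in-memory FIFO scheduler for illustration only."""

        def __init__(self) -> None:
            self.queue = deque()

        def has_pending_requests(self) -> bool:
            return bool(self.queue)

        def enqueue_request(self, request: Request) -> bool:
            self.queue.append(request)
            return True

        def next_request(self) -> Optional[Request]:
            return self.queue.popleft() if self.queue else None

    # TinyScheduler never inherits from BaseScheduler, yet the metaclass accepts it.
    assert issubclass(TinyScheduler, BaseScheduler)
    assert isinstance(TinyScheduler(), BaseScheduler)
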
diff --git a/scrapy/core/scraper.py b/scrapy/core/scraper.py
index d5cedad9e..b2c26507c 100644
--- a/scrapy/core/scraper.py
+++ b/scrapy/core/scraper.py
@@ -1,12 +1,28 @@
 """This module implements the Scraper component which parses responses and
 extracts information from them"""
 from __future__ import annotations
+
 import logging
 from collections import deque
-from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterable, Deque, Generator, Iterable, Optional, Set, Tuple, Type, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncGenerator,
+    AsyncIterable,
+    Deque,
+    Generator,
+    Iterable,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)
+
 from itemadapter import is_item
 from twisted.internet.defer import Deferred, inlineCallbacks
 from twisted.python.failure import Failure
+
 from scrapy import Spider, signals
 from scrapy.core.spidermw import SpiderMiddlewareManager
 from scrapy.exceptions import CloseSpider, DropItem, IgnoreRequest
@@ -14,21 +30,34 @@ from scrapy.http import Request, Response
 from scrapy.logformatter import LogFormatter
 from scrapy.pipelines import ItemPipelineManager
 from scrapy.signalmanager import SignalManager
-from scrapy.utils.defer import aiter_errback, defer_fail, defer_succeed, iter_errback, parallel, parallel_async
+from scrapy.utils.defer import (
+    aiter_errback,
+    defer_fail,
+    defer_succeed,
+    iter_errback,
+    parallel,
+    parallel_async,
+)
 from scrapy.utils.log import failure_to_exc_info, logformatter_adapter
 from scrapy.utils.misc import load_object, warn_on_generator_with_return_value
 from scrapy.utils.spider import iterate_spider_output
+
 if TYPE_CHECKING:
     from scrapy.crawler import Crawler
+
+
 QueueTuple = Tuple[Union[Response, Failure], Request, Deferred]
+
+
 logger = logging.getLogger(__name__)


 class Slot:
     """Scraper slot (one per running spider)"""
+
     MIN_RESPONSE_SIZE = 1024

-    def __init__(self, max_active_size: int=5000000):
+    def __init__(self, max_active_size: int = 5000000):
         self.max_active_size = max_active_size
         self.queue: Deque[QueueTuple] = deque()
         self.active: Set[Request] = set()
@@ -36,60 +65,260 @@ class Slot:
         self.itemproc_size: int = 0
         self.closing: Optional[Deferred] = None

+    def add_response_request(
+        self, result: Union[Response, Failure], request: Request
+    ) -> Deferred:
+        deferred: Deferred = Deferred()
+        self.queue.append((result, request, deferred))
+        if isinstance(result, Response):
+            self.active_size += max(len(result.body), self.MIN_RESPONSE_SIZE)
+        else:
+            self.active_size += self.MIN_RESPONSE_SIZE
+        return deferred
+
+    def next_response_request_deferred(self) -> QueueTuple:
+        response, request, deferred = self.queue.popleft()
+        self.active.add(request)
+        return response, request, deferred
+
+    def finish_response(
+        self, result: Union[Response, Failure], request: Request
+    ) -> None:
+        self.active.remove(request)
+        if isinstance(result, Response):
+            self.active_size -= max(len(result.body), self.MIN_RESPONSE_SIZE)
+        else:
+            self.active_size -= self.MIN_RESPONSE_SIZE
+
+    def is_idle(self) -> bool:
+        return not (self.queue or self.active)
+
+    def needs_backout(self) -> bool:
+        return self.active_size > self.max_active_size

-class Scraper:

-    def __init__(self, crawler: Crawler) ->None:
+class Scraper:
+    def __init__(self, crawler: Crawler) -> None:
         self.slot: Optional[Slot] = None
-        self.spidermw: SpiderMiddlewareManager = (SpiderMiddlewareManager.
-            from_crawler(crawler))
-        itemproc_cls: Type[ItemPipelineManager] = load_object(crawler.
-            settings['ITEM_PROCESSOR'])
+        self.spidermw: SpiderMiddlewareManager = SpiderMiddlewareManager.from_crawler(
+            crawler
+        )
+        itemproc_cls: Type[ItemPipelineManager] = load_object(
+            crawler.settings["ITEM_PROCESSOR"]
+        )
         self.itemproc: ItemPipelineManager = itemproc_cls.from_crawler(crawler)
-        self.concurrent_items: int = crawler.settings.getint('CONCURRENT_ITEMS'
-            )
+        self.concurrent_items: int = crawler.settings.getint("CONCURRENT_ITEMS")
         self.crawler: Crawler = crawler
         self.signals: SignalManager = crawler.signals
         assert crawler.logformatter
         self.logformatter: LogFormatter = crawler.logformatter

     @inlineCallbacks
-    def open_spider(self, spider: Spider) ->Generator[Deferred, Any, None]:
+    def open_spider(self, spider: Spider) -> Generator[Deferred, Any, None]:
         """Open the given spider for scraping and allocate resources for it"""
-        pass
+        self.slot = Slot(self.crawler.settings.getint("SCRAPER_SLOT_MAX_ACTIVE_SIZE"))
+        yield self.itemproc.open_spider(spider)

-    def close_spider(self, spider: Spider) ->Deferred:
+    def close_spider(self, spider: Spider) -> Deferred:
         """Close a spider being scraped and release its resources"""
-        pass
+        if self.slot is None:
+            raise RuntimeError("Scraper slot not assigned")
+        self.slot.closing = Deferred()
+        self.slot.closing.addCallback(self.itemproc.close_spider)
+        self._check_if_closing(spider)
+        return self.slot.closing

-    def is_idle(self) ->bool:
+    def is_idle(self) -> bool:
         """Return True if there isn't any more spiders to process"""
-        pass
+        return not self.slot
+
+    def _check_if_closing(self, spider: Spider) -> None:
+        assert self.slot is not None  # typing
+        if self.slot.closing and self.slot.is_idle():
+            self.slot.closing.callback(spider)
+
+    def enqueue_scrape(
+        self, result: Union[Response, Failure], request: Request, spider: Spider
+    ) -> Deferred:
+        if self.slot is None:
+            raise RuntimeError("Scraper slot not assigned")
+        dfd = self.slot.add_response_request(result, request)

-    def _scrape(self, result: Union[Response, Failure], request: Request,
-        spider: Spider) ->Deferred:
+        def finish_scraping(_: Any) -> Any:
+            assert self.slot is not None
+            self.slot.finish_response(result, request)
+            self._check_if_closing(spider)
+            self._scrape_next(spider)
+            return _
+
+        dfd.addBoth(finish_scraping)
+        dfd.addErrback(
+            lambda f: logger.error(
+                "Scraper bug processing %(request)s",
+                {"request": request},
+                exc_info=failure_to_exc_info(f),
+                extra={"spider": spider},
+            )
+        )
+        self._scrape_next(spider)
+        return dfd
+
+    def _scrape_next(self, spider: Spider) -> None:
+        assert self.slot is not None  # typing
+        while self.slot.queue:
+            response, request, deferred = self.slot.next_response_request_deferred()
+            self._scrape(response, request, spider).chainDeferred(deferred)
+
+    def _scrape(
+        self, result: Union[Response, Failure], request: Request, spider: Spider
+    ) -> Deferred:
         """
         Handle the downloaded response or failure through the spider callback/errback
         """
-        pass
+        if not isinstance(result, (Response, Failure)):
+            raise TypeError(
+                f"Incorrect type: expected Response or Failure, got {type(result)}: {result!r}"
+            )
+        dfd = self._scrape2(
+            result, request, spider
+        )  # returns spider's processed output
+        dfd.addErrback(self.handle_spider_error, request, result, spider)
+        dfd.addCallback(self.handle_spider_output, request, result, spider)
+        return dfd

-    def _scrape2(self, result: Union[Response, Failure], request: Request,
-        spider: Spider) ->Deferred:
+    def _scrape2(
+        self, result: Union[Response, Failure], request: Request, spider: Spider
+    ) -> Deferred:
         """
         Handle the different cases of the request's result being a Response or a Failure
         """
-        pass
+        if isinstance(result, Response):
+            return self.spidermw.scrape_response(
+                self.call_spider, result, request, spider
+            )
+        # else result is a Failure
+        dfd = self.call_spider(result, request, spider)
+        return dfd.addErrback(self._log_download_errors, result, request, spider)
+
+    def call_spider(
+        self, result: Union[Response, Failure], request: Request, spider: Spider
+    ) -> Deferred:
+        if isinstance(result, Response):
+            if getattr(result, "request", None) is None:
+                result.request = request
+            callback = result.request.callback or spider._parse
+            warn_on_generator_with_return_value(spider, callback)
+            dfd = defer_succeed(result)
+            dfd.addCallbacks(
+                callback=callback, callbackKeywords=result.request.cb_kwargs
+            )
+        else:  # result is a Failure
+            # TODO: properly type adding this attribute to a Failure
+            result.request = request  # type: ignore[attr-defined]
+            dfd = defer_fail(result)
+            if request.errback:
+                warn_on_generator_with_return_value(spider, request.errback)
+                dfd.addErrback(request.errback)
+        return dfd.addCallback(iterate_spider_output)
+
+    def handle_spider_error(
+        self,
+        _failure: Failure,
+        request: Request,
+        response: Union[Response, Failure],
+        spider: Spider,
+    ) -> None:
+        exc = _failure.value
+        if isinstance(exc, CloseSpider):
+            assert self.crawler.engine is not None  # typing
+            self.crawler.engine.close_spider(spider, exc.reason or "cancelled")
+            return
+        logkws = self.logformatter.spider_error(_failure, request, response, spider)
+        logger.log(
+            *logformatter_adapter(logkws),
+            exc_info=failure_to_exc_info(_failure),
+            extra={"spider": spider},
+        )
+        self.signals.send_catch_log(
+            signal=signals.spider_error,
+            failure=_failure,
+            response=response,
+            spider=spider,
+        )
+        assert self.crawler.stats
+        self.crawler.stats.inc_value(
+            f"spider_exceptions/{_failure.value.__class__.__name__}", spider=spider
+        )

-    def _process_spidermw_output(self, output: Any, request: Request,
-        response: Response, spider: Spider) ->Optional[Deferred]:
+    def handle_spider_output(
+        self,
+        result: Union[Iterable, AsyncIterable],
+        request: Request,
+        response: Union[Response, Failure],
+        spider: Spider,
+    ) -> Deferred:
+        if not result:
+            return defer_succeed(None)
+        it: Union[Generator, AsyncGenerator]
+        if isinstance(result, AsyncIterable):
+            it = aiter_errback(
+                result, self.handle_spider_error, request, response, spider
+            )
+            dfd = parallel_async(
+                it,
+                self.concurrent_items,
+                self._process_spidermw_output,
+                request,
+                response,
+                spider,
+            )
+        else:
+            it = iter_errback(
+                result, self.handle_spider_error, request, response, spider
+            )
+            dfd = parallel(
+                it,
+                self.concurrent_items,
+                self._process_spidermw_output,
+                request,
+                response,
+                spider,
+            )
+        return dfd
+
+    def _process_spidermw_output(
+        self, output: Any, request: Request, response: Response, spider: Spider
+    ) -> Optional[Deferred]:
         """Process each Request/Item (given in the output parameter) returned
         from the given spider
         """
-        pass
+        assert self.slot is not None  # typing
+        if isinstance(output, Request):
+            assert self.crawler.engine is not None  # typing
+            self.crawler.engine.crawl(request=output)
+        elif is_item(output):
+            self.slot.itemproc_size += 1
+            dfd = self.itemproc.process_item(output, spider)
+            dfd.addBoth(self._itemproc_finished, output, response, spider)
+            return dfd
+        elif output is None:
+            pass
+        else:
+            typename = type(output).__name__
+            logger.error(
+                "Spider must return request, item, or None, got %(typename)r in %(request)s",
+                {"request": request, "typename": typename},
+                extra={"spider": spider},
+            )
+        return None

-    def _log_download_errors(self, spider_failure: Failure,
-        download_failure: Failure, request: Request, spider: Spider) ->Union[
-        Failure, None]:
+    def _log_download_errors(
+        self,
+        spider_failure: Failure,
+        download_failure: Failure,
+        request: Request,
+        spider: Spider,
+    ) -> Union[Failure, None]:
         """Log and silence errors that come from the engine (typically download
         errors that got propagated through here).

@@ -97,9 +326,67 @@ class Scraper:
         download_failure: the value passed into _scrape2() from
         ExecutionEngine._handle_downloader_output() as "result"
         """
-        pass
+        if not download_failure.check(IgnoreRequest):
+            if download_failure.frames:
+                logkws = self.logformatter.download_error(
+                    download_failure, request, spider
+                )
+                logger.log(
+                    *logformatter_adapter(logkws),
+                    extra={"spider": spider},
+                    exc_info=failure_to_exc_info(download_failure),
+                )
+            else:
+                errmsg = download_failure.getErrorMessage()
+                if errmsg:
+                    logkws = self.logformatter.download_error(
+                        download_failure, request, spider, errmsg
+                    )
+                    logger.log(
+                        *logformatter_adapter(logkws),
+                        extra={"spider": spider},
+                    )
+
+        if spider_failure is not download_failure:
+            return spider_failure
+        return None

-    def _itemproc_finished(self, output: Any, item: Any, response: Response,
-        spider: Spider) ->Deferred:
+    def _itemproc_finished(
+        self, output: Any, item: Any, response: Response, spider: Spider
+    ) -> Deferred:
         """ItemProcessor finished for the given ``item`` and returned ``output``"""
-        pass
+        assert self.slot is not None  # typing
+        self.slot.itemproc_size -= 1
+        if isinstance(output, Failure):
+            ex = output.value
+            if isinstance(ex, DropItem):
+                logkws = self.logformatter.dropped(item, ex, response, spider)
+                if logkws is not None:
+                    logger.log(*logformatter_adapter(logkws), extra={"spider": spider})
+                return self.signals.send_catch_log_deferred(
+                    signal=signals.item_dropped,
+                    item=item,
+                    response=response,
+                    spider=spider,
+                    exception=output.value,
+                )
+            assert ex
+            logkws = self.logformatter.item_error(item, ex, response, spider)
+            logger.log(
+                *logformatter_adapter(logkws),
+                extra={"spider": spider},
+                exc_info=failure_to_exc_info(output),
+            )
+            return self.signals.send_catch_log_deferred(
+                signal=signals.item_error,
+                item=item,
+                response=response,
+                spider=spider,
+                failure=output,
+            )
+        logkws = self.logformatter.scraped(output, response, spider)
+        if logkws is not None:
+            logger.log(*logformatter_adapter(logkws), extra={"spider": spider})
+        return self.signals.send_catch_log_deferred(
+            signal=signals.item_scraped, item=output, response=response, spider=spider
+        )
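
The Slot class above carries the scraper's backpressure accounting: every queued response adds at least MIN_RESPONSE_SIZE (1024) bytes to active_size, and needs_backout() reports when max_active_size is exceeded. A short sketch under assumed values (the URL and the 3000-byte limit are arbitrary):

    from scrapy.core.scraper import Slot
    from scrapy.http import Request, Response

    slot = Slot(max_active_size=3000)
    request = Request("https://example.com")

    # A 2048-byte body is accounted at its real size.
    slot.add_response_request(Response(request.url, body=b"x" * 2048, request=request), request)
    assert slot.active_size == 2048 and not slot.needs_backout()

    # A tiny body still counts as MIN_RESPONSE_SIZE, pushing the slot over its limit.
    slot.add_response_request(Response(request.url, body=b"tiny", request=request), request)
    assert slot.active_size == 3072 and slot.needs_backout()
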
diff --git a/scrapy/core/spidermw.py b/scrapy/core/spidermw.py
index 9922755bf..dcf1a6dbc 100644
--- a/scrapy/core/spidermw.py
+++ b/scrapy/core/spidermw.py
@@ -6,9 +6,23 @@ See documentation in docs/topics/spider-middleware.rst
 import logging
 from inspect import isasyncgenfunction, iscoroutine
 from itertools import islice
-from typing import Any, AsyncGenerator, AsyncIterable, Callable, Generator, Iterable, List, Optional, Tuple, Union, cast
+from typing import (
+    Any,
+    AsyncGenerator,
+    AsyncIterable,
+    Callable,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
 from twisted.internet.defer import Deferred, inlineCallbacks
 from twisted.python.failure import Failure
+
 from scrapy import Request, Spider
 from scrapy.exceptions import _InvalidOutput
 from scrapy.http import Response
@@ -16,15 +30,317 @@ from scrapy.middleware import MiddlewareManager
 from scrapy.settings import BaseSettings
 from scrapy.utils.asyncgen import as_async_generator, collect_asyncgen
 from scrapy.utils.conf import build_component_list
-from scrapy.utils.defer import deferred_f_from_coro_f, deferred_from_coro, maybe_deferred_to_future, mustbe_deferred
+from scrapy.utils.defer import (
+    deferred_f_from_coro_f,
+    deferred_from_coro,
+    maybe_deferred_to_future,
+    mustbe_deferred,
+)
 from scrapy.utils.python import MutableAsyncChain, MutableChain
+
 logger = logging.getLogger(__name__)
+
+
 ScrapeFunc = Callable[[Union[Response, Failure], Request, Spider], Any]


+def _isiterable(o: Any) -> bool:
+    return isinstance(o, (Iterable, AsyncIterable))
+
+
 class SpiderMiddlewareManager(MiddlewareManager):
-    component_name = 'spider middleware'
+    component_name = "spider middleware"

     def __init__(self, *middlewares: Any):
         super().__init__(*middlewares)
         self.downgrade_warning_done = False
+
+    @classmethod
+    def _get_mwlist_from_settings(cls, settings: BaseSettings) -> List[Any]:
+        return build_component_list(settings.getwithbase("SPIDER_MIDDLEWARES"))
+
+    def _add_middleware(self, mw: Any) -> None:
+        super()._add_middleware(mw)
+        if hasattr(mw, "process_spider_input"):
+            self.methods["process_spider_input"].append(mw.process_spider_input)
+        if hasattr(mw, "process_start_requests"):
+            self.methods["process_start_requests"].appendleft(mw.process_start_requests)
+        process_spider_output = self._get_async_method_pair(mw, "process_spider_output")
+        self.methods["process_spider_output"].appendleft(process_spider_output)
+        process_spider_exception = getattr(mw, "process_spider_exception", None)
+        self.methods["process_spider_exception"].appendleft(process_spider_exception)
+
+    def _process_spider_input(
+        self,
+        scrape_func: ScrapeFunc,
+        response: Response,
+        request: Request,
+        spider: Spider,
+    ) -> Any:
+        for method in self.methods["process_spider_input"]:
+            method = cast(Callable, method)
+            try:
+                result = method(response=response, spider=spider)
+                if result is not None:
+                    msg = (
+                        f"{method.__qualname__} must return None "
+                        f"or raise an exception, got {type(result)}"
+                    )
+                    raise _InvalidOutput(msg)
+            except _InvalidOutput:
+                raise
+            except Exception:
+                return scrape_func(Failure(), request, spider)
+        return scrape_func(response, request, spider)
+
+    def _evaluate_iterable(
+        self,
+        response: Response,
+        spider: Spider,
+        iterable: Union[Iterable, AsyncIterable],
+        exception_processor_index: int,
+        recover_to: Union[MutableChain, MutableAsyncChain],
+    ) -> Union[Generator, AsyncGenerator]:
+        def process_sync(iterable: Iterable) -> Generator:
+            try:
+                for r in iterable:
+                    yield r
+            except Exception as ex:
+                exception_result = self._process_spider_exception(
+                    response, spider, Failure(ex), exception_processor_index
+                )
+                if isinstance(exception_result, Failure):
+                    raise
+                recover_to.extend(exception_result)
+
+        async def process_async(iterable: AsyncIterable) -> AsyncGenerator:
+            try:
+                async for r in iterable:
+                    yield r
+            except Exception as ex:
+                exception_result = self._process_spider_exception(
+                    response, spider, Failure(ex), exception_processor_index
+                )
+                if isinstance(exception_result, Failure):
+                    raise
+                recover_to.extend(exception_result)
+
+        if isinstance(iterable, AsyncIterable):
+            return process_async(iterable)
+        return process_sync(iterable)
+
+    def _process_spider_exception(
+        self,
+        response: Response,
+        spider: Spider,
+        _failure: Failure,
+        start_index: int = 0,
+    ) -> Union[Failure, MutableChain]:
+        exception = _failure.value
+        # don't handle _InvalidOutput exception
+        if isinstance(exception, _InvalidOutput):
+            return _failure
+        method_list = islice(
+            self.methods["process_spider_exception"], start_index, None
+        )
+        for method_index, method in enumerate(method_list, start=start_index):
+            if method is None:
+                continue
+            method = cast(Callable, method)
+            result = method(response=response, exception=exception, spider=spider)
+            if _isiterable(result):
+                # stop exception handling by handing control over to the
+                # process_spider_output chain if an iterable has been returned
+                dfd: Deferred = self._process_spider_output(
+                    response, spider, result, method_index + 1
+                )
+                # _process_spider_output() returns a Deferred only because of downgrading so this can be
+                # simplified when downgrading is removed.
+                if dfd.called:
+                    # the result is available immediately if _process_spider_output didn't do downgrading
+                    return cast(MutableChain, dfd.result)
+                # we forbid waiting here because otherwise we would need to return a deferred from
+                # _process_spider_exception too, which complicates the architecture
+                msg = f"Async iterable returned from {method.__qualname__} cannot be downgraded"
+                raise _InvalidOutput(msg)
+            elif result is None:
+                continue
+            else:
+                msg = (
+                    f"{method.__qualname__} must return None "
+                    f"or an iterable, got {type(result)}"
+                )
+                raise _InvalidOutput(msg)
+        return _failure
+
+    # This method cannot be made async def, as _process_spider_exception relies on the Deferred result
+    # being available immediately which doesn't work when it's a wrapped coroutine.
+    # It also needs @inlineCallbacks only because of downgrading so it can be removed when downgrading is removed.
+    @inlineCallbacks
+    def _process_spider_output(
+        self,
+        response: Response,
+        spider: Spider,
+        result: Union[Iterable, AsyncIterable],
+        start_index: int = 0,
+    ) -> Generator[Deferred, Any, Union[MutableChain, MutableAsyncChain]]:
+        # items in this iterable do not need to go through the process_spider_output
+        # chain, they went through it already from the process_spider_exception method
+        recovered: Union[MutableChain, MutableAsyncChain]
+        last_result_is_async = isinstance(result, AsyncIterable)
+        if last_result_is_async:
+            recovered = MutableAsyncChain()
+        else:
+            recovered = MutableChain()
+
+        # There are three cases for the middleware: def foo, async def foo, def foo + async def foo_async.
+        # 1. def foo. Sync iterables are passed as is, async ones are downgraded.
+        # 2. async def foo. Sync iterables are upgraded, async ones are passed as is.
+        # 3. def foo + async def foo_async. Iterables are passed to the respective method.
+        # Storing methods and method tuples in the same list is weird but we should be able to roll this back
+        # when we drop this compatibility feature.
+
+        method_list = islice(self.methods["process_spider_output"], start_index, None)
+        for method_index, method_pair in enumerate(method_list, start=start_index):
+            if method_pair is None:
+                continue
+            need_upgrade = need_downgrade = False
+            if isinstance(method_pair, tuple):
+                # This tuple handling is only needed until _async compatibility methods are removed.
+                method_sync, method_async = method_pair
+                method = method_async if last_result_is_async else method_sync
+            else:
+                method = method_pair
+                if not last_result_is_async and isasyncgenfunction(method):
+                    need_upgrade = True
+                elif last_result_is_async and not isasyncgenfunction(method):
+                    need_downgrade = True
+            try:
+                if need_upgrade:
+                    # Iterable -> AsyncIterable
+                    result = as_async_generator(result)
+                elif need_downgrade:
+                    if not self.downgrade_warning_done:
+                        logger.warning(
+                            f"Async iterable passed to {method.__qualname__} "
+                            f"was downgraded to a non-async one"
+                        )
+                        self.downgrade_warning_done = True
+                    assert isinstance(result, AsyncIterable)
+                    # AsyncIterable -> Iterable
+                    result = yield deferred_from_coro(collect_asyncgen(result))
+                    if isinstance(recovered, AsyncIterable):
+                        recovered_collected = yield deferred_from_coro(
+                            collect_asyncgen(recovered)
+                        )
+                        recovered = MutableChain(recovered_collected)
+                # might fail directly if the output value is not a generator
+                result = method(response=response, result=result, spider=spider)
+            except Exception as ex:
+                exception_result = self._process_spider_exception(
+                    response, spider, Failure(ex), method_index + 1
+                )
+                if isinstance(exception_result, Failure):
+                    raise
+                return exception_result
+            if _isiterable(result):
+                result = self._evaluate_iterable(
+                    response, spider, result, method_index + 1, recovered
+                )
+            else:
+                if iscoroutine(result):
+                    result.close()  # Silence warning about not awaiting
+                    msg = (
+                        f"{method.__qualname__} must be an asynchronous "
+                        f"generator (i.e. use yield)"
+                    )
+                else:
+                    msg = (
+                        f"{method.__qualname__} must return an iterable, got "
+                        f"{type(result)}"
+                    )
+                raise _InvalidOutput(msg)
+            last_result_is_async = isinstance(result, AsyncIterable)
+
+        if last_result_is_async:
+            return MutableAsyncChain(result, recovered)
+        return MutableChain(result, recovered)  # type: ignore[arg-type]
+
+    async def _process_callback_output(
+        self, response: Response, spider: Spider, result: Union[Iterable, AsyncIterable]
+    ) -> Union[MutableChain, MutableAsyncChain]:
+        recovered: Union[MutableChain, MutableAsyncChain]
+        if isinstance(result, AsyncIterable):
+            recovered = MutableAsyncChain()
+        else:
+            recovered = MutableChain()
+        result = self._evaluate_iterable(response, spider, result, 0, recovered)
+        result = await maybe_deferred_to_future(
+            self._process_spider_output(response, spider, result)
+        )
+        if isinstance(result, AsyncIterable):
+            return MutableAsyncChain(result, recovered)
+        if isinstance(recovered, AsyncIterable):
+            recovered_collected = await collect_asyncgen(recovered)
+            recovered = MutableChain(recovered_collected)
+        return MutableChain(result, recovered)
+
+    def scrape_response(
+        self,
+        scrape_func: ScrapeFunc,
+        response: Response,
+        request: Request,
+        spider: Spider,
+    ) -> Deferred:
+        async def process_callback_output(
+            result: Union[Iterable, AsyncIterable]
+        ) -> Union[MutableChain, MutableAsyncChain]:
+            return await self._process_callback_output(response, spider, result)
+
+        def process_spider_exception(_failure: Failure) -> Union[Failure, MutableChain]:
+            return self._process_spider_exception(response, spider, _failure)
+
+        dfd = mustbe_deferred(
+            self._process_spider_input, scrape_func, response, request, spider
+        )
+        dfd.addCallbacks(
+            callback=deferred_f_from_coro_f(process_callback_output),
+            errback=process_spider_exception,
+        )
+        return dfd
+
+    def process_start_requests(
+        self, start_requests: Iterable[Request], spider: Spider
+    ) -> Deferred:
+        return self._process_chain("process_start_requests", start_requests, spider)
+
+    # This method is only needed until _async compatibility methods are removed.
+    @staticmethod
+    def _get_async_method_pair(
+        mw: Any, methodname: str
+    ) -> Union[None, Callable, Tuple[Callable, Callable]]:
+        normal_method: Optional[Callable] = getattr(mw, methodname, None)
+        methodname_async = methodname + "_async"
+        async_method: Optional[Callable] = getattr(mw, methodname_async, None)
+        if not async_method:
+            return normal_method
+        if not normal_method:
+            logger.error(
+                f"Middleware {mw.__qualname__} has {methodname_async} "
+                f"without {methodname}, skipping this method."
+            )
+            return None
+        if not isasyncgenfunction(async_method):
+            logger.error(
+                f"{async_method.__qualname__} is not "
+                f"an async generator function, skipping this method."
+            )
+            return normal_method
+        if isasyncgenfunction(normal_method):
+            logger.error(
+                f"{normal_method.__qualname__} is an async "
+                f"generator function while {methodname_async} exists, "
+                f"skipping both methods."
+            )
+            return None
+        return normal_method, async_method
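
_get_async_method_pair() above lets a spider middleware provide both a synchronous process_spider_output and an asynchronous process_spider_output_async; the manager then routes sync iterables to the former and async iterables to the latter instead of up- or downgrading them. A hedged sketch of such a middleware (the class name is made up):

    from typing import AsyncIterable, Iterable

    class BothFlavoursMiddleware:
        """Hypothetical spider middleware exposing the sync/async method pair."""

        def process_spider_output(self, response, result: Iterable, spider):
            for item_or_request in result:
                yield item_or_request

        async def process_spider_output_async(self, response, result: AsyncIterable, spider):
            async for item_or_request in result:
                yield item_or_request
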
diff --git a/scrapy/crawler.py b/scrapy/crawler.py
index 4271118c2..6f54e62e9 100644
--- a/scrapy/crawler.py
+++ b/scrapy/crawler.py
@@ -1,16 +1,27 @@
 from __future__ import annotations
+
 import logging
 import pprint
 import signal
 import warnings
 from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Set, Type, Union, cast
-from twisted.internet.defer import Deferred, DeferredList, inlineCallbacks, maybeDeferred
+
+from twisted.internet.defer import (
+    Deferred,
+    DeferredList,
+    inlineCallbacks,
+    maybeDeferred,
+)
 from zope.interface.exceptions import DoesNotImplement
+
 try:
+    # zope >= 5.0 only supports MultipleInvalid
     from zope.interface.exceptions import MultipleInvalid
 except ImportError:
     MultipleInvalid = None
+
 from zope.interface.verify import verifyClass
+
 from scrapy import Spider, signals
 from scrapy.addons import AddonManager
 from scrapy.core.engine import ExecutionEngine
@@ -21,33 +32,55 @@ from scrapy.logformatter import LogFormatter
 from scrapy.settings import BaseSettings, Settings, overridden_settings
 from scrapy.signalmanager import SignalManager
 from scrapy.statscollectors import StatsCollector
-from scrapy.utils.log import LogCounterHandler, configure_logging, get_scrapy_root_handler, install_scrapy_root_handler, log_reactor_info, log_scrapy_info
+from scrapy.utils.log import (
+    LogCounterHandler,
+    configure_logging,
+    get_scrapy_root_handler,
+    install_scrapy_root_handler,
+    log_reactor_info,
+    log_scrapy_info,
+)
 from scrapy.utils.misc import create_instance, load_object
 from scrapy.utils.ossignal import install_shutdown_handlers, signal_names
-from scrapy.utils.reactor import install_reactor, is_asyncio_reactor_installed, verify_installed_asyncio_event_loop, verify_installed_reactor
+from scrapy.utils.reactor import (
+    install_reactor,
+    is_asyncio_reactor_installed,
+    verify_installed_asyncio_event_loop,
+    verify_installed_reactor,
+)
+
 if TYPE_CHECKING:
     from scrapy.utils.request import RequestFingerprinter
+
+
 logger = logging.getLogger(__name__)


 class Crawler:
-
-    def __init__(self, spidercls: Type[Spider], settings: Union[None, Dict[
-        str, Any], Settings]=None, init_reactor: bool=False):
+    def __init__(
+        self,
+        spidercls: Type[Spider],
+        settings: Union[None, Dict[str, Any], Settings] = None,
+        init_reactor: bool = False,
+    ):
         if isinstance(spidercls, Spider):
-            raise ValueError(
-                'The spidercls argument must be a class, not an object')
+            raise ValueError("The spidercls argument must be a class, not an object")
+
         if isinstance(settings, dict) or settings is None:
             settings = Settings(settings)
+
         self.spidercls: Type[Spider] = spidercls
         self.settings: Settings = settings.copy()
         self.spidercls.update_settings(self.settings)
         self._update_root_log_handler()
+
         self.addons: AddonManager = AddonManager(self)
         self.signals: SignalManager = SignalManager(self)
+
         self._init_reactor: bool = init_reactor
         self.crawling: bool = False
         self._started: bool = False
+
         self.extensions: Optional[ExtensionManager] = None
         self.stats: Optional[StatsCollector] = None
         self.logformatter: Optional[LogFormatter] = None
@@ -55,11 +88,97 @@ class Crawler:
         self.spider: Optional[Spider] = None
         self.engine: Optional[ExecutionEngine] = None

+    def _update_root_log_handler(self) -> None:
+        if get_scrapy_root_handler() is not None:
+            # scrapy root handler already installed: update it with new settings
+            install_scrapy_root_handler(self.settings)
+
+    def _apply_settings(self) -> None:
+        if self.settings.frozen:
+            return
+
+        self.addons.load_settings(self.settings)
+        self.stats = load_object(self.settings["STATS_CLASS"])(self)
+
+        handler = LogCounterHandler(self, level=self.settings.get("LOG_LEVEL"))
+        logging.root.addHandler(handler)
+        # lambda is assigned to Crawler attribute because this way it is not
+        # garbage collected after leaving the scope
+        self.__remove_handler = lambda: logging.root.removeHandler(handler)
+        self.signals.connect(self.__remove_handler, signals.engine_stopped)
+
+        lf_cls: Type[LogFormatter] = load_object(self.settings["LOG_FORMATTER"])
+        self.logformatter = lf_cls.from_crawler(self)
+
+        self.request_fingerprinter = create_instance(
+            load_object(self.settings["REQUEST_FINGERPRINTER_CLASS"]),
+            settings=self.settings,
+            crawler=self,
+        )
+
+        reactor_class: str = self.settings["TWISTED_REACTOR"]
+        event_loop: str = self.settings["ASYNCIO_EVENT_LOOP"]
+        if self._init_reactor:
+            # this needs to be done after the spider settings are merged,
+            # but before something imports twisted.internet.reactor
+            if reactor_class:
+                install_reactor(reactor_class, event_loop)
+            else:
+                from twisted.internet import reactor  # noqa: F401
+            log_reactor_info()
+        if reactor_class:
+            verify_installed_reactor(reactor_class)
+            if is_asyncio_reactor_installed() and event_loop:
+                verify_installed_asyncio_event_loop(event_loop)
+
+        self.extensions = ExtensionManager.from_crawler(self)
+        self.settings.freeze()
+
+        d = dict(overridden_settings(self.settings))
+        logger.info(
+            "Overridden settings:\n%(settings)s", {"settings": pprint.pformat(d)}
+        )
+
+    @inlineCallbacks
+    def crawl(self, *args: Any, **kwargs: Any) -> Generator[Deferred, Any, None]:
+        if self.crawling:
+            raise RuntimeError("Crawling already taking place")
+        if self._started:
+            warnings.warn(
+                "Running Crawler.crawl() more than once is deprecated.",
+                ScrapyDeprecationWarning,
+                stacklevel=2,
+            )
+        self.crawling = self._started = True
+
+        try:
+            self.spider = self._create_spider(*args, **kwargs)
+            self._apply_settings()
+            self._update_root_log_handler()
+            self.engine = self._create_engine()
+            start_requests = iter(self.spider.start_requests())
+            yield self.engine.open_spider(self.spider, start_requests)
+            yield maybeDeferred(self.engine.start)
+        except Exception:
+            self.crawling = False
+            if self.engine is not None:
+                yield self.engine.close()
+            raise
+
+    def _create_spider(self, *args: Any, **kwargs: Any) -> Spider:
+        return self.spidercls.from_crawler(self, *args, **kwargs)
+
+    def _create_engine(self) -> ExecutionEngine:
+        return ExecutionEngine(self, lambda _: self.stop())
+
     @inlineCallbacks
-    def stop(self) ->Generator[Deferred, Any, None]:
+    def stop(self) -> Generator[Deferred, Any, None]:
         """Starts a graceful stop of the crawler and returns a deferred that is
         fired when the crawler is stopped."""
-        pass
+        if self.crawling:
+            self.crawling = False
+            assert self.engine
+            yield maybeDeferred(self.engine.stop)


 class CrawlerRunner:
@@ -74,16 +193,34 @@ class CrawlerRunner:
     accordingly) unless writing scripts that manually handle the crawling
     process. See :ref:`run-from-script` for an example.
     """
-    crawlers = property(lambda self: self._crawlers, doc=
-        'Set of :class:`crawlers <scrapy.crawler.Crawler>` started by :meth:`crawl` and managed by this class.'
-        )
+
+    crawlers = property(
+        lambda self: self._crawlers,
+        doc="Set of :class:`crawlers <scrapy.crawler.Crawler>` started by "
+        ":meth:`crawl` and managed by this class.",
+    )

     @staticmethod
     def _get_spider_loader(settings: BaseSettings):
         """Get SpiderLoader instance from settings"""
-        pass
-
-    def __init__(self, settings: Union[Dict[str, Any], Settings, None]=None):
+        cls_path = settings.get("SPIDER_LOADER_CLASS")
+        loader_cls = load_object(cls_path)
+        excs = (
+            (DoesNotImplement, MultipleInvalid) if MultipleInvalid else DoesNotImplement
+        )
+        try:
+            verifyClass(ISpiderLoader, loader_cls)
+        except excs:
+            warnings.warn(
+                "SPIDER_LOADER_CLASS (previously named SPIDER_MANAGER_CLASS) does "
+                "not fully implement scrapy.interfaces.ISpiderLoader interface. "
+                "Please add all missing methods to avoid unexpected runtime errors.",
+                category=ScrapyDeprecationWarning,
+                stacklevel=2,
+            )
+        return loader_cls.from_settings(settings.frozencopy())
+
+    def __init__(self, settings: Union[Dict[str, Any], Settings, None] = None):
         if isinstance(settings, dict) or settings is None:
             settings = Settings(settings)
         self.settings = settings
@@ -92,8 +229,12 @@ class CrawlerRunner:
         self._active: Set[Deferred] = set()
         self.bootstrap_failed = False

-    def crawl(self, crawler_or_spidercls: Union[Type[Spider], str, Crawler],
-        *args: Any, **kwargs: Any) ->Deferred:
+    def crawl(
+        self,
+        crawler_or_spidercls: Union[Type[Spider], str, Crawler],
+        *args: Any,
+        **kwargs: Any,
+    ) -> Deferred:
         """
         Run a crawler with the provided arguments.

@@ -115,10 +256,30 @@ class CrawlerRunner:

         :param kwargs: keyword arguments to initialize the spider
         """
-        pass
-
-    def create_crawler(self, crawler_or_spidercls: Union[Type[Spider], str,
-        Crawler]) ->Crawler:
+        if isinstance(crawler_or_spidercls, Spider):
+            raise ValueError(
+                "The crawler_or_spidercls argument cannot be a spider object, "
+                "it must be a spider class (or a Crawler object)"
+            )
+        crawler = self.create_crawler(crawler_or_spidercls)
+        return self._crawl(crawler, *args, **kwargs)
+
+    def _crawl(self, crawler: Crawler, *args: Any, **kwargs: Any) -> Deferred:
+        self.crawlers.add(crawler)
+        d = crawler.crawl(*args, **kwargs)
+        self._active.add(d)
+
+        def _done(result: Any) -> Any:
+            self.crawlers.discard(crawler)
+            self._active.discard(d)
+            self.bootstrap_failed |= not getattr(crawler, "spider", None)
+            return result
+
+        return d.addBoth(_done)
+
+    def create_crawler(
+        self, crawler_or_spidercls: Union[Type[Spider], str, Crawler]
+    ) -> Crawler:
         """
         Return a :class:`~scrapy.crawler.Crawler` object.

@@ -129,25 +290,39 @@ class CrawlerRunner:
           a spider with this name in a Scrapy project (using spider loader),
           then creates a Crawler instance for it.
         """
-        pass
-
-    def stop(self) ->Deferred:
+        if isinstance(crawler_or_spidercls, Spider):
+            raise ValueError(
+                "The crawler_or_spidercls argument cannot be a spider object, "
+                "it must be a spider class (or a Crawler object)"
+            )
+        if isinstance(crawler_or_spidercls, Crawler):
+            return crawler_or_spidercls
+        return self._create_crawler(crawler_or_spidercls)
+
+    def _create_crawler(self, spidercls: Union[str, Type[Spider]]) -> Crawler:
+        if isinstance(spidercls, str):
+            spidercls = self.spider_loader.load(spidercls)
+        # temporary cast until self.spider_loader is typed
+        return Crawler(cast(Type[Spider], spidercls), self.settings)
+
+    def stop(self) -> Deferred:
         """
         Stops simultaneously all the crawling jobs taking place.

         Returns a deferred that is fired when they all have ended.
         """
-        pass
+        return DeferredList([c.stop() for c in list(self.crawlers)])

     @inlineCallbacks
-    def join(self) ->Generator[Deferred, Any, None]:
+    def join(self) -> Generator[Deferred, Any, None]:
         """
         join()

         Returns a deferred that is fired when all managed :attr:`crawlers` have
         completed their executions.
         """
-        pass
+        while self._active:
+            yield DeferredList(self._active)


 class CrawlerProcess(CrawlerRunner):
@@ -174,15 +349,50 @@ class CrawlerProcess(CrawlerRunner):
     process. See :ref:`run-from-script` for an example.
     """

-    def __init__(self, settings: Union[Dict[str, Any], Settings, None]=None,
-        install_root_handler: bool=True):
+    def __init__(
+        self,
+        settings: Union[Dict[str, Any], Settings, None] = None,
+        install_root_handler: bool = True,
+    ):
         super().__init__(settings)
         configure_logging(self.settings, install_root_handler)
         log_scrapy_info(self.settings)
         self._initialized_reactor = False

-    def start(self, stop_after_crawl: bool=True, install_signal_handlers:
-        bool=True) ->None:
+    def _signal_shutdown(self, signum: int, _: Any) -> None:
+        from twisted.internet import reactor
+
+        install_shutdown_handlers(self._signal_kill)
+        signame = signal_names[signum]
+        logger.info(
+            "Received %(signame)s, shutting down gracefully. Send again to force ",
+            {"signame": signame},
+        )
+        reactor.callFromThread(self._graceful_stop_reactor)
+
+    def _signal_kill(self, signum: int, _: Any) -> None:
+        from twisted.internet import reactor
+
+        install_shutdown_handlers(signal.SIG_IGN)
+        signame = signal_names[signum]
+        logger.info(
+            "Received %(signame)s twice, forcing unclean shutdown", {"signame": signame}
+        )
+        reactor.callFromThread(self._stop_reactor)
+
+    def _create_crawler(self, spidercls: Union[Type[Spider], str]) -> Crawler:
+        if isinstance(spidercls, str):
+            spidercls = self.spider_loader.load(spidercls)
+        init_reactor = not self._initialized_reactor
+        self._initialized_reactor = True
+        # temporary cast until self.spider_loader is typed
+        return Crawler(
+            cast(Type[Spider], spidercls), self.settings, init_reactor=init_reactor
+        )
+
+    def start(
+        self, stop_after_crawl: bool = True, install_signal_handlers: bool = True
+    ) -> None:
         """
         This method starts a :mod:`~twisted.internet.reactor`, adjusts its pool
         size to :setting:`REACTOR_THREADPOOL_MAXSIZE`, and installs a DNS cache
@@ -197,4 +407,36 @@ class CrawlerProcess(CrawlerRunner):
         :param bool install_signal_handlers: whether to install the OS signal
             handlers from Twisted and Scrapy (default: True)
         """
-        pass
+        from twisted.internet import reactor
+
+        if stop_after_crawl:
+            d = self.join()
+            # Don't start the reactor if the deferreds are already fired
+            if d.called:
+                return
+            d.addBoth(self._stop_reactor)
+
+        resolver_class = load_object(self.settings["DNS_RESOLVER"])
+        resolver = create_instance(resolver_class, self.settings, self, reactor=reactor)
+        resolver.install_on_reactor()
+        tp = reactor.getThreadPool()
+        tp.adjustPoolsize(maxthreads=self.settings.getint("REACTOR_THREADPOOL_MAXSIZE"))
+        reactor.addSystemEventTrigger("before", "shutdown", self.stop)
+        if install_signal_handlers:
+            reactor.addSystemEventTrigger(
+                "after", "startup", install_shutdown_handlers, self._signal_shutdown
+            )
+        reactor.run(installSignalHandlers=install_signal_handlers)  # blocking call
+
+    def _graceful_stop_reactor(self) -> Deferred:
+        d = self.stop()
+        d.addBoth(self._stop_reactor)
+        return d
+
+    def _stop_reactor(self, _: Any = None) -> None:
+        from twisted.internet import reactor
+
+        try:
+            reactor.stop()
+        except RuntimeError:  # raised if already stopped or in shutdown stage
+            pass
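
As context for the Crawler / CrawlerRunner / CrawlerProcess code above, a minimal run-from-a-script sketch; the QuotesSpider class and the quotes.toscrape.com URL are illustrative, not part of the diff:

    import scrapy
    from scrapy.crawler import CrawlerProcess

    class QuotesSpider(scrapy.Spider):
        name = "quotes"
        start_urls = ["https://quotes.toscrape.com"]

        def parse(self, response):
            yield {"title": response.css("title::text").get()}

    if __name__ == "__main__":
        process = CrawlerProcess(settings={"LOG_LEVEL": "INFO"})
        process.crawl(QuotesSpider)  # creates a Crawler and schedules Crawler.crawl()
        process.start()  # runs the Twisted reactor; blocks until all crawlers finish
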
diff --git a/scrapy/downloadermiddlewares/ajaxcrawl.py b/scrapy/downloadermiddlewares/ajaxcrawl.py
index c8df05f69..04ae719de 100644
--- a/scrapy/downloadermiddlewares/ajaxcrawl.py
+++ b/scrapy/downloadermiddlewares/ajaxcrawl.py
@@ -1,8 +1,11 @@
 import logging
 import re
+
 from w3lib import html
+
 from scrapy.exceptions import NotConfigured
 from scrapy.http import HtmlResponse
+
 logger = logging.getLogger(__name__)


@@ -13,20 +16,57 @@ class AjaxCrawlMiddleware:
     """

     def __init__(self, settings):
-        if not settings.getbool('AJAXCRAWL_ENABLED'):
+        if not settings.getbool("AJAXCRAWL_ENABLED"):
             raise NotConfigured
-        self.lookup_bytes = settings.getint('AJAXCRAWL_MAXSIZE', 32768)
+
+        # XXX: Google parses at least the first 100k bytes; scrapy's redirect
+        # middleware parses the first 4k. 4k turns out to be insufficient
+        # for this middleware, and parsing 100k could be slow.
+        # We use something in between (32k) by default.
+        self.lookup_bytes = settings.getint("AJAXCRAWL_MAXSIZE", 32768)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings)
+
+    def process_response(self, request, response, spider):
+        if not isinstance(response, HtmlResponse) or response.status != 200:
+            return response
+
+        if request.method != "GET":
+            # other HTTP methods are either not safe or don't have a body
+            return response
+
+        if "ajax_crawlable" in request.meta:  # prevent loops
+            return response
+
+        if not self._has_ajax_crawlable_variant(response):
+            return response
+
+        # scrapy already handles #! links properly
+        ajax_crawl_request = request.replace(url=request.url + "#!")
+        logger.debug(
+            "Downloading AJAX crawlable %(ajax_crawl_request)s instead of %(request)s",
+            {"ajax_crawl_request": ajax_crawl_request, "request": request},
+            extra={"spider": spider},
+        )
+
+        ajax_crawl_request.meta["ajax_crawlable"] = True
+        return ajax_crawl_request

     def _has_ajax_crawlable_variant(self, response):
         """
         Return True if a page without hash fragment could be "AJAX crawlable"
         according to https://developers.google.com/webmasters/ajax-crawling/docs/getting-started.
         """
-        pass
+        body = response.text[: self.lookup_bytes]
+        return _has_ajaxcrawlable_meta(body)


+# XXX: move it to w3lib?
 _ajax_crawlable_re = re.compile(
-    '<meta\\s+name=["\\\']fragment["\\\']\\s+content=["\\\']!["\\\']/?>')
+    r'<meta\s+name=["\']fragment["\']\s+content=["\']!["\']/?>'
+)


 def _has_ajaxcrawlable_meta(text):
@@ -40,4 +80,16 @@ def _has_ajaxcrawlable_meta(text):
     >>> _has_ajaxcrawlable_meta('<html></html>')
     False
     """
-    pass
+
+    # Stripping scripts and comments is slow (about 20x slower than
+    # just checking if a string is in text); this is a quick fail-fast
+    # path that should work for most pages.
+    if "fragment" not in text:
+        return False
+    if "content" not in text:
+        return False
+
+    text = html.remove_tags_with_content(text, ("script", "noscript"))
+    text = html.replace_entities(text)
+    text = html.remove_comments(text)
+    return _ajax_crawlable_re.search(text) is not None
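
The middleware above is opt-in; a minimal project-settings sketch, using only the two setting names read in its __init__:

    # settings.py
    AJAXCRAWL_ENABLED = True    # required, otherwise __init__ raises NotConfigured
    AJAXCRAWL_MAXSIZE = 32768   # optional: bytes of the body scanned for the fragment meta tag
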
diff --git a/scrapy/downloadermiddlewares/cookies.py b/scrapy/downloadermiddlewares/cookies.py
index 3b97e0653..6495157d7 100644
--- a/scrapy/downloadermiddlewares/cookies.py
+++ b/scrapy/downloadermiddlewares/cookies.py
@@ -1,15 +1,25 @@
 import logging
 from collections import defaultdict
+
 from tldextract import TLDExtract
+
 from scrapy.exceptions import NotConfigured
 from scrapy.http import Response
 from scrapy.http.cookies import CookieJar
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.python import to_unicode
+
 logger = logging.getLogger(__name__)
+
+
 _split_domain = TLDExtract(include_psl_private_domains=True)


+def _is_public_domain(domain):
+    parts = _split_domain(domain)
+    return not parts.domain
+
+
 class CookiesMiddleware:
     """This middleware enables working with sites that need cookies"""

@@ -17,15 +27,118 @@ class CookiesMiddleware:
         self.jars = defaultdict(CookieJar)
         self.debug = debug

+    @classmethod
+    def from_crawler(cls, crawler):
+        if not crawler.settings.getbool("COOKIES_ENABLED"):
+            raise NotConfigured
+        return cls(crawler.settings.getbool("COOKIES_DEBUG"))
+
+    def _process_cookies(self, cookies, *, jar, request):
+        for cookie in cookies:
+            cookie_domain = cookie.domain
+            if cookie_domain.startswith("."):
+                cookie_domain = cookie_domain[1:]
+
+            request_domain = urlparse_cached(request).hostname.lower()
+
+            if cookie_domain and _is_public_domain(cookie_domain):
+                if cookie_domain != request_domain:
+                    continue
+                cookie.domain = request_domain
+
+            jar.set_cookie_if_ok(cookie, request)
+
+    def process_request(self, request, spider):
+        if request.meta.get("dont_merge_cookies", False):
+            return
+
+        cookiejarkey = request.meta.get("cookiejar")
+        jar = self.jars[cookiejarkey]
+        cookies = self._get_request_cookies(jar, request)
+        self._process_cookies(cookies, jar=jar, request=request)
+
+        # set Cookie header
+        request.headers.pop("Cookie", None)
+        jar.add_cookie_header(request)
+        self._debug_cookie(request, spider)
+
+    def process_response(self, request, response, spider):
+        if request.meta.get("dont_merge_cookies", False):
+            return response
+
+        # extract cookies from Set-Cookie and drop invalid/expired cookies
+        cookiejarkey = request.meta.get("cookiejar")
+        jar = self.jars[cookiejarkey]
+        cookies = jar.make_cookies(response, request)
+        self._process_cookies(cookies, jar=jar, request=request)
+
+        self._debug_set_cookie(response, spider)
+
+        return response
+
+    def _debug_cookie(self, request, spider):
+        if self.debug:
+            cl = [
+                to_unicode(c, errors="replace")
+                for c in request.headers.getlist("Cookie")
+            ]
+            if cl:
+                cookies = "\n".join(f"Cookie: {c}\n" for c in cl)
+                msg = f"Sending cookies to: {request}\n{cookies}"
+                logger.debug(msg, extra={"spider": spider})
+
+    def _debug_set_cookie(self, response, spider):
+        if self.debug:
+            cl = [
+                to_unicode(c, errors="replace")
+                for c in response.headers.getlist("Set-Cookie")
+            ]
+            if cl:
+                cookies = "\n".join(f"Set-Cookie: {c}\n" for c in cl)
+                msg = f"Received cookies from: {response}\n{cookies}"
+                logger.debug(msg, extra={"spider": spider})
+
     def _format_cookie(self, cookie, request):
         """
         Given a dict consisting of cookie components, return its string representation.
         Decode from bytes if necessary.
         """
-        pass
+        decoded = {}
+        for key in ("name", "value", "path", "domain"):
+            if cookie.get(key) is None:
+                if key in ("name", "value"):
+                    msg = f"Invalid cookie found in request {request}: {cookie} ('{key}' is missing)"
+                    logger.warning(msg)
+                    return
+                continue
+            if isinstance(cookie[key], (bool, float, int, str)):
+                decoded[key] = str(cookie[key])
+            else:
+                try:
+                    decoded[key] = cookie[key].decode("utf8")
+                except UnicodeDecodeError:
+                    logger.warning(
+                        "Non UTF-8 encoded cookie found in request %s: %s",
+                        request,
+                        cookie,
+                    )
+                    decoded[key] = cookie[key].decode("latin1", errors="replace")
+
+        cookie_str = f"{decoded.pop('name')}={decoded.pop('value')}"
+        for key, value in decoded.items():  # path, domain
+            cookie_str += f"; {key.capitalize()}={value}"
+        return cookie_str

     def _get_request_cookies(self, jar, request):
         """
         Extract cookies from the Request.cookies attribute
         """
-        pass
+        if not request.cookies:
+            return []
+        if isinstance(request.cookies, dict):
+            cookies = ({"name": k, "value": v} for k, v in request.cookies.items())
+        else:
+            cookies = request.cookies
+        formatted = filter(None, (self._format_cookie(c, request) for c in cookies))
+        response = Response(request.url, headers={"Set-Cookie": formatted})
+        return jar.make_cookies(response, request)
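
A minimal spider sketch exercising the request-side paths above (dict cookies, per-request jars via the "cookiejar" meta key, and dont_merge_cookies); the URLs are placeholders:

    import scrapy

    class CookieDemoSpider(scrapy.Spider):
        name = "cookie_demo"

        def start_requests(self):
            # dict cookies are turned into jar entries by _get_request_cookies()
            yield scrapy.Request(
                "https://example.com",
                cookies={"currency": "USD"},
                meta={"cookiejar": 1},  # separate CookieJar keyed by this value
            )

        def parse(self, response):
            # skips both process_request() and process_response() cookie handling
            yield scrapy.Request(
                "https://example.com/other",
                meta={"dont_merge_cookies": True},
                callback=self.parse_other,
            )

        def parse_other(self, response):
            pass
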
diff --git a/scrapy/downloadermiddlewares/defaultheaders.py b/scrapy/downloadermiddlewares/defaultheaders.py
index d104ee821..cdacc7368 100644
--- a/scrapy/downloadermiddlewares/defaultheaders.py
+++ b/scrapy/downloadermiddlewares/defaultheaders.py
@@ -3,10 +3,19 @@ DefaultHeaders downloader middleware

 See documentation in docs/topics/downloader-middleware.rst
 """
+
 from scrapy.utils.python import without_none_values


 class DefaultHeadersMiddleware:
-
     def __init__(self, headers):
         self._headers = headers
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        headers = without_none_values(crawler.settings["DEFAULT_REQUEST_HEADERS"])
+        return cls(headers.items())
+
+    def process_request(self, request, spider):
+        for k, v in self._headers:
+            request.headers.setdefault(k, v)
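
A minimal settings sketch for the middleware above; DEFAULT_REQUEST_HEADERS is the setting read in from_crawler(), and None values are dropped by without_none_values():

    # settings.py
    DEFAULT_REQUEST_HEADERS = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en",
        "Referer": None,  # dropped by without_none_values(), so never set as a default
    }
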
diff --git a/scrapy/downloadermiddlewares/downloadtimeout.py b/scrapy/downloadermiddlewares/downloadtimeout.py
index 222bc5eb8..a926ecf56 100644
--- a/scrapy/downloadermiddlewares/downloadtimeout.py
+++ b/scrapy/downloadermiddlewares/downloadtimeout.py
@@ -3,10 +3,23 @@ Download timeout middleware

 See documentation in docs/topics/downloader-middleware.rst
 """
+
 from scrapy import signals


 class DownloadTimeoutMiddleware:
-
     def __init__(self, timeout=180):
         self._timeout = timeout
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        o = cls(crawler.settings.getfloat("DOWNLOAD_TIMEOUT"))
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        return o
+
+    def spider_opened(self, spider):
+        self._timeout = getattr(spider, "download_timeout", self._timeout)
+
+    def process_request(self, request, spider):
+        if self._timeout:
+            request.meta.setdefault("download_timeout", self._timeout)
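
A minimal sketch of the places the timeout above can come from besides the DOWNLOAD_TIMEOUT setting: a spider attribute and per-request meta; the spider and URL are illustrative:

    import scrapy

    class SlowSiteSpider(scrapy.Spider):
        name = "slow_site"
        download_timeout = 60  # picked up by spider_opened()

        def start_requests(self):
            # per-request override; process_request() only fills the key if it is unset
            yield scrapy.Request("https://example.com", meta={"download_timeout": 10})

        def parse(self, response):
            pass
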
diff --git a/scrapy/downloadermiddlewares/httpauth.py b/scrapy/downloadermiddlewares/httpauth.py
index ec0f5cc32..de5a81388 100644
--- a/scrapy/downloadermiddlewares/httpauth.py
+++ b/scrapy/downloadermiddlewares/httpauth.py
@@ -4,7 +4,9 @@ HTTP basic auth downloader middleware
 See documentation in docs/topics/downloader-middleware.rst
 """
 import warnings
+
 from w3lib.http import basic_auth_header
+
 from scrapy import signals
 from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.httpobj import urlparse_cached
@@ -14,3 +16,37 @@ from scrapy.utils.url import url_is_from_any_domain
 class HttpAuthMiddleware:
     """Set Basic HTTP Authorization header
     (http_user and http_pass spider class attributes)"""
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        o = cls()
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        return o
+
+    def spider_opened(self, spider):
+        usr = getattr(spider, "http_user", "")
+        pwd = getattr(spider, "http_pass", "")
+        if usr or pwd:
+            self.auth = basic_auth_header(usr, pwd)
+            if not hasattr(spider, "http_auth_domain"):
+                warnings.warn(
+                    "Using HttpAuthMiddleware without http_auth_domain is deprecated and can cause security "
+                    "problems if the spider makes requests to several different domains. http_auth_domain "
+                    "will be set to the domain of the first request, please set it to the correct value "
+                    "explicitly.",
+                    category=ScrapyDeprecationWarning,
+                )
+                self.domain_unset = True
+            else:
+                self.domain = spider.http_auth_domain
+                self.domain_unset = False
+
+    def process_request(self, request, spider):
+        auth = getattr(self, "auth", None)
+        if auth and b"Authorization" not in request.headers:
+            domain = urlparse_cached(request).hostname
+            if self.domain_unset:
+                self.domain = domain
+                self.domain_unset = False
+            if not self.domain or url_is_from_any_domain(request.url, [self.domain]):
+                request.headers[b"Authorization"] = auth
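
A minimal spider sketch for the attributes read in spider_opened() above; the credentials and domain are placeholders:

    import scrapy

    class IntranetSpider(scrapy.Spider):
        name = "intranet"
        http_user = "user"
        http_pass = "secret"
        http_auth_domain = "intranet.example.com"  # set explicitly to avoid the deprecation warning
        start_urls = ["https://intranet.example.com/"]

        def parse(self, response):
            pass
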
diff --git a/scrapy/downloadermiddlewares/httpcache.py b/scrapy/downloadermiddlewares/httpcache.py
index 2ffedebca..a521cde7a 100644
--- a/scrapy/downloadermiddlewares/httpcache.py
+++ b/scrapy/downloadermiddlewares/httpcache.py
@@ -1,8 +1,18 @@
 from email.utils import formatdate
 from typing import Optional, Type, TypeVar
+
 from twisted.internet import defer
-from twisted.internet.error import ConnectError, ConnectionDone, ConnectionLost, ConnectionRefusedError, DNSLookupError, TCPTimedOutError, TimeoutError
+from twisted.internet.error import (
+    ConnectError,
+    ConnectionDone,
+    ConnectionLost,
+    ConnectionRefusedError,
+    DNSLookupError,
+    TCPTimedOutError,
+    TimeoutError,
+)
 from twisted.web.client import ResponseFailed
+
 from scrapy import signals
 from scrapy.crawler import Crawler
 from scrapy.exceptions import IgnoreRequest, NotConfigured
@@ -12,19 +22,129 @@ from scrapy.settings import Settings
 from scrapy.spiders import Spider
 from scrapy.statscollectors import StatsCollector
 from scrapy.utils.misc import load_object
-HttpCacheMiddlewareTV = TypeVar('HttpCacheMiddlewareTV', bound=
-    'HttpCacheMiddleware')
+
+HttpCacheMiddlewareTV = TypeVar("HttpCacheMiddlewareTV", bound="HttpCacheMiddleware")


 class HttpCacheMiddleware:
-    DOWNLOAD_EXCEPTIONS = (defer.TimeoutError, TimeoutError, DNSLookupError,
-        ConnectionRefusedError, ConnectionDone, ConnectError,
-        ConnectionLost, TCPTimedOutError, ResponseFailed, OSError)
+    DOWNLOAD_EXCEPTIONS = (
+        defer.TimeoutError,
+        TimeoutError,
+        DNSLookupError,
+        ConnectionRefusedError,
+        ConnectionDone,
+        ConnectError,
+        ConnectionLost,
+        TCPTimedOutError,
+        ResponseFailed,
+        OSError,
+    )

-    def __init__(self, settings: Settings, stats: StatsCollector) ->None:
-        if not settings.getbool('HTTPCACHE_ENABLED'):
+    def __init__(self, settings: Settings, stats: StatsCollector) -> None:
+        if not settings.getbool("HTTPCACHE_ENABLED"):
             raise NotConfigured
-        self.policy = load_object(settings['HTTPCACHE_POLICY'])(settings)
-        self.storage = load_object(settings['HTTPCACHE_STORAGE'])(settings)
-        self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING')
+        self.policy = load_object(settings["HTTPCACHE_POLICY"])(settings)
+        self.storage = load_object(settings["HTTPCACHE_STORAGE"])(settings)
+        self.ignore_missing = settings.getbool("HTTPCACHE_IGNORE_MISSING")
         self.stats = stats
+
+    @classmethod
+    def from_crawler(
+        cls: Type[HttpCacheMiddlewareTV], crawler: Crawler
+    ) -> HttpCacheMiddlewareTV:
+        assert crawler.stats
+        o = cls(crawler.settings, crawler.stats)
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+        return o
+
+    def spider_opened(self, spider: Spider) -> None:
+        self.storage.open_spider(spider)
+
+    def spider_closed(self, spider: Spider) -> None:
+        self.storage.close_spider(spider)
+
+    def process_request(self, request: Request, spider: Spider) -> Optional[Response]:
+        if request.meta.get("dont_cache", False):
+            return None
+
+        # Skip uncacheable requests
+        if not self.policy.should_cache_request(request):
+            request.meta["_dont_cache"] = True  # flag as uncacheable
+            return None
+
+        # Look for cached response and check if expired
+        cachedresponse = self.storage.retrieve_response(spider, request)
+        if cachedresponse is None:
+            self.stats.inc_value("httpcache/miss", spider=spider)
+            if self.ignore_missing:
+                self.stats.inc_value("httpcache/ignore", spider=spider)
+                raise IgnoreRequest(f"Ignored request not in cache: {request}")
+            return None  # first time request
+
+        # Return cached response only if not expired
+        cachedresponse.flags.append("cached")
+        if self.policy.is_cached_response_fresh(cachedresponse, request):
+            self.stats.inc_value("httpcache/hit", spider=spider)
+            return cachedresponse
+
+        # Keep a reference to cached response to avoid a second cache lookup on
+        # process_response hook
+        request.meta["cached_response"] = cachedresponse
+
+        return None
+
+    def process_response(
+        self, request: Request, response: Response, spider: Spider
+    ) -> Response:
+        if request.meta.get("dont_cache", False):
+            return response
+
+        # Skip cached responses and uncacheable requests
+        if "cached" in response.flags or "_dont_cache" in request.meta:
+            request.meta.pop("_dont_cache", None)
+            return response
+
+        # RFC2616 requires origin server to set Date header,
+        # https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.18
+        if "Date" not in response.headers:
+            response.headers["Date"] = formatdate(usegmt=True)
+
+        # Do not validate first-hand responses
+        cachedresponse = request.meta.pop("cached_response", None)
+        if cachedresponse is None:
+            self.stats.inc_value("httpcache/firsthand", spider=spider)
+            self._cache_response(spider, response, request, cachedresponse)
+            return response
+
+        if self.policy.is_cached_response_valid(cachedresponse, response, request):
+            self.stats.inc_value("httpcache/revalidate", spider=spider)
+            return cachedresponse
+
+        self.stats.inc_value("httpcache/invalidate", spider=spider)
+        self._cache_response(spider, response, request, cachedresponse)
+        return response
+
+    def process_exception(
+        self, request: Request, exception: Exception, spider: Spider
+    ) -> Optional[Response]:
+        cachedresponse = request.meta.pop("cached_response", None)
+        if cachedresponse is not None and isinstance(
+            exception, self.DOWNLOAD_EXCEPTIONS
+        ):
+            self.stats.inc_value("httpcache/errorrecovery", spider=spider)
+            return cachedresponse
+        return None
+
+    def _cache_response(
+        self,
+        spider: Spider,
+        response: Response,
+        request: Request,
+        cachedresponse: Optional[Response],
+    ) -> None:
+        if self.policy.should_cache_response(response, request):
+            self.stats.inc_value("httpcache/store", spider=spider)
+            self.storage.store_response(spider, request, response)
+        else:
+            self.stats.inc_value("httpcache/uncacheable", spider=spider)
diff --git a/scrapy/downloadermiddlewares/httpcompression.py b/scrapy/downloadermiddlewares/httpcompression.py
index cc3614d22..816be25a1 100644
--- a/scrapy/downloadermiddlewares/httpcompression.py
+++ b/scrapy/downloadermiddlewares/httpcompression.py
@@ -1,26 +1,36 @@
 import warnings
 from logging import getLogger
+
 from scrapy import signals
 from scrapy.exceptions import IgnoreRequest, NotConfigured
 from scrapy.http import Response, TextResponse
 from scrapy.responsetypes import responsetypes
-from scrapy.utils._compression import _DecompressionMaxSizeExceeded, _inflate, _unbrotli, _unzstd
+from scrapy.utils._compression import (
+    _DecompressionMaxSizeExceeded,
+    _inflate,
+    _unbrotli,
+    _unzstd,
+)
 from scrapy.utils.deprecate import ScrapyDeprecationWarning
 from scrapy.utils.gz import gunzip
+
 logger = getLogger(__name__)
-ACCEPTED_ENCODINGS = [b'gzip', b'deflate']
+
+ACCEPTED_ENCODINGS = [b"gzip", b"deflate"]
+
 try:
-    import brotli
+    import brotli  # noqa: F401
 except ImportError:
     pass
 else:
-    ACCEPTED_ENCODINGS.append(b'br')
+    ACCEPTED_ENCODINGS.append(b"br")
+
 try:
-    import zstandard
+    import zstandard  # noqa: F401
 except ImportError:
     pass
 else:
-    ACCEPTED_ENCODINGS.append(b'zstd')
+    ACCEPTED_ENCODINGS.append(b"zstd")


 class HttpCompressionMiddleware:
@@ -34,6 +44,94 @@ class HttpCompressionMiddleware:
             self._warn_size = 33554432
             return
         self.stats = crawler.stats
-        self._max_size = crawler.settings.getint('DOWNLOAD_MAXSIZE')
-        self._warn_size = crawler.settings.getint('DOWNLOAD_WARNSIZE')
+        self._max_size = crawler.settings.getint("DOWNLOAD_MAXSIZE")
+        self._warn_size = crawler.settings.getint("DOWNLOAD_WARNSIZE")
         crawler.signals.connect(self.open_spider, signals.spider_opened)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        if not crawler.settings.getbool("COMPRESSION_ENABLED"):
+            raise NotConfigured
+        try:
+            return cls(crawler=crawler)
+        except TypeError:
+            warnings.warn(
+                "HttpCompressionMiddleware subclasses must either modify "
+                "their '__init__' method to support a 'crawler' parameter or "
+                "reimplement their 'from_crawler' method.",
+                ScrapyDeprecationWarning,
+            )
+            mw = cls()
+            mw.stats = crawler.stats
+            mw._max_size = crawler.settings.getint("DOWNLOAD_MAXSIZE")
+            mw._warn_size = crawler.settings.getint("DOWNLOAD_WARNSIZE")
+            crawler.signals.connect(mw.open_spider, signals.spider_opened)
+            return mw
+
+    def open_spider(self, spider):
+        if hasattr(spider, "download_maxsize"):
+            self._max_size = spider.download_maxsize
+        if hasattr(spider, "download_warnsize"):
+            self._warn_size = spider.download_warnsize
+
+    def process_request(self, request, spider):
+        request.headers.setdefault("Accept-Encoding", b", ".join(ACCEPTED_ENCODINGS))
+
+    def process_response(self, request, response, spider):
+        if request.method == "HEAD":
+            return response
+        if isinstance(response, Response):
+            content_encoding = response.headers.getlist("Content-Encoding")
+            if content_encoding:
+                encoding = content_encoding.pop()
+                max_size = request.meta.get("download_maxsize", self._max_size)
+                warn_size = request.meta.get("download_warnsize", self._warn_size)
+                try:
+                    decoded_body = self._decode(
+                        response.body, encoding.lower(), max_size
+                    )
+                except _DecompressionMaxSizeExceeded:
+                    raise IgnoreRequest(
+                        f"Ignored response {response} because its body "
+                        f"({len(response.body)} B) exceeded DOWNLOAD_MAXSIZE "
+                        f"({max_size} B) during decompression."
+                    )
+                if len(response.body) < warn_size <= len(decoded_body):
+                    logger.warning(
+                        f"{response} body size after decompression "
+                        f"({len(decoded_body)} B) is larger than the "
+                        f"download warning size ({warn_size} B)."
+                    )
+                if self.stats:
+                    self.stats.inc_value(
+                        "httpcompression/response_bytes",
+                        len(decoded_body),
+                        spider=spider,
+                    )
+                    self.stats.inc_value(
+                        "httpcompression/response_count", spider=spider
+                    )
+                respcls = responsetypes.from_args(
+                    headers=response.headers, url=response.url, body=decoded_body
+                )
+                kwargs = dict(cls=respcls, body=decoded_body)
+                if issubclass(respcls, TextResponse):
+                    # force recalculating the encoding until we make sure the
+                    # responsetypes guessing is reliable
+                    kwargs["encoding"] = None
+                response = response.replace(**kwargs)
+                if not content_encoding:
+                    del response.headers["Content-Encoding"]
+
+        return response
+
+    def _decode(self, body, encoding, max_size):
+        if encoding == b"gzip" or encoding == b"x-gzip":
+            return gunzip(body, max_size=max_size)
+        if encoding == b"deflate":
+            return _inflate(body, max_size=max_size)
+        if encoding == b"br" and b"br" in ACCEPTED_ENCODINGS:
+            return _unbrotli(body, max_size=max_size)
+        if encoding == b"zstd" and b"zstd" in ACCEPTED_ENCODINGS:
+            return _unzstd(body, max_size=max_size)
+        return body
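
A minimal settings sketch for the middleware above; COMPRESSION_ENABLED gates from_crawler(), while DOWNLOAD_MAXSIZE and DOWNLOAD_WARNSIZE bound the decompressed body size checked in process_response():

    # settings.py
    COMPRESSION_ENABLED = True
    DOWNLOAD_MAXSIZE = 1073741824  # 1 GiB; larger decompressed bodies raise IgnoreRequest
    DOWNLOAD_WARNSIZE = 33554432   # 32 MiB; larger decompressed bodies only log a warning
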
diff --git a/scrapy/downloadermiddlewares/httpproxy.py b/scrapy/downloadermiddlewares/httpproxy.py
index 3fb8b5cbc..522237674 100644
--- a/scrapy/downloadermiddlewares/httpproxy.py
+++ b/scrapy/downloadermiddlewares/httpproxy.py
@@ -1,18 +1,83 @@
 import base64
 from urllib.parse import unquote, urlunparse
 from urllib.request import _parse_proxy, getproxies, proxy_bypass
+
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.python import to_bytes


 class HttpProxyMiddleware:
-
-    def __init__(self, auth_encoding='latin-1'):
+    def __init__(self, auth_encoding="latin-1"):
         self.auth_encoding = auth_encoding
         self.proxies = {}
         for type_, url in getproxies().items():
             try:
                 self.proxies[type_] = self._get_proxy(url, type_)
+            # some values such as '/var/run/docker.sock' can't be parsed
+            # by _parse_proxy and as such should be skipped
             except ValueError:
                 continue
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        if not crawler.settings.getbool("HTTPPROXY_ENABLED"):
+            raise NotConfigured
+        auth_encoding = crawler.settings.get("HTTPPROXY_AUTH_ENCODING")
+        return cls(auth_encoding)
+
+    def _basic_auth_header(self, username, password):
+        user_pass = to_bytes(
+            f"{unquote(username)}:{unquote(password)}", encoding=self.auth_encoding
+        )
+        return base64.b64encode(user_pass)
+
+    def _get_proxy(self, url, orig_type):
+        proxy_type, user, password, hostport = _parse_proxy(url)
+        proxy_url = urlunparse((proxy_type or orig_type, hostport, "", "", "", ""))
+
+        if user:
+            creds = self._basic_auth_header(user, password)
+        else:
+            creds = None
+
+        return creds, proxy_url
+
+    def process_request(self, request, spider):
+        creds, proxy_url, scheme = None, None, None
+        if "proxy" in request.meta:
+            if request.meta["proxy"] is not None:
+                creds, proxy_url = self._get_proxy(request.meta["proxy"], "")
+        elif self.proxies:
+            parsed = urlparse_cached(request)
+            _scheme = parsed.scheme
+            if (
+                # 'no_proxy' is only supported by http schemes
+                _scheme not in ("http", "https")
+                or not proxy_bypass(parsed.hostname)
+            ) and _scheme in self.proxies:
+                scheme = _scheme
+                creds, proxy_url = self.proxies[scheme]
+
+        self._set_proxy_and_creds(request, proxy_url, creds, scheme)
+
+    def _set_proxy_and_creds(self, request, proxy_url, creds, scheme):
+        if scheme:
+            request.meta["_scheme_proxy"] = True
+        if proxy_url:
+            request.meta["proxy"] = proxy_url
+        elif request.meta.get("proxy") is not None:
+            request.meta["proxy"] = None
+        if creds:
+            request.headers[b"Proxy-Authorization"] = b"Basic " + creds
+            request.meta["_auth_proxy"] = proxy_url
+        elif "_auth_proxy" in request.meta:
+            if proxy_url != request.meta["_auth_proxy"]:
+                if b"Proxy-Authorization" in request.headers:
+                    del request.headers[b"Proxy-Authorization"]
+                del request.meta["_auth_proxy"]
+        elif b"Proxy-Authorization" in request.headers:
+            if proxy_url:
+                request.meta["_auth_proxy"] = proxy_url
+            else:
+                del request.headers[b"Proxy-Authorization"]
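
A minimal sketch of per-request proxy selection handled by process_request() above; the proxy URL and credentials are placeholders:

    import scrapy

    class ProxiedSpider(scrapy.Spider):
        name = "proxied"

        def start_requests(self):
            # explicit proxy; embedded credentials end up in the Proxy-Authorization header
            yield scrapy.Request(
                "https://example.com",
                meta={"proxy": "http://user:pass@proxy.example.com:8080"},
            )
            # proxy=None opts this request out of the environment proxies
            yield scrapy.Request("https://example.org", meta={"proxy": None})

        def parse(self, response):
            pass
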
diff --git a/scrapy/downloadermiddlewares/offsite.py b/scrapy/downloadermiddlewares/offsite.py
index d5ebe2f64..1e5026925 100644
--- a/scrapy/downloadermiddlewares/offsite.py
+++ b/scrapy/downloadermiddlewares/offsite.py
@@ -1,18 +1,77 @@
 import logging
 import re
 import warnings
+
 from scrapy import signals
 from scrapy.exceptions import IgnoreRequest
 from scrapy.utils.httpobj import urlparse_cached
+
 logger = logging.getLogger(__name__)


 class OffsiteMiddleware:
+    @classmethod
+    def from_crawler(cls, crawler):
+        o = cls(crawler.stats)
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        crawler.signals.connect(o.request_scheduled, signal=signals.request_scheduled)
+        return o

     def __init__(self, stats):
         self.stats = stats
         self.domains_seen = set()

+    def spider_opened(self, spider):
+        self.host_regex = self.get_host_regex(spider)
+
+    def request_scheduled(self, request, spider):
+        self.process_request(request, spider)
+
+    def process_request(self, request, spider):
+        if request.dont_filter or self.should_follow(request, spider):
+            return None
+        domain = urlparse_cached(request).hostname
+        if domain and domain not in self.domains_seen:
+            self.domains_seen.add(domain)
+            logger.debug(
+                "Filtered offsite request to %(domain)r: %(request)s",
+                {"domain": domain, "request": request},
+                extra={"spider": spider},
+            )
+            self.stats.inc_value("offsite/domains", spider=spider)
+        self.stats.inc_value("offsite/filtered", spider=spider)
+        raise IgnoreRequest
+
+    def should_follow(self, request, spider):
+        regex = self.host_regex
+        # hostname can be None for invalid URLs (such as javascript: links)
+        host = urlparse_cached(request).hostname or ""
+        return bool(regex.search(host))
+
     def get_host_regex(self, spider):
         """Override this method to implement a different offsite policy"""
-        pass
+        allowed_domains = getattr(spider, "allowed_domains", None)
+        if not allowed_domains:
+            return re.compile("")  # allow all by default
+        url_pattern = re.compile(r"^https?://.*$")
+        port_pattern = re.compile(r":\d+$")
+        domains = []
+        for domain in allowed_domains:
+            if domain is None:
+                continue
+            if url_pattern.match(domain):
+                message = (
+                    "allowed_domains accepts only domains, not URLs. "
+                    f"Ignoring URL entry {domain} in allowed_domains."
+                )
+                warnings.warn(message)
+            elif port_pattern.search(domain):
+                message = (
+                    "allowed_domains accepts only domains without ports. "
+                    f"Ignoring entry {domain} in allowed_domains."
+                )
+                warnings.warn(message)
+            else:
+                domains.append(re.escape(domain))
+        regex = rf'^(.*\.)?({"|".join(domains)})$'
+        return re.compile(regex)
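
A minimal spider sketch for the allowed_domains attribute that get_host_regex() above compiles into a host regex; the domains are illustrative:

    import scrapy

    class DocsSpider(scrapy.Spider):
        name = "docs"
        allowed_domains = ["docs.scrapy.org"]  # subdomains match via the (.*\.)? prefix
        start_urls = ["https://docs.scrapy.org/en/latest/"]

        def parse(self, response):
            # dont_filter=True bypasses the offsite check in process_request()
            yield scrapy.Request("https://example.com", dont_filter=True)
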
diff --git a/scrapy/downloadermiddlewares/redirect.py b/scrapy/downloadermiddlewares/redirect.py
index dce283f2f..63be1d0ca 100644
--- a/scrapy/downloadermiddlewares/redirect.py
+++ b/scrapy/downloadermiddlewares/redirect.py
@@ -1,21 +1,120 @@
 import logging
 from urllib.parse import urljoin, urlparse
+
 from w3lib.url import safe_url_string
+
 from scrapy.exceptions import IgnoreRequest, NotConfigured
 from scrapy.http import HtmlResponse
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.response import get_meta_refresh
+
 logger = logging.getLogger(__name__)


+def _build_redirect_request(source_request, *, url, **kwargs):
+    redirect_request = source_request.replace(
+        url=url,
+        **kwargs,
+        cookies=None,
+    )
+    if "_scheme_proxy" in redirect_request.meta:
+        source_request_scheme = urlparse_cached(source_request).scheme
+        redirect_request_scheme = urlparse_cached(redirect_request).scheme
+        if source_request_scheme != redirect_request_scheme:
+            redirect_request.meta.pop("_scheme_proxy")
+            redirect_request.meta.pop("proxy", None)
+            redirect_request.meta.pop("_auth_proxy", None)
+            redirect_request.headers.pop(b"Proxy-Authorization", None)
+    has_cookie_header = "Cookie" in redirect_request.headers
+    has_authorization_header = "Authorization" in redirect_request.headers
+    if has_cookie_header or has_authorization_header:
+        default_ports = {"http": 80, "https": 443}
+
+        parsed_source_request = urlparse_cached(source_request)
+        source_scheme, source_host, source_port = (
+            parsed_source_request.scheme,
+            parsed_source_request.hostname,
+            parsed_source_request.port
+            or default_ports.get(parsed_source_request.scheme),
+        )
+
+        parsed_redirect_request = urlparse_cached(redirect_request)
+        redirect_scheme, redirect_host, redirect_port = (
+            parsed_redirect_request.scheme,
+            parsed_redirect_request.hostname,
+            parsed_redirect_request.port
+            or default_ports.get(parsed_redirect_request.scheme),
+        )
+
+        if has_cookie_header and (
+            (source_scheme != redirect_scheme and redirect_scheme != "https")
+            or source_host != redirect_host
+        ):
+            del redirect_request.headers["Cookie"]
+
+        # https://fetch.spec.whatwg.org/#ref-for-cors-non-wildcard-request-header-name
+        if has_authorization_header and (
+            source_scheme != redirect_scheme
+            or source_host != redirect_host
+            or source_port != redirect_port
+        ):
+            del redirect_request.headers["Authorization"]
+
+    return redirect_request
+
+
 class BaseRedirectMiddleware:
-    enabled_setting = 'REDIRECT_ENABLED'
+    enabled_setting = "REDIRECT_ENABLED"

     def __init__(self, settings):
         if not settings.getbool(self.enabled_setting):
             raise NotConfigured
-        self.max_redirect_times = settings.getint('REDIRECT_MAX_TIMES')
-        self.priority_adjust = settings.getint('REDIRECT_PRIORITY_ADJUST')
+
+        self.max_redirect_times = settings.getint("REDIRECT_MAX_TIMES")
+        self.priority_adjust = settings.getint("REDIRECT_PRIORITY_ADJUST")
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings)
+
+    def _redirect(self, redirected, request, spider, reason):
+        ttl = request.meta.setdefault("redirect_ttl", self.max_redirect_times)
+        redirects = request.meta.get("redirect_times", 0) + 1
+
+        if ttl and redirects <= self.max_redirect_times:
+            redirected.meta["redirect_times"] = redirects
+            redirected.meta["redirect_ttl"] = ttl - 1
+            redirected.meta["redirect_urls"] = request.meta.get("redirect_urls", []) + [
+                request.url
+            ]
+            redirected.meta["redirect_reasons"] = request.meta.get(
+                "redirect_reasons", []
+            ) + [reason]
+            redirected.dont_filter = request.dont_filter
+            redirected.priority = request.priority + self.priority_adjust
+            logger.debug(
+                "Redirecting (%(reason)s) to %(redirected)s from %(request)s",
+                {"reason": reason, "redirected": redirected, "request": request},
+                extra={"spider": spider},
+            )
+            return redirected
+        logger.debug(
+            "Discarding %(request)s: max redirections reached",
+            {"request": request},
+            extra={"spider": spider},
+        )
+        raise IgnoreRequest("max redirections reached")
+
+    def _redirect_request_using_get(self, request, redirect_url):
+        redirect_request = _build_redirect_request(
+            request,
+            url=redirect_url,
+            method="GET",
+            body="",
+        )
+        redirect_request.headers.pop("Content-Type", None)
+        redirect_request.headers.pop("Content-Length", None)
+        return redirect_request


 class RedirectMiddleware(BaseRedirectMiddleware):
@@ -24,11 +123,59 @@ class RedirectMiddleware(BaseRedirectMiddleware):
     and meta-refresh html tag.
     """

+    def process_response(self, request, response, spider):
+        if (
+            request.meta.get("dont_redirect", False)
+            or response.status in getattr(spider, "handle_httpstatus_list", [])
+            or response.status in request.meta.get("handle_httpstatus_list", [])
+            or request.meta.get("handle_httpstatus_all", False)
+        ):
+            return response
+
+        allowed_status = (301, 302, 303, 307, 308)
+        if "Location" not in response.headers or response.status not in allowed_status:
+            return response
+
+        location = safe_url_string(response.headers["Location"])
+        if response.headers["Location"].startswith(b"//"):
+            request_scheme = urlparse(request.url).scheme
+            location = request_scheme + "://" + location.lstrip("/")
+
+        redirected_url = urljoin(request.url, location)
+        if urlparse(redirected_url).scheme not in {"http", "https"}:
+            return response
+
+        if response.status in (301, 307, 308) or request.method == "HEAD":
+            redirected = _build_redirect_request(request, url=redirected_url)
+            return self._redirect(redirected, request, spider, response.status)
+
+        redirected = self._redirect_request_using_get(request, redirected_url)
+        return self._redirect(redirected, request, spider, response.status)
+

 class MetaRefreshMiddleware(BaseRedirectMiddleware):
-    enabled_setting = 'METAREFRESH_ENABLED'
+    enabled_setting = "METAREFRESH_ENABLED"

     def __init__(self, settings):
         super().__init__(settings)
-        self._ignore_tags = settings.getlist('METAREFRESH_IGNORE_TAGS')
-        self._maxdelay = settings.getint('METAREFRESH_MAXDELAY')
+        self._ignore_tags = settings.getlist("METAREFRESH_IGNORE_TAGS")
+        self._maxdelay = settings.getint("METAREFRESH_MAXDELAY")
+
+    def process_response(self, request, response, spider):
+        if (
+            request.meta.get("dont_redirect", False)
+            or request.method == "HEAD"
+            or not isinstance(response, HtmlResponse)
+            or urlparse_cached(request).scheme not in {"http", "https"}
+        ):
+            return response
+
+        interval, url = get_meta_refresh(response, ignore_tags=self._ignore_tags)
+        if not url:
+            return response
+        if urlparse(url).scheme not in {"http", "https"}:
+            return response
+        if interval < self._maxdelay:
+            redirected = self._redirect_request_using_get(request, url)
+            return self._redirect(redirected, request, spider, "meta refresh")
+        return response
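
A minimal settings sketch for the two redirect middlewares above, using the setting names read in their __init__ methods; the values shown match the scrapy defaults:

    # settings.py
    REDIRECT_ENABLED = True
    REDIRECT_MAX_TIMES = 20        # per-request budget, tracked via the redirect_ttl meta key
    REDIRECT_PRIORITY_ADJUST = 2
    METAREFRESH_ENABLED = True
    METAREFRESH_MAXDELAY = 100     # meta-refresh intervals at or above this are ignored
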
diff --git a/scrapy/downloadermiddlewares/retry.py b/scrapy/downloadermiddlewares/retry.py
index 38df481a3..380623cea 100644
--- a/scrapy/downloadermiddlewares/retry.py
+++ b/scrapy/downloadermiddlewares/retry.py
@@ -12,6 +12,7 @@ once the spider has finished crawling all regular (non failed) pages.
 import warnings
 from logging import Logger, getLogger
 from typing import Optional, Type, Union
+
 from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
 from scrapy.http.request import Request
 from scrapy.settings import Settings
@@ -19,17 +20,41 @@ from scrapy.spiders import Spider
 from scrapy.utils.misc import load_object
 from scrapy.utils.python import global_object_name
 from scrapy.utils.response import response_status_message
+
 retry_logger = getLogger(__name__)


+def backwards_compatibility_getattr(self, name):
+    if name == "EXCEPTIONS_TO_RETRY":
+        warnings.warn(
+            "Attribute RetryMiddleware.EXCEPTIONS_TO_RETRY is deprecated. "
+            "Use the RETRY_EXCEPTIONS setting instead.",
+            ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return tuple(
+            load_object(x) if isinstance(x, str) else x
+            for x in Settings().getlist("RETRY_EXCEPTIONS")
+        )
+    raise AttributeError(
+        f"{self.__class__.__name__!r} object has no attribute {name!r}"
+    )
+
+
 class BackwardsCompatibilityMetaclass(type):
     __getattr__ = backwards_compatibility_getattr


-def get_retry_request(request: Request, *, spider: Spider, reason: Union[
-    str, Exception, Type[Exception]]='unspecified', max_retry_times:
-    Optional[int]=None, priority_adjust: Optional[int]=None, logger: Logger
-    =retry_logger, stats_base_key: str='retry'):
+def get_retry_request(
+    request: Request,
+    *,
+    spider: Spider,
+    reason: Union[str, Exception, Type[Exception]] = "unspecified",
+    max_retry_times: Optional[int] = None,
+    priority_adjust: Optional[int] = None,
+    logger: Logger = retry_logger,
+    stats_base_key: str = "retry",
+):
     """
     Returns a new :class:`~scrapy.Request` object to retry the specified
     request, or ``None`` if retries of the specified request have been
@@ -70,22 +95,90 @@ def get_retry_request(request: Request, *, spider: Spider, reason: Union[
     *stats_base_key* is a string to be used as the base key for the
     retry-related job stats
     """
-    pass
+    settings = spider.crawler.settings
+    assert spider.crawler.stats
+    stats = spider.crawler.stats
+    retry_times = request.meta.get("retry_times", 0) + 1
+    if max_retry_times is None:
+        max_retry_times = request.meta.get("max_retry_times")
+        if max_retry_times is None:
+            max_retry_times = settings.getint("RETRY_TIMES")
+    if retry_times <= max_retry_times:
+        logger.debug(
+            "Retrying %(request)s (failed %(retry_times)d times): %(reason)s",
+            {"request": request, "retry_times": retry_times, "reason": reason},
+            extra={"spider": spider},
+        )
+        new_request: Request = request.copy()
+        new_request.meta["retry_times"] = retry_times
+        new_request.dont_filter = True
+        if priority_adjust is None:
+            priority_adjust = settings.getint("RETRY_PRIORITY_ADJUST")
+        new_request.priority = request.priority + priority_adjust
+
+        if callable(reason):
+            reason = reason()
+        if isinstance(reason, Exception):
+            reason = global_object_name(reason.__class__)
+
+        stats.inc_value(f"{stats_base_key}/count")
+        stats.inc_value(f"{stats_base_key}/reason_count/{reason}")
+        return new_request
+    stats.inc_value(f"{stats_base_key}/max_reached")
+    logger.error(
+        "Gave up retrying %(request)s (failed %(retry_times)d times): " "%(reason)s",
+        {"request": request, "retry_times": retry_times, "reason": reason},
+        extra={"spider": spider},
+    )
+    return None


 class RetryMiddleware(metaclass=BackwardsCompatibilityMetaclass):
-
     def __init__(self, settings):
-        if not settings.getbool('RETRY_ENABLED'):
+        if not settings.getbool("RETRY_ENABLED"):
             raise NotConfigured
-        self.max_retry_times = settings.getint('RETRY_TIMES')
-        self.retry_http_codes = set(int(x) for x in settings.getlist(
-            'RETRY_HTTP_CODES'))
-        self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
+        self.max_retry_times = settings.getint("RETRY_TIMES")
+        self.retry_http_codes = set(
+            int(x) for x in settings.getlist("RETRY_HTTP_CODES")
+        )
+        self.priority_adjust = settings.getint("RETRY_PRIORITY_ADJUST")
+
         try:
-            self.exceptions_to_retry = self.__getattribute__(
-                'EXCEPTIONS_TO_RETRY')
+            self.exceptions_to_retry = self.__getattribute__("EXCEPTIONS_TO_RETRY")
         except AttributeError:
-            self.exceptions_to_retry = tuple(load_object(x) if isinstance(x,
-                str) else x for x in settings.getlist('RETRY_EXCEPTIONS'))
+            # If EXCEPTIONS_TO_RETRY is not "overridden"
+            self.exceptions_to_retry = tuple(
+                load_object(x) if isinstance(x, str) else x
+                for x in settings.getlist("RETRY_EXCEPTIONS")
+            )
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings)
+
+    def process_response(self, request, response, spider):
+        if request.meta.get("dont_retry", False):
+            return response
+        if response.status in self.retry_http_codes:
+            reason = response_status_message(response.status)
+            return self._retry(request, reason, spider) or response
+        return response
+
+    def process_exception(self, request, exception, spider):
+        if isinstance(exception, self.exceptions_to_retry) and not request.meta.get(
+            "dont_retry", False
+        ):
+            return self._retry(request, exception, spider)
+
+    def _retry(self, request, reason, spider):
+        max_retry_times = request.meta.get("max_retry_times", self.max_retry_times)
+        priority_adjust = request.meta.get("priority_adjust", self.priority_adjust)
+        return get_retry_request(
+            request,
+            reason=reason,
+            spider=spider,
+            max_retry_times=max_retry_times,
+            priority_adjust=priority_adjust,
+        )
+
     __getattr__ = backwards_compatibility_getattr
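
For context, a minimal sketch of how the module-level get_retry_request() helper reformatted above can be called from a spider callback to retry a response manually; the spider name, URL and the "empty title" reason are illustrative only:

    from scrapy import Spider
    from scrapy.downloadermiddlewares.retry import get_retry_request

    class ExampleSpider(Spider):
        name = "example"
        start_urls = ["https://example.com"]

        def parse(self, response):
            if not response.css("title::text").get():
                # Returns a retried copy of the request, or None once max retries are exhausted.
                retried = get_retry_request(response.request, spider=self, reason="empty title")
                if retried:
                    yield retried
                return
            yield {"title": response.css("title::text").get()}
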
diff --git a/scrapy/downloadermiddlewares/robotstxt.py b/scrapy/downloadermiddlewares/robotstxt.py
index e1699b10f..6cab27c5a 100644
--- a/scrapy/downloadermiddlewares/robotstxt.py
+++ b/scrapy/downloadermiddlewares/robotstxt.py
@@ -3,14 +3,18 @@ This is a middleware to respect robots.txt policies. To activate it you must
 enable this middleware and enable the ROBOTSTXT_OBEY setting.

 """
+
 import logging
+
 from twisted.internet.defer import Deferred, maybeDeferred
+
 from scrapy.exceptions import IgnoreRequest, NotConfigured
 from scrapy.http import Request
 from scrapy.http.request import NO_CALLBACK
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.log import failure_to_exc_info
 from scrapy.utils.misc import load_object
+
 logger = logging.getLogger(__name__)


@@ -18,13 +22,100 @@ class RobotsTxtMiddleware:
     DOWNLOAD_PRIORITY = 1000

     def __init__(self, crawler):
-        if not crawler.settings.getbool('ROBOTSTXT_OBEY'):
+        if not crawler.settings.getbool("ROBOTSTXT_OBEY"):
             raise NotConfigured
-        self._default_useragent = crawler.settings.get('USER_AGENT', 'Scrapy')
-        self._robotstxt_useragent = crawler.settings.get('ROBOTSTXT_USER_AGENT'
-            , None)
+        self._default_useragent = crawler.settings.get("USER_AGENT", "Scrapy")
+        self._robotstxt_useragent = crawler.settings.get("ROBOTSTXT_USER_AGENT", None)
         self.crawler = crawler
         self._parsers = {}
-        self._parserimpl = load_object(crawler.settings.get('ROBOTSTXT_PARSER')
+        self._parserimpl = load_object(crawler.settings.get("ROBOTSTXT_PARSER"))
+
+        # Check that parser dependencies are met; this should raise an error otherwise.
+        self._parserimpl.from_crawler(self.crawler, b"")
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def process_request(self, request, spider):
+        if request.meta.get("dont_obey_robotstxt"):
+            return
+        if request.url.startswith("data:") or request.url.startswith("file:"):
+            return
+        d = maybeDeferred(self.robot_parser, request, spider)
+        d.addCallback(self.process_request_2, request, spider)
+        return d
+
+    def process_request_2(self, rp, request, spider):
+        if rp is None:
+            return
+
+        useragent = self._robotstxt_useragent
+        if not useragent:
+            useragent = request.headers.get(b"User-Agent", self._default_useragent)
+        if not rp.allowed(request.url, useragent):
+            logger.debug(
+                "Forbidden by robots.txt: %(request)s",
+                {"request": request},
+                extra={"spider": spider},
             )
-        self._parserimpl.from_crawler(self.crawler, b'')
+            self.crawler.stats.inc_value("robotstxt/forbidden")
+            raise IgnoreRequest("Forbidden by robots.txt")
+
+    def robot_parser(self, request, spider):
+        url = urlparse_cached(request)
+        netloc = url.netloc
+
+        if netloc not in self._parsers:
+            self._parsers[netloc] = Deferred()
+            robotsurl = f"{url.scheme}://{url.netloc}/robots.txt"
+            robotsreq = Request(
+                robotsurl,
+                priority=self.DOWNLOAD_PRIORITY,
+                meta={"dont_obey_robotstxt": True},
+                callback=NO_CALLBACK,
+            )
+            dfd = self.crawler.engine.download(robotsreq)
+            dfd.addCallback(self._parse_robots, netloc, spider)
+            dfd.addErrback(self._logerror, robotsreq, spider)
+            dfd.addErrback(self._robots_error, netloc)
+            self.crawler.stats.inc_value("robotstxt/request_count")
+
+        if isinstance(self._parsers[netloc], Deferred):
+            d = Deferred()
+
+            def cb(result):
+                d.callback(result)
+                return result
+
+            self._parsers[netloc].addCallback(cb)
+            return d
+        return self._parsers[netloc]
+
+    def _logerror(self, failure, request, spider):
+        if failure.type is not IgnoreRequest:
+            logger.error(
+                "Error downloading %(request)s: %(f_exception)s",
+                {"request": request, "f_exception": failure.value},
+                exc_info=failure_to_exc_info(failure),
+                extra={"spider": spider},
+            )
+        return failure
+
+    def _parse_robots(self, response, netloc, spider):
+        self.crawler.stats.inc_value("robotstxt/response_count")
+        self.crawler.stats.inc_value(
+            f"robotstxt/response_status_count/{response.status}"
+        )
+        rp = self._parserimpl.from_crawler(self.crawler, response.body)
+        rp_dfd = self._parsers[netloc]
+        self._parsers[netloc] = rp
+        rp_dfd.callback(rp)
+
+    def _robots_error(self, failure, netloc):
+        if failure.type is not IgnoreRequest:
+            key = f"robotstxt/exception_count/{failure.type}"
+            self.crawler.stats.inc_value(key)
+        rp_dfd = self._parsers[netloc]
+        self._parsers[netloc] = None
+        rp_dfd.callback(None)
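
As a reminder of how this middleware is switched on, a sketch of the relevant project settings; the values shown are illustrative, and ProtegoRobotParser is the stock default parser:

    # settings.py (illustrative values)
    ROBOTSTXT_OBEY = True
    # Optional: match robots.txt rules against this string instead of USER_AGENT.
    ROBOTSTXT_USER_AGENT = "ExampleBot"
    # The parser implementation loaded by RobotsTxtMiddleware; Protego is the default.
    ROBOTSTXT_PARSER = "scrapy.robotstxt.ProtegoRobotParser"
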
diff --git a/scrapy/downloadermiddlewares/stats.py b/scrapy/downloadermiddlewares/stats.py
index 571687317..a0f62e262 100644
--- a/scrapy/downloadermiddlewares/stats.py
+++ b/scrapy/downloadermiddlewares/stats.py
@@ -1,10 +1,60 @@
 from twisted.web import http
+
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.python import global_object_name, to_bytes
 from scrapy.utils.request import request_httprepr


-class DownloaderStats:
+def get_header_size(headers):
+    size = 0
+    for key, value in headers.items():
+        if isinstance(value, (list, tuple)):
+            for v in value:
+                size += len(b": ") + len(key) + len(v)
+    return size + len(b"\r\n") * (len(headers.keys()) - 1)
+
+
+def get_status_size(response_status):
+    return len(to_bytes(http.RESPONSES.get(response_status, b""))) + 15
+    # resp.status + b"\r\n" + b"HTTP/1.1 <100-599> "
+

+class DownloaderStats:
     def __init__(self, stats):
         self.stats = stats
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        if not crawler.settings.getbool("DOWNLOADER_STATS"):
+            raise NotConfigured
+        return cls(crawler.stats)
+
+    def process_request(self, request, spider):
+        self.stats.inc_value("downloader/request_count", spider=spider)
+        self.stats.inc_value(
+            f"downloader/request_method_count/{request.method}", spider=spider
+        )
+        reqlen = len(request_httprepr(request))
+        self.stats.inc_value("downloader/request_bytes", reqlen, spider=spider)
+
+    def process_response(self, request, response, spider):
+        self.stats.inc_value("downloader/response_count", spider=spider)
+        self.stats.inc_value(
+            f"downloader/response_status_count/{response.status}", spider=spider
+        )
+        reslen = (
+            len(response.body)
+            + get_header_size(response.headers)
+            + get_status_size(response.status)
+            + 4
+        )
+        # response.body + b"\r\n" + response.headers + b"\r\n" + response.status
+        self.stats.inc_value("downloader/response_bytes", reslen, spider=spider)
+        return response
+
+    def process_exception(self, request, exception, spider):
+        ex_class = global_object_name(exception.__class__)
+        self.stats.inc_value("downloader/exception_count", spider=spider)
+        self.stats.inc_value(
+            f"downloader/exception_type_count/{ex_class}", spider=spider
+        )
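
A quick sketch of the two byte-counting helpers added above, assuming plain dicts with list values stand in for scrapy.http.Headers:

    from scrapy.downloadermiddlewares.stats import get_header_size, get_status_size

    headers = {b"Content-Type": [b"text/html"], b"Content-Length": [b"42"]}
    print(get_header_size(headers))  # bytes used by "key: value" pairs plus CRLF separators
    print(get_status_size(200))      # len(b"OK") + 15 == 17
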
diff --git a/scrapy/downloadermiddlewares/useragent.py b/scrapy/downloadermiddlewares/useragent.py
index 5199b0472..856a275ab 100644
--- a/scrapy/downloadermiddlewares/useragent.py
+++ b/scrapy/downloadermiddlewares/useragent.py
@@ -1,9 +1,23 @@
 """Set User-Agent header per spider or use a default value from settings"""
+
 from scrapy import signals


 class UserAgentMiddleware:
     """This middleware allows spiders to override the user_agent"""

-    def __init__(self, user_agent='Scrapy'):
+    def __init__(self, user_agent="Scrapy"):
         self.user_agent = user_agent
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        o = cls(crawler.settings["USER_AGENT"])
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        return o
+
+    def spider_opened(self, spider):
+        self.user_agent = getattr(spider, "user_agent", self.user_agent)
+
+    def process_request(self, request, spider):
+        if self.user_agent:
+            request.headers.setdefault(b"User-Agent", self.user_agent)
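
A minimal sketch of the per-spider override that spider_opened() above supports; the spider name, user-agent string and URL are made up for illustration:

    from scrapy import Spider

    class PoliteSpider(Spider):
        name = "polite"
        # Picked up by UserAgentMiddleware.spider_opened() and sent with every request.
        user_agent = "polite-example-bot/1.0"
        start_urls = ["https://example.com"]

        def parse(self, response):
            yield {"title": response.css("title::text").get()}
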
diff --git a/scrapy/dupefilters.py b/scrapy/dupefilters.py
index 684ffcbe6..0b20f53b9 100644
--- a/scrapy/dupefilters.py
+++ b/scrapy/dupefilters.py
@@ -1,23 +1,45 @@
 from __future__ import annotations
+
 import logging
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional, Set
 from warnings import warn
+
 from twisted.internet.defer import Deferred
+
 from scrapy.http.request import Request
 from scrapy.settings import BaseSettings
 from scrapy.spiders import Spider
 from scrapy.utils.deprecate import ScrapyDeprecationWarning
 from scrapy.utils.job import job_dir
-from scrapy.utils.request import RequestFingerprinter, RequestFingerprinterProtocol, referer_str
+from scrapy.utils.request import (
+    RequestFingerprinter,
+    RequestFingerprinterProtocol,
+    referer_str,
+)
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self
+
     from scrapy.crawler import Crawler


 class BaseDupeFilter:
+    @classmethod
+    def from_settings(cls, settings: BaseSettings) -> Self:
+        return cls()

-    def log(self, request: Request, spider: Spider) ->None:
+    def request_seen(self, request: Request) -> bool:
+        return False
+
+    def open(self) -> Optional[Deferred]:
+        pass
+
+    def close(self, reason: str) -> Optional[Deferred]:
+        pass
+
+    def log(self, request: Request, spider: Spider) -> None:
         """Log that a request has been filtered"""
         pass

@@ -25,17 +47,96 @@ class BaseDupeFilter:
 class RFPDupeFilter(BaseDupeFilter):
     """Request Fingerprint duplicates filter"""

-    def __init__(self, path: Optional[str]=None, debug: bool=False, *,
-        fingerprinter: Optional[RequestFingerprinterProtocol]=None) ->None:
+    def __init__(
+        self,
+        path: Optional[str] = None,
+        debug: bool = False,
+        *,
+        fingerprinter: Optional[RequestFingerprinterProtocol] = None,
+    ) -> None:
         self.file = None
-        self.fingerprinter: RequestFingerprinterProtocol = (fingerprinter or
-            RequestFingerprinter())
+        self.fingerprinter: RequestFingerprinterProtocol = (
+            fingerprinter or RequestFingerprinter()
+        )
         self.fingerprints: Set[str] = set()
         self.logdupes = True
         self.debug = debug
         self.logger = logging.getLogger(__name__)
         if path:
-            self.file = Path(path, 'requests.seen').open('a+', encoding='utf-8'
-                )
+            self.file = Path(path, "requests.seen").open("a+", encoding="utf-8")
             self.file.seek(0)
             self.fingerprints.update(x.rstrip() for x in self.file)
+
+    @classmethod
+    def from_settings(
+        cls,
+        settings: BaseSettings,
+        *,
+        fingerprinter: Optional[RequestFingerprinterProtocol] = None,
+    ) -> Self:
+        debug = settings.getbool("DUPEFILTER_DEBUG")
+        try:
+            return cls(job_dir(settings), debug, fingerprinter=fingerprinter)
+        except TypeError:
+            warn(
+                "RFPDupeFilter subclasses must either modify their '__init__' "
+                "method to support a 'fingerprinter' parameter or reimplement "
+                "the 'from_settings' class method.",
+                ScrapyDeprecationWarning,
+            )
+            result = cls(job_dir(settings), debug)
+            result.fingerprinter = fingerprinter or RequestFingerprinter()
+            return result
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        assert crawler.request_fingerprinter
+        try:
+            return cls.from_settings(
+                crawler.settings,
+                fingerprinter=crawler.request_fingerprinter,
+            )
+        except TypeError:
+            warn(
+                "RFPDupeFilter subclasses must either modify their overridden "
+                "'__init__' method and 'from_settings' class method to "
+                "support a 'fingerprinter' parameter, or reimplement the "
+                "'from_crawler' class method.",
+                ScrapyDeprecationWarning,
+            )
+            result = cls.from_settings(crawler.settings)
+            result.fingerprinter = crawler.request_fingerprinter
+            return result
+
+    def request_seen(self, request: Request) -> bool:
+        fp = self.request_fingerprint(request)
+        if fp in self.fingerprints:
+            return True
+        self.fingerprints.add(fp)
+        if self.file:
+            self.file.write(fp + "\n")
+        return False
+
+    def request_fingerprint(self, request: Request) -> str:
+        return self.fingerprinter.fingerprint(request).hex()
+
+    def close(self, reason: str) -> None:
+        if self.file:
+            self.file.close()
+
+    def log(self, request: Request, spider: Spider) -> None:
+        if self.debug:
+            msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
+            args = {"request": request, "referer": referer_str(request)}
+            self.logger.debug(msg, args, extra={"spider": spider})
+        elif self.logdupes:
+            msg = (
+                "Filtered duplicate request: %(request)s"
+                " - no more duplicates will be shown"
+                " (see DUPEFILTER_DEBUG to show all duplicates)"
+            )
+            self.logger.debug(msg, {"request": request}, extra={"spider": spider})
+            self.logdupes = False
+
+        assert spider.crawler.stats
+        spider.crawler.stats.inc_value("dupefilter/filtered", spider=spider)
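
A sketch of customizing the duplicate filter by subclassing RFPDupeFilter, for instance to always log filtered requests; the myproject module path is hypothetical:

    from scrapy.dupefilters import RFPDupeFilter

    class VerboseDupeFilter(RFPDupeFilter):
        """Log every filtered duplicate, regardless of DUPEFILTER_DEBUG."""

        def log(self, request, spider):
            self.logger.debug("Dropped duplicate: %(request)s", {"request": request})
            spider.crawler.stats.inc_value("dupefilter/filtered", spider=spider)

    # settings.py
    # DUPEFILTER_CLASS = "myproject.dupefilters.VerboseDupeFilter"
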
diff --git a/scrapy/exceptions.py b/scrapy/exceptions.py
index 5dee6b7da..6d188c489 100644
--- a/scrapy/exceptions.py
+++ b/scrapy/exceptions.py
@@ -6,9 +6,12 @@ new exceptions here without documenting them there.
 """
 from typing import Any

+# Internal
+

 class NotConfigured(Exception):
     """Indicates a missing configuration situation"""
+
     pass


@@ -17,22 +20,27 @@ class _InvalidOutput(TypeError):
     Indicates an invalid value has been returned by a middleware's processing method.
     Internal and undocumented, it should not be raised or caught by user code.
     """
+
     pass


+# HTTP and crawling
+
+
 class IgnoreRequest(Exception):
     """Indicates a decision was made not to process a request"""


 class DontCloseSpider(Exception):
     """Request the spider not to be closed yet"""
+
     pass


 class CloseSpider(Exception):
     """Raise this from callbacks to request the spider to be closed"""

-    def __init__(self, reason: str='cancelled'):
+    def __init__(self, reason: str = "cancelled"):
         super().__init__()
         self.reason = reason

@@ -44,26 +52,34 @@ class StopDownload(Exception):
     should be handled by the request errback. Note that 'fail' is a keyword-only argument.
     """

-    def __init__(self, *, fail: bool=True):
+    def __init__(self, *, fail: bool = True):
         super().__init__()
         self.fail = fail


+# Items
+
+
 class DropItem(Exception):
     """Drop item from the item pipeline"""
+
     pass


 class NotSupported(Exception):
     """Indicates a feature or method is not supported"""
+
     pass


+# Commands
+
+
 class UsageError(Exception):
     """To indicate a command-line usage error"""

     def __init__(self, *a: Any, **kw: Any):
-        self.print_help = kw.pop('print_help', True)
+        self.print_help = kw.pop("print_help", True)
         super().__init__(*a, **kw)


@@ -71,9 +87,11 @@ class ScrapyDeprecationWarning(Warning):
     """Warning category for deprecated features, since the default
     DeprecationWarning is silenced on Python 2.7+
     """
+
     pass


 class ContractFail(AssertionError):
     """Error raised in case of a failing contract"""
+
     pass
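
A short sketch of the two most commonly raised exceptions above, CloseSpider in a callback and DropItem in an item pipeline; the class names and reason strings are illustrative:

    from scrapy import Spider
    from scrapy.exceptions import CloseSpider, DropItem

    class BudgetSpider(Spider):
        name = "budget"

        def parse(self, response):
            if response.status == 403:
                # Shuts the spider down gracefully with the given reason.
                raise CloseSpider(reason="blocked")
            yield {"url": response.url}

    class RequireUrlPipeline:
        def process_item(self, item, spider):
            if not item.get("url"):
                raise DropItem("missing url")  # the engine discards the item and logs it
            return item
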
diff --git a/scrapy/exporters.py b/scrapy/exporters.py
index d22653341..f85f1dad8 100644
--- a/scrapy/exporters.py
+++ b/scrapy/exporters.py
@@ -1,6 +1,7 @@
 """
 Item Exporters are used to export/serialize items into different formats.
 """
+
 import csv
 import io
 import marshal
@@ -8,17 +9,26 @@ import pickle
 import pprint
 from collections.abc import Mapping
 from xml.sax.saxutils import XMLGenerator
+
 from itemadapter import ItemAdapter, is_item
+
 from scrapy.item import Item
 from scrapy.utils.python import is_listlike, to_bytes, to_unicode
 from scrapy.utils.serialize import ScrapyJSONEncoder
-__all__ = ['BaseItemExporter', 'PprintItemExporter', 'PickleItemExporter',
-    'CsvItemExporter', 'XmlItemExporter', 'JsonLinesItemExporter',
-    'JsonItemExporter', 'MarshalItemExporter']

+__all__ = [
+    "BaseItemExporter",
+    "PprintItemExporter",
+    "PickleItemExporter",
+    "CsvItemExporter",
+    "XmlItemExporter",
+    "JsonLinesItemExporter",
+    "JsonItemExporter",
+    "MarshalItemExporter",
+]

-class BaseItemExporter:

+class BaseItemExporter:
     def __init__(self, *, dont_fail=False, **kwargs):
         self._kwargs = kwargs
         self._configure(kwargs, dont_fail=dont_fail)
@@ -28,72 +38,257 @@ class BaseItemExporter:
         If dont_fail is set, it won't raise an exception on unexpected options
         (useful for using with keyword arguments in subclasses ``__init__`` methods)
         """
+        self.encoding = options.pop("encoding", None)
+        self.fields_to_export = options.pop("fields_to_export", None)
+        self.export_empty_fields = options.pop("export_empty_fields", False)
+        self.indent = options.pop("indent", None)
+        if not dont_fail and options:
+            raise TypeError(f"Unexpected options: {', '.join(options.keys())}")
+
+    def export_item(self, item):
+        raise NotImplementedError
+
+    def serialize_field(self, field, name, value):
+        serializer = field.get("serializer", lambda x: x)
+        return serializer(value)
+
+    def start_exporting(self):
+        pass
+
+    def finish_exporting(self):
         pass

-    def _get_serialized_fields(self, item, default_value=None,
-        include_empty=None):
+    def _get_serialized_fields(self, item, default_value=None, include_empty=None):
         """Return the fields to export as an iterable of tuples
         (name, serialized_value)
         """
-        pass
+        item = ItemAdapter(item)

+        if include_empty is None:
+            include_empty = self.export_empty_fields

-class JsonLinesItemExporter(BaseItemExporter):
+        if self.fields_to_export is None:
+            if include_empty:
+                field_iter = item.field_names()
+            else:
+                field_iter = item.keys()
+        elif isinstance(self.fields_to_export, Mapping):
+            if include_empty:
+                field_iter = self.fields_to_export.items()
+            else:
+                field_iter = (
+                    (x, y) for x, y in self.fields_to_export.items() if x in item
+                )
+        else:
+            if include_empty:
+                field_iter = self.fields_to_export
+            else:
+                field_iter = (x for x in self.fields_to_export if x in item)
+
+        for field_name in field_iter:
+            if isinstance(field_name, str):
+                item_field, output_field = field_name, field_name
+            else:
+                item_field, output_field = field_name
+            if item_field in item:
+                field_meta = item.get_field_meta(item_field)
+                value = self.serialize_field(field_meta, output_field, item[item_field])
+            else:
+                value = default_value

+            yield output_field, value
+
+
+class JsonLinesItemExporter(BaseItemExporter):
     def __init__(self, file, **kwargs):
         super().__init__(dont_fail=True, **kwargs)
         self.file = file
-        self._kwargs.setdefault('ensure_ascii', not self.encoding)
+        self._kwargs.setdefault("ensure_ascii", not self.encoding)
         self.encoder = ScrapyJSONEncoder(**self._kwargs)

+    def export_item(self, item):
+        itemdict = dict(self._get_serialized_fields(item))
+        data = self.encoder.encode(itemdict) + "\n"
+        self.file.write(to_bytes(data, self.encoding))

-class JsonItemExporter(BaseItemExporter):

+class JsonItemExporter(BaseItemExporter):
     def __init__(self, file, **kwargs):
         super().__init__(dont_fail=True, **kwargs)
         self.file = file
-        json_indent = (self.indent if self.indent is not None and self.
-            indent > 0 else None)
-        self._kwargs.setdefault('indent', json_indent)
-        self._kwargs.setdefault('ensure_ascii', not self.encoding)
+        # There is a small difference between the behaviour of JsonItemExporter.indent
+        # and ScrapyJSONEncoder.indent: ScrapyJSONEncoder.indent=None is needed to prevent
+        # the addition of newlines everywhere.
+        json_indent = (
+            self.indent if self.indent is not None and self.indent > 0 else None
+        )
+        self._kwargs.setdefault("indent", json_indent)
+        self._kwargs.setdefault("ensure_ascii", not self.encoding)
         self.encoder = ScrapyJSONEncoder(**self._kwargs)
         self.first_item = True

+    def _beautify_newline(self):
+        if self.indent is not None:
+            self.file.write(b"\n")

-class XmlItemExporter(BaseItemExporter):
+    def _add_comma_after_first(self):
+        if self.first_item:
+            self.first_item = False
+        else:
+            self.file.write(b",")
+            self._beautify_newline()

+    def start_exporting(self):
+        self.file.write(b"[")
+        self._beautify_newline()
+
+    def finish_exporting(self):
+        self._beautify_newline()
+        self.file.write(b"]")
+
+    def export_item(self, item):
+        itemdict = dict(self._get_serialized_fields(item))
+        data = to_bytes(self.encoder.encode(itemdict), self.encoding)
+        self._add_comma_after_first()
+        self.file.write(data)
+
+
+class XmlItemExporter(BaseItemExporter):
     def __init__(self, file, **kwargs):
-        self.item_element = kwargs.pop('item_element', 'item')
-        self.root_element = kwargs.pop('root_element', 'items')
+        self.item_element = kwargs.pop("item_element", "item")
+        self.root_element = kwargs.pop("root_element", "items")
         super().__init__(**kwargs)
         if not self.encoding:
-            self.encoding = 'utf-8'
+            self.encoding = "utf-8"
         self.xg = XMLGenerator(file, encoding=self.encoding)

+    def _beautify_newline(self, new_item=False):
+        if self.indent is not None and (self.indent > 0 or new_item):
+            self.xg.characters("\n")

-class CsvItemExporter(BaseItemExporter):
+    def _beautify_indent(self, depth=1):
+        if self.indent:
+            self.xg.characters(" " * self.indent * depth)

-    def __init__(self, file, include_headers_line=True, join_multivalued=
-        ',', errors=None, **kwargs):
+    def start_exporting(self):
+        self.xg.startDocument()
+        self.xg.startElement(self.root_element, {})
+        self._beautify_newline(new_item=True)
+
+    def export_item(self, item):
+        self._beautify_indent(depth=1)
+        self.xg.startElement(self.item_element, {})
+        self._beautify_newline()
+        for name, value in self._get_serialized_fields(item, default_value=""):
+            self._export_xml_field(name, value, depth=2)
+        self._beautify_indent(depth=1)
+        self.xg.endElement(self.item_element)
+        self._beautify_newline(new_item=True)
+
+    def finish_exporting(self):
+        self.xg.endElement(self.root_element)
+        self.xg.endDocument()
+
+    def _export_xml_field(self, name, serialized_value, depth):
+        self._beautify_indent(depth=depth)
+        self.xg.startElement(name, {})
+        if hasattr(serialized_value, "items"):
+            self._beautify_newline()
+            for subname, value in serialized_value.items():
+                self._export_xml_field(subname, value, depth=depth + 1)
+            self._beautify_indent(depth=depth)
+        elif is_listlike(serialized_value):
+            self._beautify_newline()
+            for value in serialized_value:
+                self._export_xml_field("value", value, depth=depth + 1)
+            self._beautify_indent(depth=depth)
+        elif isinstance(serialized_value, str):
+            self.xg.characters(serialized_value)
+        else:
+            self.xg.characters(str(serialized_value))
+        self.xg.endElement(name)
+        self._beautify_newline()
+
+
+class CsvItemExporter(BaseItemExporter):
+    def __init__(
+        self,
+        file,
+        include_headers_line=True,
+        join_multivalued=",",
+        errors=None,
+        **kwargs,
+    ):
         super().__init__(dont_fail=True, **kwargs)
         if not self.encoding:
-            self.encoding = 'utf-8'
+            self.encoding = "utf-8"
         self.include_headers_line = include_headers_line
-        self.stream = io.TextIOWrapper(file, line_buffering=False,
-            write_through=True, encoding=self.encoding, newline='', errors=
-            errors)
+        self.stream = io.TextIOWrapper(
+            file,
+            line_buffering=False,
+            write_through=True,
+            encoding=self.encoding,
+            newline="",  # Windows needs this https://github.com/scrapy/scrapy/issues/3034
+            errors=errors,
+        )
         self.csv_writer = csv.writer(self.stream, **self._kwargs)
         self._headers_not_written = True
         self._join_multivalued = join_multivalued

+    def serialize_field(self, field, name, value):
+        serializer = field.get("serializer", self._join_if_needed)
+        return serializer(value)
+
+    def _join_if_needed(self, value):
+        if isinstance(value, (list, tuple)):
+            try:
+                return self._join_multivalued.join(value)
+            except TypeError:  # list in value may not contain strings
+                pass
+        return value
+
+    def export_item(self, item):
+        if self._headers_not_written:
+            self._headers_not_written = False
+            self._write_headers_and_set_fields_to_export(item)
+
+        fields = self._get_serialized_fields(item, default_value="", include_empty=True)
+        values = list(self._build_row(x for _, x in fields))
+        self.csv_writer.writerow(values)
+
+    def finish_exporting(self):
+        self.stream.detach()  # Avoid closing the wrapped file.
+
+    def _build_row(self, values):
+        for s in values:
+            try:
+                yield to_unicode(s, self.encoding)
+            except TypeError:
+                yield s
+
+    def _write_headers_and_set_fields_to_export(self, item):
+        if self.include_headers_line:
+            if not self.fields_to_export:
+                # use declared field names, or keys if the item is a dict
+                self.fields_to_export = ItemAdapter(item).field_names()
+            if isinstance(self.fields_to_export, Mapping):
+                fields = self.fields_to_export.values()
+            else:
+                fields = self.fields_to_export
+            row = list(self._build_row(fields))
+            self.csv_writer.writerow(row)

-class PickleItemExporter(BaseItemExporter):

+class PickleItemExporter(BaseItemExporter):
     def __init__(self, file, protocol=4, **kwargs):
         super().__init__(**kwargs)
         self.file = file
         self.protocol = protocol

+    def export_item(self, item):
+        d = dict(self._get_serialized_fields(item))
+        pickle.dump(d, self.file, self.protocol)
+

 class MarshalItemExporter(BaseItemExporter):
     """Exports items in a Python-specific binary format (see
@@ -108,13 +303,19 @@ class MarshalItemExporter(BaseItemExporter):
         super().__init__(**kwargs)
         self.file = file

+    def export_item(self, item):
+        marshal.dump(dict(self._get_serialized_fields(item)), self.file)

-class PprintItemExporter(BaseItemExporter):

+class PprintItemExporter(BaseItemExporter):
     def __init__(self, file, **kwargs):
         super().__init__(**kwargs)
         self.file = file

+    def export_item(self, item):
+        itemdict = dict(self._get_serialized_fields(item))
+        self.file.write(to_bytes(pprint.pformat(itemdict) + "\n"))
+

 class PythonItemExporter(BaseItemExporter):
     """This is a base class for item exporters that extends
@@ -125,3 +326,31 @@ class PythonItemExporter(BaseItemExporter):

     .. _msgpack: https://pypi.org/project/msgpack/
     """
+
+    def _configure(self, options, dont_fail=False):
+        super()._configure(options, dont_fail)
+        if not self.encoding:
+            self.encoding = "utf-8"
+
+    def serialize_field(self, field, name, value):
+        serializer = field.get("serializer", self._serialize_value)
+        return serializer(value)
+
+    def _serialize_value(self, value):
+        if isinstance(value, Item):
+            return self.export_item(value)
+        if is_item(value):
+            return dict(self._serialize_item(value))
+        if is_listlike(value):
+            return [self._serialize_value(v) for v in value]
+        if isinstance(value, (str, bytes)):
+            return to_unicode(value, encoding=self.encoding)
+        return value
+
+    def _serialize_item(self, item):
+        for key, value in ItemAdapter(item).items():
+            yield key, self._serialize_value(value)
+
+    def export_item(self, item):
+        result = dict(self._get_serialized_fields(item))
+        return result
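
A standalone sketch of the exporter protocol restored above (start_exporting / export_item / finish_exporting), using plain dict items and illustrative file names:

    from scrapy.exporters import CsvItemExporter, JsonItemExporter

    items = [{"name": "foo", "tags": ["a", "b"]}, {"name": "bar", "tags": ["c"]}]

    with open("items.json", "wb") as f:  # exporters write bytes
        exporter = JsonItemExporter(f, indent=2)
        exporter.start_exporting()
        for item in items:
            exporter.export_item(item)
        exporter.finish_exporting()

    with open("items.csv", "wb") as f:
        exporter = CsvItemExporter(f, join_multivalued=";")  # list values become "a;b"
        exporter.start_exporting()
        for item in items:
            exporter.export_item(item)
        exporter.finish_exporting()
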
diff --git a/scrapy/extension.py b/scrapy/extension.py
index 27464cc17..4e365cfa1 100644
--- a/scrapy/extension.py
+++ b/scrapy/extension.py
@@ -8,4 +8,8 @@ from scrapy.utils.conf import build_component_list


 class ExtensionManager(MiddlewareManager):
-    component_name = 'extension'
+    component_name = "extension"
+
+    @classmethod
+    def _get_mwlist_from_settings(cls, settings):
+        return build_component_list(settings.getwithbase("EXTENSIONS"))
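
For reference, a sketch of the EXTENSIONS setting that _get_mwlist_from_settings() reads; the myproject path is hypothetical, while the telnet path is a built-in extension:

    # settings.py (illustrative)
    EXTENSIONS = {
        # Enable a custom extension; the number is its order in the component list.
        "myproject.extensions.SpiderOpenCloseLogging": 500,
        # Setting a built-in extension to None disables it.
        "scrapy.extensions.telnet.TelnetConsole": None,
    }
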
diff --git a/scrapy/extensions/closespider.py b/scrapy/extensions/closespider.py
index a01d48a8e..4307b4170 100644
--- a/scrapy/extensions/closespider.py
+++ b/scrapy/extensions/closespider.py
@@ -3,44 +3,110 @@ conditions are met.

 See documentation in docs/topics/extensions.rst
 """
+
 import logging
 from collections import defaultdict
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
+
 logger = logging.getLogger(__name__)


 class CloseSpider:
-
     def __init__(self, crawler):
         self.crawler = crawler
-        self.close_on = {'timeout': crawler.settings.getfloat(
-            'CLOSESPIDER_TIMEOUT'), 'itemcount': crawler.settings.getint(
-            'CLOSESPIDER_ITEMCOUNT'), 'pagecount': crawler.settings.getint(
-            'CLOSESPIDER_PAGECOUNT'), 'errorcount': crawler.settings.getint
-            ('CLOSESPIDER_ERRORCOUNT'), 'timeout_no_item': crawler.settings
-            .getint('CLOSESPIDER_TIMEOUT_NO_ITEM')}
+
+        self.close_on = {
+            "timeout": crawler.settings.getfloat("CLOSESPIDER_TIMEOUT"),
+            "itemcount": crawler.settings.getint("CLOSESPIDER_ITEMCOUNT"),
+            "pagecount": crawler.settings.getint("CLOSESPIDER_PAGECOUNT"),
+            "errorcount": crawler.settings.getint("CLOSESPIDER_ERRORCOUNT"),
+            "timeout_no_item": crawler.settings.getint("CLOSESPIDER_TIMEOUT_NO_ITEM"),
+        }
+
         if not any(self.close_on.values()):
             raise NotConfigured
+
         self.counter = defaultdict(int)
-        if self.close_on.get('errorcount'):
-            crawler.signals.connect(self.error_count, signal=signals.
-                spider_error)
-        if self.close_on.get('pagecount'):
-            crawler.signals.connect(self.page_count, signal=signals.
-                response_received)
-        if self.close_on.get('timeout'):
-            crawler.signals.connect(self.spider_opened, signal=signals.
-                spider_opened)
-        if self.close_on.get('itemcount'):
-            crawler.signals.connect(self.item_scraped, signal=signals.
-                item_scraped)
-        if self.close_on.get('timeout_no_item'):
-            self.timeout_no_item = self.close_on['timeout_no_item']
+
+        if self.close_on.get("errorcount"):
+            crawler.signals.connect(self.error_count, signal=signals.spider_error)
+        if self.close_on.get("pagecount"):
+            crawler.signals.connect(self.page_count, signal=signals.response_received)
+        if self.close_on.get("timeout"):
+            crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
+        if self.close_on.get("itemcount"):
+            crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
+        if self.close_on.get("timeout_no_item"):
+            self.timeout_no_item = self.close_on["timeout_no_item"]
+            self.items_in_period = 0
+            crawler.signals.connect(
+                self.spider_opened_no_item, signal=signals.spider_opened
+            )
+            crawler.signals.connect(
+                self.item_scraped_no_item, signal=signals.item_scraped
+            )
+        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def error_count(self, failure, response, spider):
+        self.counter["errorcount"] += 1
+        if self.counter["errorcount"] == self.close_on["errorcount"]:
+            self.crawler.engine.close_spider(spider, "closespider_errorcount")
+
+    def page_count(self, response, request, spider):
+        self.counter["pagecount"] += 1
+        if self.counter["pagecount"] == self.close_on["pagecount"]:
+            self.crawler.engine.close_spider(spider, "closespider_pagecount")
+
+    def spider_opened(self, spider):
+        from twisted.internet import reactor
+
+        self.task = reactor.callLater(
+            self.close_on["timeout"],
+            self.crawler.engine.close_spider,
+            spider,
+            reason="closespider_timeout",
+        )
+
+    def item_scraped(self, item, spider):
+        self.counter["itemcount"] += 1
+        if self.counter["itemcount"] == self.close_on["itemcount"]:
+            self.crawler.engine.close_spider(spider, "closespider_itemcount")
+
+    def spider_closed(self, spider):
+        task = getattr(self, "task", False)
+        if task and task.active():
+            task.cancel()
+
+        task_no_item = getattr(self, "task_no_item", False)
+        if task_no_item and task_no_item.running:
+            task_no_item.stop()
+
+    def spider_opened_no_item(self, spider):
+        from twisted.internet import task
+
+        self.task_no_item = task.LoopingCall(self._count_items_produced, spider)
+        self.task_no_item.start(self.timeout_no_item, now=False)
+
+        logger.info(
+            f"Spider will stop when no items are produced after "
+            f"{self.timeout_no_item} seconds."
+        )
+
+    def item_scraped_no_item(self, item, spider):
+        self.items_in_period += 1
+
+    def _count_items_produced(self, spider):
+        if self.items_in_period >= 1:
             self.items_in_period = 0
-            crawler.signals.connect(self.spider_opened_no_item, signal=
-                signals.spider_opened)
-            crawler.signals.connect(self.item_scraped_no_item, signal=
-                signals.item_scraped)
-        crawler.signals.connect(self.spider_closed, signal=signals.
-            spider_closed)
+        else:
+            logger.info(
+                f"Closing spider since no items were produced in the last "
+                f"{self.timeout_no_item} seconds."
+            )
+            self.crawler.engine.close_spider(spider, "closespider_timeout_no_item")
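
A sketch of the settings this extension reacts to; the numbers are illustrative, and leaving all of them at 0 keeps the extension disabled (NotConfigured):

    # settings.py (illustrative values)
    CLOSESPIDER_TIMEOUT = 3600          # seconds after spider_opened
    CLOSESPIDER_ITEMCOUNT = 1000        # close after this many scraped items
    CLOSESPIDER_PAGECOUNT = 5000        # close after this many responses
    CLOSESPIDER_ERRORCOUNT = 10         # close after this many spider errors
    CLOSESPIDER_TIMEOUT_NO_ITEM = 300   # close if no item is scraped for this long
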
diff --git a/scrapy/extensions/corestats.py b/scrapy/extensions/corestats.py
index c8451087e..302a615f2 100644
--- a/scrapy/extensions/corestats.py
+++ b/scrapy/extensions/corestats.py
@@ -2,11 +2,46 @@
 Extension for collecting core stats like items scraped and start/finish times
 """
 from datetime import datetime, timezone
+
 from scrapy import signals


 class CoreStats:
-
     def __init__(self, stats):
         self.stats = stats
         self.start_time = None
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        o = cls(crawler.stats)
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+        crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
+        crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
+        crawler.signals.connect(o.response_received, signal=signals.response_received)
+        return o
+
+    def spider_opened(self, spider):
+        self.start_time = datetime.now(tz=timezone.utc)
+        self.stats.set_value("start_time", self.start_time, spider=spider)
+
+    def spider_closed(self, spider, reason):
+        finish_time = datetime.now(tz=timezone.utc)
+        elapsed_time = finish_time - self.start_time
+        elapsed_time_seconds = elapsed_time.total_seconds()
+        self.stats.set_value(
+            "elapsed_time_seconds", elapsed_time_seconds, spider=spider
+        )
+        self.stats.set_value("finish_time", finish_time, spider=spider)
+        self.stats.set_value("finish_reason", reason, spider=spider)
+
+    def item_scraped(self, item, spider):
+        self.stats.inc_value("item_scraped_count", spider=spider)
+
+    def response_received(self, spider):
+        self.stats.inc_value("response_received_count", spider=spider)
+
+    def item_dropped(self, item, spider, exception):
+        reason = exception.__class__.__name__
+        self.stats.inc_value("item_dropped_count", spider=spider)
+        self.stats.inc_value(f"item_dropped_reasons_count/{reason}", spider=spider)
diff --git a/scrapy/extensions/debug.py b/scrapy/extensions/debug.py
index cac078bc7..1b6c7777f 100644
--- a/scrapy/extensions/debug.py
+++ b/scrapy/extensions/debug.py
@@ -3,32 +3,64 @@ Extensions for debugging Scrapy

 See documentation in docs/topics/extensions.rst
 """
+
 import logging
 import signal
 import sys
 import threading
 import traceback
 from pdb import Pdb
+
 from scrapy.utils.engine import format_engine_status
 from scrapy.utils.trackref import format_live_refs
+
 logger = logging.getLogger(__name__)


 class StackTraceDump:
-
     def __init__(self, crawler=None):
         self.crawler = crawler
         try:
             signal.signal(signal.SIGUSR2, self.dump_stacktrace)
             signal.signal(signal.SIGQUIT, self.dump_stacktrace)
         except AttributeError:
+            # win32 platforms don't support SIGUSR signals
             pass

+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)

-class Debugger:
+    def dump_stacktrace(self, signum, frame):
+        log_args = {
+            "stackdumps": self._thread_stacks(),
+            "enginestatus": format_engine_status(self.crawler.engine),
+            "liverefs": format_live_refs(),
+        }
+        logger.info(
+            "Dumping stack trace and engine status\n"
+            "%(enginestatus)s\n%(liverefs)s\n%(stackdumps)s",
+            log_args,
+            extra={"crawler": self.crawler},
+        )

+    def _thread_stacks(self):
+        id2name = dict((th.ident, th.name) for th in threading.enumerate())
+        dumps = ""
+        for id_, frame in sys._current_frames().items():
+            name = id2name.get(id_, "")
+            dump = "".join(traceback.format_stack(frame))
+            dumps += f"# Thread: {name}({id_})\n{dump}\n"
+        return dumps
+
+
+class Debugger:
     def __init__(self):
         try:
             signal.signal(signal.SIGUSR2, self._enter_debugger)
         except AttributeError:
+            # win32 platforms don't support SIGUSR signals
             pass
+
+    def _enter_debugger(self, signum, frame):
+        Pdb().set_trace(frame.f_back)
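
A sketch of triggering the stack dump on a POSIX system, assuming the StackTraceDump extension is enabled in the running process; sending the signal from another shell with kill -QUIT <pid> works the same way:

    import os
    import signal

    # Makes StackTraceDump.dump_stacktrace() log thread stacks, engine status
    # and live object references for the current process.
    os.kill(os.getpid(), signal.SIGQUIT)
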
diff --git a/scrapy/extensions/feedexport.py b/scrapy/extensions/feedexport.py
index a30ae2dbd..4e846d1bd 100644
--- a/scrapy/extensions/feedexport.py
+++ b/scrapy/extensions/feedexport.py
@@ -3,6 +3,7 @@ Feed Exports extension

 See documentation in docs/topics/feed-exports.rst
 """
+
 import logging
 import re
 import sys
@@ -12,10 +13,12 @@ from pathlib import Path, PureWindowsPath
 from tempfile import NamedTemporaryFile
 from typing import IO, Any, Callable, Dict, List, Optional, Tuple, Union
 from urllib.parse import unquote, urlparse
+
 from twisted.internet import defer, threads
 from twisted.internet.defer import DeferredList
 from w3lib.url import file_uri_to_path
 from zope.interface import Interface, implementer
+
 from scrapy import Spider, signals
 from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
 from scrapy.extensions.postprocessing import PostProcessingManager
@@ -27,14 +30,32 @@ from scrapy.utils.ftp import ftp_store_file
 from scrapy.utils.log import failure_to_exc_info
 from scrapy.utils.misc import create_instance, load_object
 from scrapy.utils.python import get_func_args, without_none_values
+
 logger = logging.getLogger(__name__)
+
 try:
-    import boto3
+    import boto3  # noqa: F401
+
     IS_BOTO3_AVAILABLE = True
 except ImportError:
     IS_BOTO3_AVAILABLE = False


+def build_storage(builder, uri, *args, feed_options=None, preargs=(), **kwargs):
+    argument_names = get_func_args(builder)
+    if "feed_options" in argument_names:
+        kwargs["feed_options"] = feed_options
+    else:
+        warnings.warn(
+            f"{builder.__qualname__} does not support the 'feed_options' keyword argument. Add a "
+            "'feed_options' parameter to its signature to remove this "
+            "warning. This parameter will become mandatory in a future "
+            "version of Scrapy.",
+            category=ScrapyDeprecationWarning,
+        )
+    return builder(*preargs, uri, *args, **kwargs)
+
+
 class ItemFilter:
     """
     This will be used by FeedExporter to decide if an item should be allowed
@@ -43,18 +64,21 @@ class ItemFilter:
     :param feed_options: feed specific options passed from FeedExporter
     :type feed_options: dict
     """
+
     feed_options: Optional[dict]
     item_classes: Tuple

-    def __init__(self, feed_options: Optional[dict]) ->None:
+    def __init__(self, feed_options: Optional[dict]) -> None:
         self.feed_options = feed_options
         if feed_options is not None:
-            self.item_classes = tuple(load_object(item_class) for
-                item_class in feed_options.get('item_classes') or ())
+            self.item_classes = tuple(
+                load_object(item_class)
+                for item_class in feed_options.get("item_classes") or ()
+            )
         else:
             self.item_classes = tuple()

-    def accepts(self, item: Any) ->bool:
+    def accepts(self, item: Any) -> bool:
         """
         Return ``True`` if `item` should be exported or ``False`` otherwise.

@@ -63,7 +87,9 @@ class ItemFilter:
         :return: `True` if accepted, `False` otherwise
         :rtype: bool
         """
-        pass
+        if self.item_classes:
+            return isinstance(item, self.item_classes)
+        return True  # accept all items by default


 class IFeedStorage(Interface):
@@ -76,175 +102,437 @@ class IFeedStorage(Interface):
     def open(spider):
         """Open the storage for the given spider. It must return a file-like
         object that will be used for the exporters"""
-        pass

     def store(file):
         """Store the given file stream"""
-        pass


 @implementer(IFeedStorage)
 class BlockingFeedStorage:
-    pass
+    def open(self, spider):
+        path = spider.crawler.settings["FEED_TEMPDIR"]
+        if path and not Path(path).is_dir():
+            raise OSError("Not a Directory: " + str(path))
+
+        return NamedTemporaryFile(prefix="feed-", dir=path)
+
+    def store(self, file):
+        return threads.deferToThread(self._store_in_thread, file)
+
+    def _store_in_thread(self, file):
+        raise NotImplementedError


 @implementer(IFeedStorage)
 class StdoutFeedStorage:
-
     def __init__(self, uri, _stdout=None, *, feed_options=None):
         if not _stdout:
             _stdout = sys.stdout.buffer
         self._stdout = _stdout
-        if feed_options and feed_options.get('overwrite', False) is True:
+        if feed_options and feed_options.get("overwrite", False) is True:
             logger.warning(
-                'Standard output (stdout) storage does not support overwriting. To suppress this warning, remove the overwrite option from your FEEDS setting, or set it to False.'
-                )
+                "Standard output (stdout) storage does not support "
+                "overwriting. To suppress this warning, remove the "
+                "overwrite option from your FEEDS setting, or set "
+                "it to False."
+            )
+
+    def open(self, spider):
+        return self._stdout
+
+    def store(self, file):
+        pass


 @implementer(IFeedStorage)
 class FileFeedStorage:
-
     def __init__(self, uri, *, feed_options=None):
         self.path = file_uri_to_path(uri)
         feed_options = feed_options or {}
-        self.write_mode = 'wb' if feed_options.get('overwrite', False
-            ) else 'ab'
+        self.write_mode = "wb" if feed_options.get("overwrite", False) else "ab"

+    def open(self, spider) -> IO[Any]:
+        dirname = Path(self.path).parent
+        if dirname and not dirname.exists():
+            dirname.mkdir(parents=True)
+        return Path(self.path).open(self.write_mode)

-class S3FeedStorage(BlockingFeedStorage):
+    def store(self, file):
+        file.close()

-    def __init__(self, uri, access_key=None, secret_key=None, acl=None,
-        endpoint_url=None, *, feed_options=None, session_token=None,
-        region_name=None):
+
+class S3FeedStorage(BlockingFeedStorage):
+    def __init__(
+        self,
+        uri,
+        access_key=None,
+        secret_key=None,
+        acl=None,
+        endpoint_url=None,
+        *,
+        feed_options=None,
+        session_token=None,
+        region_name=None,
+    ):
         if not is_botocore_available():
-            raise NotConfigured('missing botocore library')
+            raise NotConfigured("missing botocore library")
         u = urlparse(uri)
         self.bucketname = u.hostname
         self.access_key = u.username or access_key
         self.secret_key = u.password or secret_key
         self.session_token = session_token
-        self.keyname = u.path[1:]
+        self.keyname = u.path[1:]  # remove first "/"
         self.acl = acl
         self.endpoint_url = endpoint_url
         self.region_name = region_name
+
         if IS_BOTO3_AVAILABLE:
             import boto3.session
+
             session = boto3.session.Session()
-            self.s3_client = session.client('s3', aws_access_key_id=self.
-                access_key, aws_secret_access_key=self.secret_key,
-                aws_session_token=self.session_token, endpoint_url=self.
-                endpoint_url, region_name=self.region_name)
+
+            self.s3_client = session.client(
+                "s3",
+                aws_access_key_id=self.access_key,
+                aws_secret_access_key=self.secret_key,
+                aws_session_token=self.session_token,
+                endpoint_url=self.endpoint_url,
+                region_name=self.region_name,
+            )
         else:
             warnings.warn(
-                '`botocore` usage has been deprecated for S3 feed export, please use `boto3` to avoid problems'
-                , category=ScrapyDeprecationWarning)
+                "`botocore` usage has been deprecated for S3 feed "
+                "export, please use `boto3` to avoid problems",
+                category=ScrapyDeprecationWarning,
+            )
+
             import botocore.session
+
             session = botocore.session.get_session()
-            self.s3_client = session.create_client('s3', aws_access_key_id=
-                self.access_key, aws_secret_access_key=self.secret_key,
-                aws_session_token=self.session_token, endpoint_url=self.
-                endpoint_url, region_name=self.region_name)
-        if feed_options and feed_options.get('overwrite', True) is False:
+
+            self.s3_client = session.create_client(
+                "s3",
+                aws_access_key_id=self.access_key,
+                aws_secret_access_key=self.secret_key,
+                aws_session_token=self.session_token,
+                endpoint_url=self.endpoint_url,
+                region_name=self.region_name,
+            )
+
+        if feed_options and feed_options.get("overwrite", True) is False:
             logger.warning(
-                'S3 does not support appending to files. To suppress this warning, remove the overwrite option from your FEEDS setting or set it to True.'
-                )
+                "S3 does not support appending to files. To "
+                "suppress this warning, remove the overwrite "
+                "option from your FEEDS setting or set it to True."
+            )
+
+    @classmethod
+    def from_crawler(cls, crawler, uri, *, feed_options=None):
+        return build_storage(
+            cls,
+            uri,
+            access_key=crawler.settings["AWS_ACCESS_KEY_ID"],
+            secret_key=crawler.settings["AWS_SECRET_ACCESS_KEY"],
+            session_token=crawler.settings["AWS_SESSION_TOKEN"],
+            acl=crawler.settings["FEED_STORAGE_S3_ACL"] or None,
+            endpoint_url=crawler.settings["AWS_ENDPOINT_URL"] or None,
+            region_name=crawler.settings["AWS_REGION_NAME"] or None,
+            feed_options=feed_options,
+        )
+
+    def _store_in_thread(self, file):
+        file.seek(0)
+        if IS_BOTO3_AVAILABLE:
+            kwargs = {"ExtraArgs": {"ACL": self.acl}} if self.acl else {}
+            self.s3_client.upload_fileobj(
+                Bucket=self.bucketname, Key=self.keyname, Fileobj=file, **kwargs
+            )
+        else:
+            kwargs = {"ACL": self.acl} if self.acl else {}
+            self.s3_client.put_object(
+                Bucket=self.bucketname, Key=self.keyname, Body=file, **kwargs
+            )
+        file.close()


 class GCSFeedStorage(BlockingFeedStorage):
-
     def __init__(self, uri, project_id, acl):
         self.project_id = project_id
         self.acl = acl
         u = urlparse(uri)
         self.bucket_name = u.hostname
-        self.blob_name = u.path[1:]
+        self.blob_name = u.path[1:]  # remove first "/"

+    @classmethod
+    def from_crawler(cls, crawler, uri):
+        return cls(
+            uri,
+            crawler.settings["GCS_PROJECT_ID"],
+            crawler.settings["FEED_STORAGE_GCS_ACL"] or None,
+        )

-class FTPFeedStorage(BlockingFeedStorage):
+    def _store_in_thread(self, file):
+        file.seek(0)
+        from google.cloud.storage import Client

-    def __init__(self, uri: str, use_active_mode: bool=False, *,
-        feed_options: Optional[Dict[str, Any]]=None):
+        client = Client(project=self.project_id)
+        bucket = client.get_bucket(self.bucket_name)
+        blob = bucket.blob(self.blob_name)
+        blob.upload_from_file(file, predefined_acl=self.acl)
+
+
+class FTPFeedStorage(BlockingFeedStorage):
+    def __init__(
+        self,
+        uri: str,
+        use_active_mode: bool = False,
+        *,
+        feed_options: Optional[Dict[str, Any]] = None,
+    ):
         u = urlparse(uri)
         if not u.hostname:
-            raise ValueError(f'Got a storage URI without a hostname: {uri}')
+            raise ValueError(f"Got a storage URI without a hostname: {uri}")
         self.host: str = u.hostname
-        self.port: int = int(u.port or '21')
-        self.username: str = u.username or ''
-        self.password: str = unquote(u.password or '')
+        self.port: int = int(u.port or "21")
+        self.username: str = u.username or ""
+        self.password: str = unquote(u.password or "")
         self.path: str = u.path
         self.use_active_mode: bool = use_active_mode
-        self.overwrite: bool = not feed_options or feed_options.get('overwrite'
-            , True)
+        self.overwrite: bool = not feed_options or feed_options.get("overwrite", True)
+
+    @classmethod
+    def from_crawler(cls, crawler, uri, *, feed_options=None):
+        return build_storage(
+            cls,
+            uri,
+            crawler.settings.getbool("FEED_STORAGE_FTP_ACTIVE"),
+            feed_options=feed_options,
+        )
+
+    def _store_in_thread(self, file):
+        ftp_store_file(
+            path=self.path,
+            file=file,
+            host=self.host,
+            port=self.port,
+            username=self.username,
+            password=self.password,
+            use_active_mode=self.use_active_mode,
+            overwrite=self.overwrite,
+        )


 class FeedSlot:
-
-    def __init__(self, storage, uri, format, store_empty, batch_id,
-        uri_template, filter, feed_options, spider, exporters, settings,
-        crawler):
+    def __init__(
+        self,
+        storage,
+        uri,
+        format,
+        store_empty,
+        batch_id,
+        uri_template,
+        filter,
+        feed_options,
+        spider,
+        exporters,
+        settings,
+        crawler,
+    ):
         self.file = None
         self.exporter = None
         self.storage = storage
+        # feed params
         self.batch_id = batch_id
         self.format = format
         self.store_empty = store_empty
         self.uri_template = uri_template
         self.uri = uri
         self.filter = filter
+        # exporter params
         self.feed_options = feed_options
         self.spider = spider
         self.exporters = exporters
         self.settings = settings
         self.crawler = crawler
+        # flags
         self.itemcount = 0
         self._exporting = False
         self._fileloaded = False

+    def start_exporting(self):
+        if not self._fileloaded:
+            self.file = self.storage.open(self.spider)
+            if "postprocessing" in self.feed_options:
+                self.file = PostProcessingManager(
+                    self.feed_options["postprocessing"], self.file, self.feed_options
+                )
+            self.exporter = self._get_exporter(
+                file=self.file,
+                format=self.feed_options["format"],
+                fields_to_export=self.feed_options["fields"],
+                encoding=self.feed_options["encoding"],
+                indent=self.feed_options["indent"],
+                **self.feed_options["item_export_kwargs"],
+            )
+            self._fileloaded = True
+
+        if not self._exporting:
+            self.exporter.start_exporting()
+            self._exporting = True
+
+    def _get_instance(self, objcls, *args, **kwargs):
+        return create_instance(objcls, self.settings, self.crawler, *args, **kwargs)
+
+    def _get_exporter(self, file, format, *args, **kwargs):
+        return self._get_instance(self.exporters[format], file, *args, **kwargs)

-_FeedSlot = create_deprecated_class(name='_FeedSlot', new_class=FeedSlot)
+    def finish_exporting(self):
+        if self._exporting:
+            self.exporter.finish_exporting()
+            self._exporting = False
+
+
+_FeedSlot = create_deprecated_class(
+    name="_FeedSlot",
+    new_class=FeedSlot,
+)


 class FeedExporter:
     _pending_deferreds: List[defer.Deferred] = []

+    @classmethod
+    def from_crawler(cls, crawler):
+        exporter = cls(crawler)
+        crawler.signals.connect(exporter.open_spider, signals.spider_opened)
+        crawler.signals.connect(exporter.close_spider, signals.spider_closed)
+        crawler.signals.connect(exporter.item_scraped, signals.item_scraped)
+        return exporter
+
     def __init__(self, crawler):
         self.crawler = crawler
         self.settings = crawler.settings
         self.feeds = {}
         self.slots = []
         self.filters = {}
-        if not self.settings['FEEDS'] and not self.settings['FEED_URI']:
+
+        if not self.settings["FEEDS"] and not self.settings["FEED_URI"]:
             raise NotConfigured
-        if self.settings['FEED_URI']:
+
+        # Begin: Backward compatibility for FEED_URI and FEED_FORMAT settings
+        if self.settings["FEED_URI"]:
             warnings.warn(
-                'The `FEED_URI` and `FEED_FORMAT` settings have been deprecated in favor of the `FEEDS` setting. Please see the `FEEDS` setting docs for more details'
-                , category=ScrapyDeprecationWarning, stacklevel=2)
-            uri = self.settings['FEED_URI']
-            uri = str(uri) if not isinstance(uri, Path) else uri.absolute(
-                ).as_uri()
-            feed_options = {'format': self.settings.get('FEED_FORMAT',
-                'jsonlines')}
+                "The `FEED_URI` and `FEED_FORMAT` settings have been deprecated in favor of "
+                "the `FEEDS` setting. Please see the `FEEDS` setting docs for more details",
+                category=ScrapyDeprecationWarning,
+                stacklevel=2,
+            )
+            uri = self.settings["FEED_URI"]
+            # handle pathlib.Path objects
+            uri = str(uri) if not isinstance(uri, Path) else uri.absolute().as_uri()
+            feed_options = {"format": self.settings.get("FEED_FORMAT", "jsonlines")}
             self.feeds[uri] = feed_complete_default_values_from_settings(
-                feed_options, self.settings)
+                feed_options, self.settings
+            )
             self.filters[uri] = self._load_filter(feed_options)
-        for uri, feed_options in self.settings.getdict('FEEDS').items():
-            uri = str(uri) if not isinstance(uri, Path) else uri.absolute(
-                ).as_uri()
+        # End: Backward compatibility for FEED_URI and FEED_FORMAT settings
+
+        # 'FEEDS' setting takes precedence over 'FEED_URI'
+        for uri, feed_options in self.settings.getdict("FEEDS").items():
+            # handle pathlib.Path objects
+            uri = str(uri) if not isinstance(uri, Path) else uri.absolute().as_uri()
             self.feeds[uri] = feed_complete_default_values_from_settings(
-                feed_options, self.settings)
+                feed_options, self.settings
+            )
             self.filters[uri] = self._load_filter(feed_options)
-        self.storages = self._load_components('FEED_STORAGES')
-        self.exporters = self._load_components('FEED_EXPORTERS')
+
+        self.storages = self._load_components("FEED_STORAGES")
+        self.exporters = self._load_components("FEED_EXPORTERS")
         for uri, feed_options in self.feeds.items():
             if not self._storage_supported(uri, feed_options):
                 raise NotConfigured
             if not self._settings_are_valid():
                 raise NotConfigured
-            if not self._exporter_supported(feed_options['format']):
+            if not self._exporter_supported(feed_options["format"]):
                 raise NotConfigured

-    def _start_new_batch(self, batch_id, uri, feed_options, spider,
-        uri_template):
+    def open_spider(self, spider):
+        for uri, feed_options in self.feeds.items():
+            uri_params = self._get_uri_params(spider, feed_options["uri_params"])
+            self.slots.append(
+                self._start_new_batch(
+                    batch_id=1,
+                    uri=uri % uri_params,
+                    feed_options=feed_options,
+                    spider=spider,
+                    uri_template=uri,
+                )
+            )
+
+    async def close_spider(self, spider):
+        for slot in self.slots:
+            self._close_slot(slot, spider)
+
+        # Await all deferreds
+        if self._pending_deferreds:
+            await maybe_deferred_to_future(DeferredList(self._pending_deferreds))
+
+        # Send FEED_EXPORTER_CLOSED signal
+        await maybe_deferred_to_future(
+            self.crawler.signals.send_catch_log_deferred(signals.feed_exporter_closed)
+        )
+
+    def _close_slot(self, slot, spider):
+        def get_file(slot_):
+            if isinstance(slot_.file, PostProcessingManager):
+                slot_.file.close()
+                return slot_.file.file
+            return slot_.file
+
+        if slot.itemcount:
+            # Normal case
+            slot.finish_exporting()
+        elif slot.store_empty and slot.batch_id == 1:
+            # Need to store the empty file
+            slot.start_exporting()
+            slot.finish_exporting()
+        else:
+            # In this case, the file is not stored, so no processing is required.
+            return None
+
+        logmsg = f"{slot.format} feed ({slot.itemcount} items) in: {slot.uri}"
+        d = defer.maybeDeferred(slot.storage.store, get_file(slot))
+
+        d.addCallback(
+            self._handle_store_success, logmsg, spider, type(slot.storage).__name__
+        )
+        d.addErrback(
+            self._handle_store_error, logmsg, spider, type(slot.storage).__name__
+        )
+        self._pending_deferreds.append(d)
+        d.addCallback(
+            lambda _: self.crawler.signals.send_catch_log_deferred(
+                signals.feed_slot_closed, slot=slot
+            )
+        )
+        d.addBoth(lambda _: self._pending_deferreds.remove(d))
+
+        return d
+
+    def _handle_store_error(self, f, logmsg, spider, slot_type):
+        logger.error(
+            "Error storing %s",
+            logmsg,
+            exc_info=failure_to_exc_info(f),
+            extra={"spider": spider},
+        )
+        self.crawler.stats.inc_value(f"feedexport/failed_count/{slot_type}")
+
+    def _handle_store_success(self, f, logmsg, spider, slot_type):
+        logger.info("Stored %s", logmsg, extra={"spider": spider})
+        self.crawler.stats.inc_value(f"feedexport/success_count/{slot_type}")
+
+    def _start_new_batch(self, batch_id, uri, feed_options, spider, uri_template):
         """
         Redirect the output data stream to a new file.
         Execute multiple times if FEED_EXPORT_BATCH_ITEM_COUNT setting or FEEDS.batch_item_count is specified
@@ -254,14 +542,103 @@ class FeedExporter:
         :param spider: user spider
         :param uri_template: template of uri which contains %(batch_time)s or %(batch_id)d to create new uri
         """
-        pass
+        storage = self._get_storage(uri, feed_options)
+        slot = FeedSlot(
+            storage=storage,
+            uri=uri,
+            format=feed_options["format"],
+            store_empty=feed_options["store_empty"],
+            batch_id=batch_id,
+            uri_template=uri_template,
+            filter=self.filters[uri_template],
+            feed_options=feed_options,
+            spider=spider,
+            exporters=self.exporters,
+            settings=self.settings,
+            crawler=getattr(self, "crawler", None),
+        )
+        return slot
+
+    def item_scraped(self, item, spider):
+        slots = []
+        for slot in self.slots:
+            if not slot.filter.accepts(item):
+                slots.append(
+                    slot
+                )  # if slot doesn't accept item, continue with next slot
+                continue
+
+            slot.start_exporting()
+            slot.exporter.export_item(item)
+            slot.itemcount += 1
+            # create new slot for each slot with itemcount == FEED_EXPORT_BATCH_ITEM_COUNT and close the old one
+            if (
+                self.feeds[slot.uri_template]["batch_item_count"]
+                and slot.itemcount >= self.feeds[slot.uri_template]["batch_item_count"]
+            ):
+                uri_params = self._get_uri_params(
+                    spider, self.feeds[slot.uri_template]["uri_params"], slot
+                )
+                self._close_slot(slot, spider)
+                slots.append(
+                    self._start_new_batch(
+                        batch_id=slot.batch_id + 1,
+                        uri=slot.uri_template % uri_params,
+                        feed_options=self.feeds[slot.uri_template],
+                        spider=spider,
+                        uri_template=slot.uri_template,
+                    )
+                )
+            else:
+                slots.append(slot)
+        self.slots = slots
+
+    def _load_components(self, setting_prefix):
+        conf = without_none_values(self.settings.getwithbase(setting_prefix))
+        d = {}
+        for k, v in conf.items():
+            try:
+                d[k] = load_object(v)
+            except NotConfigured:
+                pass
+        return d
+
+    def _exporter_supported(self, format):
+        if format in self.exporters:
+            return True
+        logger.error("Unknown feed format: %(format)s", {"format": format})

     def _settings_are_valid(self):
         """
         If FEED_EXPORT_BATCH_ITEM_COUNT setting or FEEDS.batch_item_count is specified uri has to contain
         %(batch_time)s or %(batch_id)d to distinguish different files of partial output
         """
-        pass
+        for uri_template, values in self.feeds.items():
+            if values["batch_item_count"] and not re.search(
+                r"%\(batch_time\)s|%\(batch_id\)", uri_template
+            ):
+                logger.error(
+                    "%%(batch_time)s or %%(batch_id)d must be in the feed URI (%s) if FEED_EXPORT_BATCH_ITEM_COUNT "
+                    "setting or FEEDS.batch_item_count is specified and greater than 0. For more info see: "
+                    "https://docs.scrapy.org/en/latest/topics/feed-exports.html#feed-export-batch-item-count",
+                    uri_template,
+                )
+                return False
+        return True
+
+    def _storage_supported(self, uri, feed_options):
+        scheme = urlparse(uri).scheme
+        if scheme in self.storages or PureWindowsPath(uri).drive:
+            try:
+                self._get_storage(uri, feed_options)
+                return True
+            except NotConfigured as e:
+                logger.error(
+                    "Disabled feed storage scheme: %(scheme)s. " "Reason: %(reason)s",
+                    {"scheme": scheme, "reason": str(e)},
+                )
+        else:
+            logger.error("Unknown feed storage scheme: %(scheme)s", {"scheme": scheme})

     def _get_storage(self, uri, feed_options):
         """Fork of create_instance specific to feed storage classes
@@ -269,4 +646,49 @@ class FeedExporter:
         It supports not passing the *feed_options* parameters to classes that
         do not support it, and issuing a deprecation warning instead.
         """
-        pass
+        feedcls = self.storages.get(urlparse(uri).scheme, self.storages["file"])
+        crawler = getattr(self, "crawler", None)
+
+        def build_instance(builder, *preargs):
+            return build_storage(
+                builder, uri, feed_options=feed_options, preargs=preargs
+            )
+
+        if crawler and hasattr(feedcls, "from_crawler"):
+            instance = build_instance(feedcls.from_crawler, crawler)
+            method_name = "from_crawler"
+        elif hasattr(feedcls, "from_settings"):
+            instance = build_instance(feedcls.from_settings, self.settings)
+            method_name = "from_settings"
+        else:
+            instance = build_instance(feedcls)
+            method_name = "__new__"
+        if instance is None:
+            raise TypeError(f"{feedcls.__qualname__}.{method_name} returned None")
+        return instance
+
+    def _get_uri_params(
+        self,
+        spider: Spider,
+        uri_params_function: Optional[Union[str, Callable[[dict, Spider], dict]]],
+        slot: Optional[FeedSlot] = None,
+    ) -> dict:
+        params = {}
+        for k in dir(spider):
+            params[k] = getattr(spider, k)
+        utc_now = datetime.now(tz=timezone.utc)
+        params["time"] = utc_now.replace(microsecond=0).isoformat().replace(":", "-")
+        params["batch_time"] = utc_now.isoformat().replace(":", "-")
+        params["batch_id"] = slot.batch_id + 1 if slot is not None else 1
+        uripar_function = (
+            load_object(uri_params_function)
+            if uri_params_function
+            else lambda params, _: params
+        )
+        new_params = uripar_function(params, spider)
+        return new_params if new_params is not None else params
+
+    def _load_filter(self, feed_options):
+        # load the item filter if declared else load the default filter class
+        item_filter_class = load_object(feed_options.get("item_filter", ItemFilter))
+        return item_filter_class(feed_options)
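
A quick illustration of the batch logic added above (item_scraped, _start_new_batch and
_settings_are_valid): a minimal FEEDS sketch, with an assumed output path and batch size
that are not taken from the patch.

    # Hypothetical project settings. Because batch_item_count is set, the URI template
    # must contain %(batch_id)d or %(batch_time)s so every batch gets a distinct file.
    FEEDS = {
        "exports/items-%(batch_id)d.jsonl": {
            "format": "jsonlines",
            "batch_item_count": 100,  # close the slot and start batch N+1 every 100 items
            "store_empty": False,
        },
    }
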
diff --git a/scrapy/extensions/httpcache.py b/scrapy/extensions/httpcache.py
index b70d7ec39..7e4f047a8 100644
--- a/scrapy/extensions/httpcache.py
+++ b/scrapy/extensions/httpcache.py
@@ -6,7 +6,9 @@ from importlib import import_module
 from pathlib import Path
 from time import time
 from weakref import WeakKeyDictionary
+
 from w3lib.http import headers_dict_to_raw, headers_raw_to_dict
+
 from scrapy.http import Headers, Response
 from scrapy.http.request import Request
 from scrapy.responsetypes import responsetypes
@@ -14,52 +16,343 @@ from scrapy.spiders import Spider
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.project import data_path
 from scrapy.utils.python import to_bytes, to_unicode
+
 logger = logging.getLogger(__name__)


 class DummyPolicy:
-
     def __init__(self, settings):
-        self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
-        self.ignore_http_codes = [int(x) for x in settings.getlist(
-            'HTTPCACHE_IGNORE_HTTP_CODES')]
+        self.ignore_schemes = settings.getlist("HTTPCACHE_IGNORE_SCHEMES")
+        self.ignore_http_codes = [
+            int(x) for x in settings.getlist("HTTPCACHE_IGNORE_HTTP_CODES")
+        ]
+
+    def should_cache_request(self, request):
+        return urlparse_cached(request).scheme not in self.ignore_schemes
+
+    def should_cache_response(self, response, request):
+        return response.status not in self.ignore_http_codes
+
+    def is_cached_response_fresh(self, cachedresponse, request):
+        return True
+
+    def is_cached_response_valid(self, cachedresponse, response, request):
+        return True


 class RFC2616Policy:
-    MAXAGE = 3600 * 24 * 365
+    MAXAGE = 3600 * 24 * 365  # one year

     def __init__(self, settings):
-        self.always_store = settings.getbool('HTTPCACHE_ALWAYS_STORE')
-        self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
+        self.always_store = settings.getbool("HTTPCACHE_ALWAYS_STORE")
+        self.ignore_schemes = settings.getlist("HTTPCACHE_IGNORE_SCHEMES")
         self._cc_parsed = WeakKeyDictionary()
-        self.ignore_response_cache_controls = [to_bytes(cc) for cc in
-            settings.getlist('HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS')]
+        self.ignore_response_cache_controls = [
+            to_bytes(cc)
+            for cc in settings.getlist("HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS")
+        ]

+    def _parse_cachecontrol(self, r):
+        if r not in self._cc_parsed:
+            cch = r.headers.get(b"Cache-Control", b"")
+            parsed = parse_cachecontrol(cch)
+            if isinstance(r, Response):
+                for key in self.ignore_response_cache_controls:
+                    parsed.pop(key, None)
+            self._cc_parsed[r] = parsed
+        return self._cc_parsed[r]

-class DbmCacheStorage:
+    def should_cache_request(self, request):
+        if urlparse_cached(request).scheme in self.ignore_schemes:
+            return False
+        cc = self._parse_cachecontrol(request)
+        # obey user-agent directive "Cache-Control: no-store"
+        if b"no-store" in cc:
+            return False
+        # Any other request is eligible for caching
+        return True
+
+    def should_cache_response(self, response, request):
+        # What is cacheable - https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9.1
+        # Response cacheability - https://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.4
+        # Status code 206 is not included because the cache cannot deal with partial contents
+        cc = self._parse_cachecontrol(response)
+        # obey directive "Cache-Control: no-store"
+        if b"no-store" in cc:
+            return False
+        # Never cache 304 (Not Modified) responses
+        if response.status == 304:
+            return False
+        # Cache unconditionally if configured to do so
+        if self.always_store:
+            return True
+        # Any hint on response expiration is good
+        if b"max-age" in cc or b"Expires" in response.headers:
+            return True
+        # Firefox falls back to a one-year expiration for these statuses if none is set
+        if response.status in (300, 301, 308):
+            return True
+        # Other statuses without expiration require at least one validator
+        if response.status in (200, 203, 401):
+            return b"Last-Modified" in response.headers or b"ETag" in response.headers
+        # Anything else is probably not eligible for caching: it makes no sense
+        # to cache responses that do not contain expiration info and cannot be
+        # revalidated
+        return False
+
+    def is_cached_response_fresh(self, cachedresponse, request):
+        cc = self._parse_cachecontrol(cachedresponse)
+        ccreq = self._parse_cachecontrol(request)
+        if b"no-cache" in cc or b"no-cache" in ccreq:
+            return False
+
+        now = time()
+        freshnesslifetime = self._compute_freshness_lifetime(
+            cachedresponse, request, now
+        )
+        currentage = self._compute_current_age(cachedresponse, request, now)
+
+        reqmaxage = self._get_max_age(ccreq)
+        if reqmaxage is not None:
+            freshnesslifetime = min(freshnesslifetime, reqmaxage)
+
+        if currentage < freshnesslifetime:
+            return True
+
+        if b"max-stale" in ccreq and b"must-revalidate" not in cc:
+            # From RFC2616: "Indicates that the client is willing to
+            # accept a response that has exceeded its expiration time.
+            # If max-stale is assigned a value, then the client is
+            # willing to accept a response that has exceeded its
+            # expiration time by no more than the specified number of
+            # seconds. If no value is assigned to max-stale, then the
+            # client is willing to accept a stale response of any age."
+            staleage = ccreq[b"max-stale"]
+            if staleage is None:
+                return True
+
+            try:
+                if currentage < freshnesslifetime + max(0, int(staleage)):
+                    return True
+            except ValueError:
+                pass
+
+        # Cached response is stale, try to set validators if any
+        self._set_conditional_validators(request, cachedresponse)
+        return False
+
+    def is_cached_response_valid(self, cachedresponse, response, request):
+        # Use the cached response if the new response is a server error,
+        # as long as the old response didn't specify must-revalidate.
+        if response.status >= 500:
+            cc = self._parse_cachecontrol(cachedresponse)
+            if b"must-revalidate" not in cc:
+                return True
+
+        # Use the cached response if the server says it hasn't changed.
+        return response.status == 304
+
+    def _set_conditional_validators(self, request, cachedresponse):
+        if b"Last-Modified" in cachedresponse.headers:
+            request.headers[b"If-Modified-Since"] = cachedresponse.headers[
+                b"Last-Modified"
+            ]
+
+        if b"ETag" in cachedresponse.headers:
+            request.headers[b"If-None-Match"] = cachedresponse.headers[b"ETag"]
+
+    def _get_max_age(self, cc):
+        try:
+            return max(0, int(cc[b"max-age"]))
+        except (KeyError, ValueError):
+            return None
+
+    def _compute_freshness_lifetime(self, response, request, now):
+        # Reference nsHttpResponseHead::ComputeFreshnessLifetime
+        # https://dxr.mozilla.org/mozilla-central/source/netwerk/protocol/http/nsHttpResponseHead.cpp#706
+        cc = self._parse_cachecontrol(response)
+        maxage = self._get_max_age(cc)
+        if maxage is not None:
+            return maxage
+
+        # Parse date header or synthesize it if none exists
+        date = rfc1123_to_epoch(response.headers.get(b"Date")) or now
+
+        # Try HTTP/1.0 Expires header
+        if b"Expires" in response.headers:
+            expires = rfc1123_to_epoch(response.headers[b"Expires"])
+            # When parsing the Expires header fails, RFC 2616 section 14.21
+            # says we should treat this as an expiration time in the past.
+            return max(0, expires - date) if expires else 0
+
+        # Fall back to a heuristic using the Last-Modified header
+        # (not part of the RFC, but matches Firefox's caching implementation)
+        lastmodified = rfc1123_to_epoch(response.headers.get(b"Last-Modified"))
+        if lastmodified and lastmodified <= date:
+            return (date - lastmodified) / 10
+
+        # This request can be cached indefinitely
+        if response.status in (300, 301, 308):
+            return self.MAXAGE

+        # Insufficient information to compute freshness lifetime
+        return 0
+
+    def _compute_current_age(self, response, request, now):
+        # Reference nsHttpResponseHead::ComputeCurrentAge
+        # https://dxr.mozilla.org/mozilla-central/source/netwerk/protocol/http/nsHttpResponseHead.cpp#658
+        currentage = 0
+        # If the Date header is not set, we assume a fast connection and a
+        # clock that is in sync with the server
+        date = rfc1123_to_epoch(response.headers.get(b"Date")) or now
+        if now > date:
+            currentage = now - date
+
+        if b"Age" in response.headers:
+            try:
+                age = int(response.headers[b"Age"])
+                currentage = max(currentage, age)
+            except ValueError:
+                pass
+
+        return currentage
+
+
+class DbmCacheStorage:
     def __init__(self, settings):
-        self.cachedir = data_path(settings['HTTPCACHE_DIR'], createdir=True)
-        self.expiration_secs = settings.getint('HTTPCACHE_EXPIRATION_SECS')
-        self.dbmodule = import_module(settings['HTTPCACHE_DBM_MODULE'])
+        self.cachedir = data_path(settings["HTTPCACHE_DIR"], createdir=True)
+        self.expiration_secs = settings.getint("HTTPCACHE_EXPIRATION_SECS")
+        self.dbmodule = import_module(settings["HTTPCACHE_DBM_MODULE"])
         self.db = None

+    def open_spider(self, spider: Spider):
+        dbpath = Path(self.cachedir, f"{spider.name}.db")
+        self.db = self.dbmodule.open(str(dbpath), "c")

-class FilesystemCacheStorage:
+        logger.debug(
+            "Using DBM cache storage in %(cachepath)s",
+            {"cachepath": dbpath},
+            extra={"spider": spider},
+        )
+
+        self._fingerprinter = spider.crawler.request_fingerprinter
+
+    def close_spider(self, spider):
+        self.db.close()
+
+    def retrieve_response(self, spider, request):
+        data = self._read_data(spider, request)
+        if data is None:
+            return  # not cached
+        url = data["url"]
+        status = data["status"]
+        headers = Headers(data["headers"])
+        body = data["body"]
+        respcls = responsetypes.from_args(headers=headers, url=url, body=body)
+        response = respcls(url=url, headers=headers, status=status, body=body)
+        return response
+
+    def store_response(self, spider, request, response):
+        key = self._fingerprinter.fingerprint(request).hex()
+        data = {
+            "status": response.status,
+            "url": response.url,
+            "headers": dict(response.headers),
+            "body": response.body,
+        }
+        self.db[f"{key}_data"] = pickle.dumps(data, protocol=4)
+        self.db[f"{key}_time"] = str(time())
+
+    def _read_data(self, spider, request):
+        key = self._fingerprinter.fingerprint(request).hex()
+        db = self.db
+        tkey = f"{key}_time"
+        if tkey not in db:
+            return  # not found
+
+        ts = db[tkey]
+        if 0 < self.expiration_secs < time() - float(ts):
+            return  # expired

+        return pickle.loads(db[f"{key}_data"])
+
+
+class FilesystemCacheStorage:
     def __init__(self, settings):
-        self.cachedir = data_path(settings['HTTPCACHE_DIR'])
-        self.expiration_secs = settings.getint('HTTPCACHE_EXPIRATION_SECS')
-        self.use_gzip = settings.getbool('HTTPCACHE_GZIP')
+        self.cachedir = data_path(settings["HTTPCACHE_DIR"])
+        self.expiration_secs = settings.getint("HTTPCACHE_EXPIRATION_SECS")
+        self.use_gzip = settings.getbool("HTTPCACHE_GZIP")
         self._open = gzip.open if self.use_gzip else open

+    def open_spider(self, spider: Spider):
+        logger.debug(
+            "Using filesystem cache storage in %(cachedir)s",
+            {"cachedir": self.cachedir},
+            extra={"spider": spider},
+        )
+
+        assert spider.crawler.request_fingerprinter
+        self._fingerprinter = spider.crawler.request_fingerprinter
+
+    def close_spider(self, spider):
+        pass
+
     def retrieve_response(self, spider: Spider, request: Request):
         """Return response if present in cache, or None otherwise."""
-        pass
+        metadata = self._read_meta(spider, request)
+        if metadata is None:
+            return  # not cached
+        rpath = Path(self._get_request_path(spider, request))
+        with self._open(rpath / "response_body", "rb") as f:
+            body = f.read()
+        with self._open(rpath / "response_headers", "rb") as f:
+            rawheaders = f.read()
+        url = metadata.get("response_url")
+        status = metadata["status"]
+        headers = Headers(headers_raw_to_dict(rawheaders))
+        respcls = responsetypes.from_args(headers=headers, url=url, body=body)
+        response = respcls(url=url, headers=headers, status=status, body=body)
+        return response

     def store_response(self, spider: Spider, request: Request, response):
         """Store the given response in the cache."""
-        pass
+        rpath = Path(self._get_request_path(spider, request))
+        if not rpath.exists():
+            rpath.mkdir(parents=True)
+        metadata = {
+            "url": request.url,
+            "method": request.method,
+            "status": response.status,
+            "response_url": response.url,
+            "timestamp": time(),
+        }
+        with self._open(rpath / "meta", "wb") as f:
+            f.write(to_bytes(repr(metadata)))
+        with self._open(rpath / "pickled_meta", "wb") as f:
+            pickle.dump(metadata, f, protocol=4)
+        with self._open(rpath / "response_headers", "wb") as f:
+            f.write(headers_dict_to_raw(response.headers))
+        with self._open(rpath / "response_body", "wb") as f:
+            f.write(response.body)
+        with self._open(rpath / "request_headers", "wb") as f:
+            f.write(headers_dict_to_raw(request.headers))
+        with self._open(rpath / "request_body", "wb") as f:
+            f.write(request.body)
+
+    def _get_request_path(self, spider: Spider, request: Request) -> str:
+        key = self._fingerprinter.fingerprint(request).hex()
+        return str(Path(self.cachedir, spider.name, key[0:2], key))
+
+    def _read_meta(self, spider: Spider, request: Request):
+        rpath = Path(self._get_request_path(spider, request))
+        metapath = rpath / "pickled_meta"
+        if not metapath.exists():
+            return  # not found
+        mtime = metapath.stat().st_mtime
+        if 0 < self.expiration_secs < time() - mtime:
+            return  # expired
+        with self._open(metapath, "rb") as f:
+            return pickle.load(f)


 def parse_cachecontrol(header):
@@ -74,4 +367,17 @@ def parse_cachecontrol(header):
     True

     """
-    pass
+    directives = {}
+    for directive in header.split(b","):
+        key, sep, val = directive.strip().partition(b"=")
+        if key:
+            directives[key.lower()] = val if sep else None
+    return directives
+
+
+def rfc1123_to_epoch(date_str):
+    try:
+        date_str = to_unicode(date_str, encoding="ascii")
+        return mktime_tz(parsedate_tz(date_str))
+    except Exception:
+        return None
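
A small usage sketch for the cache helpers above (not part of the patch): parse_cachecontrol
lower-cases directive names and maps valueless directives to None, and the policy/storage
classes are normally selected through the HTTPCACHE_* settings, shown here with assumed values.

    from scrapy.extensions.httpcache import parse_cachecontrol

    # Directive names are lower-cased; b"No-Store" carries no value, so it maps to None.
    assert parse_cachecontrol(b"max-age=3600, No-Store") == {
        b"max-age": b"3600",
        b"no-store": None,
    }

    # Illustrative settings enabling the RFC 2616 policy with the filesystem backend.
    HTTPCACHE_ENABLED = True
    HTTPCACHE_POLICY = "scrapy.extensions.httpcache.RFC2616Policy"
    HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
    HTTPCACHE_EXPIRATION_SECS = 0  # 0 means cached entries are never treated as expired
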
diff --git a/scrapy/extensions/logstats.py b/scrapy/extensions/logstats.py
index e2864b322..78874a6db 100644
--- a/scrapy/extensions/logstats.py
+++ b/scrapy/extensions/logstats.py
@@ -1,7 +1,10 @@
 import logging
+
 from twisted.internet import task
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
+
 logger = logging.getLogger(__name__)


@@ -13,3 +16,43 @@ class LogStats:
         self.interval = interval
         self.multiplier = 60.0 / self.interval
         self.task = None
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        interval = crawler.settings.getfloat("LOGSTATS_INTERVAL")
+        if not interval:
+            raise NotConfigured
+        o = cls(crawler.stats, interval)
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+        return o
+
+    def spider_opened(self, spider):
+        self.pagesprev = 0
+        self.itemsprev = 0
+
+        self.task = task.LoopingCall(self.log, spider)
+        self.task.start(self.interval)
+
+    def log(self, spider):
+        items = self.stats.get_value("item_scraped_count", 0)
+        pages = self.stats.get_value("response_received_count", 0)
+        irate = (items - self.itemsprev) * self.multiplier
+        prate = (pages - self.pagesprev) * self.multiplier
+        self.pagesprev, self.itemsprev = pages, items
+
+        msg = (
+            "Crawled %(pages)d pages (at %(pagerate)d pages/min), "
+            "scraped %(items)d items (at %(itemrate)d items/min)"
+        )
+        log_args = {
+            "pages": pages,
+            "pagerate": prate,
+            "items": items,
+            "itemrate": irate,
+        }
+        logger.info(msg, log_args, extra={"spider": spider})
+
+    def spider_closed(self, spider, reason):
+        if self.task and self.task.running:
+            self.task.stop()
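
The per-minute rates logged above are just deltas scaled by 60 / interval; a standalone
sketch of that arithmetic, with assumed numbers.

    interval = 30.0                   # LOGSTATS_INTERVAL (assumed)
    multiplier = 60.0 / interval      # 2.0: a 30-second delta is doubled into a per-minute rate
    pages_prev, pages_now = 120, 180
    page_rate = (pages_now - pages_prev) * multiplier  # 120 pages/min
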
diff --git a/scrapy/extensions/memdebug.py b/scrapy/extensions/memdebug.py
index 8eb617202..03ede0681 100644
--- a/scrapy/extensions/memdebug.py
+++ b/scrapy/extensions/memdebug.py
@@ -3,13 +3,34 @@ MemoryDebugger extension

 See documentation in docs/topics/extensions.rst
 """
+
 import gc
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.trackref import live_refs


 class MemoryDebugger:
-
     def __init__(self, stats):
         self.stats = stats
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        if not crawler.settings.getbool("MEMDEBUG_ENABLED"):
+            raise NotConfigured
+        o = cls(crawler.stats)
+        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+        return o
+
+    def spider_closed(self, spider, reason):
+        gc.collect()
+        self.stats.set_value(
+            "memdebug/gc_garbage_count", len(gc.garbage), spider=spider
+        )
+        for cls, wdict in live_refs.items():
+            if not wdict:
+                continue
+            self.stats.set_value(
+                f"memdebug/live_refs/{cls.__name__}", len(wdict), spider=spider
+            )
diff --git a/scrapy/extensions/memusage.py b/scrapy/extensions/memusage.py
index 81fb5c242..ca766c938 100644
--- a/scrapy/extensions/memusage.py
+++ b/scrapy/extensions/memusage.py
@@ -8,37 +8,134 @@ import socket
 import sys
 from importlib import import_module
 from pprint import pformat
+
 from twisted.internet import task
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.mail import MailSender
 from scrapy.utils.engine import get_engine_status
+
 logger = logging.getLogger(__name__)


 class MemoryUsage:
-
     def __init__(self, crawler):
-        if not crawler.settings.getbool('MEMUSAGE_ENABLED'):
+        if not crawler.settings.getbool("MEMUSAGE_ENABLED"):
             raise NotConfigured
         try:
-            self.resource = import_module('resource')
+            # stdlib's resource module is only available on unix platforms.
+            self.resource = import_module("resource")
         except ImportError:
             raise NotConfigured
+
         self.crawler = crawler
         self.warned = False
-        self.notify_mails = crawler.settings.getlist('MEMUSAGE_NOTIFY_MAIL')
-        self.limit = crawler.settings.getint('MEMUSAGE_LIMIT_MB') * 1024 * 1024
-        self.warning = crawler.settings.getint('MEMUSAGE_WARNING_MB'
-            ) * 1024 * 1024
+        self.notify_mails = crawler.settings.getlist("MEMUSAGE_NOTIFY_MAIL")
+        self.limit = crawler.settings.getint("MEMUSAGE_LIMIT_MB") * 1024 * 1024
+        self.warning = crawler.settings.getint("MEMUSAGE_WARNING_MB") * 1024 * 1024
         self.check_interval = crawler.settings.getfloat(
-            'MEMUSAGE_CHECK_INTERVAL_SECONDS')
+            "MEMUSAGE_CHECK_INTERVAL_SECONDS"
+        )
         self.mail = MailSender.from_settings(crawler.settings)
-        crawler.signals.connect(self.engine_started, signal=signals.
-            engine_started)
-        crawler.signals.connect(self.engine_stopped, signal=signals.
-            engine_stopped)
+        crawler.signals.connect(self.engine_started, signal=signals.engine_started)
+        crawler.signals.connect(self.engine_stopped, signal=signals.engine_stopped)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def get_virtual_size(self):
+        size = self.resource.getrusage(self.resource.RUSAGE_SELF).ru_maxrss
+        if sys.platform != "darwin":
+            # on macOS ru_maxrss is in bytes, on Linux it is in KB
+            size *= 1024
+        return size
+
+    def engine_started(self):
+        self.crawler.stats.set_value("memusage/startup", self.get_virtual_size())
+        self.tasks = []
+        tsk = task.LoopingCall(self.update)
+        self.tasks.append(tsk)
+        tsk.start(self.check_interval, now=True)
+        if self.limit:
+            tsk = task.LoopingCall(self._check_limit)
+            self.tasks.append(tsk)
+            tsk.start(self.check_interval, now=True)
+        if self.warning:
+            tsk = task.LoopingCall(self._check_warning)
+            self.tasks.append(tsk)
+            tsk.start(self.check_interval, now=True)
+
+    def engine_stopped(self):
+        for tsk in self.tasks:
+            if tsk.running:
+                tsk.stop()
+
+    def update(self):
+        self.crawler.stats.max_value("memusage/max", self.get_virtual_size())
+
+    def _check_limit(self):
+        peak_mem_usage = self.get_virtual_size()
+        if peak_mem_usage > self.limit:
+            self.crawler.stats.set_value("memusage/limit_reached", 1)
+            mem = self.limit / 1024 / 1024
+            logger.error(
+                "Memory usage exceeded %(memusage)dMiB. Shutting down Scrapy...",
+                {"memusage": mem},
+                extra={"crawler": self.crawler},
+            )
+            if self.notify_mails:
+                subj = (
+                    f"{self.crawler.settings['BOT_NAME']} terminated: "
+                    f"memory usage exceeded {mem}MiB at {socket.gethostname()}"
+                )
+                self._send_report(self.notify_mails, subj)
+                self.crawler.stats.set_value("memusage/limit_notified", 1)
+
+            if self.crawler.engine.spider is not None:
+                self.crawler.engine.close_spider(
+                    self.crawler.engine.spider, "memusage_exceeded"
+                )
+            else:
+                self.crawler.stop()
+        else:
+            logger.info(
+                "Peak memory usage is %(virtualsize)dMiB",
+                {"virtualsize": peak_mem_usage / 1024 / 1024},
+            )
+
+    def _check_warning(self):
+        if self.warned:  # warn only once
+            return
+        if self.get_virtual_size() > self.warning:
+            self.crawler.stats.set_value("memusage/warning_reached", 1)
+            mem = self.warning / 1024 / 1024
+            logger.warning(
+                "Memory usage reached %(memusage)dMiB",
+                {"memusage": mem},
+                extra={"crawler": self.crawler},
+            )
+            if self.notify_mails:
+                subj = (
+                    f"{self.crawler.settings['BOT_NAME']} warning: "
+                    f"memory usage reached {mem}MiB at {socket.gethostname()}"
+                )
+                self._send_report(self.notify_mails, subj)
+                self.crawler.stats.set_value("memusage/warning_notified", 1)
+            self.warned = True

     def _send_report(self, rcpts, subject):
         """send notification mail with some additional useful info"""
-        pass
+        stats = self.crawler.stats
+        s = f"Memory usage at engine startup : {stats.get_value('memusage/startup') / 1024 / 1024}M\r\n"
+        s += f"Maximum memory usage          : {stats.get_value('memusage/max') / 1024 / 1024}M\r\n"
+        s += f"Current memory usage          : {self.get_virtual_size() / 1024 / 1024}M\r\n"
+
+        s += (
+            "ENGINE STATUS ------------------------------------------------------- \r\n"
+        )
+        s += "\r\n"
+        s += pformat(get_engine_status(self.crawler.engine))
+        s += "\r\n"
+        self.mail.send(rcpts, subject, s)
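
The thresholds above are configured in MiB and converted to bytes in __init__; a hedged
settings sketch with assumed values.

    MEMUSAGE_ENABLED = True
    MEMUSAGE_LIMIT_MB = 2048               # close the spider with reason "memusage_exceeded" past ~2 GiB
    MEMUSAGE_WARNING_MB = 1536             # warn (and optionally mail) once past ~1.5 GiB
    MEMUSAGE_CHECK_INTERVAL_SECONDS = 60.0
    MEMUSAGE_NOTIFY_MAIL = ["ops@example.com"]  # assumed recipient
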
diff --git a/scrapy/extensions/periodic_log.py b/scrapy/extensions/periodic_log.py
index 6703689b9..2d557f123 100644
--- a/scrapy/extensions/periodic_log.py
+++ b/scrapy/extensions/periodic_log.py
@@ -1,26 +1,140 @@
 import logging
 from datetime import datetime, timezone
+
 from twisted.internet import task
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.serialize import ScrapyJSONEncoder
+
 logger = logging.getLogger(__name__)


 class PeriodicLog:
     """Log basic scraping stats periodically"""

-    def __init__(self, stats, interval=60.0, ext_stats={}, ext_delta={},
-        ext_timing_enabled=False):
+    def __init__(
+        self,
+        stats,
+        interval=60.0,
+        ext_stats={},
+        ext_delta={},
+        ext_timing_enabled=False,
+    ):
         self.stats = stats
         self.interval = interval
         self.multiplier = 60.0 / self.interval
         self.task = None
         self.encoder = ScrapyJSONEncoder(sort_keys=True, indent=4)
         self.ext_stats_enabled = bool(ext_stats)
-        self.ext_stats_include = ext_stats.get('include', [])
-        self.ext_stats_exclude = ext_stats.get('exclude', [])
+        self.ext_stats_include = ext_stats.get("include", [])
+        self.ext_stats_exclude = ext_stats.get("exclude", [])
         self.ext_delta_enabled = bool(ext_delta)
-        self.ext_delta_include = ext_delta.get('include', [])
-        self.ext_delta_exclude = ext_delta.get('exclude', [])
+        self.ext_delta_include = ext_delta.get("include", [])
+        self.ext_delta_exclude = ext_delta.get("exclude", [])
         self.ext_timing_enabled = ext_timing_enabled
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        interval = crawler.settings.getfloat("LOGSTATS_INTERVAL")
+        if not interval:
+            raise NotConfigured
+        try:
+            ext_stats = crawler.settings.getdict("PERIODIC_LOG_STATS")
+        except (TypeError, ValueError):
+            ext_stats = (
+                {"enabled": True}
+                if crawler.settings.getbool("PERIODIC_LOG_STATS")
+                else None
+            )
+        try:
+            ext_delta = crawler.settings.getdict("PERIODIC_LOG_DELTA")
+        except (TypeError, ValueError):
+            ext_delta = (
+                {"enabled": True}
+                if crawler.settings.getbool("PERIODIC_LOG_DELTA")
+                else None
+            )
+
+        ext_timing_enabled = crawler.settings.getbool(
+            "PERIODIC_LOG_TIMING_ENABLED", False
+        )
+        if not (ext_stats or ext_delta or ext_timing_enabled):
+            raise NotConfigured
+        o = cls(
+            crawler.stats,
+            interval,
+            ext_stats,
+            ext_delta,
+            ext_timing_enabled,
+        )
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+        return o
+
+    def spider_opened(self, spider):
+        self.time_prev = datetime.now(tz=timezone.utc)
+        self.delta_prev = {}
+        self.stats_prev = {}
+
+        self.task = task.LoopingCall(self.log)
+        self.task.start(self.interval)
+
+    def log(self):
+        data = {}
+        if self.ext_timing_enabled:
+            data.update(self.log_timing())
+        if self.ext_delta_enabled:
+            data.update(self.log_delta())
+        if self.ext_stats_enabled:
+            data.update(self.log_crawler_stats())
+        logger.info(self.encoder.encode(data))
+
+    def log_delta(self):
+        num_stats = {
+            k: v
+            for k, v in self.stats._stats.items()
+            if isinstance(v, (int, float))
+            and self.param_allowed(k, self.ext_delta_include, self.ext_delta_exclude)
+        }
+        delta = {k: v - self.delta_prev.get(k, 0) for k, v in num_stats.items()}
+        self.delta_prev = num_stats
+        return {"delta": delta}
+
+    def log_timing(self):
+        now = datetime.now(tz=timezone.utc)
+        time = {
+            "log_interval": self.interval,
+            "start_time": self.stats._stats["start_time"],
+            "utcnow": now,
+            "log_interval_real": (now - self.time_prev).total_seconds(),
+            "elapsed": (now - self.stats._stats["start_time"]).total_seconds(),
+        }
+        self.time_prev = now
+        return {"time": time}
+
+    def log_crawler_stats(self):
+        stats = {
+            k: v
+            for k, v in self.stats._stats.items()
+            if self.param_allowed(k, self.ext_stats_include, self.ext_stats_exclude)
+        }
+        return {"stats": stats}
+
+    def param_allowed(self, stat_name, include, exclude):
+        if not include and not exclude:
+            return True
+        for p in exclude:
+            if p in stat_name:
+                return False
+        if exclude and not include:
+            return True
+        for p in include:
+            if p in stat_name:
+                return True
+        return False
+
+    def spider_closed(self, spider, reason):
+        self.log()
+        if self.task and self.task.running:
+            self.task.stop()
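
param_allowed above treats include/exclude entries as substrings of the stat name, and
from_crawler reuses LOGSTATS_INTERVAL as the logging period; an assumed configuration sketch.

    PERIODIC_LOG_STATS = {"include": ["downloader/", "scheduler/"]}
    PERIODIC_LOG_DELTA = {"include": ["response_received_count", "item_scraped_count"]}
    PERIODIC_LOG_TIMING_ENABLED = True
    LOGSTATS_INTERVAL = 60.0  # the extension raises NotConfigured when this is 0
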
diff --git a/scrapy/extensions/postprocessing.py b/scrapy/extensions/postprocessing.py
index 32f5ff6bc..17969c5b0 100644
--- a/scrapy/extensions/postprocessing.py
+++ b/scrapy/extensions/postprocessing.py
@@ -6,6 +6,7 @@ from gzip import GzipFile
 from io import IOBase
 from lzma import LZMAFile
 from typing import Any, BinaryIO, Dict, List
+
 from scrapy.utils.misc import load_object


@@ -22,14 +23,25 @@ class GzipPlugin:
     See :py:class:`gzip.GzipFile` for more info about parameters.
     """

-    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) ->None:
+    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) -> None:
         self.file = file
         self.feed_options = feed_options
-        compress_level = self.feed_options.get('gzip_compresslevel', 9)
-        mtime = self.feed_options.get('gzip_mtime')
-        filename = self.feed_options.get('gzip_filename')
-        self.gzipfile = GzipFile(fileobj=self.file, mode='wb',
-            compresslevel=compress_level, mtime=mtime, filename=filename)
+        compress_level = self.feed_options.get("gzip_compresslevel", 9)
+        mtime = self.feed_options.get("gzip_mtime")
+        filename = self.feed_options.get("gzip_filename")
+        self.gzipfile = GzipFile(
+            fileobj=self.file,
+            mode="wb",
+            compresslevel=compress_level,
+            mtime=mtime,
+            filename=filename,
+        )
+
+    def write(self, data: bytes) -> int:
+        return self.gzipfile.write(data)
+
+    def close(self) -> None:
+        self.gzipfile.close()


 class Bz2Plugin:
@@ -43,12 +55,19 @@ class Bz2Plugin:
     See :py:class:`bz2.BZ2File` for more info about parameters.
     """

-    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) ->None:
+    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) -> None:
         self.file = file
         self.feed_options = feed_options
-        compress_level = self.feed_options.get('bz2_compresslevel', 9)
-        self.bz2file = BZ2File(filename=self.file, mode='wb', compresslevel
-            =compress_level)
+        compress_level = self.feed_options.get("bz2_compresslevel", 9)
+        self.bz2file = BZ2File(
+            filename=self.file, mode="wb", compresslevel=compress_level
+        )
+
+    def write(self, data: bytes) -> int:
+        return self.bz2file.write(data)
+
+    def close(self) -> None:
+        self.bz2file.close()


 class LZMAPlugin:
@@ -68,17 +87,33 @@ class LZMAPlugin:
     See :py:class:`lzma.LZMAFile` for more info about parameters.
     """

-    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) ->None:
+    def __init__(self, file: BinaryIO, feed_options: Dict[str, Any]) -> None:
         self.file = file
         self.feed_options = feed_options
-        format = self.feed_options.get('lzma_format')
-        check = self.feed_options.get('lzma_check', -1)
-        preset = self.feed_options.get('lzma_preset')
-        filters = self.feed_options.get('lzma_filters')
-        self.lzmafile = LZMAFile(filename=self.file, mode='wb', format=
-            format, check=check, preset=preset, filters=filters)
-

+        format = self.feed_options.get("lzma_format")
+        check = self.feed_options.get("lzma_check", -1)
+        preset = self.feed_options.get("lzma_preset")
+        filters = self.feed_options.get("lzma_filters")
+        self.lzmafile = LZMAFile(
+            filename=self.file,
+            mode="wb",
+            format=format,
+            check=check,
+            preset=preset,
+            filters=filters,
+        )
+
+    def write(self, data: bytes) -> int:
+        return self.lzmafile.write(data)
+
+    def close(self) -> None:
+        self.lzmafile.close()
+
+
+# io.IOBase is subclassed here so that exporters can use the PostProcessingManager
+# instance as a file-like writable object. This can be needed by some exporters,
+# such as CsvItemExporter, which wraps the feed storage with io.TextIOWrapper.
 class PostProcessingManager(IOBase):
     """
     This will manage and use declared plugins to process data in a
@@ -89,14 +124,15 @@ class PostProcessingManager(IOBase):
     :type file: file like object
     """

-    def __init__(self, plugins: List[Any], file: BinaryIO, feed_options:
-        Dict[str, Any]) ->None:
+    def __init__(
+        self, plugins: List[Any], file: BinaryIO, feed_options: Dict[str, Any]
+    ) -> None:
         self.plugins = self._load_plugins(plugins)
         self.file = file
         self.feed_options = feed_options
         self.head_plugin = self._get_head_plugin()

-    def write(self, data: bytes) ->int:
+    def write(self, data: bytes) -> int:
         """
         Uses all the declared plugins to process data first, then writes
         the processed data to target file.
@@ -105,10 +141,26 @@ class PostProcessingManager(IOBase):
         :return: returns number of bytes written
         :rtype: int
         """
-        pass
+        return self.head_plugin.write(data)
+
+    def tell(self) -> int:
+        return self.file.tell()

-    def close(self) ->None:
+    def close(self) -> None:
         """
         Close the target file along with all the plugins.
         """
-        pass
+        self.head_plugin.close()
+
+    def writable(self) -> bool:
+        return True
+
+    def _load_plugins(self, plugins: List[Any]) -> List[Any]:
+        plugins = [load_object(plugin) for plugin in plugins]
+        return plugins
+
+    def _get_head_plugin(self) -> Any:
+        prev = self.file
+        for plugin in self.plugins[::-1]:
+            prev = plugin(prev, self.feed_options)
+        return prev
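
_get_head_plugin wraps the storage file with the declared plugins in reverse order, so data
written to the returned head plugin flows through the plugin list left to right before
reaching the file; a hedged FEEDS sketch (path and compression level are assumptions).

    FEEDS = {
        "exports/items.jsonl.gz": {
            "format": "jsonlines",
            "postprocessing": ["scrapy.extensions.postprocessing.GzipPlugin"],
            "gzip_compresslevel": 5,  # forwarded to GzipFile through feed_options
        },
    }
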
diff --git a/scrapy/extensions/spiderstate.py b/scrapy/extensions/spiderstate.py
index 903837a0c..929a3be70 100644
--- a/scrapy/extensions/spiderstate.py
+++ b/scrapy/extensions/spiderstate.py
@@ -1,5 +1,6 @@
 import pickle
 from pathlib import Path
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.job import job_dir
@@ -10,3 +11,30 @@ class SpiderState:

     def __init__(self, jobdir=None):
         self.jobdir = jobdir
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        jobdir = job_dir(crawler.settings)
+        if not jobdir:
+            raise NotConfigured
+
+        obj = cls(jobdir)
+        crawler.signals.connect(obj.spider_closed, signal=signals.spider_closed)
+        crawler.signals.connect(obj.spider_opened, signal=signals.spider_opened)
+        return obj
+
+    def spider_closed(self, spider):
+        if self.jobdir:
+            with Path(self.statefn).open("wb") as f:
+                pickle.dump(spider.state, f, protocol=4)
+
+    def spider_opened(self, spider):
+        if self.jobdir and Path(self.statefn).exists():
+            with Path(self.statefn).open("rb") as f:
+                spider.state = pickle.load(f)
+        else:
+            spider.state = {}
+
+    @property
+    def statefn(self) -> str:
+        return str(Path(self.jobdir, "spider.state"))
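
SpiderState only activates when a job directory is configured; a minimal sketch (spider name,
URL and state key are assumptions) of how the persisted spider.state dict is typically used.

    # settings.py (assumed): JOBDIR = "crawls/myspider-1"
    import scrapy

    class MySpider(scrapy.Spider):
        name = "myspider"
        start_urls = ["https://example.com"]

        def parse(self, response):
            # self.state is restored from <JOBDIR>/spider.state on open and pickled on close,
            # so this counter survives a pause/resume cycle.
            self.state["pages_seen"] = self.state.get("pages_seen", 0) + 1
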
diff --git a/scrapy/extensions/statsmailer.py b/scrapy/extensions/statsmailer.py
index b0e2395d9..58610c25e 100644
--- a/scrapy/extensions/statsmailer.py
+++ b/scrapy/extensions/statsmailer.py
@@ -3,14 +3,32 @@ StatsMailer extension sends an email when a spider finishes scraping.

 Use STATSMAILER_RCPTS setting to enable and give the recipient mail address
 """
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.mail import MailSender


 class StatsMailer:
-
     def __init__(self, stats, recipients, mail):
         self.stats = stats
         self.recipients = recipients
         self.mail = mail
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        recipients = crawler.settings.getlist("STATSMAILER_RCPTS")
+        if not recipients:
+            raise NotConfigured
+        mail = MailSender.from_settings(crawler.settings)
+        o = cls(crawler.stats, recipients, mail)
+        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+        return o
+
+    def spider_closed(self, spider):
+        spider_stats = self.stats.get_stats(spider)
+        body = "Global stats\n\n"
+        body += "\n".join(f"{k:<50} : {v}" for k, v in self.stats.get_stats().items())
+        body += f"\n\n{spider.name} stats\n\n"
+        body += "\n".join(f"{k:<50} : {v}" for k, v in spider_stats.items())
+        return self.mail.send(self.recipients, f"Scrapy stats for: {spider.name}", body)
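
The extension above is opt-in through STATSMAILER_RCPTS and delivers the report with the
regular MailSender; an assumed settings sketch.

    STATSMAILER_RCPTS = ["stats@example.com"]  # assumed recipient
    MAIL_FROM = "scrapy@example.com"
    MAIL_HOST = "localhost"
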
diff --git a/scrapy/extensions/telnet.py b/scrapy/extensions/telnet.py
index d866ecf30..c92b7f5fe 100644
--- a/scrapy/extensions/telnet.py
+++ b/scrapy/extensions/telnet.py
@@ -3,50 +3,113 @@ Scrapy Telnet Console extension

 See documentation in docs/topics/telnetconsole.rst
 """
+
 import binascii
 import logging
 import os
 import pprint
 import traceback
+
 from twisted.internet import protocol
+
 try:
     from twisted.conch import manhole, telnet
     from twisted.conch.insults import insults
+
     TWISTED_CONCH_AVAILABLE = True
 except (ImportError, SyntaxError):
     _TWISTED_CONCH_TRACEBACK = traceback.format_exc()
     TWISTED_CONCH_AVAILABLE = False
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.decorators import defers
 from scrapy.utils.engine import print_engine_status
 from scrapy.utils.reactor import listen_tcp
 from scrapy.utils.trackref import print_live_refs
+
 logger = logging.getLogger(__name__)
+
+# signal to update telnet variables
+# args: telnet_vars
 update_telnet_vars = object()


 class TelnetConsole(protocol.ServerFactory):
-
     def __init__(self, crawler):
-        if not crawler.settings.getbool('TELNETCONSOLE_ENABLED'):
+        if not crawler.settings.getbool("TELNETCONSOLE_ENABLED"):
             raise NotConfigured
         if not TWISTED_CONCH_AVAILABLE:
             raise NotConfigured(
-                """TELNETCONSOLE_ENABLED setting is True but required twisted modules failed to import:
-"""
-                 + _TWISTED_CONCH_TRACEBACK)
+                "TELNETCONSOLE_ENABLED setting is True but required twisted "
+                "modules failed to import:\n" + _TWISTED_CONCH_TRACEBACK
+            )
         self.crawler = crawler
         self.noisy = False
-        self.portrange = [int(x) for x in crawler.settings.getlist(
-            'TELNETCONSOLE_PORT')]
-        self.host = crawler.settings['TELNETCONSOLE_HOST']
-        self.username = crawler.settings['TELNETCONSOLE_USERNAME']
-        self.password = crawler.settings['TELNETCONSOLE_PASSWORD']
+        self.portrange = [
+            int(x) for x in crawler.settings.getlist("TELNETCONSOLE_PORT")
+        ]
+        self.host = crawler.settings["TELNETCONSOLE_HOST"]
+        self.username = crawler.settings["TELNETCONSOLE_USERNAME"]
+        self.password = crawler.settings["TELNETCONSOLE_PASSWORD"]
+
         if not self.password:
-            self.password = binascii.hexlify(os.urandom(8)).decode('utf8')
-            logger.info('Telnet Password: %s', self.password)
-        self.crawler.signals.connect(self.start_listening, signals.
-            engine_started)
-        self.crawler.signals.connect(self.stop_listening, signals.
-            engine_stopped)
+            self.password = binascii.hexlify(os.urandom(8)).decode("utf8")
+            logger.info("Telnet Password: %s", self.password)
+
+        self.crawler.signals.connect(self.start_listening, signals.engine_started)
+        self.crawler.signals.connect(self.stop_listening, signals.engine_stopped)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def start_listening(self):
+        self.port = listen_tcp(self.portrange, self.host, self)
+        h = self.port.getHost()
+        logger.info(
+            "Telnet console listening on %(host)s:%(port)d",
+            {"host": h.host, "port": h.port},
+            extra={"crawler": self.crawler},
+        )
+
+    def stop_listening(self):
+        self.port.stopListening()
+
+    def protocol(self):
+        class Portal:
+            """An implementation of IPortal"""
+
+            @defers
+            def login(self_, credentials, mind, *interfaces):
+                if not (
+                    credentials.username == self.username.encode("utf8")
+                    and credentials.checkPassword(self.password.encode("utf8"))
+                ):
+                    raise ValueError("Invalid credentials")
+
+                protocol = telnet.TelnetBootstrapProtocol(
+                    insults.ServerProtocol, manhole.Manhole, self._get_telnet_vars()
+                )
+                return (interfaces[0], protocol, lambda: None)
+
+        return telnet.TelnetTransport(telnet.AuthenticatingTelnetProtocol, Portal())
+
+    def _get_telnet_vars(self):
+        # Note: if you add entries here also update topics/telnetconsole.rst
+        telnet_vars = {
+            "engine": self.crawler.engine,
+            "spider": self.crawler.engine.spider,
+            "slot": self.crawler.engine.slot,
+            "crawler": self.crawler,
+            "extensions": self.crawler.extensions,
+            "stats": self.crawler.stats,
+            "settings": self.crawler.settings,
+            "est": lambda: print_engine_status(self.crawler.engine),
+            "p": pprint.pprint,
+            "prefs": print_live_refs,
+            "help": "This is Scrapy telnet console. For more info see: "
+            "https://docs.scrapy.org/en/latest/topics/telnetconsole.html",
+        }
+        self.crawler.signals.send_catch_log(update_telnet_vars, telnet_vars=telnet_vars)
+        return telnet_vars
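
A minimal usage sketch of the update_telnet_vars signal added above; the TelnetVarsExtension name and the injected variable are illustrative, not part of the patch:

from scrapy.extensions.telnet import update_telnet_vars


class TelnetVarsExtension:
    """Hypothetical extension that adds a variable to the telnet namespace."""

    def __init__(self, crawler):
        # called with telnet_vars when TelnetConsole builds its namespace
        crawler.signals.connect(self._add_vars, signal=update_telnet_vars)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def _add_vars(self, telnet_vars):
        telnet_vars["answer"] = 42  # available as `answer` inside the console
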
diff --git a/scrapy/extensions/throttle.py b/scrapy/extensions/throttle.py
index 4920a7cc7..396800775 100644
--- a/scrapy/extensions/throttle.py
+++ b/scrapy/extensions/throttle.py
@@ -1,23 +1,101 @@
 import logging
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
+
 logger = logging.getLogger(__name__)


 class AutoThrottle:
-
     def __init__(self, crawler):
         self.crawler = crawler
-        if not crawler.settings.getbool('AUTOTHROTTLE_ENABLED'):
+        if not crawler.settings.getbool("AUTOTHROTTLE_ENABLED"):
             raise NotConfigured
-        self.debug = crawler.settings.getbool('AUTOTHROTTLE_DEBUG')
+
+        self.debug = crawler.settings.getbool("AUTOTHROTTLE_DEBUG")
         self.target_concurrency = crawler.settings.getfloat(
-            'AUTOTHROTTLE_TARGET_CONCURRENCY')
-        crawler.signals.connect(self._spider_opened, signal=signals.
-            spider_opened)
-        crawler.signals.connect(self._response_downloaded, signal=signals.
-            response_downloaded)
+            "AUTOTHROTTLE_TARGET_CONCURRENCY"
+        )
+        crawler.signals.connect(self._spider_opened, signal=signals.spider_opened)
+        crawler.signals.connect(
+            self._response_downloaded, signal=signals.response_downloaded
+        )
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def _spider_opened(self, spider):
+        self.mindelay = self._min_delay(spider)
+        self.maxdelay = self._max_delay(spider)
+        spider.download_delay = self._start_delay(spider)
+
+    def _min_delay(self, spider):
+        s = self.crawler.settings
+        return getattr(spider, "download_delay", s.getfloat("DOWNLOAD_DELAY"))
+
+    def _max_delay(self, spider):
+        return self.crawler.settings.getfloat("AUTOTHROTTLE_MAX_DELAY")
+
+    def _start_delay(self, spider):
+        return max(
+            self.mindelay, self.crawler.settings.getfloat("AUTOTHROTTLE_START_DELAY")
+        )
+
+    def _response_downloaded(self, response, request, spider):
+        key, slot = self._get_slot(request, spider)
+        latency = request.meta.get("download_latency")
+        if latency is None or slot is None:
+            return
+
+        olddelay = slot.delay
+        self._adjust_delay(slot, latency, response)
+        if self.debug:
+            diff = slot.delay - olddelay
+            size = len(response.body)
+            conc = len(slot.transferring)
+            logger.info(
+                "slot: %(slot)s | conc:%(concurrency)2d | "
+                "delay:%(delay)5d ms (%(delaydiff)+d) | "
+                "latency:%(latency)5d ms | size:%(size)6d bytes",
+                {
+                    "slot": key,
+                    "concurrency": conc,
+                    "delay": slot.delay * 1000,
+                    "delaydiff": diff * 1000,
+                    "latency": latency * 1000,
+                    "size": size,
+                },
+                extra={"spider": spider},
+            )
+
+    def _get_slot(self, request, spider):
+        key = request.meta.get("download_slot")
+        return key, self.crawler.engine.downloader.slots.get(key)

     def _adjust_delay(self, slot, latency, response):
         """Define delay adjustment policy"""
-        pass
+
+        # If a server needs `latency` seconds to respond, then
+        # we should send a request every `latency/N` seconds
+        # to have N requests processed in parallel
+        target_delay = latency / self.target_concurrency
+
+        # Adjust the delay to make it closer to target_delay
+        new_delay = (slot.delay + target_delay) / 2.0
+
+        # If target delay is bigger than old delay, then use it instead of mean.
+        # It works better with problematic sites.
+        new_delay = max(target_delay, new_delay)
+
+        # Make sure self.mindelay <= new_delay <= self.maxdelay
+        new_delay = min(max(self.mindelay, new_delay), self.maxdelay)
+
+        # Don't adjust the delay if the response status != 200 and the new
+        # delay is smaller than the old one, as error pages (and redirections)
+        # are usually small and so tend to reduce latency, thus provoking a
+        # positive feedback loop that keeps reducing the delay instead of
+        # increasing it.
+        if response.status != 200 and new_delay <= slot.delay:
+            return
+
+        slot.delay = new_delay
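
A standalone sketch of the delay policy implemented above, with made-up numbers (0.25 s latency, target concurrency 2, current delay 1.0 s):

def next_delay(delay, latency, target_concurrency, mindelay, maxdelay, status=200):
    # mirrors _adjust_delay: move halfway towards latency/target_concurrency,
    # never below it, clamp to [mindelay, maxdelay], and keep the old delay
    # for non-200 responses that would otherwise lower it
    target_delay = latency / target_concurrency
    new_delay = max(target_delay, (delay + target_delay) / 2.0)
    new_delay = min(max(mindelay, new_delay), maxdelay)
    if status != 200 and new_delay <= delay:
        return delay
    return new_delay


print(next_delay(delay=1.0, latency=0.25, target_concurrency=2.0,
                 mindelay=0.0, maxdelay=60.0))              # 0.5625
print(next_delay(delay=1.0, latency=0.25, target_concurrency=2.0,
                 mindelay=0.0, maxdelay=60.0, status=503))  # 1.0 (kept)
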
diff --git a/scrapy/http/common.py b/scrapy/http/common.py
index e69de29bb..bc8861574 100644
--- a/scrapy/http/common.py
+++ b/scrapy/http/common.py
@@ -0,0 +1,7 @@
+def obsolete_setter(setter, attrname):
+    def newsetter(self, value):
+        c = self.__class__.__name__
+        msg = f"{c}.{attrname} is not modifiable, use {c}.replace() instead"
+        raise AttributeError(msg)
+
+    return newsetter
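
A hedged usage sketch of obsolete_setter: paired with property() it makes an attribute readable but not assignable. The Page class is made up; the first argument is ignored by the raising setter, so None is passed here:

from scrapy.http.common import obsolete_setter


class Page:
    def __init__(self, url):
        self._url = url

    def _get_url(self):
        return self._url

    url = property(_get_url, obsolete_setter(None, "url"))


p = Page("https://example.com")
print(p.url)  # https://example.com
try:
    p.url = "https://other.example"
except AttributeError as exc:
    print(exc)  # Page.url is not modifiable, use Page.replace() instead
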
diff --git a/scrapy/http/cookies.py b/scrapy/http/cookies.py
index 25e4927de..15f25f69d 100644
--- a/scrapy/http/cookies.py
+++ b/scrapy/http/cookies.py
@@ -2,13 +2,16 @@ import re
 import time
 from http.cookiejar import CookieJar as _CookieJar
 from http.cookiejar import DefaultCookiePolicy
+
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.python import to_unicode
-IPV4_RE = re.compile('\\.\\d+$', re.ASCII)

+# Defined in the http.cookiejar module, but undocumented:
+# https://github.com/python/cpython/blob/v3.9.0/Lib/http/cookiejar.py#L527
+IPV4_RE = re.compile(r"\.\d+$", re.ASCII)

-class CookieJar:

+class CookieJar:
     def __init__(self, policy=None, check_expired_frequency=10000):
         self.policy = policy or DefaultCookiePolicy()
         self.jar = _CookieJar(self.policy)
@@ -16,12 +19,73 @@ class CookieJar:
         self.check_expired_frequency = check_expired_frequency
         self.processed = 0

+    def extract_cookies(self, response, request):
+        wreq = WrappedRequest(request)
+        wrsp = WrappedResponse(response)
+        return self.jar.extract_cookies(wrsp, wreq)
+
+    def add_cookie_header(self, request):
+        wreq = WrappedRequest(request)
+        self.policy._now = self.jar._now = int(time.time())
+
+        # the cookiejar implementation iterates through all domains
+        # instead we restrict to potential matches on the domain
+        req_host = urlparse_cached(request).hostname
+        if not req_host:
+            return
+
+        if not IPV4_RE.search(req_host):
+            hosts = potential_domain_matches(req_host)
+            if "." not in req_host:
+                hosts += [req_host + ".local"]
+        else:
+            hosts = [req_host]
+
+        cookies = []
+        for host in hosts:
+            if host in self.jar._cookies:
+                cookies += self.jar._cookies_for_domain(host, wreq)
+
+        attrs = self.jar._cookie_attrs(cookies)
+        if attrs:
+            if not wreq.has_header("Cookie"):
+                wreq.add_unredirected_header("Cookie", "; ".join(attrs))
+
+        self.processed += 1
+        if self.processed % self.check_expired_frequency == 0:
+            # This is still quite inefficient for a large number of cookies
+            self.jar.clear_expired_cookies()
+
+    @property
+    def _cookies(self):
+        return self.jar._cookies
+
+    def clear_session_cookies(self, *args, **kwargs):
+        return self.jar.clear_session_cookies(*args, **kwargs)
+
+    def clear(self, domain=None, path=None, name=None):
+        return self.jar.clear(domain, path, name)
+
     def __iter__(self):
         return iter(self.jar)

     def __len__(self):
         return len(self.jar)

+    def set_policy(self, pol):
+        return self.jar.set_policy(pol)
+
+    def make_cookies(self, response, request):
+        wreq = WrappedRequest(request)
+        wrsp = WrappedResponse(response)
+        return self.jar.make_cookies(wrsp, wreq)
+
+    def set_cookie(self, cookie):
+        self.jar.set_cookie(cookie)
+
+    def set_cookie_if_ok(self, cookie, request):
+        self.jar.set_cookie_if_ok(cookie, WrappedRequest(request))
+

 def potential_domain_matches(domain):
     """Potential domain matches for a cookie
@@ -30,11 +94,24 @@ def potential_domain_matches(domain):
     ['www.example.com', 'example.com', '.www.example.com', '.example.com']

     """
-    pass
+    matches = [domain]
+    try:
+        start = domain.index(".") + 1
+        end = domain.rindex(".")
+        while start < end:
+            matches.append(domain[start:])
+            start = domain.index(".", start) + 1
+    except ValueError:
+        pass
+    return matches + ["." + d for d in matches]


 class _DummyLock:
-    pass
+    def acquire(self):
+        pass
+
+    def release(self):
+        pass


 class WrappedRequest:
@@ -46,6 +123,15 @@ class WrappedRequest:
     def __init__(self, request):
         self.request = request

+    def get_full_url(self):
+        return self.request.url
+
+    def get_host(self):
+        return urlparse_cached(self.request).netloc
+
+    def get_type(self):
+        return urlparse_cached(self.request).scheme
+
     def is_unverifiable(self):
         """Unverifiable should indicate whether the request is unverifiable, as defined by RFC 2965.

@@ -54,10 +140,56 @@ class WrappedRequest:
         HTML document, and the user had no option to approve the automatic
         fetching of the image, this should be true.
         """
-        pass
+        return self.request.meta.get("is_unverifiable", False)

+    @property
+    def full_url(self):
+        return self.get_full_url()

-class WrappedResponse:
+    @property
+    def host(self):
+        return self.get_host()
+
+    @property
+    def type(self):
+        return self.get_type()
+
+    @property
+    def unverifiable(self):
+        return self.is_unverifiable()
+
+    @property
+    def origin_req_host(self):
+        return urlparse_cached(self.request).hostname
+
+    def has_header(self, name):
+        return name in self.request.headers

+    def get_header(self, name, default=None):
+        value = self.request.headers.get(name, default)
+        return to_unicode(value, errors="replace") if value is not None else None
+
+    def header_items(self):
+        return [
+            (
+                to_unicode(k, errors="replace"),
+                [to_unicode(x, errors="replace") for x in v],
+            )
+            for k, v in self.request.headers.items()
+        ]
+
+    def add_unredirected_header(self, name, value):
+        self.request.headers.appendlist(name, value)
+
+
+class WrappedResponse:
     def __init__(self, response):
         self.response = response
+
+    def info(self):
+        return self
+
+    def get_all(self, name, default=None):
+        return [
+            to_unicode(v, errors="replace") for v in self.response.headers.getlist(name)
+        ]
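
A small round-trip sketch of the CookieJar methods filled in above (URL and cookie values are illustrative):

from scrapy.http import Request, Response
from scrapy.http.cookies import CookieJar

jar = CookieJar()
login = Request("https://example.com/login")
response = Response(
    "https://example.com/login",
    headers={"Set-Cookie": "session=abc123; Path=/"},
)
jar.extract_cookies(response, login)    # store cookies set by the response

profile = Request("https://example.com/profile")
jar.add_cookie_header(profile)          # only potentially matching domains are checked
print(profile.headers.get("Cookie"))    # b'session=abc123'
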
diff --git a/scrapy/http/headers.py b/scrapy/http/headers.py
index 730065335..822597c84 100644
--- a/scrapy/http/headers.py
+++ b/scrapy/http/headers.py
@@ -1,5 +1,7 @@
 from collections.abc import Mapping
+
 from w3lib.http import headers_dict_to_raw
+
 from scrapy.utils.datatypes import CaseInsensitiveDict, CaselessDict
 from scrapy.utils.python import to_unicode

@@ -7,17 +9,40 @@ from scrapy.utils.python import to_unicode
 class Headers(CaselessDict):
     """Case insensitive http headers dictionary"""

-    def __init__(self, seq=None, encoding='utf-8'):
+    def __init__(self, seq=None, encoding="utf-8"):
         self.encoding = encoding
         super().__init__(seq)

+    def update(self, seq):
+        seq = seq.items() if isinstance(seq, Mapping) else seq
+        iseq = {}
+        for k, v in seq:
+            iseq.setdefault(self.normkey(k), []).extend(self.normvalue(v))
+        super().update(iseq)
+
     def normkey(self, key):
         """Normalize key to bytes"""
-        pass
+        return self._tobytes(key.title())

     def normvalue(self, value):
         """Normalize values to bytes"""
-        pass
+        if value is None:
+            value = []
+        elif isinstance(value, (str, bytes)):
+            value = [value]
+        elif not hasattr(value, "__iter__"):
+            value = [value]
+
+        return [self._tobytes(x) for x in value]
+
+    def _tobytes(self, x):
+        if isinstance(x, bytes):
+            return x
+        if isinstance(x, str):
+            return x.encode(self.encoding)
+        if isinstance(x, int):
+            return str(x).encode(self.encoding)
+        raise TypeError(f"Unsupported value type: {type(x)}")

     def __getitem__(self, key):
         try:
@@ -25,12 +50,53 @@ class Headers(CaselessDict):
         except IndexError:
             return None

+    def get(self, key, def_val=None):
+        try:
+            return super().get(key, def_val)[-1]
+        except IndexError:
+            return None
+
+    def getlist(self, key, def_val=None):
+        try:
+            return super().__getitem__(key)
+        except KeyError:
+            if def_val is not None:
+                return self.normvalue(def_val)
+            return []
+
+    def setlist(self, key, list_):
+        self[key] = list_
+
+    def setlistdefault(self, key, default_list=()):
+        return self.setdefault(key, default_list)
+
+    def appendlist(self, key, value):
+        lst = self.getlist(key)
+        lst.extend(self.normvalue(value))
+        self[key] = lst
+
+    def items(self):
+        return ((k, self.getlist(k)) for k in self.keys())
+
+    def values(self):
+        return [self[k] for k in self.keys()]
+
+    def to_string(self):
+        return headers_dict_to_raw(self)
+
     def to_unicode_dict(self):
         """Return headers as a CaselessDict with unicode keys
         and unicode values. Multiple values are joined with ','.
         """
-        pass
+        return CaseInsensitiveDict(
+            (
+                to_unicode(key, encoding=self.encoding),
+                to_unicode(b",".join(value), encoding=self.encoding),
+            )
+            for key, value in self.items()
+        )

     def __copy__(self):
         return self.__class__(self)
+
     copy = __copy__
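
A quick sketch of the case-insensitive, multi-valued behaviour implemented above (header names and values are arbitrary):

from scrapy.http.headers import Headers

h = Headers({"Content-Type": "text/html"})
h.appendlist("Set-Cookie", "a=1")
h.appendlist("set-cookie", "b=2")           # keys are case-insensitive

print(h["content-type"])                    # b'text/html'
print(h.getlist("Set-Cookie"))              # [b'a=1', b'b=2']
print(h.to_unicode_dict()["set-cookie"])    # a=1,b=2
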
diff --git a/scrapy/http/request/form.py b/scrapy/http/request/form.py
index 351539ea3..2d1f33edd 100644
--- a/scrapy/http/request/form.py
+++ b/scrapy/http/request/form.py
@@ -4,59 +4,255 @@ This module implements the FormRequest class which is a more convenient class

 See documentation in docs/topics/request-response.rst
 """
+
 from typing import Iterable, List, Optional, Tuple, Type, TypeVar, Union, cast
 from urllib.parse import urlencode, urljoin, urlsplit, urlunsplit
-from lxml.html import FormElement
-from lxml.html import InputElement
-from lxml.html import MultipleSelectOptions
-from lxml.html import SelectElement
-from lxml.html import TextareaElement
+
+from lxml.html import FormElement  # nosec
+from lxml.html import InputElement  # nosec
+from lxml.html import MultipleSelectOptions  # nosec
+from lxml.html import SelectElement  # nosec
+from lxml.html import TextareaElement  # nosec
 from w3lib.html import strip_html5_whitespace
+
 from scrapy.http.request import Request
 from scrapy.http.response.text import TextResponse
 from scrapy.utils.python import is_listlike, to_bytes
-FormRequestTypeVar = TypeVar('FormRequestTypeVar', bound='FormRequest')
+
+FormRequestTypeVar = TypeVar("FormRequestTypeVar", bound="FormRequest")
+
 FormdataKVType = Tuple[str, Union[str, Iterable[str]]]
 FormdataType = Optional[Union[dict, List[FormdataKVType]]]


 class FormRequest(Request):
-    valid_form_methods = ['GET', 'POST']
+    valid_form_methods = ["GET", "POST"]
+
+    def __init__(self, *args, formdata: FormdataType = None, **kwargs) -> None:
+        if formdata and kwargs.get("method") is None:
+            kwargs["method"] = "POST"

-    def __init__(self, *args, formdata: FormdataType=None, **kwargs) ->None:
-        if formdata and kwargs.get('method') is None:
-            kwargs['method'] = 'POST'
         super().__init__(*args, **kwargs)
+
         if formdata:
-            items = formdata.items() if isinstance(formdata, dict
-                ) else formdata
+            items = formdata.items() if isinstance(formdata, dict) else formdata
             form_query_str = _urlencode(items, self.encoding)
-            if self.method == 'POST':
-                self.headers.setdefault(b'Content-Type',
-                    b'application/x-www-form-urlencoded')
+            if self.method == "POST":
+                self.headers.setdefault(
+                    b"Content-Type", b"application/x-www-form-urlencoded"
+                )
                 self._set_body(form_query_str)
             else:
-                self._set_url(urlunsplit(urlsplit(self.url)._replace(query=
-                    form_query_str)))
+                self._set_url(
+                    urlunsplit(urlsplit(self.url)._replace(query=form_query_str))
+                )
+
+    @classmethod
+    def from_response(
+        cls: Type[FormRequestTypeVar],
+        response: TextResponse,
+        formname: Optional[str] = None,
+        formid: Optional[str] = None,
+        formnumber: int = 0,
+        formdata: FormdataType = None,
+        clickdata: Optional[dict] = None,
+        dont_click: bool = False,
+        formxpath: Optional[str] = None,
+        formcss: Optional[str] = None,
+        **kwargs,
+    ) -> FormRequestTypeVar:
+        kwargs.setdefault("encoding", response.encoding)
+
+        if formcss is not None:
+            from parsel.csstranslator import HTMLTranslator
+
+            formxpath = HTMLTranslator().css_to_xpath(formcss)
+
+        form = _get_form(response, formname, formid, formnumber, formxpath)
+        formdata = _get_inputs(form, formdata, dont_click, clickdata)
+        url = _get_form_url(form, kwargs.pop("url", None))
+
+        method = kwargs.pop("method", form.method)
+        if method is not None:
+            method = method.upper()
+            if method not in cls.valid_form_methods:
+                method = "GET"
+
+        return cls(url=url, method=method, formdata=formdata, **kwargs)
+
+
+def _get_form_url(form: FormElement, url: Optional[str]) -> str:
+    assert form.base_url is not None  # typing
+    if url is None:
+        action = form.get("action")
+        if action is None:
+            return form.base_url
+        return urljoin(form.base_url, strip_html5_whitespace(action))
+    return urljoin(form.base_url, url)
+

+def _urlencode(seq: Iterable[FormdataKVType], enc: str) -> str:
+    values = [
+        (to_bytes(k, enc), to_bytes(v, enc))
+        for k, vs in seq
+        for v in (cast(Iterable[str], vs) if is_listlike(vs) else [cast(str, vs)])
+    ]
+    return urlencode(values, doseq=True)

-def _get_form(response: TextResponse, formname: Optional[str], formid:
-    Optional[str], formnumber: int, formxpath: Optional[str]) ->FormElement:
+
+def _get_form(
+    response: TextResponse,
+    formname: Optional[str],
+    formid: Optional[str],
+    formnumber: int,
+    formxpath: Optional[str],
+) -> FormElement:
     """Find the wanted form element within the given response."""
-    pass
+    root = response.selector.root
+    forms = root.xpath("//form")
+    if not forms:
+        raise ValueError(f"No <form> element found in {response}")
+
+    if formname is not None:
+        f = root.xpath(f'//form[@name="{formname}"]')
+        if f:
+            return f[0]
+
+    if formid is not None:
+        f = root.xpath(f'//form[@id="{formid}"]')
+        if f:
+            return f[0]
+
+    # Get form element from xpath, if not found, go up
+    if formxpath is not None:
+        nodes = root.xpath(formxpath)
+        if nodes:
+            el = nodes[0]
+            while True:
+                if el.tag == "form":
+                    return el
+                el = el.getparent()
+                if el is None:
+                    break
+        raise ValueError(f"No <form> element found with {formxpath}")

+    # If we get here, it means that either formname was None or invalid
+    try:
+        form = forms[formnumber]
+    except IndexError:
+        raise IndexError(f"Form number {formnumber} not found in {response}")
+    else:
+        return form

-def _get_inputs(form: FormElement, formdata: FormdataType, dont_click: bool,
-    clickdata: Optional[dict]) ->List[FormdataKVType]:
+
+def _get_inputs(
+    form: FormElement,
+    formdata: FormdataType,
+    dont_click: bool,
+    clickdata: Optional[dict],
+) -> List[FormdataKVType]:
     """Return a list of key-value pairs for the inputs found in the given form."""
-    pass
+    try:
+        formdata_keys = dict(formdata or ()).keys()
+    except (ValueError, TypeError):
+        raise ValueError("formdata should be a dict or iterable of tuples")
+
+    if not formdata:
+        formdata = []
+    inputs = form.xpath(
+        "descendant::textarea"
+        "|descendant::select"
+        "|descendant::input[not(@type) or @type["
+        ' not(re:test(., "^(?:submit|image|reset)$", "i"))'
+        " and (../@checked or"
+        '  not(re:test(., "^(?:checkbox|radio)$", "i")))]]',
+        namespaces={"re": "http://exslt.org/regular-expressions"},
+    )
+    values: List[FormdataKVType] = [
+        (k, "" if v is None else v)
+        for k, v in (_value(e) for e in inputs)
+        if k and k not in formdata_keys
+    ]
+
+    if not dont_click:
+        clickable = _get_clickable(clickdata, form)
+        if clickable and clickable[0] not in formdata and clickable[0] is not None:
+            values.append(clickable)
+
+    if isinstance(formdata, dict):
+        formdata = formdata.items()  # type: ignore[assignment]

+    values.extend((k, v) for k, v in formdata if v is not None)
+    return values

-def _get_clickable(clickdata: Optional[dict], form: FormElement) ->Optional[
-    Tuple[str, str]]:
+
+def _value(
+    ele: Union[InputElement, SelectElement, TextareaElement]
+) -> Tuple[Optional[str], Union[None, str, MultipleSelectOptions]]:
+    n = ele.name
+    v = ele.value
+    if ele.tag == "select":
+        return _select_value(cast(SelectElement, ele), n, v)
+    return n, v
+
+
+def _select_value(
+    ele: SelectElement, n: Optional[str], v: Union[None, str, MultipleSelectOptions]
+) -> Tuple[Optional[str], Union[None, str, MultipleSelectOptions]]:
+    multiple = ele.multiple
+    if v is None and not multiple:
+        # Match browser behaviour on simple select tag without options selected
+        # And for select tags without options
+        o = ele.value_options
+        return (n, o[0]) if o else (None, None)
+    return n, v
+
+
+def _get_clickable(
+    clickdata: Optional[dict], form: FormElement
+) -> Optional[Tuple[str, str]]:
     """
     Returns the clickable element specified in clickdata,
     if the latter is given. If not, it returns the first
     clickable element found
     """
-    pass
+    clickables = list(
+        form.xpath(
+            'descendant::input[re:test(@type, "^(submit|image)$", "i")]'
+            '|descendant::button[not(@type) or re:test(@type, "^submit$", "i")]',
+            namespaces={"re": "http://exslt.org/regular-expressions"},
+        )
+    )
+    if not clickables:
+        return None
+
+    # If we don't have clickdata, we just use the first clickable element
+    if clickdata is None:
+        el = clickables[0]
+        return (el.get("name"), el.get("value") or "")
+
+    # If clickdata is given, we compare it to the clickable elements to find a
+    # match. We first look to see if the number is specified in clickdata,
+    # because that uniquely identifies the element
+    nr = clickdata.get("nr", None)
+    if nr is not None:
+        try:
+            el = list(form.inputs)[nr]
+        except IndexError:
+            pass
+        else:
+            return (el.get("name"), el.get("value") or "")
+
+    # We didn't find it, so now we build an XPath expression out of the other
+    # arguments, because they can be used as such
+    xpath = ".//*" + "".join(f'[@{k}="{v}"]' for k, v in clickdata.items())
+    el = form.xpath(xpath)
+    if len(el) == 1:
+        return (el[0].get("name"), el[0].get("value") or "")
+    if len(el) > 1:
+        raise ValueError(
+            f"Multiple elements found ({el!r}) matching the "
+            f"criteria in clickdata: {clickdata!r}"
+        )
+    else:
+        raise ValueError(f"No clickable element matching clickdata: {clickdata!r}")
diff --git a/scrapy/http/request/json_request.py b/scrapy/http/request/json_request.py
index 7a8404484..510c903db 100644
--- a/scrapy/http/request/json_request.py
+++ b/scrapy/http/request/json_request.py
@@ -4,40 +4,60 @@ This module implements the JsonRequest class which is a more convenient class

 See documentation in docs/topics/request-response.rst
 """
+
 import copy
 import json
 import warnings
 from typing import Optional, Tuple
+
 from scrapy.http.request import Request
 from scrapy.utils.deprecate import create_deprecated_class


 class JsonRequest(Request):
-    attributes: Tuple[str, ...] = Request.attributes + ('dumps_kwargs',)
+    attributes: Tuple[str, ...] = Request.attributes + ("dumps_kwargs",)

-    def __init__(self, *args, dumps_kwargs: Optional[dict]=None, **kwargs
-        ) ->None:
-        dumps_kwargs = copy.deepcopy(dumps_kwargs
-            ) if dumps_kwargs is not None else {}
-        dumps_kwargs.setdefault('sort_keys', True)
+    def __init__(self, *args, dumps_kwargs: Optional[dict] = None, **kwargs) -> None:
+        dumps_kwargs = copy.deepcopy(dumps_kwargs) if dumps_kwargs is not None else {}
+        dumps_kwargs.setdefault("sort_keys", True)
         self._dumps_kwargs = dumps_kwargs
-        body_passed = kwargs.get('body', None) is not None
-        data = kwargs.pop('data', None)
+
+        body_passed = kwargs.get("body", None) is not None
+        data = kwargs.pop("data", None)
         data_passed = data is not None
+
         if body_passed and data_passed:
-            warnings.warn('Both body and data passed. data will be ignored')
+            warnings.warn("Both body and data passed. data will be ignored")
         elif not body_passed and data_passed:
-            kwargs['body'] = self._dumps(data)
-            if 'method' not in kwargs:
-                kwargs['method'] = 'POST'
+            kwargs["body"] = self._dumps(data)
+            if "method" not in kwargs:
+                kwargs["method"] = "POST"
+
         super().__init__(*args, **kwargs)
-        self.headers.setdefault('Content-Type', 'application/json')
-        self.headers.setdefault('Accept',
-            'application/json, text/javascript, */*; q=0.01')
+        self.headers.setdefault("Content-Type", "application/json")
+        self.headers.setdefault(
+            "Accept", "application/json, text/javascript, */*; q=0.01"
+        )
+
+    @property
+    def dumps_kwargs(self) -> dict:
+        return self._dumps_kwargs
+
+    def replace(self, *args, **kwargs) -> Request:
+        body_passed = kwargs.get("body", None) is not None
+        data = kwargs.pop("data", None)
+        data_passed = data is not None
+
+        if body_passed and data_passed:
+            warnings.warn("Both body and data passed. data will be ignored")
+        elif not body_passed and data_passed:
+            kwargs["body"] = self._dumps(data)
+
+        return super().replace(*args, **kwargs)

-    def _dumps(self, data: dict) ->str:
+    def _dumps(self, data: dict) -> str:
         """Convert to JSON"""
-        pass
+        return json.dumps(data, **self._dumps_kwargs)


-JSONRequest = create_deprecated_class('JSONRequest', JsonRequest)
+JSONRequest = create_deprecated_class("JSONRequest", JsonRequest)
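
A short sketch of the data/body handling above (the endpoint URL is illustrative):

from scrapy.http import JsonRequest

req = JsonRequest("https://api.example.com/items", data={"qty": 2, "name": "widget"})
print(req.method)                    # POST (implied by passing data)
print(req.body)                      # b'{"name": "widget", "qty": 2}' (sort_keys=True)
print(req.headers[b"Content-Type"])  # b'application/json'

req2 = req.replace(data={"qty": 3})  # replace() re-serializes data the same way
print(req2.body)                     # b'{"qty": 3}'
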
diff --git a/scrapy/http/request/rpc.py b/scrapy/http/request/rpc.py
index 2ed828664..59767de7a 100644
--- a/scrapy/http/request/rpc.py
+++ b/scrapy/http/request/rpc.py
@@ -6,22 +6,32 @@ See documentation in docs/topics/request-response.rst
 """
 import xmlrpc.client as xmlrpclib
 from typing import Optional
+
 import defusedxml.xmlrpc
+
 from scrapy.http.request import Request
 from scrapy.utils.python import get_func_args
+
 defusedxml.xmlrpc.monkey_patch()
+
 DUMPS_ARGS = get_func_args(xmlrpclib.dumps)


 class XmlRpcRequest(Request):
-
-    def __init__(self, *args, encoding: Optional[str]=None, **kwargs):
-        if 'body' not in kwargs and 'params' in kwargs:
+    def __init__(self, *args, encoding: Optional[str] = None, **kwargs):
+        if "body" not in kwargs and "params" in kwargs:
             kw = dict((k, kwargs.pop(k)) for k in DUMPS_ARGS if k in kwargs)
-            kwargs['body'] = xmlrpclib.dumps(**kw)
-        kwargs.setdefault('method', 'POST')
-        kwargs.setdefault('dont_filter', True)
+            kwargs["body"] = xmlrpclib.dumps(**kw)
+
+        # spec defines that requests must use POST method
+        kwargs.setdefault("method", "POST")
+
+        # XML-RPC clients may query the same URL multiple times
+        kwargs.setdefault("dont_filter", True)
+
+        # restore encoding
         if encoding is not None:
-            kwargs['encoding'] = encoding
+            kwargs["encoding"] = encoding
+
         super().__init__(*args, **kwargs)
-        self.headers.setdefault('Content-Type', 'text/xml')
+        self.headers.setdefault("Content-Type", "text/xml")
diff --git a/scrapy/http/response/html.py b/scrapy/http/response/html.py
index d55895aa3..7eed052c2 100644
--- a/scrapy/http/response/html.py
+++ b/scrapy/http/response/html.py
@@ -4,6 +4,7 @@ discovering through HTML encoding declarations to the TextResponse class.

 See documentation in docs/topics/request-response.rst
 """
+
 from scrapy.http.response.text import TextResponse


diff --git a/scrapy/http/response/text.py b/scrapy/http/response/text.py
index 4b8b63972..47d7bc10f 100644
--- a/scrapy/http/response/text.py
+++ b/scrapy/http/response/text.py
@@ -5,55 +5,176 @@ discovering (through HTTP headers) to base Response class.
 See documentation in docs/topics/request-response.rst
 """
 from __future__ import annotations
+
 import json
 from contextlib import suppress
 from typing import TYPE_CHECKING, Any, Generator, Optional, Tuple
 from urllib.parse import urljoin
+
 import parsel
-from w3lib.encoding import html_body_declared_encoding, html_to_unicode, http_content_type_encoding, read_bom, resolve_encoding
+from w3lib.encoding import (
+    html_body_declared_encoding,
+    html_to_unicode,
+    http_content_type_encoding,
+    read_bom,
+    resolve_encoding,
+)
 from w3lib.html import strip_html5_whitespace
+
 from scrapy.http import Request
 from scrapy.http.response import Response
 from scrapy.utils.python import memoizemethod_noargs, to_unicode
 from scrapy.utils.response import get_base_url
+
 if TYPE_CHECKING:
     from scrapy.selector import Selector
+
 _NONE = object()


 class TextResponse(Response):
-    _DEFAULT_ENCODING = 'ascii'
+    _DEFAULT_ENCODING = "ascii"
     _cached_decoded_json = _NONE
-    attributes: Tuple[str, ...] = Response.attributes + ('encoding',)
+
+    attributes: Tuple[str, ...] = Response.attributes + ("encoding",)

     def __init__(self, *args: Any, **kwargs: Any):
-        self._encoding = kwargs.pop('encoding', None)
+        self._encoding = kwargs.pop("encoding", None)
         self._cached_benc: Optional[str] = None
         self._cached_ubody: Optional[str] = None
         self._cached_selector: Optional[Selector] = None
         super().__init__(*args, **kwargs)

+    def _set_url(self, url):
+        if isinstance(url, str):
+            self._url = to_unicode(url, self.encoding)
+        else:
+            super()._set_url(url)
+
+    def _set_body(self, body):
+        self._body = b""  # used by encoding detection
+        if isinstance(body, str):
+            if self._encoding is None:
+                raise TypeError(
+                    "Cannot convert unicode body - "
+                    f"{type(self).__name__} has no encoding"
+                )
+            self._body = body.encode(self._encoding)
+        else:
+            super()._set_body(body)
+
+    @property
+    def encoding(self):
+        return self._declared_encoding() or self._body_inferred_encoding()
+
+    def _declared_encoding(self):
+        return (
+            self._encoding
+            or self._bom_encoding()
+            or self._headers_encoding()
+            or self._body_declared_encoding()
+        )
+
     def json(self):
         """
         .. versionadded:: 2.2

         Deserialize a JSON document to a Python object.
         """
-        pass
+        if self._cached_decoded_json is _NONE:
+            self._cached_decoded_json = json.loads(self.body)
+        return self._cached_decoded_json

     @property
-    def text(self) ->str:
+    def text(self) -> str:
         """Body as unicode"""
-        pass
+        # access self.encoding before _cached_ubody to make sure
+        # _body_inferred_encoding is called
+        benc = self.encoding
+        if self._cached_ubody is None:
+            charset = f"charset={benc}"
+            self._cached_ubody = html_to_unicode(charset, self.body)[1]
+        return self._cached_ubody

     def urljoin(self, url):
         """Join this Response's url with a possible relative url to form an
         absolute interpretation of the latter."""
-        pass
+        return urljoin(get_base_url(self), url)
+
+    @memoizemethod_noargs
+    def _headers_encoding(self):
+        content_type = self.headers.get(b"Content-Type", b"")
+        return http_content_type_encoding(to_unicode(content_type, encoding="latin-1"))
+
+    def _body_inferred_encoding(self):
+        if self._cached_benc is None:
+            content_type = to_unicode(
+                self.headers.get(b"Content-Type", b""), encoding="latin-1"
+            )
+            benc, ubody = html_to_unicode(
+                content_type,
+                self.body,
+                auto_detect_fun=self._auto_detect_fun,
+                default_encoding=self._DEFAULT_ENCODING,
+            )
+            self._cached_benc = benc
+            self._cached_ubody = ubody
+        return self._cached_benc

-    def follow(self, url, callback=None, method='GET', headers=None, body=
-        None, cookies=None, meta=None, encoding=None, priority=0,
-        dont_filter=False, errback=None, cb_kwargs=None, flags=None) ->Request:
+    def _auto_detect_fun(self, text):
+        for enc in (self._DEFAULT_ENCODING, "utf-8", "cp1252"):
+            try:
+                text.decode(enc)
+            except UnicodeError:
+                continue
+            return resolve_encoding(enc)
+
+    @memoizemethod_noargs
+    def _body_declared_encoding(self):
+        return html_body_declared_encoding(self.body)
+
+    @memoizemethod_noargs
+    def _bom_encoding(self):
+        return read_bom(self.body)[0]
+
+    @property
+    def selector(self):
+        from scrapy.selector import Selector
+
+        if self._cached_selector is None:
+            self._cached_selector = Selector(self)
+        return self._cached_selector
+
+    def jmespath(self, query, **kwargs):
+        if not hasattr(self.selector, "jmespath"):  # type: ignore[attr-defined]
+            raise AttributeError(
+                "Please install parsel >= 1.8.1 to get jmespath support"
+            )
+
+        return self.selector.jmespath(query, **kwargs)  # type: ignore[attr-defined]
+
+    def xpath(self, query, **kwargs):
+        return self.selector.xpath(query, **kwargs)
+
+    def css(self, query):
+        return self.selector.css(query)
+
+    def follow(
+        self,
+        url,
+        callback=None,
+        method="GET",
+        headers=None,
+        body=None,
+        cookies=None,
+        meta=None,
+        encoding=None,
+        priority=0,
+        dont_filter=False,
+        errback=None,
+        cb_kwargs=None,
+        flags=None,
+    ) -> Request:
         """
         Return a :class:`~.Request` instance to follow a link ``url``.
         It accepts the same arguments as ``Request.__init__`` method,
@@ -70,12 +191,45 @@ class TextResponse(Response):

         See :ref:`response-follow-example` for usage examples.
         """
-        pass
+        if isinstance(url, parsel.Selector):
+            url = _url_from_selector(url)
+        elif isinstance(url, parsel.SelectorList):
+            raise ValueError("SelectorList is not supported")
+        encoding = self.encoding if encoding is None else encoding
+        return super().follow(
+            url=url,
+            callback=callback,
+            method=method,
+            headers=headers,
+            body=body,
+            cookies=cookies,
+            meta=meta,
+            encoding=encoding,
+            priority=priority,
+            dont_filter=dont_filter,
+            errback=errback,
+            cb_kwargs=cb_kwargs,
+            flags=flags,
+        )

-    def follow_all(self, urls=None, callback=None, method='GET', headers=
-        None, body=None, cookies=None, meta=None, encoding=None, priority=0,
-        dont_filter=False, errback=None, cb_kwargs=None, flags=None, css=
-        None, xpath=None) ->Generator[Request, None, None]:
+    def follow_all(
+        self,
+        urls=None,
+        callback=None,
+        method="GET",
+        headers=None,
+        body=None,
+        cookies=None,
+        meta=None,
+        encoding=None,
+        priority=0,
+        dont_filter=False,
+        errback=None,
+        cb_kwargs=None,
+        flags=None,
+        css=None,
+        xpath=None,
+    ) -> Generator[Request, None, None]:
         """
         A generator that produces :class:`~.Request` instances to follow all
         links in ``urls``. It accepts the same arguments as the :class:`~.Request`'s
@@ -99,10 +253,57 @@ class TextResponse(Response):
         selectors from which links cannot be obtained (for instance, anchor tags without an
         ``href`` attribute)
         """
-        pass
+        arguments = [x for x in (urls, css, xpath) if x is not None]
+        if len(arguments) != 1:
+            raise ValueError(
+                "Please supply exactly one of the following arguments: urls, css, xpath"
+            )
+        if not urls:
+            if css:
+                urls = self.css(css)
+            if xpath:
+                urls = self.xpath(xpath)
+        if isinstance(urls, parsel.SelectorList):
+            selectors = urls
+            urls = []
+            for sel in selectors:
+                with suppress(_InvalidSelector):
+                    urls.append(_url_from_selector(sel))
+        return super().follow_all(
+            urls=urls,
+            callback=callback,
+            method=method,
+            headers=headers,
+            body=body,
+            cookies=cookies,
+            meta=meta,
+            encoding=encoding,
+            priority=priority,
+            dont_filter=dont_filter,
+            errback=errback,
+            cb_kwargs=cb_kwargs,
+            flags=flags,
+        )


 class _InvalidSelector(ValueError):
     """
     Raised when a URL cannot be obtained from a Selector
     """
+
+
+def _url_from_selector(sel):
+    # type: (parsel.Selector) -> str
+    if isinstance(sel.root, str):
+        # e.g. ::attr(href) result
+        return strip_html5_whitespace(sel.root)
+    if not hasattr(sel.root, "tag"):
+        raise _InvalidSelector(f"Unsupported selector: {sel}")
+    if sel.root.tag not in ("a", "link"):
+        raise _InvalidSelector(
+            "Only <a> and <link> elements are supported; " f"got <{sel.root.tag}>"
+        )
+    href = sel.root.get("href")
+    if href is None:
+        raise _InvalidSelector(f"<{sel.root.tag}> element has no href attribute: {sel}")
+    return strip_html5_whitespace(href)
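
A usage sketch of the TextResponse helpers filled in above (page content and URLs are made up):

from scrapy.http import HtmlResponse

html = b'<html><body><a href="/next?page=2">Next</a></body></html>'
response = HtmlResponse("https://example.com/list", body=html, encoding="utf-8")

print(response.text[:12])                   # <html><body>
print(response.css("a::attr(href)").get())  # /next?page=2
print(response.urljoin("/next?page=2"))     # https://example.com/next?page=2

# follow() accepts a relative URL or an <a>/<link> selector directly
request = response.follow(response.css("a")[0])
print(request.url)                          # https://example.com/next?page=2
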
diff --git a/scrapy/http/response/xml.py b/scrapy/http/response/xml.py
index 9ff465ec5..abf474a2f 100644
--- a/scrapy/http/response/xml.py
+++ b/scrapy/http/response/xml.py
@@ -4,6 +4,7 @@ discovering through XML encoding declarations to the TextResponse class.

 See documentation in docs/topics/request-response.rst
 """
+
 from scrapy.http.response.text import TextResponse


diff --git a/scrapy/interfaces.py b/scrapy/interfaces.py
index 151522c8c..9a2c5f170 100644
--- a/scrapy/interfaces.py
+++ b/scrapy/interfaces.py
@@ -2,21 +2,16 @@ from zope.interface import Interface


 class ISpiderLoader(Interface):
-
     def from_settings(settings):
         """Return an instance of the class for the given settings"""
-        pass

     def load(spider_name):
         """Return the Spider class for the given spider name. If the spider
         name is not found, it must raise a KeyError."""
-        pass

     def list():
         """Return a list with the names of all spiders available in the
         project"""
-        pass

     def find_by_request(request):
         """Return the list of spiders names that can handle the given request"""
-        pass
diff --git a/scrapy/item.py b/scrapy/item.py
index aae295969..d3eb90b7b 100644
--- a/scrapy/item.py
+++ b/scrapy/item.py
@@ -3,11 +3,13 @@ Scrapy Item

 See documentation in docs/topics/item.rst
 """
+
 from abc import ABCMeta
 from collections.abc import MutableMapping
 from copy import deepcopy
 from pprint import pformat
 from typing import Dict
+
 from scrapy.utils.trackref import object_ref


@@ -22,11 +24,11 @@ class ItemMeta(ABCMeta):
     """

     def __new__(mcs, class_name, bases, attrs):
-        classcell = attrs.pop('__classcell__', None)
-        new_bases = tuple(base._class for base in bases if hasattr(base,
-            '_class'))
-        _class = super().__new__(mcs, 'x_' + class_name, new_bases, attrs)
-        fields = getattr(_class, 'fields', {})
+        classcell = attrs.pop("__classcell__", None)
+        new_bases = tuple(base._class for base in bases if hasattr(base, "_class"))
+        _class = super().__new__(mcs, "x_" + class_name, new_bases, attrs)
+
+        fields = getattr(_class, "fields", {})
         new_attrs = {}
         for n in dir(_class):
             v = getattr(_class, n)
@@ -34,10 +36,11 @@ class ItemMeta(ABCMeta):
                 fields[n] = v
             elif n in attrs:
                 new_attrs[n] = attrs[n]
-        new_attrs['fields'] = fields
-        new_attrs['_class'] = _class
+
+        new_attrs["fields"] = fields
+        new_attrs["_class"] = _class
         if classcell is not None:
-            new_attrs['__classcell__'] = classcell
+            new_attrs["__classcell__"] = classcell
         return super().__new__(mcs, class_name, bases, new_attrs)


@@ -63,11 +66,12 @@ class Item(MutableMapping, object_ref, metaclass=ItemMeta):
     Unlike instances of :class:`dict`, instances of :class:`Item` may be
     :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks.
     """
+
     fields: Dict[str, Field]

     def __init__(self, *args, **kwargs):
         self._values = {}
-        if args or kwargs:
+        if args or kwargs:  # avoid creating dict for most common case
             for k, v in dict(*args, **kwargs).items():
                 self[k] = v

@@ -78,21 +82,19 @@ class Item(MutableMapping, object_ref, metaclass=ItemMeta):
         if key in self.fields:
             self._values[key] = value
         else:
-            raise KeyError(
-                f'{self.__class__.__name__} does not support field: {key}')
+            raise KeyError(f"{self.__class__.__name__} does not support field: {key}")

     def __delitem__(self, key):
         del self._values[key]

     def __getattr__(self, name):
         if name in self.fields:
-            raise AttributeError(f'Use item[{name!r}] to get field value')
+            raise AttributeError(f"Use item[{name!r}] to get field value")
         raise AttributeError(name)

     def __setattr__(self, name, value):
-        if not name.startswith('_'):
-            raise AttributeError(
-                f'Use item[{name!r}] = {value!r} to set field value')
+        if not name.startswith("_"):
+            raise AttributeError(f"Use item[{name!r}] = {value!r} to set field value")
         super().__setattr__(name, value)

     def __len__(self):
@@ -100,11 +102,18 @@ class Item(MutableMapping, object_ref, metaclass=ItemMeta):

     def __iter__(self):
         return iter(self._values)
+
     __hash__ = object_ref.__hash__

+    def keys(self):
+        return self._values.keys()
+
     def __repr__(self):
         return pformat(dict(self))

+    def copy(self):
+        return self.__class__(self)
+
     def deepcopy(self):
         """Return a :func:`~copy.deepcopy` of this item."""
-        pass
+        return deepcopy(self)
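
A minimal declaration sketch for the Item/Field machinery above (the Product item and its fields are illustrative):

import scrapy


class Product(scrapy.Item):
    name = scrapy.Field()
    price = scrapy.Field()


item = Product(name="widget", price=9.99)
item["price"] = 10.5        # values are read and written with mapping syntax
print(item["name"])         # widget
print(item.copy())          # {'name': 'widget', 'price': 10.5}
try:
    item["stock"] = 5       # undeclared field
except KeyError as exc:
    print(exc)              # 'Product does not support field: stock'
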
diff --git a/scrapy/link.py b/scrapy/link.py
index 2bc6f207e..0868ae5ef 100644
--- a/scrapy/link.py
+++ b/scrapy/link.py
@@ -24,29 +24,37 @@ class Link:
     :param nofollow: an indication of the presence or absence of a nofollow value in the ``rel`` attribute
                     of the anchor tag.
     """
-    __slots__ = ['url', 'text', 'fragment', 'nofollow']

-    def __init__(self, url: str, text: str='', fragment: str='', nofollow:
-        bool=False):
+    __slots__ = ["url", "text", "fragment", "nofollow"]
+
+    def __init__(
+        self, url: str, text: str = "", fragment: str = "", nofollow: bool = False
+    ):
         if not isinstance(url, str):
             got = url.__class__.__name__
-            raise TypeError(f'Link urls must be str objects, got {got}')
+            raise TypeError(f"Link urls must be str objects, got {got}")
         self.url: str = url
         self.text: str = text
         self.fragment: str = fragment
         self.nofollow: bool = nofollow

-    def __eq__(self, other: Any) ->bool:
+    def __eq__(self, other: Any) -> bool:
         if not isinstance(other, Link):
             raise NotImplementedError
-        return (self.url == other.url and self.text == other.text and self.
-            fragment == other.fragment and self.nofollow == other.nofollow)
+        return (
+            self.url == other.url
+            and self.text == other.text
+            and self.fragment == other.fragment
+            and self.nofollow == other.nofollow
+        )

-    def __hash__(self) ->int:
-        return hash(self.url) ^ hash(self.text) ^ hash(self.fragment) ^ hash(
-            self.nofollow)
+    def __hash__(self) -> int:
+        return (
+            hash(self.url) ^ hash(self.text) ^ hash(self.fragment) ^ hash(self.nofollow)
+        )

-    def __repr__(self) ->str:
+    def __repr__(self) -> str:
         return (
-            f'Link(url={self.url!r}, text={self.text!r}, fragment={self.fragment!r}, nofollow={self.nofollow!r})'
-            )
+            f"Link(url={self.url!r}, text={self.text!r}, "
+            f"fragment={self.fragment!r}, nofollow={self.nofollow!r})"
+        )
diff --git a/scrapy/linkextractors/lxmlhtml.py b/scrapy/linkextractors/lxmlhtml.py
index 4c22c2389..de032fdd8 100644
--- a/scrapy/linkextractors/lxmlhtml.py
+++ b/scrapy/linkextractors/lxmlhtml.py
@@ -5,69 +5,226 @@ import logging
 import operator
 from functools import partial
 from urllib.parse import urljoin, urlparse
-from lxml import etree
+
+from lxml import etree  # nosec
 from parsel.csstranslator import HTMLTranslator
 from w3lib.html import strip_html5_whitespace
 from w3lib.url import canonicalize_url, safe_url_string
+
 from scrapy.link import Link
-from scrapy.linkextractors import IGNORED_EXTENSIONS, _is_valid_url, _matches, _re_type, re
+from scrapy.linkextractors import (
+    IGNORED_EXTENSIONS,
+    _is_valid_url,
+    _matches,
+    _re_type,
+    re,
+)
 from scrapy.utils.misc import arg_to_iter, rel_has_nofollow
 from scrapy.utils.python import unique as unique_list
 from scrapy.utils.response import get_base_url
 from scrapy.utils.url import url_has_any_extension, url_is_from_any_domain
+
 logger = logging.getLogger(__name__)
-XHTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'
-_collect_string_content = etree.XPath('string()')

+# from lxml/src/lxml/html/__init__.py
+XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
+
+_collect_string_content = etree.XPath("string()")
+
+
+def _nons(tag):
+    if isinstance(tag, str):
+        if tag[0] == "{" and tag[1 : len(XHTML_NAMESPACE) + 1] == XHTML_NAMESPACE:
+            return tag.split("}")[-1]
+    return tag

-class LxmlParserLinkExtractor:

-    def __init__(self, tag='a', attr='href', process=None, unique=False,
-        strip=True, canonicalized=False):
+def _identity(x):
+    return x
+
+
+def _canonicalize_link_url(link):
+    return canonicalize_url(link.url, keep_fragments=True)
+
+
+class LxmlParserLinkExtractor:
+    def __init__(
+        self,
+        tag="a",
+        attr="href",
+        process=None,
+        unique=False,
+        strip=True,
+        canonicalized=False,
+    ):
         self.scan_tag = tag if callable(tag) else partial(operator.eq, tag)
         self.scan_attr = attr if callable(attr) else partial(operator.eq, attr)
         self.process_attr = process if callable(process) else _identity
         self.unique = unique
         self.strip = strip
-        self.link_key = operator.attrgetter('url'
-            ) if canonicalized else _canonicalize_link_url
+        self.link_key = (
+            operator.attrgetter("url") if canonicalized else _canonicalize_link_url
+        )
+
+    def _iter_links(self, document):
+        for el in document.iter(etree.Element):
+            if not self.scan_tag(_nons(el.tag)):
+                continue
+            attribs = el.attrib
+            for attrib in attribs:
+                if not self.scan_attr(attrib):
+                    continue
+                yield (el, attrib, attribs[attrib])
+
+    def _extract_links(self, selector, response_url, response_encoding, base_url):
+        links = []
+        # hacky way to get the underlying lxml parsed document
+        for el, attr, attr_val in self._iter_links(selector.root):
+            # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
+            try:
+                if self.strip:
+                    attr_val = strip_html5_whitespace(attr_val)
+                attr_val = urljoin(base_url, attr_val)
+            except ValueError:
+                continue  # skipping bogus links
+            else:
+                url = self.process_attr(attr_val)
+                if url is None:
+                    continue
+            try:
+                url = safe_url_string(url, encoding=response_encoding)
+            except ValueError:
+                logger.debug(f"Skipping extraction of link with bad URL {url!r}")
+                continue
+
+            # to fix relative links after process_value
+            url = urljoin(response_url, url)
+            link = Link(
+                url,
+                _collect_string_content(el) or "",
+                nofollow=rel_has_nofollow(el.get("rel")),
+            )
+            links.append(link)
+        return self._deduplicate_if_needed(links)
+
+    def extract_links(self, response):
+        base_url = get_base_url(response)
+        return self._extract_links(
+            response.selector, response.url, response.encoding, base_url
+        )

     def _process_links(self, links):
         """Normalize and filter extracted links

         The subclass should override it if necessary
         """
-        pass
+        return self._deduplicate_if_needed(links)
+
+    def _deduplicate_if_needed(self, links):
+        if self.unique:
+            return unique_list(links, key=self.link_key)
+        return links


 class LxmlLinkExtractor:
     _csstranslator = HTMLTranslator()

-    def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(),
-        restrict_xpaths=(), tags=('a', 'area'), attrs=('href',),
-        canonicalize=False, unique=True, process_value=None,
-        deny_extensions=None, restrict_css=(), strip=True, restrict_text=None):
+    def __init__(
+        self,
+        allow=(),
+        deny=(),
+        allow_domains=(),
+        deny_domains=(),
+        restrict_xpaths=(),
+        tags=("a", "area"),
+        attrs=("href",),
+        canonicalize=False,
+        unique=True,
+        process_value=None,
+        deny_extensions=None,
+        restrict_css=(),
+        strip=True,
+        restrict_text=None,
+    ):
         tags, attrs = set(arg_to_iter(tags)), set(arg_to_iter(attrs))
-        self.link_extractor = LxmlParserLinkExtractor(tag=partial(operator.
-            contains, tags), attr=partial(operator.contains, attrs), unique
-            =unique, process=process_value, strip=strip, canonicalized=not
-            canonicalize)
-        self.allow_res = [(x if isinstance(x, _re_type) else re.compile(x)) for
-            x in arg_to_iter(allow)]
-        self.deny_res = [(x if isinstance(x, _re_type) else re.compile(x)) for
-            x in arg_to_iter(deny)]
+        self.link_extractor = LxmlParserLinkExtractor(
+            tag=partial(operator.contains, tags),
+            attr=partial(operator.contains, attrs),
+            unique=unique,
+            process=process_value,
+            strip=strip,
+            canonicalized=not canonicalize,
+        )
+        self.allow_res = [
+            x if isinstance(x, _re_type) else re.compile(x) for x in arg_to_iter(allow)
+        ]
+        self.deny_res = [
+            x if isinstance(x, _re_type) else re.compile(x) for x in arg_to_iter(deny)
+        ]
+
         self.allow_domains = set(arg_to_iter(allow_domains))
         self.deny_domains = set(arg_to_iter(deny_domains))
+
         self.restrict_xpaths = tuple(arg_to_iter(restrict_xpaths))
-        self.restrict_xpaths += tuple(map(self._csstranslator.css_to_xpath,
-            arg_to_iter(restrict_css)))
+        self.restrict_xpaths += tuple(
+            map(self._csstranslator.css_to_xpath, arg_to_iter(restrict_css))
+        )
+
         if deny_extensions is None:
             deny_extensions = IGNORED_EXTENSIONS
         self.canonicalize = canonicalize
-        self.deny_extensions = {('.' + e) for e in arg_to_iter(deny_extensions)
-            }
-        self.restrict_text = [(x if isinstance(x, _re_type) else re.compile
-            (x)) for x in arg_to_iter(restrict_text)]
+        self.deny_extensions = {"." + e for e in arg_to_iter(deny_extensions)}
+        self.restrict_text = [
+            x if isinstance(x, _re_type) else re.compile(x)
+            for x in arg_to_iter(restrict_text)
+        ]
+
+    def _link_allowed(self, link):
+        if not _is_valid_url(link.url):
+            return False
+        if self.allow_res and not _matches(link.url, self.allow_res):
+            return False
+        if self.deny_res and _matches(link.url, self.deny_res):
+            return False
+        parsed_url = urlparse(link.url)
+        if self.allow_domains and not url_is_from_any_domain(
+            parsed_url, self.allow_domains
+        ):
+            return False
+        if self.deny_domains and url_is_from_any_domain(parsed_url, self.deny_domains):
+            return False
+        if self.deny_extensions and url_has_any_extension(
+            parsed_url, self.deny_extensions
+        ):
+            return False
+        if self.restrict_text and not _matches(link.text, self.restrict_text):
+            return False
+        return True
+
+    def matches(self, url):
+        if self.allow_domains and not url_is_from_any_domain(url, self.allow_domains):
+            return False
+        if self.deny_domains and url_is_from_any_domain(url, self.deny_domains):
+            return False
+
+        allowed = (
+            (regex.search(url) for regex in self.allow_res)
+            if self.allow_res
+            else [True]
+        )
+        denied = (regex.search(url) for regex in self.deny_res) if self.deny_res else []
+        return any(allowed) and not any(denied)
+
+    def _process_links(self, links):
+        links = [x for x in links if self._link_allowed(x)]
+        if self.canonicalize:
+            for link in links:
+                link.url = canonicalize_url(link.url)
+        links = self.link_extractor._process_links(links)
+        return links
+
+    def _extract_links(self, *args, **kwargs):
+        return self.link_extractor._extract_links(*args, **kwargs)

     def extract_links(self, response):
         """Returns a list of :class:`~scrapy.link.Link` objects from the
@@ -79,4 +236,17 @@ class LxmlLinkExtractor:
         Duplicate links are omitted if the ``unique`` attribute is set to ``True``,
         otherwise they are returned.
         """
-        pass
+        base_url = get_base_url(response)
+        if self.restrict_xpaths:
+            docs = [
+                subdoc for x in self.restrict_xpaths for subdoc in response.xpath(x)
+            ]
+        else:
+            docs = [response.selector]
+        all_links = []
+        for doc in docs:
+            links = self._extract_links(doc, response.url, response.encoding, base_url)
+            all_links.extend(self._process_links(links))
+        if self.link_extractor.unique:
+            return unique_list(all_links, key=self.link_extractor.link_key)
+        return all_links
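
For reference, a minimal sketch of how the restored extract_links() is typically used from a spider callback; LinkExtractor is the public alias for LxmlLinkExtractor, and the allow pattern and domain below are placeholders.

    from scrapy.linkextractors import LinkExtractor

    # inside a Spider subclass:
    def parse(self, response):
        # placeholder filters; restrict_css/restrict_xpaths are handled the same way
        extractor = LinkExtractor(allow=r"/docs/", deny_domains=["example.org"])
        for link in extractor.extract_links(response):
            yield response.follow(link.url, callback=self.parse)
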
diff --git a/scrapy/loader/common.py b/scrapy/loader/common.py
index f37e2fc91..3e8644e0c 100644
--- a/scrapy/loader/common.py
+++ b/scrapy/loader/common.py
@@ -1,6 +1,9 @@
 """Common functions used in Item Loaders code"""
+
 import warnings
+
 from itemloaders import common
+
 from scrapy.utils.deprecate import ScrapyDeprecationWarning


@@ -8,4 +11,11 @@ def wrap_loader_context(function, context):
     """Wrap functions that receive loader_context to contain the context
     "pre-loaded" and expose a interface that receives only one argument
     """
-    pass
+    warnings.warn(
+        "scrapy.loader.common.wrap_loader_context has moved to a new library."
+        "Please update your reference to itemloaders.common.wrap_loader_context",
+        ScrapyDeprecationWarning,
+        stacklevel=2,
+    )
+
+    return common.wrap_loader_context(function, context)
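
A hedged sketch of the migration the warning above asks for: import wrap_loader_context from itemloaders directly (the add_prefix helper is a made-up example).

    from itemloaders.common import wrap_loader_context

    def add_prefix(value, loader_context):
        return loader_context.get("prefix", "") + value

    # pre-load the context; the wrapped callable now takes only the value
    wrapped = wrap_loader_context(add_prefix, {"prefix": ">> "})
    print(wrapped("item"))  # ">> item"
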
diff --git a/scrapy/loader/processors.py b/scrapy/loader/processors.py
index 1b5404f6e..b82c6d5c7 100644
--- a/scrapy/loader/processors.py
+++ b/scrapy/loader/processors.py
@@ -4,10 +4,17 @@ This module provides some commonly used processors for Item Loaders.
 See documentation in docs/topics/loaders.rst
 """
 from itemloaders import processors
+
 from scrapy.utils.deprecate import create_deprecated_class
-MapCompose = create_deprecated_class('MapCompose', processors.MapCompose)
-Compose = create_deprecated_class('Compose', processors.Compose)
-TakeFirst = create_deprecated_class('TakeFirst', processors.TakeFirst)
-Identity = create_deprecated_class('Identity', processors.Identity)
-SelectJmes = create_deprecated_class('SelectJmes', processors.SelectJmes)
-Join = create_deprecated_class('Join', processors.Join)
+
+MapCompose = create_deprecated_class("MapCompose", processors.MapCompose)
+
+Compose = create_deprecated_class("Compose", processors.Compose)
+
+TakeFirst = create_deprecated_class("TakeFirst", processors.TakeFirst)
+
+Identity = create_deprecated_class("Identity", processors.Identity)
+
+SelectJmes = create_deprecated_class("SelectJmes", processors.SelectJmes)
+
+Join = create_deprecated_class("Join", processors.Join)
diff --git a/scrapy/logformatter.py b/scrapy/logformatter.py
index e59eb2a97..d720b2f38 100644
--- a/scrapy/logformatter.py
+++ b/scrapy/logformatter.py
@@ -1,23 +1,29 @@
 from __future__ import annotations
+
 import logging
 import os
 from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+
 from twisted.python.failure import Failure
+
 from scrapy import Request, Spider
 from scrapy.http import Response
 from scrapy.utils.request import referer_str
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self
+
     from scrapy.crawler import Crawler
-SCRAPEDMSG = 'Scraped from %(src)s' + os.linesep + '%(item)s'
-DROPPEDMSG = 'Dropped: %(exception)s' + os.linesep + '%(item)s'
-CRAWLEDMSG = (
-    'Crawled (%(status)s) %(request)s%(request_flags)s (referer: %(referer)s)%(response_flags)s'
-    )
-ITEMERRORMSG = 'Error processing %(item)s'
-SPIDERERRORMSG = 'Spider error processing %(request)s (referer: %(referer)s)'
-DOWNLOADERRORMSG_SHORT = 'Error downloading %(request)s'
-DOWNLOADERRORMSG_LONG = 'Error downloading %(request)s: %(errmsg)s'
+
+
+SCRAPEDMSG = "Scraped from %(src)s" + os.linesep + "%(item)s"
+DROPPEDMSG = "Dropped: %(exception)s" + os.linesep + "%(item)s"
+CRAWLEDMSG = "Crawled (%(status)s) %(request)s%(request_flags)s (referer: %(referer)s)%(response_flags)s"
+ITEMERRORMSG = "Error processing %(item)s"
+SPIDERERRORMSG = "Spider error processing %(request)s (referer: %(referer)s)"
+DOWNLOADERRORMSG_SHORT = "Error downloading %(request)s"
+DOWNLOADERRORMSG_LONG = "Error downloading %(request)s: %(errmsg)s"


 class LogFormatter:
@@ -58,43 +64,115 @@ class LogFormatter:
                     }
     """

-    def crawled(self, request: Request, response: Response, spider: Spider
-        ) ->dict:
+    def crawled(self, request: Request, response: Response, spider: Spider) -> dict:
         """Logs a message when the crawler finds a webpage."""
-        pass
-
-    def scraped(self, item: Any, response: Union[Response, Failure], spider:
-        Spider) ->dict:
+        request_flags = f" {str(request.flags)}" if request.flags else ""
+        response_flags = f" {str(response.flags)}" if response.flags else ""
+        return {
+            "level": logging.DEBUG,
+            "msg": CRAWLEDMSG,
+            "args": {
+                "status": response.status,
+                "request": request,
+                "request_flags": request_flags,
+                "referer": referer_str(request),
+                "response_flags": response_flags,
+                # backward compatibility with Scrapy logformatter below version 1.4
+                "flags": response_flags,
+            },
+        }
+
+    def scraped(
+        self, item: Any, response: Union[Response, Failure], spider: Spider
+    ) -> dict:
         """Logs a message when an item is scraped by a spider."""
-        pass
-
-    def dropped(self, item: Any, exception: BaseException, response:
-        Response, spider: Spider) ->dict:
+        src: Any
+        if isinstance(response, Failure):
+            src = response.getErrorMessage()
+        else:
+            src = response
+        return {
+            "level": logging.DEBUG,
+            "msg": SCRAPEDMSG,
+            "args": {
+                "src": src,
+                "item": item,
+            },
+        }
+
+    def dropped(
+        self, item: Any, exception: BaseException, response: Response, spider: Spider
+    ) -> dict:
         """Logs a message when an item is dropped while it is passing through the item pipeline."""
-        pass
-
-    def item_error(self, item: Any, exception: BaseException, response:
-        Response, spider: Spider) ->dict:
+        return {
+            "level": logging.WARNING,
+            "msg": DROPPEDMSG,
+            "args": {
+                "exception": exception,
+                "item": item,
+            },
+        }
+
+    def item_error(
+        self, item: Any, exception: BaseException, response: Response, spider: Spider
+    ) -> dict:
         """Logs a message when an item causes an error while it is passing
         through the item pipeline.

         .. versionadded:: 2.0
         """
-        pass
-
-    def spider_error(self, failure: Failure, request: Request, response:
-        Union[Response, Failure], spider: Spider) ->dict:
+        return {
+            "level": logging.ERROR,
+            "msg": ITEMERRORMSG,
+            "args": {
+                "item": item,
+            },
+        }
+
+    def spider_error(
+        self,
+        failure: Failure,
+        request: Request,
+        response: Union[Response, Failure],
+        spider: Spider,
+    ) -> dict:
         """Logs an error message from a spider.

         .. versionadded:: 2.0
         """
-        pass
-
-    def download_error(self, failure: Failure, request: Request, spider:
-        Spider, errmsg: Optional[str]=None) ->dict:
+        return {
+            "level": logging.ERROR,
+            "msg": SPIDERERRORMSG,
+            "args": {
+                "request": request,
+                "referer": referer_str(request),
+            },
+        }
+
+    def download_error(
+        self,
+        failure: Failure,
+        request: Request,
+        spider: Spider,
+        errmsg: Optional[str] = None,
+    ) -> dict:
         """Logs a download error message from a spider (typically coming from
         the engine).

         .. versionadded:: 2.0
         """
-        pass
+        args: Dict[str, Any] = {"request": request}
+        if errmsg:
+            msg = DOWNLOADERRORMSG_LONG
+            args["errmsg"] = errmsg
+        else:
+            msg = DOWNLOADERRORMSG_SHORT
+        return {
+            "level": logging.ERROR,
+            "msg": msg,
+            "args": args,
+        }
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        return cls()
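
For context, the documented way to customize this class is to subclass it and override one of the methods restored above; a minimal sketch that demotes "Dropped" messages to DEBUG (the module path in the setting is a placeholder):

    import logging
    from scrapy.logformatter import DROPPEDMSG, LogFormatter

    class PoliteLogFormatter(LogFormatter):
        def dropped(self, item, exception, response, spider):
            return {
                "level": logging.DEBUG,  # the default implementation uses WARNING
                "msg": DROPPEDMSG,
                "args": {"exception": exception, "item": item},
            }

    # settings.py
    # LOG_FORMATTER = "myproject.logformatters.PoliteLogFormatter"
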
diff --git a/scrapy/mail.py b/scrapy/mail.py
index b149b42bb..237327451 100644
--- a/scrapy/mail.py
+++ b/scrapy/mail.py
@@ -11,20 +11,40 @@ from email.mime.nonmultipart import MIMENonMultipart
 from email.mime.text import MIMEText
 from email.utils import formatdate
 from io import BytesIO
+
 from twisted import version as twisted_version
 from twisted.internet import defer, ssl
 from twisted.python.versions import Version
+
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import to_bytes
+
 logger = logging.getLogger(__name__)
-COMMASPACE = ', '


-class MailSender:
+# Defined in the email.utils module, but undocumented:
+# https://github.com/python/cpython/blob/v3.9.0/Lib/email/utils.py#L42
+COMMASPACE = ", "
+
+
+def _to_bytes_or_none(text):
+    if text is None:
+        return None
+    return to_bytes(text)
+

-    def __init__(self, smtphost='localhost', mailfrom='scrapy@localhost',
-        smtpuser=None, smtppass=None, smtpport=25, smtptls=False, smtpssl=
-        False, debug=False):
+class MailSender:
+    def __init__(
+        self,
+        smtphost="localhost",
+        mailfrom="scrapy@localhost",
+        smtpuser=None,
+        smtppass=None,
+        smtpport=25,
+        smtptls=False,
+        smtpssl=False,
+        debug=False,
+    ):
         self.smtphost = smtphost
         self.smtpport = smtpport
         self.smtpuser = _to_bytes_or_none(smtpuser)
@@ -33,3 +53,156 @@ class MailSender:
         self.smtpssl = smtpssl
         self.mailfrom = mailfrom
         self.debug = debug
+
+    @classmethod
+    def from_settings(cls, settings):
+        return cls(
+            smtphost=settings["MAIL_HOST"],
+            mailfrom=settings["MAIL_FROM"],
+            smtpuser=settings["MAIL_USER"],
+            smtppass=settings["MAIL_PASS"],
+            smtpport=settings.getint("MAIL_PORT"),
+            smtptls=settings.getbool("MAIL_TLS"),
+            smtpssl=settings.getbool("MAIL_SSL"),
+        )
+
+    def send(
+        self,
+        to,
+        subject,
+        body,
+        cc=None,
+        attachs=(),
+        mimetype="text/plain",
+        charset=None,
+        _callback=None,
+    ):
+        from twisted.internet import reactor
+
+        if attachs:
+            msg = MIMEMultipart()
+        else:
+            msg = MIMENonMultipart(*mimetype.split("/", 1))
+
+        to = list(arg_to_iter(to))
+        cc = list(arg_to_iter(cc))
+
+        msg["From"] = self.mailfrom
+        msg["To"] = COMMASPACE.join(to)
+        msg["Date"] = formatdate(localtime=True)
+        msg["Subject"] = subject
+        rcpts = to[:]
+        if cc:
+            rcpts.extend(cc)
+            msg["Cc"] = COMMASPACE.join(cc)
+
+        if attachs:
+            if charset:
+                msg.set_charset(charset)
+            msg.attach(MIMEText(body, "plain", charset or "us-ascii"))
+            for attach_name, mimetype, f in attachs:
+                part = MIMEBase(*mimetype.split("/"))
+                part.set_payload(f.read())
+                Encoders.encode_base64(part)
+                part.add_header(
+                    "Content-Disposition", "attachment", filename=attach_name
+                )
+                msg.attach(part)
+        else:
+            msg.set_payload(body, charset)
+
+        if _callback:
+            _callback(to=to, subject=subject, body=body, cc=cc, attach=attachs, msg=msg)
+
+        if self.debug:
+            logger.debug(
+                "Debug mail sent OK: To=%(mailto)s Cc=%(mailcc)s "
+                'Subject="%(mailsubject)s" Attachs=%(mailattachs)d',
+                {
+                    "mailto": to,
+                    "mailcc": cc,
+                    "mailsubject": subject,
+                    "mailattachs": len(attachs),
+                },
+            )
+            return
+
+        dfd = self._sendmail(rcpts, msg.as_string().encode(charset or "utf-8"))
+        dfd.addCallbacks(
+            callback=self._sent_ok,
+            errback=self._sent_failed,
+            callbackArgs=[to, cc, subject, len(attachs)],
+            errbackArgs=[to, cc, subject, len(attachs)],
+        )
+        reactor.addSystemEventTrigger("before", "shutdown", lambda: dfd)
+        return dfd
+
+    def _sent_ok(self, result, to, cc, subject, nattachs):
+        logger.info(
+            "Mail sent OK: To=%(mailto)s Cc=%(mailcc)s "
+            'Subject="%(mailsubject)s" Attachs=%(mailattachs)d',
+            {
+                "mailto": to,
+                "mailcc": cc,
+                "mailsubject": subject,
+                "mailattachs": nattachs,
+            },
+        )
+
+    def _sent_failed(self, failure, to, cc, subject, nattachs):
+        errstr = str(failure.value)
+        logger.error(
+            "Unable to send mail: To=%(mailto)s Cc=%(mailcc)s "
+            'Subject="%(mailsubject)s" Attachs=%(mailattachs)d'
+            "- %(mailerr)s",
+            {
+                "mailto": to,
+                "mailcc": cc,
+                "mailsubject": subject,
+                "mailattachs": nattachs,
+                "mailerr": errstr,
+            },
+        )
+        return failure
+
+    def _sendmail(self, to_addrs, msg):
+        from twisted.internet import reactor
+
+        msg = BytesIO(msg)
+        d = defer.Deferred()
+
+        factory = self._create_sender_factory(to_addrs, msg, d)
+
+        if self.smtpssl:
+            reactor.connectSSL(
+                self.smtphost, self.smtpport, factory, ssl.ClientContextFactory()
+            )
+        else:
+            reactor.connectTCP(self.smtphost, self.smtpport, factory)
+
+        return d
+
+    def _create_sender_factory(self, to_addrs, msg, d):
+        from twisted.mail.smtp import ESMTPSenderFactory
+
+        factory_keywords = {
+            "heloFallback": True,
+            "requireAuthentication": False,
+            "requireTransportSecurity": self.smtptls,
+        }
+
+        # Newer versions of twisted require the hostname to use STARTTLS
+        if twisted_version >= Version("twisted", 21, 2, 0):
+            factory_keywords["hostname"] = self.smtphost
+
+        factory = ESMTPSenderFactory(
+            self.smtpuser,
+            self.smtppass,
+            self.mailfrom,
+            to_addrs,
+            msg,
+            d,
+            **factory_keywords
+        )
+        factory.noisy = False
+        return factory
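
For reference, a minimal sketch of the MailSender API restored above (all addresses are placeholders); from_settings() reads MAIL_HOST, MAIL_FROM, MAIL_USER, MAIL_PASS, MAIL_PORT, MAIL_TLS and MAIL_SSL as shown in the method body.

    from scrapy.mail import MailSender

    mailer = MailSender(smtphost="localhost", mailfrom="scrapy@localhost")
    mailer.send(
        to=["someone@example.com"],
        subject="Crawl finished",
        body="The crawl completed without errors.",
        cc=["another@example.com"],
    )
    # or, inside a running crawler:
    # mailer = MailSender.from_settings(crawler.settings)
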
diff --git a/scrapy/middleware.py b/scrapy/middleware.py
index 38f8c46ff..090588130 100644
--- a/scrapy/middleware.py
+++ b/scrapy/middleware.py
@@ -1,27 +1,110 @@
 from __future__ import annotations
+
 import logging
 import pprint
 from collections import defaultdict, deque
-from typing import TYPE_CHECKING, Any, Callable, Deque, Dict, Iterable, List, Optional, Tuple, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Deque,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
 from twisted.internet.defer import Deferred
+
 from scrapy import Spider
 from scrapy.exceptions import NotConfigured
 from scrapy.settings import Settings
 from scrapy.utils.defer import process_chain, process_parallel
 from scrapy.utils.misc import create_instance, load_object
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self
+
     from scrapy.crawler import Crawler
+
+
 logger = logging.getLogger(__name__)


 class MiddlewareManager:
     """Base class for implementing middleware managers"""
-    component_name = 'foo middleware'

-    def __init__(self, *middlewares: Any) ->None:
+    component_name = "foo middleware"
+
+    def __init__(self, *middlewares: Any) -> None:
         self.middlewares = middlewares
-        self.methods: Dict[str, Deque[Union[None, Callable, Tuple[Callable,
-            Callable]]]] = defaultdict(deque)
+        # Only process_spider_output and process_spider_exception can be None.
+        # Only process_spider_output can be a tuple, and only until _async compatibility methods are removed.
+        self.methods: Dict[
+            str, Deque[Union[None, Callable, Tuple[Callable, Callable]]]
+        ] = defaultdict(deque)
         for mw in middlewares:
             self._add_middleware(mw)
+
+    @classmethod
+    def _get_mwlist_from_settings(cls, settings: Settings) -> List[Any]:
+        raise NotImplementedError
+
+    @classmethod
+    def from_settings(
+        cls, settings: Settings, crawler: Optional[Crawler] = None
+    ) -> Self:
+        mwlist = cls._get_mwlist_from_settings(settings)
+        middlewares = []
+        enabled = []
+        for clspath in mwlist:
+            try:
+                mwcls = load_object(clspath)
+                mw = create_instance(mwcls, settings, crawler)
+                middlewares.append(mw)
+                enabled.append(clspath)
+            except NotConfigured as e:
+                if e.args:
+                    logger.warning(
+                        "Disabled %(clspath)s: %(eargs)s",
+                        {"clspath": clspath, "eargs": e.args[0]},
+                        extra={"crawler": crawler},
+                    )
+
+        logger.info(
+            "Enabled %(componentname)ss:\n%(enabledlist)s",
+            {
+                "componentname": cls.component_name,
+                "enabledlist": pprint.pformat(enabled),
+            },
+            extra={"crawler": crawler},
+        )
+        return cls(*middlewares)
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        return cls.from_settings(crawler.settings, crawler)
+
+    def _add_middleware(self, mw: Any) -> None:
+        if hasattr(mw, "open_spider"):
+            self.methods["open_spider"].append(mw.open_spider)
+        if hasattr(mw, "close_spider"):
+            self.methods["close_spider"].appendleft(mw.close_spider)
+
+    def _process_parallel(self, methodname: str, obj: Any, *args: Any) -> Deferred:
+        methods = cast(Iterable[Callable], self.methods[methodname])
+        return process_parallel(methods, obj, *args)
+
+    def _process_chain(self, methodname: str, obj: Any, *args: Any) -> Deferred:
+        methods = cast(Iterable[Callable], self.methods[methodname])
+        return process_chain(methods, obj, *args)
+
+    def open_spider(self, spider: Spider) -> Deferred:
+        return self._process_parallel("open_spider", spider)
+
+    def close_spider(self, spider: Spider) -> Deferred:
+        return self._process_parallel("close_spider", spider)
diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py
index 3c976dd8f..5c09ab37e 100644
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@@ -18,8 +18,10 @@ from os import PathLike
 from pathlib import Path
 from typing import DefaultDict, Optional, Set, Union
 from urllib.parse import urlparse
+
 from itemadapter import ItemAdapter
 from twisted.internet import defer, threads
+
 from scrapy.exceptions import IgnoreRequest, NotConfigured
 from scrapy.http import Request
 from scrapy.http.request import NO_CALLBACK
@@ -32,23 +34,57 @@ from scrapy.utils.log import failure_to_exc_info
 from scrapy.utils.misc import md5sum
 from scrapy.utils.python import to_bytes
 from scrapy.utils.request import referer_str
+
 logger = logging.getLogger(__name__)


+def _to_string(path: Union[str, PathLike]) -> str:
+    return str(path)  # convert a Path object to string
+
+
 class FileException(Exception):
     """General media error exception"""


 class FSFilesStore:
-
     def __init__(self, basedir: Union[str, PathLike]):
         basedir = _to_string(basedir)
-        if '://' in basedir:
-            basedir = basedir.split('://', 1)[1]
+        if "://" in basedir:
+            basedir = basedir.split("://", 1)[1]
         self.basedir = basedir
         self._mkdir(Path(self.basedir))
         self.created_directories: DefaultDict[str, Set[str]] = defaultdict(set)

+    def persist_file(
+        self, path: Union[str, PathLike], buf, info, meta=None, headers=None
+    ):
+        absolute_path = self._get_filesystem_path(path)
+        self._mkdir(absolute_path.parent, info)
+        absolute_path.write_bytes(buf.getvalue())
+
+    def stat_file(self, path: Union[str, PathLike], info):
+        absolute_path = self._get_filesystem_path(path)
+        try:
+            last_modified = absolute_path.stat().st_mtime
+        except os.error:
+            return {}
+
+        with absolute_path.open("rb") as f:
+            checksum = md5sum(f)
+
+        return {"last_modified": last_modified, "checksum": checksum}
+
+    def _get_filesystem_path(self, path: Union[str, PathLike]) -> Path:
+        path_comps = _to_string(path).split("/")
+        return Path(self.basedir, *path_comps)
+
+    def _mkdir(self, dirname: Path, domain: Optional[str] = None):
+        seen = self.created_directories[domain] if domain else set()
+        if str(dirname) not in seen:
+            if not dirname.exists():
+                dirname.mkdir(parents=True)
+            seen.add(str(dirname))
+

 class S3FilesStore:
     AWS_ACCESS_KEY_ID = None
@@ -58,53 +94,170 @@ class S3FilesStore:
     AWS_REGION_NAME = None
     AWS_USE_SSL = None
     AWS_VERIFY = None
-    POLICY = 'private'
-    HEADERS = {'Cache-Control': 'max-age=172800'}
+
+    POLICY = "private"  # Overridden from settings.FILES_STORE_S3_ACL in FilesPipeline.from_settings
+    HEADERS = {
+        "Cache-Control": "max-age=172800",
+    }

     def __init__(self, uri):
         if not is_botocore_available():
-            raise NotConfigured('missing botocore library')
+            raise NotConfigured("missing botocore library")
         import botocore.session
+
         session = botocore.session.get_session()
-        self.s3_client = session.create_client('s3', aws_access_key_id=self
-            .AWS_ACCESS_KEY_ID, aws_secret_access_key=self.
-            AWS_SECRET_ACCESS_KEY, aws_session_token=self.AWS_SESSION_TOKEN,
-            endpoint_url=self.AWS_ENDPOINT_URL, region_name=self.
-            AWS_REGION_NAME, use_ssl=self.AWS_USE_SSL, verify=self.AWS_VERIFY)
-        if not uri.startswith('s3://'):
+        self.s3_client = session.create_client(
+            "s3",
+            aws_access_key_id=self.AWS_ACCESS_KEY_ID,
+            aws_secret_access_key=self.AWS_SECRET_ACCESS_KEY,
+            aws_session_token=self.AWS_SESSION_TOKEN,
+            endpoint_url=self.AWS_ENDPOINT_URL,
+            region_name=self.AWS_REGION_NAME,
+            use_ssl=self.AWS_USE_SSL,
+            verify=self.AWS_VERIFY,
+        )
+        if not uri.startswith("s3://"):
             raise ValueError(f"Incorrect URI scheme in {uri}, expected 's3'")
-        self.bucket, self.prefix = uri[5:].split('/', 1)
+        self.bucket, self.prefix = uri[5:].split("/", 1)
+
+    def stat_file(self, path, info):
+        def _onsuccess(boto_key):
+            checksum = boto_key["ETag"].strip('"')
+            last_modified = boto_key["LastModified"]
+            modified_stamp = time.mktime(last_modified.timetuple())
+            return {"checksum": checksum, "last_modified": modified_stamp}
+
+        return self._get_boto_key(path).addCallback(_onsuccess)
+
+    def _get_boto_key(self, path):
+        key_name = f"{self.prefix}{path}"
+        return threads.deferToThread(
+            self.s3_client.head_object, Bucket=self.bucket, Key=key_name
+        )

     def persist_file(self, path, buf, info, meta=None, headers=None):
         """Upload file to S3 storage"""
-        pass
+        key_name = f"{self.prefix}{path}"
+        buf.seek(0)
+        extra = self._headers_to_botocore_kwargs(self.HEADERS)
+        if headers:
+            extra.update(self._headers_to_botocore_kwargs(headers))
+        return threads.deferToThread(
+            self.s3_client.put_object,
+            Bucket=self.bucket,
+            Key=key_name,
+            Body=buf,
+            Metadata={k: str(v) for k, v in (meta or {}).items()},
+            ACL=self.POLICY,
+            **extra,
+        )

     def _headers_to_botocore_kwargs(self, headers):
         """Convert headers to botocore keyword arguments."""
-        pass
+        # This is required while we need to support both boto and botocore.
+        mapping = CaseInsensitiveDict(
+            {
+                "Content-Type": "ContentType",
+                "Cache-Control": "CacheControl",
+                "Content-Disposition": "ContentDisposition",
+                "Content-Encoding": "ContentEncoding",
+                "Content-Language": "ContentLanguage",
+                "Content-Length": "ContentLength",
+                "Content-MD5": "ContentMD5",
+                "Expires": "Expires",
+                "X-Amz-Grant-Full-Control": "GrantFullControl",
+                "X-Amz-Grant-Read": "GrantRead",
+                "X-Amz-Grant-Read-ACP": "GrantReadACP",
+                "X-Amz-Grant-Write-ACP": "GrantWriteACP",
+                "X-Amz-Object-Lock-Legal-Hold": "ObjectLockLegalHoldStatus",
+                "X-Amz-Object-Lock-Mode": "ObjectLockMode",
+                "X-Amz-Object-Lock-Retain-Until-Date": "ObjectLockRetainUntilDate",
+                "X-Amz-Request-Payer": "RequestPayer",
+                "X-Amz-Server-Side-Encryption": "ServerSideEncryption",
+                "X-Amz-Server-Side-Encryption-Aws-Kms-Key-Id": "SSEKMSKeyId",
+                "X-Amz-Server-Side-Encryption-Context": "SSEKMSEncryptionContext",
+                "X-Amz-Server-Side-Encryption-Customer-Algorithm": "SSECustomerAlgorithm",
+                "X-Amz-Server-Side-Encryption-Customer-Key": "SSECustomerKey",
+                "X-Amz-Server-Side-Encryption-Customer-Key-Md5": "SSECustomerKeyMD5",
+                "X-Amz-Storage-Class": "StorageClass",
+                "X-Amz-Tagging": "Tagging",
+                "X-Amz-Website-Redirect-Location": "WebsiteRedirectLocation",
+            }
+        )
+        extra = {}
+        for key, value in headers.items():
+            try:
+                kwarg = mapping[key]
+            except KeyError:
+                raise TypeError(f'Header "{key}" is not supported by botocore')
+            else:
+                extra[kwarg] = value
+        return extra


 class GCSFilesStore:
     GCS_PROJECT_ID = None
-    CACHE_CONTROL = 'max-age=172800'
+
+    CACHE_CONTROL = "max-age=172800"
+
+    # The bucket's default object ACL will be applied to the object.
+    # Overridden from settings.FILES_STORE_GCS_ACL in FilesPipeline.from_settings.
     POLICY = None

     def __init__(self, uri):
         from google.cloud import storage
+
         client = storage.Client(project=self.GCS_PROJECT_ID)
-        bucket, prefix = uri[5:].split('/', 1)
+        bucket, prefix = uri[5:].split("/", 1)
         self.bucket = client.bucket(bucket)
         self.prefix = prefix
-        permissions = self.bucket.test_iam_permissions([
-            'storage.objects.get', 'storage.objects.create'])
-        if 'storage.objects.get' not in permissions:
+        permissions = self.bucket.test_iam_permissions(
+            ["storage.objects.get", "storage.objects.create"]
+        )
+        if "storage.objects.get" not in permissions:
             logger.warning(
-                "No 'storage.objects.get' permission for GSC bucket %(bucket)s. Checking if files are up to date will be impossible. Files will be downloaded every time."
-                , {'bucket': bucket})
-        if 'storage.objects.create' not in permissions:
+                "No 'storage.objects.get' permission for GSC bucket %(bucket)s. "
+                "Checking if files are up to date will be impossible. Files will be downloaded every time.",
+                {"bucket": bucket},
+            )
+        if "storage.objects.create" not in permissions:
             logger.error(
-                "No 'storage.objects.create' permission for GSC bucket %(bucket)s. Saving files will be impossible!"
-                , {'bucket': bucket})
+                "No 'storage.objects.create' permission for GSC bucket %(bucket)s. Saving files will be impossible!",
+                {"bucket": bucket},
+            )
+
+    def stat_file(self, path, info):
+        def _onsuccess(blob):
+            if blob:
+                checksum = base64.b64decode(blob.md5_hash).hex()
+                last_modified = time.mktime(blob.updated.timetuple())
+                return {"checksum": checksum, "last_modified": last_modified}
+            return {}
+
+        blob_path = self._get_blob_path(path)
+        return threads.deferToThread(self.bucket.get_blob, blob_path).addCallback(
+            _onsuccess
+        )
+
+    def _get_content_type(self, headers):
+        if headers and "Content-Type" in headers:
+            return headers["Content-Type"]
+        return "application/octet-stream"
+
+    def _get_blob_path(self, path):
+        return self.prefix + path
+
+    def persist_file(self, path, buf, info, meta=None, headers=None):
+        blob_path = self._get_blob_path(path)
+        blob = self.bucket.blob(blob_path)
+        blob.cache_control = self.CACHE_CONTROL
+        blob.metadata = {k: str(v) for k, v in (meta or {}).items()}
+        return threads.deferToThread(
+            blob.upload_from_string,
+            data=buf.getvalue(),
+            content_type=self._get_content_type(headers),
+            predefined_acl=self.POLICY,
+        )


 class FTPFilesStore:
@@ -113,7 +266,7 @@ class FTPFilesStore:
     USE_ACTIVE_MODE = None

     def __init__(self, uri):
-        if not uri.startswith('ftp://'):
+        if not uri.startswith("ftp://"):
             raise ValueError(f"Incorrect URI scheme in {uri}, expected 'ftp'")
         u = urlparse(uri)
         self.port = u.port
@@ -121,7 +274,39 @@ class FTPFilesStore:
         self.port = int(u.port or 21)
         self.username = u.username or self.FTP_USERNAME
         self.password = u.password or self.FTP_PASSWORD
-        self.basedir = u.path.rstrip('/')
+        self.basedir = u.path.rstrip("/")
+
+    def persist_file(self, path, buf, info, meta=None, headers=None):
+        path = f"{self.basedir}/{path}"
+        return threads.deferToThread(
+            ftp_store_file,
+            path=path,
+            file=buf,
+            host=self.host,
+            port=self.port,
+            username=self.username,
+            password=self.password,
+            use_active_mode=self.USE_ACTIVE_MODE,
+        )
+
+    def stat_file(self, path, info):
+        def _stat_file(path):
+            try:
+                ftp = FTP()
+                ftp.connect(self.host, self.port)
+                ftp.login(self.username, self.password)
+                if self.USE_ACTIVE_MODE:
+                    ftp.set_pasv(False)
+                file_path = f"{self.basedir}/{path}"
+                last_modified = float(ftp.voidcmd(f"MDTM {file_path}")[4:].strip())
+                m = hashlib.md5()
+                ftp.retrbinary(f"RETR {file_path}", m.update)
+                return {"last_modified": last_modified, "checksum": m.hexdigest()}
+            # The file doesn't exist
+            except Exception:
+                return {}
+
+        return threads.deferToThread(_stat_file, path)


 class FilesPipeline(MediaPipeline):
@@ -142,30 +327,226 @@ class FilesPipeline(MediaPipeline):
         refresh it in case of change.

     """
-    MEDIA_NAME = 'file'
+
+    MEDIA_NAME = "file"
     EXPIRES = 90
-    STORE_SCHEMES = {'': FSFilesStore, 'file': FSFilesStore, 's3':
-        S3FilesStore, 'gs': GCSFilesStore, 'ftp': FTPFilesStore}
-    DEFAULT_FILES_URLS_FIELD = 'file_urls'
-    DEFAULT_FILES_RESULT_FIELD = 'files'
+    STORE_SCHEMES = {
+        "": FSFilesStore,
+        "file": FSFilesStore,
+        "s3": S3FilesStore,
+        "gs": GCSFilesStore,
+        "ftp": FTPFilesStore,
+    }
+    DEFAULT_FILES_URLS_FIELD = "file_urls"
+    DEFAULT_FILES_RESULT_FIELD = "files"

     def __init__(self, store_uri, download_func=None, settings=None):
         store_uri = _to_string(store_uri)
         if not store_uri:
             raise NotConfigured
+
         if isinstance(settings, dict) or settings is None:
             settings = Settings(settings)
-        cls_name = 'FilesPipeline'
+        cls_name = "FilesPipeline"
         self.store = self._get_store(store_uri)
-        resolve = functools.partial(self._key_for_pipe, base_class_name=
-            cls_name, settings=settings)
-        self.expires = settings.getint(resolve('FILES_EXPIRES'), self.EXPIRES)
-        if not hasattr(self, 'FILES_URLS_FIELD'):
+        resolve = functools.partial(
+            self._key_for_pipe, base_class_name=cls_name, settings=settings
+        )
+        self.expires = settings.getint(resolve("FILES_EXPIRES"), self.EXPIRES)
+        if not hasattr(self, "FILES_URLS_FIELD"):
             self.FILES_URLS_FIELD = self.DEFAULT_FILES_URLS_FIELD
-        if not hasattr(self, 'FILES_RESULT_FIELD'):
+        if not hasattr(self, "FILES_RESULT_FIELD"):
             self.FILES_RESULT_FIELD = self.DEFAULT_FILES_RESULT_FIELD
-        self.files_urls_field = settings.get(resolve('FILES_URLS_FIELD'),
-            self.FILES_URLS_FIELD)
-        self.files_result_field = settings.get(resolve('FILES_RESULT_FIELD'
-            ), self.FILES_RESULT_FIELD)
+        self.files_urls_field = settings.get(
+            resolve("FILES_URLS_FIELD"), self.FILES_URLS_FIELD
+        )
+        self.files_result_field = settings.get(
+            resolve("FILES_RESULT_FIELD"), self.FILES_RESULT_FIELD
+        )
+
         super().__init__(download_func=download_func, settings=settings)
+
+    @classmethod
+    def from_settings(cls, settings):
+        s3store = cls.STORE_SCHEMES["s3"]
+        s3store.AWS_ACCESS_KEY_ID = settings["AWS_ACCESS_KEY_ID"]
+        s3store.AWS_SECRET_ACCESS_KEY = settings["AWS_SECRET_ACCESS_KEY"]
+        s3store.AWS_SESSION_TOKEN = settings["AWS_SESSION_TOKEN"]
+        s3store.AWS_ENDPOINT_URL = settings["AWS_ENDPOINT_URL"]
+        s3store.AWS_REGION_NAME = settings["AWS_REGION_NAME"]
+        s3store.AWS_USE_SSL = settings["AWS_USE_SSL"]
+        s3store.AWS_VERIFY = settings["AWS_VERIFY"]
+        s3store.POLICY = settings["FILES_STORE_S3_ACL"]
+
+        gcs_store = cls.STORE_SCHEMES["gs"]
+        gcs_store.GCS_PROJECT_ID = settings["GCS_PROJECT_ID"]
+        gcs_store.POLICY = settings["FILES_STORE_GCS_ACL"] or None
+
+        ftp_store = cls.STORE_SCHEMES["ftp"]
+        ftp_store.FTP_USERNAME = settings["FTP_USER"]
+        ftp_store.FTP_PASSWORD = settings["FTP_PASSWORD"]
+        ftp_store.USE_ACTIVE_MODE = settings.getbool("FEED_STORAGE_FTP_ACTIVE")
+
+        store_uri = settings["FILES_STORE"]
+        return cls(store_uri, settings=settings)
+
+    def _get_store(self, uri: str):
+        if Path(uri).is_absolute():  # to support win32 paths like: C:\\some\dir
+            scheme = "file"
+        else:
+            scheme = urlparse(uri).scheme
+        store_cls = self.STORE_SCHEMES[scheme]
+        return store_cls(uri)
+
+    def media_to_download(self, request, info, *, item=None):
+        def _onsuccess(result):
+            if not result:
+                return  # returning None forces download
+
+            last_modified = result.get("last_modified", None)
+            if not last_modified:
+                return  # returning None forces download
+
+            age_seconds = time.time() - last_modified
+            age_days = age_seconds / 60 / 60 / 24
+            if age_days > self.expires:
+                return  # returning None forces download
+
+            referer = referer_str(request)
+            logger.debug(
+                "File (uptodate): Downloaded %(medianame)s from %(request)s "
+                "referred in <%(referer)s>",
+                {"medianame": self.MEDIA_NAME, "request": request, "referer": referer},
+                extra={"spider": info.spider},
+            )
+            self.inc_stats(info.spider, "uptodate")
+
+            checksum = result.get("checksum", None)
+            return {
+                "url": request.url,
+                "path": path,
+                "checksum": checksum,
+                "status": "uptodate",
+            }
+
+        path = self.file_path(request, info=info, item=item)
+        dfd = defer.maybeDeferred(self.store.stat_file, path, info)
+        dfd.addCallbacks(_onsuccess, lambda _: None)
+        dfd.addErrback(
+            lambda f: logger.error(
+                self.__class__.__name__ + ".store.stat_file",
+                exc_info=failure_to_exc_info(f),
+                extra={"spider": info.spider},
+            )
+        )
+        return dfd
+
+    def media_failed(self, failure, request, info):
+        if not isinstance(failure.value, IgnoreRequest):
+            referer = referer_str(request)
+            logger.warning(
+                "File (unknown-error): Error downloading %(medianame)s from "
+                "%(request)s referred in <%(referer)s>: %(exception)s",
+                {
+                    "medianame": self.MEDIA_NAME,
+                    "request": request,
+                    "referer": referer,
+                    "exception": failure.value,
+                },
+                extra={"spider": info.spider},
+            )
+
+        raise FileException
+
+    def media_downloaded(self, response, request, info, *, item=None):
+        referer = referer_str(request)
+
+        if response.status != 200:
+            logger.warning(
+                "File (code: %(status)s): Error downloading file from "
+                "%(request)s referred in <%(referer)s>",
+                {"status": response.status, "request": request, "referer": referer},
+                extra={"spider": info.spider},
+            )
+            raise FileException("download-error")
+
+        if not response.body:
+            logger.warning(
+                "File (empty-content): Empty file from %(request)s referred "
+                "in <%(referer)s>: no-content",
+                {"request": request, "referer": referer},
+                extra={"spider": info.spider},
+            )
+            raise FileException("empty-content")
+
+        status = "cached" if "cached" in response.flags else "downloaded"
+        logger.debug(
+            "File (%(status)s): Downloaded file from %(request)s referred in "
+            "<%(referer)s>",
+            {"status": status, "request": request, "referer": referer},
+            extra={"spider": info.spider},
+        )
+        self.inc_stats(info.spider, status)
+
+        try:
+            path = self.file_path(request, response=response, info=info, item=item)
+            checksum = self.file_downloaded(response, request, info, item=item)
+        except FileException as exc:
+            logger.warning(
+                "File (error): Error processing file from %(request)s "
+                "referred in <%(referer)s>: %(errormsg)s",
+                {"request": request, "referer": referer, "errormsg": str(exc)},
+                extra={"spider": info.spider},
+                exc_info=True,
+            )
+            raise
+        except Exception as exc:
+            logger.error(
+                "File (unknown-error): Error processing file from %(request)s "
+                "referred in <%(referer)s>",
+                {"request": request, "referer": referer},
+                exc_info=True,
+                extra={"spider": info.spider},
+            )
+            raise FileException(str(exc))
+
+        return {
+            "url": request.url,
+            "path": path,
+            "checksum": checksum,
+            "status": status,
+        }
+
+    def inc_stats(self, spider, status):
+        spider.crawler.stats.inc_value("file_count", spider=spider)
+        spider.crawler.stats.inc_value(f"file_status_count/{status}", spider=spider)
+
+    # Overridable Interface
+    def get_media_requests(self, item, info):
+        urls = ItemAdapter(item).get(self.files_urls_field, [])
+        return [Request(u, callback=NO_CALLBACK) for u in urls]
+
+    def file_downloaded(self, response, request, info, *, item=None):
+        path = self.file_path(request, response=response, info=info, item=item)
+        buf = BytesIO(response.body)
+        checksum = md5sum(buf)
+        buf.seek(0)
+        self.store.persist_file(path, buf, info)
+        return checksum
+
+    def item_completed(self, results, item, info):
+        with suppress(KeyError):
+            ItemAdapter(item)[self.files_result_field] = [x for ok, x in results if ok]
+        return item
+
+    def file_path(self, request, response=None, info=None, *, item=None):
+        media_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
+        media_ext = Path(request.url).suffix
+        # Handle empty and wild extensions by trying to guess the
+        # mime type, then the extension; default to an empty string otherwise.
+        if media_ext not in mimetypes.types_map:
+            media_ext = ""
+            media_type = mimetypes.guess_type(request.url)[0]
+            if media_type:
+                media_ext = mimetypes.guess_extension(media_type)
+        return f"full/{media_guid}{media_ext}"
diff --git a/scrapy/pipelines/images.py b/scrapy/pipelines/images.py
index 4c93a08c0..9d18144ee 100644
--- a/scrapy/pipelines/images.py
+++ b/scrapy/pipelines/images.py
@@ -8,11 +8,15 @@ import hashlib
 import warnings
 from contextlib import suppress
 from io import BytesIO
+
 from itemadapter import ItemAdapter
+
 from scrapy.exceptions import DropItem, NotConfigured, ScrapyDeprecationWarning
 from scrapy.http import Request
 from scrapy.http.request import NO_CALLBACK
 from scrapy.pipelines.files import FileException, FilesPipeline
+
+# TODO: from scrapy.pipelines.media import MediaPipeline
 from scrapy.settings import Settings
 from scrapy.utils.misc import md5sum
 from scrapy.utils.python import get_func_args, to_bytes
@@ -22,8 +26,11 @@ class NoimagesDrop(DropItem):
     """Product with no images exception"""

     def __init__(self, *args, **kwargs):
-        warnings.warn('The NoimagesDrop class is deprecated', category=
-            ScrapyDeprecationWarning, stacklevel=2)
+        warnings.warn(
+            "The NoimagesDrop class is deprecated",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(*args, **kwargs)


@@ -33,39 +40,192 @@ class ImageException(FileException):

 class ImagesPipeline(FilesPipeline):
     """Abstract pipeline that implement the image thumbnail generation logic"""
-    MEDIA_NAME = 'image'
+
+    MEDIA_NAME = "image"
+
+    # Uppercase attributes kept for backward compatibility with code that subclasses
+    # ImagesPipeline. They may be overridden by settings.
     MIN_WIDTH = 0
     MIN_HEIGHT = 0
     EXPIRES = 90
     THUMBS = {}
-    DEFAULT_IMAGES_URLS_FIELD = 'image_urls'
-    DEFAULT_IMAGES_RESULT_FIELD = 'images'
+    DEFAULT_IMAGES_URLS_FIELD = "image_urls"
+    DEFAULT_IMAGES_RESULT_FIELD = "images"

     def __init__(self, store_uri, download_func=None, settings=None):
         try:
             from PIL import Image
+
             self._Image = Image
         except ImportError:
             raise NotConfigured(
-                'ImagesPipeline requires installing Pillow 4.0.0 or later')
-        super().__init__(store_uri, settings=settings, download_func=
-            download_func)
+                "ImagesPipeline requires installing Pillow 4.0.0 or later"
+            )
+
+        super().__init__(store_uri, settings=settings, download_func=download_func)
+
         if isinstance(settings, dict) or settings is None:
             settings = Settings(settings)
-        resolve = functools.partial(self._key_for_pipe, base_class_name=
-            'ImagesPipeline', settings=settings)
-        self.expires = settings.getint(resolve('IMAGES_EXPIRES'), self.EXPIRES)
-        if not hasattr(self, 'IMAGES_RESULT_FIELD'):
+
+        resolve = functools.partial(
+            self._key_for_pipe,
+            base_class_name="ImagesPipeline",
+            settings=settings,
+        )
+        self.expires = settings.getint(resolve("IMAGES_EXPIRES"), self.EXPIRES)
+
+        if not hasattr(self, "IMAGES_RESULT_FIELD"):
             self.IMAGES_RESULT_FIELD = self.DEFAULT_IMAGES_RESULT_FIELD
-        if not hasattr(self, 'IMAGES_URLS_FIELD'):
+        if not hasattr(self, "IMAGES_URLS_FIELD"):
             self.IMAGES_URLS_FIELD = self.DEFAULT_IMAGES_URLS_FIELD
-        self.images_urls_field = settings.get(resolve('IMAGES_URLS_FIELD'),
-            self.IMAGES_URLS_FIELD)
-        self.images_result_field = settings.get(resolve(
-            'IMAGES_RESULT_FIELD'), self.IMAGES_RESULT_FIELD)
-        self.min_width = settings.getint(resolve('IMAGES_MIN_WIDTH'), self.
-            MIN_WIDTH)
-        self.min_height = settings.getint(resolve('IMAGES_MIN_HEIGHT'),
-            self.MIN_HEIGHT)
-        self.thumbs = settings.get(resolve('IMAGES_THUMBS'), self.THUMBS)
+
+        self.images_urls_field = settings.get(
+            resolve("IMAGES_URLS_FIELD"), self.IMAGES_URLS_FIELD
+        )
+        self.images_result_field = settings.get(
+            resolve("IMAGES_RESULT_FIELD"), self.IMAGES_RESULT_FIELD
+        )
+        self.min_width = settings.getint(resolve("IMAGES_MIN_WIDTH"), self.MIN_WIDTH)
+        self.min_height = settings.getint(resolve("IMAGES_MIN_HEIGHT"), self.MIN_HEIGHT)
+        self.thumbs = settings.get(resolve("IMAGES_THUMBS"), self.THUMBS)
+
         self._deprecated_convert_image = None
+
+    @classmethod
+    def from_settings(cls, settings):
+        s3store = cls.STORE_SCHEMES["s3"]
+        s3store.AWS_ACCESS_KEY_ID = settings["AWS_ACCESS_KEY_ID"]
+        s3store.AWS_SECRET_ACCESS_KEY = settings["AWS_SECRET_ACCESS_KEY"]
+        s3store.AWS_SESSION_TOKEN = settings["AWS_SESSION_TOKEN"]
+        s3store.AWS_ENDPOINT_URL = settings["AWS_ENDPOINT_URL"]
+        s3store.AWS_REGION_NAME = settings["AWS_REGION_NAME"]
+        s3store.AWS_USE_SSL = settings["AWS_USE_SSL"]
+        s3store.AWS_VERIFY = settings["AWS_VERIFY"]
+        s3store.POLICY = settings["IMAGES_STORE_S3_ACL"]
+
+        gcs_store = cls.STORE_SCHEMES["gs"]
+        gcs_store.GCS_PROJECT_ID = settings["GCS_PROJECT_ID"]
+        gcs_store.POLICY = settings["IMAGES_STORE_GCS_ACL"] or None
+
+        ftp_store = cls.STORE_SCHEMES["ftp"]
+        ftp_store.FTP_USERNAME = settings["FTP_USER"]
+        ftp_store.FTP_PASSWORD = settings["FTP_PASSWORD"]
+        ftp_store.USE_ACTIVE_MODE = settings.getbool("FEED_STORAGE_FTP_ACTIVE")
+
+        store_uri = settings["IMAGES_STORE"]
+        return cls(store_uri, settings=settings)
+
+    def file_downloaded(self, response, request, info, *, item=None):
+        return self.image_downloaded(response, request, info, item=item)
+
+    def image_downloaded(self, response, request, info, *, item=None):
+        checksum = None
+        for path, image, buf in self.get_images(response, request, info, item=item):
+            if checksum is None:
+                buf.seek(0)
+                checksum = md5sum(buf)
+            width, height = image.size
+            self.store.persist_file(
+                path,
+                buf,
+                info,
+                meta={"width": width, "height": height},
+                headers={"Content-Type": "image/jpeg"},
+            )
+        return checksum
+
+    def get_images(self, response, request, info, *, item=None):
+        path = self.file_path(request, response=response, info=info, item=item)
+        orig_image = self._Image.open(BytesIO(response.body))
+
+        width, height = orig_image.size
+        if width < self.min_width or height < self.min_height:
+            raise ImageException(
+                "Image too small "
+                f"({width}x{height} < "
+                f"{self.min_width}x{self.min_height})"
+            )
+
+        if self._deprecated_convert_image is None:
+            self._deprecated_convert_image = "response_body" not in get_func_args(
+                self.convert_image
+            )
+            if self._deprecated_convert_image:
+                warnings.warn(
+                    f"{self.__class__.__name__}.convert_image() method overridden in a deprecated way, "
+                    "overridden method does not accept response_body argument.",
+                    category=ScrapyDeprecationWarning,
+                )
+
+        if self._deprecated_convert_image:
+            image, buf = self.convert_image(orig_image)
+        else:
+            image, buf = self.convert_image(
+                orig_image, response_body=BytesIO(response.body)
+            )
+        yield path, image, buf
+
+        for thumb_id, size in self.thumbs.items():
+            thumb_path = self.thumb_path(
+                request, thumb_id, response=response, info=info, item=item
+            )
+            if self._deprecated_convert_image:
+                thumb_image, thumb_buf = self.convert_image(image, size)
+            else:
+                thumb_image, thumb_buf = self.convert_image(image, size, buf)
+            yield thumb_path, thumb_image, thumb_buf
+
+    def convert_image(self, image, size=None, response_body=None):
+        if response_body is None:
+            warnings.warn(
+                f"{self.__class__.__name__}.convert_image() method called in a deprecated way, "
+                "method called without response_body argument.",
+                category=ScrapyDeprecationWarning,
+                stacklevel=2,
+            )
+
+        if image.format in ("PNG", "WEBP") and image.mode == "RGBA":
+            background = self._Image.new("RGBA", image.size, (255, 255, 255))
+            background.paste(image, image)
+            image = background.convert("RGB")
+        elif image.mode == "P":
+            image = image.convert("RGBA")
+            background = self._Image.new("RGBA", image.size, (255, 255, 255))
+            background.paste(image, image)
+            image = background.convert("RGB")
+        elif image.mode != "RGB":
+            image = image.convert("RGB")
+
+        if size:
+            image = image.copy()
+            try:
+                # Image.Resampling.LANCZOS was added in Pillow 9.1.0
+                # remove this try except block,
+                # when updating the minimum requirements for Pillow.
+                resampling_filter = self._Image.Resampling.LANCZOS
+            except AttributeError:
+                resampling_filter = self._Image.ANTIALIAS
+            image.thumbnail(size, resampling_filter)
+        elif response_body is not None and image.format == "JPEG":
+            return image, response_body
+
+        buf = BytesIO()
+        image.save(buf, "JPEG")
+        return image, buf
+
+    def get_media_requests(self, item, info):
+        urls = ItemAdapter(item).get(self.images_urls_field, [])
+        return [Request(u, callback=NO_CALLBACK) for u in urls]
+
+    def item_completed(self, results, item, info):
+        with suppress(KeyError):
+            ItemAdapter(item)[self.images_result_field] = [x for ok, x in results if ok]
+        return item
+
+    def file_path(self, request, response=None, info=None, *, item=None):
+        image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
+        return f"full/{image_guid}.jpg"
+
+    def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
+        thumb_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
+        return f"thumbs/{thumb_id}/{thumb_guid}.jpg"
diff --git a/scrapy/pipelines/media.py b/scrapy/pipelines/media.py
index f88dbcfe8..153047acf 100644
--- a/scrapy/pipelines/media.py
+++ b/scrapy/pipelines/media.py
@@ -3,8 +3,10 @@ import logging
 from collections import defaultdict
 from inspect import signature
 from warnings import warn
+
 from twisted.internet.defer import Deferred, DeferredList
 from twisted.python.failure import Failure
+
 from scrapy.http.request import NO_CALLBACK
 from scrapy.settings import Settings
 from scrapy.utils.datatypes import SequenceExclude
@@ -12,15 +14,18 @@ from scrapy.utils.defer import defer_result, mustbe_deferred
 from scrapy.utils.deprecate import ScrapyDeprecationWarning
 from scrapy.utils.log import failure_to_exc_info
 from scrapy.utils.misc import arg_to_iter
+
 logger = logging.getLogger(__name__)


+def _DUMMY_CALLBACK(response):
+    return response
+
+
 class MediaPipeline:
     LOG_FAILED_RESULTS = True

-
     class SpiderInfo:
-
         def __init__(self, spider):
             self.spider = spider
             self.downloading = set()
@@ -30,15 +35,23 @@ class MediaPipeline:
     def __init__(self, download_func=None, settings=None):
         self.download_func = download_func
         self._expects_item = {}
+
         if isinstance(settings, dict) or settings is None:
             settings = Settings(settings)
-        resolve = functools.partial(self._key_for_pipe, base_class_name=
-            'MediaPipeline', settings=settings)
-        self.allow_redirects = settings.getbool(resolve(
-            'MEDIA_ALLOW_REDIRECTS'), False)
+        resolve = functools.partial(
+            self._key_for_pipe, base_class_name="MediaPipeline", settings=settings
+        )
+        self.allow_redirects = settings.getbool(resolve("MEDIA_ALLOW_REDIRECTS"), False)
         self._handle_statuses(self.allow_redirects)
+
+        # Check if deprecated methods are being used and make them compatible
         self._make_compatible()

+    def _handle_statuses(self, allow_redirects):
+        self.handle_httpstatus_list = None
+        if allow_redirects:
+            self.handle_httpstatus_list = SequenceExclude(range(300, 400))
+
     def _key_for_pipe(self, key, base_class_name=None, settings=None):
         """
         >>> MediaPipeline()._key_for_pipe("IMAGES")
@@ -48,16 +61,186 @@ class MediaPipeline:
         >>> MyPipe()._key_for_pipe("IMAGES", base_class_name="MediaPipeline")
         'MYPIPE_IMAGES'
         """
-        pass
+        class_name = self.__class__.__name__
+        formatted_key = f"{class_name.upper()}_{key}"
+        if (
+            not base_class_name
+            or class_name == base_class_name
+            or settings
+            and not settings.get(formatted_key)
+        ):
+            return key
+        return formatted_key
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        try:
+            pipe = cls.from_settings(crawler.settings)
+        except AttributeError:
+            pipe = cls()
+        pipe.crawler = crawler
+        pipe._fingerprinter = crawler.request_fingerprinter
+        return pipe
+
+    def open_spider(self, spider):
+        self.spiderinfo = self.SpiderInfo(spider)
+
+    def process_item(self, item, spider):
+        info = self.spiderinfo
+        requests = arg_to_iter(self.get_media_requests(item, info))
+        dlist = [self._process_request(r, info, item) for r in requests]
+        dfd = DeferredList(dlist, consumeErrors=True)
+        return dfd.addCallback(self.item_completed, item, info)
+
+    def _process_request(self, request, info, item):
+        fp = self._fingerprinter.fingerprint(request)
+        if not request.callback or request.callback is NO_CALLBACK:
+            cb = _DUMMY_CALLBACK
+        else:
+            cb = request.callback
+        eb = request.errback
+        request.callback = NO_CALLBACK
+        request.errback = None
+
+        # Return cached result if request was already seen
+        if fp in info.downloaded:
+            return defer_result(info.downloaded[fp]).addCallbacks(cb, eb)
+
+        # Otherwise, wait for result
+        wad = Deferred().addCallbacks(cb, eb)
+        info.waiting[fp].append(wad)
+
+        # Check if request is downloading right now to avoid doing it twice
+        if fp in info.downloading:
+            return wad
+
+        # Download request checking media_to_download hook output first
+        info.downloading.add(fp)
+        dfd = mustbe_deferred(self.media_to_download, request, info, item=item)
+        dfd.addCallback(self._check_media_to_download, request, info, item=item)
+        dfd.addErrback(self._log_exception)
+        dfd.addBoth(self._cache_result_and_execute_waiters, fp, info)
+        return dfd.addBoth(lambda _: wad)  # it must return wad at last
+
+    def _log_exception(self, result):
+        logger.exception(result)
+        return result

     def _make_compatible(self):
         """Make overridable methods of MediaPipeline and subclasses backwards compatible"""
-        pass
+        methods = [
+            "file_path",
+            "thumb_path",
+            "media_to_download",
+            "media_downloaded",
+            "file_downloaded",
+            "image_downloaded",
+            "get_images",
+        ]
+
+        for method_name in methods:
+            method = getattr(self, method_name, None)
+            if callable(method):
+                setattr(self, method_name, self._compatible(method))

     def _compatible(self, func):
         """Wrapper for overridable methods to allow backwards compatibility"""
-        pass
+        self._check_signature(func)
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            if self._expects_item[func.__name__]:
+                return func(*args, **kwargs)
+
+            kwargs.pop("item", None)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    def _check_signature(self, func):
+        sig = signature(func)
+        self._expects_item[func.__name__] = True

+        if "item" not in sig.parameters:
+            old_params = str(sig)[1:-1]
+            new_params = old_params + ", *, item=None"
+            warn(
+                f"{func.__name__}(self, {old_params}) is deprecated, "
+                f"please use {func.__name__}(self, {new_params})",
+                ScrapyDeprecationWarning,
+                stacklevel=2,
+            )
+            self._expects_item[func.__name__] = False
+
+    def _modify_media_request(self, request):
+        if self.handle_httpstatus_list:
+            request.meta["handle_httpstatus_list"] = self.handle_httpstatus_list
+        else:
+            request.meta["handle_httpstatus_all"] = True
+
+    def _check_media_to_download(self, result, request, info, item):
+        if result is not None:
+            return result
+        if self.download_func:
+            # this ugly code was left only to support tests. TODO: remove
+            dfd = mustbe_deferred(self.download_func, request, info.spider)
+            dfd.addCallbacks(
+                callback=self.media_downloaded,
+                callbackArgs=(request, info),
+                callbackKeywords={"item": item},
+                errback=self.media_failed,
+                errbackArgs=(request, info),
+            )
+        else:
+            self._modify_media_request(request)
+            dfd = self.crawler.engine.download(request)
+            dfd.addCallbacks(
+                callback=self.media_downloaded,
+                callbackArgs=(request, info),
+                callbackKeywords={"item": item},
+                errback=self.media_failed,
+                errbackArgs=(request, info),
+            )
+        return dfd
+
+    def _cache_result_and_execute_waiters(self, result, fp, info):
+        if isinstance(result, Failure):
+            # minimize cached information for failure
+            result.cleanFailure()
+            result.frames = []
+            result.stack = None
+
+            # This code fixes a memory leak by not keeping references to
+            # the Request and Response objects on the Media Pipeline cache.
+            #
+            # What happens when the media_downloaded callback raises an
+            # exception, for example a FileException('download-error') when
+            # the Response status code is not 200 OK, is that the original
+            # StopIteration exception (which in turn contains the failed
+            # Response and by extension, the original Request) gets encapsulated
+            # within the FileException context.
+            #
+            # Originally, Scrapy was using twisted.internet.defer.returnValue
+            # inside functions decorated with twisted.internet.defer.inlineCallbacks,
+            # encapsulating the returned Response in a _DefGen_Return exception
+            # instead of a StopIteration.
+            #
+            # To avoid keeping references to the Response and therefore Request
+            # objects on the Media Pipeline cache, we should wipe the context of
+            # the encapsulated exception when it is a StopIteration instance
+            #
+            # This problem does not occur in Python 2.7 since we don't have
+            # Exception Chaining (https://www.python.org/dev/peps/pep-3134/).
+            context = getattr(result.value, "__context__", None)
+            if isinstance(context, StopIteration):
+                setattr(result.value, "__context__", None)
+
+        info.downloading.remove(fp)
+        info.downloaded[fp] = result  # cache result
+        for wad in info.waiting.pop(fp):
+            defer_result(result).chainDeferred(wad)
+
+    # Overridable Interface
     def media_to_download(self, request, info, *, item=None):
         """Check request before starting download"""
         pass
@@ -68,15 +251,24 @@ class MediaPipeline:

     def media_downloaded(self, response, request, info, *, item=None):
         """Handler for success downloads"""
-        pass
+        return response

     def media_failed(self, failure, request, info):
         """Handler for failed downloads"""
-        pass
+        return failure

     def item_completed(self, results, item, info):
         """Called per item when all media requests has been processed"""
-        pass
+        if self.LOG_FAILED_RESULTS:
+            for ok, value in results:
+                if not ok:
+                    logger.error(
+                        "%(class)s found errors processing %(item)s",
+                        {"class": self.__class__.__name__, "item": item},
+                        exc_info=failure_to_exc_info(value),
+                        extra={"spider": info.spider},
+                    )
+        return item

     def file_path(self, request, response=None, info=None, *, item=None):
         """Returns the path where downloaded media should be stored"""
diff --git a/scrapy/pqueues.py b/scrapy/pqueues.py
index c9f7c822d..62a9af477 100644
--- a/scrapy/pqueues.py
+++ b/scrapy/pqueues.py
@@ -1,6 +1,8 @@
 import hashlib
 import logging
+
 from scrapy.utils.misc import create_instance
+
 logger = logging.getLogger(__name__)


@@ -15,7 +17,11 @@ def _path_safe(text):
     >>> _path_safe('some@symbol?').startswith('some_symbol_')
     True
     """
-    pass
+    pathable_slot = "".join([c if c.isalnum() or c in "-._" else "_" for c in text])
+    # as we replace some characters we can get collisions for different slots,
+    # so we append a unique md5-based suffix
+    unique_slot = hashlib.md5(text.encode("utf8")).hexdigest()
+    return "-".join([pathable_slot, unique_slot])


 class ScrapyPriorityQueue:
@@ -44,6 +50,10 @@ class ScrapyPriorityQueue:

     """

+    @classmethod
+    def from_crawler(cls, crawler, downstream_queue_cls, key, startprios=()):
+        return cls(crawler, downstream_queue_cls, key, startprios)
+
     def __init__(self, crawler, downstream_queue_cls, key, startprios=()):
         self.crawler = crawler
         self.downstream_queue_cls = downstream_queue_cls
@@ -52,6 +62,47 @@ class ScrapyPriorityQueue:
         self.curprio = None
         self.init_prios(startprios)

+    def init_prios(self, startprios):
+        if not startprios:
+            return
+
+        for priority in startprios:
+            self.queues[priority] = self.qfactory(priority)
+
+        self.curprio = min(startprios)
+
+    def qfactory(self, key):
+        return create_instance(
+            self.downstream_queue_cls,
+            None,
+            self.crawler,
+            self.key + "/" + str(key),
+        )
+
+    def priority(self, request):
+        return -request.priority
+
+    def push(self, request):
+        priority = self.priority(request)
+        if priority not in self.queues:
+            self.queues[priority] = self.qfactory(priority)
+        q = self.queues[priority]
+        q.push(request)  # this may fail (eg. serialization error)
+        if self.curprio is None or priority < self.curprio:
+            self.curprio = priority
+
+    def pop(self):
+        if self.curprio is None:
+            return
+        q = self.queues[self.curprio]
+        m = q.pop()
+        if not q:
+            del self.queues[self.curprio]
+            q.close()
+            prios = [p for p, q in self.queues.items() if q]
+            self.curprio = min(prios) if prios else None
+        return m
+
     def peek(self):
         """Returns the next object to be returned by :meth:`pop`,
         but without removing it from the queue.
@@ -59,20 +110,37 @@ class ScrapyPriorityQueue:
         Raises :exc:`NotImplementedError` if the underlying queue class does
         not implement a ``peek`` method, which is optional for queues.
         """
-        pass
+        if self.curprio is None:
+            return None
+        queue = self.queues[self.curprio]
+        return queue.peek()
+
+    def close(self):
+        active = []
+        for p, q in self.queues.items():
+            active.append(p)
+            q.close()
+        return active

     def __len__(self):
         return sum(len(x) for x in self.queues.values()) if self.queues else 0


 class DownloaderInterface:
-
     def __init__(self, crawler):
         self.downloader = crawler.engine.downloader

+    def stats(self, possible_slots):
+        return [(self._active_downloads(slot), slot) for slot in possible_slots]
+
+    def get_slot_key(self, request):
+        return self.downloader._get_slot_key(request, None)
+
     def _active_downloads(self, slot):
         """Return a number of requests in a Downloader for a given slot"""
-        pass
+        if slot not in self.downloader.slots:
+            return 0
+        return len(self.downloader.slots[slot].active)


 class DownloaderAwarePriorityQueue:
@@ -81,23 +149,64 @@ class DownloaderAwarePriorityQueue:
     first.
     """

+    @classmethod
+    def from_crawler(cls, crawler, downstream_queue_cls, key, startprios=()):
+        return cls(crawler, downstream_queue_cls, key, startprios)
+
     def __init__(self, crawler, downstream_queue_cls, key, slot_startprios=()):
-        if crawler.settings.getint('CONCURRENT_REQUESTS_PER_IP') != 0:
+        if crawler.settings.getint("CONCURRENT_REQUESTS_PER_IP") != 0:
             raise ValueError(
                 f'"{self.__class__}" does not support CONCURRENT_REQUESTS_PER_IP'
-                )
+            )
+
         if slot_startprios and not isinstance(slot_startprios, dict):
             raise ValueError(
-                f'DownloaderAwarePriorityQueue accepts ``slot_startprios`` as a dict; {slot_startprios.__class__!r} instance is passed. Most likely, it means the state iscreated by an incompatible priority queue. Only a crawl started with the same priority queue class can be resumed.'
-                )
+                "DownloaderAwarePriorityQueue accepts "
+                "``slot_startprios`` as a dict; "
+                f"{slot_startprios.__class__!r} instance "
+                "is passed. Most likely, it means the state is"
+                "created by an incompatible priority queue. "
+                "Only a crawl started with the same priority "
+                "queue class can be resumed."
+            )
+
         self._downloader_interface = DownloaderInterface(crawler)
         self.downstream_queue_cls = downstream_queue_cls
         self.key = key
         self.crawler = crawler
-        self.pqueues = {}
+
+        self.pqueues = {}  # slot -> priority queue
         for slot, startprios in (slot_startprios or {}).items():
             self.pqueues[slot] = self.pqfactory(slot, startprios)

+    def pqfactory(self, slot, startprios=()):
+        return ScrapyPriorityQueue(
+            self.crawler,
+            self.downstream_queue_cls,
+            self.key + "/" + _path_safe(slot),
+            startprios,
+        )
+
+    def pop(self):
+        stats = self._downloader_interface.stats(self.pqueues)
+
+        if not stats:
+            return
+
+        slot = min(stats)[1]
+        queue = self.pqueues[slot]
+        request = queue.pop()
+        if len(queue) == 0:
+            del self.pqueues[slot]
+        return request
+
+    def push(self, request):
+        slot = self._downloader_interface.get_slot_key(request)
+        if slot not in self.pqueues:
+            self.pqueues[slot] = self.pqfactory(slot)
+        queue = self.pqueues[slot]
+        queue.push(request)
+
     def peek(self):
         """Returns the next object to be returned by :meth:`pop`,
         but without removing it from the queue.
@@ -105,11 +214,20 @@ class DownloaderAwarePriorityQueue:
         Raises :exc:`NotImplementedError` if the underlying queue class does
         not implement a ``peek`` method, which is optional for queues.
         """
-        pass
+        stats = self._downloader_interface.stats(self.pqueues)
+        if not stats:
+            return None
+        slot = min(stats)[1]
+        queue = self.pqueues[slot]
+        return queue.peek()
+
+    def close(self):
+        active = {slot: queue.close() for slot, queue in self.pqueues.items()}
+        self.pqueues.clear()
+        return active

     def __len__(self):
-        return sum(len(x) for x in self.pqueues.values()
-            ) if self.pqueues else 0
+        return sum(len(x) for x in self.pqueues.values()) if self.pqueues else 0

     def __contains__(self, slot):
         return slot in self.pqueues
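
As a quick sanity check on the two pieces restored above: `ScrapyPriorityQueue` stores requests under the negated `Request.priority` and always pops the smallest key first, and `_path_safe` builds per-slot queue names from a readable slug plus an md5 suffix. A minimal sketch using plain functions that mirror the patch (not the Scrapy classes themselves):

    import hashlib

    # higher Request.priority -> lower internal key -> popped first
    internal = sorted(-p for p in (0, 10, -5))
    print(internal)  # [-10, 0, 5]

    def path_safe(text):
        # mirrors _path_safe above: readable part + md5 of the original text
        pathable = "".join(c if c.isalnum() or c in "-._" else "_" for c in text)
        return "-".join([pathable, hashlib.md5(text.encode("utf8")).hexdigest()])

    print(path_safe("some@symbol?").startswith("some_symbol_"))  # True
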
diff --git a/scrapy/resolver.py b/scrapy/resolver.py
index c5fa8c6de..e2e8beff4 100644
--- a/scrapy/resolver.py
+++ b/scrapy/resolver.py
@@ -1,9 +1,18 @@
 from typing import Any
+
 from twisted.internet import defer
 from twisted.internet.base import ThreadedResolver
-from twisted.internet.interfaces import IHostnameResolver, IHostResolution, IResolutionReceiver, IResolverSimple
+from twisted.internet.interfaces import (
+    IHostnameResolver,
+    IHostResolution,
+    IResolutionReceiver,
+    IResolverSimple,
+)
 from zope.interface.declarations import implementer, provider
+
 from scrapy.utils.datatypes import LocalCache
+
+# TODO: cache misses
 dnscache: LocalCache[str, Any] = LocalCache(10000)


@@ -18,22 +27,64 @@ class CachingThreadedResolver(ThreadedResolver):
         dnscache.limit = cache_size
         self.timeout = timeout

+    @classmethod
+    def from_crawler(cls, crawler, reactor):
+        if crawler.settings.getbool("DNSCACHE_ENABLED"):
+            cache_size = crawler.settings.getint("DNSCACHE_SIZE")
+        else:
+            cache_size = 0
+        return cls(reactor, cache_size, crawler.settings.getfloat("DNS_TIMEOUT"))
+
+    def install_on_reactor(self):
+        self.reactor.installResolver(self)
+
+    def getHostByName(self, name: str, timeout=None):
+        if name in dnscache:
+            return defer.succeed(dnscache[name])
+        # in Twisted<=16.6, getHostByName() is always called with
+        # a default timeout of 60s (actually passed as (1, 3, 11, 45) tuple),
+        # so the input argument above is simply overridden
+        # to enforce Scrapy's DNS_TIMEOUT setting's value
+        timeout = (self.timeout,)
+        d = super().getHostByName(name, timeout)
+        if dnscache.limit:
+            d.addCallback(self._cache_result, name)
+        return d
+
+    def _cache_result(self, result, name):
+        dnscache[name] = result
+        return result
+

 @implementer(IHostResolution)
 class HostResolution:
-
     def __init__(self, name):
         self.name = name

+    def cancel(self):
+        raise NotImplementedError()
+

 @provider(IResolutionReceiver)
 class _CachingResolutionReceiver:
-
     def __init__(self, resolutionReceiver, hostName):
         self.resolutionReceiver = resolutionReceiver
         self.hostName = hostName
         self.addresses = []

+    def resolutionBegan(self, resolution):
+        self.resolutionReceiver.resolutionBegan(resolution)
+        self.resolution = resolution
+
+    def addressResolved(self, address):
+        self.resolutionReceiver.addressResolved(address)
+        self.addresses.append(address)
+
+    def resolutionComplete(self):
+        self.resolutionReceiver.resolutionComplete()
+        if self.addresses:
+            dnscache[self.hostName] = self.addresses
+

 @implementer(IHostnameResolver)
 class CachingHostnameResolver:
@@ -46,3 +97,39 @@ class CachingHostnameResolver:
         self.reactor = reactor
         self.original_resolver = reactor.nameResolver
         dnscache.limit = cache_size
+
+    @classmethod
+    def from_crawler(cls, crawler, reactor):
+        if crawler.settings.getbool("DNSCACHE_ENABLED"):
+            cache_size = crawler.settings.getint("DNSCACHE_SIZE")
+        else:
+            cache_size = 0
+        return cls(reactor, cache_size)
+
+    def install_on_reactor(self):
+        self.reactor.installNameResolver(self)
+
+    def resolveHostName(
+        self,
+        resolutionReceiver,
+        hostName: str,
+        portNumber=0,
+        addressTypes=None,
+        transportSemantics="TCP",
+    ):
+        try:
+            addresses = dnscache[hostName]
+        except KeyError:
+            return self.original_resolver.resolveHostName(
+                _CachingResolutionReceiver(resolutionReceiver, hostName),
+                hostName,
+                portNumber,
+                addressTypes,
+                transportSemantics,
+            )
+        else:
+            resolutionReceiver.resolutionBegan(HostResolution(hostName))
+            for addr in addresses:
+                resolutionReceiver.addressResolved(addr)
+            resolutionReceiver.resolutionComplete()
+            return resolutionReceiver
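
Both resolvers above share the module-level `dnscache`, a `LocalCache`: a size-bounded dict that evicts its oldest entry once the limit is reached. A small hedged illustration of that eviction behaviour (the hostnames and addresses are made up):

    from scrapy.utils.datatypes import LocalCache

    cache = LocalCache(limit=2)
    cache["a.example"] = ["10.0.0.1"]
    cache["b.example"] = ["10.0.0.2"]
    cache["c.example"] = ["10.0.0.3"]  # evicts the oldest entry
    print(list(cache))  # ['b.example', 'c.example']
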
diff --git a/scrapy/responsetypes.py b/scrapy/responsetypes.py
index 31e10e18d..9e411d4aa 100644
--- a/scrapy/responsetypes.py
+++ b/scrapy/responsetypes.py
@@ -6,69 +6,134 @@ from io import StringIO
 from mimetypes import MimeTypes
 from pkgutil import get_data
 from typing import Dict, Mapping, Optional, Type, Union
+
 from scrapy.http import Response
 from scrapy.utils.misc import load_object
 from scrapy.utils.python import binary_is_text, to_bytes, to_unicode


 class ResponseTypes:
-    CLASSES = {'text/html': 'scrapy.http.HtmlResponse',
-        'application/atom+xml': 'scrapy.http.XmlResponse',
-        'application/rdf+xml': 'scrapy.http.XmlResponse',
-        'application/rss+xml': 'scrapy.http.XmlResponse',
-        'application/xhtml+xml': 'scrapy.http.HtmlResponse',
-        'application/vnd.wap.xhtml+xml': 'scrapy.http.HtmlResponse',
-        'application/xml': 'scrapy.http.XmlResponse', 'application/json':
-        'scrapy.http.TextResponse', 'application/x-json':
-        'scrapy.http.TextResponse', 'application/json-amazonui-streaming':
-        'scrapy.http.TextResponse', 'application/javascript':
-        'scrapy.http.TextResponse', 'application/x-javascript':
-        'scrapy.http.TextResponse', 'text/xml': 'scrapy.http.XmlResponse',
-        'text/*': 'scrapy.http.TextResponse'}
+    CLASSES = {
+        "text/html": "scrapy.http.HtmlResponse",
+        "application/atom+xml": "scrapy.http.XmlResponse",
+        "application/rdf+xml": "scrapy.http.XmlResponse",
+        "application/rss+xml": "scrapy.http.XmlResponse",
+        "application/xhtml+xml": "scrapy.http.HtmlResponse",
+        "application/vnd.wap.xhtml+xml": "scrapy.http.HtmlResponse",
+        "application/xml": "scrapy.http.XmlResponse",
+        "application/json": "scrapy.http.TextResponse",
+        "application/x-json": "scrapy.http.TextResponse",
+        "application/json-amazonui-streaming": "scrapy.http.TextResponse",
+        "application/javascript": "scrapy.http.TextResponse",
+        "application/x-javascript": "scrapy.http.TextResponse",
+        "text/xml": "scrapy.http.XmlResponse",
+        "text/*": "scrapy.http.TextResponse",
+    }

-    def __init__(self) ->None:
+    def __init__(self) -> None:
         self.classes: Dict[str, Type[Response]] = {}
         self.mimetypes: MimeTypes = MimeTypes()
-        mimedata = get_data('scrapy', 'mime.types')
+        mimedata = get_data("scrapy", "mime.types")
         if not mimedata:
             raise ValueError(
-                'The mime.types file is not found in the Scrapy installation')
-        self.mimetypes.readfp(StringIO(mimedata.decode('utf8')))
+                "The mime.types file is not found in the Scrapy installation"
+            )
+        self.mimetypes.readfp(StringIO(mimedata.decode("utf8")))
         for mimetype, cls in self.CLASSES.items():
             self.classes[mimetype] = load_object(cls)

-    def from_mimetype(self, mimetype: str) ->Type[Response]:
+    def from_mimetype(self, mimetype: str) -> Type[Response]:
         """Return the most appropriate Response class for the given mimetype"""
-        pass
+        if mimetype is None:
+            return Response
+        if mimetype in self.classes:
+            return self.classes[mimetype]
+        basetype = f"{mimetype.split('/')[0]}/*"
+        return self.classes.get(basetype, Response)

-    def from_content_type(self, content_type: Union[str, bytes],
-        content_encoding: Optional[bytes]=None) ->Type[Response]:
+    def from_content_type(
+        self, content_type: Union[str, bytes], content_encoding: Optional[bytes] = None
+    ) -> Type[Response]:
         """Return the most appropriate Response class from an HTTP Content-Type
         header"""
-        pass
+        if content_encoding:
+            return Response
+        mimetype = (
+            to_unicode(content_type, encoding="latin-1").split(";")[0].strip().lower()
+        )
+        return self.from_mimetype(mimetype)
+
+    def from_content_disposition(
+        self, content_disposition: Union[str, bytes]
+    ) -> Type[Response]:
+        try:
+            filename = (
+                to_unicode(content_disposition, encoding="latin-1", errors="replace")
+                .split(";")[1]
+                .split("=")[1]
+                .strip("\"'")
+            )
+            return self.from_filename(filename)
+        except IndexError:
+            return Response

-    def from_headers(self, headers: Mapping[bytes, bytes]) ->Type[Response]:
+    def from_headers(self, headers: Mapping[bytes, bytes]) -> Type[Response]:
         """Return the most appropriate Response class by looking at the HTTP
         headers"""
-        pass
+        cls = Response
+        if b"Content-Type" in headers:
+            cls = self.from_content_type(
+                content_type=headers[b"Content-Type"],
+                content_encoding=headers.get(b"Content-Encoding"),
+            )
+        if cls is Response and b"Content-Disposition" in headers:
+            cls = self.from_content_disposition(headers[b"Content-Disposition"])
+        return cls

-    def from_filename(self, filename: str) ->Type[Response]:
+    def from_filename(self, filename: str) -> Type[Response]:
         """Return the most appropriate Response class from a file name"""
-        pass
+        mimetype, encoding = self.mimetypes.guess_type(filename)
+        if mimetype and not encoding:
+            return self.from_mimetype(mimetype)
+        return Response

-    def from_body(self, body: bytes) ->Type[Response]:
+    def from_body(self, body: bytes) -> Type[Response]:
         """Try to guess the appropriate response based on the body content.
         This method is a bit magic and could be improved in the future, but
         it's not meant to be used except for special cases where response types
         cannot be guess using more straightforward methods."""
-        pass
+        chunk = body[:5000]
+        chunk = to_bytes(chunk)
+        if not binary_is_text(chunk):
+            return self.from_mimetype("application/octet-stream")
+        lowercase_chunk = chunk.lower()
+        if b"<html>" in lowercase_chunk:
+            return self.from_mimetype("text/html")
+        if b"<?xml" in lowercase_chunk:
+            return self.from_mimetype("text/xml")
+        if b"<!doctype html>" in lowercase_chunk:
+            return self.from_mimetype("text/html")
+        return self.from_mimetype("text")

-    def from_args(self, headers: Optional[Mapping[bytes, bytes]]=None, url:
-        Optional[str]=None, filename: Optional[str]=None, body: Optional[
-        bytes]=None) ->Type[Response]:
+    def from_args(
+        self,
+        headers: Optional[Mapping[bytes, bytes]] = None,
+        url: Optional[str] = None,
+        filename: Optional[str] = None,
+        body: Optional[bytes] = None,
+    ) -> Type[Response]:
         """Guess the most appropriate Response class based on
         the given arguments."""
-        pass
+        cls = Response
+        if headers is not None:
+            cls = self.from_headers(headers)
+        if cls is Response and url is not None:
+            cls = self.from_filename(url)
+        if cls is Response and filename is not None:
+            cls = self.from_filename(filename)
+        if cls is Response and body is not None:
+            cls = self.from_body(body)
+        return cls


 responsetypes = ResponseTypes()
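
The `from_args` precedence filled in above (headers first, then URL/filename, then body) can be exercised through the module-level `responsetypes` instance; a short hedged check, with the header value and example URL invented:

    from scrapy.responsetypes import responsetypes

    print(responsetypes.from_args(headers={b"Content-Type": b"text/html; charset=utf-8"}))
    # <class 'scrapy.http.response.html.HtmlResponse'>
    print(responsetypes.from_args(url="http://example.com/feed.xml"))
    # <class 'scrapy.http.response.xml.XmlResponse'>
    print(responsetypes.from_args(body=b"\x89PNG\r\n\x1a\n"))
    # <class 'scrapy.http.response.Response'>
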
diff --git a/scrapy/robotstxt.py b/scrapy/robotstxt.py
index a5916df9a..ea943c364 100644
--- a/scrapy/robotstxt.py
+++ b/scrapy/robotstxt.py
@@ -1,12 +1,32 @@
 import logging
 import sys
 from abc import ABCMeta, abstractmethod
+
 from scrapy.utils.python import to_unicode
+
 logger = logging.getLogger(__name__)


-class RobotParser(metaclass=ABCMeta):
+def decode_robotstxt(robotstxt_body, spider, to_native_str_type=False):
+    try:
+        if to_native_str_type:
+            robotstxt_body = to_unicode(robotstxt_body)
+        else:
+            robotstxt_body = robotstxt_body.decode("utf-8", errors="ignore")
+    except UnicodeDecodeError:
+        # If the robots.txt body is garbage or uses an encoding other than
+        # UTF-8, disregard it and switch to the 'allow all' state.
+        logger.warning(
+            "Failure while parsing robots.txt. File either contains garbage or "
+            "is in an encoding other than UTF-8, treating it as an empty file.",
+            exc_info=sys.exc_info(),
+            extra={"spider": spider},
+        )
+        robotstxt_body = ""
+    return robotstxt_body

+
+class RobotParser(metaclass=ABCMeta):
     @classmethod
     @abstractmethod
     def from_crawler(cls, crawler, robotstxt_body):
@@ -35,38 +55,81 @@ class RobotParser(metaclass=ABCMeta):


 class PythonRobotParser(RobotParser):
-
     def __init__(self, robotstxt_body, spider):
         from urllib.robotparser import RobotFileParser
+
         self.spider = spider
-        robotstxt_body = decode_robotstxt(robotstxt_body, spider,
-            to_native_str_type=True)
+        robotstxt_body = decode_robotstxt(
+            robotstxt_body, spider, to_native_str_type=True
+        )
         self.rp = RobotFileParser()
         self.rp.parse(robotstxt_body.splitlines())

+    @classmethod
+    def from_crawler(cls, crawler, robotstxt_body):
+        spider = None if not crawler else crawler.spider
+        o = cls(robotstxt_body, spider)
+        return o

-class ReppyRobotParser(RobotParser):
+    def allowed(self, url, user_agent):
+        user_agent = to_unicode(user_agent)
+        url = to_unicode(url)
+        return self.rp.can_fetch(user_agent, url)

+
+class ReppyRobotParser(RobotParser):
     def __init__(self, robotstxt_body, spider):
         from reppy.robots import Robots
+
         self.spider = spider
-        self.rp = Robots.parse('', robotstxt_body)
+        self.rp = Robots.parse("", robotstxt_body)

+    @classmethod
+    def from_crawler(cls, crawler, robotstxt_body):
+        spider = None if not crawler else crawler.spider
+        o = cls(robotstxt_body, spider)
+        return o
+
+    def allowed(self, url, user_agent):
+        return self.rp.allowed(url, user_agent)

-class RerpRobotParser(RobotParser):

+class RerpRobotParser(RobotParser):
     def __init__(self, robotstxt_body, spider):
         from robotexclusionrulesparser import RobotExclusionRulesParser
+
         self.spider = spider
         self.rp = RobotExclusionRulesParser()
         robotstxt_body = decode_robotstxt(robotstxt_body, spider)
         self.rp.parse(robotstxt_body)

+    @classmethod
+    def from_crawler(cls, crawler, robotstxt_body):
+        spider = None if not crawler else crawler.spider
+        o = cls(robotstxt_body, spider)
+        return o

-class ProtegoRobotParser(RobotParser):
+    def allowed(self, url, user_agent):
+        user_agent = to_unicode(user_agent)
+        url = to_unicode(url)
+        return self.rp.is_allowed(user_agent, url)

+
+class ProtegoRobotParser(RobotParser):
     def __init__(self, robotstxt_body, spider):
         from protego import Protego
+
         self.spider = spider
         robotstxt_body = decode_robotstxt(robotstxt_body, spider)
         self.rp = Protego.parse(robotstxt_body)
+
+    @classmethod
+    def from_crawler(cls, crawler, robotstxt_body):
+        spider = None if not crawler else crawler.spider
+        o = cls(robotstxt_body, spider)
+        return o
+
+    def allowed(self, url, user_agent):
+        user_agent = to_unicode(user_agent)
+        url = to_unicode(url)
+        return self.rp.can_fetch(url, user_agent)
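
All four wrappers above expose the same `allowed(url, user_agent)` interface even though the backing libraries disagree on argument order (`Protego.can_fetch` takes the URL first, `RobotFileParser.can_fetch` the user agent first). A brief hedged example against the default Protego-based parser, with a made-up robots.txt body and bot name:

    from scrapy.robotstxt import ProtegoRobotParser

    body = b"User-agent: *\nDisallow: /private/\n"
    parser = ProtegoRobotParser(body, spider=None)
    print(parser.allowed("https://example.com/public/page", "examplebot"))   # True
    print(parser.allowed("https://example.com/private/page", "examplebot"))  # False
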
diff --git a/scrapy/selector/unified.py b/scrapy/selector/unified.py
index 0c13d190a..5ad4724c0 100644
--- a/scrapy/selector/unified.py
+++ b/scrapy/selector/unified.py
@@ -2,15 +2,30 @@
 XPath selectors based on lxml
 """
 from typing import Any, Optional, Type, Union
+
 from parsel import Selector as _ParselSelector
+
 from scrapy.http import HtmlResponse, TextResponse, XmlResponse
 from scrapy.utils.python import to_bytes
 from scrapy.utils.response import get_base_url
 from scrapy.utils.trackref import object_ref
-__all__ = ['Selector', 'SelectorList']
+
+__all__ = ["Selector", "SelectorList"]
+
 _NOT_SET = object()


+def _st(response: Optional[TextResponse], st: Optional[str]) -> str:
+    if st is None:
+        return "xml" if isinstance(response, XmlResponse) else "html"
+    return st
+
+
+def _response_from_text(text: Union[str, bytes], st: Optional[str]) -> TextResponse:
+    rt: Type[TextResponse] = XmlResponse if st == "xml" else HtmlResponse
+    return rt(url="about:blank", encoding="utf-8", body=to_bytes(text, "utf-8"))
+
+
 class SelectorList(_ParselSelector.selectorlist_cls, object_ref):
     """
     The :class:`SelectorList` class is a subclass of the builtin ``list``
@@ -48,23 +63,36 @@ class Selector(_ParselSelector, object_ref):
     Otherwise, if ``type`` is set, the selector type will be forced and no
     detection will occur.
     """
-    __slots__ = ['response']
+
+    __slots__ = ["response"]
     selectorlist_cls = SelectorList

-    def __init__(self, response: Optional[TextResponse]=None, text:
-        Optional[str]=None, type: Optional[str]=None, root: Optional[Any]=
-        _NOT_SET, **kwargs: Any):
+    def __init__(
+        self,
+        response: Optional[TextResponse] = None,
+        text: Optional[str] = None,
+        type: Optional[str] = None,
+        root: Optional[Any] = _NOT_SET,
+        **kwargs: Any,
+    ):
         if response is not None and text is not None:
             raise ValueError(
-                f'{self.__class__.__name__}.__init__() received both response and text'
-                )
+                f"{self.__class__.__name__}.__init__() received "
+                "both response and text"
+            )
+
         st = _st(response, type)
+
         if text is not None:
             response = _response_from_text(text, st)
+
         if response is not None:
             text = response.text
-            kwargs.setdefault('base_url', get_base_url(response))
+            kwargs.setdefault("base_url", get_base_url(response))
+
         self.response = response
+
         if root is not _NOT_SET:
-            kwargs['root'] = root
+            kwargs["root"] = root
+
         super().__init__(text=text, type=st, **kwargs)
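
The `_st` helper restored above means the selector type is inferred from the response class unless explicitly forced, and passing both `response` and `text` is rejected. A small hedged illustration (URL and markup invented):

    from scrapy.http import HtmlResponse
    from scrapy.selector import Selector

    response = HtmlResponse(url="http://example.com",
                            body=b"<html><body><p>hi</p></body></html>")
    print(Selector(response=response).type)                      # html
    print(Selector(text="<root><a/></root>", type="xml").type)   # xml

    try:
        Selector(response=response, text="<p>hi</p>")
    except ValueError as exc:
        print(exc)  # Selector.__init__() received both response and text
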
diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py
index 417c6e729..6affd2125 100644
--- a/scrapy/settings/default_settings.py
+++ b/scrapy/settings/default_settings.py
@@ -12,234 +12,327 @@ Scrapy developers, if you add a setting here remember to:
   (docs/topics/settings.rst)

 """
+
 import sys
 from importlib import import_module
 from pathlib import Path
+
 ADDONS = {}
+
 AJAXCRAWL_ENABLED = False
+
 ASYNCIO_EVENT_LOOP = None
+
 AUTOTHROTTLE_ENABLED = False
 AUTOTHROTTLE_DEBUG = False
 AUTOTHROTTLE_MAX_DELAY = 60.0
 AUTOTHROTTLE_START_DELAY = 5.0
 AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-BOT_NAME = 'scrapybot'
+
+BOT_NAME = "scrapybot"
+
 CLOSESPIDER_TIMEOUT = 0
 CLOSESPIDER_PAGECOUNT = 0
 CLOSESPIDER_ITEMCOUNT = 0
 CLOSESPIDER_ERRORCOUNT = 0
-COMMANDS_MODULE = ''
+
+COMMANDS_MODULE = ""
+
 COMPRESSION_ENABLED = True
+
 CONCURRENT_ITEMS = 100
+
 CONCURRENT_REQUESTS = 16
 CONCURRENT_REQUESTS_PER_DOMAIN = 8
 CONCURRENT_REQUESTS_PER_IP = 0
+
 COOKIES_ENABLED = True
 COOKIES_DEBUG = False
-DEFAULT_ITEM_CLASS = 'scrapy.item.Item'
-DEFAULT_REQUEST_HEADERS = {'Accept':
-    'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-    'Accept-Language': 'en'}
+
+DEFAULT_ITEM_CLASS = "scrapy.item.Item"
+
+DEFAULT_REQUEST_HEADERS = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en",
+}
+
 DEPTH_LIMIT = 0
 DEPTH_STATS_VERBOSE = False
 DEPTH_PRIORITY = 0
+
 DNSCACHE_ENABLED = True
 DNSCACHE_SIZE = 10000
-DNS_RESOLVER = 'scrapy.resolver.CachingThreadedResolver'
+DNS_RESOLVER = "scrapy.resolver.CachingThreadedResolver"
 DNS_TIMEOUT = 60
+
 DOWNLOAD_DELAY = 0
+
 DOWNLOAD_HANDLERS = {}
-DOWNLOAD_HANDLERS_BASE = {'data':
-    'scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler',
-    'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
-    'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
-    'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
-    's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler', 'ftp':
-    'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler'}
-DOWNLOAD_TIMEOUT = 180
-DOWNLOAD_MAXSIZE = 1024 * 1024 * 1024
-DOWNLOAD_WARNSIZE = 32 * 1024 * 1024
+DOWNLOAD_HANDLERS_BASE = {
+    "data": "scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler",
+    "file": "scrapy.core.downloader.handlers.file.FileDownloadHandler",
+    "http": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
+    "https": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler",
+    "s3": "scrapy.core.downloader.handlers.s3.S3DownloadHandler",
+    "ftp": "scrapy.core.downloader.handlers.ftp.FTPDownloadHandler",
+}
+
+DOWNLOAD_TIMEOUT = 180  # 3mins
+
+DOWNLOAD_MAXSIZE = 1024 * 1024 * 1024  # 1024m
+DOWNLOAD_WARNSIZE = 32 * 1024 * 1024  # 32m
+
 DOWNLOAD_FAIL_ON_DATALOSS = True
-DOWNLOADER = 'scrapy.core.downloader.Downloader'
+
+DOWNLOADER = "scrapy.core.downloader.Downloader"
+
 DOWNLOADER_HTTPCLIENTFACTORY = (
-    'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory')
+    "scrapy.core.downloader.webclient.ScrapyHTTPClientFactory"
+)
 DOWNLOADER_CLIENTCONTEXTFACTORY = (
-    'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory')
-DOWNLOADER_CLIENT_TLS_CIPHERS = 'DEFAULT'
-DOWNLOADER_CLIENT_TLS_METHOD = 'TLS'
+    "scrapy.core.downloader.contextfactory.ScrapyClientContextFactory"
+)
+DOWNLOADER_CLIENT_TLS_CIPHERS = "DEFAULT"
+# Use highest TLS/SSL protocol version supported by the platform, also allowing negotiation:
+DOWNLOADER_CLIENT_TLS_METHOD = "TLS"
 DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = False
+
 DOWNLOADER_MIDDLEWARES = {}
+
 DOWNLOADER_MIDDLEWARES_BASE = {
-    'scrapy.downloadermiddlewares.offsite.OffsiteMiddleware': 50,
-    'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware': 100,
-    'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 300,
-    'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware':
-    350,
-    'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware':
-    400, 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 500,
-    'scrapy.downloadermiddlewares.retry.RetryMiddleware': 550,
-    'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware': 560,
-    'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware': 580,
-    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware':
-    590, 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware': 600,
-    'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': 700,
-    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 750,
-    'scrapy.downloadermiddlewares.stats.DownloaderStats': 850,
-    'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware': 900}
+    # Engine side
+    "scrapy.downloadermiddlewares.offsite.OffsiteMiddleware": 50,
+    "scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware": 100,
+    "scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware": 300,
+    "scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware": 350,
+    "scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware": 400,
+    "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": 500,
+    "scrapy.downloadermiddlewares.retry.RetryMiddleware": 550,
+    "scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware": 560,
+    "scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware": 580,
+    "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": 590,
+    "scrapy.downloadermiddlewares.redirect.RedirectMiddleware": 600,
+    "scrapy.downloadermiddlewares.cookies.CookiesMiddleware": 700,
+    "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": 750,
+    "scrapy.downloadermiddlewares.stats.DownloaderStats": 850,
+    "scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware": 900,
+    # Downloader side
+}
+
 DOWNLOADER_STATS = True
-DUPEFILTER_CLASS = 'scrapy.dupefilters.RFPDupeFilter'
-EDITOR = 'vi'
-if sys.platform == 'win32':
-    EDITOR = '%s -m idlelib.idle'
+
+DUPEFILTER_CLASS = "scrapy.dupefilters.RFPDupeFilter"
+
+EDITOR = "vi"
+if sys.platform == "win32":
+    EDITOR = "%s -m idlelib.idle"
+
 EXTENSIONS = {}
-EXTENSIONS_BASE = {'scrapy.extensions.corestats.CoreStats': 0,
-    'scrapy.extensions.telnet.TelnetConsole': 0,
-    'scrapy.extensions.memusage.MemoryUsage': 0,
-    'scrapy.extensions.memdebug.MemoryDebugger': 0,
-    'scrapy.extensions.closespider.CloseSpider': 0,
-    'scrapy.extensions.feedexport.FeedExporter': 0,
-    'scrapy.extensions.logstats.LogStats': 0,
-    'scrapy.extensions.spiderstate.SpiderState': 0,
-    'scrapy.extensions.throttle.AutoThrottle': 0}
+
+EXTENSIONS_BASE = {
+    "scrapy.extensions.corestats.CoreStats": 0,
+    "scrapy.extensions.telnet.TelnetConsole": 0,
+    "scrapy.extensions.memusage.MemoryUsage": 0,
+    "scrapy.extensions.memdebug.MemoryDebugger": 0,
+    "scrapy.extensions.closespider.CloseSpider": 0,
+    "scrapy.extensions.feedexport.FeedExporter": 0,
+    "scrapy.extensions.logstats.LogStats": 0,
+    "scrapy.extensions.spiderstate.SpiderState": 0,
+    "scrapy.extensions.throttle.AutoThrottle": 0,
+}
+
 FEED_TEMPDIR = None
 FEEDS = {}
-FEED_URI_PARAMS = None
+FEED_URI_PARAMS = None  # a function to extend uri arguments
 FEED_STORE_EMPTY = True
 FEED_EXPORT_ENCODING = None
 FEED_EXPORT_FIELDS = None
 FEED_STORAGES = {}
-FEED_STORAGES_BASE = {'': 'scrapy.extensions.feedexport.FileFeedStorage',
-    'file': 'scrapy.extensions.feedexport.FileFeedStorage', 'ftp':
-    'scrapy.extensions.feedexport.FTPFeedStorage', 'gs':
-    'scrapy.extensions.feedexport.GCSFeedStorage', 's3':
-    'scrapy.extensions.feedexport.S3FeedStorage', 'stdout':
-    'scrapy.extensions.feedexport.StdoutFeedStorage'}
+FEED_STORAGES_BASE = {
+    "": "scrapy.extensions.feedexport.FileFeedStorage",
+    "file": "scrapy.extensions.feedexport.FileFeedStorage",
+    "ftp": "scrapy.extensions.feedexport.FTPFeedStorage",
+    "gs": "scrapy.extensions.feedexport.GCSFeedStorage",
+    "s3": "scrapy.extensions.feedexport.S3FeedStorage",
+    "stdout": "scrapy.extensions.feedexport.StdoutFeedStorage",
+}
 FEED_EXPORT_BATCH_ITEM_COUNT = 0
 FEED_EXPORTERS = {}
-FEED_EXPORTERS_BASE = {'json': 'scrapy.exporters.JsonItemExporter',
-    'jsonlines': 'scrapy.exporters.JsonLinesItemExporter', 'jsonl':
-    'scrapy.exporters.JsonLinesItemExporter', 'jl':
-    'scrapy.exporters.JsonLinesItemExporter', 'csv':
-    'scrapy.exporters.CsvItemExporter', 'xml':
-    'scrapy.exporters.XmlItemExporter', 'marshal':
-    'scrapy.exporters.MarshalItemExporter', 'pickle':
-    'scrapy.exporters.PickleItemExporter'}
+FEED_EXPORTERS_BASE = {
+    "json": "scrapy.exporters.JsonItemExporter",
+    "jsonlines": "scrapy.exporters.JsonLinesItemExporter",
+    "jsonl": "scrapy.exporters.JsonLinesItemExporter",
+    "jl": "scrapy.exporters.JsonLinesItemExporter",
+    "csv": "scrapy.exporters.CsvItemExporter",
+    "xml": "scrapy.exporters.XmlItemExporter",
+    "marshal": "scrapy.exporters.MarshalItemExporter",
+    "pickle": "scrapy.exporters.PickleItemExporter",
+}
 FEED_EXPORT_INDENT = 0
+
 FEED_STORAGE_FTP_ACTIVE = False
-FEED_STORAGE_GCS_ACL = ''
-FEED_STORAGE_S3_ACL = ''
-FILES_STORE_S3_ACL = 'private'
-FILES_STORE_GCS_ACL = ''
-FTP_USER = 'anonymous'
-FTP_PASSWORD = 'guest'
+FEED_STORAGE_GCS_ACL = ""
+FEED_STORAGE_S3_ACL = ""
+
+FILES_STORE_S3_ACL = "private"
+FILES_STORE_GCS_ACL = ""
+
+FTP_USER = "anonymous"
+FTP_PASSWORD = "guest"
 FTP_PASSIVE_MODE = True
+
 GCS_PROJECT_ID = None
+
 HTTPCACHE_ENABLED = False
-HTTPCACHE_DIR = 'httpcache'
+HTTPCACHE_DIR = "httpcache"
 HTTPCACHE_IGNORE_MISSING = False
-HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
 HTTPCACHE_EXPIRATION_SECS = 0
 HTTPCACHE_ALWAYS_STORE = False
 HTTPCACHE_IGNORE_HTTP_CODES = []
-HTTPCACHE_IGNORE_SCHEMES = ['file']
+HTTPCACHE_IGNORE_SCHEMES = ["file"]
 HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS = []
-HTTPCACHE_DBM_MODULE = 'dbm'
-HTTPCACHE_POLICY = 'scrapy.extensions.httpcache.DummyPolicy'
+HTTPCACHE_DBM_MODULE = "dbm"
+HTTPCACHE_POLICY = "scrapy.extensions.httpcache.DummyPolicy"
 HTTPCACHE_GZIP = False
+
 HTTPPROXY_ENABLED = True
-HTTPPROXY_AUTH_ENCODING = 'latin-1'
-IMAGES_STORE_S3_ACL = 'private'
-IMAGES_STORE_GCS_ACL = ''
-ITEM_PROCESSOR = 'scrapy.pipelines.ItemPipelineManager'
+HTTPPROXY_AUTH_ENCODING = "latin-1"
+
+IMAGES_STORE_S3_ACL = "private"
+IMAGES_STORE_GCS_ACL = ""
+
+ITEM_PROCESSOR = "scrapy.pipelines.ItemPipelineManager"
+
 ITEM_PIPELINES = {}
 ITEM_PIPELINES_BASE = {}
+
 JOBDIR = None
+
 LOG_ENABLED = True
-LOG_ENCODING = 'utf-8'
-LOG_FORMATTER = 'scrapy.logformatter.LogFormatter'
-LOG_FORMAT = '%(asctime)s [%(name)s] %(levelname)s: %(message)s'
-LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S'
+LOG_ENCODING = "utf-8"
+LOG_FORMATTER = "scrapy.logformatter.LogFormatter"
+LOG_FORMAT = "%(asctime)s [%(name)s] %(levelname)s: %(message)s"
+LOG_DATEFORMAT = "%Y-%m-%d %H:%M:%S"
 LOG_STDOUT = False
-LOG_LEVEL = 'DEBUG'
+LOG_LEVEL = "DEBUG"
 LOG_FILE = None
 LOG_FILE_APPEND = True
 LOG_SHORT_NAMES = False
+
 SCHEDULER_DEBUG = False
+
 LOGSTATS_INTERVAL = 60.0
-MAIL_HOST = 'localhost'
+
+MAIL_HOST = "localhost"
 MAIL_PORT = 25
-MAIL_FROM = 'scrapy@localhost'
+MAIL_FROM = "scrapy@localhost"
 MAIL_PASS = None
 MAIL_USER = None
-MEMDEBUG_ENABLED = False
-MEMDEBUG_NOTIFY = []
+
+MEMDEBUG_ENABLED = False  # enable memory debugging
+MEMDEBUG_NOTIFY = []  # send memory debugging report by mail at engine shutdown
+
 MEMUSAGE_CHECK_INTERVAL_SECONDS = 60.0
 MEMUSAGE_ENABLED = True
 MEMUSAGE_LIMIT_MB = 0
 MEMUSAGE_NOTIFY_MAIL = []
 MEMUSAGE_WARNING_MB = 0
+
 METAREFRESH_ENABLED = True
-METAREFRESH_IGNORE_TAGS = ['noscript']
+METAREFRESH_IGNORE_TAGS = ["noscript"]
 METAREFRESH_MAXDELAY = 100
-NEWSPIDER_MODULE = ''
+
+NEWSPIDER_MODULE = ""
+
 PERIODIC_LOG_DELTA = None
 PERIODIC_LOG_STATS = None
 PERIODIC_LOG_TIMING_ENABLED = False
+
 RANDOMIZE_DOWNLOAD_DELAY = True
+
 REACTOR_THREADPOOL_MAXSIZE = 10
+
 REDIRECT_ENABLED = True
-REDIRECT_MAX_TIMES = 20
+REDIRECT_MAX_TIMES = 20  # uses Firefox default setting
 REDIRECT_PRIORITY_ADJUST = +2
+
 REFERER_ENABLED = True
-REFERRER_POLICY = 'scrapy.spidermiddlewares.referer.DefaultReferrerPolicy'
-REQUEST_FINGERPRINTER_CLASS = 'scrapy.utils.request.RequestFingerprinter'
-REQUEST_FINGERPRINTER_IMPLEMENTATION = '2.6'
+REFERRER_POLICY = "scrapy.spidermiddlewares.referer.DefaultReferrerPolicy"
+
+REQUEST_FINGERPRINTER_CLASS = "scrapy.utils.request.RequestFingerprinter"
+REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.6"
+
 RETRY_ENABLED = True
-RETRY_TIMES = 2
+RETRY_TIMES = 2  # initial response + 2 retries = 3 requests
 RETRY_HTTP_CODES = [500, 502, 503, 504, 522, 524, 408, 429]
 RETRY_PRIORITY_ADJUST = -1
-RETRY_EXCEPTIONS = ['twisted.internet.defer.TimeoutError',
-    'twisted.internet.error.TimeoutError',
-    'twisted.internet.error.DNSLookupError',
-    'twisted.internet.error.ConnectionRefusedError',
-    'twisted.internet.error.ConnectionDone',
-    'twisted.internet.error.ConnectError',
-    'twisted.internet.error.ConnectionLost',
-    'twisted.internet.error.TCPTimedOutError',
-    'twisted.web.client.ResponseFailed', OSError,
-    'scrapy.core.downloader.handlers.http11.TunnelError']
+RETRY_EXCEPTIONS = [
+    "twisted.internet.defer.TimeoutError",
+    "twisted.internet.error.TimeoutError",
+    "twisted.internet.error.DNSLookupError",
+    "twisted.internet.error.ConnectionRefusedError",
+    "twisted.internet.error.ConnectionDone",
+    "twisted.internet.error.ConnectError",
+    "twisted.internet.error.ConnectionLost",
+    "twisted.internet.error.TCPTimedOutError",
+    "twisted.web.client.ResponseFailed",
+    # OSError is raised by the HttpCompression middleware when trying to
+    # decompress an empty response
+    OSError,
+    "scrapy.core.downloader.handlers.http11.TunnelError",
+]
+
 ROBOTSTXT_OBEY = False
-ROBOTSTXT_PARSER = 'scrapy.robotstxt.ProtegoRobotParser'
+ROBOTSTXT_PARSER = "scrapy.robotstxt.ProtegoRobotParser"
 ROBOTSTXT_USER_AGENT = None
-SCHEDULER = 'scrapy.core.scheduler.Scheduler'
-SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
-SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
-SCHEDULER_PRIORITY_QUEUE = 'scrapy.pqueues.ScrapyPriorityQueue'
+
+SCHEDULER = "scrapy.core.scheduler.Scheduler"
+SCHEDULER_DISK_QUEUE = "scrapy.squeues.PickleLifoDiskQueue"
+SCHEDULER_MEMORY_QUEUE = "scrapy.squeues.LifoMemoryQueue"
+SCHEDULER_PRIORITY_QUEUE = "scrapy.pqueues.ScrapyPriorityQueue"
+
 SCRAPER_SLOT_MAX_ACTIVE_SIZE = 5000000
-SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
+
+SPIDER_LOADER_CLASS = "scrapy.spiderloader.SpiderLoader"
 SPIDER_LOADER_WARN_ONLY = False
+
 SPIDER_MIDDLEWARES = {}
+
 SPIDER_MIDDLEWARES_BASE = {
-    'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware': 50,
-    'scrapy.spidermiddlewares.referer.RefererMiddleware': 700,
-    'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware': 800,
-    'scrapy.spidermiddlewares.depth.DepthMiddleware': 900}
+    # Engine side
+    "scrapy.spidermiddlewares.httperror.HttpErrorMiddleware": 50,
+    "scrapy.spidermiddlewares.referer.RefererMiddleware": 700,
+    "scrapy.spidermiddlewares.urllength.UrlLengthMiddleware": 800,
+    "scrapy.spidermiddlewares.depth.DepthMiddleware": 900,
+    # Spider side
+}
+
 SPIDER_MODULES = []
-STATS_CLASS = 'scrapy.statscollectors.MemoryStatsCollector'
+
+STATS_CLASS = "scrapy.statscollectors.MemoryStatsCollector"
 STATS_DUMP = True
+
 STATSMAILER_RCPTS = []
-TEMPLATES_DIR = str((Path(__file__).parent / '..' / 'templates').resolve())
+
+TEMPLATES_DIR = str((Path(__file__).parent / ".." / "templates").resolve())
+
 URLLENGTH_LIMIT = 2083
-USER_AGENT = (
-    f"Scrapy/{import_module('scrapy').__version__} (+https://scrapy.org)")
+
+USER_AGENT = f'Scrapy/{import_module("scrapy").__version__} (+https://scrapy.org)'
+
 TELNETCONSOLE_ENABLED = 1
 TELNETCONSOLE_PORT = [6023, 6073]
-TELNETCONSOLE_HOST = '127.0.0.1'
-TELNETCONSOLE_USERNAME = 'scrapy'
+TELNETCONSOLE_HOST = "127.0.0.1"
+TELNETCONSOLE_USERNAME = "scrapy"
 TELNETCONSOLE_PASSWORD = None
+
 TWISTED_REACTOR = None
+
 SPIDER_CONTRACTS = {}
-SPIDER_CONTRACTS_BASE = {'scrapy.contracts.default.UrlContract': 1,
-    'scrapy.contracts.default.CallbackKeywordArgumentsContract': 1,
-    'scrapy.contracts.default.ReturnsContract': 2,
-    'scrapy.contracts.default.ScrapesContract': 3}
+SPIDER_CONTRACTS_BASE = {
+    "scrapy.contracts.default.UrlContract": 1,
+    "scrapy.contracts.default.CallbackKeywordArgumentsContract": 1,
+    "scrapy.contracts.default.ReturnsContract": 2,
+    "scrapy.contracts.default.ScrapesContract": 3,
+}
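
These module-level constants are loaded into every `Settings` object at the lowest ("default") priority, so anything set at project, spider, or command-line level overrides them. A quick hedged check using one of the values above:

    from scrapy.settings import Settings

    settings = Settings()                  # pre-populated with the defaults above
    print(settings.getint("RETRY_TIMES"))  # 2
    settings.set("RETRY_TIMES", 5, priority="project")
    print(settings.getint("RETRY_TIMES"))  # 5
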
diff --git a/scrapy/shell.py b/scrapy/shell.py
index f8a94309b..bb3b1461c 100644
--- a/scrapy/shell.py
+++ b/scrapy/shell.py
@@ -5,10 +5,12 @@ See documentation in docs/topics/shell.rst
 """
 import os
 import signal
+
 from itemadapter import is_item
 from twisted.internet import defer, threads
 from twisted.python import threadable
 from w3lib.url import any_to_uri
+
 from scrapy.crawler import Crawler
 from scrapy.exceptions import IgnoreRequest
 from scrapy.http import Request, Response
@@ -23,21 +25,159 @@ from scrapy.utils.response import open_in_browser


 class Shell:
-    relevant_classes = Crawler, Spider, Request, Response, Settings
+    relevant_classes = (Crawler, Spider, Request, Response, Settings)

     def __init__(self, crawler, update_vars=None, code=None):
         self.crawler = crawler
         self.update_vars = update_vars or (lambda x: None)
-        self.item_class = load_object(crawler.settings['DEFAULT_ITEM_CLASS'])
+        self.item_class = load_object(crawler.settings["DEFAULT_ITEM_CLASS"])
         self.spider = None
         self.inthread = not threadable.isInIOThread()
         self.code = code
         self.vars = {}

+    def start(self, url=None, request=None, response=None, spider=None, redirect=True):
+        # disable accidental Ctrl-C key press from shutting down the engine
+        signal.signal(signal.SIGINT, signal.SIG_IGN)
+        if url:
+            self.fetch(url, spider, redirect=redirect)
+        elif request:
+            self.fetch(request, spider)
+        elif response:
+            request = response.request
+            self.populate_vars(response, request, spider)
+        else:
+            self.populate_vars()
+        if self.code:
+            print(eval(self.code, globals(), self.vars))
+        else:
+            """
+            Detect interactive shell setting in scrapy.cfg
+            e.g.: ~/.config/scrapy.cfg or ~/.scrapy.cfg
+            [settings]
+            # shell can be one of ipython, bpython or python;
+            # to be used as the interactive python console, if available.
+            # (default is ipython, fallbacks in the order listed above)
+            shell = python
+            """
+            cfg = get_config()
+            section, option = "settings", "shell"
+            env = os.environ.get("SCRAPY_PYTHON_SHELL")
+            shells = []
+            if env:
+                shells += env.strip().lower().split(",")
+            elif cfg.has_option(section, option):
+                shells += [cfg.get(section, option).strip().lower()]
+            else:  # try all by default
+                shells += DEFAULT_PYTHON_SHELLS.keys()
+            # always add standard shell as fallback
+            shells += ["python"]
+            start_python_console(
+                self.vars, shells=shells, banner=self.vars.pop("banner", "")
+            )
+
+    def _schedule(self, request, spider):
+        if is_asyncio_reactor_installed():
+            # set the asyncio event loop for the current thread
+            event_loop_path = self.crawler.settings["ASYNCIO_EVENT_LOOP"]
+            set_asyncio_event_loop(event_loop_path)
+        spider = self._open_spider(request, spider)
+        d = _request_deferred(request)
+        d.addCallback(lambda x: (x, spider))
+        self.crawler.engine.crawl(request)
+        return d
+
+    def _open_spider(self, request, spider):
+        if self.spider:
+            return self.spider
+
+        if spider is None:
+            spider = self.crawler.spider or self.crawler._create_spider()
+
+        self.crawler.spider = spider
+        self.crawler.engine.open_spider(spider, close_if_idle=False)
+        self.spider = spider
+        return spider
+
+    def fetch(self, request_or_url, spider=None, redirect=True, **kwargs):
+        from twisted.internet import reactor
+
+        if isinstance(request_or_url, Request):
+            request = request_or_url
+        else:
+            url = any_to_uri(request_or_url)
+            request = Request(url, dont_filter=True, **kwargs)
+            if redirect:
+                request.meta["handle_httpstatus_list"] = SequenceExclude(
+                    range(300, 400)
+                )
+            else:
+                request.meta["handle_httpstatus_all"] = True
+        response = None
+        try:
+            response, spider = threads.blockingCallFromThread(
+                reactor, self._schedule, request, spider
+            )
+        except IgnoreRequest:
+            pass
+        self.populate_vars(response, request, spider)
+
+    def populate_vars(self, response=None, request=None, spider=None):
+        import scrapy
+
+        self.vars["scrapy"] = scrapy
+        self.vars["crawler"] = self.crawler
+        self.vars["item"] = self.item_class()
+        self.vars["settings"] = self.crawler.settings
+        self.vars["spider"] = spider
+        self.vars["request"] = request
+        self.vars["response"] = response
+        if self.inthread:
+            self.vars["fetch"] = self.fetch
+        self.vars["view"] = open_in_browser
+        self.vars["shelp"] = self.print_help
+        self.update_vars(self.vars)
+        if not self.code:
+            self.vars["banner"] = self.get_help()
+
+    def print_help(self):
+        print(self.get_help())
+
+    def get_help(self):
+        b = []
+        b.append("Available Scrapy objects:")
+        b.append(
+            "  scrapy     scrapy module (contains scrapy.Request, scrapy.Selector, etc)"
+        )
+        for k, v in sorted(self.vars.items()):
+            if self._is_relevant(v):
+                b.append(f"  {k:<10} {v}")
+        b.append("Useful shortcuts:")
+        if self.inthread:
+            b.append(
+                "  fetch(url[, redirect=True]) "
+                "Fetch URL and update local objects (by default, redirects are followed)"
+            )
+            b.append(
+                "  fetch(req)                  "
+                "Fetch a scrapy.Request and update local objects "
+            )
+        b.append("  shelp()           Shell help (print this help)")
+        b.append("  view(response)    View response in a browser")
+
+        return "\n".join(f"[s] {line}" for line in b)
+
+    def _is_relevant(self, value):
+        return isinstance(value, self.relevant_classes) or is_item(value)
+

 def inspect_response(response, spider):
     """Open a shell to inspect the given response"""
-    pass
+    # Shell.start removes the SIGINT handler, so save it and re-add it after
+    # the shell has closed
+    sigint_handler = signal.getsignal(signal.SIGINT)
+    Shell(spider.crawler).start(response=response, spider=spider)
+    signal.signal(signal.SIGINT, sigint_handler)


 def _request_deferred(request):
@@ -51,4 +191,18 @@ def _request_deferred(request):

     WARNING: Do not call request.replace() until after the deferred is called.
     """
-    pass
+    request_callback = request.callback
+    request_errback = request.errback
+
+    def _restore_callbacks(result):
+        request.callback = request_callback
+        request.errback = request_errback
+        return result
+
+    d = defer.Deferred()
+    d.addBoth(_restore_callbacks)
+    if request.callback:
+        d.addCallbacks(request.callback, request.errback)
+
+    request.callback, request.errback = d.callback, d.errback
+    return d
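
The restored shell helpers above are normally reached through inspect_response. A minimal sketch of a spider callback that drops into the shell when a page looks wrong; the spider name and URL are hypothetical.

import scrapy
from scrapy.shell import inspect_response


class DebugSpider(scrapy.Spider):
    # hypothetical spider, for illustration only
    name = "debug_example"
    start_urls = ["https://example.com/"]

    def parse(self, response):
        if not response.css("title::text").get():
            # Opens the interactive shell with response/request/spider (and
            # fetch()) pre-populated, then restores the SIGINT handler.
            inspect_response(response, self)
        yield {"url": response.url, "status": response.status}
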
diff --git a/scrapy/signalmanager.py b/scrapy/signalmanager.py
index e85b12c05..f6df191d8 100644
--- a/scrapy/signalmanager.py
+++ b/scrapy/signalmanager.py
@@ -1,15 +1,16 @@
 from typing import Any, List, Tuple
+
 from pydispatch import dispatcher
 from twisted.internet.defer import Deferred
+
 from scrapy.utils import signal as _signal


 class SignalManager:
-
-    def __init__(self, sender: Any=dispatcher.Anonymous):
+    def __init__(self, sender: Any = dispatcher.Anonymous):
         self.sender: Any = sender

-    def connect(self, receiver: Any, signal: Any, **kwargs: Any) ->None:
+    def connect(self, receiver: Any, signal: Any, **kwargs: Any) -> None:
         """
         Connect a receiver function to a signal.

@@ -23,27 +24,29 @@ class SignalManager:
         :param signal: the signal to connect to
         :type signal: object
         """
-        pass
+        kwargs.setdefault("sender", self.sender)
+        dispatcher.connect(receiver, signal, **kwargs)

-    def disconnect(self, receiver: Any, signal: Any, **kwargs: Any) ->None:
+    def disconnect(self, receiver: Any, signal: Any, **kwargs: Any) -> None:
         """
         Disconnect a receiver function from a signal. This has the
         opposite effect of the :meth:`connect` method, and the arguments
         are the same.
         """
-        pass
+        kwargs.setdefault("sender", self.sender)
+        dispatcher.disconnect(receiver, signal, **kwargs)

-    def send_catch_log(self, signal: Any, **kwargs: Any) ->List[Tuple[Any, Any]
-        ]:
+    def send_catch_log(self, signal: Any, **kwargs: Any) -> List[Tuple[Any, Any]]:
         """
         Send a signal, catch exceptions and log them.

         The keyword arguments are passed to the signal handlers (connected
         through the :meth:`connect` method).
         """
-        pass
+        kwargs.setdefault("sender", self.sender)
+        return _signal.send_catch_log(signal, **kwargs)

-    def send_catch_log_deferred(self, signal: Any, **kwargs: Any) ->Deferred:
+    def send_catch_log_deferred(self, signal: Any, **kwargs: Any) -> Deferred:
         """
         Like :meth:`send_catch_log` but supports returning
         :class:`~twisted.internet.defer.Deferred` objects from signal handlers.
@@ -54,13 +57,15 @@ class SignalManager:
         The keyword arguments are passed to the signal handlers (connected
         through the :meth:`connect` method).
         """
-        pass
+        kwargs.setdefault("sender", self.sender)
+        return _signal.send_catch_log_deferred(signal, **kwargs)

-    def disconnect_all(self, signal: Any, **kwargs: Any) ->None:
+    def disconnect_all(self, signal: Any, **kwargs: Any) -> None:
         """
         Disconnect all receivers from the given signal.

         :param signal: the signal to disconnect from
         :type signal: object
         """
-        pass
+        kwargs.setdefault("sender", self.sender)
+        _signal.disconnect_all(signal, **kwargs)
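
The SignalManager methods filled in above are what crawler.signals exposes to extensions. A minimal sketch (the extension name is made up) of the usual connect() usage from from_crawler.

from scrapy import signals


class SpiderOpenCloseLogger:
    """Minimal extension sketch wired through crawler.signals (a SignalManager)."""

    def __init__(self, crawler):
        crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def spider_opened(self, spider):
        spider.logger.info("spider %s opened", spider.name)

    def spider_closed(self, spider, reason):
        spider.logger.info("spider %s closed (%s)", spider.name, reason)
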
diff --git a/scrapy/signals.py b/scrapy/signals.py
index 0d08d829c..0090f1c8b 100644
--- a/scrapy/signals.py
+++ b/scrapy/signals.py
@@ -4,6 +4,7 @@ Scrapy signals
 These signals are documented in docs/topics/signals.rst. Please don't add new
 signals here without documenting them there.
 """
+
 engine_started = object()
 engine_stopped = object()
 spider_opened = object()
@@ -23,8 +24,12 @@ item_dropped = object()
 item_error = object()
 feed_slot_closed = object()
 feed_exporter_closed = object()
+
+# for backward compatibility
 stats_spider_opened = spider_opened
 stats_spider_closing = spider_closed
 stats_spider_closed = spider_closed
+
 item_passed = item_scraped
+
 request_received = request_scheduled
diff --git a/scrapy/spiderloader.py b/scrapy/spiderloader.py
index 9d53190bb..d855c962c 100644
--- a/scrapy/spiderloader.py
+++ b/scrapy/spiderloader.py
@@ -1,16 +1,21 @@
 from __future__ import annotations
+
 import traceback
 import warnings
 from collections import defaultdict
 from types import ModuleType
 from typing import TYPE_CHECKING, DefaultDict, Dict, List, Tuple, Type
+
 from zope.interface import implementer
+
 from scrapy import Request, Spider
 from scrapy.interfaces import ISpiderLoader
 from scrapy.settings import BaseSettings
 from scrapy.utils.misc import walk_modules
 from scrapy.utils.spider import iter_spider_classes
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self


@@ -22,28 +27,77 @@ class SpiderLoader:
     """

     def __init__(self, settings: BaseSettings):
-        self.spider_modules: List[str] = settings.getlist('SPIDER_MODULES')
-        self.warn_only: bool = settings.getbool('SPIDER_LOADER_WARN_ONLY')
+        self.spider_modules: List[str] = settings.getlist("SPIDER_MODULES")
+        self.warn_only: bool = settings.getbool("SPIDER_LOADER_WARN_ONLY")
         self._spiders: Dict[str, Type[Spider]] = {}
-        self._found: DefaultDict[str, List[Tuple[str, str]]] = defaultdict(list
-            )
+        self._found: DefaultDict[str, List[Tuple[str, str]]] = defaultdict(list)
         self._load_all_spiders()

-    def load(self, spider_name: str) ->Type[Spider]:
+    def _check_name_duplicates(self) -> None:
+        dupes = []
+        for name, locations in self._found.items():
+            dupes.extend(
+                [
+                    f"  {cls} named {name!r} (in {mod})"
+                    for mod, cls in locations
+                    if len(locations) > 1
+                ]
+            )
+
+        if dupes:
+            dupes_string = "\n\n".join(dupes)
+            warnings.warn(
+                "There are several spiders with the same name:\n\n"
+                f"{dupes_string}\n\n  This can cause unexpected behavior.",
+                category=UserWarning,
+            )
+
+    def _load_spiders(self, module: ModuleType) -> None:
+        for spcls in iter_spider_classes(module):
+            self._found[spcls.name].append((module.__name__, spcls.__name__))
+            self._spiders[spcls.name] = spcls
+
+    def _load_all_spiders(self) -> None:
+        for name in self.spider_modules:
+            try:
+                for module in walk_modules(name):
+                    self._load_spiders(module)
+            except ImportError:
+                if self.warn_only:
+                    warnings.warn(
+                        f"\n{traceback.format_exc()}Could not load spiders "
+                        f"from module '{name}'. "
+                        "See above traceback for details.",
+                        category=RuntimeWarning,
+                    )
+                else:
+                    raise
+        self._check_name_duplicates()
+
+    @classmethod
+    def from_settings(cls, settings: BaseSettings) -> Self:
+        return cls(settings)
+
+    def load(self, spider_name: str) -> Type[Spider]:
         """
         Return the Spider class for the given spider name. If the spider
         name is not found, raise a KeyError.
         """
-        pass
+        try:
+            return self._spiders[spider_name]
+        except KeyError:
+            raise KeyError(f"Spider not found: {spider_name}")

-    def find_by_request(self, request: Request) ->List[str]:
+    def find_by_request(self, request: Request) -> List[str]:
         """
         Return the list of spider names that can handle the given request.
         """
-        pass
+        return [
+            name for name, cls in self._spiders.items() if cls.handles_request(request)
+        ]

-    def list(self) ->List[str]:
+    def list(self) -> List[str]:
         """
         Return a list with the names of all spiders available in the project.
         """
-        pass
+        return list(self._spiders.keys())
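
A rough idea of how the completed SpiderLoader API is consumed; "myproject.spiders" and "quotes" are placeholders, and the snippet assumes that package is importable.

from scrapy.settings import Settings
from scrapy.spiderloader import SpiderLoader

settings = Settings({"SPIDER_MODULES": ["myproject.spiders"]})
loader = SpiderLoader.from_settings(settings)

print(loader.list())                # names of every spider found in SPIDER_MODULES
spider_cls = loader.load("quotes")  # raises KeyError("Spider not found: quotes") if absent
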
diff --git a/scrapy/spidermiddlewares/depth.py b/scrapy/spidermiddlewares/depth.py
index 6b9fdb9ee..eadc7c6ab 100644
--- a/scrapy/spidermiddlewares/depth.py
+++ b/scrapy/spidermiddlewares/depth.py
@@ -3,15 +3,61 @@ Depth Spider Middleware

 See documentation in docs/topics/spider-middleware.rst
 """
+
 import logging
+
 from scrapy.http import Request
+
 logger = logging.getLogger(__name__)


 class DepthMiddleware:
-
     def __init__(self, maxdepth, stats, verbose_stats=False, prio=1):
         self.maxdepth = maxdepth
         self.stats = stats
         self.verbose_stats = verbose_stats
         self.prio = prio
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        settings = crawler.settings
+        maxdepth = settings.getint("DEPTH_LIMIT")
+        verbose = settings.getbool("DEPTH_STATS_VERBOSE")
+        prio = settings.getint("DEPTH_PRIORITY")
+        return cls(maxdepth, crawler.stats, verbose, prio)
+
+    def process_spider_output(self, response, result, spider):
+        self._init_depth(response, spider)
+        return (r for r in result or () if self._filter(r, response, spider))
+
+    async def process_spider_output_async(self, response, result, spider):
+        self._init_depth(response, spider)
+        async for r in result or ():
+            if self._filter(r, response, spider):
+                yield r
+
+    def _init_depth(self, response, spider):
+        # base case (depth=0)
+        if "depth" not in response.meta:
+            response.meta["depth"] = 0
+            if self.verbose_stats:
+                self.stats.inc_value("request_depth_count/0", spider=spider)
+
+    def _filter(self, request, response, spider):
+        if not isinstance(request, Request):
+            return True
+        depth = response.meta["depth"] + 1
+        request.meta["depth"] = depth
+        if self.prio:
+            request.priority -= depth * self.prio
+        if self.maxdepth and depth > self.maxdepth:
+            logger.debug(
+                "Ignoring link (depth > %(maxdepth)d): %(requrl)s ",
+                {"maxdepth": self.maxdepth, "requrl": request.url},
+                extra={"spider": spider},
+            )
+            return False
+        if self.verbose_stats:
+            self.stats.inc_value(f"request_depth_count/{depth}", spider=spider)
+        self.stats.max_value("request_depth_max", depth, spider=spider)
+        return True
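
DepthMiddleware reads its knobs from settings in from_crawler. A settings sketch with example values only:

# settings.py sketch -- example values only
DEPTH_LIMIT = 3             # requests deeper than 3 levels are dropped by _filter()
DEPTH_STATS_VERBOSE = True  # also record request_depth_count/<n> per level
DEPTH_PRIORITY = 1          # deeper requests get lower priority (breadth-first bias)
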
diff --git a/scrapy/spidermiddlewares/httperror.py b/scrapy/spidermiddlewares/httperror.py
index 001661412..0d3e5fe0b 100644
--- a/scrapy/spidermiddlewares/httperror.py
+++ b/scrapy/spidermiddlewares/httperror.py
@@ -4,7 +4,9 @@ HttpError Spider Middleware
 See documentation in docs/topics/spider-middleware.rst
 """
 import logging
+
 from scrapy.exceptions import IgnoreRequest
+
 logger = logging.getLogger(__name__)


@@ -17,8 +19,41 @@ class HttpError(IgnoreRequest):


 class HttpErrorMiddleware:
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler.settings)

     def __init__(self, settings):
-        self.handle_httpstatus_all = settings.getbool('HTTPERROR_ALLOW_ALL')
-        self.handle_httpstatus_list = settings.getlist(
-            'HTTPERROR_ALLOWED_CODES')
+        self.handle_httpstatus_all = settings.getbool("HTTPERROR_ALLOW_ALL")
+        self.handle_httpstatus_list = settings.getlist("HTTPERROR_ALLOWED_CODES")
+
+    def process_spider_input(self, response, spider):
+        if 200 <= response.status < 300:  # common case
+            return
+        meta = response.meta
+        if meta.get("handle_httpstatus_all", False):
+            return
+        if "handle_httpstatus_list" in meta:
+            allowed_statuses = meta["handle_httpstatus_list"]
+        elif self.handle_httpstatus_all:
+            return
+        else:
+            allowed_statuses = getattr(
+                spider, "handle_httpstatus_list", self.handle_httpstatus_list
+            )
+        if response.status in allowed_statuses:
+            return
+        raise HttpError(response, "Ignoring non-200 response")
+
+    def process_spider_exception(self, response, exception, spider):
+        if isinstance(exception, HttpError):
+            spider.crawler.stats.inc_value("httperror/response_ignored_count")
+            spider.crawler.stats.inc_value(
+                f"httperror/response_ignored_status_count/{response.status}"
+            )
+            logger.info(
+                "Ignoring response %(response)r: HTTP status code is not handled or not allowed",
+                {"response": response},
+                extra={"spider": spider},
+            )
+            return []
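
The allow-lists consulted by process_spider_input can come from request meta or from a spider attribute. A minimal, hypothetical spider showing both (URL is a placeholder):

import scrapy


class ErrorTolerantSpider(scrapy.Spider):
    # hypothetical spider
    name = "error_tolerant"
    # spider-wide allow-list, used when no per-request list is given
    handle_httpstatus_list = [404, 500]

    def start_requests(self):
        # per-request override, checked before the spider attribute
        yield scrapy.Request(
            "https://example.com/maybe-missing",
            meta={"handle_httpstatus_list": [404]},
            callback=self.parse,
        )

    def parse(self, response):
        yield {"url": response.url, "status": response.status}
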
diff --git a/scrapy/spidermiddlewares/offsite.py b/scrapy/spidermiddlewares/offsite.py
index 5b86596e3..243055d89 100644
--- a/scrapy/spidermiddlewares/offsite.py
+++ b/scrapy/spidermiddlewares/offsite.py
@@ -6,24 +6,93 @@ See documentation in docs/topics/spider-middleware.rst
 import logging
 import re
 import warnings
+
 from scrapy import signals
 from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.http import Request
 from scrapy.utils.httpobj import urlparse_cached
+
 warnings.warn(
-    'The scrapy.spidermiddlewares.offsite module is deprecated, use scrapy.downloadermiddlewares.offsite instead.'
-    , ScrapyDeprecationWarning)
+    "The scrapy.spidermiddlewares.offsite module is deprecated, use "
+    "scrapy.downloadermiddlewares.offsite instead.",
+    ScrapyDeprecationWarning,
+)
+
 logger = logging.getLogger(__name__)


 class OffsiteMiddleware:
-
     def __init__(self, stats):
         self.stats = stats

+    @classmethod
+    def from_crawler(cls, crawler):
+        o = cls(crawler.stats)
+        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
+        return o
+
+    def process_spider_output(self, response, result, spider):
+        return (r for r in result or () if self._filter(r, spider))
+
+    async def process_spider_output_async(self, response, result, spider):
+        async for r in result or ():
+            if self._filter(r, spider):
+                yield r
+
+    def _filter(self, request, spider) -> bool:
+        if not isinstance(request, Request):
+            return True
+        if request.dont_filter or self.should_follow(request, spider):
+            return True
+        domain = urlparse_cached(request).hostname
+        if domain and domain not in self.domains_seen:
+            self.domains_seen.add(domain)
+            logger.debug(
+                "Filtered offsite request to %(domain)r: %(request)s",
+                {"domain": domain, "request": request},
+                extra={"spider": spider},
+            )
+            self.stats.inc_value("offsite/domains", spider=spider)
+        self.stats.inc_value("offsite/filtered", spider=spider)
+        return False
+
+    def should_follow(self, request, spider):
+        regex = self.host_regex
+        # hostname can be None for wrong urls (like javascript links)
+        host = urlparse_cached(request).hostname or ""
+        return bool(regex.search(host))
+
     def get_host_regex(self, spider):
         """Override this method to implement a different offsite policy"""
-        pass
+        allowed_domains = getattr(spider, "allowed_domains", None)
+        if not allowed_domains:
+            return re.compile("")  # allow all by default
+        url_pattern = re.compile(r"^https?://.*$")
+        port_pattern = re.compile(r":\d+$")
+        domains = []
+        for domain in allowed_domains:
+            if domain is None:
+                continue
+            if url_pattern.match(domain):
+                message = (
+                    "allowed_domains accepts only domains, not URLs. "
+                    f"Ignoring URL entry {domain} in allowed_domains."
+                )
+                warnings.warn(message, URLWarning)
+            elif port_pattern.search(domain):
+                message = (
+                    "allowed_domains accepts only domains without ports. "
+                    f"Ignoring entry {domain} in allowed_domains."
+                )
+                warnings.warn(message, PortWarning)
+            else:
+                domains.append(re.escape(domain))
+        regex = rf'^(.*\.)?({"|".join(domains)})$'
+        return re.compile(regex)
+
+    def spider_opened(self, spider):
+        self.host_regex = self.get_host_regex(spider)
+        self.domains_seen = set()


 class URLWarning(Warning):
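
Although this module is deprecated in favour of the downloader-middleware version, the allowed_domains semantics it enforces are the same. A small illustrative spider (name, domain and URL are placeholders):

import scrapy


class OnsiteOnlySpider(scrapy.Spider):
    # hypothetical spider
    name = "onsite_only"
    allowed_domains = ["example.com"]   # bare domains only:
    # "https://example.com" would trigger URLWarning and be ignored,
    # "example.com:8080" would trigger PortWarning and be ignored.
    start_urls = ["https://example.com/"]

    def parse(self, response):
        # off-domain links are dropped by the middleware unless dont_filter is set
        yield from response.follow_all(css="a::attr(href)", callback=self.parse)
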
diff --git a/scrapy/spidermiddlewares/referer.py b/scrapy/spidermiddlewares/referer.py
index a92a7e327..fd91e658b 100644
--- a/scrapy/spidermiddlewares/referer.py
+++ b/scrapy/spidermiddlewares/referer.py
@@ -5,29 +5,49 @@ originated it.
 import warnings
 from typing import Tuple
 from urllib.parse import urlparse
+
 from w3lib.url import safe_url_string
+
 from scrapy import signals
 from scrapy.exceptions import NotConfigured
 from scrapy.http import Request, Response
 from scrapy.utils.misc import load_object
 from scrapy.utils.python import to_unicode
 from scrapy.utils.url import strip_url
-LOCAL_SCHEMES = 'about', 'blob', 'data', 'filesystem'
-POLICY_NO_REFERRER = 'no-referrer'
-POLICY_NO_REFERRER_WHEN_DOWNGRADE = 'no-referrer-when-downgrade'
-POLICY_SAME_ORIGIN = 'same-origin'
-POLICY_ORIGIN = 'origin'
-POLICY_STRICT_ORIGIN = 'strict-origin'
-POLICY_ORIGIN_WHEN_CROSS_ORIGIN = 'origin-when-cross-origin'
-POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN = 'strict-origin-when-cross-origin'
-POLICY_UNSAFE_URL = 'unsafe-url'
-POLICY_SCRAPY_DEFAULT = 'scrapy-default'
+
+LOCAL_SCHEMES = (
+    "about",
+    "blob",
+    "data",
+    "filesystem",
+)
+
+POLICY_NO_REFERRER = "no-referrer"
+POLICY_NO_REFERRER_WHEN_DOWNGRADE = "no-referrer-when-downgrade"
+POLICY_SAME_ORIGIN = "same-origin"
+POLICY_ORIGIN = "origin"
+POLICY_STRICT_ORIGIN = "strict-origin"
+POLICY_ORIGIN_WHEN_CROSS_ORIGIN = "origin-when-cross-origin"
+POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN = "strict-origin-when-cross-origin"
+POLICY_UNSAFE_URL = "unsafe-url"
+POLICY_SCRAPY_DEFAULT = "scrapy-default"


 class ReferrerPolicy:
     NOREFERRER_SCHEMES: Tuple[str, ...] = LOCAL_SCHEMES
     name: str

+    def referrer(self, response_url, request_url):
+        raise NotImplementedError()
+
+    def stripped_referrer(self, url):
+        if urlparse(url).scheme not in self.NOREFERRER_SCHEMES:
+            return self.strip_url(url)
+
+    def origin_referrer(self, url):
+        if urlparse(url).scheme not in self.NOREFERRER_SCHEMES:
+            return self.origin(url)
+
     def strip_url(self, url, origin_only=False):
         """
         https://www.w3.org/TR/referrer-policy/#strip-url
@@ -42,11 +62,29 @@ class ReferrerPolicy:
             Set url's query to null.
         Return url.
         """
-        pass
+        if not url:
+            return None
+        return strip_url(
+            url,
+            strip_credentials=True,
+            strip_fragment=True,
+            strip_default_port=True,
+            origin_only=origin_only,
+        )

     def origin(self, url):
         """Return serialized origin (scheme, host, path) for a request or response URL."""
-        pass
+        return self.strip_url(url, origin_only=True)
+
+    def potentially_trustworthy(self, url):
+        # Note: this does not follow https://w3c.github.io/webappsec-secure-contexts/#is-url-trustworthy
+        parsed_url = urlparse(url)
+        if parsed_url.scheme in ("data",):
+            return False
+        return self.tls_protected(url)
+
+    def tls_protected(self, url):
+        return urlparse(url).scheme in ("https", "ftps")


 class NoReferrerPolicy(ReferrerPolicy):
@@ -57,8 +95,12 @@ class NoReferrerPolicy(ReferrerPolicy):
     is to be sent along with requests made from a particular request client to any origin.
     The header will be omitted entirely.
     """
+
     name: str = POLICY_NO_REFERRER

+    def referrer(self, response_url, request_url):
+        return None
+

 class NoReferrerWhenDowngradePolicy(ReferrerPolicy):
     """
@@ -74,8 +116,13 @@ class NoReferrerWhenDowngradePolicy(ReferrerPolicy):

     This is a user agent's default behavior, if no policy is otherwise specified.
     """
+
     name: str = POLICY_NO_REFERRER_WHEN_DOWNGRADE

+    def referrer(self, response_url, request_url):
+        if not self.tls_protected(response_url) or self.tls_protected(request_url):
+            return self.stripped_referrer(response_url)
+

 class SameOriginPolicy(ReferrerPolicy):
     """
@@ -87,8 +134,13 @@ class SameOriginPolicy(ReferrerPolicy):
     Cross-origin requests, on the other hand, will contain no referrer information.
     A Referer HTTP header will not be sent.
     """
+
     name: str = POLICY_SAME_ORIGIN

+    def referrer(self, response_url, request_url):
+        if self.origin(response_url) == self.origin(request_url):
+            return self.stripped_referrer(response_url)
+

 class OriginPolicy(ReferrerPolicy):
     """
@@ -99,8 +151,12 @@ class OriginPolicy(ReferrerPolicy):
     when making both same-origin requests and cross-origin requests
     from a particular request client.
     """
+
     name: str = POLICY_ORIGIN

+    def referrer(self, response_url, request_url):
+        return self.origin_referrer(response_url)
+

 class StrictOriginPolicy(ReferrerPolicy):
     """
@@ -115,8 +171,17 @@ class StrictOriginPolicy(ReferrerPolicy):
     on the other hand, will contain no referrer information.
     A Referer HTTP header will not be sent.
     """
+
     name: str = POLICY_STRICT_ORIGIN

+    def referrer(self, response_url, request_url):
+        if (
+            self.tls_protected(response_url)
+            and self.potentially_trustworthy(request_url)
+            or not self.tls_protected(response_url)
+        ):
+            return self.origin_referrer(response_url)
+

 class OriginWhenCrossOriginPolicy(ReferrerPolicy):
     """
@@ -129,8 +194,15 @@ class OriginWhenCrossOriginPolicy(ReferrerPolicy):
     is sent as referrer information when making cross-origin requests
     from a particular request client.
     """
+
     name: str = POLICY_ORIGIN_WHEN_CROSS_ORIGIN

+    def referrer(self, response_url, request_url):
+        origin = self.origin(response_url)
+        if origin == self.origin(request_url):
+            return self.stripped_referrer(response_url)
+        return origin
+

 class StrictOriginWhenCrossOriginPolicy(ReferrerPolicy):
     """
@@ -149,8 +221,20 @@ class StrictOriginWhenCrossOriginPolicy(ReferrerPolicy):
     on the other hand, will contain no referrer information.
     A Referer HTTP header will not be sent.
     """
+
     name: str = POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN

+    def referrer(self, response_url, request_url):
+        origin = self.origin(response_url)
+        if origin == self.origin(request_url):
+            return self.stripped_referrer(response_url)
+        if (
+            self.tls_protected(response_url)
+            and self.potentially_trustworthy(request_url)
+            or not self.tls_protected(response_url)
+        ):
+            return self.origin_referrer(response_url)
+

 class UnsafeUrlPolicy(ReferrerPolicy):
     """
@@ -165,8 +249,12 @@ class UnsafeUrlPolicy(ReferrerPolicy):
     to insecure origins.
     Carefully consider the impact of setting such a policy for potentially sensitive documents.
     """
+
     name: str = POLICY_UNSAFE_URL

+    def referrer(self, response_url, request_url):
+        return self.stripped_referrer(response_url)
+

 class DefaultReferrerPolicy(NoReferrerWhenDowngradePolicy):
     """
@@ -174,15 +262,28 @@ class DefaultReferrerPolicy(NoReferrerWhenDowngradePolicy):
     with the addition that "Referer" is not sent if the parent request was
     using ``file://`` or ``s3://`` scheme.
     """
-    NOREFERRER_SCHEMES: Tuple[str, ...] = LOCAL_SCHEMES + ('file', 's3')
+
+    NOREFERRER_SCHEMES: Tuple[str, ...] = LOCAL_SCHEMES + ("file", "s3")
     name: str = POLICY_SCRAPY_DEFAULT


-_policy_classes = {p.name: p for p in (NoReferrerPolicy,
-    NoReferrerWhenDowngradePolicy, SameOriginPolicy, OriginPolicy,
-    StrictOriginPolicy, OriginWhenCrossOriginPolicy,
-    StrictOriginWhenCrossOriginPolicy, UnsafeUrlPolicy, DefaultReferrerPolicy)}
-_policy_classes[''] = NoReferrerWhenDowngradePolicy
+_policy_classes = {
+    p.name: p
+    for p in (
+        NoReferrerPolicy,
+        NoReferrerWhenDowngradePolicy,
+        SameOriginPolicy,
+        OriginPolicy,
+        StrictOriginPolicy,
+        OriginWhenCrossOriginPolicy,
+        StrictOriginWhenCrossOriginPolicy,
+        UnsafeUrlPolicy,
+        DefaultReferrerPolicy,
+    )
+}
+
+# Reference: https://www.w3.org/TR/referrer-policy/#referrer-policy-empty-string
+_policy_classes[""] = NoReferrerWhenDowngradePolicy


 def _load_policy_class(policy, warning_only=False):
@@ -191,16 +292,36 @@ def _load_policy_class(policy, warning_only=False):
     otherwise try to interpret the string as a standard value
     from https://www.w3.org/TR/referrer-policy/#referrer-policies
     """
-    pass
+    try:
+        return load_object(policy)
+    except ValueError:
+        try:
+            return _policy_classes[policy.lower()]
+        except KeyError:
+            msg = f"Could not load referrer policy {policy!r}"
+            if not warning_only:
+                raise RuntimeError(msg)
+            else:
+                warnings.warn(msg, RuntimeWarning)
+                return None


 class RefererMiddleware:
-
     def __init__(self, settings=None):
         self.default_policy = DefaultReferrerPolicy
         if settings is not None:
-            self.default_policy = _load_policy_class(settings.get(
-                'REFERRER_POLICY'))
+            self.default_policy = _load_policy_class(settings.get("REFERRER_POLICY"))
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        if not crawler.settings.getbool("REFERER_ENABLED"):
+            raise NotConfigured
+        mw = cls(crawler.settings)
+
+        # Note: this hook is a bit of a hack to intercept redirections
+        crawler.signals.connect(mw.request_scheduled, signal=signals.request_scheduled)
+
+        return mw

     def policy(self, resp_or_url, request):
         """
@@ -215,4 +336,50 @@ class RefererMiddleware:
           it is used if valid
         - otherwise, the policy from settings is used.
         """
-        pass
+        policy_name = request.meta.get("referrer_policy")
+        if policy_name is None:
+            if isinstance(resp_or_url, Response):
+                policy_header = resp_or_url.headers.get("Referrer-Policy")
+                if policy_header is not None:
+                    policy_name = to_unicode(policy_header.decode("latin1"))
+        if policy_name is None:
+            return self.default_policy()
+
+        cls = _load_policy_class(policy_name, warning_only=True)
+        return cls() if cls else self.default_policy()
+
+    def process_spider_output(self, response, result, spider):
+        return (self._set_referer(r, response) for r in result or ())
+
+    async def process_spider_output_async(self, response, result, spider):
+        async for r in result or ():
+            yield self._set_referer(r, response)
+
+    def _set_referer(self, r, response):
+        if isinstance(r, Request):
+            referrer = self.policy(response, r).referrer(response.url, r.url)
+            if referrer is not None:
+                r.headers.setdefault("Referer", referrer)
+        return r
+
+    def request_scheduled(self, request, spider):
+        # check redirected request to patch "Referer" header if necessary
+        redirected_urls = request.meta.get("redirect_urls", [])
+        if redirected_urls:
+            request_referrer = request.headers.get("Referer")
+            # we don't patch the referrer value if there is none
+            if request_referrer is not None:
+                # the request's referrer header value acts as a surrogate
+                # for the parent response URL
+                #
+                # Note: if the 3xx response contained a Referrer-Policy header,
+                #       the information is not available using this hook
+                parent_url = safe_url_string(request_referrer)
+                policy_referrer = self.policy(parent_url, request).referrer(
+                    parent_url, request.url
+                )
+                if policy_referrer != request_referrer:
+                    if policy_referrer is None:
+                        request.headers.pop("Referer")
+                    else:
+                        request.headers["Referer"] = policy_referrer
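
The policy lookup above resolves either a settings-level default or a per-request override. A short sketch of both, using a standard policy name (the URL is a placeholder):

# settings.py sketch: a standard policy name (see _policy_classes) or a dotted
# path to a ReferrerPolicy subclass
REFERRER_POLICY = "same-origin"

# per-request override, read by RefererMiddleware.policy() from request.meta
from scrapy import Request

request = Request("https://example.com/page", meta={"referrer_policy": "no-referrer"})
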
diff --git a/scrapy/spidermiddlewares/urllength.py b/scrapy/spidermiddlewares/urllength.py
index 1844c1465..f6d92e53a 100644
--- a/scrapy/spidermiddlewares/urllength.py
+++ b/scrapy/spidermiddlewares/urllength.py
@@ -3,13 +3,43 @@ Url Length Spider Middleware

 See documentation in docs/topics/spider-middleware.rst
 """
+
 import logging
+
 from scrapy.exceptions import NotConfigured
 from scrapy.http import Request
+
 logger = logging.getLogger(__name__)


 class UrlLengthMiddleware:
-
     def __init__(self, maxlength):
         self.maxlength = maxlength
+
+    @classmethod
+    def from_settings(cls, settings):
+        maxlength = settings.getint("URLLENGTH_LIMIT")
+        if not maxlength:
+            raise NotConfigured
+        return cls(maxlength)
+
+    def process_spider_output(self, response, result, spider):
+        return (r for r in result or () if self._filter(r, spider))
+
+    async def process_spider_output_async(self, response, result, spider):
+        async for r in result or ():
+            if self._filter(r, spider):
+                yield r
+
+    def _filter(self, request, spider):
+        if isinstance(request, Request) and len(request.url) > self.maxlength:
+            logger.info(
+                "Ignoring link (url length > %(maxlength)d): %(url)s ",
+                {"maxlength": self.maxlength, "url": request.url},
+                extra={"spider": spider},
+            )
+            spider.crawler.stats.inc_value(
+                "urllength/request_ignored_count", spider=spider
+            )
+            return False
+        return True
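
UrlLengthMiddleware is driven entirely by one setting; a sketch (the value shown is the usual default):

# settings.py sketch -- URLLENGTH_LIMIT = 0 would raise NotConfigured in
# from_settings() and disable the middleware entirely
URLLENGTH_LIMIT = 2083
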
diff --git a/scrapy/spiders/crawl.py b/scrapy/spiders/crawl.py
index 291994af6..31e845716 100644
--- a/scrapy/spiders/crawl.py
+++ b/scrapy/spiders/crawl.py
@@ -4,20 +4,46 @@ for scraping typical web sites that requires crawling pages.

 See documentation in docs/topics/spiders.rst
 """
+
 import copy
 from typing import AsyncIterable, Awaitable, Sequence
+
 from scrapy.http import HtmlResponse, Request, Response
 from scrapy.linkextractors import LinkExtractor
 from scrapy.spiders import Spider
 from scrapy.utils.asyncgen import collect_asyncgen
 from scrapy.utils.spider import iterate_spider_output
+
+
+def _identity(x):
+    return x
+
+
+def _identity_process_request(request, response):
+    return request
+
+
+def _get_method(method, spider):
+    if callable(method):
+        return method
+    if isinstance(method, str):
+        return getattr(spider, method, None)
+
+
 _default_link_extractor = LinkExtractor()


 class Rule:
-
-    def __init__(self, link_extractor=None, callback=None, cb_kwargs=None,
-        follow=None, process_links=None, process_request=None, errback=None):
+    def __init__(
+        self,
+        link_extractor=None,
+        callback=None,
+        cb_kwargs=None,
+        follow=None,
+        process_links=None,
+        process_request=None,
+        errback=None,
+    ):
         self.link_extractor = link_extractor or _default_link_extractor
         self.callback = callback
         self.errback = errback
@@ -26,6 +52,12 @@ class Rule:
         self.process_request = process_request or _identity_process_request
         self.follow = follow if follow is not None else not callback

+    def _compile(self, spider):
+        self.callback = _get_method(self.callback, spider)
+        self.errback = _get_method(self.errback, spider)
+        self.process_links = _get_method(self.process_links, spider)
+        self.process_request = _get_method(self.process_request, spider)
+

 class CrawlSpider(Spider):
     rules: Sequence[Rule] = ()
@@ -33,3 +65,85 @@ class CrawlSpider(Spider):
     def __init__(self, *a, **kw):
         super().__init__(*a, **kw)
         self._compile_rules()
+
+    def _parse(self, response, **kwargs):
+        return self._parse_response(
+            response=response,
+            callback=self.parse_start_url,
+            cb_kwargs=kwargs,
+            follow=True,
+        )
+
+    def parse_start_url(self, response, **kwargs):
+        return []
+
+    def process_results(self, response: Response, results: list):
+        return results
+
+    def _build_request(self, rule_index, link):
+        return Request(
+            url=link.url,
+            callback=self._callback,
+            errback=self._errback,
+            meta=dict(rule=rule_index, link_text=link.text),
+        )
+
+    def _requests_to_follow(self, response):
+        if not isinstance(response, HtmlResponse):
+            return
+        seen = set()
+        for rule_index, rule in enumerate(self._rules):
+            links = [
+                lnk
+                for lnk in rule.link_extractor.extract_links(response)
+                if lnk not in seen
+            ]
+            for link in rule.process_links(links):
+                seen.add(link)
+                request = self._build_request(rule_index, link)
+                yield rule.process_request(request, response)
+
+    def _callback(self, response, **cb_kwargs):
+        rule = self._rules[response.meta["rule"]]
+        return self._parse_response(
+            response, rule.callback, {**rule.cb_kwargs, **cb_kwargs}, rule.follow
+        )
+
+    def _errback(self, failure):
+        rule = self._rules[failure.request.meta["rule"]]
+        return self._handle_failure(failure, rule.errback)
+
+    async def _parse_response(self, response, callback, cb_kwargs, follow=True):
+        if callback:
+            cb_res = callback(response, **cb_kwargs) or ()
+            if isinstance(cb_res, AsyncIterable):
+                cb_res = await collect_asyncgen(cb_res)
+            elif isinstance(cb_res, Awaitable):
+                cb_res = await cb_res
+            cb_res = self.process_results(response, cb_res)
+            for request_or_item in iterate_spider_output(cb_res):
+                yield request_or_item
+
+        if follow and self._follow_links:
+            for request_or_item in self._requests_to_follow(response):
+                yield request_or_item
+
+    def _handle_failure(self, failure, errback):
+        if errback:
+            results = errback(failure) or ()
+            for request_or_item in iterate_spider_output(results):
+                yield request_or_item
+
+    def _compile_rules(self):
+        self._rules = []
+        for rule in self.rules:
+            self._rules.append(copy.copy(rule))
+            self._rules[-1]._compile(self)
+
+    @classmethod
+    def from_crawler(cls, crawler, *args, **kwargs):
+        spider = super().from_crawler(crawler, *args, **kwargs)
+        spider._follow_links = crawler.settings.getbool(
+            "CRAWLSPIDER_FOLLOW_LINKS", True
+        )
+        return spider
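
Putting the restored Rule/CrawlSpider machinery together, a minimal hypothetical crawl spider might look like this; string callbacks are resolved by Rule._compile(), and domains, URLs and patterns are placeholders.

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class CatalogSpider(CrawlSpider):
    # hypothetical spider
    name = "catalog_example"
    allowed_domains = ["example.com"]
    start_urls = ["https://example.com/catalog"]

    rules = (
        # follow category pages; no callback, so follow defaults to True
        Rule(LinkExtractor(allow=r"/category/")),
        # parse item pages; the string callback is resolved by Rule._compile()
        Rule(LinkExtractor(allow=r"/item/"), callback="parse_item", follow=False),
    )

    def parse_item(self, response):
        yield {"url": response.url, "title": response.css("h1::text").get()}
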
diff --git a/scrapy/spiders/feed.py b/scrapy/spiders/feed.py
index 4c0801928..42675c76a 100644
--- a/scrapy/spiders/feed.py
+++ b/scrapy/spiders/feed.py
@@ -20,8 +20,9 @@ class XMLFeedSpider(Spider):
     'xml' selector, or an 'html' selector.  In most cases, it's convenient to
     use iternodes, since it's faster and cleaner.
     """
-    iterator = 'iternodes'
-    itertag = 'item'
+
+    iterator = "iternodes"
+    itertag = "item"
     namespaces = ()

     def process_results(self, response, results):
@@ -32,18 +33,20 @@ class XMLFeedSpider(Spider):
         the response which originated that results. It must return a list of
         results (items or requests).
         """
-        pass
+        return results

     def adapt_response(self, response):
         """You can override this function in order to make any changes you want
         to into the feed before parsing it. This function must return a
         response.
         """
-        pass
+        return response

     def parse_node(self, response, selector):
         """This method must be overridden with your custom spider functionality"""
-        pass
+        if hasattr(self, "parse_item"):  # backward compatibility
+            return self.parse_item(response, selector)
+        raise NotImplementedError

     def parse_nodes(self, response, nodes):
         """This method is called for the nodes matching the provided tag name
@@ -52,7 +55,42 @@ class XMLFeedSpider(Spider):
         This method must return either an item, a request, or a list
         containing any of them.
         """
-        pass
+
+        for selector in nodes:
+            ret = iterate_spider_output(self.parse_node(response, selector))
+            for result_item in self.process_results(response, ret):
+                yield result_item
+
+    def _parse(self, response, **kwargs):
+        if not hasattr(self, "parse_node"):
+            raise NotConfigured(
+                "You must define parse_node method in order to scrape this XML feed"
+            )
+
+        response = self.adapt_response(response)
+        if self.iterator == "iternodes":
+            nodes = self._iternodes(response)
+        elif self.iterator == "xml":
+            selector = Selector(response, type="xml")
+            self._register_namespaces(selector)
+            nodes = selector.xpath(f"//{self.itertag}")
+        elif self.iterator == "html":
+            selector = Selector(response, type="html")
+            self._register_namespaces(selector)
+            nodes = selector.xpath(f"//{self.itertag}")
+        else:
+            raise NotSupported("Unsupported node iterator")
+
+        return self.parse_nodes(response, nodes)
+
+    def _iternodes(self, response):
+        for node in xmliter_lxml(response, self.itertag):
+            self._register_namespaces(node)
+            yield node
+
+    def _register_namespaces(self, selector):
+        for prefix, uri in self.namespaces:
+            selector.register_namespace(prefix, uri)


 class CSVFeedSpider(Spider):
@@ -63,21 +101,26 @@ class CSVFeedSpider(Spider):
     You can set some options regarding the CSV file, such as the delimiter, quotechar
     and the file's headers.
     """
-    delimiter = None
-    quotechar = None
+
+    delimiter = (
+        None  # When this is None, python's csv module's default delimiter is used
+    )
+    quotechar = (
+        None  # When this is None, python's csv module's default quotechar is used
+    )
     headers = None

     def process_results(self, response, results):
         """This method has the same purpose as the one in XMLFeedSpider"""
-        pass
+        return results

     def adapt_response(self, response):
         """This method has the same purpose as the one in XMLFeedSpider"""
-        pass
+        return response

     def parse_row(self, response, row):
         """This method must be overridden with your custom spider functionality"""
-        pass
+        raise NotImplementedError

     def parse_rows(self, response):
         """Receives a response and a dict (representing each row) with a key for
@@ -85,4 +128,18 @@ class CSVFeedSpider(Spider):
         gives the opportunity to override adapt_response and
         process_results methods for pre and post-processing purposes.
         """
-        pass
+
+        for row in csviter(
+            response, self.delimiter, self.headers, quotechar=self.quotechar
+        ):
+            ret = iterate_spider_output(self.parse_row(response, row))
+            for result_item in self.process_results(response, ret):
+                yield result_item
+
+    def _parse(self, response, **kwargs):
+        if not hasattr(self, "parse_row"):
+            raise NotConfigured(
+                "You must define parse_row method in order to scrape this CSV feed"
+            )
+        response = self.adapt_response(response)
+        return self.parse_rows(response)
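
As a usage sketch for the XML feed path above (the feed URL and fields are made up), a subclass only needs itertag and parse_node:

from scrapy.spiders import XMLFeedSpider


class RssSpider(XMLFeedSpider):
    # hypothetical feed spider
    name = "rss_example"
    start_urls = ["https://example.com/feed.xml"]
    iterator = "iternodes"   # the default, fastest iterator
    itertag = "item"         # each <item> node is passed to parse_node()

    def parse_node(self, response, node):
        yield {
            "title": node.xpath("title/text()").get(),
            "link": node.xpath("link/text()").get(),
        }
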
diff --git a/scrapy/spiders/init.py b/scrapy/spiders/init.py
index 7f6f7eefe..3cb215b0f 100644
--- a/scrapy/spiders/init.py
+++ b/scrapy/spiders/init.py
@@ -5,11 +5,15 @@ from scrapy.utils.spider import iterate_spider_output
 class InitSpider(Spider):
     """Base Spider with initialization facilities"""

+    def start_requests(self):
+        self._postinit_reqs = super().start_requests()
+        return iterate_spider_output(self.init_request())
+
     def initialized(self, response=None):
         """This method must be set as the callback of your last initialization
         request. See self.init_request() docstring for more info.
         """
-        pass
+        return self.__dict__.pop("_postinit_reqs")

     def init_request(self):
         """This function should return one initialization request, with the
@@ -24,4 +28,4 @@ class InitSpider(Spider):
         overridden only when you need to perform requests to initialize your
         spider
         """
-        pass
+        return self.initialized()
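
A hedged example of the InitSpider contract restored above: init_request issues one request whose callback chain ends in initialized(), which releases the deferred start requests. URLs and credentials are placeholders.

from scrapy import FormRequest
from scrapy.spiders.init import InitSpider


class LoginFirstSpider(InitSpider):
    # hypothetical spider
    name = "login_first"
    start_urls = ["https://example.com/private"]   # held back until initialized()

    def init_request(self):
        # single initialization request; its callback chain must end in initialized()
        return FormRequest(
            "https://example.com/login",
            formdata={"user": "u", "pass": "p"},   # placeholder credentials
            callback=self.after_login,
        )

    def after_login(self, response):
        # releases the start requests stored by start_requests()
        return self.initialized()

    def parse(self, response):
        yield {"url": response.url}
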
diff --git a/scrapy/spiders/sitemap.py b/scrapy/spiders/sitemap.py
index 97a4c2aed..386aa6a6e 100644
--- a/scrapy/spiders/sitemap.py
+++ b/scrapy/spiders/sitemap.py
@@ -1,25 +1,41 @@
 import logging
 import re
 from typing import TYPE_CHECKING, Any
+
 from scrapy.http import Request, XmlResponse
 from scrapy.spiders import Spider
 from scrapy.utils._compression import _DecompressionMaxSizeExceeded
 from scrapy.utils.gz import gunzip, gzip_magic_number
 from scrapy.utils.sitemap import Sitemap, sitemap_urls_from_robots
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self
+
     from scrapy.crawler import Crawler
+
 logger = logging.getLogger(__name__)


 class SitemapSpider(Spider):
     sitemap_urls = ()
-    sitemap_rules = [('', 'parse')]
-    sitemap_follow = ['']
+    sitemap_rules = [("", "parse")]
+    sitemap_follow = [""]
     sitemap_alternate_links = False
     _max_size: int
     _warn_size: int

+    @classmethod
+    def from_crawler(cls, crawler: "Crawler", *args: Any, **kwargs: Any) -> "Self":
+        spider = super().from_crawler(crawler, *args, **kwargs)
+        spider._max_size = getattr(
+            spider, "download_maxsize", spider.settings.getint("DOWNLOAD_MAXSIZE")
+        )
+        spider._warn_size = getattr(
+            spider, "download_warnsize", spider.settings.getint("DOWNLOAD_WARNSIZE")
+        )
+        return spider
+
     def __init__(self, *a, **kw):
         super().__init__(*a, **kw)
         self._cbs = []
@@ -29,15 +45,89 @@ class SitemapSpider(Spider):
             self._cbs.append((regex(r), c))
         self._follow = [regex(x) for x in self.sitemap_follow]

+    def start_requests(self):
+        for url in self.sitemap_urls:
+            yield Request(url, self._parse_sitemap)
+
     def sitemap_filter(self, entries):
         """This method can be used to filter sitemap entries by their
         attributes, for example, you can filter locs with lastmod greater
         than a given date (see docs).
         """
-        pass
+        for entry in entries:
+            yield entry
+
+    def _parse_sitemap(self, response):
+        if response.url.endswith("/robots.txt"):
+            for url in sitemap_urls_from_robots(response.text, base_url=response.url):
+                yield Request(url, callback=self._parse_sitemap)
+        else:
+            body = self._get_sitemap_body(response)
+            if body is None:
+                logger.warning(
+                    "Ignoring invalid sitemap: %(response)s",
+                    {"response": response},
+                    extra={"spider": self},
+                )
+                return
+
+            s = Sitemap(body)
+            it = self.sitemap_filter(s)
+
+            if s.type == "sitemapindex":
+                for loc in iterloc(it, self.sitemap_alternate_links):
+                    if any(x.search(loc) for x in self._follow):
+                        yield Request(loc, callback=self._parse_sitemap)
+            elif s.type == "urlset":
+                for loc in iterloc(it, self.sitemap_alternate_links):
+                    for r, c in self._cbs:
+                        if r.search(loc):
+                            yield Request(loc, callback=c)
+                            break

     def _get_sitemap_body(self, response):
         """Return the sitemap body contained in the given response,
         or None if the response is not a sitemap.
         """
-        pass
+        if isinstance(response, XmlResponse):
+            return response.body
+        if gzip_magic_number(response):
+            uncompressed_size = len(response.body)
+            max_size = response.meta.get("download_maxsize", self._max_size)
+            warn_size = response.meta.get("download_warnsize", self._warn_size)
+            try:
+                body = gunzip(response.body, max_size=max_size)
+            except _DecompressionMaxSizeExceeded:
+                return None
+            if uncompressed_size < warn_size <= len(body):
+                logger.warning(
+                    f"{response} body size after decompression ({len(body)} B) "
+                    f"is larger than the download warning size ({warn_size} B)."
+                )
+            return body
+        # Actual gzipped sitemap files are decompressed above;
+        # if we get here (the response body is not gzipped)
+        # with a response for a .xml.gz URL, it usually means the body was
+        # already gunzipped by the HttpCompression middleware: the HTTP
+        # response was sent with "Content-Encoding: gzip" without actually
+        # being a .xml.gz file in the first place, merely XML
+        # gzip-compressed on the fly. In other words, we have plain XML here.
+        if response.url.endswith(".xml") or response.url.endswith(".xml.gz"):
+            return response.body
+
+
+def regex(x):
+    if isinstance(x, str):
+        return re.compile(x)
+    return x
+
+
+def iterloc(it, alt=False):
+    for d in it:
+        yield d["loc"]
+
+        # Also consider alternate URLs (xhtml:link rel="alternate")
+        if alt and "alternate" in d:
+            yield from d["alternate"]
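
A minimal, hypothetical SitemapSpider exercising sitemap_rules, sitemap_follow and the sitemap_filter hook shown above; URLs and patterns are placeholders.

from scrapy.spiders import SitemapSpider


class SitemapExampleSpider(SitemapSpider):
    # hypothetical spider
    name = "sitemap_example"
    sitemap_urls = ["https://example.com/robots.txt"]    # robots.txt or sitemap URLs
    sitemap_rules = [(r"/products/", "parse_product")]   # (regex, callback name)
    sitemap_follow = [r"/sitemap-products"]              # which sub-sitemaps to follow

    def sitemap_filter(self, entries):
        # the optional hook above; here it keeps only entries carrying a lastmod
        for entry in entries:
            if "lastmod" in entry:
                yield entry

    def parse_product(self, response):
        yield {"url": response.url}
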
diff --git a/scrapy/squeues.py b/scrapy/squeues.py
index 612da342c..f665ad88c 100644
--- a/scrapy/squeues.py
+++ b/scrapy/squeues.py
@@ -1,28 +1,144 @@
 """
 Scheduler queues
 """
+
 import marshal
 import pickle
 from os import PathLike
 from pathlib import Path
 from typing import Union
+
 from queuelib import queue
+
 from scrapy.utils.request import request_from_dict
-_PickleFifoSerializationDiskQueue = _serializable_queue(_with_mkdir(queue.
-    FifoDiskQueue), _pickle_serialize, pickle.loads)
-_PickleLifoSerializationDiskQueue = _serializable_queue(_with_mkdir(queue.
-    LifoDiskQueue), _pickle_serialize, pickle.loads)
-_MarshalFifoSerializationDiskQueue = _serializable_queue(_with_mkdir(queue.
-    FifoDiskQueue), marshal.dumps, marshal.loads)
-_MarshalLifoSerializationDiskQueue = _serializable_queue(_with_mkdir(queue.
-    LifoDiskQueue), marshal.dumps, marshal.loads)
-PickleFifoDiskQueue = _scrapy_serialization_queue(
-    _PickleFifoSerializationDiskQueue)
-PickleLifoDiskQueue = _scrapy_serialization_queue(
-    _PickleLifoSerializationDiskQueue)
-MarshalFifoDiskQueue = _scrapy_serialization_queue(
-    _MarshalFifoSerializationDiskQueue)
-MarshalLifoDiskQueue = _scrapy_serialization_queue(
-    _MarshalLifoSerializationDiskQueue)
+
+
+def _with_mkdir(queue_class):
+    class DirectoriesCreated(queue_class):
+        def __init__(self, path: Union[str, PathLike], *args, **kwargs):
+            dirname = Path(path).parent
+            if not dirname.exists():
+                dirname.mkdir(parents=True, exist_ok=True)
+            super().__init__(path, *args, **kwargs)
+
+    return DirectoriesCreated
+
+
+def _serializable_queue(queue_class, serialize, deserialize):
+    class SerializableQueue(queue_class):
+        def push(self, obj):
+            s = serialize(obj)
+            super().push(s)
+
+        def pop(self):
+            s = super().pop()
+            if s:
+                return deserialize(s)
+
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            try:
+                s = super().peek()
+            except AttributeError as ex:
+                raise NotImplementedError(
+                    "The underlying queue class does not implement 'peek'"
+                ) from ex
+            if s:
+                return deserialize(s)
+
+    return SerializableQueue
+
+
+def _scrapy_serialization_queue(queue_class):
+    class ScrapyRequestQueue(queue_class):
+        def __init__(self, crawler, key):
+            self.spider = crawler.spider
+            super().__init__(key)
+
+        @classmethod
+        def from_crawler(cls, crawler, key, *args, **kwargs):
+            return cls(crawler, key)
+
+        def push(self, request):
+            request = request.to_dict(spider=self.spider)
+            return super().push(request)
+
+        def pop(self):
+            request = super().pop()
+            if not request:
+                return None
+            return request_from_dict(request, spider=self.spider)
+
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            request = super().peek()
+            if not request:
+                return None
+            return request_from_dict(request, spider=self.spider)
+
+    return ScrapyRequestQueue
+
+
+def _scrapy_non_serialization_queue(queue_class):
+    class ScrapyRequestQueue(queue_class):
+        @classmethod
+        def from_crawler(cls, crawler, *args, **kwargs):
+            return cls()
+
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            try:
+                s = super().peek()
+            except AttributeError as ex:
+                raise NotImplementedError(
+                    "The underlying queue class does not implement 'peek'"
+                ) from ex
+            return s
+
+    return ScrapyRequestQueue
+
+
+def _pickle_serialize(obj):
+    try:
+        return pickle.dumps(obj, protocol=4)
+    # Both pickle.PicklingError and AttributeError can be raised by pickle.dump(s)
+    # TypeError is raised from parsel.Selector
+    except (pickle.PicklingError, AttributeError, TypeError) as e:
+        raise ValueError(str(e)) from e
+
+
+_PickleFifoSerializationDiskQueue = _serializable_queue(
+    _with_mkdir(queue.FifoDiskQueue), _pickle_serialize, pickle.loads
+)
+_PickleLifoSerializationDiskQueue = _serializable_queue(
+    _with_mkdir(queue.LifoDiskQueue), _pickle_serialize, pickle.loads
+)
+_MarshalFifoSerializationDiskQueue = _serializable_queue(
+    _with_mkdir(queue.FifoDiskQueue), marshal.dumps, marshal.loads
+)
+_MarshalLifoSerializationDiskQueue = _serializable_queue(
+    _with_mkdir(queue.LifoDiskQueue), marshal.dumps, marshal.loads
+)
+
+# public queue classes
+PickleFifoDiskQueue = _scrapy_serialization_queue(_PickleFifoSerializationDiskQueue)
+PickleLifoDiskQueue = _scrapy_serialization_queue(_PickleLifoSerializationDiskQueue)
+MarshalFifoDiskQueue = _scrapy_serialization_queue(_MarshalFifoSerializationDiskQueue)
+MarshalLifoDiskQueue = _scrapy_serialization_queue(_MarshalLifoSerializationDiskQueue)
 FifoMemoryQueue = _scrapy_non_serialization_queue(queue.FifoMemoryQueue)
 LifoMemoryQueue = _scrapy_non_serialization_queue(queue.LifoMemoryQueue)
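
The public queue classes rebuilt above are wired in through the scheduler settings; a sketch with an assumed job directory:

# settings.py sketch -- these dotted paths are the stock defaults; JOBDIR is a
# made-up path and is what actually enables the on-disk queues
SCHEDULER_DISK_QUEUE = "scrapy.squeues.PickleLifoDiskQueue"
SCHEDULER_MEMORY_QUEUE = "scrapy.squeues.LifoMemoryQueue"
JOBDIR = "crawls/example-run"
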
diff --git a/scrapy/statscollectors.py b/scrapy/statscollectors.py
index dac77ae91..15193aac5 100644
--- a/scrapy/statscollectors.py
+++ b/scrapy/statscollectors.py
@@ -4,26 +4,95 @@ Scrapy extension for collecting scraping stats
 import logging
 import pprint
 from typing import TYPE_CHECKING, Any, Dict, Optional
+
 from scrapy import Spider
+
 if TYPE_CHECKING:
     from scrapy.crawler import Crawler
+
 logger = logging.getLogger(__name__)
+
+
 StatsT = Dict[str, Any]


 class StatsCollector:
-
-    def __init__(self, crawler: 'Crawler'):
-        self._dump: bool = crawler.settings.getbool('STATS_DUMP')
+    def __init__(self, crawler: "Crawler"):
+        self._dump: bool = crawler.settings.getbool("STATS_DUMP")
         self._stats: StatsT = {}

+    def get_value(
+        self, key: str, default: Any = None, spider: Optional[Spider] = None
+    ) -> Any:
+        return self._stats.get(key, default)

-class MemoryStatsCollector(StatsCollector):
+    def get_stats(self, spider: Optional[Spider] = None) -> StatsT:
+        return self._stats
+
+    def set_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
+        self._stats[key] = value
+
+    def set_stats(self, stats: StatsT, spider: Optional[Spider] = None) -> None:
+        self._stats = stats
+
+    def inc_value(
+        self, key: str, count: int = 1, start: int = 0, spider: Optional[Spider] = None
+    ) -> None:
+        d = self._stats
+        d[key] = d.setdefault(key, start) + count
+
+    def max_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
+        self._stats[key] = max(self._stats.setdefault(key, value), value)
+
+    def min_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
+        self._stats[key] = min(self._stats.setdefault(key, value), value)

-    def __init__(self, crawler: 'Crawler'):
+    def clear_stats(self, spider: Optional[Spider] = None) -> None:
+        self._stats.clear()
+
+    def open_spider(self, spider: Spider) -> None:
+        pass
+
+    def close_spider(self, spider: Spider, reason: str) -> None:
+        if self._dump:
+            logger.info(
+                "Dumping Scrapy stats:\n" + pprint.pformat(self._stats),
+                extra={"spider": spider},
+            )
+        self._persist_stats(self._stats, spider)
+
+    def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
+        pass
+
+
+class MemoryStatsCollector(StatsCollector):
+    def __init__(self, crawler: "Crawler"):
         super().__init__(crawler)
         self.spider_stats: Dict[str, StatsT] = {}

+    def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
+        self.spider_stats[spider.name] = stats
+

 class DummyStatsCollector(StatsCollector):
-    pass
+    def get_value(
+        self, key: str, default: Any = None, spider: Optional[Spider] = None
+    ) -> Any:
+        return default
+
+    def set_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
+        pass
+
+    def set_stats(self, stats: StatsT, spider: Optional[Spider] = None) -> None:
+        pass
+
+    def inc_value(
+        self, key: str, count: int = 1, start: int = 0, spider: Optional[Spider] = None
+    ) -> None:
+        pass
+
+    def max_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
+        pass
+
+    def min_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
+        pass
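
A small sketch of how the restored StatsCollector API is typically used from an extension via crawler.stats; the extension name and stat keys are made up.

from scrapy import signals


class ItemCountExtension:
    """Minimal sketch of an extension that bumps custom stats."""

    def __init__(self, stats):
        self.stats = stats

    @classmethod
    def from_crawler(cls, crawler):
        ext = cls(crawler.stats)
        crawler.signals.connect(ext.item_scraped, signal=signals.item_scraped)
        return ext

    def item_scraped(self, item, response, spider):
        self.stats.inc_value("custom/items_seen", spider=spider)
        self.stats.max_value("custom/longest_url", len(response.url), spider=spider)
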
diff --git a/scrapy/utils/_compression.py b/scrapy/utils/_compression.py
index 106ea8c09..7c40d0a02 100644
--- a/scrapy/utils/_compression.py
+++ b/scrapy/utils/_compression.py
@@ -1,7 +1,9 @@
 import zlib
 from io import BytesIO
 from warnings import warn
+
 from scrapy.exceptions import ScrapyDeprecationWarning
+
 try:
     import brotli
 except ImportError:
@@ -11,14 +13,111 @@ else:
         brotli.Decompressor.process
     except AttributeError:
         warn(
-            'You have brotlipy installed, and Scrapy will use it, but Scrapy support for brotlipy is deprecated and will stop working in a future version of Scrapy. brotlipy itself is deprecated, it has been superseded by brotlicffi (not currently supported by Scrapy). Please, uninstall brotlipy and install brotli instead. brotlipy has the same import name as brotli, so keeping both installed is strongly discouraged.'
-            , ScrapyDeprecationWarning)
+            (
+                "You have brotlipy installed, and Scrapy will use it, but "
+                "Scrapy support for brotlipy is deprecated and will stop "
+                "working in a future version of Scrapy. brotlipy itself is "
+                "deprecated, it has been superseded by brotlicffi (not "
+                "currently supported by Scrapy). Please, uninstall brotlipy "
+                "and install brotli instead. brotlipy has the same import "
+                "name as brotli, so keeping both installed is strongly "
+                "discouraged."
+            ),
+            ScrapyDeprecationWarning,
+        )
+
+        def _brotli_decompress(decompressor, data):
+            return decompressor.decompress(data)
+
+    else:
+
+        def _brotli_decompress(decompressor, data):
+            return decompressor.process(data)
+
+
 try:
     import zstandard
 except ImportError:
     pass
-_CHUNK_SIZE = 65536
+
+
+_CHUNK_SIZE = 65536  # 64 KiB


 class _DecompressionMaxSizeExceeded(ValueError):
     pass
+
+
+def _inflate(data: bytes, *, max_size: int = 0) -> bytes:
+    decompressor = zlib.decompressobj()
+    raw_decompressor = zlib.decompressobj(wbits=-15)
+    input_stream = BytesIO(data)
+    output_stream = BytesIO()
+    output_chunk = b"."
+    decompressed_size = 0
+    while output_chunk:
+        input_chunk = input_stream.read(_CHUNK_SIZE)
+        try:
+            output_chunk = decompressor.decompress(input_chunk)
+        except zlib.error:
+            if decompressor != raw_decompressor:
+                # ugly hack to work with raw deflate content that may
+                # be sent by microsoft servers. For more information, see:
+                # http://carsten.codimi.de/gzip.yaws/
+                # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
+                # http://www.gzip.org/zlib/zlib_faq.html#faq38
+                decompressor = raw_decompressor
+                output_chunk = decompressor.decompress(input_chunk)
+            else:
+                raise
+        decompressed_size += len(output_chunk)
+        if max_size and decompressed_size > max_size:
+            raise _DecompressionMaxSizeExceeded(
+                f"The number of bytes decompressed so far "
+                f"({decompressed_size} B) exceed the specified maximum "
+                f"({max_size} B)."
+            )
+        output_stream.write(output_chunk)
+    output_stream.seek(0)
+    return output_stream.read()
+
+
+def _unbrotli(data: bytes, *, max_size: int = 0) -> bytes:
+    decompressor = brotli.Decompressor()
+    input_stream = BytesIO(data)
+    output_stream = BytesIO()
+    output_chunk = b"."
+    decompressed_size = 0
+    while output_chunk:
+        input_chunk = input_stream.read(_CHUNK_SIZE)
+        output_chunk = _brotli_decompress(decompressor, input_chunk)
+        decompressed_size += len(output_chunk)
+        if max_size and decompressed_size > max_size:
+            raise _DecompressionMaxSizeExceeded(
+                f"The number of bytes decompressed so far "
+                f"({decompressed_size} B) exceed the specified maximum "
+                f"({max_size} B)."
+            )
+        output_stream.write(output_chunk)
+    output_stream.seek(0)
+    return output_stream.read()
+
+
+def _unzstd(data: bytes, *, max_size: int = 0) -> bytes:
+    decompressor = zstandard.ZstdDecompressor()
+    stream_reader = decompressor.stream_reader(BytesIO(data))
+    output_stream = BytesIO()
+    output_chunk = b"."
+    decompressed_size = 0
+    while output_chunk:
+        output_chunk = stream_reader.read(_CHUNK_SIZE)
+        decompressed_size += len(output_chunk)
+        if max_size and decompressed_size > max_size:
+            raise _DecompressionMaxSizeExceeded(
+                f"The number of bytes decompressed so far "
+                f"({decompressed_size} B) exceed the specified maximum "
+                f"({max_size} B)."
+            )
+        output_stream.write(output_chunk)
+    output_stream.seek(0)
+    return output_stream.read()
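
Usage note (not part of the patch): a quick, illustrative round-trip through `_inflate`, the private helper behind Scrapy's HTTP decompression support, showing the `max_size` guard raising `_DecompressionMaxSizeExceeded`.

    import zlib
    from scrapy.utils._compression import _DecompressionMaxSizeExceeded, _inflate

    payload = b"x" * 100_000
    compressed = zlib.compress(payload)
    assert _inflate(compressed) == payload
    try:
        _inflate(compressed, max_size=1024)
    except _DecompressionMaxSizeExceeded:
        pass  # expected: the decompressed data is larger than the 1 KiB cap
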
diff --git a/scrapy/utils/asyncgen.py b/scrapy/utils/asyncgen.py
index bcdb5eb14..0505db343 100644
--- a/scrapy/utils/asyncgen.py
+++ b/scrapy/utils/asyncgen.py
@@ -1,7 +1,18 @@
 from typing import AsyncGenerator, AsyncIterable, Iterable, Union


-async def as_async_generator(it: Union[Iterable, AsyncIterable]
-    ) ->AsyncGenerator:
+async def collect_asyncgen(result: AsyncIterable) -> list:
+    results = []
+    async for x in result:
+        results.append(x)
+    return results
+
+
+async def as_async_generator(it: Union[Iterable, AsyncIterable]) -> AsyncGenerator:
     """Wraps an iterable (sync or async) into an async generator."""
-    pass
+    if isinstance(it, AsyncIterable):
+        async for r in it:
+            yield r
+    else:
+        for r in it:
+            yield r
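
Usage note (not part of the patch): `as_async_generator` wraps either a sync or an async iterable, and `collect_asyncgen` drains one into a list; a tiny self-contained check:

    import asyncio
    from scrapy.utils.asyncgen import as_async_generator, collect_asyncgen

    async def main():
        assert await collect_asyncgen(as_async_generator([1, 2, 3])) == [1, 2, 3]

    asyncio.run(main())
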
diff --git a/scrapy/utils/benchserver.py b/scrapy/utils/benchserver.py
index fd25c425c..38884a9f0 100644
--- a/scrapy/utils/benchserver.py
+++ b/scrapy/utils/benchserver.py
@@ -1,5 +1,6 @@
 import random
 from urllib.parse import urlencode
+
 from twisted.web.resource import Resource
 from twisted.web.server import Site

@@ -7,11 +8,39 @@ from twisted.web.server import Site
 class Root(Resource):
     isLeaf = True

+    def getChild(self, name, request):
+        return self
+
+    def render(self, request):
+        total = _getarg(request, b"total", 100, int)
+        show = _getarg(request, b"show", 10, int)
+        nlist = [random.randint(1, total) for _ in range(show)]
+        request.write(b"<html><head></head><body>")
+        args = request.args.copy()
+        for nl in nlist:
+            args["n"] = nl
+            argstr = urlencode(args, doseq=True)
+            request.write(
+                f"<a href='/follow?{argstr}'>follow {nl}</a><br>".encode("utf8")
+            )
+        request.write(b"</body></html>")
+        return b""
+
+
+def _getarg(request, name, default=None, type=str):
+    return type(request.args[name][0]) if name in request.args else default

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     from twisted.internet import reactor
+
     root = Root()
     factory = Site(root)
     httpPort = reactor.listenTCP(8998, Site(root))
+
+    def _print_listening():
+        httpHost = httpPort.getHost()
+        print(f"Bench server at http://{httpHost.host}:{httpHost.port}")
+
     reactor.callWhenRunning(_print_listening)
     reactor.run()
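
Usage note (not part of the patch): `_getarg` falls back to the default when a query argument is missing and casts it with the given type when present. `_FakeRequest` below is an illustrative stub for a Twisted request object.

    from scrapy.utils.benchserver import _getarg

    class _FakeRequest:
        args = {b"total": [b"50"]}

    assert _getarg(_FakeRequest(), b"total", 100, int) == 50
    assert _getarg(_FakeRequest(), b"show", 10, int) == 10
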
diff --git a/scrapy/utils/boto.py b/scrapy/utils/boto.py
index 94a28dddb..53cfeddd0 100644
--- a/scrapy/utils/boto.py
+++ b/scrapy/utils/boto.py
@@ -1 +1,10 @@
 """Boto/botocore helpers"""
+
+
+def is_botocore_available() -> bool:
+    try:
+        import botocore  # noqa: F401
+
+        return True
+    except ImportError:
+        return False
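
Usage note (not part of the patch): `is_botocore_available` is a plain feature probe; callers such as Scrapy's S3 storage backends use it to fail early with a clearer message.

    from scrapy.utils.boto import is_botocore_available

    if not is_botocore_available():
        raise RuntimeError("S3 support requires botocore: pip install botocore")
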
diff --git a/scrapy/utils/conf.py b/scrapy/utils/conf.py
index d81cbfbe5..641dfa4a2 100644
--- a/scrapy/utils/conf.py
+++ b/scrapy/utils/conf.py
@@ -5,53 +5,240 @@ import warnings
 from configparser import ConfigParser
 from operator import itemgetter
 from pathlib import Path
-from typing import Any, Callable, Collection, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union
+from typing import (
+    Any,
+    Callable,
+    Collection,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Union,
+)
+
 from scrapy.exceptions import ScrapyDeprecationWarning, UsageError
 from scrapy.settings import BaseSettings
 from scrapy.utils.deprecate import update_classpath
 from scrapy.utils.python import without_none_values


-def build_component_list(compdict: MutableMapping[Any, Any], custom: Any=
-    None, convert: Callable[[Any], Any]=update_classpath) ->List[Any]:
+def build_component_list(
+    compdict: MutableMapping[Any, Any],
+    custom: Any = None,
+    convert: Callable[[Any], Any] = update_classpath,
+) -> List[Any]:
     """Compose a component list from a { class: order } dictionary."""
-    pass

+    def _check_components(complist: Collection[Any]) -> None:
+        if len({convert(c) for c in complist}) != len(complist):
+            raise ValueError(
+                f"Some paths in {complist!r} convert to the same object, "
+                "please update your settings"
+            )
+
+    def _map_keys(compdict: Mapping[Any, Any]) -> Union[BaseSettings, Dict[Any, Any]]:
+        if isinstance(compdict, BaseSettings):
+            compbs = BaseSettings()
+            for k, v in compdict.items():
+                prio = compdict.getpriority(k)
+                assert prio is not None
+                if compbs.getpriority(convert(k)) == prio:
+                    raise ValueError(
+                        f"Some paths in {list(compdict.keys())!r} "
+                        "convert to the same "
+                        "object, please update your settings"
+                    )
+                else:
+                    compbs.set(convert(k), v, priority=prio)
+            return compbs
+        _check_components(compdict)
+        return {convert(k): v for k, v in compdict.items()}
+
+    def _validate_values(compdict: Mapping[Any, Any]) -> None:
+        """Fail if a value in the components dict is not a real number or None."""
+        for name, value in compdict.items():
+            if value is not None and not isinstance(value, numbers.Real):
+                raise ValueError(
+                    f"Invalid value {value} for component {name}, "
+                    "please provide a real number or None instead"
+                )
+
+    if custom is not None:
+        warnings.warn(
+            "The 'custom' attribute of build_component_list() is deprecated. "
+            "Please merge its value into 'compdict' manually or change your "
+            "code to use Settings.getwithbase().",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        if isinstance(custom, (list, tuple)):
+            _check_components(custom)
+            return type(custom)(convert(c) for c in custom)  # type: ignore[return-value]
+        compdict.update(custom)

-def arglist_to_dict(arglist: List[str]) ->Dict[str, str]:
+    _validate_values(compdict)
+    compdict = without_none_values(_map_keys(compdict))
+    return [k for k, v in sorted(compdict.items(), key=itemgetter(1))]
+
+
+def arglist_to_dict(arglist: List[str]) -> Dict[str, str]:
     """Convert a list of arguments like ['arg1=val1', 'arg2=val2', ...] to a
     dict
     """
-    pass
+    return dict(x.split("=", 1) for x in arglist)


-def closest_scrapy_cfg(path: Union[str, os.PathLike]='.', prevpath:
-    Optional[Union[str, os.PathLike]]=None) ->str:
+def closest_scrapy_cfg(
+    path: Union[str, os.PathLike] = ".",
+    prevpath: Optional[Union[str, os.PathLike]] = None,
+) -> str:
     """Return the path to the closest scrapy.cfg file by traversing the current
     directory and its parents
     """
-    pass
+    if prevpath is not None and str(path) == str(prevpath):
+        return ""
+    path = Path(path).resolve()
+    cfgfile = path / "scrapy.cfg"
+    if cfgfile.exists():
+        return str(cfgfile)
+    return closest_scrapy_cfg(path.parent, path)


-def init_env(project: str='default', set_syspath: bool=True) ->None:
+def init_env(project: str = "default", set_syspath: bool = True) -> None:
     """Initialize environment to use command-line tool from inside a project
     dir. This sets the Scrapy settings module and modifies the Python path to
     be able to locate the project module.
     """
-    pass
+    cfg = get_config()
+    if cfg.has_option("settings", project):
+        os.environ["SCRAPY_SETTINGS_MODULE"] = cfg.get("settings", project)
+    closest = closest_scrapy_cfg()
+    if closest:
+        projdir = str(Path(closest).parent)
+        if set_syspath and projdir not in sys.path:
+            sys.path.append(projdir)


-def get_config(use_closest: bool=True) ->ConfigParser:
+def get_config(use_closest: bool = True) -> ConfigParser:
     """Get Scrapy config file as a ConfigParser"""
-    pass
+    sources = get_sources(use_closest)
+    cfg = ConfigParser()
+    cfg.read(sources)
+    return cfg
+
+
+def get_sources(use_closest: bool = True) -> List[str]:
+    xdg_config_home = (
+        os.environ.get("XDG_CONFIG_HOME") or Path("~/.config").expanduser()
+    )
+    sources = [
+        "/etc/scrapy.cfg",
+        r"c:\scrapy\scrapy.cfg",
+        str(Path(xdg_config_home) / "scrapy.cfg"),
+        str(Path("~/.scrapy.cfg").expanduser()),
+    ]
+    if use_closest:
+        sources.append(closest_scrapy_cfg())
+    return sources


-def feed_process_params_from_cli(settings: BaseSettings, output: List[str],
-    output_format: Optional[str]=None, overwrite_output: Optional[List[str]
-    ]=None) ->Dict[str, Dict[str, Any]]:
+def feed_complete_default_values_from_settings(
+    feed: Dict[str, Any], settings: BaseSettings
+) -> Dict[str, Any]:
+    out = feed.copy()
+    out.setdefault("batch_item_count", settings.getint("FEED_EXPORT_BATCH_ITEM_COUNT"))
+    out.setdefault("encoding", settings["FEED_EXPORT_ENCODING"])
+    out.setdefault("fields", settings.getdictorlist("FEED_EXPORT_FIELDS") or None)
+    out.setdefault("store_empty", settings.getbool("FEED_STORE_EMPTY"))
+    out.setdefault("uri_params", settings["FEED_URI_PARAMS"])
+    out.setdefault("item_export_kwargs", {})
+    if settings["FEED_EXPORT_INDENT"] is None:
+        out.setdefault("indent", None)
+    else:
+        out.setdefault("indent", settings.getint("FEED_EXPORT_INDENT"))
+    return out
+
+
+def feed_process_params_from_cli(
+    settings: BaseSettings,
+    output: List[str],
+    output_format: Optional[str] = None,
+    overwrite_output: Optional[List[str]] = None,
+) -> Dict[str, Dict[str, Any]]:
     """
     Receives feed export params (from the 'crawl' or 'runspider' commands),
     checks for inconsistencies in their quantities and returns a dictionary
     suitable to be used as the FEEDS setting.
     """
-    pass
+    valid_output_formats: Iterable[str] = without_none_values(
+        settings.getwithbase("FEED_EXPORTERS")
+    ).keys()
+
+    def check_valid_format(output_format: str) -> None:
+        if output_format not in valid_output_formats:
+            raise UsageError(
+                f"Unrecognized output format '{output_format}'. "
+                f"Set a supported one ({tuple(valid_output_formats)}) "
+                "after a colon at the end of the output URI (i.e. -o/-O "
+                "<URI>:<FORMAT>) or as a file extension."
+            )
+
+    overwrite = False
+    if overwrite_output:
+        if output:
+            raise UsageError(
+                "Please use only one of -o/--output and -O/--overwrite-output"
+            )
+        if output_format:
+            raise UsageError(
+                "-t/--output-format is a deprecated command line option"
+                " and does not work in combination with -O/--overwrite-output."
+                " To specify a format please specify it after a colon at the end of the"
+                " output URI (i.e. -O <URI>:<FORMAT>)."
+                " Example working in the tutorial: "
+                "scrapy crawl quotes -O quotes.json:json"
+            )
+        output = overwrite_output
+        overwrite = True
+
+    if output_format:
+        if len(output) == 1:
+            check_valid_format(output_format)
+            message = (
+                "The -t/--output-format command line option is deprecated in favor of "
+                "specifying the output format within the output URI using the -o/--output or the"
+                " -O/--overwrite-output option (i.e. -o/-O <URI>:<FORMAT>). See the documentation"
+                " of the -o or -O option or the following examples for more information. "
+                "Examples working in the tutorial: "
+                "scrapy crawl quotes -o quotes.csv:csv   or   "
+                "scrapy crawl quotes -O quotes.json:json"
+            )
+            warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)
+            return {output[0]: {"format": output_format}}
+        raise UsageError(
+            "The -t command-line option cannot be used if multiple output "
+            "URIs are specified"
+        )
+
+    result: Dict[str, Dict[str, Any]] = {}
+    for element in output:
+        try:
+            feed_uri, feed_format = element.rsplit(":", 1)
+            check_valid_format(feed_format)
+        except (ValueError, UsageError):
+            feed_uri = element
+            feed_format = Path(element).suffix.replace(".", "")
+        else:
+            if feed_uri == "-":
+                feed_uri = "stdout:"
+        check_valid_format(feed_format)
+        result[feed_uri] = {"format": feed_format}
+        if overwrite:
+            result[feed_uri]["overwrite"] = True
+
+    # FEEDS setting should take precedence over the matching CLI options
+    result.update(settings.getdict("FEEDS"))
+
+    return result
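
Usage note (not part of the patch): a small sketch of the restored helpers. `build_component_list` drops entries whose order is None and sorts the rest by order, which is how settings like DOWNLOADER_MIDDLEWARES are resolved; the `mw.*` paths below are made up for illustration.

    from scrapy.utils.conf import arglist_to_dict, build_component_list

    components = {"mw.Proxy": 100, "mw.Retry": 550, "mw.Disabled": None}
    assert build_component_list(components) == ["mw.Proxy", "mw.Retry"]
    assert arglist_to_dict(["foo=bar", "n=1"]) == {"foo": "bar", "n": "1"}
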
diff --git a/scrapy/utils/console.py b/scrapy/utils/console.py
index b5bd733b2..100f040bb 100644
--- a/scrapy/utils/console.py
+++ b/scrapy/utils/console.py
@@ -1,40 +1,110 @@
 from functools import wraps


-def _embed_ipython_shell(namespace={}, banner=''):
+def _embed_ipython_shell(namespace={}, banner=""):
     """Start an IPython Shell"""
-    pass
+    try:
+        from IPython.terminal.embed import InteractiveShellEmbed
+        from IPython.terminal.ipapp import load_default_config
+    except ImportError:
+        from IPython.frontend.terminal.embed import InteractiveShellEmbed
+        from IPython.frontend.terminal.ipapp import load_default_config

+    @wraps(_embed_ipython_shell)
+    def wrapper(namespace=namespace, banner=""):
+        config = load_default_config()
+        # Always use .instance() to ensure _instance propagation to all parents
+        # this is needed so that <TAB> completion works well for new imports
+        # and clear the instance to always have the fresh env
+        # on repeated breaks like with inspect_response()
+        InteractiveShellEmbed.clear_instance()
+        shell = InteractiveShellEmbed.instance(
+            banner1=banner, user_ns=namespace, config=config
+        )
+        shell()

-def _embed_bpython_shell(namespace={}, banner=''):
+    return wrapper
+
+
+def _embed_bpython_shell(namespace={}, banner=""):
     """Start a bpython shell"""
-    pass
+    import bpython
+
+    @wraps(_embed_bpython_shell)
+    def wrapper(namespace=namespace, banner=""):
+        bpython.embed(locals_=namespace, banner=banner)

+    return wrapper

-def _embed_ptpython_shell(namespace={}, banner=''):
+
+def _embed_ptpython_shell(namespace={}, banner=""):
     """Start a ptpython shell"""
-    pass
+    import ptpython.repl
+
+    @wraps(_embed_ptpython_shell)
+    def wrapper(namespace=namespace, banner=""):
+        print(banner)
+        ptpython.repl.embed(locals=namespace)
+
+    return wrapper


-def _embed_standard_shell(namespace={}, banner=''):
+def _embed_standard_shell(namespace={}, banner=""):
     """Start a standard python shell"""
-    pass
+    import code

+    try:  # readline module is only available on unix systems
+        import readline
+    except ImportError:
+        pass
+    else:
+        import rlcompleter  # noqa: F401

-DEFAULT_PYTHON_SHELLS = {'ptpython': _embed_ptpython_shell, 'ipython':
-    _embed_ipython_shell, 'bpython': _embed_bpython_shell, 'python':
-    _embed_standard_shell}
+        readline.parse_and_bind("tab:complete")
+
+    @wraps(_embed_standard_shell)
+    def wrapper(namespace=namespace, banner=""):
+        code.interact(banner=banner, local=namespace)
+
+    return wrapper
+
+
+DEFAULT_PYTHON_SHELLS = {
+    "ptpython": _embed_ptpython_shell,
+    "ipython": _embed_ipython_shell,
+    "bpython": _embed_bpython_shell,
+    "python": _embed_standard_shell,
+}


 def get_shell_embed_func(shells=None, known_shells=None):
     """Return the first acceptable shell-embed function
     from a given list of shell names.
     """
-    pass
+    if shells is None:  # list, preference order of shells
+        shells = DEFAULT_PYTHON_SHELLS.keys()
+    if known_shells is None:  # available embeddable shells
+        known_shells = DEFAULT_PYTHON_SHELLS.copy()
+    for shell in shells:
+        if shell in known_shells:
+            try:
+                # function test: run all setup code (imports),
+                # but don't fall into the shell
+                return known_shells[shell]()
+            except ImportError:
+                continue


-def start_python_console(namespace=None, banner='', shells=None):
+def start_python_console(namespace=None, banner="", shells=None):
     """Start Python console bound to the given namespace.
     Readline support and tab completion will be used on Unix, if available.
     """
-    pass
+    if namespace is None:
+        namespace = {}
+
+    try:
+        shell = get_shell_embed_func(shells)
+        if shell is not None:
+            shell(namespace=namespace, banner=banner)
+    except SystemExit:  # raised when using exit() in python code.interact
+        pass
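
Usage note (not part of the patch): `get_shell_embed_func` returns the first embeddable shell it can import, so asking for `["ipython", "python"]` degrades gracefully to the stdlib console when IPython is absent.

    from scrapy.utils.console import get_shell_embed_func

    embed = get_shell_embed_func(shells=["ipython", "python"])
    # embed(namespace={"answer": 42}, banner="demo")  # would block waiting for input
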
diff --git a/scrapy/utils/curl.py b/scrapy/utils/curl.py
index fec4cdf21..f5dbbd64e 100644
--- a/scrapy/utils/curl.py
+++ b/scrapy/utils/curl.py
@@ -3,37 +3,68 @@ import warnings
 from http.cookies import SimpleCookie
 from shlex import split
 from urllib.parse import urlparse
+
 from w3lib.http import basic_auth_header


 class DataAction(argparse.Action):
-
     def __call__(self, parser, namespace, values, option_string=None):
         value = str(values)
-        if value.startswith('$'):
+        if value.startswith("$"):
             value = value[1:]
         setattr(namespace, self.dest, value)


 class CurlParser(argparse.ArgumentParser):
-    pass
+    def error(self, message):
+        error_msg = f"There was an error parsing the curl command: {message}"
+        raise ValueError(error_msg)


 curl_parser = CurlParser()
-curl_parser.add_argument('url')
-curl_parser.add_argument('-H', '--header', dest='headers', action='append')
-curl_parser.add_argument('-X', '--request', dest='method')
-curl_parser.add_argument('-d', '--data', '--data-raw', dest='data', action=
-    DataAction)
-curl_parser.add_argument('-u', '--user', dest='auth')
-safe_to_ignore_arguments = [['--compressed'], ['-s', '--silent'], ['-v',
-    '--verbose'], ['-#', '--progress-bar']]
+curl_parser.add_argument("url")
+curl_parser.add_argument("-H", "--header", dest="headers", action="append")
+curl_parser.add_argument("-X", "--request", dest="method")
+curl_parser.add_argument("-d", "--data", "--data-raw", dest="data", action=DataAction)
+curl_parser.add_argument("-u", "--user", dest="auth")
+
+
+safe_to_ignore_arguments = [
+    ["--compressed"],
+    # `--compressed` argument is not safe to ignore, but it's included here
+    # because the `HttpCompressionMiddleware` is enabled by default
+    ["-s", "--silent"],
+    ["-v", "--verbose"],
+    ["-#", "--progress-bar"],
+]
+
 for argument in safe_to_ignore_arguments:
-    curl_parser.add_argument(*argument, action='store_true')
+    curl_parser.add_argument(*argument, action="store_true")


-def curl_to_request_kwargs(curl_command: str, ignore_unknown_options: bool=True
-    ) ->dict:
+def _parse_headers_and_cookies(parsed_args):
+    headers = []
+    cookies = {}
+    for header in parsed_args.headers or ():
+        name, val = header.split(":", 1)
+        name = name.strip()
+        val = val.strip()
+        if name.title() == "Cookie":
+            for name, morsel in SimpleCookie(val).items():
+                cookies[name] = morsel.value
+        else:
+            headers.append((name, val))
+
+    if parsed_args.auth:
+        user, password = parsed_args.auth.split(":", 1)
+        headers.append(("Authorization", basic_auth_header(user, password)))
+
+    return headers, cookies
+
+
+def curl_to_request_kwargs(
+    curl_command: str, ignore_unknown_options: bool = True
+) -> dict:
     """Convert a cURL command syntax to Request kwargs.

     :param str curl_command: string containing the curl command
@@ -42,4 +73,44 @@ def curl_to_request_kwargs(curl_command: str, ignore_unknown_options: bool=True
                                         raises an error. (default: True)
     :return: dictionary of Request kwargs
     """
-    pass
+
+    curl_args = split(curl_command)
+
+    if curl_args[0] != "curl":
+        raise ValueError('A curl command must start with "curl"')
+
+    parsed_args, argv = curl_parser.parse_known_args(curl_args[1:])
+
+    if argv:
+        msg = f'Unrecognized options: {", ".join(argv)}'
+        if ignore_unknown_options:
+            warnings.warn(msg)
+        else:
+            raise ValueError(msg)
+
+    url = parsed_args.url
+
+    # curl automatically prepends 'http' if the scheme is missing, but Request
+    # needs the scheme to work
+    parsed_url = urlparse(url)
+    if not parsed_url.scheme:
+        url = "http://" + url
+
+    method = parsed_args.method or "GET"
+
+    result = {"method": method.upper(), "url": url}
+
+    headers, cookies = _parse_headers_and_cookies(parsed_args)
+
+    if headers:
+        result["headers"] = headers
+    if cookies:
+        result["cookies"] = cookies
+    if parsed_args.data:
+        result["body"] = parsed_args.data
+        if not parsed_args.method:
+            # if the "data" is specified but the "method" is not specified,
+            # the default method is 'POST'
+            result["method"] = "POST"
+
+    return result
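
Usage note (not part of the patch): an illustrative conversion showing the main behaviours of `curl_to_request_kwargs`: headers become a list of tuples, `Cookie` headers are split into a dict, and `-d` without an explicit `-X` implies POST.

    from scrapy.utils.curl import curl_to_request_kwargs

    kwargs = curl_to_request_kwargs(
        "curl 'https://example.com/api' -H 'Accept: application/json' "
        "-H 'Cookie: sid=abc' -d 'q=scrapy'"
    )
    assert kwargs["method"] == "POST"
    assert kwargs["cookies"] == {"sid": "abc"}
    assert ("Accept", "application/json") in kwargs["headers"]
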
diff --git a/scrapy/utils/datatypes.py b/scrapy/utils/datatypes.py
index 3ec4ef789..d5b9544cc 100644
--- a/scrapy/utils/datatypes.py
+++ b/scrapy/utils/datatypes.py
@@ -4,14 +4,17 @@ Python Standard Library.

 This module must not depend on any module outside the Standard Library.
 """
+
 import collections
 import warnings
 import weakref
 from collections.abc import Mapping
 from typing import Any, AnyStr, Optional, OrderedDict, Sequence, TypeVar
+
 from scrapy.exceptions import ScrapyDeprecationWarning
-_KT = TypeVar('_KT')
-_VT = TypeVar('_VT')
+
+_KT = TypeVar("_KT")
+_VT = TypeVar("_VT")


 class CaselessDict(dict):
@@ -19,10 +22,14 @@ class CaselessDict(dict):

     def __new__(cls, *args, **kwargs):
         from scrapy.http.headers import Headers
+
         if issubclass(cls, CaselessDict) and not issubclass(cls, Headers):
             warnings.warn(
-                'scrapy.utils.datatypes.CaselessDict is deprecated, please use scrapy.utils.datatypes.CaseInsensitiveDict instead'
-                , category=ScrapyDeprecationWarning, stacklevel=2)
+                "scrapy.utils.datatypes.CaselessDict is deprecated,"
+                " please use scrapy.utils.datatypes.CaseInsensitiveDict instead",
+                category=ScrapyDeprecationWarning,
+                stacklevel=2,
+            )
         return super().__new__(cls, *args, **kwargs)

     def __init__(self, seq=None):
@@ -41,19 +48,39 @@ class CaselessDict(dict):

     def __contains__(self, key):
         return dict.__contains__(self, self.normkey(key))
+
     has_key = __contains__

     def __copy__(self):
         return self.__class__(self)
+
     copy = __copy__

     def normkey(self, key):
         """Method to normalize dictionary key access"""
-        pass
+        return key.lower()

     def normvalue(self, value):
         """Method to normalize values prior to be set"""
-        pass
+        return value
+
+    def get(self, key, def_val=None):
+        return dict.get(self, self.normkey(key), self.normvalue(def_val))
+
+    def setdefault(self, key, def_val=None):
+        return dict.setdefault(self, self.normkey(key), self.normvalue(def_val))
+
+    def update(self, seq):
+        seq = seq.items() if isinstance(seq, Mapping) else seq
+        iseq = ((self.normkey(k), self.normvalue(v)) for k, v in seq)
+        super().update(iseq)
+
+    @classmethod
+    def fromkeys(cls, keys, value=None):
+        return cls((k, value) for k in keys)
+
+    def pop(self, key, *args):
+        return dict.pop(self, self.normkey(key), *args)


 class CaseInsensitiveDict(collections.UserDict):
@@ -61,15 +88,15 @@ class CaseInsensitiveDict(collections.UserDict):
     as keys and allows case-insensitive lookups.
     """

-    def __init__(self, *args, **kwargs) ->None:
+    def __init__(self, *args, **kwargs) -> None:
         self._keys: dict = {}
         super().__init__(*args, **kwargs)

-    def __getitem__(self, key: AnyStr) ->Any:
+    def __getitem__(self, key: AnyStr) -> Any:
         normalized_key = self._normkey(key)
         return super().__getitem__(self._keys[normalized_key.lower()])

-    def __setitem__(self, key: AnyStr, value: Any) ->None:
+    def __setitem__(self, key: AnyStr, value: Any) -> None:
         normalized_key = self._normkey(key)
         try:
             lower_key = self._keys[normalized_key.lower()]
@@ -79,17 +106,23 @@ class CaseInsensitiveDict(collections.UserDict):
         super().__setitem__(normalized_key, self._normvalue(value))
         self._keys[normalized_key.lower()] = normalized_key

-    def __delitem__(self, key: AnyStr) ->None:
+    def __delitem__(self, key: AnyStr) -> None:
         normalized_key = self._normkey(key)
         stored_key = self._keys.pop(normalized_key.lower())
         super().__delitem__(stored_key)

-    def __contains__(self, key: AnyStr) ->bool:
+    def __contains__(self, key: AnyStr) -> bool:  # type: ignore[override]
         normalized_key = self._normkey(key)
         return normalized_key.lower() in self._keys

-    def __repr__(self) ->str:
-        return f'<{self.__class__.__name__}: {super().__repr__()}>'
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__}: {super().__repr__()}>"
+
+    def _normkey(self, key: AnyStr) -> AnyStr:
+        return key
+
+    def _normvalue(self, value: Any) -> Any:
+        return value


 class LocalCache(OrderedDict[_KT, _VT]):
@@ -98,11 +131,11 @@ class LocalCache(OrderedDict[_KT, _VT]):
     Older items expires first.
     """

-    def __init__(self, limit: Optional[int]=None):
+    def __init__(self, limit: Optional[int] = None):
         super().__init__()
         self.limit: Optional[int] = limit

-    def __setitem__(self, key: _KT, value: _VT) ->None:
+    def __setitem__(self, key: _KT, value: _VT) -> None:
         if self.limit:
             while len(self) >= self.limit:
                 self.popitem(last=False)
@@ -121,21 +154,21 @@ class LocalWeakReferencedCache(weakref.WeakKeyDictionary):
     it cannot be instantiated with an initial dictionary.
     """

-    def __init__(self, limit: Optional[int]=None):
+    def __init__(self, limit: Optional[int] = None):
         super().__init__()
         self.data: LocalCache = LocalCache(limit=limit)

-    def __setitem__(self, key: _KT, value: _VT) ->None:
+    def __setitem__(self, key: _KT, value: _VT) -> None:
         try:
             super().__setitem__(key, value)
         except TypeError:
-            pass
+            pass  # key is not weak-referenceable, skip caching

-    def __getitem__(self, key: _KT) ->Optional[_VT]:
+    def __getitem__(self, key: _KT) -> Optional[_VT]:  # type: ignore[override]
         try:
             return super().__getitem__(key)
         except (TypeError, KeyError):
-            return None
+            return None  # key is either not weak-referenceable or not cached


 class SequenceExclude:
@@ -144,5 +177,5 @@ class SequenceExclude:
     def __init__(self, seq: Sequence):
         self.seq: Sequence = seq

-    def __contains__(self, item: Any) ->bool:
+    def __contains__(self, item: Any) -> bool:
         return item not in self.seq
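
Usage note (not part of the patch): a behavioural sketch of the two main containers. `CaseInsensitiveDict` preserves the original key casing while matching lookups case-insensitively; `LocalCache` evicts its oldest entries once the limit is reached.

    from scrapy.utils.datatypes import CaseInsensitiveDict, LocalCache

    headers = CaseInsensitiveDict({"Content-Type": "text/html"})
    assert headers["content-type"] == "text/html"
    assert "CONTENT-TYPE" in headers

    cache = LocalCache(limit=2)
    cache["a"], cache["b"], cache["c"] = 1, 2, 3
    assert "a" not in cache and list(cache) == ["b", "c"]
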
diff --git a/scrapy/utils/decorators.py b/scrapy/utils/decorators.py
index b441b495f..04186559f 100644
--- a/scrapy/utils/decorators.py
+++ b/scrapy/utils/decorators.py
@@ -1,25 +1,52 @@
 import warnings
 from functools import wraps
 from typing import Any, Callable
+
 from twisted.internet import defer, threads
 from twisted.internet.defer import Deferred
+
 from scrapy.exceptions import ScrapyDeprecationWarning


-def deprecated(use_instead: Any=None) ->Callable:
+def deprecated(use_instead: Any = None) -> Callable:
     """This is a decorator which can be used to mark functions
     as deprecated. It will result in a warning being emitted
     when the function is used."""
-    pass
+
+    def deco(func: Callable) -> Callable:
+        @wraps(func)
+        def wrapped(*args: Any, **kwargs: Any) -> Any:
+            message = f"Call to deprecated function {func.__name__}."
+            if use_instead:
+                message += f" Use {use_instead} instead."
+            warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2)
+            return func(*args, **kwargs)
+
+        return wrapped
+
+    if callable(use_instead):
+        deco = deco(use_instead)
+        use_instead = None
+    return deco


-def defers(func: Callable) ->Callable[..., Deferred]:
+def defers(func: Callable) -> Callable[..., Deferred]:
     """Decorator to make sure a function always returns a deferred"""
-    pass

+    @wraps(func)
+    def wrapped(*a: Any, **kw: Any) -> Deferred:
+        return defer.maybeDeferred(func, *a, **kw)

-def inthread(func: Callable) ->Callable[..., Deferred]:
+    return wrapped
+
+
+def inthread(func: Callable) -> Callable[..., Deferred]:
     """Decorator to call a function in a thread and return a deferred with the
     result
     """
-    pass
+
+    @wraps(func)
+    def wrapped(*a: Any, **kw: Any) -> Deferred:
+        return threads.deferToThread(func, *a, **kw)
+
+    return wrapped
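
Usage note (not part of the patch): `deprecated` warns on each call and forwards to the wrapped function, while `defers` always hands back a Deferred (already fired here, so the callback runs immediately). The names `old_parse`/`answer` are made up for illustration.

    import warnings
    from scrapy.utils.decorators import defers, deprecated

    @deprecated("new_parse")
    def old_parse():
        return "ok"

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert old_parse() == "ok"
    assert "Use new_parse instead" in str(caught[0].message)

    @defers
    def answer():
        return 42

    answer().addCallback(print)  # prints 42
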
diff --git a/scrapy/utils/defer.py b/scrapy/utils/defer.py
index aff03f895..bf3c5ef5b 100644
--- a/scrapy/utils/defer.py
+++ b/scrapy/utils/defer.py
@@ -6,51 +6,101 @@ import inspect
 from asyncio import Future
 from functools import wraps
 from types import CoroutineType
-from typing import Any, AsyncGenerator, AsyncIterable, AsyncIterator, Awaitable, Callable, Coroutine, Dict, Generator, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union, cast, overload
+from typing import (
+    Any,
+    AsyncGenerator,
+    AsyncIterable,
+    AsyncIterator,
+    Awaitable,
+    Callable,
+    Coroutine,
+    Dict,
+    Generator,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+)
+
 from twisted.internet import defer
 from twisted.internet.defer import Deferred, DeferredList, ensureDeferred
 from twisted.internet.task import Cooperator
 from twisted.python import failure
 from twisted.python.failure import Failure
+
 from scrapy.exceptions import IgnoreRequest
 from scrapy.utils.reactor import _get_asyncio_event_loop, is_asyncio_reactor_installed


-def defer_fail(_failure: Failure) ->Deferred:
+def defer_fail(_failure: Failure) -> Deferred:
     """Same as twisted.internet.defer.fail but delay calling errback until
     next reactor loop

     It delays by 100ms so reactor has a chance to go through readers and writers
     before attending pending delayed calls, so do not set delay to zero.
     """
-    pass
+    from twisted.internet import reactor
+
+    d: Deferred = Deferred()
+    reactor.callLater(0.1, d.errback, _failure)
+    return d


-def defer_succeed(result: Any) ->Deferred:
+def defer_succeed(result: Any) -> Deferred:
     """Same as twisted.internet.defer.succeed but delay calling callback until
     next reactor loop

     It delays by 100ms so reactor has a chance to go through readers and writers
     before attending pending delayed calls, so do not set delay to zero.
     """
-    pass
+    from twisted.internet import reactor
+
+    d: Deferred = Deferred()
+    reactor.callLater(0.1, d.callback, result)
+    return d
+
+
+def defer_result(result: Any) -> Deferred:
+    if isinstance(result, Deferred):
+        return result
+    if isinstance(result, failure.Failure):
+        return defer_fail(result)
+    return defer_succeed(result)


-def mustbe_deferred(f: Callable, *args: Any, **kw: Any) ->Deferred:
+def mustbe_deferred(f: Callable, *args: Any, **kw: Any) -> Deferred:
     """Same as twisted.internet.defer.maybeDeferred, but delay calling
     callback/errback to next reactor loop
     """
-    pass
-
-
-def parallel(iterable: Iterable, count: int, callable: Callable, *args: Any,
-    **named: Any) ->Deferred:
+    try:
+        result = f(*args, **kw)
+    # FIXME: Hack to avoid introspecting tracebacks. This is to speed up
+    # processing of IgnoreRequest errors which are, by far, the most common
+    # exception in Scrapy - see #125
+    except IgnoreRequest as e:
+        return defer_fail(failure.Failure(e))
+    except Exception:
+        return defer_fail(failure.Failure())
+    else:
+        return defer_result(result)
+
+
+def parallel(
+    iterable: Iterable, count: int, callable: Callable, *args: Any, **named: Any
+) -> Deferred:
     """Execute a callable over the objects in the given iterable, in parallel,
     using no more than ``count`` concurrent calls.

     Taken from: https://jcalderone.livejournal.com/24285.html
     """
-    pass
+    coop = Cooperator()
+    work = (callable(elem, *args, **named) for elem in iterable)
+    return DeferredList([coop.coiterate(work) for _ in range(count)])


 class _AsyncCooperatorAdapter(Iterator):
@@ -99,8 +149,13 @@ class _AsyncCooperatorAdapter(Iterator):
     goal.
     """

-    def __init__(self, aiterable: AsyncIterable, callable: Callable, *
-        callable_args: Any, **callable_kwargs: Any):
+    def __init__(
+        self,
+        aiterable: AsyncIterable,
+        callable: Callable,
+        *callable_args: Any,
+        **callable_kwargs: Any,
+    ):
         self.aiterator: AsyncIterator = aiterable.__aiter__()
         self.callable: Callable = callable
         self.callable_args: Tuple[Any, ...] = callable_args
@@ -109,7 +164,38 @@ class _AsyncCooperatorAdapter(Iterator):
         self.waiting_deferreds: List[Deferred] = []
         self.anext_deferred: Optional[Deferred] = None

-    def __next__(self) ->Deferred:
+    def _callback(self, result: Any) -> None:
+        # This gets called when the result from aiterator.__anext__() is available.
+        # It calls the callable on it and sends the result to the oldest waiting Deferred
+        # (by chaining if the result is a Deferred too or by firing if not).
+        self.anext_deferred = None
+        result = self.callable(result, *self.callable_args, **self.callable_kwargs)
+        d = self.waiting_deferreds.pop(0)
+        if isinstance(result, Deferred):
+            result.chainDeferred(d)
+        else:
+            d.callback(None)
+        if self.waiting_deferreds:
+            self._call_anext()
+
+    def _errback(self, failure: Failure) -> None:
+        # This gets called on any exceptions in aiterator.__anext__().
+        # It handles StopAsyncIteration by stopping the iteration and reraises all others.
+        self.anext_deferred = None
+        failure.trap(StopAsyncIteration)
+        self.finished = True
+        for d in self.waiting_deferreds:
+            d.callback(None)
+
+    def _call_anext(self) -> None:
+        # This starts waiting for the next result from aiterator.
+        # If aiterator is exhausted, _errback will be called.
+        self.anext_deferred = deferred_from_coro(self.aiterator.__anext__())
+        self.anext_deferred.addCallbacks(self._callback, self._errback)
+
+    def __next__(self) -> Deferred:
+        # This puts a new Deferred into self.waiting_deferreds and returns it.
+        # It also calls __anext__() if needed.
         if self.finished:
             raise StopIteration
         d: Deferred = Deferred()
@@ -119,72 +205,160 @@ class _AsyncCooperatorAdapter(Iterator):
         return d


-def parallel_async(async_iterable: AsyncIterable, count: int, callable:
-    Callable, *args: Any, **named: Any) ->Deferred:
+def parallel_async(
+    async_iterable: AsyncIterable,
+    count: int,
+    callable: Callable,
+    *args: Any,
+    **named: Any,
+) -> Deferred:
     """Like parallel but for async iterators"""
-    pass
+    coop = Cooperator()
+    work = _AsyncCooperatorAdapter(async_iterable, callable, *args, **named)
+    dl: Deferred = DeferredList([coop.coiterate(work) for _ in range(count)])
+    return dl


-def process_chain(callbacks: Iterable[Callable], input: Any, *a: Any, **kw: Any
-    ) ->Deferred:
+def process_chain(
+    callbacks: Iterable[Callable], input: Any, *a: Any, **kw: Any
+) -> Deferred:
     """Return a Deferred built by chaining the given callbacks"""
-    pass
-
-
-def process_chain_both(callbacks: Iterable[Callable], errbacks: Iterable[
-    Callable], input: Any, *a: Any, **kw: Any) ->Deferred:
+    d: Deferred = Deferred()
+    for x in callbacks:
+        d.addCallback(x, *a, **kw)
+    d.callback(input)
+    return d
+
+
+def process_chain_both(
+    callbacks: Iterable[Callable],
+    errbacks: Iterable[Callable],
+    input: Any,
+    *a: Any,
+    **kw: Any,
+) -> Deferred:
     """Return a Deferred built by chaining the given callbacks and errbacks"""
-    pass
-
-
-def process_parallel(callbacks: Iterable[Callable], input: Any, *a: Any, **
-    kw: Any) ->Deferred:
+    d: Deferred = Deferred()
+    for cb, eb in zip(callbacks, errbacks):
+        d.addCallbacks(
+            callback=cb,
+            errback=eb,
+            callbackArgs=a,
+            callbackKeywords=kw,
+            errbackArgs=a,
+            errbackKeywords=kw,
+        )
+    if isinstance(input, failure.Failure):
+        d.errback(input)
+    else:
+        d.callback(input)
+    return d
+
+
+def process_parallel(
+    callbacks: Iterable[Callable], input: Any, *a: Any, **kw: Any
+) -> Deferred:
     """Return a Deferred with the output of all successful calls to the given
     callbacks
     """
-    pass
+    dfds = [defer.succeed(input).addCallback(x, *a, **kw) for x in callbacks]
+    d: Deferred = DeferredList(dfds, fireOnOneErrback=True, consumeErrors=True)
+    d.addCallbacks(lambda r: [x[1] for x in r], lambda f: f.value.subFailure)
+    return d


-def iter_errback(iterable: Iterable, errback: Callable, *a: Any, **kw: Any
-    ) ->Generator:
+def iter_errback(
+    iterable: Iterable, errback: Callable, *a: Any, **kw: Any
+) -> Generator:
     """Wraps an iterable calling an errback if an error is caught while
     iterating it.
     """
-    pass
-
-
-async def aiter_errback(aiterable: AsyncIterable, errback: Callable, *a:
-    Any, **kw: Any) ->AsyncGenerator:
+    it = iter(iterable)
+    while True:
+        try:
+            yield next(it)
+        except StopIteration:
+            break
+        except Exception:
+            errback(failure.Failure(), *a, **kw)
+
+
+async def aiter_errback(
+    aiterable: AsyncIterable, errback: Callable, *a: Any, **kw: Any
+) -> AsyncGenerator:
     """Wraps an async iterable calling an errback if an error is caught while
     iterating it. Similar to scrapy.utils.defer.iter_errback()
     """
-    pass
+    it = aiterable.__aiter__()
+    while True:
+        try:
+            yield await it.__anext__()
+        except StopAsyncIteration:
+            break
+        except Exception:
+            errback(failure.Failure(), *a, **kw)


-_CT = TypeVar('_CT', bound=Union[Awaitable, CoroutineType, Future])
-_T = TypeVar('_T')
+_CT = TypeVar("_CT", bound=Union[Awaitable, CoroutineType, Future])
+_T = TypeVar("_T")


-def deferred_from_coro(o: _T) ->Union[Deferred, _T]:
-    """Converts a coroutine into a Deferred, or returns the object as is if it isn't a coroutine"""
-    pass
+@overload
+def deferred_from_coro(o: _CT) -> Deferred:
+    ...
+

+@overload
+def deferred_from_coro(o: _T) -> _T:
+    ...

-def deferred_f_from_coro_f(coro_f: Callable[..., Coroutine]) ->Callable:
+
+def deferred_from_coro(o: _T) -> Union[Deferred, _T]:
+    """Converts a coroutine into a Deferred, or returns the object as is if it isn't a coroutine"""
+    if isinstance(o, Deferred):
+        return o
+    if asyncio.isfuture(o) or inspect.isawaitable(o):
+        if not is_asyncio_reactor_installed():
+            # wrapping the coroutine directly into a Deferred, this doesn't work correctly with coroutines
+            # that use asyncio, e.g. "await asyncio.sleep(1)"
+            return ensureDeferred(cast(Coroutine[Deferred, Any, Any], o))
+        # wrapping the coroutine into a Future and then into a Deferred, this requires AsyncioSelectorReactor
+        event_loop = _get_asyncio_event_loop()
+        return Deferred.fromFuture(asyncio.ensure_future(o, loop=event_loop))
+    return o
+
+
+def deferred_f_from_coro_f(coro_f: Callable[..., Coroutine]) -> Callable:
     """Converts a coroutine function into a function that returns a Deferred.

     The coroutine function will be called at the time when the wrapper is called. Wrapper args will be passed to it.
     This is useful for callback chains, as callback functions are called with the previous callback result.
     """
-    pass

+    @wraps(coro_f)
+    def f(*coro_args: Any, **coro_kwargs: Any) -> Any:
+        return deferred_from_coro(coro_f(*coro_args, **coro_kwargs))
+
+    return f

-def maybeDeferred_coro(f: Callable, *args: Any, **kw: Any) ->Deferred:
+
+def maybeDeferred_coro(f: Callable, *args: Any, **kw: Any) -> Deferred:
     """Copy of defer.maybeDeferred that also converts coroutines to Deferreds."""
-    pass
+    try:
+        result = f(*args, **kw)
+    except:  # noqa: E722
+        return defer.fail(failure.Failure(captureVars=Deferred.debug))
+
+    if isinstance(result, Deferred):
+        return result
+    if asyncio.isfuture(result) or inspect.isawaitable(result):
+        return deferred_from_coro(result)
+    if isinstance(result, failure.Failure):
+        return defer.fail(result)
+    return defer.succeed(result)


-def deferred_to_future(d: Deferred) ->Future:
+def deferred_to_future(d: Deferred) -> Future:
     """
     .. versionadded:: 2.6.0

@@ -203,10 +377,10 @@ def deferred_to_future(d: Deferred) ->Future:
                 deferred = self.crawler.engine.download(additional_request)
                 additional_response = await deferred_to_future(deferred)
     """
-    pass
+    return d.asFuture(_get_asyncio_event_loop())


-def maybe_deferred_to_future(d: Deferred) ->Union[Deferred, Future]:
+def maybe_deferred_to_future(d: Deferred) -> Union[Deferred, Future]:
     """
     .. versionadded:: 2.6.0

@@ -232,4 +406,6 @@ def maybe_deferred_to_future(d: Deferred) ->Union[Deferred, Future]:
                 deferred = self.crawler.engine.download(additional_request)
                 additional_response = await maybe_deferred_to_future(deferred)
     """
-    pass
+    if not is_asyncio_reactor_installed():
+        return d
+    return deferred_to_future(d)
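
Usage note (not part of the patch): `process_chain` builds a Deferred, adds each callback with the extra positional arguments, and fires it with the input, so with plain synchronous callbacks the whole chain runs immediately.

    from scrapy.utils.defer import process_chain

    d = process_chain([lambda x, n: x + n, lambda x, n: x * n], 1, 2)
    d.addCallback(print)  # prints 6, i.e. (1 + 2) * 2
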
diff --git a/scrapy/utils/deprecate.py b/scrapy/utils/deprecate.py
index 42dcda1fa..ea577c44a 100644
--- a/scrapy/utils/deprecate.py
+++ b/scrapy/utils/deprecate.py
@@ -1,18 +1,33 @@
 """Some helpers for deprecation messages"""
+
 import inspect
 import warnings
 from typing import Any, Dict, List, Optional, Tuple, Type, overload
+
 from scrapy.exceptions import ScrapyDeprecationWarning


-def create_deprecated_class(name: str, new_class: type, clsdict: Optional[
-    Dict[str, Any]]=None, warn_category: Type[Warning]=
-    ScrapyDeprecationWarning, warn_once: bool=True, old_class_path:
-    Optional[str]=None, new_class_path: Optional[str]=None,
-    subclass_warn_message: str=
-    '{cls} inherits from deprecated class {old}, please inherit from {new}.',
-    instance_warn_message: str=
-    '{cls} is deprecated, instantiate {new} instead.') ->type:
+def attribute(obj: Any, oldattr: str, newattr: str, version: str = "0.12") -> None:
+    cname = obj.__class__.__name__
+    warnings.warn(
+        f"{cname}.{oldattr} attribute is deprecated and will be no longer supported "
+        f"in Scrapy {version}, use {cname}.{newattr} attribute instead",
+        ScrapyDeprecationWarning,
+        stacklevel=3,
+    )
+
+
+def create_deprecated_class(
+    name: str,
+    new_class: type,
+    clsdict: Optional[Dict[str, Any]] = None,
+    warn_category: Type[Warning] = ScrapyDeprecationWarning,
+    warn_once: bool = True,
+    old_class_path: Optional[str] = None,
+    new_class_path: Optional[str] = None,
+    subclass_warn_message: str = "{cls} inherits from deprecated class {old}, please inherit from {new}.",
+    instance_warn_message: str = "{cls} is deprecated, instantiate {new} instead.",
+) -> type:
     """
     Return a "deprecated" class that causes its subclasses to issue a warning.
     Subclasses of ``new_class`` are considered subclasses of this class.
@@ -37,19 +52,115 @@ def create_deprecated_class(name: str, new_class: type, clsdict: Optional[
     checks they'll still return True if sub is a subclass of NewName instead of
     OldName.
     """
-    pass
+
+    # https://github.com/python/mypy/issues/4177
+    class DeprecatedClass(new_class.__class__):  # type: ignore[misc, name-defined]
+        deprecated_class: Optional[type] = None
+        warned_on_subclass: bool = False
+
+        def __new__(
+            metacls, name: str, bases: Tuple[type, ...], clsdict_: Dict[str, Any]
+        ) -> type:
+            cls = super().__new__(metacls, name, bases, clsdict_)
+            if metacls.deprecated_class is None:
+                metacls.deprecated_class = cls
+            return cls
+
+        def __init__(cls, name: str, bases: Tuple[type, ...], clsdict_: Dict[str, Any]):
+            meta = cls.__class__
+            old = meta.deprecated_class
+            if old in bases and not (warn_once and meta.warned_on_subclass):
+                meta.warned_on_subclass = True
+                msg = subclass_warn_message.format(
+                    cls=_clspath(cls),
+                    old=_clspath(old, old_class_path),
+                    new=_clspath(new_class, new_class_path),
+                )
+                if warn_once:
+                    msg += " (warning only on first subclass, there may be others)"
+                warnings.warn(msg, warn_category, stacklevel=2)
+            super().__init__(name, bases, clsdict_)
+
+        # see https://www.python.org/dev/peps/pep-3119/#overloading-isinstance-and-issubclass
+        # and https://docs.python.org/reference/datamodel.html#customizing-instance-and-subclass-checks
+        # for implementation details
+        def __instancecheck__(cls, inst: Any) -> bool:
+            return any(cls.__subclasscheck__(c) for c in (type(inst), inst.__class__))
+
+        def __subclasscheck__(cls, sub: type) -> bool:
+            if cls is not DeprecatedClass.deprecated_class:
+                # we should do the magic only if second `issubclass` argument
+                # is the deprecated class itself - subclasses of the
+                # deprecated class should not use custom `__subclasscheck__`
+                # method.
+                return super().__subclasscheck__(sub)
+
+            if not inspect.isclass(sub):
+                raise TypeError("issubclass() arg 1 must be a class")
+
+            mro = getattr(sub, "__mro__", ())
+            return any(c in {cls, new_class} for c in mro)
+
+        def __call__(cls, *args: Any, **kwargs: Any) -> Any:
+            old = DeprecatedClass.deprecated_class
+            if cls is old:
+                msg = instance_warn_message.format(
+                    cls=_clspath(cls, old_class_path),
+                    new=_clspath(new_class, new_class_path),
+                )
+                warnings.warn(msg, warn_category, stacklevel=2)
+            return super().__call__(*args, **kwargs)
+
+    deprecated_cls = DeprecatedClass(name, (new_class,), clsdict or {})
+
+    try:
+        frm = inspect.stack()[1]
+        parent_module = inspect.getmodule(frm[0])
+        if parent_module is not None:
+            deprecated_cls.__module__ = parent_module.__name__
+    except Exception as e:
+        # Sometimes inspect.stack() fails (e.g. when the first import of
+        # deprecated class is in jinja2 template). __module__ attribute is not
+        # important enough to raise an exception as users may be unable
+        # to fix inspect.stack() errors.
+        warnings.warn(f"Error detecting parent module: {e!r}")
+
+    return deprecated_cls
+
+
+def _clspath(cls: type, forced: Optional[str] = None) -> str:
+    if forced is not None:
+        return forced
+    return f"{cls.__module__}.{cls.__name__}"


 DEPRECATION_RULES: List[Tuple[str, str]] = []


-def update_classpath(path: Any) ->Any:
+@overload
+def update_classpath(path: str) -> str:
+    ...
+
+
+@overload
+def update_classpath(path: Any) -> Any:
+    ...
+
+
+def update_classpath(path: Any) -> Any:
     """Update a deprecated path from an object with its new location"""
-    pass
+    for prefix, replacement in DEPRECATION_RULES:
+        if isinstance(path, str) and path.startswith(prefix):
+            new_path = path.replace(prefix, replacement, 1)
+            warnings.warn(
+                f"`{path}` class is deprecated, use `{new_path}` instead",
+                ScrapyDeprecationWarning,
+            )
+            return new_path
+    return path


-def method_is_overridden(subclass: type, base_class: type, method_name: str
-    ) ->bool:
+def method_is_overridden(subclass: type, base_class: type, method_name: str) -> bool:
     """
     Return True if a method named ``method_name`` of a ``base_class``
     is overridden in a ``subclass``.
@@ -76,4 +187,6 @@ def method_is_overridden(subclass: type, base_class: type, method_name: str
     >>> method_is_overridden(Sub4, Base, 'foo')
     True
     """
-    pass
+    base_method = getattr(base_class, method_name)
+    sub_method = getattr(subclass, method_name)
+    return base_method.__code__ is not sub_method.__code__
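
Usage note (not part of the patch): a minimal sketch of `create_deprecated_class`; `OldName`/`NewName` are placeholder names. Instantiating the deprecated alias warns, while isinstance/issubclass checks keep treating the new class as the old one.

    import warnings
    from scrapy.utils.deprecate import create_deprecated_class

    class NewName:
        pass

    OldName = create_deprecated_class("OldName", NewName)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        OldName()  # warns that OldName is deprecated, instantiate NewName instead
    assert caught
    assert issubclass(NewName, OldName) and isinstance(NewName(), OldName)
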
diff --git a/scrapy/utils/display.py b/scrapy/utils/display.py
index 887a1f2c6..596cf89e4 100644
--- a/scrapy/utils/display.py
+++ b/scrapy/utils/display.py
@@ -1,9 +1,51 @@
 """
 pprint and pformat wrappers with colorization support
 """
+
 import ctypes
 import platform
 import sys
 from pprint import pformat as pformat_
 from typing import Any
+
 from packaging.version import Version as parse_version
+
+
+def _enable_windows_terminal_processing() -> bool:
+    # https://stackoverflow.com/a/36760881
+    kernel32 = ctypes.windll.kernel32  # type: ignore[attr-defined]
+    return bool(kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7))
+
+
+def _tty_supports_color() -> bool:
+    if sys.platform != "win32":
+        return True
+
+    if parse_version(platform.version()) < parse_version("10.0.14393"):
+        return True
+
+    # Windows >= 10.0.14393 interprets ANSI escape sequences providing terminal
+    # processing is enabled.
+    return _enable_windows_terminal_processing()
+
+
+def _colorize(text: str, colorize: bool = True) -> str:
+    if not colorize or not sys.stdout.isatty() or not _tty_supports_color():
+        return text
+    try:
+        from pygments import highlight
+    except ImportError:
+        return text
+    else:
+        from pygments.formatters import TerminalFormatter
+        from pygments.lexers import PythonLexer
+
+        return highlight(text, PythonLexer(), TerminalFormatter())
+
+
+def pformat(obj: Any, *args: Any, **kwargs: Any) -> str:
+    return _colorize(pformat_(obj), kwargs.pop("colorize", True))
+
+
+def pprint(obj: Any, *args: Any, **kwargs: Any) -> None:
+    print(pformat(obj, *args, **kwargs))
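
Usage note (not part of the patch): `pformat` only colorizes when stdout is a colour-capable tty and pygments is importable; `colorize=False` forces plain output.

    from scrapy.utils.display import pformat, pprint

    data = {"status": 200, "headers": {"Content-Type": "text/html"}}
    print(pformat(data, colorize=False))  # plain pprint.pformat output
    pprint(data)                          # colorized when supported
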
diff --git a/scrapy/utils/engine.py b/scrapy/utils/engine.py
index 2861f1a69..a5f2a8c6e 100644
--- a/scrapy/utils/engine.py
+++ b/scrapy/utils/engine.py
@@ -1,10 +1,51 @@
 """Some debugging functions for working with the Scrapy engine"""
-from time import time
+
+# used in global tests code
+from time import time  # noqa: F401
 from typing import TYPE_CHECKING, Any, List, Tuple
+
 if TYPE_CHECKING:
     from scrapy.core.engine import ExecutionEngine


-def get_engine_status(engine: 'ExecutionEngine') ->List[Tuple[str, Any]]:
+def get_engine_status(engine: "ExecutionEngine") -> List[Tuple[str, Any]]:
     """Return a report of the current engine status"""
-    pass
+    tests = [
+        "time()-engine.start_time",
+        "len(engine.downloader.active)",
+        "engine.scraper.is_idle()",
+        "engine.spider.name",
+        "engine.spider_is_idle()",
+        "engine.slot.closing",
+        "len(engine.slot.inprogress)",
+        "len(engine.slot.scheduler.dqs or [])",
+        "len(engine.slot.scheduler.mqs)",
+        "len(engine.scraper.slot.queue)",
+        "len(engine.scraper.slot.active)",
+        "engine.scraper.slot.active_size",
+        "engine.scraper.slot.itemproc_size",
+        "engine.scraper.slot.needs_backout()",
+    ]
+
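+    # Each expression above is evaluated against the live engine object; a
+    # failing expression is reported as its exception class name instead.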
+    checks: List[Tuple[str, Any]] = []
+    for test in tests:
+        try:
+            checks += [(test, eval(test))]
+        except Exception as e:
+            checks += [(test, f"{type(e).__name__} (exception)")]
+
+    return checks
+
+
+def format_engine_status(engine: "ExecutionEngine") -> str:
+    checks = get_engine_status(engine)
+    s = "Execution engine status\n\n"
+    for test, result in checks:
+        s += f"{test:<47} : {result}\n"
+    s += "\n"
+
+    return s
+
+
+def print_engine_status(engine: "ExecutionEngine") -> None:
+    print(format_engine_status(engine))
diff --git a/scrapy/utils/ftp.py b/scrapy/utils/ftp.py
index 6acf0ecb9..c77681a53 100644
--- a/scrapy/utils/ftp.py
+++ b/scrapy/utils/ftp.py
@@ -4,18 +4,42 @@ from posixpath import dirname
 from typing import IO


-def ftp_makedirs_cwd(ftp: FTP, path: str, first_call: bool=True) ->None:
+def ftp_makedirs_cwd(ftp: FTP, path: str, first_call: bool = True) -> None:
     """Set the current directory of the FTP connection given in the ``ftp``
     argument (as a ftplib.FTP object), creating all parent directories if they
     don't exist. The ftplib.FTP object must be already connected and logged in.
     """
-    pass
+    try:
+        ftp.cwd(path)
+    except error_perm:
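+        # The directory (or one of its parents) is missing: create the parent
+        # chain recursively, then the directory itself, and cwd into it only on
+        # the outermost call.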
+        ftp_makedirs_cwd(ftp, dirname(path), False)
+        ftp.mkd(path)
+        if first_call:
+            ftp.cwd(path)


-def ftp_store_file(*, path: str, file: IO, host: str, port: int, username:
-    str, password: str, use_active_mode: bool=False, overwrite: bool=True
-    ) ->None:
+def ftp_store_file(
+    *,
+    path: str,
+    file: IO,
+    host: str,
+    port: int,
+    username: str,
+    password: str,
+    use_active_mode: bool = False,
+    overwrite: bool = True,
+) -> None:
     """Opens a FTP connection with passed credentials,sets current directory
     to the directory extracted from given path, then uploads the file to server
     """
-    pass
+    with FTP() as ftp:
+        ftp.connect(host, port)
+        ftp.login(username, password)
+        if use_active_mode:
+            ftp.set_pasv(False)
+        file.seek(0)
+        dirname, filename = posixpath.split(path)
+        ftp_makedirs_cwd(ftp, dirname)
+        command = "STOR" if overwrite else "APPE"
+        ftp.storbinary(f"{command} {filename}", file)
+        file.close()
diff --git a/scrapy/utils/gz.py b/scrapy/utils/gz.py
index 69751b6b3..2e487d88b 100644
--- a/scrapy/utils/gz.py
+++ b/scrapy/utils/gz.py
@@ -1,13 +1,42 @@
 import struct
 from gzip import GzipFile
 from io import BytesIO
+
 from scrapy.http import Response
+
 from ._compression import _CHUNK_SIZE, _DecompressionMaxSizeExceeded


-def gunzip(data: bytes, *, max_size: int=0) ->bytes:
+def gunzip(data: bytes, *, max_size: int = 0) -> bytes:
     """Gunzip the given data and return as much data as possible.

     This is resilient to CRC checksum errors.
     """
-    pass
+    f = GzipFile(fileobj=BytesIO(data))
+    output_stream = BytesIO()
+    chunk = b"."
+    decompressed_size = 0
+    while chunk:
+        try:
+            chunk = f.read1(_CHUNK_SIZE)
+        except (OSError, EOFError, struct.error):
+            # complete only if there is some data, otherwise re-raise
+            # see issue 87 about catching struct.error
+            # some pages are quite small so output_stream is empty
+            if output_stream.getbuffer().nbytes > 0:
+                break
+            raise
+        decompressed_size += len(chunk)
+        if max_size and decompressed_size > max_size:
+            raise _DecompressionMaxSizeExceeded(
+                f"The number of bytes decompressed so far "
+                f"({decompressed_size} B) exceed the specified maximum "
+                f"({max_size} B)."
+            )
+        output_stream.write(chunk)
+    output_stream.seek(0)
+    return output_stream.read()
+
+
+def gzip_magic_number(response: Response) -> bool:
+    return response.body[:3] == b"\x1f\x8b\x08"
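
A minimal usage sketch for the gunzip() helper above (illustrative only, not part of the patch):

    import gzip

    from scrapy.utils.gz import gunzip

    payload = gzip.compress(b"x" * 1024)
    gunzip(payload)               # returns the original 1024 bytes
    gunzip(payload, max_size=10)  # raises _DecompressionMaxSizeExceeded
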
diff --git a/scrapy/utils/httpobj.py b/scrapy/utils/httpobj.py
index 8ecd2b938..d502e8910 100644
--- a/scrapy/utils/httpobj.py
+++ b/scrapy/utils/httpobj.py
@@ -1,15 +1,20 @@
 """Helper functions for scrapy.http objects (Request, Response)"""
+
 from typing import Union
 from urllib.parse import ParseResult, urlparse
 from weakref import WeakKeyDictionary
+
 from scrapy.http import Request, Response
-(_urlparse_cache: 'WeakKeyDictionary[Union[Request, Response], ParseResult]'
-    ) = WeakKeyDictionary()
+
+_urlparse_cache: "WeakKeyDictionary[Union[Request, Response], ParseResult]" = (
+    WeakKeyDictionary()
+)


-def urlparse_cached(request_or_response: Union[Request, Response]
-    ) ->ParseResult:
+def urlparse_cached(request_or_response: Union[Request, Response]) -> ParseResult:
     """Return urlparse.urlparse caching the result, where the argument can be a
     Request or Response object
     """
-    pass
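+    # Cached per Request/Response object in a WeakKeyDictionary, so entries are
+    # dropped automatically once the object is garbage-collected.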
+    if request_or_response not in _urlparse_cache:
+        _urlparse_cache[request_or_response] = urlparse(request_or_response.url)
+    return _urlparse_cache[request_or_response]
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
index 8b4ca3eac..db86af2c3 100644
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@@ -2,20 +2,38 @@ import csv
 import logging
 import re
 from io import StringIO
-from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable, List, Literal, Optional, Union, cast, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Literal,
+    Optional,
+    Union,
+    cast,
+    overload,
+)
 from warnings import warn
-from lxml import etree
+
+from lxml import etree  # nosec
+
 from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.http import Response, TextResponse
 from scrapy.selector import Selector
 from scrapy.utils.python import re_rsearch, to_unicode
+
 if TYPE_CHECKING:
-    from lxml._types import SupportsReadClose
+    from lxml._types import SupportsReadClose  # nosec
+
 logger = logging.getLogger(__name__)


-def xmliter(obj: Union[Response, str, bytes], nodename: str) ->Generator[
-    Selector, Any, None]:
+def xmliter(
+    obj: Union[Response, str, bytes], nodename: str
+) -> Generator[Selector, Any, None]:
     """Return a iterator of Selector's over all nodes of a XML document,
        given the name of the node to iterate. Useful for parsing XML feeds.

@@ -24,27 +42,137 @@ def xmliter(obj: Union[Response, str, bytes], nodename: str) ->Generator[
     - a unicode string
     - a string encoded as utf-8
     """
-    pass
+    warn(
+        (
+            "xmliter is deprecated and its use strongly discouraged because "
+            "it is vulnerable to ReDoS attacks. Use xmliter_lxml instead. See "
+            "https://github.com/scrapy/scrapy/security/advisories/GHSA-cc65-xxvf-f7r9"
+        ),
+        ScrapyDeprecationWarning,
+        stacklevel=2,
+    )

+    nodename_patt = re.escape(nodename)
+
+    DOCUMENT_HEADER_RE = re.compile(r"<\?xml[^>]+>\s*", re.S)
+    HEADER_END_RE = re.compile(rf"<\s*/{nodename_patt}\s*>", re.S)
+    END_TAG_RE = re.compile(r"<\s*/([^\s>]+)\s*>", re.S)
+    NAMESPACE_RE = re.compile(r"((xmlns[:A-Za-z]*)=[^>\s]+)", re.S)
+    text = _body_or_str(obj)
+
+    document_header_match = re.search(DOCUMENT_HEADER_RE, text)
+    document_header = (
+        document_header_match.group().strip() if document_header_match else ""
+    )
+    header_end_idx = re_rsearch(HEADER_END_RE, text)
+    header_end = text[header_end_idx[1] :].strip() if header_end_idx else ""
+    namespaces: Dict[str, str] = {}
+    if header_end:
+        for tagname in reversed(re.findall(END_TAG_RE, header_end)):
+            assert header_end_idx
+            tag = re.search(
+                rf"<\s*{tagname}.*?xmlns[:=][^>]*>", text[: header_end_idx[1]], re.S
+            )
+            if tag:
+                for x in re.findall(NAMESPACE_RE, tag.group()):
+                    namespaces[x[1]] = x[0]
+
+    r = re.compile(rf"<{nodename_patt}[\s>].*?</{nodename_patt}>", re.DOTALL)
+    for match in r.finditer(text):
+        nodetext = (
+            document_header
+            + match.group().replace(
+                nodename, f'{nodename} {" ".join(namespaces.values())}', 1
+            )
+            + header_end
+        )
+        yield Selector(text=nodetext, type="xml")

-class _StreamReader:

+def xmliter_lxml(
+    obj: Union[Response, str, bytes],
+    nodename: str,
+    namespace: Optional[str] = None,
+    prefix: str = "x",
+) -> Generator[Selector, Any, None]:
+    reader = _StreamReader(obj)
+    tag = f"{{{namespace}}}{nodename}" if namespace else nodename
+    iterable = etree.iterparse(
+        cast("SupportsReadClose[bytes]", reader),
+        encoding=reader.encoding,
+        events=("end", "start-ns"),
+        resolve_entities=False,
+        huge_tree=True,
+    )
+    selxpath = "//" + (f"{prefix}:{nodename}" if namespace else nodename)
+    needs_namespace_resolution = not namespace and ":" in nodename
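+    # A prefixed nodename given without an explicit namespace URI is resolved
+    # from the document's own start-ns events in the loop below.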
+    if needs_namespace_resolution:
+        prefix, nodename = nodename.split(":", maxsplit=1)
+    for event, data in iterable:
+        if event == "start-ns":
+            assert isinstance(data, tuple)
+            if needs_namespace_resolution:
+                _prefix, _namespace = data
+                if _prefix != prefix:
+                    continue
+                namespace = _namespace
+                needs_namespace_resolution = False
+                selxpath = f"//{prefix}:{nodename}"
+                tag = f"{{{namespace}}}{nodename}"
+            continue
+        assert isinstance(data, etree._Element)
+        node = data
+        if node.tag != tag:
+            continue
+        nodetext = etree.tostring(node, encoding="unicode")
+        node.clear()
+        xs = Selector(text=nodetext, type="xml")
+        if namespace:
+            xs.register_namespace(prefix, namespace)
+        yield xs.xpath(selxpath)[0]
+
+
+class _StreamReader:
     def __init__(self, obj: Union[Response, str, bytes]):
         self._ptr: int = 0
         self._text: Union[str, bytes]
         if isinstance(obj, TextResponse):
             self._text, self.encoding = obj.body, obj.encoding
         elif isinstance(obj, Response):
-            self._text, self.encoding = obj.body, 'utf-8'
+            self._text, self.encoding = obj.body, "utf-8"
         else:
-            self._text, self.encoding = obj, 'utf-8'
+            self._text, self.encoding = obj, "utf-8"
         self._is_unicode: bool = isinstance(self._text, str)
         self._is_first_read: bool = True

+    def read(self, n: int = 65535) -> bytes:
+        method: Callable[[int], bytes] = (
+            self._read_unicode if self._is_unicode else self._read_string
+        )
+        result = method(n)
+        if self._is_first_read:
+            self._is_first_read = False
+            result = result.lstrip()
+        return result
+
+    def _read_string(self, n: int = 65535) -> bytes:
+        s, e = self._ptr, self._ptr + n
+        self._ptr = e
+        return cast(bytes, self._text)[s:e]

-def csviter(obj: Union[Response, str, bytes], delimiter: Optional[str]=None,
-    headers: Optional[List[str]]=None, encoding: Optional[str]=None,
-    quotechar: Optional[str]=None) ->Generator[Dict[str, str], Any, None]:
+    def _read_unicode(self, n: int = 65535) -> bytes:
+        s, e = self._ptr, self._ptr + n
+        self._ptr = e
+        return cast(str, self._text)[s:e].encode("utf-8")
+
+
+def csviter(
+    obj: Union[Response, str, bytes],
+    delimiter: Optional[str] = None,
+    headers: Optional[List[str]] = None,
+    encoding: Optional[str] = None,
+    quotechar: Optional[str] = None,
+) -> Generator[Dict[str, str], Any, None]:
     """Returns an iterator of dictionaries from the given csv object

     obj can be:
@@ -59,4 +187,74 @@ def csviter(obj: Union[Response, str, bytes], delimiter: Optional[str]=None,

     quotechar is the character used to enclose fields in the given obj.
     """
-    pass
+
+    encoding = obj.encoding if isinstance(obj, TextResponse) else encoding or "utf-8"
+
+    def row_to_unicode(row_: Iterable) -> List[str]:
+        return [to_unicode(field, encoding) for field in row_]
+
+    lines = StringIO(_body_or_str(obj, unicode=True))
+
+    kwargs: Dict[str, Any] = {}
+    if delimiter:
+        kwargs["delimiter"] = delimiter
+    if quotechar:
+        kwargs["quotechar"] = quotechar
+    csv_r = csv.reader(lines, **kwargs)
+
+    if not headers:
+        try:
+            row = next(csv_r)
+        except StopIteration:
+            return
+        headers = row_to_unicode(row)
+
+    for row in csv_r:
+        row = row_to_unicode(row)
+        if len(row) != len(headers):
+            logger.warning(
+                "ignoring row %(csvlnum)d (length: %(csvrow)d, "
+                "should be: %(csvheader)d)",
+                {
+                    "csvlnum": csv_r.line_num,
+                    "csvrow": len(row),
+                    "csvheader": len(headers),
+                },
+            )
+            continue
+        yield dict(zip(headers, row))
+
+
+@overload
+def _body_or_str(obj: Union[Response, str, bytes]) -> str:
+    ...
+
+
+@overload
+def _body_or_str(obj: Union[Response, str, bytes], unicode: Literal[True]) -> str:
+    ...
+
+
+@overload
+def _body_or_str(obj: Union[Response, str, bytes], unicode: Literal[False]) -> bytes:
+    ...
+
+
+def _body_or_str(
+    obj: Union[Response, str, bytes], unicode: bool = True
+) -> Union[str, bytes]:
+    expected_types = (Response, str, bytes)
+    if not isinstance(obj, expected_types):
+        expected_types_str = " or ".join(t.__name__ for t in expected_types)
+        raise TypeError(
+            f"Object {obj!r} must be {expected_types_str}, not {type(obj).__name__}"
+        )
+    if isinstance(obj, Response):
+        if not unicode:
+            return cast(bytes, obj.body)
+        if isinstance(obj, TextResponse):
+            return obj.text
+        return cast(bytes, obj.body).decode("utf-8")
+    if isinstance(obj, str):
+        return obj if unicode else obj.encode("utf-8")
+    return obj.decode("utf-8") if unicode else obj
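
As an illustrative sketch (not part of the patch), csviter() accepts a plain string and yields one dict per row:

    from scrapy.utils.iterators import csviter

    rows = csviter("id,name\n1,foo\n2,bar\n")
    list(rows)  # [{'id': '1', 'name': 'foo'}, {'id': '2', 'name': 'bar'}]
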
diff --git a/scrapy/utils/job.py b/scrapy/utils/job.py
index 1149db8f5..e230e4235 100644
--- a/scrapy/utils/job.py
+++ b/scrapy/utils/job.py
@@ -1,3 +1,13 @@
 from pathlib import Path
 from typing import Optional
+
 from scrapy.settings import BaseSettings
+
+
+def job_dir(settings: BaseSettings) -> Optional[str]:
+    path: Optional[str] = settings["JOBDIR"]
+    if not path:
+        return None
+    if not Path(path).exists():
+        Path(path).mkdir(parents=True)
+    return path
diff --git a/scrapy/utils/log.py b/scrapy/utils/log.py
index cbe04ec83..e85082963 100644
--- a/scrapy/utils/log.py
+++ b/scrapy/utils/log.py
@@ -1,25 +1,49 @@
 from __future__ import annotations
+
 import logging
 import sys
 import warnings
 from logging.config import dictConfig
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional, Tuple, Type, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    List,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
+
 from twisted.python import log as twisted_log
 from twisted.python.failure import Failure
+
 import scrapy
 from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.settings import Settings
 from scrapy.utils.versions import scrapy_components_versions
+
 if TYPE_CHECKING:
     from scrapy.crawler import Crawler
+
 logger = logging.getLogger(__name__)


-def failure_to_exc_info(failure: Failure) ->Optional[Tuple[Type[
-    BaseException], BaseException, Optional[TracebackType]]]:
+def failure_to_exc_info(
+    failure: Failure,
+) -> Optional[Tuple[Type[BaseException], BaseException, Optional[TracebackType]]]:
     """Extract exc_info from Failure instances"""
-    pass
+    if isinstance(failure, Failure):
+        assert failure.type
+        assert failure.value
+        return (
+            failure.type,
+            failure.value,
+            cast(Optional[TracebackType], failure.getTracebackObject()),
+        )
+    return None


 class TopLevelFormatter(logging.Filter):
@@ -34,17 +58,38 @@ class TopLevelFormatter(logging.Filter):
     ``loggers`` list where it should act.
     """

-    def __init__(self, loggers: Optional[List[str]]=None):
+    def __init__(self, loggers: Optional[List[str]] = None):
         self.loggers: List[str] = loggers or []

-
-DEFAULT_LOGGING = {'version': 1, 'disable_existing_loggers': False,
-    'loggers': {'filelock': {'level': 'ERROR'}, 'hpack': {'level': 'ERROR'},
-    'scrapy': {'level': 'DEBUG'}, 'twisted': {'level': 'ERROR'}}}
-
-
-def configure_logging(settings: Union[Settings, dict, None]=None,
-    install_root_handler: bool=True) ->None:
+    def filter(self, record: logging.LogRecord) -> bool:
+        if any(record.name.startswith(logger + ".") for logger in self.loggers):
+            record.name = record.name.split(".", 1)[0]
+        return True
+
+
+DEFAULT_LOGGING = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "loggers": {
+        "filelock": {
+            "level": "ERROR",
+        },
+        "hpack": {
+            "level": "ERROR",
+        },
+        "scrapy": {
+            "level": "DEBUG",
+        },
+        "twisted": {
+            "level": "ERROR",
+        },
+    },
+}
+
+
+def configure_logging(
+    settings: Union[Settings, dict, None] = None, install_root_handler: bool = True
+) -> None:
     """
     Initialize logging defaults for Scrapy.

@@ -68,15 +113,93 @@ def configure_logging(settings: Union[Settings, dict, None]=None,
     using ``settings`` argument. When ``settings`` is empty or None, defaults
     are used.
     """
-    pass
+    if not sys.warnoptions:
+        # Route warnings through python logging
+        logging.captureWarnings(True)
+
+    observer = twisted_log.PythonLoggingObserver("twisted")
+    observer.start()
+
+    dictConfig(DEFAULT_LOGGING)
+
+    if isinstance(settings, dict) or settings is None:
+        settings = Settings(settings)
+
+    if settings.getbool("LOG_STDOUT"):
+        sys.stdout = StreamLogger(logging.getLogger("stdout"))  # type: ignore[assignment]
+
+    if install_root_handler:
+        install_scrapy_root_handler(settings)


 _scrapy_root_handler: Optional[logging.Handler] = None


-def _get_handler(settings: Settings) ->logging.Handler:
+def install_scrapy_root_handler(settings: Settings) -> None:
+    global _scrapy_root_handler
+
+    if (
+        _scrapy_root_handler is not None
+        and _scrapy_root_handler in logging.root.handlers
+    ):
+        logging.root.removeHandler(_scrapy_root_handler)
+    logging.root.setLevel(logging.NOTSET)
+    _scrapy_root_handler = _get_handler(settings)
+    logging.root.addHandler(_scrapy_root_handler)
+
+
+def get_scrapy_root_handler() -> Optional[logging.Handler]:
+    return _scrapy_root_handler
+
+
+def _get_handler(settings: Settings) -> logging.Handler:
     """Return a log handler object according to settings"""
-    pass
+    filename = settings.get("LOG_FILE")
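+    # LOG_FILE takes precedence; otherwise log to a stream (stderr by default),
+    # or discard everything with a NullHandler when LOG_ENABLED is false.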
+    handler: logging.Handler
+    if filename:
+        mode = "a" if settings.getbool("LOG_FILE_APPEND") else "w"
+        encoding = settings.get("LOG_ENCODING")
+        handler = logging.FileHandler(filename, mode=mode, encoding=encoding)
+    elif settings.getbool("LOG_ENABLED"):
+        handler = logging.StreamHandler()
+    else:
+        handler = logging.NullHandler()
+
+    formatter = logging.Formatter(
+        fmt=settings.get("LOG_FORMAT"), datefmt=settings.get("LOG_DATEFORMAT")
+    )
+    handler.setFormatter(formatter)
+    handler.setLevel(settings.get("LOG_LEVEL"))
+    if settings.getbool("LOG_SHORT_NAMES"):
+        handler.addFilter(TopLevelFormatter(["scrapy"]))
+    return handler
+
+
+def log_scrapy_info(settings: Settings) -> None:
+    logger.info(
+        "Scrapy %(version)s started (bot: %(bot)s)",
+        {"version": scrapy.__version__, "bot": settings["BOT_NAME"]},
+    )
+    versions = [
+        f"{name} {version}"
+        for name, version in scrapy_components_versions()
+        if name != "Scrapy"
+    ]
+    logger.info("Versions: %(versions)s", {"versions": ", ".join(versions)})
+
+
+def log_reactor_info() -> None:
+    from twisted.internet import reactor
+
+    logger.debug("Using reactor: %s.%s", reactor.__module__, reactor.__class__.__name__)
+    from twisted.internet import asyncioreactor
+
+    if isinstance(reactor, asyncioreactor.AsyncioSelectorReactor):
+        logger.debug(
+            "Using asyncio event loop: %s.%s",
+            reactor._asyncioEventloop.__module__,
+            reactor._asyncioEventloop.__class__.__name__,
+        )


 class StreamLogger:
@@ -86,10 +209,18 @@ class StreamLogger:
         https://www.electricmonk.nl/log/2011/08/14/redirect-stdout-and-stderr-to-a-logger-in-python/
     """

-    def __init__(self, logger: logging.Logger, log_level: int=logging.INFO):
+    def __init__(self, logger: logging.Logger, log_level: int = logging.INFO):
         self.logger: logging.Logger = logger
         self.log_level: int = log_level
-        self.linebuf: str = ''
+        self.linebuf: str = ""
+
+    def write(self, buf: str) -> None:
+        for line in buf.rstrip().splitlines():
+            self.logger.log(self.log_level, line.rstrip())
+
+    def flush(self) -> None:
+        for h in self.logger.handlers:
+            h.flush()


 class LogCounterHandler(logging.Handler):
@@ -99,19 +230,45 @@ class LogCounterHandler(logging.Handler):
         super().__init__(*args, **kwargs)
         self.crawler: Crawler = crawler

+    def emit(self, record: logging.LogRecord) -> None:
+        sname = f"log_count/{record.levelname}"
+        assert self.crawler.stats
+        self.crawler.stats.inc_value(sname)

-def logformatter_adapter(logkws: dict) ->Tuple[int, str, dict]:
+
+def logformatter_adapter(logkws: dict) -> Tuple[int, str, dict]:
     """
     Helper that takes the dictionary output from the methods in LogFormatter
     and adapts it into a tuple of positional arguments for logger.log calls,
     handling backward compatibility as well.
     """
-    pass
+    if not {"level", "msg", "args"} <= set(logkws):
+        warnings.warn("Missing keys in LogFormatter method", ScrapyDeprecationWarning)

+    if "format" in logkws:
+        warnings.warn(
+            "`format` key in LogFormatter methods has been "
+            "deprecated, use `msg` instead",
+            ScrapyDeprecationWarning,
+        )

-class SpiderLoggerAdapter(logging.LoggerAdapter):
+    level = logkws.get("level", logging.INFO)
+    message = logkws.get("format", logkws.get("msg"))
+    # NOTE: This also handles 'args' being an empty dict, that case doesn't
+    # play well in logger.log calls
+    args = logkws if not logkws.get("args") else logkws["args"]
+
+    return (level, message, args)

-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) ->Tuple[
-        str, MutableMapping[str, Any]]:
+
+class SpiderLoggerAdapter(logging.LoggerAdapter):
+    def process(
+        self, msg: str, kwargs: MutableMapping[str, Any]
+    ) -> Tuple[str, MutableMapping[str, Any]]:
         """Method that augments logging with additional 'extra' data"""
-        pass
+        if isinstance(kwargs.get("extra"), MutableMapping):
+            kwargs["extra"].update(self.extra)
+        else:
+            kwargs["extra"] = self.extra
+
+        return msg, kwargs
diff --git a/scrapy/utils/misc.py b/scrapy/utils/misc.py
index 6e45b6a2b..b3c28da92 100644
--- a/scrapy/utils/misc.py
+++ b/scrapy/utils/misc.py
@@ -11,27 +11,49 @@ from functools import partial
 from importlib import import_module
 from pkgutil import iter_modules
 from types import ModuleType
-from typing import IO, TYPE_CHECKING, Any, Callable, Deque, Generator, Iterable, List, Optional, Pattern, Union, cast
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Deque,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Pattern,
+    Union,
+    cast,
+)
+
 from w3lib.html import replace_entities
+
 from scrapy.item import Item
 from scrapy.utils.datatypes import LocalWeakReferencedCache
 from scrapy.utils.deprecate import ScrapyDeprecationWarning
 from scrapy.utils.python import flatten, to_unicode
+
 if TYPE_CHECKING:
     from scrapy import Spider
+
+
 _ITERABLE_SINGLE_VALUES = dict, Item, str, bytes


-def arg_to_iter(arg: Any) ->Iterable[Any]:
+def arg_to_iter(arg: Any) -> Iterable[Any]:
     """Convert an argument to an iterable. The argument can be a None, single
     value, or an iterable.

     Exception: if arg is a dict, [arg] will be returned
     """
-    pass
+    if arg is None:
+        return []
+    if not isinstance(arg, _ITERABLE_SINGLE_VALUES) and hasattr(arg, "__iter__"):
+        return cast(Iterable[Any], arg)
+    return [arg]


-def load_object(path: Union[str, Callable]) ->Any:
+def load_object(path: Union[str, Callable]) -> Any:
     """Load an object given its absolute object path, and return it.

     The object can be the import path of a class, function, variable or an
@@ -40,31 +62,86 @@ def load_object(path: Union[str, Callable]) ->Any:
     If ``path`` is not a string, but is a callable object, such as a class or
     a function, then return it as is.
     """
-    pass
+
+    if not isinstance(path, str):
+        if callable(path):
+            return path
+        raise TypeError(
+            f"Unexpected argument type, expected string or object, got: {type(path)}"
+        )
+
+    try:
+        dot = path.rindex(".")
+    except ValueError:
+        raise ValueError(f"Error loading object '{path}': not a full path")
+
+    module, name = path[:dot], path[dot + 1 :]
+    mod = import_module(module)
+
+    try:
+        obj = getattr(mod, name)
+    except AttributeError:
+        raise NameError(f"Module '{module}' doesn't define any object named '{name}'")
+
+    return obj


-def walk_modules(path: str) ->List[ModuleType]:
+def walk_modules(path: str) -> List[ModuleType]:
     """Loads a module and all its submodules from the given module path and
     returns them. If *any* module throws an exception while importing, that
     exception is thrown back.

     For example: walk_modules('scrapy.utils')
     """
-    pass

-
-def extract_regex(regex: Union[str, Pattern], text: str, encoding: str='utf-8'
-    ) ->List[str]:
+    mods: List[ModuleType] = []
+    mod = import_module(path)
+    mods.append(mod)
+    if hasattr(mod, "__path__"):
+        for _, subpath, ispkg in iter_modules(mod.__path__):
+            fullpath = path + "." + subpath
+            if ispkg:
+                mods += walk_modules(fullpath)
+            else:
+                submod = import_module(fullpath)
+                mods.append(submod)
+    return mods
+
+
+def extract_regex(
+    regex: Union[str, Pattern], text: str, encoding: str = "utf-8"
+) -> List[str]:
     """Extract a list of unicode strings from the given text/encoding using the following policies:

     * if the regex contains a named group called "extract" that will be returned
     * if the regex contains multiple numbered groups, all those will be returned (flattened)
     * if the regex doesn't contain any group the entire regex matching is returned
     """
-    pass
-
-
-def md5sum(file: IO) ->str:
+    warnings.warn(
+        "scrapy.utils.misc.extract_regex has moved to parsel.utils.extract_regex.",
+        ScrapyDeprecationWarning,
+        stacklevel=2,
+    )
+
+    if isinstance(regex, str):
+        regex = re.compile(regex, re.UNICODE)
+
+    try:
+        # named group
+        strings = [regex.search(text).group("extract")]  # type: ignore[union-attr]
+    except Exception:
+        # full regex or numbered groups
+        strings = regex.findall(text)
+    strings = flatten(strings)
+
+    if isinstance(text, str):
+        return [replace_entities(s, keep=["lt", "amp"]) for s in strings]
+    return [
+        replace_entities(to_unicode(s, encoding), keep=["lt", "amp"]) for s in strings
+    ]
+
+
+def md5sum(file: IO) -> str:
     """Calculate the md5 checksum of a file-like object without reading its
     whole content in memory.

@@ -72,12 +149,18 @@ def md5sum(file: IO) ->str:
     >>> md5sum(BytesIO(b'file content to hash'))
     '784406af91dd5a54fbb9c84c2236595a'
     """
-    pass
+    m = hashlib.md5()
+    while True:
+        d = file.read(8096)
+        if not d:
+            break
+        m.update(d)
+    return m.hexdigest()


-def rel_has_nofollow(rel: Optional[str]) ->bool:
+def rel_has_nofollow(rel: Optional[str]) -> bool:
     """Return True if link rel attribute has nofollow type"""
-    pass
+    return rel is not None and "nofollow" in rel.replace(",", " ").split()


 def create_instance(objcls, settings, crawler, *args, **kwargs):
@@ -97,39 +180,120 @@ def create_instance(objcls, settings, crawler, *args, **kwargs):
        Raises ``TypeError`` if the resulting instance is ``None`` (e.g. if an
        extension has not been implemented correctly).
     """
-    pass
+    if settings is None:
+        if crawler is None:
+            raise ValueError("Specify at least one of settings and crawler.")
+        settings = crawler.settings
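+    # Preference order: from_crawler() when a crawler is available, then
+    # from_settings(), then a plain constructor call.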
+    if crawler and hasattr(objcls, "from_crawler"):
+        instance = objcls.from_crawler(crawler, *args, **kwargs)
+        method_name = "from_crawler"
+    elif hasattr(objcls, "from_settings"):
+        instance = objcls.from_settings(settings, *args, **kwargs)
+        method_name = "from_settings"
+    else:
+        instance = objcls(*args, **kwargs)
+        method_name = "__new__"
+    if instance is None:
+        raise TypeError(f"{objcls.__qualname__}.{method_name} returned None")
+    return instance


 @contextmanager
-def set_environ(**kwargs: str) ->Generator[None, Any, None]:
+def set_environ(**kwargs: str) -> Generator[None, Any, None]:
     """Temporarily set environment variables inside the context manager and
     fully restore previous environment afterwards
     """
-    pass

+    original_env = {k: os.environ.get(k) for k in kwargs}
+    os.environ.update(kwargs)
+    try:
+        yield
+    finally:
+        for k, v in original_env.items():
+            if v is None:
+                del os.environ[k]
+            else:
+                os.environ[k] = v

-def walk_callable(node: ast.AST) ->Generator[ast.AST, Any, None]:
+
+def walk_callable(node: ast.AST) -> Generator[ast.AST, Any, None]:
     """Similar to ``ast.walk``, but walks only function body and skips nested
     functions defined within the node.
     """
-    pass
+    todo: Deque[ast.AST] = deque([node])
+    walked_func_def = False
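+    # Breadth-first walk: the first FunctionDef seen is the callable itself;
+    # any later FunctionDef is a nested function and is skipped entirely.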
+    while todo:
+        node = todo.popleft()
+        if isinstance(node, ast.FunctionDef):
+            if walked_func_def:
+                continue
+            walked_func_def = True
+        todo.extend(ast.iter_child_nodes(node))
+        yield node


 _generator_callbacks_cache = LocalWeakReferencedCache(limit=128)


-def is_generator_with_return_value(callable: Callable) ->bool:
+def is_generator_with_return_value(callable: Callable) -> bool:
     """
     Returns True if a callable is a generator function which includes a
     'return' statement with a value different than None, False otherwise
     """
-    pass
+    if callable in _generator_callbacks_cache:
+        return bool(_generator_callbacks_cache[callable])
+
+    def returns_none(return_node: ast.Return) -> bool:
+        value = return_node.value
+        return (
+            value is None or isinstance(value, ast.NameConstant) and value.value is None
+        )
+
+    if inspect.isgeneratorfunction(callable):
+        func = callable
+        while isinstance(func, partial):
+            func = func.func
+
+        src = inspect.getsource(func)
+        pattern = re.compile(r"(^[\t ]+)")
+        code = pattern.sub("", src)
+
+        match = pattern.match(src)  # finds indentation
+        if match:
+            code = re.sub(f"\n{match.group(0)}", "\n", code)  # remove indentation
+
+        tree = ast.parse(code)
+        for node in walk_callable(tree):
+            if isinstance(node, ast.Return) and not returns_none(node):
+                _generator_callbacks_cache[callable] = True
+                return bool(_generator_callbacks_cache[callable])
+
+    _generator_callbacks_cache[callable] = False
+    return bool(_generator_callbacks_cache[callable])


-def warn_on_generator_with_return_value(spider: 'Spider', callable: Callable
-    ) ->None:
+def warn_on_generator_with_return_value(spider: "Spider", callable: Callable) -> None:
     """
     Logs a warning if a callable is a generator function and includes
     a 'return' statement with a value different than None
     """
-    pass
+    try:
+        if is_generator_with_return_value(callable):
+            warnings.warn(
+                f'The "{spider.__class__.__name__}.{callable.__name__}" method is '
+                'a generator and includes a "return" statement with a value '
+                "different than None. This could lead to unexpected behaviour. Please see "
+                "https://docs.python.org/3/reference/simple_stmts.html#the-return-statement "
+                'for details about the semantics of the "return" statement within generators',
+                stacklevel=2,
+            )
+    except IndentationError:
+        callable_name = spider.__class__.__name__ + "." + callable.__name__
+        warnings.warn(
+            f'Unable to determine whether or not "{callable_name}" is a generator with a return value. '
+            "This will not prevent your code from working, but it prevents Scrapy from detecting "
+            f'potential issues in your implementation of "{callable_name}". Please, report this in the '
+            "Scrapy issue tracker (https://github.com/scrapy/scrapy/issues), "
+            f'including the code of "{callable_name}"',
+            stacklevel=2,
+        )
diff --git a/scrapy/utils/ossignal.py b/scrapy/utils/ossignal.py
index 012a5cf9a..db9a71273 100644
--- a/scrapy/utils/ossignal.py
+++ b/scrapy/utils/ossignal.py
@@ -1,21 +1,31 @@
 import signal
 from types import FrameType
 from typing import Any, Callable, Dict, Optional, Union
-SignalHandlerT = Union[Callable[[int, Optional[FrameType]], Any], int,
-    signal.Handlers, None]
+
+# copy of _HANDLER from typeshed/stdlib/signal.pyi
+SignalHandlerT = Union[
+    Callable[[int, Optional[FrameType]], Any], int, signal.Handlers, None
+]
+
 signal_names: Dict[int, str] = {}
 for signame in dir(signal):
-    if signame.startswith('SIG') and not signame.startswith('SIG_'):
+    if signame.startswith("SIG") and not signame.startswith("SIG_"):
         signum = getattr(signal, signame)
         if isinstance(signum, int):
             signal_names[signum] = signame


-def install_shutdown_handlers(function: SignalHandlerT, override_sigint:
-    bool=True) ->None:
+def install_shutdown_handlers(
+    function: SignalHandlerT, override_sigint: bool = True
+) -> None:
     """Install the given function as a signal handler for all common shutdown
     signals (such as SIGINT, SIGTERM, etc). If ``override_sigint`` is ``False`` the
     SIGINT handler won't be installed if there is already a handler in place
     (e.g. Pdb)
     """
-    pass
+    signal.signal(signal.SIGTERM, function)
+    if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
+        signal.signal(signal.SIGINT, function)
+    # Catch Ctrl-Break in windows
+    if hasattr(signal, "SIGBREAK"):
+        signal.signal(signal.SIGBREAK, function)
diff --git a/scrapy/utils/project.py b/scrapy/utils/project.py
index ce406428a..a2c224b90 100644
--- a/scrapy/utils/project.py
+++ b/scrapy/utils/project.py
@@ -2,21 +2,87 @@ import os
 import warnings
 from importlib import import_module
 from pathlib import Path
+
 from scrapy.exceptions import NotConfigured
 from scrapy.settings import Settings
 from scrapy.utils.conf import closest_scrapy_cfg, get_config, init_env
-ENVVAR = 'SCRAPY_SETTINGS_MODULE'
-DATADIR_CFG_SECTION = 'datadir'

+ENVVAR = "SCRAPY_SETTINGS_MODULE"
+DATADIR_CFG_SECTION = "datadir"
+
+
+def inside_project() -> bool:
+    scrapy_module = os.environ.get(ENVVAR)
+    if scrapy_module:
+        try:
+            import_module(scrapy_module)
+        except ImportError as exc:
+            warnings.warn(
+                f"Cannot import scrapy settings module {scrapy_module}: {exc}"
+            )
+        else:
+            return True
+    return bool(closest_scrapy_cfg())

-def project_data_dir(project: str='default') ->str:
+
+def project_data_dir(project: str = "default") -> str:
     """Return the current project data dir, creating it if it doesn't exist"""
-    pass
+    if not inside_project():
+        raise NotConfigured("Not inside a project")
+    cfg = get_config()
+    if cfg.has_option(DATADIR_CFG_SECTION, project):
+        d = Path(cfg.get(DATADIR_CFG_SECTION, project))
+    else:
+        scrapy_cfg = closest_scrapy_cfg()
+        if not scrapy_cfg:
+            raise NotConfigured(
+                "Unable to find scrapy.cfg file to infer project data dir"
+            )
+        d = (Path(scrapy_cfg).parent / ".scrapy").resolve()
+    if not d.exists():
+        d.mkdir(parents=True)
+    return str(d)


-def data_path(path: str, createdir: bool=False) ->str:
+def data_path(path: str, createdir: bool = False) -> str:
     """
     Return the given path joined with the .scrapy data directory.
     If given an absolute path, return it unmodified.
     """
-    pass
+    path_obj = Path(path)
+    if not path_obj.is_absolute():
+        if inside_project():
+            path_obj = Path(project_data_dir(), path)
+        else:
+            path_obj = Path(".scrapy", path)
+    if createdir and not path_obj.exists():
+        path_obj.mkdir(parents=True)
+    return str(path_obj)
+
+
+def get_project_settings() -> Settings:
+    if ENVVAR not in os.environ:
+        project = os.environ.get("SCRAPY_PROJECT", "default")
+        init_env(project)
+
+    settings = Settings()
+    settings_module_path = os.environ.get(ENVVAR)
+    if settings_module_path:
+        settings.setmodule(settings_module_path, priority="project")
+
+    valid_envvars = {
+        "CHECK",
+        "PROJECT",
+        "PYTHON_SHELL",
+        "SETTINGS_MODULE",
+    }
+
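+    # Only SCRAPY_-prefixed environment variables whose suffix appears above may
+    # override settings; the SCRAPY_ prefix is stripped before applying them.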
+    scrapy_envvars = {
+        k[7:]: v
+        for k, v in os.environ.items()
+        if k.startswith("SCRAPY_") and k.replace("SCRAPY_", "") in valid_envvars
+    }
+
+    settings.setdict(scrapy_envvars, priority="project")
+
+    return settings
diff --git a/scrapy/utils/python.py b/scrapy/utils/python.py
index fa47a6995..20305a75e 100644
--- a/scrapy/utils/python.py
+++ b/scrapy/utils/python.py
@@ -9,11 +9,29 @@ import sys
 import weakref
 from functools import partial, wraps
 from itertools import chain
-from typing import Any, AsyncGenerator, AsyncIterable, AsyncIterator, Callable, Dict, Generator, Iterable, Iterator, List, Mapping, Optional, Pattern, Tuple, Union, overload
+from typing import (
+    Any,
+    AsyncGenerator,
+    AsyncIterable,
+    AsyncIterator,
+    Callable,
+    Dict,
+    Generator,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Pattern,
+    Tuple,
+    Union,
+    overload,
+)
+
 from scrapy.utils.asyncgen import as_async_generator


-def flatten(x: Iterable) ->list:
+def flatten(x: Iterable) -> list:
     """flatten(sequence) -> list

     Returns a single, flat list which contains all elements retrieved
@@ -30,17 +48,22 @@ def flatten(x: Iterable) ->list:
     >>> flatten(["foo", ["baz", 42], "bar"])
     ['foo', 'baz', 42, 'bar']
     """
-    pass
+    return list(iflatten(x))


-def iflatten(x: Iterable) ->Iterable:
+def iflatten(x: Iterable) -> Iterable:
     """iflatten(sequence) -> iterator

     Similar to ``.flatten()``, but returns iterator instead"""
-    pass
+    for el in x:
+        if is_listlike(el):
+            for el_ in iflatten(el):
+                yield el_
+        else:
+            yield el


-def is_listlike(x: Any) ->bool:
+def is_listlike(x: Any) -> bool:
     """
     >>> is_listlike("foo")
     False
@@ -61,30 +84,58 @@ def is_listlike(x: Any) ->bool:
     >>> is_listlike(range(5))
     True
     """
-    pass
+    return hasattr(x, "__iter__") and not isinstance(x, (str, bytes))


-def unique(list_: Iterable, key: Callable[[Any], Any]=lambda x: x) ->list:
+def unique(list_: Iterable, key: Callable[[Any], Any] = lambda x: x) -> list:
     """efficient function to uniquify a list preserving item order"""
-    pass
-
-
-def to_unicode(text: Union[str, bytes], encoding: Optional[str]=None,
-    errors: str='strict') ->str:
+    seen = set()
+    result = []
+    for item in list_:
+        seenkey = key(item)
+        if seenkey in seen:
+            continue
+        seen.add(seenkey)
+        result.append(item)
+    return result
+
+
+def to_unicode(
+    text: Union[str, bytes], encoding: Optional[str] = None, errors: str = "strict"
+) -> str:
     """Return the unicode representation of a bytes object ``text``. If
     ``text`` is already a unicode object, return it as-is."""
-    pass
-
-
-def to_bytes(text: Union[str, bytes], encoding: Optional[str]=None, errors:
-    str='strict') ->bytes:
+    if isinstance(text, str):
+        return text
+    if not isinstance(text, (bytes, str)):
+        raise TypeError(
+            "to_unicode must receive a bytes or str "
+            f"object, got {type(text).__name__}"
+        )
+    if encoding is None:
+        encoding = "utf-8"
+    return text.decode(encoding, errors)
+
+
+def to_bytes(
+    text: Union[str, bytes], encoding: Optional[str] = None, errors: str = "strict"
+) -> bytes:
     """Return the binary representation of ``text``. If ``text``
     is already a bytes object, return it as-is."""
-    pass
-
-
-def re_rsearch(pattern: Union[str, Pattern], text: str, chunk_size: int=1024
-    ) ->Optional[Tuple[int, int]]:
+    if isinstance(text, bytes):
+        return text
+    if not isinstance(text, str):
+        raise TypeError(
+            "to_bytes must receive a str or bytes " f"object, got {type(text).__name__}"
+        )
+    if encoding is None:
+        encoding = "utf-8"
+    return text.encode(encoding, errors)
+
+
+def re_rsearch(
+    pattern: Union[str, Pattern], text: str, chunk_size: int = 1024
+) -> Optional[Tuple[int, int]]:
     """
     This function does a reverse search in a text using a regular expression
     given in the attribute 'pattern'.
@@ -97,33 +148,87 @@ def re_rsearch(pattern: Union[str, Pattern], text: str, chunk_size: int=1024
     If the pattern is not found, None is returned; otherwise, a tuple with the
     start and end positions of the match (relative to the entire text) is returned.
     """
-    pass

+    def _chunk_iter() -> Generator[Tuple[str, int], Any, None]:
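+        # Yield progressively longer suffixes of ``text`` (in chunk_size KiB
+        # steps), so the pattern is effectively searched from the end first.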
+        offset = len(text)
+        while True:
+            offset -= chunk_size * 1024
+            if offset <= 0:
+                break
+            yield (text[offset:], offset)
+        yield (text, 0)
+
+    if isinstance(pattern, str):
+        pattern = re.compile(pattern)

-def memoizemethod_noargs(method: Callable) ->Callable:
+    for chunk, offset in _chunk_iter():
+        matches = list(pattern.finditer(chunk))
+        if matches:
+            start, end = matches[-1].span()
+            return offset + start, offset + end
+    return None
+
+
+def memoizemethod_noargs(method: Callable) -> Callable:
     """Decorator to cache the result of a method (without arguments) using a
     weak reference to its object
     """
-    pass
+    cache: weakref.WeakKeyDictionary[Any, Any] = weakref.WeakKeyDictionary()
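+    # One cached result per instance; weak keys let instances be garbage
+    # collected even while their results are memoized here.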
+
+    @wraps(method)
+    def new_method(self: Any, *args: Any, **kwargs: Any) -> Any:
+        if self not in cache:
+            cache[self] = method(self, *args, **kwargs)
+        return cache[self]
+
+    return new_method


-_BINARYCHARS = {i for i in range(32) if to_bytes(chr(i)) not in {b'\x00',
-    b'\t', b'\n', b'\r'}}
+_BINARYCHARS = {
+    i for i in range(32) if to_bytes(chr(i)) not in {b"\0", b"\t", b"\n", b"\r"}
+}


-def binary_is_text(data: bytes) ->bool:
+def binary_is_text(data: bytes) -> bool:
     """Returns ``True`` if the given ``data`` argument (a ``bytes`` object)
     does not contain unprintable control characters.
     """
-    pass
+    if not isinstance(data, bytes):
+        raise TypeError(f"data must be bytes, got '{type(data).__name__}'")
+    return all(c not in _BINARYCHARS for c in data)


-def get_func_args(func: Callable, stripself: bool=False) ->List[str]:
+def get_func_args(func: Callable, stripself: bool = False) -> List[str]:
     """Return the argument name list of a callable object"""
-    pass
-
-
-def get_spec(func: Callable) ->Tuple[List[str], Dict[str, Any]]:
+    if not callable(func):
+        raise TypeError(f"func must be callable, got '{type(func).__name__}'")
+
+    args: List[str] = []
+    try:
+        sig = inspect.signature(func)
+    except ValueError:
+        return args
+
+    if isinstance(func, partial):
+        partial_args = func.args
+        partial_kw = func.keywords
+
+        for name, param in sig.parameters.items():
+            if param.name in partial_args:
+                continue
+            if partial_kw and param.name in partial_kw:
+                continue
+            args.append(name)
+    else:
+        for name in sig.parameters.keys():
+            args.append(name)
+
+    if stripself and args and args[0] == "self":
+        args = args[1:]
+    return args
+
+
+def get_spec(func: Callable) -> Tuple[List[str], Dict[str, Any]]:
     """Returns (args, kwargs) tuple for a function
     >>> import re
     >>> get_spec(re.match)
@@ -144,26 +249,66 @@ def get_spec(func: Callable) ->Tuple[List[str], Dict[str, Any]]:
     >>> get_spec(Test().method)
     (['self', 'val'], {'flags': 0})
     """
-    pass

+    if inspect.isfunction(func) or inspect.ismethod(func):
+        spec = inspect.getfullargspec(func)
+    elif hasattr(func, "__call__"):
+        spec = inspect.getfullargspec(func.__call__)
+    else:
+        raise TypeError(f"{type(func)} is not callable")
+
+    defaults: Tuple[Any, ...] = spec.defaults or ()

-def equal_attributes(obj1: Any, obj2: Any, attributes: Optional[List[Union[
-    str, Callable]]]) ->bool:
+    firstdefault = len(spec.args) - len(defaults)
+    args = spec.args[:firstdefault]
+    kwargs = dict(zip(spec.args[firstdefault:], defaults))
+    return args, kwargs
+
+
+def equal_attributes(
+    obj1: Any, obj2: Any, attributes: Optional[List[Union[str, Callable]]]
+) -> bool:
     """Compare two objects attributes"""
-    pass
+    # no attributes given: return False by default
+    if not attributes:
+        return False
+
+    temp1, temp2 = object(), object()
+    for attr in attributes:
+        # support callables like itemgetter
+        if callable(attr):
+            if attr(obj1) != attr(obj2):
+                return False
+        elif getattr(obj1, attr, temp1) != getattr(obj2, attr, temp2):
+            return False
+    # all attributes equal
+    return True
+
+
+@overload
+def without_none_values(iterable: Mapping) -> dict:
+    ...
+
+
+@overload
+def without_none_values(iterable: Iterable) -> Iterable:
+    ...


-def without_none_values(iterable: Union[Mapping, Iterable]) ->Union[dict,
-    Iterable]:
+def without_none_values(iterable: Union[Mapping, Iterable]) -> Union[dict, Iterable]:
     """Return a copy of ``iterable`` with all ``None`` entries removed.

     If ``iterable`` is a mapping, return a dictionary where all pairs that have
     value ``None`` have been removed.
     """
-    pass
+    if isinstance(iterable, collections.abc.Mapping):
+        return {k: v for k, v in iterable.items() if v is not None}
+    else:
+        # the iterable __init__ must take another iterable
+        return type(iterable)(v for v in iterable if v is not None)  # type: ignore[call-arg]


-def global_object_name(obj: Any) ->str:
+def global_object_name(obj: Any) -> str:
     """
     Return full name of a global object.

@@ -171,10 +316,20 @@ def global_object_name(obj: Any) ->str:
     >>> global_object_name(Request)
     'scrapy.http.request.Request'
     """
-    pass
+    return f"{obj.__module__}.{obj.__qualname__}"


-if hasattr(sys, 'pypy_version_info'):
+if hasattr(sys, "pypy_version_info"):
+
+    def garbage_collect() -> None:
+        # Collecting weakreferences can take two collections on PyPy.
+        gc.collect()
+        gc.collect()
+
+else:
+
+    def garbage_collect() -> None:
+        gc.collect()


 class MutableChain(Iterable):
@@ -185,13 +340,22 @@ class MutableChain(Iterable):
     def __init__(self, *args: Iterable):
         self.data = chain.from_iterable(args)

-    def __iter__(self) ->Iterator:
+    def extend(self, *iterables: Iterable) -> None:
+        self.data = chain(self.data, chain.from_iterable(iterables))
+
+    def __iter__(self) -> Iterator:
         return self

-    def __next__(self) ->Any:
+    def __next__(self) -> Any:
         return next(self.data)


+async def _async_chain(*iterables: Union[Iterable, AsyncIterable]) -> AsyncGenerator:
+    for it in iterables:
+        async for o in as_async_generator(it):
+            yield o
+
+
 class MutableAsyncChain(AsyncIterable):
     """
     Similar to MutableChain but for async iterables
@@ -200,8 +364,11 @@ class MutableAsyncChain(AsyncIterable):
     def __init__(self, *args: Union[Iterable, AsyncIterable]):
         self.data = _async_chain(*args)

-    def __aiter__(self) ->AsyncIterator:
+    def extend(self, *iterables: Union[Iterable, AsyncIterable]) -> None:
+        self.data = _async_chain(self.data, _async_chain(*iterables))
+
+    def __aiter__(self) -> AsyncIterator:
         return self

-    async def __anext__(self) ->Any:
+    async def __anext__(self) -> Any:
         return await self.data.__anext__()
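
A short, illustrative round-trip through the helpers implemented above (not part of the patch):

    from scrapy.utils.python import to_bytes, to_unicode, without_none_values

    to_bytes("café")                          # b'caf\xc3\xa9'
    to_unicode(b"caf\xc3\xa9")                # 'café'
    without_none_values({"a": 1, "b": None})  # {'a': 1}
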
diff --git a/scrapy/utils/reactor.py b/scrapy/utils/reactor.py
index 5f7bcdfbd..ad3d1d8bc 100644
--- a/scrapy/utils/reactor.py
+++ b/scrapy/utils/reactor.py
@@ -4,15 +4,32 @@ from asyncio import AbstractEventLoop, AbstractEventLoopPolicy
 from contextlib import suppress
 from typing import Any, Callable, Dict, Optional, Sequence, Type
 from warnings import catch_warnings, filterwarnings, warn
+
 from twisted.internet import asyncioreactor, error
 from twisted.internet.base import DelayedCall
+
 from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.misc import load_object


 def listen_tcp(portrange, host, factory):
     """Like reactor.listenTCP but tries different ports in a range."""
-    pass
+    from twisted.internet import reactor
+
+    if len(portrange) > 2:
+        raise ValueError(f"invalid portrange: {portrange}")
+    if not portrange:
+        return reactor.listenTCP(0, factory, interface=host)
+    if not hasattr(portrange, "__iter__"):
+        return reactor.listenTCP(portrange, factory, interface=host)
+    if len(portrange) == 1:
+        return reactor.listenTCP(portrange[0], factory, interface=host)
+    for x in range(portrange[0], portrange[1] + 1):
+        try:
+            return reactor.listenTCP(x, factory, interface=host)
+        except error.CannotListenError:
+            if x == portrange[1]:
+                raise


 class CallLaterOnce:
@@ -26,34 +43,146 @@ class CallLaterOnce:
         self._kw: Dict[str, Any] = kw
         self._call: Optional[DelayedCall] = None

-    def __call__(self) ->Any:
+    def schedule(self, delay: float = 0) -> None:
+        from twisted.internet import reactor
+
+        if self._call is None:
+            self._call = reactor.callLater(delay, self)
+
+    def cancel(self) -> None:
+        if self._call:
+            self._call.cancel()
+
+    def __call__(self) -> Any:
         self._call = None
         return self._func(*self._a, **self._kw)


-def set_asyncio_event_loop_policy() ->None:
+def set_asyncio_event_loop_policy() -> None:
     """The policy functions from asyncio often behave unexpectedly,
     so we restrict their use to the absolutely essential case.
     This should only be used to install the reactor.
     """
-    pass
+    _get_asyncio_event_loop_policy()
+
+
+def get_asyncio_event_loop_policy() -> AbstractEventLoopPolicy:
+    warn(
+        "Call to deprecated function "
+        "scrapy.utils.reactor.get_asyncio_event_loop_policy().\n"
+        "\n"
+        "Please use get_event_loop, new_event_loop and set_event_loop"
+        " from asyncio instead, as the corresponding policy methods may lead"
+        " to unexpected behaviour.\n"
+        "This function is replaced by set_asyncio_event_loop_policy and"
+        " is meant to be used only when the reactor is being installed.",
+        category=ScrapyDeprecationWarning,
+        stacklevel=2,
+    )
+    return _get_asyncio_event_loop_policy()
+
+
+def _get_asyncio_event_loop_policy() -> AbstractEventLoopPolicy:
+    policy = asyncio.get_event_loop_policy()
+    if (
+        sys.version_info >= (3, 8)
+        and sys.platform == "win32"
+        and not isinstance(policy, asyncio.WindowsSelectorEventLoopPolicy)
+    ):
+        policy = asyncio.WindowsSelectorEventLoopPolicy()
+        asyncio.set_event_loop_policy(policy)
+    return policy


-def install_reactor(reactor_path: str, event_loop_path: Optional[str]=None
-    ) ->None:
+def install_reactor(reactor_path: str, event_loop_path: Optional[str] = None) -> None:
     """Installs the :mod:`~twisted.internet.reactor` with the specified
     import path. Also installs the asyncio event loop with the specified import
     path if the asyncio reactor is enabled"""
-    pass
+    reactor_class = load_object(reactor_path)
+    if reactor_class is asyncioreactor.AsyncioSelectorReactor:
+        set_asyncio_event_loop_policy()
+        with suppress(error.ReactorAlreadyInstalledError):
+            event_loop = set_asyncio_event_loop(event_loop_path)
+            asyncioreactor.install(eventloop=event_loop)
+    else:
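+        # Non-asyncio reactors follow the Twisted convention of exposing an
+        # install() helper in the module that defines the reactor class.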
+        *module, _ = reactor_path.split(".")
+        installer_path = module + ["install"]
+        installer = load_object(".".join(installer_path))
+        with suppress(error.ReactorAlreadyInstalledError):
+            installer()


-def set_asyncio_event_loop(event_loop_path: Optional[str]) ->AbstractEventLoop:
+def _get_asyncio_event_loop() -> AbstractEventLoop:
+    return set_asyncio_event_loop(None)
+
+
+def set_asyncio_event_loop(event_loop_path: Optional[str]) -> AbstractEventLoop:
     """Sets and returns the event loop with specified import path."""
-    pass
+    if event_loop_path is not None:
+        event_loop_class: Type[AbstractEventLoop] = load_object(event_loop_path)
+        event_loop = event_loop_class()
+        asyncio.set_event_loop(event_loop)
+    else:
+        try:
+            with catch_warnings():
+                # In Python 3.10.9, 3.11.1, 3.12 and 3.13, a DeprecationWarning
+                # is emitted about the lack of a current event loop, because in
+                # Python 3.14 and later `get_event_loop` will raise a
+                # RuntimeError in that event. Because our code is already
+                # prepared for that future behavior, we ignore the deprecation
+                # warning.
+                filterwarnings(
+                    "ignore",
+                    message="There is no current event loop",
+                    category=DeprecationWarning,
+                )
+                event_loop = asyncio.get_event_loop()
+        except RuntimeError:
+            # `get_event_loop` raises RuntimeError when called with no asyncio
+            # event loop yet installed in the following scenarios:
+            # - Foreseeably, on Python 3.14 and later.
+            #   https://github.com/python/cpython/issues/100160#issuecomment-1345581902
+            event_loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(event_loop)
+    return event_loop


-def verify_installed_reactor(reactor_path: str) ->None:
+def verify_installed_reactor(reactor_path: str) -> None:
     """Raises :exc:`Exception` if the installed
     :mod:`~twisted.internet.reactor` does not match the specified import
     path."""
-    pass
+    from twisted.internet import reactor
+
+    reactor_class = load_object(reactor_path)
+    if reactor.__class__ != reactor_class:
+        msg = (
+            "The installed reactor "
+            f"({reactor.__module__}.{reactor.__class__.__name__}) does not "
+            f"match the requested one ({reactor_path})"
+        )
+        raise Exception(msg)
+
+
+def verify_installed_asyncio_event_loop(loop_path: str) -> None:
+    from twisted.internet import reactor
+
+    loop_class = load_object(loop_path)
+    if isinstance(reactor._asyncioEventloop, loop_class):
+        return
+    installed = (
+        f"{reactor._asyncioEventloop.__class__.__module__}"
+        f".{reactor._asyncioEventloop.__class__.__qualname__}"
+    )
+    specified = f"{loop_class.__module__}.{loop_class.__qualname__}"
+    raise Exception(
+        "Scrapy found an asyncio Twisted reactor already "
+        f"installed, and its event loop class ({installed}) does "
+        "not match the one specified in the ASYNCIO_EVENT_LOOP "
+        f"setting ({specified})"
+    )
+
+
+def is_asyncio_reactor_installed() -> bool:
+    from twisted.internet import reactor
+
+    return isinstance(reactor, asyncioreactor.AsyncioSelectorReactor)
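
For reference, a minimal usage sketch of the reactor helpers above. It assumes a fresh process in which no Twisted reactor has been installed yet; the reactor path is the standard asyncio one shipped with Twisted:

    from scrapy.utils.reactor import (
        install_reactor,
        is_asyncio_reactor_installed,
        verify_installed_reactor,
    )

    reactor_path = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
    install_reactor(reactor_path)           # sets the event loop policy, then installs
    print(is_asyncio_reactor_installed())   # True
    verify_installed_reactor(reactor_path)  # raises if a different reactor is installed
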
diff --git a/scrapy/utils/request.py b/scrapy/utils/request.py
index 9514d35a1..24fcbd85e 100644
--- a/scrapy/utils/request.py
+++ b/scrapy/utils/request.py
@@ -2,27 +2,57 @@
 This module provides some useful functions for working with
 scrapy.http.Request objects
 """
+
 import hashlib
 import json
 import warnings
-from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, List, Optional, Protocol, Tuple, Type, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Protocol,
+    Tuple,
+    Type,
+    Union,
+)
 from urllib.parse import urlunparse
 from weakref import WeakKeyDictionary
+
 from w3lib.http import basic_auth_header
 from w3lib.url import canonicalize_url
+
 from scrapy import Request, Spider
 from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.misc import load_object
 from scrapy.utils.python import to_bytes, to_unicode
+
 if TYPE_CHECKING:
     from scrapy.crawler import Crawler
-_deprecated_fingerprint_cache: 'WeakKeyDictionary[Request, Dict[Tuple[Optional[Tuple[bytes, ...]], bool], str]]'
+
+_deprecated_fingerprint_cache: "WeakKeyDictionary[Request, Dict[Tuple[Optional[Tuple[bytes, ...]], bool], str]]"
 _deprecated_fingerprint_cache = WeakKeyDictionary()


-def request_fingerprint(request: Request, include_headers: Optional[
-    Iterable[Union[bytes, str]]]=None, keep_fragments: bool=False) ->str:
+def _serialize_headers(
+    headers: Iterable[bytes], request: Request
+) -> Generator[bytes, Any, None]:
+    for header in headers:
+        if header in request.headers:
+            yield header
+            for value in request.headers.getlist(header):
+                yield value
+
+
+def request_fingerprint(
+    request: Request,
+    include_headers: Optional[Iterable[Union[bytes, str]]] = None,
+    keep_fragments: bool = False,
+) -> str:
     """
     Return the request fingerprint as a hexadecimal string.

@@ -53,15 +83,95 @@ def request_fingerprint(request: Request, include_headers: Optional[
     If you want to include them, set the keep_fragments argument to True
     (for instance when handling requests with a headless browser).
     """
-    pass
-
-
-_fingerprint_cache: 'WeakKeyDictionary[Request, Dict[Tuple[Optional[Tuple[bytes, ...]], bool], bytes]]'
+    if include_headers or keep_fragments:
+        message = (
+            "Call to deprecated function "
+            "scrapy.utils.request.request_fingerprint().\n"
+            "\n"
+            "If you are using this function in a Scrapy component because you "
+            "need a non-default fingerprinting algorithm, and you are OK "
+            "with that non-default fingerprinting algorithm being used by "
+            "all Scrapy components and not just the one calling this "
+            "function, use crawler.request_fingerprinter.fingerprint() "
+            "instead in your Scrapy component (you can get the crawler "
+            "object from the 'from_crawler' class method), and use the "
+            "'REQUEST_FINGERPRINTER_CLASS' setting to configure your "
+            "non-default fingerprinting algorithm.\n"
+            "\n"
+            "Otherwise, consider using the "
+            "scrapy.utils.request.fingerprint() function instead.\n"
+            "\n"
+            "If you switch to 'fingerprint()', or assign the "
+            "'REQUEST_FINGERPRINTER_CLASS' setting a class that uses "
+            "'fingerprint()', the generated fingerprints will not only be "
+            "bytes instead of a string, but they will also be different from "
+            "those generated by 'request_fingerprint()'. Before you switch, "
+            "make sure that you understand the consequences of this (e.g. "
+            "cache invalidation) and are OK with them; otherwise, consider "
+            "implementing your own function which returns the same "
+            "fingerprints as the deprecated 'request_fingerprint()' function."
+        )
+    else:
+        message = (
+            "Call to deprecated function "
+            "scrapy.utils.request.request_fingerprint().\n"
+            "\n"
+            "If you are using this function in a Scrapy component, and you "
+            "are OK with users of your component changing the fingerprinting "
+            "algorithm through settings, use "
+            "crawler.request_fingerprinter.fingerprint() instead in your "
+            "Scrapy component (you can get the crawler object from the "
+            "'from_crawler' class method).\n"
+            "\n"
+            "Otherwise, consider using the "
+            "scrapy.utils.request.fingerprint() function instead.\n"
+            "\n"
+            "Either way, the resulting fingerprints will be returned as "
+            "bytes, not as a string, and they will also be different from "
+            "those generated by 'request_fingerprint()'. Before you switch, "
+            "make sure that you understand the consequences of this (e.g. "
+            "cache invalidation) and are OK with them; otherwise, consider "
+            "implementing your own function which returns the same "
+            "fingerprints as the deprecated 'request_fingerprint()' function."
+        )
+    warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2)
+    processed_include_headers: Optional[Tuple[bytes, ...]] = None
+    if include_headers:
+        processed_include_headers = tuple(
+            to_bytes(h.lower()) for h in sorted(include_headers)
+        )
+    cache = _deprecated_fingerprint_cache.setdefault(request, {})
+    cache_key = (processed_include_headers, keep_fragments)
+    if cache_key not in cache:
+        fp = hashlib.sha1()
+        fp.update(to_bytes(request.method))
+        fp.update(
+            to_bytes(canonicalize_url(request.url, keep_fragments=keep_fragments))
+        )
+        fp.update(request.body or b"")
+        if processed_include_headers:
+            for part in _serialize_headers(processed_include_headers, request):
+                fp.update(part)
+        cache[cache_key] = fp.hexdigest()
+    return cache[cache_key]
+
+
+def _request_fingerprint_as_bytes(*args: Any, **kwargs: Any) -> bytes:
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        return bytes.fromhex(request_fingerprint(*args, **kwargs))
+
+
+_fingerprint_cache: "WeakKeyDictionary[Request, Dict[Tuple[Optional[Tuple[bytes, ...]], bool], bytes]]"
 _fingerprint_cache = WeakKeyDictionary()


-def fingerprint(request: Request, *, include_headers: Optional[Iterable[
-    Union[bytes, str]]]=None, keep_fragments: bool=False) ->bytes:
+def fingerprint(
+    request: Request,
+    *,
+    include_headers: Optional[Iterable[Union[bytes, str]]] = None,
+    keep_fragments: bool = False,
+) -> bytes:
     """
     Return the request fingerprint.

@@ -92,11 +202,38 @@ def fingerprint(request: Request, *, include_headers: Optional[Iterable[
     If you want to include them, set the keep_fragments argument to True
     (for instance when handling requests with a headless browser).
     """
-    pass
+    processed_include_headers: Optional[Tuple[bytes, ...]] = None
+    if include_headers:
+        processed_include_headers = tuple(
+            to_bytes(h.lower()) for h in sorted(include_headers)
+        )
+    cache = _fingerprint_cache.setdefault(request, {})
+    cache_key = (processed_include_headers, keep_fragments)
+    if cache_key not in cache:
+        # To decode bytes reliably (JSON does not support bytes), regardless of
+        # character encoding, we use bytes.hex()
+        headers: Dict[str, List[str]] = {}
+        if processed_include_headers:
+            for header in processed_include_headers:
+                if header in request.headers:
+                    headers[header.hex()] = [
+                        header_value.hex()
+                        for header_value in request.headers.getlist(header)
+                    ]
+        fingerprint_data = {
+            "method": to_unicode(request.method),
+            "url": canonicalize_url(request.url, keep_fragments=keep_fragments),
+            "body": (request.body or b"").hex(),
+            "headers": headers,
+        }
+        fingerprint_json = json.dumps(fingerprint_data, sort_keys=True)
+        cache[cache_key] = hashlib.sha1(fingerprint_json.encode()).digest()
+    return cache[cache_key]


 class RequestFingerprinterProtocol(Protocol):
-    pass
+    def fingerprint(self, request: Request) -> bytes:
+        ...


 class RequestFingerprinter:
@@ -112,70 +249,134 @@ class RequestFingerprinter:
     .. seealso:: :setting:`REQUEST_FINGERPRINTER_IMPLEMENTATION`.
     """

-    def __init__(self, crawler: Optional['Crawler']=None):
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def __init__(self, crawler: Optional["Crawler"] = None):
         if crawler:
             implementation = crawler.settings.get(
-                'REQUEST_FINGERPRINTER_IMPLEMENTATION')
+                "REQUEST_FINGERPRINTER_IMPLEMENTATION"
+            )
         else:
-            implementation = '2.6'
-        if implementation == '2.6':
-            message = """'2.6' is a deprecated value for the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting.
-
-It is also the default value. In other words, it is normal to get this warning if you have not defined a value for the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting. This is so for backward compatibility reasons, but it will change in a future version of Scrapy.
-
-See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation."""
-            warnings.warn(message, category=ScrapyDeprecationWarning,
-                stacklevel=2)
+            implementation = "2.6"
+        if implementation == "2.6":
+            message = (
+                "'2.6' is a deprecated value for the "
+                "'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting.\n"
+                "\n"
+                "It is also the default value. In other words, it is normal "
+                "to get this warning if you have not defined a value for the "
+                "'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting. This is so "
+                "for backward compatibility reasons, but it will change in a "
+                "future version of Scrapy.\n"
+                "\n"
+                "See the documentation of the "
+                "'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for "
+                "information on how to handle this deprecation."
+            )
+            warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2)
             self._fingerprint = _request_fingerprint_as_bytes
-        elif implementation == '2.7':
+        elif implementation == "2.7":
             self._fingerprint = fingerprint
         else:
             raise ValueError(
-                f"Got an invalid value on setting 'REQUEST_FINGERPRINTER_IMPLEMENTATION': {implementation!r}. Valid values are '2.6' (deprecated) and '2.7'."
-                )
+                f"Got an invalid value on setting "
+                f"'REQUEST_FINGERPRINTER_IMPLEMENTATION': "
+                f"{implementation!r}. Valid values are '2.6' (deprecated) "
+                f"and '2.7'."
+            )

+    def fingerprint(self, request: Request) -> bytes:
+        return self._fingerprint(request)

-def request_authenticate(request: Request, username: str, password: str
-    ) ->None:
+
+def request_authenticate(
+    request: Request,
+    username: str,
+    password: str,
+) -> None:
     """Authenticate the given request (in place) using the HTTP basic access
     authentication mechanism (RFC 2617) and the given username and password
     """
-    pass
+    request.headers["Authorization"] = basic_auth_header(username, password)


-def request_httprepr(request: Request) ->bytes:
+def request_httprepr(request: Request) -> bytes:
     """Return the raw HTTP representation (as bytes) of the given request.
     This is provided only for reference since it's not the actual stream of
     bytes that will be sent when performing the request (that's controlled
     by Twisted).
     """
-    pass
-
-
-def referer_str(request: Request) ->Optional[str]:
+    parsed = urlparse_cached(request)
+    path = urlunparse(("", "", parsed.path or "/", parsed.params, parsed.query, ""))
+    s = to_bytes(request.method) + b" " + to_bytes(path) + b" HTTP/1.1\r\n"
+    s += b"Host: " + to_bytes(parsed.hostname or b"") + b"\r\n"
+    if request.headers:
+        s += request.headers.to_string() + b"\r\n"
+    s += b"\r\n"
+    s += request.body
+    return s
+
+
+def referer_str(request: Request) -> Optional[str]:
     """Return Referer HTTP header suitable for logging."""
-    pass
+    referrer = request.headers.get("Referer")
+    if referrer is None:
+        return referrer
+    return to_unicode(referrer, errors="replace")


-def request_from_dict(d: dict, *, spider: Optional[Spider]=None) ->Request:
+def request_from_dict(d: dict, *, spider: Optional[Spider] = None) -> Request:
     """Create a :class:`~scrapy.Request` object from a dict.

     If a spider is given, it will try to resolve the callbacks looking at the
     spider for methods with the same name.
     """
-    pass
+    request_cls: Type[Request] = load_object(d["_class"]) if "_class" in d else Request
+    kwargs = {key: value for key, value in d.items() if key in request_cls.attributes}
+    if d.get("callback") and spider:
+        kwargs["callback"] = _get_method(spider, d["callback"])
+    if d.get("errback") and spider:
+        kwargs["errback"] = _get_method(spider, d["errback"])
+    return request_cls(**kwargs)


-def _get_method(obj: Any, name: Any) ->Any:
+def _get_method(obj: Any, name: Any) -> Any:
     """Helper function for request_from_dict"""
-    pass
+    name = str(name)
+    try:
+        return getattr(obj, name)
+    except AttributeError:
+        raise ValueError(f"Method {name!r} not found in: {obj}")


-def request_to_curl(request: Request) ->str:
+def request_to_curl(request: Request) -> str:
     """
     Converts a :class:`~scrapy.Request` object to a curl command.

     :param :class:`~scrapy.Request`: Request object to be converted
     :return: string containing the curl command
     """
-    pass
+    method = request.method
+
+    data = f"--data-raw '{request.body.decode('utf-8')}'" if request.body else ""
+
+    headers = " ".join(
+        f"-H '{k.decode()}: {v[0].decode()}'" for k, v in request.headers.items()
+    )
+
+    url = request.url
+    cookies = ""
+    if request.cookies:
+        if isinstance(request.cookies, dict):
+            cookie = "; ".join(f"{k}={v}" for k, v in request.cookies.items())
+            cookies = f"--cookie '{cookie}'"
+        elif isinstance(request.cookies, list):
+            cookie = "; ".join(
+                f"{list(c.keys())[0]}={list(c.values())[0]}" for c in request.cookies
+            )
+            cookies = f"--cookie '{cookie}'"
+
+    curl_cmd = f"curl -X {method} {url} {data} {headers} {cookies}".strip()
+    return " ".join(curl_cmd.split())
diff --git a/scrapy/utils/response.py b/scrapy/utils/response.py
index ce657fa90..fabfb1167 100644
--- a/scrapy/utils/response.py
+++ b/scrapy/utils/response.py
@@ -8,50 +8,91 @@ import tempfile
 import webbrowser
 from typing import Any, Callable, Iterable, Tuple, Union
 from weakref import WeakKeyDictionary
+
 from twisted.web import http
 from w3lib import html
+
 import scrapy
 from scrapy.http.response import Response
 from scrapy.utils.decorators import deprecated
 from scrapy.utils.python import to_bytes, to_unicode
-_baseurl_cache: 'WeakKeyDictionary[Response, str]' = WeakKeyDictionary()
+
+_baseurl_cache: "WeakKeyDictionary[Response, str]" = WeakKeyDictionary()


-def get_base_url(response: 'scrapy.http.response.text.TextResponse') ->str:
+def get_base_url(response: "scrapy.http.response.text.TextResponse") -> str:
     """Return the base url of the given response, joined with the response url"""
-    pass
+    if response not in _baseurl_cache:
+        text = response.text[0:4096]
+        _baseurl_cache[response] = html.get_base_url(
+            text, response.url, response.encoding
+        )
+    return _baseurl_cache[response]


-(_metaref_cache:
-    'WeakKeyDictionary[Response, Union[Tuple[None, None], Tuple[float, str]]]'
-    ) = WeakKeyDictionary()
+_metaref_cache: "WeakKeyDictionary[Response, Union[Tuple[None, None], Tuple[float, str]]]" = (
+    WeakKeyDictionary()
+)


-def get_meta_refresh(response: 'scrapy.http.response.text.TextResponse',
-    ignore_tags: Iterable[str]=('script', 'noscript')) ->Union[Tuple[None,
-    None], Tuple[float, str]]:
+def get_meta_refresh(
+    response: "scrapy.http.response.text.TextResponse",
+    ignore_tags: Iterable[str] = ("script", "noscript"),
+) -> Union[Tuple[None, None], Tuple[float, str]]:
     """Parse the http-equiv refresh parameter from the given response"""
-    pass
+    if response not in _metaref_cache:
+        text = response.text[0:4096]
+        _metaref_cache[response] = html.get_meta_refresh(
+            text, response.url, response.encoding, ignore_tags=ignore_tags
+        )
+    return _metaref_cache[response]


-def response_status_message(status: Union[bytes, float, int, str]) ->str:
+def response_status_message(status: Union[bytes, float, int, str]) -> str:
     """Return status code plus status text descriptive message"""
-    pass
+    status_int = int(status)
+    message = http.RESPONSES.get(status_int, "Unknown Status")
+    return f"{status_int} {to_unicode(message)}"


 @deprecated
-def response_httprepr(response: Response) ->bytes:
+def response_httprepr(response: Response) -> bytes:
     """Return raw HTTP representation (as bytes) of the given response. This
     is provided only for reference, since it's not the exact stream of bytes
     that was received (that's not exposed by Twisted).
     """
-    pass
-
-
-def open_in_browser(response: Union[
-    'scrapy.http.response.html.HtmlResponse',
-    'scrapy.http.response.text.TextResponse'], _openfunc: Callable[[str],
-    Any]=webbrowser.open) ->Any:
+    values = [
+        b"HTTP/1.1 ",
+        to_bytes(str(response.status)),
+        b" ",
+        to_bytes(http.RESPONSES.get(response.status, b"")),
+        b"\r\n",
+    ]
+    if response.headers:
+        values.extend([response.headers.to_string(), b"\r\n"])
+    values.extend([b"\r\n", response.body])
+    return b"".join(values)
+
+
+def _remove_html_comments(body):
+    start = body.find(b"<!--")
+    while start != -1:
+        end = body.find(b"-->", start + 1)
+        if end == -1:
+            return body[:start]
+        else:
+            body = body[:start] + body[end + 3 :]
+            start = body.find(b"<!--")
+    return body
+
+
+def open_in_browser(
+    response: Union[
+        "scrapy.http.response.html.HtmlResponse",
+        "scrapy.http.response.text.TextResponse",
+    ],
+    _openfunc: Callable[[str], Any] = webbrowser.open,
+) -> Any:
     """Open *response* in a local web browser, adjusting the `base tag`_ for
     external links to work, e.g. so that images and styles are displayed.

@@ -68,4 +109,21 @@ def open_in_browser(response: Union[
             if "item name" not in response.body:
                 open_in_browser(response)
     """
-    pass
+    from scrapy.http import HtmlResponse, TextResponse
+
+    # XXX: this implementation is a bit dirty and could be improved
+    body = response.body
+    if isinstance(response, HtmlResponse):
+        if b"<base" not in body:
+            _remove_html_comments(body)
+            repl = rf'\0<base href="{response.url}">'
+            body = re.sub(rb"<head(?:[^<>]*?>)", to_bytes(repl), body, count=1)
+        ext = ".html"
+    elif isinstance(response, TextResponse):
+        ext = ".txt"
+    else:
+        raise TypeError("Unsupported response type: " f"{response.__class__.__name__}")
+    fd, fname = tempfile.mkstemp(ext)
+    os.write(fd, body)
+    os.close(fd)
+    return _openfunc(f"file://{fname}")
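
A quick sketch of the response helpers above, using a hand-built HtmlResponse (the URLs are illustrative):

    from scrapy.http import HtmlResponse
    from scrapy.utils.response import (
        get_base_url,
        get_meta_refresh,
        response_status_message,
    )

    body = b'<html><head><base href="http://example.com/base/"></head><body>hi</body></html>'
    resp = HtmlResponse(url="http://example.com/page", body=body, encoding="utf-8")

    print(get_base_url(resp))            # http://example.com/base/
    print(get_meta_refresh(resp))        # (None, None) -- no meta refresh tag
    print(response_status_message(404))  # 404 Not Found
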
diff --git a/scrapy/utils/serialize.py b/scrapy/utils/serialize.py
index 349bc19f4..3b4f67f00 100644
--- a/scrapy/utils/serialize.py
+++ b/scrapy/utils/serialize.py
@@ -2,14 +2,37 @@ import datetime
 import decimal
 import json
 from typing import Any
+
 from itemadapter import ItemAdapter, is_item
 from twisted.internet import defer
+
 from scrapy.http import Request, Response


 class ScrapyJSONEncoder(json.JSONEncoder):
-    DATE_FORMAT = '%Y-%m-%d'
-    TIME_FORMAT = '%H:%M:%S'
+    DATE_FORMAT = "%Y-%m-%d"
+    TIME_FORMAT = "%H:%M:%S"
+
+    def default(self, o: Any) -> Any:
+        if isinstance(o, set):
+            return list(o)
+        if isinstance(o, datetime.datetime):
+            return o.strftime(f"{self.DATE_FORMAT} {self.TIME_FORMAT}")
+        if isinstance(o, datetime.date):
+            return o.strftime(self.DATE_FORMAT)
+        if isinstance(o, datetime.time):
+            return o.strftime(self.TIME_FORMAT)
+        if isinstance(o, decimal.Decimal):
+            return str(o)
+        if isinstance(o, defer.Deferred):
+            return str(o)
+        if is_item(o):
+            return ItemAdapter(o).asdict()
+        if isinstance(o, Request):
+            return f"<{type(o).__name__} {o.method} {o.url}>"
+        if isinstance(o, Response):
+            return f"<{type(o).__name__} {o.status} {o.url}>"
+        return super().default(o)


 class ScrapyJSONDecoder(json.JSONDecoder):
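
A sketch of the encoder's extra conversions (the values are illustrative; set ordering in the output is not guaranteed):

    import datetime
    from decimal import Decimal

    from scrapy.utils.serialize import ScrapyJSONEncoder

    encoder = ScrapyJSONEncoder()
    print(encoder.encode({
        "when": datetime.datetime(2024, 1, 2, 3, 4, 5),
        "price": Decimal("9.99"),
        "tags": {"a", "b"},
    }))
    # e.g. {"when": "2024-01-02 03:04:05", "price": "9.99", "tags": ["a", "b"]}
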
diff --git a/scrapy/utils/signal.py b/scrapy/utils/signal.py
index ba9aa6bf8..21a12a19e 100644
--- a/scrapy/utils/signal.py
+++ b/scrapy/utils/signal.py
@@ -3,36 +3,110 @@ import collections.abc
 import logging
 from typing import Any as TypingAny
 from typing import List, Tuple
-from pydispatch.dispatcher import Anonymous, Any, disconnect, getAllReceivers, liveReceivers
+
+from pydispatch.dispatcher import (
+    Anonymous,
+    Any,
+    disconnect,
+    getAllReceivers,
+    liveReceivers,
+)
 from pydispatch.robustapply import robustApply
 from twisted.internet.defer import Deferred, DeferredList
 from twisted.python.failure import Failure
+
 from scrapy.exceptions import StopDownload
 from scrapy.utils.defer import maybeDeferred_coro
 from scrapy.utils.log import failure_to_exc_info
+
 logger = logging.getLogger(__name__)


-def send_catch_log(signal: TypingAny=Any, sender: TypingAny=Anonymous, *
-    arguments: TypingAny, **named: TypingAny) ->List[Tuple[TypingAny,
-    TypingAny]]:
+def send_catch_log(
+    signal: TypingAny = Any,
+    sender: TypingAny = Anonymous,
+    *arguments: TypingAny,
+    **named: TypingAny
+) -> List[Tuple[TypingAny, TypingAny]]:
     """Like pydispatcher.robust.sendRobust but it also logs errors and returns
     Failures instead of exceptions.
     """
-    pass
+    dont_log = named.pop("dont_log", ())
+    dont_log = (
+        tuple(dont_log)
+        if isinstance(dont_log, collections.abc.Sequence)
+        else (dont_log,)
+    )
+    dont_log += (StopDownload,)
+    spider = named.get("spider", None)
+    responses: List[Tuple[TypingAny, TypingAny]] = []
+    for receiver in liveReceivers(getAllReceivers(sender, signal)):
+        result: TypingAny
+        try:
+            response = robustApply(
+                receiver, signal=signal, sender=sender, *arguments, **named
+            )
+            if isinstance(response, Deferred):
+                logger.error(
+                    "Cannot return deferreds from signal handler: %(receiver)s",
+                    {"receiver": receiver},
+                    extra={"spider": spider},
+                )
+        except dont_log:
+            result = Failure()
+        except Exception:
+            result = Failure()
+            logger.error(
+                "Error caught on signal handler: %(receiver)s",
+                {"receiver": receiver},
+                exc_info=True,
+                extra={"spider": spider},
+            )
+        else:
+            result = response
+        responses.append((receiver, result))
+    return responses


-def send_catch_log_deferred(signal: TypingAny=Any, sender: TypingAny=
-    Anonymous, *arguments: TypingAny, **named: TypingAny) ->Deferred:
+def send_catch_log_deferred(
+    signal: TypingAny = Any,
+    sender: TypingAny = Anonymous,
+    *arguments: TypingAny,
+    **named: TypingAny
+) -> Deferred:
     """Like send_catch_log but supports returning deferreds on signal handlers.
     Returns a deferred that gets fired once all signal handlers' deferreds were
     fired.
     """
-    pass
+
+    def logerror(failure: Failure, recv: Any) -> Failure:
+        if dont_log is None or not isinstance(failure.value, dont_log):
+            logger.error(
+                "Error caught on signal handler: %(receiver)s",
+                {"receiver": recv},
+                exc_info=failure_to_exc_info(failure),
+                extra={"spider": spider},
+            )
+        return failure
+
+    dont_log = named.pop("dont_log", None)
+    spider = named.get("spider", None)
+    dfds = []
+    for receiver in liveReceivers(getAllReceivers(sender, signal)):
+        d = maybeDeferred_coro(
+            robustApply, receiver, signal=signal, sender=sender, *arguments, **named
+        )
+        d.addErrback(logerror, receiver)
+        d.addBoth(lambda result, recv=receiver: (recv, result))  # bind receiver now, not at fire time
+        dfds.append(d)
+    d = DeferredList(dfds)
+    d.addCallback(lambda out: [x[1] for x in out])
+    return d


-def disconnect_all(signal: TypingAny=Any, sender: TypingAny=Any) ->None:
+def disconnect_all(signal: TypingAny = Any, sender: TypingAny = Any) -> None:
     """Disconnect all signal handlers. Useful for cleaning up after running
     tests
     """
-    pass
+    for receiver in liveReceivers(getAllReceivers(sender, signal)):
+        disconnect(receiver, signal=signal, sender=sender)
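
A sketch of send_catch_log() behaviour with a hand-made signal; the signal object and handler below are made up, but Scrapy's own signals are plain sentinel objects just like this one:

    from pydispatch import dispatcher

    from scrapy.utils.signal import disconnect_all, send_catch_log

    my_signal = object()

    def handler(message):
        if message == "boom":
            raise ValueError(message)
        return f"handled {message}"

    dispatcher.connect(handler, signal=my_signal)

    print(send_catch_log(signal=my_signal, message="ok"))    # [(<function handler ...>, 'handled ok')]
    print(send_catch_log(signal=my_signal, message="boom"))  # [(<function handler ...>, <Failure ValueError>)], logged instead of raised

    disconnect_all(signal=my_signal)
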
diff --git a/scrapy/utils/sitemap.py b/scrapy/utils/sitemap.py
index 2ee154109..759b1c1a9 100644
--- a/scrapy/utils/sitemap.py
+++ b/scrapy/utils/sitemap.py
@@ -6,7 +6,8 @@ SitemapSpider, its API is subject to change without notice.
 """
 from typing import Any, Dict, Generator, Iterator, Optional
 from urllib.parse import urljoin
-import lxml.etree
+
+import lxml.etree  # nosec


 class Sitemap:
@@ -14,30 +15,37 @@ class Sitemap:
     (type=sitemapindex) files"""

     def __init__(self, xmltext: str):
-        xmlp = lxml.etree.XMLParser(recover=True, remove_comments=True,
-            resolve_entities=False)
-        self._root = lxml.etree.fromstring(xmltext, parser=xmlp)
+        xmlp = lxml.etree.XMLParser(
+            recover=True, remove_comments=True, resolve_entities=False
+        )
+        self._root = lxml.etree.fromstring(xmltext, parser=xmlp)  # nosec
         rt = self._root.tag
-        self.type = self._root.tag.split('}', 1)[1] if '}' in rt else rt
+        self.type = self._root.tag.split("}", 1)[1] if "}" in rt else rt

-    def __iter__(self) ->Iterator[Dict[str, Any]]:
+    def __iter__(self) -> Iterator[Dict[str, Any]]:
         for elem in self._root.getchildren():
             d: Dict[str, Any] = {}
             for el in elem.getchildren():
                 tag = el.tag
-                name = tag.split('}', 1)[1] if '}' in tag else tag
-                if name == 'link':
-                    if 'href' in el.attrib:
-                        d.setdefault('alternate', []).append(el.get('href'))
+                name = tag.split("}", 1)[1] if "}" in tag else tag
+
+                if name == "link":
+                    if "href" in el.attrib:
+                        d.setdefault("alternate", []).append(el.get("href"))
                 else:
-                    d[name] = el.text.strip() if el.text else ''
-            if 'loc' in d:
+                    d[name] = el.text.strip() if el.text else ""
+
+            if "loc" in d:
                 yield d


-def sitemap_urls_from_robots(robots_text: str, base_url: Optional[str]=None
-    ) ->Generator[str, Any, None]:
+def sitemap_urls_from_robots(
+    robots_text: str, base_url: Optional[str] = None
+) -> Generator[str, Any, None]:
     """Return an iterator over all sitemap urls contained in the given
     robots.txt file
     """
-    pass
+    for line in robots_text.splitlines():
+        if line.lstrip().lower().startswith("sitemap:"):
+            url = line.split(":", 1)[1].strip()
+            yield urljoin(base_url or "", url)
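
A sketch of the sitemap helpers; the XML and robots.txt snippets are made up:

    from scrapy.utils.sitemap import Sitemap, sitemap_urls_from_robots

    xml = b"""<?xml version="1.0" encoding="UTF-8"?>
    <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
      <url><loc>https://example.com/a</loc><lastmod>2024-01-01</lastmod></url>
    </urlset>"""

    print(Sitemap(xml).type)   # urlset
    print(list(Sitemap(xml)))  # [{'loc': 'https://example.com/a', 'lastmod': '2024-01-01'}]

    robots = "User-agent: *\nSitemap: /sitemap.xml\n"
    print(list(sitemap_urls_from_robots(robots, base_url="https://example.com")))
    # ['https://example.com/sitemap.xml']
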
diff --git a/scrapy/utils/spider.py b/scrapy/utils/spider.py
index b93bbbad9..704df8657 100644
--- a/scrapy/utils/spider.py
+++ b/scrapy/utils/spider.py
@@ -1,30 +1,121 @@
 from __future__ import annotations
+
 import inspect
 import logging
 from types import CoroutineType, ModuleType
-from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Iterable, Literal, Optional, Type, TypeVar, Union, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncGenerator,
+    Generator,
+    Iterable,
+    Literal,
+    Optional,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+)
+
 from twisted.internet.defer import Deferred
+
 from scrapy import Request
 from scrapy.spiders import Spider
 from scrapy.utils.defer import deferred_from_coro
 from scrapy.utils.misc import arg_to_iter
+
 if TYPE_CHECKING:
     from scrapy.spiderloader import SpiderLoader
+
 logger = logging.getLogger(__name__)
-_T = TypeVar('_T')
+
+_T = TypeVar("_T")
+
+
+# https://stackoverflow.com/questions/60222982
+@overload
+def iterate_spider_output(result: AsyncGenerator) -> AsyncGenerator:  # type: ignore[misc]
+    ...
+
+
+@overload
+def iterate_spider_output(result: CoroutineType) -> Deferred:
+    ...
+
+
+@overload
+def iterate_spider_output(result: _T) -> Iterable:
+    ...


-def iter_spider_classes(module: ModuleType) ->Generator[Type[Spider], Any, None
-    ]:
+def iterate_spider_output(result: Any) -> Union[Iterable, AsyncGenerator, Deferred]:
+    if inspect.isasyncgen(result):
+        return result
+    if inspect.iscoroutine(result):
+        d = deferred_from_coro(result)
+        d.addCallback(iterate_spider_output)
+        return d
+    return arg_to_iter(deferred_from_coro(result))
+
+
+def iter_spider_classes(module: ModuleType) -> Generator[Type[Spider], Any, None]:
     """Return an iterator over all spider classes defined in the given module
     that can be instantiated (i.e. those that have a name)
     """
-    pass
+    # this needs to be imported here until we get rid of the spider manager
+    # singleton in scrapy.spider.spiders
+    from scrapy.spiders import Spider
+
+    for obj in vars(module).values():
+        if (
+            inspect.isclass(obj)
+            and issubclass(obj, Spider)
+            and obj.__module__ == module.__name__
+            and getattr(obj, "name", None)
+        ):
+            yield obj
+
+
+@overload
+def spidercls_for_request(
+    spider_loader: SpiderLoader,
+    request: Request,
+    default_spidercls: Type[Spider],
+    log_none: bool = ...,
+    log_multiple: bool = ...,
+) -> Type[Spider]:
+    ...
+

+@overload
+def spidercls_for_request(
+    spider_loader: SpiderLoader,
+    request: Request,
+    default_spidercls: Literal[None],
+    log_none: bool = ...,
+    log_multiple: bool = ...,
+) -> Optional[Type[Spider]]:
+    ...

-def spidercls_for_request(spider_loader: SpiderLoader, request: Request,
-    default_spidercls: Optional[Type[Spider]]=None, log_none: bool=False,
-    log_multiple: bool=False) ->Optional[Type[Spider]]:
+
+@overload
+def spidercls_for_request(
+    spider_loader: SpiderLoader,
+    request: Request,
+    *,
+    log_none: bool = ...,
+    log_multiple: bool = ...,
+) -> Optional[Type[Spider]]:
+    ...
+
+
+def spidercls_for_request(
+    spider_loader: SpiderLoader,
+    request: Request,
+    default_spidercls: Optional[Type[Spider]] = None,
+    log_none: bool = False,
+    log_multiple: bool = False,
+) -> Optional[Type[Spider]]:
     """Return a spider class that handles the given Request.

     This will look for the spiders that can handle the given request (using
@@ -35,8 +126,23 @@ def spidercls_for_request(spider_loader: SpiderLoader, request: Request,
     default_spidercls passed. It can optionally log if multiple or no spiders
     are found.
     """
-    pass
+    snames = spider_loader.find_by_request(request)
+    if len(snames) == 1:
+        return spider_loader.load(snames[0])
+
+    if len(snames) > 1 and log_multiple:
+        logger.error(
+            "More than one spider can handle: %(request)s - %(snames)s",
+            {"request": request, "snames": ", ".join(snames)},
+        )
+
+    if len(snames) == 0 and log_none:
+        logger.error(
+            "Unable to find spider that handles: %(request)s", {"request": request}
+        )
+
+    return default_spidercls


 class DefaultSpider(Spider):
-    name = 'default'
+    name = "default"
diff --git a/scrapy/utils/ssl.py b/scrapy/utils/ssl.py
index 1588eed7a..d520ef809 100644
--- a/scrapy/utils/ssl.py
+++ b/scrapy/utils/ssl.py
@@ -1,6 +1,63 @@
 from typing import Any, Optional
+
 import OpenSSL._util as pyOpenSSLutil
 import OpenSSL.SSL
 import OpenSSL.version
 from OpenSSL.crypto import X509Name
+
 from scrapy.utils.python import to_unicode
+
+
+def ffi_buf_to_string(buf: Any) -> str:
+    return to_unicode(pyOpenSSLutil.ffi.string(buf))
+
+
+def x509name_to_string(x509name: X509Name) -> str:
+    # from OpenSSL.crypto.X509Name.__repr__
+    result_buffer: Any = pyOpenSSLutil.ffi.new("char[]", 512)
+    pyOpenSSLutil.lib.X509_NAME_oneline(
+        x509name._name, result_buffer, len(result_buffer)  # type: ignore[attr-defined]
+    )
+
+    return ffi_buf_to_string(result_buffer)
+
+
+def get_temp_key_info(ssl_object: Any) -> Optional[str]:
+    # adapted from OpenSSL apps/s_cb.c::ssl_print_tmp_key()
+    if not hasattr(pyOpenSSLutil.lib, "SSL_get_server_tmp_key"):
+        # removed in cryptography 40.0.0
+        return None
+    temp_key_p = pyOpenSSLutil.ffi.new("EVP_PKEY **")
+    if not pyOpenSSLutil.lib.SSL_get_server_tmp_key(ssl_object, temp_key_p):
+        return None
+    temp_key = temp_key_p[0]
+    if temp_key == pyOpenSSLutil.ffi.NULL:
+        return None
+    temp_key = pyOpenSSLutil.ffi.gc(temp_key, pyOpenSSLutil.lib.EVP_PKEY_free)
+    key_info = []
+    key_type = pyOpenSSLutil.lib.EVP_PKEY_id(temp_key)
+    if key_type == pyOpenSSLutil.lib.EVP_PKEY_RSA:
+        key_info.append("RSA")
+    elif key_type == pyOpenSSLutil.lib.EVP_PKEY_DH:
+        key_info.append("DH")
+    elif key_type == pyOpenSSLutil.lib.EVP_PKEY_EC:
+        key_info.append("ECDH")
+        ec_key = pyOpenSSLutil.lib.EVP_PKEY_get1_EC_KEY(temp_key)
+        ec_key = pyOpenSSLutil.ffi.gc(ec_key, pyOpenSSLutil.lib.EC_KEY_free)
+        nid = pyOpenSSLutil.lib.EC_GROUP_get_curve_name(
+            pyOpenSSLutil.lib.EC_KEY_get0_group(ec_key)
+        )
+        cname = pyOpenSSLutil.lib.EC_curve_nid2nist(nid)
+        if cname == pyOpenSSLutil.ffi.NULL:
+            cname = pyOpenSSLutil.lib.OBJ_nid2sn(nid)
+        key_info.append(ffi_buf_to_string(cname))
+    else:
+        key_info.append(ffi_buf_to_string(pyOpenSSLutil.lib.OBJ_nid2sn(key_type)))
+    key_info.append(f"{pyOpenSSLutil.lib.EVP_PKEY_bits(temp_key)} bits")
+    return ", ".join(key_info)
+
+
+def get_openssl_version() -> str:
+    system_openssl_bytes = OpenSSL.SSL.SSLeay_version(OpenSSL.SSL.SSLEAY_VERSION)
+    system_openssl = system_openssl_bytes.decode("ascii", errors="replace")
+    return f"{OpenSSL.version.__version__} ({system_openssl})"
diff --git a/scrapy/utils/template.py b/scrapy/utils/template.py
index 705073b43..6b22f3bfa 100644
--- a/scrapy/utils/template.py
+++ b/scrapy/utils/template.py
@@ -1,13 +1,30 @@
 """Helper functions for working with templates"""
+
 import re
 import string
 from os import PathLike
 from pathlib import Path
 from typing import Any, Union
-CAMELCASE_INVALID_CHARS = re.compile('[^a-zA-Z\\d]')


-def string_camelcase(string: str) ->str:
+def render_templatefile(path: Union[str, PathLike], **kwargs: Any) -> None:
+    path_obj = Path(path)
+    raw = path_obj.read_text("utf8")
+
+    content = string.Template(raw).substitute(**kwargs)
+
+    render_path = path_obj.with_suffix("") if path_obj.suffix == ".tmpl" else path_obj
+
+    if path_obj.suffix == ".tmpl":
+        path_obj.rename(render_path)
+
+    render_path.write_text(content, "utf8")
+
+
+CAMELCASE_INVALID_CHARS = re.compile(r"[^a-zA-Z\d]")
+
+
+def string_camelcase(string: str) -> str:
     """Convert a word  to its CamelCase version and remove invalid chars

     >>> string_camelcase('lost-pound')
@@ -17,4 +34,4 @@ def string_camelcase(string: str) ->str:
     'MissingImages'

     """
-    pass
+    return CAMELCASE_INVALID_CHARS.sub("", string.title())
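
A sketch of the template helpers; the .tmpl file is created in a temporary directory just for the example:

    from pathlib import Path
    from tempfile import TemporaryDirectory

    from scrapy.utils.template import render_templatefile, string_camelcase

    print(string_camelcase("my-new-spider"))  # MyNewSpider

    with TemporaryDirectory() as tmp:
        tmpl = Path(tmp, "spider.py.tmpl")
        tmpl.write_text("name = '$name'\n", "utf8")
        render_templatefile(tmpl, name="demo")           # renames to spider.py and substitutes $name
        print(Path(tmp, "spider.py").read_text("utf8"))  # name = 'demo'
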
diff --git a/scrapy/utils/test.py b/scrapy/utils/test.py
index 88505e748..709e0b00d 100644
--- a/scrapy/utils/test.py
+++ b/scrapy/utils/test.py
@@ -1,6 +1,7 @@
 """
 This module contains some assorted functions used in tests
 """
+
 import asyncio
 import os
 from importlib import import_module
@@ -8,49 +9,144 @@ from pathlib import Path
 from posixpath import split
 from typing import Any, Coroutine, Dict, List, Optional, Tuple, Type
 from unittest import TestCase, mock
+
 from twisted.internet.defer import Deferred
 from twisted.trial.unittest import SkipTest
+
 from scrapy import Spider
 from scrapy.crawler import Crawler
 from scrapy.utils.boto import is_botocore_available


+def assert_gcs_environ() -> None:
+    if "GCS_PROJECT_ID" not in os.environ:
+        raise SkipTest("GCS_PROJECT_ID not found")
+
+
+def skip_if_no_boto() -> None:
+    if not is_botocore_available():
+        raise SkipTest("missing botocore library")
+
+
+def get_gcs_content_and_delete(
+    bucket: Any, path: str
+) -> Tuple[bytes, List[Dict[str, str]], Any]:
+    from google.cloud import storage
+
+    client = storage.Client(project=os.environ.get("GCS_PROJECT_ID"))
+    bucket = client.get_bucket(bucket)
+    blob = bucket.get_blob(path)
+    content = blob.download_as_string()
+    acl = list(blob.acl)  # loads acl before it will be deleted
+    bucket.delete_blob(path)
+    return content, acl, blob
+
+
+def get_ftp_content_and_delete(
+    path: str,
+    host: str,
+    port: int,
+    username: str,
+    password: str,
+    use_active_mode: bool = False,
+) -> bytes:
+    from ftplib import FTP
+
+    ftp = FTP()
+    ftp.connect(host, port)
+    ftp.login(username, password)
+    if use_active_mode:
+        ftp.set_pasv(False)
+    ftp_data: List[bytes] = []
+
+    def buffer_data(data: bytes) -> None:
+        ftp_data.append(data)
+
+    ftp.retrbinary(f"RETR {path}", buffer_data)
+    dirname, filename = split(path)
+    ftp.cwd(dirname)
+    ftp.delete(filename)
+    return b"".join(ftp_data)
+
+
 class TestSpider(Spider):
-    name = 'test'
+    name = "test"


-def get_crawler(spidercls: Optional[Type[Spider]]=None, settings_dict:
-    Optional[Dict[str, Any]]=None, prevent_warnings: bool=True) ->Crawler:
+def get_crawler(
+    spidercls: Optional[Type[Spider]] = None,
+    settings_dict: Optional[Dict[str, Any]] = None,
+    prevent_warnings: bool = True,
+) -> Crawler:
     """Return an unconfigured Crawler object. If settings_dict is given, it
     will be used to populate the crawler settings with a project level
     priority.
     """
-    pass
+    from scrapy.crawler import CrawlerRunner
+
+    # Set by default settings that prevent deprecation warnings.
+    settings: Dict[str, Any] = {}
+    if prevent_warnings:
+        settings["REQUEST_FINGERPRINTER_IMPLEMENTATION"] = "2.7"
+    settings.update(settings_dict or {})
+    runner = CrawlerRunner(settings)
+    crawler = runner.create_crawler(spidercls or TestSpider)
+    crawler._apply_settings()
+    return crawler


-def get_pythonpath() ->str:
+def get_pythonpath() -> str:
     """Return a PYTHONPATH suitable to use in processes so that they find this
     installation of Scrapy"""
-    pass
+    scrapy_path = import_module("scrapy").__path__[0]
+    return str(Path(scrapy_path).parent) + os.pathsep + os.environ.get("PYTHONPATH", "")


-def get_testenv() ->Dict[str, str]:
+def get_testenv() -> Dict[str, str]:
     """Return a OS environment dict suitable to fork processes that need to import
     this installation of Scrapy, instead of a system installed one.
     """
-    pass
+    env = os.environ.copy()
+    env["PYTHONPATH"] = get_pythonpath()
+    return env


-def assert_samelines(testcase: TestCase, text1: str, text2: str, msg:
-    Optional[str]=None) ->None:
+def assert_samelines(
+    testcase: TestCase, text1: str, text2: str, msg: Optional[str] = None
+) -> None:
     """Asserts text1 and text2 have the same lines, ignoring differences in
     line endings between platforms
     """
-    pass
+    testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg)


-def mock_google_cloud_storage() ->Tuple[Any, Any, Any]:
+def get_from_asyncio_queue(value: Any) -> Coroutine:
+    q: asyncio.Queue = asyncio.Queue()
+    getter = q.get()
+    q.put_nowait(value)
+    return getter
+
+
+def mock_google_cloud_storage() -> Tuple[Any, Any, Any]:
     """Creates autospec mocks for google-cloud-storage Client, Bucket and Blob
     classes and set their proper return values.
     """
-    pass
+    from google.cloud.storage import Blob, Bucket, Client
+
+    client_mock = mock.create_autospec(Client)
+
+    bucket_mock = mock.create_autospec(Bucket)
+    client_mock.get_bucket.return_value = bucket_mock
+
+    blob_mock = mock.create_autospec(Blob)
+    bucket_mock.blob.return_value = blob_mock
+
+    return (client_mock, bucket_mock, blob_mock)
+
+
+def get_web_client_agent_req(url: str) -> Deferred:
+    from twisted.internet import reactor
+    from twisted.web.client import Agent  # imports twisted.internet.reactor
+
+    agent = Agent(reactor)
+    return agent.request(b"GET", url.encode("utf-8"))
diff --git a/scrapy/utils/testproc.py b/scrapy/utils/testproc.py
index 4e09c46cc..0688e014b 100644
--- a/scrapy/utils/testproc.py
+++ b/scrapy/utils/testproc.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
+
 import os
 import sys
 from typing import Iterable, List, Optional, Tuple, cast
+
 from twisted.internet.defer import Deferred
 from twisted.internet.error import ProcessTerminated
 from twisted.internet.protocol import ProcessProtocol
@@ -10,14 +12,52 @@ from twisted.python.failure import Failure

 class ProcessTest:
     command = None
-    prefix = [sys.executable, '-m', 'scrapy.cmdline']
-    cwd = os.getcwd()
+    prefix = [sys.executable, "-m", "scrapy.cmdline"]
+    cwd = os.getcwd()  # trial chdirs to temp dir

+    def execute(
+        self,
+        args: Iterable[str],
+        check_code: bool = True,
+        settings: Optional[str] = None,
+    ) -> Deferred:
+        from twisted.internet import reactor
+
+        env = os.environ.copy()
+        if settings is not None:
+            env["SCRAPY_SETTINGS_MODULE"] = settings
+        assert self.command
+        cmd = self.prefix + [self.command] + list(args)
+        pp = TestProcessProtocol()
+        pp.deferred.addCallback(self._process_finished, cmd, check_code)
+        reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
+        return pp.deferred
+
+    def _process_finished(
+        self, pp: TestProcessProtocol, cmd: List[str], check_code: bool
+    ) -> Tuple[int, bytes, bytes]:
+        if pp.exitcode and check_code:
+            msg = f"process {cmd} exit with code {pp.exitcode}"
+            msg += f"\n>>> stdout <<<\n{pp.out.decode()}"
+            msg += "\n"
+            msg += f"\n>>> stderr <<<\n{pp.err.decode()}"
+            raise RuntimeError(msg)
+        return cast(int, pp.exitcode), pp.out, pp.err

-class TestProcessProtocol(ProcessProtocol):

-    def __init__(self) ->None:
+class TestProcessProtocol(ProcessProtocol):
+    def __init__(self) -> None:
         self.deferred: Deferred = Deferred()
-        self.out: bytes = b''
-        self.err: bytes = b''
+        self.out: bytes = b""
+        self.err: bytes = b""
         self.exitcode: Optional[int] = None
+
+    def outReceived(self, data: bytes) -> None:
+        self.out += data
+
+    def errReceived(self, data: bytes) -> None:
+        self.err += data
+
+    def processEnded(self, status: Failure) -> None:
+        self.exitcode = cast(ProcessTerminated, status.value).exitCode
+        self.deferred.callback(self)
diff --git a/scrapy/utils/testsite.py b/scrapy/utils/testsite.py
index c91969a13..de9ce992a 100644
--- a/scrapy/utils/testsite.py
+++ b/scrapy/utils/testsite.py
@@ -1,17 +1,55 @@
 from urllib.parse import urljoin
+
 from twisted.web import resource, server, static, util


 class SiteTest:
-    pass
+    def setUp(self):
+        from twisted.internet import reactor
+
+        super().setUp()
+        self.site = reactor.listenTCP(0, test_site(), interface="127.0.0.1")
+        self.baseurl = f"http://localhost:{self.site.getHost().port}/"
+
+    def tearDown(self):
+        super().tearDown()
+        self.site.stopListening()
+
+    def url(self, path):
+        return urljoin(self.baseurl, path)


 class NoMetaRefreshRedirect(util.Redirect):
-    pass
+    def render(self, request):
+        content = util.Redirect.render(self, request)
+        return content.replace(
+            b'http-equiv="refresh"', b'http-no-equiv="do-not-refresh-me"'
+        )


-if __name__ == '__main__':
+def test_site():
+    r = resource.Resource()
+    r.putChild(b"text", static.Data(b"Works", "text/plain"))
+    r.putChild(
+        b"html",
+        static.Data(
+            b"<body><p class='one'>Works</p><p class='two'>World</p></body>",
+            "text/html",
+        ),
+    )
+    r.putChild(
+        b"enc-gb18030",
+        static.Data(b"<p>gb18030 encoding</p>", "text/html; charset=gb18030"),
+    )
+    r.putChild(b"redirect", util.Redirect(b"/redirected"))
+    r.putChild(b"redirect-no-meta-refresh", NoMetaRefreshRedirect(b"/redirected"))
+    r.putChild(b"redirected", static.Data(b"Redirected here", "text/plain"))
+    return server.Site(r)
+
+
+if __name__ == "__main__":
     from twisted.internet import reactor
-    port = reactor.listenTCP(0, test_site(), interface='127.0.0.1')
-    print(f'http://localhost:{port.getHost().port}/')
+
+    port = reactor.listenTCP(0, test_site(), interface="127.0.0.1")
+    print(f"http://localhost:{port.getHost().port}/")
     reactor.run()
diff --git a/scrapy/utils/trackref.py b/scrapy/utils/trackref.py
index 42dd22dd7..9ff9a273f 100644
--- a/scrapy/utils/trackref.py
+++ b/scrapy/utils/trackref.py
@@ -8,43 +8,65 @@ About performance: This library has a minimal performance impact when enabled,
 and no performance penalty at all when disabled (as object_ref becomes just an
 alias to object in that case).
 """
+
 from collections import defaultdict
 from operator import itemgetter
 from time import time
 from typing import TYPE_CHECKING, Any, DefaultDict, Iterable
 from weakref import WeakKeyDictionary
+
 if TYPE_CHECKING:
+    # typing.Self requires Python 3.11
     from typing_extensions import Self
+
+
 NoneType = type(None)
-live_refs: DefaultDict[type, WeakKeyDictionary] = defaultdict(WeakKeyDictionary
-    )
+live_refs: DefaultDict[type, WeakKeyDictionary] = defaultdict(WeakKeyDictionary)


 class object_ref:
     """Inherit from this class to a keep a record of live instances"""
+
     __slots__ = ()

-    def __new__(cls, *args: Any, **kwargs: Any) ->'Self':
+    def __new__(cls, *args: Any, **kwargs: Any) -> "Self":
         obj = object.__new__(cls)
         live_refs[cls][obj] = time()
         return obj


-def format_live_refs(ignore: Any=NoneType) ->str:
+# using Any as it's hard to type type(None)
+def format_live_refs(ignore: Any = NoneType) -> str:
     """Return a tabular representation of tracked objects"""
-    pass
+    s = "Live References\n\n"
+    now = time()
+    for cls, wdict in sorted(live_refs.items(), key=lambda x: x[0].__name__):
+        if not wdict:
+            continue
+        if issubclass(cls, ignore):
+            continue
+        oldest = min(wdict.values())
+        s += f"{cls.__name__:<30} {len(wdict):6}   oldest: {int(now - oldest)}s ago\n"
+    return s


-def print_live_refs(*a: Any, **kw: Any) ->None:
+def print_live_refs(*a: Any, **kw: Any) -> None:
     """Print tracked objects"""
-    pass
+    print(format_live_refs(*a, **kw))


-def get_oldest(class_name: str) ->Any:
+def get_oldest(class_name: str) -> Any:
     """Get the oldest object for a specific class name"""
-    pass
+    for cls, wdict in live_refs.items():
+        if cls.__name__ == class_name:
+            if not wdict:
+                break
+            return min(wdict.items(), key=itemgetter(1))[0]


-def iter_all(class_name: str) ->Iterable[Any]:
+def iter_all(class_name: str) -> Iterable[Any]:
     """Iterate over all objects of the same class by its class name"""
-    pass
+    for cls, wdict in live_refs.items():
+        if cls.__name__ == class_name:
+            return wdict.keys()
+    return []
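
A sketch of the live-reference tracking helpers; Tracked is a made-up class used only for illustration:

    from scrapy.utils.trackref import (
        format_live_refs,
        get_oldest,
        iter_all,
        object_ref,
    )

    class Tracked(object_ref):
        pass

    objects = [Tracked() for _ in range(3)]

    print(format_live_refs())                # tabular report including a "Tracked" row
    print(get_oldest("Tracked") in objects)  # True
    print(len(list(iter_all("Tracked"))))    # 3
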
diff --git a/scrapy/utils/url.py b/scrapy/utils/url.py
index 31325513d..22b4197f9 100644
--- a/scrapy/utils/url.py
+++ b/scrapy/utils/url.py
@@ -8,37 +8,53 @@ to the w3lib.url module. Always import those from there instead.
 import re
 from typing import TYPE_CHECKING, Iterable, Optional, Type, Union, cast
 from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
+
+# scrapy.utils.url was moved to w3lib.url and import * ensures this
+# move doesn't break old code
 from w3lib.url import *
-from w3lib.url import _safe_chars, _unquotepath
+from w3lib.url import _safe_chars, _unquotepath  # noqa: F401
+
 from scrapy.utils.python import to_unicode
+
 if TYPE_CHECKING:
     from scrapy import Spider
+
+
 UrlT = Union[str, bytes, ParseResult]


-def url_is_from_any_domain(url: UrlT, domains: Iterable[str]) ->bool:
+def url_is_from_any_domain(url: UrlT, domains: Iterable[str]) -> bool:
     """Return True if the url belongs to any of the given domains"""
-    pass
+    host = parse_url(url).netloc.lower()
+    if not host:
+        return False
+    domains = [d.lower() for d in domains]
+    return any((host == d) or (host.endswith(f".{d}")) for d in domains)


-def url_is_from_spider(url: UrlT, spider: Type['Spider']) ->bool:
+def url_is_from_spider(url: UrlT, spider: Type["Spider"]) -> bool:
     """Return True if the url belongs to the given spider"""
-    pass
+    return url_is_from_any_domain(
+        url, [spider.name] + list(getattr(spider, "allowed_domains", []))
+    )


-def url_has_any_extension(url: UrlT, extensions: Iterable[str]) ->bool:
+def url_has_any_extension(url: UrlT, extensions: Iterable[str]) -> bool:
     """Return True if the url ends with one of the extensions provided"""
-    pass
+    lowercase_path = parse_url(url).path.lower()
+    return any(lowercase_path.endswith(ext) for ext in extensions)


-def parse_url(url: UrlT, encoding: Optional[str]=None) ->ParseResult:
+def parse_url(url: UrlT, encoding: Optional[str] = None) -> ParseResult:
     """Return urlparsed url from the given argument (which could be an already
     parsed url)
     """
-    pass
+    if isinstance(url, ParseResult):
+        return url
+    return cast(ParseResult, urlparse(to_unicode(url, encoding)))


-def escape_ajax(url: str) ->str:
+def escape_ajax(url: str) -> str:
     """
     Return the crawlable url according to:
     https://developers.google.com/webmasters/ajax-crawling/docs/getting-started
@@ -61,22 +77,80 @@ def escape_ajax(url: str) ->str:
     >>> escape_ajax("www.example.com/ajax.html")
     'www.example.com/ajax.html'
     """
-    pass
+    defrag, frag = urldefrag(url)
+    if not frag.startswith("!"):
+        return url
+    return add_or_replace_parameter(defrag, "_escaped_fragment_", frag[1:])


-def add_http_if_no_scheme(url: str) ->str:
+def add_http_if_no_scheme(url: str) -> str:
     """Add http as the default scheme if it is missing from the url."""
-    pass
-
-
-def guess_scheme(url: str) ->str:
+    match = re.match(r"^\w+://", url, flags=re.I)
+    if not match:
+        parts = urlparse(url)
+        scheme = "http:" if parts.netloc else "http://"
+        url = scheme + url
+
+    return url
+
+
+def _is_posix_path(string: str) -> bool:
+    return bool(
+        re.match(
+            r"""
+            ^                   # start with...
+            (
+                \.              # ...a single dot,
+                (
+                    \. | [^/\.]+  # optionally followed by
+                )?                # either a second dot or some characters
+                |
+                ~   # $HOME
+            )?      # optional match of ".", ".." or ".blabla"
+            /       # at least one "/" for a file path,
+            .       # and something after the "/"
+            """,
+            string,
+            flags=re.VERBOSE,
+        )
+    )
+
+
+def _is_windows_path(string: str) -> bool:
+    return bool(
+        re.match(
+            r"""
+            ^
+            (
+                [a-z]:\\
+                | \\\\
+            )
+            """,
+            string,
+            flags=re.IGNORECASE | re.VERBOSE,
+        )
+    )
+
+
+def _is_filesystem_path(string: str) -> bool:
+    return _is_posix_path(string) or _is_windows_path(string)
+
+
+def guess_scheme(url: str) -> str:
     """Add an URL scheme if missing: file:// for filepath-like input or
     http:// otherwise."""
-    pass
-
-
-def strip_url(url: str, strip_credentials: bool=True, strip_default_port:
-    bool=True, origin_only: bool=False, strip_fragment: bool=True) ->str:
+    if _is_filesystem_path(url):
+        return any_to_uri(url)
+    return add_http_if_no_scheme(url)
+
+
+def strip_url(
+    url: str,
+    strip_credentials: bool = True,
+    strip_default_port: bool = True,
+    origin_only: bool = False,
+    strip_fragment: bool = True,
+) -> str:
     """Strip URL string from some of its components:

     - ``strip_credentials`` removes "user:password@"
@@ -86,4 +160,27 @@ def strip_url(url: str, strip_credentials: bool=True, strip_default_port:
       query and fragment components; it also strips credentials
     - ``strip_fragment`` drops any #fragment component
     """
-    pass
+
+    parsed_url = urlparse(url)
+    netloc = parsed_url.netloc
+    if (strip_credentials or origin_only) and (
+        parsed_url.username or parsed_url.password
+    ):
+        netloc = netloc.split("@")[-1]
+    if strip_default_port and parsed_url.port:
+        if (parsed_url.scheme, parsed_url.port) in (
+            ("http", 80),
+            ("https", 443),
+            ("ftp", 21),
+        ):
+            netloc = netloc.replace(f":{parsed_url.port}", "")
+    return urlunparse(
+        (
+            parsed_url.scheme,
+            netloc,
+            "/" if origin_only else parsed_url.path,
+            "" if origin_only else parsed_url.params,
+            "" if origin_only else parsed_url.query,
+            "" if strip_fragment else parsed_url.fragment,
+        )
+    )
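
A sketch of the URL helpers above (all URLs are illustrative):

    from scrapy.utils.url import (
        add_http_if_no_scheme,
        guess_scheme,
        strip_url,
        url_is_from_any_domain,
    )

    print(url_is_from_any_domain("http://sub.example.com/x", ["example.com"]))  # True
    print(add_http_if_no_scheme("www.example.com/path"))  # http://www.example.com/path
    print(guess_scheme("./files/report.pdf"))             # file:///.../files/report.pdf (depends on the current directory)
    print(strip_url("http://user:pass@www.example.com:80/p?q=1#frag"))
    # http://www.example.com/p?q=1
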
diff --git a/scrapy/utils/versions.py b/scrapy/utils/versions.py
index b49afb199..42e5e9be4 100644
--- a/scrapy/utils/versions.py
+++ b/scrapy/utils/versions.py
@@ -1,11 +1,32 @@
 import platform
 import sys
 from typing import List, Tuple
+
 import cryptography
 import cssselect
-import lxml.etree
+import lxml.etree  # nosec
 import parsel
 import twisted
 import w3lib
+
 import scrapy
 from scrapy.utils.ssl import get_openssl_version
+
+
+def scrapy_components_versions() -> List[Tuple[str, str]]:
+    lxml_version = ".".join(map(str, lxml.etree.LXML_VERSION))
+    libxml2_version = ".".join(map(str, lxml.etree.LIBXML_VERSION))
+
+    return [
+        ("Scrapy", scrapy.__version__),
+        ("lxml", lxml_version),
+        ("libxml2", libxml2_version),
+        ("cssselect", cssselect.__version__),
+        ("parsel", parsel.__version__),
+        ("w3lib", w3lib.__version__),
+        ("Twisted", twisted.version.short()),
+        ("Python", sys.version.replace("\n", "- ")),
+        ("pyOpenSSL", get_openssl_version()),
+        ("cryptography", cryptography.__version__),
+        ("Platform", platform.platform()),
+    ]
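
A sketch that prints the component report built above (output depends on the installed versions):

    from scrapy.utils.versions import scrapy_components_versions

    for name, version in scrapy_components_versions():
        print(f"{name:>12}: {version}")
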