buildPythonPackage rec {
  format = "setuptools";

  disabled = pythonOlder "3.8";

    hash = "sha256-PL3tzgw/DgSC1hvi10WGg758188UsO5q37rduA9bNqU=";
    # Fix compatibility with Twisted>=23.8. Remove with the next release.
      url = "https://github.com/scrapy/scrapy/commit/aa95ada42cdf570f840f55c463375f8a81b303f8.patch";
      hash = "sha256-LuhA5BqtjSUgkotplvUCtvGNYOTrl0MJRCXiSBMDFzY=";
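      # Assumption: the upstream commit also touches the test suite; those
      # hunks are excluded below because they are not needed to build the package.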
52 "tests/CrawlerProcess/sleeping.py"
53 "tests/test_crawler.py"
62 propagatedBuildInputs = [
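  # The test suite requires a UTF-8 locale.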
  LC_ALL = "en_US.UTF-8";

    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation

    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_one_element"
    "test_callback_kwargs"
    # Test fails on Hydra
    "test_start_requests_laziness"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # Flaky on darwin-aarch64
    "test_start_requests_laziness"
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
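  # Smoke test: ensure the installed module can be imported.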
  pythonImportsCheck = [
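  # Allow the test suite to use localhost networking inside the Darwin sandbox.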
  __darwinAllowLocalNetworking = true;

    description = "High-level web crawling and web scraping framework";
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];