33 buildPythonPackage rec {
38 disabled = pythonOlder "3.8";
43 hash = "sha256-czoDnHQj5StpvygQtTMgk9TkKoSEYDWcB7Auz/j3Pr4=";
51 propagatedBuildInputs = [
80 LC_ALL = "en_US.UTF-8";
83 "tests/test_proxy_connect.py"
84 "tests/test_utils_display.py"
85 "tests/test_command_check.py"
86 # Don't test the documentation
91 # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
94 "test_flavor_detection"
95 "test_follow_whitespace"
96 # Requires network access
97 "AnonymousFTPTestCase"
100 "test_custom_asyncio_loop_enabled_true"
101 "test_custom_loop_asyncio"
102 "test_custom_loop_asyncio_deferred_signal"
103 "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
105 "test_timeout_download_from_spider_nodata_rcvd"
106 "test_timeout_download_from_spider_server_hangs"
107 "test_unbounded_response"
108 "CookiesMiddlewareTest"
110 "test_asyncio_enabled_reactor_different_loop"
111 "test_asyncio_enabled_reactor_same_loop"
112 # Fails with AssertionError
114 "test_peek_one_element"
116 "test_callback_kwargs"
117 # Test fails on Hydra
118 "test_start_requests_laziness"
119 ] ++ lib.optionals stdenv.isDarwin [
120 "test_xmliter_encoding"
122 "test_reactor_default_twisted_reactor_select"
123 "URIParamsSettingTest"
124 "URIParamsFeedOptionTest"
125 # flaky on darwin-aarch64
127 "test_start_requests_laziness"
131 installManPage extras/scrapy.1
132 installShellCompletion --cmd scrapy \
133 --zsh extras/scrapy_zsh_completion \
134 --bash extras/scrapy_bash_completion
137 pythonImportsCheck = [
141 __darwinAllowLocalNetworking = true;
144 description = "High-level web crawling and web scraping framework";
145 mainProgram = "scrapy";
147 Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
148 websites and extract structured data from their pages. It can be used for a wide
149 range of purposes, from data mining to monitoring and automated testing.
151 homepage = "https://scrapy.org/";
152 changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
153 license = licenses.bsd3;
154 maintainers = with maintainers; [ ];