# pkgs/development/python-modules/scrapy/default.nix
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, pexpect
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, setuptools
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope-interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.1";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit version;
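    # The sdist on PyPI is published under the capitalized project name, so the
    # rec-level pname ("scrapy") is overridden here for the fetch.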
    pname = "Scrapy";
    hash = "sha256-czoDnHQj5StpvygQtTMgk9TkKoSEYDWcB7Auz/j3Pr4=";
  };

  nativeBuildInputs = [
    installShellFiles
    setuptools
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytestCheckHook
    sybil
    testfixtures
  ];
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear whether the failures are related to libxml2; see https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_persist"
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
    # Fails on Hydra
    "test_start_requests_laziness"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # Flaky on darwin-aarch64
    "test_fixed_delay"
    "test_start_requests_laziness"
  ];
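
  # installManPage and installShellCompletion come from the installShellFiles hook
  # listed in nativeBuildInputs.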
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [
    "scrapy"
  ];
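
  # Some tests talk to servers on localhost; permit that inside the darwin sandbox.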
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast, high-level web crawling and web scraping framework used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ ];
  };
}
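
# A quick way to build and test this derivation from a nixpkgs checkout (a sketch;
# assumes the conventional python3Packages attribute path):
#
#   nix-build -A python3Packages.scrapy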