{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, pexpect
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.0";
  format = "setuptools";

  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit version;
    pname = "Scrapy";
    hash = "sha256-PL3tzgw/DgSC1hvi10WGg758188UsO5q37rduA9bNqU=";
  };

  patches = [
    # Fix compatibility with Twisted>=23.8. Remove with the next release.
    (fetchpatch {
      url = "https://github.com/scrapy/scrapy/commit/aa95ada42cdf570f840f55c463375f8a81b303f8.patch";
      hash = "sha256-LuhA5BqtjSUgkotplvUCtvGNYOTrl0MJRCXiSBMDFzY=";
      excludes = [
        "tests/CrawlerProcess/sleeping.py"
        "tests/test_crawler.py"
      ];
    })
  ];

  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytestCheckHook
    sybil
    testfixtures
  ];
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest"  # https://github.com/scrapy/scrapy/issues/5157
    "test_persist"
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
    # Test fails on Hydra
    "test_start_requests_laziness"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # Flaky on darwin-aarch64
    "test_fixed_delay"
    "test_start_requests_laziness"
  ];
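
  # Install the man page and shell completions shipped in extras/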
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [
    "scrapy"
  ];
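
  # The test suite starts local servers; allow localhost networking in the Darwin sandbox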
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast, high-level web crawling and web scraping framework used
      to crawl websites and extract structured data from their pages. It can be
      used for a wide range of purposes, from data mining to monitoring and
      automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];
  };
}