Merge pull request #268619 from tweag/lib-descriptions
[NixPkgs.git] / pkgs / development / python-modules / pyarrow / default.nix
blob90fae9e2722cbde280a86490c16fbb2e6ed19686
1 { lib
2 , stdenv
3 , buildPythonPackage
4 , python
5 , pythonAtLeast
6 , pythonOlder
7 , arrow-cpp
8 , cffi
9 , cloudpickle
10 , cmake
11 , cython
12 , fsspec
13 , hypothesis
14 , numpy
15 , pandas
16 , pytestCheckHook
17 , pytest-lazy-fixture
18 , pkg-config
19 , scipy
20 , fetchpatch
21 , setuptools-scm
24 let
  # Convert a boolean to an integer flag: 1 when `cond` is true, 0 otherwise.
  # Used below for the PYARROW_WITH_* and PYARROW_BUNDLE_ARROW_CPP_HEADERS
  # attributes.
25   zero_or_one = cond: if cond then 1 else 0;
28 buildPythonPackage rec {
29   pname = "pyarrow";
  # Reuse arrow-cpp's version and source, so this package is built from the
  # same source as arrow-cpp.
30   inherit (arrow-cpp) version src;
  # NOTE(review): presumably marks the package as unavailable on Python older
  # than 3.7; the exact semantics come from buildPythonPackage, which is not
  # visible here.
32   disabled = pythonOlder "3.7";
  # Build from the `python` subdirectory of apache-arrow-${version}
  # (presumably the top-level directory of the unpacked source — confirm).
34   sourceRoot = "apache-arrow-${version}/python";
36   nativeBuildInputs = [
    # Tools required while building the package.
37     cmake
38     cython
39     pkg-config
40     setuptools-scm
41   ];
  # arrow-cpp is also referenced below as ARROW_HOME and PARQUET_HOME.
43   buildInputs = [ arrow-cpp ];
  # Python packages listed as propagated inputs (presumably made available to
  # anything that depends on pyarrow — standard propagatedBuildInputs
  # semantics, defined outside this file).
45   propagatedBuildInputs = [
46     cffi
47     cloudpickle
48     fsspec
49     numpy
50     scipy
51   ];
  # Inputs needed only for running the test suite. pytestCheckHook is
  # presumably what consumes pytestFlagsArray and disabledTests further down.
53   nativeCheckInputs = [
54     hypothesis
55     pandas
56     pytestCheckHook
57     pytest-lazy-fixture
58   ];
60   PYARROW_BUILD_TYPE = "release";
  # Feature switches, each set to 1 or 0 through zero_or_one. FLIGHT, S3 and
  # GCS follow the corresponding arrow-cpp.enable* attributes; the remaining
  # ones are hard-coded.
  # NOTE(review): presumably these are exported as environment variables and
  # read by pyarrow's build scripts — confirm.
62   PYARROW_WITH_DATASET = zero_or_one true;
63   PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
64   PYARROW_WITH_HDFS = zero_or_one true;
65   PYARROW_WITH_PARQUET = zero_or_one true;
66   PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
67   # Plasma is deprecated since arrow 10.0.0
68   PYARROW_WITH_PLASMA = zero_or_one false;
69   PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
70   PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
71   PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;
  # Set the install RPATH to the lib directory of ARROW_HOME. ARROW_HOME is
  # defined just below; the forward reference works because the enclosing
  # attribute set is declared with `rec`.
73   PYARROW_CMAKE_OPTIONS = [
74     "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
75   ];
  # Both locations point at the arrow-cpp package.
77   ARROW_HOME = arrow-cpp;
78   PARQUET_HOME = arrow-cpp;
  # Forward arrow-cpp's ARROW_TEST_DATA attribute, but only when checks are
  # enabled; lib.optionalString yields the empty string when doCheck is false.
80   ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;
82   doCheck = true;
  # cmake is listed in nativeBuildInputs.
  # NOTE(review): presumably this keeps cmake's setup hook from running its own
  # configure phase — confirm.
84   dontUseCmakeConfigure = true;
86   __darwinAllowLocalNetworking = true;
  # Before the build, export PYARROW_PARALLEL with the value of
  # $NIX_BUILD_CORES (presumably the number of parallel build jobs — confirm).
88   preBuild = ''
89     export PYARROW_PARALLEL=$NIX_BUILD_CORES
90   '';
  # After installation, copy every *.h file found under pyarrow/src/arrow in
  # the current build directory into
  # $out/<python.sitePackages>/pyarrow/include/arrow/python.
  # All headers end up in that single directory, regardless of the
  # subdirectory in which `find` located them.
92   postInstall = ''
93     # copy the pyarrow C++ header files to the appropriate location
94     pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
95     mkdir -p "$pyarrow_include/arrow/python"
96     find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
97   '';
99   pytestFlagsArray = [
    # Extra command-line flags for pytest. The base list applies everywhere;
    # platform- and Python-version-specific deselections are appended below
    # with lib.optionals.
100     # A couple of tests are missing fixture imports, luckily pytest offers a
101     # clean solution.
    # NOTE(review): pytest documents `--fixtures` as "show available
    # fixtures"; confirm that the test suite is still executed when this flag
    # is passed.
102     "--fixtures pyarrow/tests/conftest.py"
103     # Deselect a single test because pyarrow prints a 2-line error message where
104     # only a single line is expected. The additional line of output comes from
105     # the glog library which is an optional dependency of arrow-cpp that is
106     # enabled in nixpkgs.
107     # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
108     "--deselect=pyarrow/tests/test_memory.py::test_env_var"
109     # these tests require access to s3 via the internet
110     "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
111     "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
112     "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
113     "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
114     # Flaky tests
115     "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
116     "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
117     # Flaky test, works locally but not on Hydra
118     "--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
119     # expects arrow-cpp headers to be bundled
120     "--deselect=pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
121   ] ++ lib.optionals stdenv.isDarwin [
122     # Requires loopback networking
    # NOTE(review): the trailing underscore in `test_socket_` looks
    # intentional — presumably a node-id prefix covering every test_socket_*
    # test; confirm.
123     "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
124     "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
125     "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
126     "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
127     "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
128     # fails to compile
129     "--deselect=pyarrow/tests/test_cython.py::test_cython_api"
130   ] ++ lib.optionals (pythonAtLeast "3.11") [
131     # Repr output is printing number instead of enum name so these tests fail
132     "--deselect=pyarrow/tests/test_fs.py::test_get_file_info"
133   ] ++ lib.optionals stdenv.isLinux [
134     # this test requires local networking
135     "--deselect=pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
136   ];
138   disabledTests = [ "GcsFileSystem" ];
  # NOTE(review): presumably turns off the default setuptools-based check
  # because the tests are run through pytestCheckHook instead — confirm.
140   dontUseSetuptoolsCheck = true;
  # Shell commands run before the tests:
  #   1. enable extglob so that the `!(...)` pattern is available;
  #   2. delete everything in pyarrow/ except conftest.py and tests;
  #   3. move pyarrow/conftest.py to pyarrow/tests/parent_conftest.py and
  #      rewrite `..conftest` to `.parent_conftest` in pyarrow/tests/conftest.py
  #      so the relocated file is still found.
  # NOTE(review): presumably the source tree is removed so that the tests run
  # against the installed pyarrow rather than the unbuilt sources — confirm.
  # On Darwin an `ulimit -n 1024` line is appended to the script.
142   preCheck = ''
143     shopt -s extglob
144     rm -r pyarrow/!(conftest.py|tests)
145     mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
146     substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
147   '' + lib.optionalString stdenv.isDarwin ''
148     # OSError: [Errno 24] Too many open files
149     ulimit -n 1024
150   '';
152   pythonImportsCheck = [
    # Module names for the import check: the top-level package, followed by
    # each submodule listed below with "pyarrow." prepended by the map.
153     "pyarrow"
154   ] ++ map (module: "pyarrow.${module}") [
155     "compute"
156     "csv"
157     "dataset"
158     "feather"
159     "flight"
160     "fs"
161     "hdfs"
162     "json"
163     "parquet"
164   ];
166   meta = with lib; {
    # Package metadata. `with lib;` brings licenses, platforms and maintainers
    # into scope for the attributes below.
167     description = "A cross-language development platform for in-memory data";
168     homepage = "https://arrow.apache.org/";
169     license = licenses.asl20;
170     platforms = platforms.unix;
171     maintainers = with maintainers; [ veprbl cpcloud ];
172   };