biome: 1.9.2 -> 1.9.3 (#349335)
[NixPkgs.git] / pkgs / development / python-modules / pyarrow / default.nix
blob e22429f0716fa565a5cbbd480e52e7a84baf58b0
2   lib,
3   stdenv,
4   buildPythonPackage,
5   python,
6   pythonAtLeast,
7   pythonOlder,
8   arrow-cpp,
9   cffi,
10   cloudpickle,
11   cmake,
12   cython_0,
13   fsspec,
14   hypothesis,
15   numpy,
16   pandas,
17   pytestCheckHook,
18   pytest-lazy-fixture,
19   pkg-config,
20   setuptools,
21   setuptools-scm,
22   oldest-supported-numpy,
25 let
# Helper: map a boolean to the integer 1 (true) or 0 (false). It is used below
# to express the PYARROW_WITH_* / PYARROW_BUNDLE_* feature switches.
26   zero_or_one = cond: if cond then 1 else 0;
29 buildPythonPackage rec {
# Package identity. The version and source are inherited from arrow-cpp, so
# this package is always built from the same source as that derivation.
30   pname = "pyarrow";
31   inherit (arrow-cpp) version src;
32   pyproject = true;
# Mark the package as disabled for Python interpreters older than 3.7.
34   disabled = pythonOlder "3.7";
# Build from the `python` subdirectory of the inherited source tree.
36   sourceRoot = "${src.name}/python";
# Tools and Python build helpers made available while building.
38   nativeBuildInputs = [
39     cmake
40     cython_0
41     pkg-config
42     setuptools
43     setuptools-scm
44     oldest-supported-numpy
45   ];
# The Arrow C++ derivation this package is built against.
47   buildInputs = [ arrow-cpp ];
# Python dependencies propagated to consumers of this package.
49   propagatedBuildInputs = [
50     cffi
51     numpy
52   ];
# Dependencies that are only listed for the test (check) phase; they are split
# between checkInputs and nativeCheckInputs.
54   checkInputs = [
55     cloudpickle
56     fsspec
57   ];
59   nativeCheckInputs = [
60     hypothesis
61     pandas
62     pytestCheckHook
63     pytest-lazy-fixture
64   ];
# Build configuration passed as derivation attributes (presumably read by
# pyarrow's build as environment variables -- confirm against upstream docs).
66   PYARROW_BUILD_TYPE = "release";
# Feature switches, encoded as 0/1 through zero_or_one. DATASET, HDFS, PARQUET
# and PARQUET_ENCRYPTION are always on; FLIGHT, S3 and GCS follow the matching
# enable* flags of arrow-cpp; bundling of the Arrow C++ headers is always off.
68   PYARROW_WITH_DATASET = zero_or_one true;
69   PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
70   PYARROW_WITH_HDFS = zero_or_one true;
71   PYARROW_WITH_PARQUET = zero_or_one true;
72   PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
73   PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
74   PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
75   PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;
# Set the CMake install RPATH to the lib directory of ARROW_HOME, which is the
# arrow-cpp derivation (see below; resolvable because the attribute set is `rec`).
77   PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];
# Both Arrow and Parquet are taken from the same arrow-cpp derivation.
79   ARROW_HOME = arrow-cpp;
80   PARQUET_HOME = arrow-cpp;
# Only reference arrow-cpp's test data when tests are enabled; with doCheck
# false this evaluates to the empty string.
82   ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;
# Run the test suite as part of the build.
84   doCheck = true;
# cmake is listed in nativeBuildInputs, but its configure step is turned off
# here (presumably because the Python build invokes CMake itself -- confirm).
86   dontUseCmakeConfigure = true;
# Allow local networking inside the Darwin build sandbox.
88   __darwinAllowLocalNetworking = true;
# Export PYARROW_PARALLEL from the number of cores Nix grants the build.
90   preBuild = ''
91     export PYARROW_PARALLEL=$NIX_BUILD_CORES
92   '';
# After installation, copy every `*.h` file found under pyarrow/src/arrow in the
# build directory into <site-packages>/pyarrow/include/arrow/python. All files
# are copied into that single directory, so any subdirectory layout under
# pyarrow/src/arrow is flattened.
94   postInstall = ''
95     # copy the pyarrow C++ header files to the appropriate location
96     pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
97     mkdir -p "$pyarrow_include/arrow/python"
98     find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
99   '';
# Extra pytest command-line flags: a base list that always applies, followed by
# deselections that are appended only on Darwin, only on Python >= 3.11, and
# only on Linux respectively.
101   pytestFlagsArray =
102     [
103       # A couple of tests are missing fixture imports, luckily pytest offers a
104       # clean solution.
105       "--fixtures pyarrow/tests/conftest.py"
106       # Deselect a single test because pyarrow prints a 2-line error message where
107       # only a single line is expected. The additional line of output comes from
108       # the glog library which is an optional dependency of arrow-cpp that is
109       # enabled in nixpkgs.
110       # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
111       "--deselect=pyarrow/tests/test_memory.py::test_env_var"
112       # these tests require access to s3 via the internet
113       "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
114       "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
115       "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
116       "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
117       # Flaky tests
118       "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
119       "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
120       # Flaky test, works locally but not on Hydra
121       "--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
122       # expects arrow-cpp headers to be bundled
123       "--deselect=pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
124     ]
125     ++ lib.optionals stdenv.hostPlatform.isDarwin [
126       # Requires loopback networking
127       "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
128       "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
129       "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
130       "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
131       "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
132       # fails to compile
133       "--deselect=pyarrow/tests/test_cython.py::test_cython_api"
134     ]
135     ++ lib.optionals (pythonAtLeast "3.11") [
136       # Repr output is printing number instead of enum name so these tests fail
137       "--deselect=pyarrow/tests/test_fs.py::test_get_file_info"
138     ]
139     ++ lib.optionals stdenv.hostPlatform.isLinux [
140       # this test requires local networking
141       "--deselect=pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
142     ];
# Skip tests whose name matches "GcsFileSystem".
144   disabledTests = [ "GcsFileSystem" ];
146   dontUseSetuptoolsCheck = true;
# Before running tests: delete everything under pyarrow/ except conftest.py and
# the tests directory, move the top-level conftest.py into the tests directory
# as parent_conftest.py, and rewrite the `..conftest` reference in
# tests/conftest.py to `.parent_conftest` to match. Presumably this makes the
# tests import the installed pyarrow rather than the source tree -- confirm.
# On Darwin the open-file limit is additionally set to 1024.
# NOTE(review): newer nixpkgs is believed to deprecate `substituteInPlace
# --replace` in favor of `--replace-fail` / `--replace-quiet` -- confirm and
# migrate if so.
148   preCheck =
149     ''
150       shopt -s extglob
151       rm -r pyarrow/!(conftest.py|tests)
152       mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
153       substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
154     ''
155     + lib.optionalString stdenv.hostPlatform.isDarwin ''
156       # OSError: [Errno 24] Too many open files
157       ulimit -n 1024
158     '';
# Modules to import as a smoke test: the top-level "pyarrow" package plus each
# listed submodule, with the "pyarrow." prefix added by the map.
160   pythonImportsCheck =
161     [ "pyarrow" ]
162     ++ map (module: "pyarrow.${module}") [
163       "compute"
164       "csv"
165       "dataset"
166       "feather"
167       "flight"
168       "fs"
169       "json"
170       "parquet"
171     ];
# Package metadata: description, upstream homepage, license (Apache 2.0 via
# licenses.asl20), supported platforms (unix) and maintainers. `with lib;`
# brings `licenses`, `platforms` and `maintainers` into scope.
173   meta = with lib; {
174     description = "Cross-language development platform for in-memory data";
175     homepage = "https://arrow.apache.org/";
176     license = licenses.asl20;
177     platforms = platforms.unix;
178     maintainers = with maintainers; [
179       veprbl
180       cpcloud
181     ];
182   };