22 oldest-supported-numpy,
# Encode a boolean as an integer: 1 when the flag is true, 0 when it is false.
# Used for the PYARROW_WITH_* and PYARROW_BUNDLE_ARROW_CPP_HEADERS attributes.
zero_or_one = enabled: if enabled then 1 else 0;
29 buildPythonPackage rec {
31 inherit (arrow-cpp) version src;
34 disabled = pythonOlder "3.7";
36 sourceRoot = "${src.name}/python";
44 oldest-supported-numpy
47 buildInputs = [ arrow-cpp ];
49 propagatedBuildInputs = [
66 PYARROW_BUILD_TYPE = "release";
68 PYARROW_WITH_DATASET = zero_or_one true;
69 PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
70 PYARROW_WITH_HDFS = zero_or_one true;
71 PYARROW_WITH_PARQUET = zero_or_one true;
72 PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
73 PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
74 PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
75 PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;
77 PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];
79 ARROW_HOME = arrow-cpp;
80 PARQUET_HOME = arrow-cpp;
82 ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;
85 dontUseCmakeConfigure = true;
87 __darwinAllowLocalNetworking = true;
90 export PYARROW_PARALLEL=$NIX_BUILD_CORES
94 # copy the pyarrow C++ header files to the appropriate location
95 pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
96 mkdir -p "$pyarrow_include/arrow/python"
97 find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
102 # A couple of tests are missing fixture imports, luckily pytest offers a
104 "--fixtures pyarrow/tests/conftest.py"
105 # Deselect a single test because pyarrow prints a 2-line error message where
106 # only a single line is expected. The additional line of output comes from
107 # the glog library which is an optional dependency of arrow-cpp that is
108 # enabled in nixpkgs.
109 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
110 "--deselect=pyarrow/tests/test_memory.py::test_env_var"
111 # these tests require access to s3 via the internet
112 "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
113 "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
114 "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
115 "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
117 "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
118 "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
119 # Flaky test, works locally but not on Hydra
120 "--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
121 # expects arrow-cpp headers to be bundled
122 "--deselect=pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
124 ++ lib.optionals stdenv.hostPlatform.isDarwin [
125 # Requires loopback networking
126 "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
127 "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
128 "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
129 "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
130 "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
132 "--deselect=pyarrow/tests/test_cython.py::test_cython_api"
134 ++ lib.optionals (pythonAtLeast "3.11") [
135 # Repr output is printing number instead of enum name so these tests fail
136 "--deselect=pyarrow/tests/test_fs.py::test_get_file_info"
138 ++ lib.optionals stdenv.hostPlatform.isLinux [
139 # this test requires local networking
140 "--deselect=pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
143 disabledTests = [ "GcsFileSystem" ];
145 dontUseSetuptoolsCheck = true;
150 rm -r pyarrow/!(conftest.py|tests)
151 mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
152 substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
154 + lib.optionalString stdenv.hostPlatform.isDarwin ''
155 # OSError: [Errno 24] Too many open files
161 ++ map (module: "pyarrow.${module}") [
173 description = "Cross-language development platform for in-memory data";
174 homepage = "https://arrow.apache.org/";
175 license = licenses.asl20;
176 platforms = platforms.unix;
177 maintainers = with maintainers; [