# Turn a boolean into the integer 1 (true) or 0 (false); used below for the
# PYARROW_WITH_* and PYARROW_BUNDLE_ARROW_CPP_HEADERS settings.
25 zero_or_one = enabled: if enabled then 1 else 0;
28 buildPythonPackage rec {
30 inherit (arrow-cpp) version src;
32 disabled = pythonOlder "3.7";
34 sourceRoot = "apache-arrow-${version}/python";
43 buildInputs = [ arrow-cpp ];
45 propagatedBuildInputs = [
60 PYARROW_BUILD_TYPE = "release";
62 PYARROW_WITH_DATASET = zero_or_one true;
63 PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
64 PYARROW_WITH_HDFS = zero_or_one true;
65 PYARROW_WITH_PARQUET = zero_or_one true;
66 PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
67 # Plasma has been deprecated since Arrow 10.0.0
68 PYARROW_WITH_PLASMA = zero_or_one false;
69 PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
70 PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
71 PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;
73 PYARROW_CMAKE_OPTIONS = [
74 "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
77 ARROW_HOME = arrow-cpp;
78 PARQUET_HOME = arrow-cpp;
80 ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;
84 dontUseCmakeConfigure = true;
86 __darwinAllowLocalNetworking = true;
89 export PYARROW_PARALLEL=$NIX_BUILD_CORES
93 # copy the pyarrow C++ header files to the appropriate location
94 pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
95 mkdir -p "$pyarrow_include/arrow/python"
96 find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
100 # A couple of tests are missing fixture imports; luckily, pytest offers a
102 "--fixtures pyarrow/tests/conftest.py"
103 # Deselect a single test because pyarrow prints a two-line error message where
104 # only a single line is expected. The additional line of output comes from
105 # the glog library, an optional dependency of arrow-cpp that is
106 # enabled in nixpkgs.
107 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
108 "--deselect=pyarrow/tests/test_memory.py::test_env_var"
109 # These tests require access to S3 over the internet
110 "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
111 "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
112 "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
113 "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
115 "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
116 "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
117 # Flaky test, works locally but not on Hydra
118 "--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
119 # expects arrow-cpp headers to be bundled
120 "--deselect=pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
121 ] ++ lib.optionals stdenv.isDarwin [
122 # Requires loopback networking
123 "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
124 "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
125 "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
126 "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
127 "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
129 "--deselect=pyarrow/tests/test_cython.py::test_cython_api"
130 ] ++ lib.optionals (pythonAtLeast "3.11") [
131 # Repr output prints a number instead of the enum name, so this test fails
132 "--deselect=pyarrow/tests/test_fs.py::test_get_file_info"
133 ] ++ lib.optionals stdenv.isLinux [
134 # this test requires local networking
135 "--deselect=pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
138 disabledTests = [ "GcsFileSystem" ];
140 dontUseSetuptoolsCheck = true;
144 rm -r pyarrow/!(conftest.py|tests)
145 mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
146 substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
147 '' + lib.optionalString stdenv.isDarwin ''
148 # OSError: [Errno 24] Too many open files
152 pythonImportsCheck = [
154 ] ++ map (module: "pyarrow.${module}") [
167 description = "A cross-language development platform for in-memory data";
168 homepage = "https://arrow.apache.org/";
169 license = licenses.asl20;
170 platforms = platforms.unix;
171 maintainers = with maintainers; [ veprbl cpcloud ];