python3Packages.orjson: Disable failing tests on 32 bit
[NixPkgs.git] / pkgs / development / libraries / arrow-cpp / default.nix
blob 2ce613c78534bc0e7d3d751845dfa954b005e7db
1 { stdenv
2 , lib
3 , fetchurl
4 , fetchFromGitHub
5 , fixDarwinDylibNames
6 , autoconf
7 , aws-sdk-cpp
8 , boost
9 , brotli
10 , c-ares
11 , cmake
12 , crc32c
13 , curl
14 , flatbuffers
15 , gflags
16 , glog
17 , google-cloud-cpp
18 , grpc
19 , gtest
20 , libbacktrace
21 , lz4
22 , minio
23 , ninja
24 , nlohmann_json
25 , openssl
26 , perl
27 , protobuf
28 , python3
29 , rapidjson
30 , re2
31 , snappy
32 , sqlite
33 , thrift
34 , tzdata
35 , utf8proc
36 , which
37 , zlib
38 , zstd
39 , enableShared ? !stdenv.hostPlatform.isStatic # shared build unless the host platform is static; also gates -DARROW_PYTHON and the python/numpy inputs
40 , enableFlight ? true # gates the ARROW_FLIGHT* cmake flags and the grpc/openssl inputs
41 , enableJemalloc ? !stdenv.isDarwin # gates ARROW_JEMALLOC_URL and -DARROW_JEMALLOC; off by default on Darwin
42   # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
43   # non-existent in older versions
44   # see https://github.com/boostorg/process/issues/55
45 , enableS3 ? (!stdenv.isDarwin) || (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
46 , enableGcs ? !stdenv.isDarwin # google-cloud-cpp is not supported on darwin
49 assert lib.asserts.assertMsg # fails evaluation with the message below when S3 is requested on Darwin together with Boost 1.69
50   ((enableS3 && stdenv.isDarwin) -> (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70"))
51   "S3 on Darwin requires Boost != 1.69"; # the enableS3 default applies the same version test, so this guards explicit overrides
53 let # pinned test-fixture checkouts; their data/ directories are exported as ARROW_TEST_DATA and PARQUET_TEST_DATA further down
54   arrow-testing = fetchFromGitHub {
55     owner = "apache";
56     repo = "arrow-testing";
57     rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88"; # NOTE(review): presumably must match the submodule revision of the Arrow release being packaged — confirm on version bumps
58     hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8=";
59   };
61   parquet-testing = fetchFromGitHub {
62     owner = "apache";
63     repo = "parquet-testing";
64     rev = "aafd3fc9df431c2625a514fb46626e5614f1d199"; # NOTE(review): same caveat as arrow-testing above — confirm on version bumps
65     hash = "sha256-cO5t/mgsbBhbSefx8EMGTyxmgTjhZ8mFujkFQ3p/JS0=";
66   };
69 stdenv.mkDerivation rec { # rec: later attributes refer to version and doInstallCheck
70   pname = "arrow-cpp";
71   version = "9.0.0"; # interpolated into the tarball URL and sourceRoot below
73   src = fetchurl {
74     url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
75     hash = "sha256-qaAz8KNJAomZj0WGgNGVec8HkRcXumWv3my4AHD3qbU=";
76   };
77   sourceRoot = "apache-arrow-${version}/cpp"; # build from the cpp/ subdirectory of the release tarball
79   # versions are all taken from
80   # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
82   # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
83   # collisions as well as custom build flags
84   ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl { # a null attribute name drops the attribute, so this is only set when enableJemalloc is true
85     url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
86     hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
87   };
89   # mimalloc: arrow uses custom build flags for mimalloc
90   ARROW_MIMALLOC_URL = fetchFromGitHub { # set unconditionally, matching the unconditional -DARROW_MIMALLOC=ON in cmakeFlags
91     owner = "microsoft";
92     repo = "mimalloc";
93     rev = "v2.0.6";
94     hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
95   };
97   ARROW_XSIMD_URL = fetchFromGitHub { # NOTE(review): presumably the fallback used by -Dxsimd_SOURCE=AUTO when no system xsimd is found — confirm
98     owner = "xtensor-stack";
99     repo = "xsimd";
100     rev = "8.1.0";
101     hash = "sha256-Aqs6XJkGjAjGAp0PprabSM4m+32M/UXpSHppCHdzaZk=";
102   };
104   ARROW_SUBSTRAIT_URL = fetchFromGitHub { # pinned substrait sources; -DARROW_SUBSTRAIT=ON is set in cmakeFlags
105     owner = "substrait-io";
106     repo = "substrait";
107     rev = "v0.6.0";
108     hash = "sha256-hxCBomL4Qg9cHLRg9ZiO9k+JVOZXn6f4ikPtK+V9tno=";
109   };
111   patches = [
112     # patch to fix python-test
113     ./darwin.patch # NOTE(review): applied on every platform despite the file name — confirm that is intended
114   ];
116   nativeBuildInputs = [ # tools that run on the build machine
117     cmake
118     ninja
119     autoconf # for vendored jemalloc
120     flatbuffers # also listed in buildInputs
121   ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
122   buildInputs = [ # libraries needed regardless of the optional features
123     boost
124     brotli
125     flatbuffers
126     gflags
127     glog
128     gtest
129     libbacktrace
130     lz4
131     nlohmann_json # alternative JSON parser to rapidjson
132     protobuf # substrait requires protobuf
133     rapidjson
134     re2
135     snappy
136     thrift
137     utf8proc
138     zlib
139     zstd
140   ] ++ lib.optionals enableShared [ # Python bindings are only built in shared mode (see -DARROW_PYTHON in cmakeFlags)
141     python3.pkgs.python
142     python3.pkgs.numpy
143   ] ++ lib.optionals enableFlight [ # protobuf is already in the unconditional list above
144     grpc
145     openssl
147   ] ++ lib.optionals enableS3 [ aws-sdk-cpp openssl ]
148   ++ lib.optionals enableGcs [ # nlohmann_json is already in the unconditional list above
149     crc32c
150     curl
151     google-cloud-cpp grpc
153   ];
155   preConfigure = ''
156     patchShebangs build-support/
157     substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
158       --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
159   ''; # fixes script shebangs under build-support/ and hard-codes the tzdata zoneinfo path in place of the discover_tz_dir() call
161   cmakeFlags = [
162     "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}" # exactly one of shared/static is built
163     "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
164     "-DARROW_BUILD_TESTS=ON" # the tests are run in installCheckPhase
165     "-DARROW_BUILD_INTEGRATION=ON"
166     "-DARROW_BUILD_UTILITIES=ON"
167     "-DARROW_EXTRA_ERROR_CONTEXT=ON"
168     "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
169     "-DARROW_DEPENDENCY_SOURCE=SYSTEM" # NOTE(review): presumably makes Arrow use the packages from buildInputs instead of downloading them — confirm against the Arrow build docs
170     "-Dxsimd_SOURCE=AUTO"
171     "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
172     "-DARROW_COMPUTE=ON"
173     "-DARROW_CSV=ON"
174     "-DARROW_DATASET=ON"
175     "-DARROW_ENGINE=ON"
176     "-DARROW_FILESYSTEM=ON"
177     "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
178     "-DARROW_HDFS=ON"
179     "-DARROW_IPC=ON"
180     "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
181     "-DARROW_JSON=ON"
182     "-DARROW_PLASMA=ON"
183     # Disable Python for static mode because openblas is currently broken there.
184     "-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
185     "-DARROW_USE_GLOG=ON"
186     "-DARROW_WITH_BACKTRACE=ON"
187     "-DARROW_WITH_BROTLI=ON"
188     "-DARROW_WITH_LZ4=ON"
189     "-DARROW_WITH_NLOHMANN_JSON=ON"
190     "-DARROW_WITH_SNAPPY=ON"
191     "-DARROW_WITH_UTF8PROC=ON"
192     "-DARROW_WITH_ZLIB=ON"
193     "-DARROW_WITH_ZSTD=ON"
194     "-DARROW_MIMALLOC=ON"
195     # Parquet options:
196     "-DARROW_PARQUET=ON"
197     "-DARROW_SUBSTRAIT=ON"
198     "-DPARQUET_BUILD_EXECUTABLES=ON"
199     "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}" # from here on the flags are the Flight, S3 and GCS toggles, not Parquet options
200     "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
201     "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
202     "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
203   ] ++ lib.optionals (!enableShared) [
204     "-DARROW_TEST_LINKAGE=static" # static builds produce no shared library for the tests to link against
205   ] ++ lib.optionals stdenv.isDarwin [
206     "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
207   ] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF" # SIMD is only left at its default on x86_64
208   ++ lib.optional enableS3 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h"; # points the build at the AWS SDK core header inside the aws-sdk-cpp store path
210   doInstallCheck = true; # the three attributes below become empty strings when this is false
211   ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data";
212   PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data";
213   GTEST_FILTER =
214     let
215       # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
216       filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [ # excluded on aarch64 only
217         "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
218         "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
219         "TestCompareKernel.PrimitiveRandomTests"
220       ] ++ lib.optionals enableS3 [ # NOTE(review): presumably these need network access or a live S3/minio endpoint — confirm
221         "S3OptionsTest.FromUri"
222         "S3RegionResolutionTest.NonExistentBucket"
223         "S3RegionResolutionTest.PublicBucket"
224         "S3RegionResolutionTest.RestrictedBucket"
225         "TestMinioServer.Connect"
226         "TestS3FS.*"
227         "TestS3FSGeneric.*"
228       ];
229     in
230     lib.optionalString doInstallCheck "-${builtins.concatStringsSep ":" filteredTests}"; # the leading "-" turns the colon-separated list into a GoogleTest exclusion filter
231   __darwinAllowLocalNetworking = true; # NOTE(review): presumably needed by tests that open local sockets inside the Darwin sandbox — confirm
232   installCheckInputs = [ perl which sqlite ] ++ lib.optional enableS3 minio;
233   installCheckPhase = # runs the ctest targets labelled "unittest", skipping the targets named in excludedTests
234     let
235       excludedTests = lib.optionals stdenv.isDarwin [
236         # Some plasma tests need to be patched to use a shorter AF_UNIX socket
237         # path on Darwin. See https://github.com/NixOS/nix/pull/1085
238         "plasma-external-store-tests"
239         "plasma-client-tests"
240       ] ++ [ "arrow-gcsfs-test" ]; # excluded on every platform
241     in
242     ''
243       runHook preInstallCheck
245       ctest -L unittest \
246         --exclude-regex '^(${builtins.concatStringsSep "|" excludedTests})$'
248       runHook postInstallCheck
249     ''; # the regex is anchored with ^...$, so only exact test names are excluded
251   meta = with lib; { # package metadata
252     description = "Cross-language development platform for in-memory data"; # no leading article, per the nixpkgs meta.description convention
253     homepage = "https://arrow.apache.org/docs/cpp/";
254     license = licenses.asl20;
255     platforms = platforms.unix;
256     maintainers = with maintainers; [ tobim veprbl cpcloud ];
257   };
258   passthru = { # re-exports the feature flags this build was evaluated with as attributes of the derivation
259     inherit enableFlight enableJemalloc enableS3 enableGcs;
260   };