Merge pull request #329823 from ExpidusOS/fix/pkgsllvm/elfutils
[NixPkgs.git] / pkgs / by-name / ar / arrow-cpp / package.nix
blob1cc21b5b56dc8700a59067767b973284de2d4d35
2   stdenv,
3   lib,
4   fetchurl,
5   fetchFromGitHub,
6   fixDarwinDylibNames,
7   autoconf,
8   aws-sdk-cpp,
9   aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
       # Default S3 backend: aws-sdk-cpp overridden with an explicit `apis` list.
       # NOTE(review): presumably this limits the SDK build to the listed
       # services — confirm against the aws-sdk-cpp package.
10     apis = [
11       "cognito-identity"
12       "config"
13       "identity-management"
14       "s3"
15       "sts"
16       "transfer"
17     ];
18   },
19   boost,
20   brotli,
21   bzip2,
22   cmake,
23   crc32c,
24   curl,
25   flatbuffers,
26   gflags,
27   glog,
28   google-cloud-cpp,
29   grpc,
30   gtest,
31   libbacktrace,
32   lz4,
33   minio, # only used as an install-check input when S3 is enabled
34   ninja,
35   nlohmann_json,
36   openssl,
37   perl,
38   protobuf,
39   python3, # only used as an install-check input when Flight is enabled
40   rapidjson,
41   re2,
42   snappy,
43   sqlite,
44   thrift,
45   tzdata, # zoneinfo path is patched into the vendored tz.cpp in preConfigure
46   utf8proc,
47   which,
48   zlib,
49   zstd,
50   testers, # provides testMetaPkgConfig for passthru.tests
51   enableShared ? !stdenv.hostPlatform.isStatic, # shared libraries unless the host platform is static
52   enableFlight ? true, # toggles ARROW_FLIGHT, ARROW_FLIGHT_SQL and ARROW_FLIGHT_TESTING
53   enableJemalloc ? !stdenv.hostPlatform.isDarwin, # toggles ARROW_JEMALLOC; off by default on Darwin
54   enableS3 ? true, # toggles ARROW_S3 and pulls in aws-sdk-cpp-arrow
55   enableGcs ? !stdenv.hostPlatform.isDarwin, # toggles ARROW_GCS; off by default on Darwin
58 assert lib.asserts.assertMsg (
     # Implication: the Boost check only applies when S3 is enabled on a Darwin
     # host; any version below 1.69 or at/above 1.70 satisfies it.
59   (enableS3 && stdenv.hostPlatform.isDarwin)
60   -> (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
61 ) "S3 on Darwin requires Boost != 1.69";
63 let
     # Pinned checkouts of the upstream test-data repositories. Their `data`
     # directories are handed to the test suite through ARROW_TEST_DATA and
     # PARQUET_TEST_DATA in the derivation below.
64   arrow-testing = fetchFromGitHub {
65     name = "arrow-testing";
66     owner = "apache";
67     repo = "arrow-testing";
68     rev = "735ae7128d571398dd798d7ff004adebeb342883";
69     hash = "sha256-67KwnSt+EeEDvk+9kxR51tErL2wJqEPRITKb/dN+HMQ=";
70   };
72   parquet-testing = fetchFromGitHub {
73     name = "parquet-testing";
74     owner = "apache";
75     repo = "parquet-testing";
76     rev = "74278bc4a1122d74945969e6dec405abd1533ec3";
77     hash = "sha256-WbpndtAviph6+I/F2bevuMI9DkfSv4SMPgMaP98k6Qo=";
78   };
81 stdenv.mkDerivation (finalAttrs: {
82   pname = "arrow-cpp";
83   version = "17.0.0";
85   src = fetchFromGitHub {
86     owner = "apache";
87     repo = "arrow";
       # The release tag is derived from `version` so the two cannot drift
       # apart; `hash` still has to be refreshed on every version bump.
88     rev = "apache-arrow-${finalAttrs.version}";
89     hash = "sha256-ZQqi1RFb4Ey0A0UVCThuIxM7DoFfkLwaeRAc2z8u9so=";
90   };
     # Build only the C++ subtree. The unpacked directory name is taken from the
     # fetcher instead of hard-coding its default ("source").
92   sourceRoot = "${finalAttrs.src.name}/cpp";
94   # versions are all taken from
95   # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
97   # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
98   # collisions as well as custom build flags
     # The attribute name is computed: when jemalloc is disabled the name
     # evaluates to null, and Nix drops an attribute whose dynamic name is null,
     # so the variable is only set for jemalloc-enabled builds.
99   ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
100     url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
101     hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
102   };
104   # mimalloc: arrow uses custom build flags for mimalloc
      # Set unconditionally, matching the unconditional ARROW_MIMALLOC=ON in
      # cmakeFlags. NOTE(review): presumably Arrow's CMake reads this variable
      # as the source location for its bundled mimalloc build — confirm.
105   ARROW_MIMALLOC_URL = fetchFromGitHub {
106     owner = "microsoft";
107     repo = "mimalloc";
108     rev = "v2.0.6";
109     hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
110   };
112   ARROW_XSIMD_URL = fetchFromGitHub {
        # Pinned xsimd sources. xsimd is not among buildInputs and cmakeFlags
        # sets xsimd_SOURCE=AUTO. NOTE(review): presumably Arrow falls back to
        # building from this checkout — confirm.
113     owner = "xtensor-stack";
114     repo = "xsimd";
115     rev = "13.0.0";
116     hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
117   };
119   ARROW_SUBSTRAIT_URL = fetchFromGitHub {
        # Pinned Substrait sources; cmakeFlags enables ARROW_SUBSTRAIT.
        # NOTE(review): presumably consumed by Arrow's CMake in place of a
        # download — confirm.
120     owner = "substrait-io";
121     repo = "substrait";
122     rev = "v0.44.0";
123     hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
124   };
126   nativeBuildInputs = [
        # Tools that run on the build machine.
127     cmake
128     ninja
129     autoconf # for vendored jemalloc
130     flatbuffers # also in buildInputs; NOTE(review): presumably needed here for its compiler — confirm
131   ] ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;
132   buildInputs =
        # Unconditional libraries first, then the additions for each optional
        # feature (Flight, S3, GCS).
133     [
134       boost
135       brotli
136       bzip2
137       flatbuffers
138       gflags
139       glog
140       gtest
141       libbacktrace
142       lz4
143       nlohmann_json # alternative JSON parser to rapidjson
144       protobuf # substrait requires protobuf
145       rapidjson
146       re2
147       snappy
148       thrift
149       utf8proc
150       zlib
151       zstd
152     ]
153     ++ lib.optionals enableFlight [
154       grpc
155       openssl
156       protobuf # NOTE(review): redundant, already in the unconditional list above
157       sqlite
158     ]
159     ++ lib.optionals enableS3 [
160       aws-sdk-cpp-arrow
161       openssl
162     ]
163     ++ lib.optionals enableGcs [
164       crc32c
165       curl
166       google-cloud-cpp
167       grpc
168       nlohmann_json # NOTE(review): redundant, already in the unconditional list above
169     ];
171   preConfigure = ''
172     patchShebangs build-support/
173     substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
174       --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
175   ''; # hard-wires the vendored tz code to the tzdata store path; --replace-fail aborts the build if the pattern no longer matches
177   cmakeFlags =
        # Fixed configuration first, then flags that depend on linkage, host
        # platform and the S3 switch.
178     [
179       "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
180       "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
181       "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
182       "-DARROW_BUILD_TESTS=ON" # the tests are run from installCheckPhase
183       "-DARROW_BUILD_INTEGRATION=ON"
184       "-DARROW_BUILD_UTILITIES=ON"
185       "-DARROW_EXTRA_ERROR_CONTEXT=ON"
186       "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
187       "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
188       "-Dxsimd_SOURCE=AUTO" # per-dependency override of the SYSTEM default above
189       "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
190       "-DARROW_COMPUTE=ON"
191       "-DARROW_CSV=ON"
192       "-DARROW_DATASET=ON"
193       "-DARROW_FILESYSTEM=ON"
194       "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
195       "-DARROW_HDFS=ON"
196       "-DARROW_IPC=ON"
197       "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
198       "-DARROW_JSON=ON"
199       "-DARROW_USE_GLOG=ON"
200       "-DARROW_WITH_BACKTRACE=ON"
201       "-DARROW_WITH_BROTLI=ON"
202       "-DARROW_WITH_BZ2=ON"
203       "-DARROW_WITH_LZ4=ON"
204       "-DARROW_WITH_NLOHMANN_JSON=ON"
205       "-DARROW_WITH_SNAPPY=ON"
206       "-DARROW_WITH_UTF8PROC=ON"
207       "-DARROW_WITH_ZLIB=ON"
208       "-DARROW_WITH_ZSTD=ON"
209       "-DARROW_MIMALLOC=ON"
210       "-DARROW_SUBSTRAIT=ON"
211       "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
212       "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
213       "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
214       "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
215       # Parquet options:
216       "-DARROW_PARQUET=ON"
217       "-DPARQUET_BUILD_EXECUTABLES=ON"
218       "-DPARQUET_REQUIRE_ENCRYPTION=ON"
219     ]
220     ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
221     ++ lib.optionals stdenv.hostPlatform.isDarwin [
222       "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
223     ]
        # NOTE(review): confirm ARROW_USE_SIMD is still recognised by this Arrow
        # release; newer upstream docs describe ARROW_SIMD_LEVEL instead.
224     ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
225     ++ lib.optionals enableS3 [
226       "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
227     ];
229   doInstallCheck = true; # the test suite runs in installCheckPhase
      # Both variables are read through finalAttrs, so an override that turns
      # doInstallCheck off also blanks them (lib.optionalString yields "").
230   ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
231   PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
232   GTEST_FILTER =
        # GoogleTest filter string. The leading "-" turns the colon-separated
        # list into exclusions, so everything not named here still runs. The
        # value is empty when install checks are disabled.
233     let
234       # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
235       filteredTests =
236         lib.optionals stdenv.hostPlatform.isAarch64 [
237           "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
238           "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
239           "TestCompareKernel.PrimitiveRandomTests"
240         ]
241         ++ lib.optionals enableS3 [
242           "S3OptionsTest.FromUri"
243           "S3RegionResolutionTest.NonExistentBucket"
244           "S3RegionResolutionTest.PublicBucket"
245           "S3RegionResolutionTest.RestrictedBucket"
246           "TestMinioServer.Connect"
247           "TestS3FS.*"
248           "TestS3FSGeneric.*"
249         ]
250         ++ lib.optionals stdenv.hostPlatform.isDarwin [
251           # TODO: overdue - planned for 12.0.0 (long since passed) or once
252           # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
253           # is available; re-check whether this skip is still needed. See
254           # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
255           "ExecPlanExecution.StressSourceSinkStopped"
256         ];
257     in
258     lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";
260   __darwinAllowLocalNetworking = true; # NOTE(review): presumably lets the tests reach localhost inside the Darwin sandbox — confirm
262   nativeInstallCheckInputs = [
        # Tools available only while the install checks run; minio and python3
        # are added only for the S3 and Flight feature sets respectively.
263     perl
264     which
265     sqlite
266   ] ++ lib.optionals enableS3 [ minio ] ++ lib.optionals enableFlight [ python3 ];
268   installCheckPhase =
        # Runs the ctest targets labelled "unittest", leaving out the targets
        # named below; the pre/post hooks are invoked around the run.
269     let
270       skippedCtestTargets = [
271         # flaky
272         "arrow-flight-test"
273         # requires networking
274         "arrow-gcsfs-test"
275         "arrow-flight-integration-test"
276       ];
          # Anchored alternation, so only exact target names are excluded.
          excludePattern = "^(${lib.concatStringsSep "|" skippedCtestTargets})$";
277     in
278     ''
279       runHook preInstallCheck
281       ctest -L unittest --exclude-regex '${excludePattern}'
283       runHook postInstallCheck
284     '';
286   meta = {
287     description = "Cross-language development platform for in-memory data";
288     homepage = "https://arrow.apache.org/docs/cpp/";
289     license = lib.licenses.asl20;
290     platforms = lib.platforms.unix;
291     maintainers = with lib.maintainers; [
292       tobim
293       veprbl
294       cpcloud
295     ];
        # pkg-config modules checked by passthru.tests.pkg-config. The Flight
        # modules are listed only when Flight is enabled, because the build is
        # configured with ARROW_FLIGHT=OFF otherwise and the test would look for
        # modules that were never built. Order is unchanged for the default
        # configuration.
296     pkgConfigModules = [
297       "arrow"
298       "arrow-acero"
299       "arrow-compute"
300       "arrow-csv"
301       "arrow-dataset"
302       "arrow-filesystem"
        ] ++ lib.optionals enableFlight [
303       "arrow-flight"
304       "arrow-flight-sql"
305       "arrow-flight-testing"
        ] ++ [
306       "arrow-json"
307       "arrow-substrait"
308       "arrow-testing"
309       "parquet"
310     ];
311   };
312   passthru = {
        # Expose the feature switches this package was evaluated with as
        # attributes of the resulting derivation.
313     inherit
314       enableFlight
315       enableJemalloc
316       enableS3
317       enableGcs
318       ;
        # Checks the built package against meta.pkgConfigModules.
319     tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
320   };