Merge pull request #268619 from tweag/lib-descriptions
[NixPkgs.git] / pkgs / development / libraries / arrow-cpp / default.nix
blob 4cc55effdaa5fae685d7d621a63d08f6041af74b
1 { stdenv
2 , lib
3 , fetchurl
4 , fetchFromGitHub
5 , fixDarwinDylibNames
6 , autoconf
7 , aws-sdk-cpp
8 , boost
9 , brotli
10 , c-ares
11 , cmake
12 , crc32c
13 , curl
14 , flatbuffers
15 , gflags
16 , glog
17 , google-cloud-cpp
18 , grpc
19 , gtest
20 , libbacktrace
21 , lz4
22 , minio
23 , ninja
24 , nlohmann_json
25 , openssl
26 , perl
27 , protobuf
28 , python3
29 , rapidjson
30 , re2
31 , snappy
32 , sqlite
33 , thrift
34 , tzdata
35 , utf8proc
36 , which
37 , zlib
38 , zstd
39 , enableShared ? !stdenv.hostPlatform.isStatic # shared build unless the host platform is static; selects ARROW_BUILD_SHARED vs ARROW_BUILD_STATIC in cmakeFlags
40 , enableFlight ? true # toggles ARROW_FLIGHT / ARROW_FLIGHT_SQL and adds grpc, openssl, protobuf and sqlite to buildInputs
41 , enableJemalloc ? !stdenv.isDarwin # toggles ARROW_JEMALLOC and the vendored jemalloc source; off by default on Darwin
42 , enableS3 ? true # toggles ARROW_S3 and the aws-sdk-cpp dependency
43 , enableGcs ? !stdenv.isDarwin # toggles ARROW_GCS and the google-cloud-cpp dependency; off by default on Darwin
46 assert lib.asserts.assertMsg # evaluation fails with the message below when the condition does not hold
47   ((enableS3 && stdenv.isDarwin) -> (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")) # implication: Boost is only constrained when S3 is enabled on Darwin
48   "S3 on Darwin requires Boost != 1.69";
50 let
51   arrow-testing = fetchFromGitHub { # pinned checkout of apache/arrow-testing; its data/ directory is exported as ARROW_TEST_DATA
52     name = "arrow-testing";
53     owner = "apache";
54     repo = "arrow-testing";
55     rev = "47f7b56b25683202c1fd957668e13f2abafc0f12";
56     hash = "sha256-ZDznR+yi0hm5O1s9as8zq5nh1QxJ8kXCRwbNQlzXpnI=";
57   };
59   parquet-testing = fetchFromGitHub { # pinned checkout of apache/parquet-testing; its data/ directory is exported as PARQUET_TEST_DATA
60     name = "parquet-testing";
61     owner = "apache";
62     repo = "parquet-testing";
63     rev = "b2e7cc755159196e3a068c8594f7acbaecfdaaac";
64     hash = "sha256-IFvGTOkaRSNgZOj8DziRj88yH5JRF+wgSDZ5N0GNvjk=";
65   };
67   aws-sdk-cpp-arrow = aws-sdk-cpp.override { # aws-sdk-cpp with an explicit `apis` list; used in buildInputs and cmakeFlags when enableS3 is set
68     apis = [ # NOTE(review): presumably limits which SDK components get built — confirm against the aws-sdk-cpp expression
69       "cognito-identity"
70       "config"
71       "identity-management"
72       "s3"
73       "sts"
74       "transfer"
75     ];
76   };
79 stdenv.mkDerivation rec { # `rec` lets later attributes refer to `version` and `doInstallCheck`
80   pname = "arrow-cpp";
81   version = "14.0.1";
83   src = fetchurl { # release tarball fetched through the `mirror://apache` mirror list
84     url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
85     hash = "sha256-XHDq+xAR+dEkuvsyiv5U9izFuSgLcIDh49Zo94wOQH4=";
86   };
88   sourceRoot = "apache-arrow-${version}/cpp"; # build from the cpp/ subdirectory of the unpacked tarball
90   # Versions of the vendored dependencies below are all taken from
91   # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
93   # jemalloc: Arrow uses a custom prefix to prevent default allocator symbol
94   # collisions, as well as custom build flags
95   ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl { # a null dynamic attribute name omits the attribute, so this is only defined when enableJemalloc is true
96     url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
97     hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
98   };
100   # mimalloc: Arrow uses custom build flags for mimalloc
101   ARROW_MIMALLOC_URL = fetchFromGitHub { # NOTE(review): presumably read by Arrow's CMake as the pre-fetched source for its vendored mimalloc build (ARROW_MIMALLOC=ON) — confirm
102     owner = "microsoft";
103     repo = "mimalloc";
104     rev = "v2.0.6";
105     hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
106   };
108   ARROW_XSIMD_URL = fetchFromGitHub { # pre-fetched xsimd source; cmakeFlags sets xsimd_SOURCE=AUTO
109     owner = "xtensor-stack";
110     repo = "xsimd";
111     rev = "9.0.1";
112     hash = "sha256-onALN6agtrHWigtFlCeefD9CiRZI4Y690XTzy2UDnrk=";
113   };
115   ARROW_SUBSTRAIT_URL = fetchFromGitHub { # pre-fetched substrait source; cmakeFlags enables ARROW_SUBSTRAIT
116     owner = "substrait-io";
117     repo = "substrait";
118     rev = "v0.27.0";
119     hash = "sha256-wptEAXembah04pzqAz6UHeUxp+jMf6Lh/IdyuIhy/a8=";
120   };
122   nativeBuildInputs = [ # tools that run on the build machine
123     cmake
124     ninja
125     autoconf # for vendored jemalloc
126     flatbuffers # also in buildInputs; NOTE(review): presumably listed here so its compiler can run at build time — confirm
127   ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames; # Darwin-only hook
128   buildInputs = [ # libraries needed regardless of the feature toggles
129     boost
130     brotli
131     flatbuffers
132     gflags
133     glog
134     gtest
135     libbacktrace
136     lz4
137     nlohmann_json # alternative JSON parser to rapidjson
138     protobuf # substrait requires protobuf
139     rapidjson
140     re2
141     snappy
142     thrift
143     utf8proc
144     zlib
145     zstd
146   ] ++ lib.optionals enableFlight [ # extra dependencies for Arrow Flight
147     grpc
148     openssl
149     protobuf # already listed unconditionally above
150     sqlite
151   ] ++ lib.optionals enableS3 [ aws-sdk-cpp-arrow openssl ] # extra dependencies for the S3 filesystem
152   ++ lib.optionals enableGcs [ # extra dependencies for the GCS filesystem
153     crc32c
154     curl
155     google-cloud-cpp
156     grpc
157     nlohmann_json # already listed unconditionally above
158   ];
160   preConfigure = ''
161     patchShebangs build-support/
162     substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
163       --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
164   ''; # patches script interpreters under build-support/ and replaces the discover_tz_dir() call in the vendored datetime code with the fixed tzdata zoneinfo path
166   cmakeFlags = [
167     "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON" # find_package() tries package config files before Find modules
168     "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}" # exactly one of shared/static is built,
169     "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}" # depending on enableShared
170     "-DARROW_BUILD_TESTS=ON" # the tests are run in installCheckPhase
171     "-DARROW_BUILD_INTEGRATION=ON"
172     "-DARROW_BUILD_UTILITIES=ON"
173     "-DARROW_EXTRA_ERROR_CONTEXT=ON"
174     "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
175     "-DARROW_DEPENDENCY_SOURCE=SYSTEM" # NOTE(review): presumably makes Arrow use the dependencies from buildInputs instead of downloading them — confirm against Arrow's build docs
176     "-Dxsimd_SOURCE=AUTO" # per-dependency setting for xsimd, whose source is supplied via ARROW_XSIMD_URL
177     "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
178     "-DARROW_COMPUTE=ON"
179     "-DARROW_CSV=ON"
180     "-DARROW_DATASET=ON"
181     "-DARROW_FILESYSTEM=ON"
182     "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
183     "-DARROW_HDFS=ON"
184     "-DARROW_IPC=ON"
185     "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
186     "-DARROW_JSON=ON"
187     "-DARROW_USE_GLOG=ON"
188     "-DARROW_WITH_BACKTRACE=ON"
189     "-DARROW_WITH_BROTLI=ON"
190     "-DARROW_WITH_LZ4=ON"
191     "-DARROW_WITH_NLOHMANN_JSON=ON"
192     "-DARROW_WITH_SNAPPY=ON"
193     "-DARROW_WITH_UTF8PROC=ON"
194     "-DARROW_WITH_ZLIB=ON"
195     "-DARROW_WITH_ZSTD=ON"
196     "-DARROW_MIMALLOC=ON"
197     "-DARROW_SUBSTRAIT=ON"
198     "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
199     "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
200     "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
201     "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
202     # Parquet options:
203     "-DARROW_PARQUET=ON"
204     "-DPARQUET_BUILD_EXECUTABLES=ON"
205     "-DPARQUET_REQUIRE_ENCRYPTION=ON"
206   ] ++ lib.optionals (!enableShared) [ # static builds link the tests statically as well
207     "-DARROW_TEST_LINKAGE=static"
208   ] ++ lib.optionals stdenv.isDarwin [
209     "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
210   ] ++ lib.optionals (!stdenv.isx86_64) [ "-DARROW_USE_SIMD=OFF" ] # SIMD is switched off on every platform other than x86_64
211   ++ lib.optionals enableS3 [ "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h" ]; # explicit path to the AWS SDK core header
213   doInstallCheck = true; # enables installCheckPhase, where the test suite is run
214   ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data"; # empty string when install checks are disabled
215   PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data"; # empty string when install checks are disabled
216   GTEST_FILTER = # environment variable read by GoogleTest binaries to select which tests run
217     let
218       # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
219       filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [ # excluded on aarch64 only
220         "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
221         "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
222         "TestCompareKernel.PrimitiveRandomTests"
223       ] ++ lib.optionals enableS3 [ # excluded whenever S3 support is built
224         "S3OptionsTest.FromUri"
225         "S3RegionResolutionTest.NonExistentBucket"
226         "S3RegionResolutionTest.PublicBucket"
227         "S3RegionResolutionTest.RestrictedBucket"
228         "TestMinioServer.Connect"
229         "TestS3FS.*"
230         "TestS3FSGeneric.*"
231       ] ++ lib.optionals stdenv.isDarwin [ # excluded on Darwin only
232         # TODO: revisit at 12.0.0 or when
233         # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
234         # is available, see
235         # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
236         "ExecPlanExecution.StressSourceSinkStopped" # NOTE(review): `version` is 14.0.1, past the 12.0.0 named in the TODO above — confirm whether this exclusion is still needed
237       ];
238     in
239     lib.optionalString doInstallCheck "-${lib.concatStringsSep ":" filteredTests}"; # the leading '-' turns the colon-separated list into a negative (exclusion) filter
241   __darwinAllowLocalNetworking = true; # NOTE(review): presumably required by tests that talk to local servers inside the Darwin sandbox — confirm
243   nativeInstallCheckInputs = [ perl which sqlite ] # tools made available for the install-check (test) phase
244     ++ lib.optionals enableS3 [ minio ] # only when S3 support is built
245     ++ lib.optionals enableFlight [ python3 ]; # only when Flight support is built
247   disabledTests = [ # ctest test names excluded by installCheckPhase
248     # requires networking
249     "arrow-gcsfs-test"
250     "arrow-flight-integration-test"
251   ];
253   installCheckPhase = ''
254     runHook preInstallCheck
256     ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'
258     runHook postInstallCheck
259   ''; # runs the ctest tests labelled "unittest", skipping any whose full name matches an entry of disabledTests
261   meta = with lib; { # package metadata; `with lib` brings `licenses`, `platforms` and `maintainers` into scope
262     description = "Cross-language development platform for in-memory data"; # no leading article, per the nixpkgs meta.description convention
263     homepage = "https://arrow.apache.org/docs/cpp/";
264     license = licenses.asl20;
265     platforms = platforms.unix;
266     maintainers = with maintainers; [ tobim veprbl cpcloud ];
267   };
268   passthru = { # exposes the feature toggles as attributes of the resulting package so other expressions can read them
269     inherit enableFlight enableJemalloc enableS3 enableGcs;
270   };