writers: add writeGuile[Bin] (#364531)
[NixPkgs.git] / pkgs / development / python-modules / polars / default.nix
blobd4bb4daf5cd827191308a107f1ba013d4c1739e1
2   lib,
3   stdenv,
4   buildPythonPackage,
5   cargo,
6   cmake,
7   darwin,
8   fetchFromGitHub,
9   pkg-config,
10   pkgs, # zstd hidden by python3Packages.zstd
11   pytestCheckHook,
12   pytest-codspeed ? null, # Not in Nixpkgs
13   pytest-cov,
14   pytest-xdist,
15   pytest-benchmark,
16   rustc,
17   rustPlatform,
18   runCommand,
20   mimalloc,
21   jemalloc,
22   rust-jemalloc-sys,
23   # Allocator placed in buildInputs. Defaults to `mimalloc`; the alternative is `polarsJemalloc` (defined below)
24   polarsMemoryAllocator ? mimalloc, # polarsJemalloc,
25   polarsJemalloc ?
26     let
27       jemalloc' = rust-jemalloc-sys.override {
28         jemalloc = jemalloc.override {
29           # "libjemalloc.so.2: cannot allocate memory in static TLS block"
31           # https://github.com/pola-rs/polars/issues/5401#issuecomment-1300998316
32           disableInitExecTls = true;
33         };
34       };
35     in
36     assert builtins.elem "--disable-initial-exec-tls" jemalloc'.configureFlags;
37     jemalloc',
39   polars,
40   python,
43 let
44   version = "1.12.0"; # also used to build the fetched revision name, `py-${version}`
46   # Hide symbols to prevent accidental use
     # These bindings shadow the function arguments of the same name, so any
     # reference to them in the body below aborts evaluation with the message.
47   rust-jemalloc-sys = throw "polars: use polarsMemoryAllocator over rust-jemalloc-sys";
48   jemalloc = throw "polars: use polarsMemoryAllocator over jemalloc";
51 buildPythonPackage {
52   pname = "polars";
53   inherit version;
55   src = fetchFromGitHub {
56     owner = "pola-rs";
57     repo = "polars";
58     rev = "py-${version}"; # revision name is derived from `version` in the enclosing `let`
59     hash = "sha256-q//vt8FvVKY9N/BOIoOwxaSB/F/tNX1Zl/9jd0AzSH4="; # NOTE(review): content hash of this rev; has to change together with `version`
60   };
62   # Do not type-check assertions because some of them use unstable features (`is_none_or`)
     # The loop runs sed over every `*.rs` file that `find` reports. On each line, the
     # first `debug_assert!` gets a `#[cfg(debug_assertions)]` line inserted above it,
     # reusing the whitespace captured in front of it as indentation. Debug assertions
     # themselves are switched off through RUSTFLAGS in `env`.
63   postPatch = ''
64     while IFS= read -r -d "" path ; do
65       sed -i 's \(\s*\)debug_assert! \1#[cfg(debug_assertions)]\n\1debug_assert! ' "$path"
66     done < <( find -iname '*.rs' -print0 )
67   '';
69   cargoDeps = rustPlatform.importCargoLock {
70     lockFile = ./Cargo.lock; # lock file stored next to this expression
71     outputHashes = { # NOTE(review): presumably hashes for the Cargo.lock entries not fetched from crates.io — confirm
72       "numpy-0.21.0" = "sha256-u0Z+6L8pXSPaA3cE1sUpY6sCoaU1clXUcj/avnNzmsw=";
73       "polars-parquet-format-2.10.0" = "sha256-iB3KZ72JSp7tJCLn9moukpDEGf9MUos04rIQ9rDGWfI=";
74     };
75   };
77   requiredSystemFeatures = [ "big-parallel" ];
79   build-system = [ rustPlatform.maturinBuildHook ];
81   nativeBuildInputs = [
82     cargo
83     pkg-config # NOTE(review): presumably what ZSTD_SYS_USE_PKG_CONFIG in `env` relies on — confirm
84     cmake # libz-ng-sys (its configure hook is disabled via `dontUseCmakeConfigure`)
85     rustPlatform.cargoSetupHook
86     rustPlatform.cargoBuildHook
87     rustPlatform.cargoInstallHook
88     rustc
89   ];
91   buildInputs =
92     [
93       polarsMemoryAllocator # allocator chosen through the function argument (`mimalloc` unless overridden)
94       (pkgs.__splicedPackages.zstd or pkgs.zstd) # top-level zstd, taken via `pkgs` because python3Packages.zstd hides the plain name; falls back to `pkgs.zstd` when `__splicedPackages.zstd` is missing
95     ]
96     ++ lib.optionals stdenv.hostPlatform.isDarwin [
97       darwin.apple_sdk.frameworks.AppKit
98       darwin.apple_sdk.frameworks.IOKit
99       darwin.apple_sdk.frameworks.Security
100     ];
102   env = {
103     ZSTD_SYS_USE_PKG_CONFIG = true; # NOTE(review): presumably makes the zstd-sys crate use the zstd from buildInputs via pkg-config — confirm
105     # https://github.com/NixOS/nixpkgs/blob/5c38beb516f8da3a823d94b746dd3bf3c6b9bbd7/doc/languages-frameworks/rust.section.md#using-community-maintained-rust-toolchains-using-community-maintained-rust-toolchains
106     # https://discourse.nixos.org/t/nixpkgs-rustplatform-and-nightly/22870
107     RUSTC_BOOTSTRAP = true; # lets the regular `rustc` from nativeBuildInputs accept unstable features (see the links above)
109     # Several `debug_assert!` statements use the unstable `Option::is_none_or` method
110     RUSTFLAGS = lib.concatStringsSep " " (
111       [
112         "-Cdebug_assertions=n"
113       ]
114       ++ lib.optionals (polarsMemoryAllocator.pname == "mimalloc") [ # the cfg below is left out for any other allocator
115         "--cfg use_mimalloc"
116       ]
117     );
118     RUST_BACKTRACE = true;
119   };
121   dontUseCmakeConfigure = true; # cmake is in nativeBuildInputs only for libz-ng-sys; its configure hook must not run
123   maturinBuildFlags = [
124     "-m"
125     "py-polars/Cargo.toml" # argument of `-m`: the manifest under py-polars/ in the fetched source
126   ];
128   postInstall = ''
129     # Rename the built extension module (e.g. polars.abi3.so) to polars.so
130     local libPath=""
131     local libName=""
132     while IFS= read -r -d "" candidate ; do
133       libPath="$candidate"
134       libName="''${libPath##*/}"
135       [[ "$libName" != polars.so ]] || break
136     done < <( find "$out" -iname "polars*.so" -print0 )
137     [[ -n "$libPath" ]] || { echo "polars.so not found" >&2 ; exit 1 ; }
138     if [[ "$libName" != polars.so ]] ; then
139       mv "$libPath" "''${libPath%/*}/polars.so"
140     fi
141   '';
143   pythonImportsCheck = [
144     "polars"
145   ];
147   passthru.tests.dynloading-1 = # imports pyarrow first, then polars, with LD_DEBUG=libs set for the interpreter
148     runCommand "polars-dynloading-1"
149       { # the interpreter environment contains both pyarrow and the polars package under test
150         nativeBuildInputs = [
151           (python.withPackages (ps: [
152             ps.pyarrow
153             polars
154           ]))
155         ];
156       }
157       ''
158         ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
159         import pyarrow
160         import polars
161         EOF
162         touch $out
163       '';
164   passthru.tests.dynloading-2 = # same check as dynloading-1 with the import order reversed (polars, then pyarrow)
165     runCommand "polars-dynloading-2"
166       { # failureHook prints the log captured in $out, indented by four spaces, to stderr
167         nativeBuildInputs = [
168           (python.withPackages (ps: [
169             ps.pyarrow
170             polars
171           ]))
172         ];
173         failureHook = ''
174           sed "s/^/    /" $out >&2
175         '';
176       }
177       ''
178         ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
179         import polars
180         import pyarrow
181         EOF
182       '';
183   passthru.tests.pytest = stdenv.mkDerivation { # separate derivation that runs pytest on the py-polars tests with the built `polars` on the Python path
184     pname = "${polars.pname}-pytest";
186     inherit (polars) version src; # reuse the version and source tree of the package itself
188     requiredSystemFeatures = [ "big-parallel" ];
190     sourceRoot = "source/py-polars"; # work inside the py-polars subdirectory of the unpacked source
191     postPatch = ''
          # Keep only tests/ and pyproject.toml; every other non-hidden entry of
          # py-polars is deleted (presumably so that polars is imported from the
          # installed package instead of the source tree).
192       for f in * ; do
193         [[ "$f" == "tests" ]] || \
194         [[ "$f" == "pyproject.toml" ]] || \
195         rm -rf "$f"
196       done
          # Drop stale bytecode. __pycache__ matches directories, which plain rm
          # refuses to delete, hence rm -rf; -depth makes find handle the contents
          # of a directory before the directory itself is removed.
197       for pat in "__pycache__" "*.pyc" ; do
198         find . -depth -iname "$pat" -exec rm -rf "{}" ";"
199       done
200     '';
201     dontConfigure = true;
202     dontBuild = true;
204     doCheck = true;
205     checkPhase = "pytestCheckPhase"; # NOTE(review): shell function presumably provided by pytestCheckHook (see nativeCheckInputs) — confirm
206     nativeBuildInputs = [
207       (python.withPackages (ps: [
208         polars
209         ps.aiosqlite
210         ps.altair
211         ps.boto3
212         ps.deltalake
213         ps.flask
214         ps.flask-cors
215         ps.fsspec
216         ps.gevent
217         ps.hypothesis
218         ps.jax
219         ps.jaxlib
220         (ps.kuzu or null) # null when the Python package set has no `kuzu` attribute
221         ps.moto
222         ps.nest-asyncio
223         ps.numpy
224         ps.openpyxl
225         ps.pandas
226         ps.pyarrow
227         ps.pydantic
228         (ps.pyiceberg or null) # null when the Python package set has no `pyiceberg` attribute
229         ps.sqlalchemy
230         ps.torch
231         ps.xlsx2csv
232         ps.xlsxwriter
233         ps.zstandard
234         ps.cloudpickle
235       ]))
236     ];
237     nativeCheckInputs = [
238       pytestCheckHook
239       pytest-codspeed # may be null: the function argument defaults to null
240       pytest-cov
241       pytest-xdist
242       pytest-benchmark
243     ];
245     pytestFlagsArray = [
246       "-n auto"
247       "--dist loadgroup"
248       ''-m "slow or not slow"'' # marker expression that matches tests with and without the `slow` marker
249     ];
250     disabledTests = [ # test names excluded from the run; each name is listed once
251       "test_read_kuzu_graph_database" # kuzu
252       "test_read_database_cx_credentials" # connectorx
254       # adbc_driver_.*
255       "test_write_database_append_replace"
256       "test_write_database_create"
257       "test_write_database_create_quoted_tablename"
258       "test_write_database_adbc_temporary_table"
261       "test_write_database_errors"
265       # Internet access:
266       "test_read_web_file"
268       # Untriaged
269       "test_pickle_lazyframe_nested_function_udf"
270       "test_serde_udf"
271       "test_hash_struct"
272     ];
273     disabledTestPaths = [ # paths are relative to sourceRoot (py-polars)
274       "tests/benchmark"
275       "tests/docs"
277       "tests/unit/io/test_iceberg.py" # Package pyiceberg
278       "tests/unit/io/test_spreadsheet.py" # Package fastexcel
280       # Wrong altair version
281       "tests/unit/operations/namespaces/test_plot.py"
283       # adbc
284       "tests/unit/io/database/test_read.py"
286       # Untriaged
287       "tests/unit/cloud/test_prepare_cloud_plan.py"
288       "tests/unit/io/cloud/test_cloud.py"
289     ];
291     installPhase = "touch $out"; # the derivation produces no artifacts; an empty $out is created as its output
292   };
294   meta = {
295     description = "Dataframes powered by a multithreaded, vectorized query engine, written in Rust";
296     homepage = "https://github.com/pola-rs/polars";
297     license = lib.licenses.mit;
298     maintainers = with lib.maintainers; [
299       happysalada
300       SomeoneSerge
301     ];
302     mainProgram = "polars"; # NOTE(review): confirm that the package actually installs a `polars` executable
303     platforms = lib.platforms.all;
304   };
304   };