# Pinned checkout of the "arrow-testing" repository, fetched from GitHub at an
# exact commit. Further down in this file it is symlinked into the working
# tree as ./testing.
20 arrow-testing = fetchFromGitHub {
21 name = "arrow-testing";
23 repo = "arrow-testing";
24 rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
# Content hash of the tree at `rev`; the two must be updated together.
25 hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
# Pinned checkout of the "parquet-testing" repository, fetched from GitHub at
# an exact commit. Further down in this file it is symlinked into the working
# tree as ./parquet.
28 parquet-testing = fetchFromGitHub {
29 name = "parquet-testing";
31 repo = "parquet-testing";
32 rev = "50af3d8ce206990d81014b1862e5ce7380dc3e08";
# Content hash of the tree at `rev`; the two must be updated together.
33 hash = "sha256-edyv/r5olkj09aHtm8LHZY0b3jUtLNUcufwI41qKYaY=";
# Package definition. `rec` makes the attribute set self-referential, which is
# what allows `${version}` to be interpolated in the attributes below.
# NOTE(review): `version` itself is defined outside the lines shown here.
37 buildPythonPackage rec {
# Upstream source, fetched from GitHub at the git tag named after `version`.
42 src = fetchFromGitHub {
# Fixed store name for the fetched source.
43 name = "datafusion-source";
45 repo = "arrow-datafusion-python";
# Fully-qualified tag ref, so the fetch follows the package version.
46 rev = "refs/tags/${version}";
# Must be refreshed on every version bump, since `rev` changes with it.
47 hash = "sha256-5WOSlx4XW9zO6oTY16lWQElShLv0ubflVPfSSEGrFgg=";
# Rust crate dependencies, fetched ahead of the build via rustPlatform and
# pinned by `hash`.
# NOTE(review): the arguments selecting which source/lockfile to vendor are
# outside the lines shown; presumably this hash needs updating whenever `src`
# changes -- confirm.
# NOTE(review): newer nixpkgs appears to favour `fetchCargoVendor` over
# `fetchCargoTarball`; switching would require a recomputed hash -- verify
# against the nixpkgs revision in use before changing.
50 cargoDeps = rustPlatform.fetchCargoTarball {
51 name = "datafusion-cargo-deps";
53 hash = "sha256-hN03tbnH77VsMDxSMddMHIH00t7lUs5h8rTHbiMIExw=";
# Build-time tools; `with rustPlatform;` brings rustPlatform's attributes into
# scope for the list entries (the entries themselves are outside the lines
# shown here).
56 nativeBuildInputs = with rustPlatform; [
# Extra entries appended only when the host platform is Darwin.
64 ++ lib.optionals stdenv.hostPlatform.isDarwin [
# typing-extensions is appended only for Python interpreters older than 3.13.
# NOTE(review): which attribute this list belongs to is not visible here.
72 ] ++ lib.optionals (pythonOlder "3.13") [ typing-extensions ];
# Smoke test: the built package must be importable as `datafusion`.
79 pythonImportsCheck = [ "datafusion" ];
88 ln -s ${arrow-testing} ./testing
89 ln -s ${parquet-testing} ./parquet
# Package metadata: short and long descriptions, upstream links, license and
# maintainers.
97 description = "Extensible query execution framework";
99 DataFusion is an extensible query execution framework, written in Rust,
100 that uses Apache Arrow as its in-memory format.
102 homepage = "https://arrow.apache.org/datafusion/";
# Changelog link follows the packaged version via `${version}`.
103 changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
104 license = with licenses; [ asl20 ];
105 maintainers = with maintainers; [ cpcloud ];