From 7891c655a80dda3458bac240cd69fd709e47d4e6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 25 Jan 2022 09:30:26 -0500 Subject: [PATCH] python3Packages.datafusion: init at 0.4.0 (#152763) --- .../datafusion/Cargo.lock.patch | 78 ++++++++++++++++ .../python-modules/datafusion/default.nix | 90 +++++++++++++++++++ pkgs/top-level/python-packages.nix | 2 + 3 files changed, 170 insertions(+) create mode 100644 pkgs/development/python-modules/datafusion/Cargo.lock.patch create mode 100644 pkgs/development/python-modules/datafusion/default.nix diff --git a/pkgs/development/python-modules/datafusion/Cargo.lock.patch b/pkgs/development/python-modules/datafusion/Cargo.lock.patch new file mode 100644 index 000000000000..e4e5eca8af4c --- /dev/null +++ b/pkgs/development/python-modules/datafusion/Cargo.lock.patch @@ -0,0 +1,78 @@ +diff --git a/Cargo.lock b/Cargo.lock +index fa84a54c..3d790e1c 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -57,9 +57,9 @@ checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd" + + [[package]] + name = "arrow" +-version = "6.0.0" ++version = "6.5.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "337e668497751234149fd607f5cb41a6ae7b286b6329589126fe67f0ac55d637" ++checksum = "216c6846a292bdd93c2b93c1baab58c32ff50e2ab5e8d50db333ab518535dd8b" + dependencies = [ + "bitflags", + "chrono", +@@ -212,9 +212,9 @@ dependencies = [ + + [[package]] + name = "comfy-table" +-version = "4.1.1" ++version = "5.0.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "11e95a3e867422fd8d04049041f5671f94d53c32a9dcd82e2be268714942f3f3" ++checksum = "c42350b81f044f576ff88ac750419f914abb46a03831bb1747134344ee7a4e64" + dependencies = [ + "strum", + "strum_macros", +@@ -279,7 +279,7 @@ dependencies = [ + + [[package]] + name = "datafusion" +-version = "5.1.0" ++version = "6.0.0" + dependencies = [ + "ahash", + "arrow", +@@ -310,7 +310,7 @@ dependencies = [ + + [[package]] + name = "datafusion-python" +-version = "0.3.0" ++version = "0.4.0" + dependencies = [ + "datafusion", + "pyo3", +@@ -877,9 +877,9 @@ dependencies = [ + + [[package]] + name = "parquet" +-version = "6.0.0" ++version = "6.5.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d263b9b59ba260518de9e57bd65931c3f765fea0fabacfe84f40d6fde38e841a" ++checksum = "788d9953f4cfbe9db1beff7bebd54299d105e34680d78b82b1ddc85d432cac9d" + dependencies = [ + "arrow", + "base64", +@@ -1228,15 +1228,15 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + + [[package]] + name = "strum" +-version = "0.21.0" ++version = "0.22.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2" ++checksum = "f7ac893c7d471c8a21f31cfe213ec4f6d9afeed25537c772e08ef3f005f8729e" + + [[package]] + name = "strum_macros" +-version = "0.21.1" ++version = "0.22.0" + source = "registry+https://github.com/rust-lang/crates.io-index" +-checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec" ++checksum = "339f799d8b549e3744c7ac7feb216383e4005d94bdb22561b3ab8f3b808ae9fb" + dependencies = [ + "heck", + "proc-macro2", diff --git a/pkgs/development/python-modules/datafusion/default.nix b/pkgs/development/python-modules/datafusion/default.nix new file mode 100644 index 000000000000..4b36df22d183 --- /dev/null +++ b/pkgs/development/python-modules/datafusion/default.nix @@ -0,0 +1,90 @@ +{ lib +, stdenv +, fetchurl +, buildPythonPackage +, fetchPypi +, fetchFromGitHub +, rustPlatform +, maturin +, pytestCheckHook +, libiconv +, numpy +, pandas +, pyarrow +, pytest +}: +let + # le sigh, the perils of unrelated versions of software living in the same + # repo: there's no obvious way to map the top level source repo + # (arrow-datafusion) version to the version of contained repo + # (arrow-datafusion/python) + # + # A commit hash will do in a pinch, and ultimately the sha256 has the final + # say of what the content is when building + cargoLock = fetchurl { + url = "https://raw.githubusercontent.com/apache/arrow-datafusion/6.0.0/python/Cargo.lock"; + sha256 = "sha256-xiv3drEU5jOGsEIh0U01ZQ1NBKobxO2ctp4mxy9iigw="; + }; + + postUnpack = '' + cp "${cargoLock}" $sourceRoot/Cargo.lock + chmod u+w $sourceRoot/Cargo.lock + ''; +in +buildPythonPackage rec { + pname = "datafusion"; + version = "0.4.0"; + format = "pyproject"; + + src = fetchPypi { + inherit pname version; + sha256 = "sha256-+YqogteKfNhtI2QbVXv/5CIWm3PcOH653dwONm5ZcL8="; + }; + + inherit postUnpack; + + # TODO: remove the patch hacking and postUnpack hooks after + # https://github.com/apache/arrow-datafusion/pull/1508 is merged + # + # the lock file isn't up to date as of 6.0.0 so we need to patch the source + # lockfile and the vendored cargo deps lockfile + patches = [ ./Cargo.lock.patch ]; + cargoDeps = rustPlatform.fetchCargoTarball { + inherit src pname version postUnpack; + sha256 = "sha256-JGyDxpfBXzduJaMF1sbmRm7KJajHYdVSj+WbiSETiY0="; + patches = [ ./Cargo.lock.patch ]; + }; + + nativeBuildInputs = with rustPlatform; [ + cargoSetupHook + maturinBuildHook + ]; + + buildInputs = lib.optionals stdenv.isDarwin [ libiconv ]; + + propagatedBuildInputs = [ + numpy + pandas + pyarrow + ]; + + checkInputs = [ pytest ]; + pythonImportsCheck = [ "datafusion" ]; + + checkPhase = '' + runHook preCheck + pytest --pyargs "${pname}" + runHook postCheck + ''; + + meta = with lib; { + description = "Extensible query execution framework"; + longDescription = '' + DataFusion is an extensible query execution framework, written in Rust, + that uses Apache Arrow as its in-memory format. + ''; + homepage = "https://arrow.apache.org/datafusion/"; + license = with licenses; [ asl20 ]; + maintainers = with maintainers; [ cpcloud ]; + }; +} diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 82e44932c1c3..148e3946f9d8 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -1994,6 +1994,8 @@ in { datadog = callPackage ../development/python-modules/datadog { }; + datafusion = callPackage ../development/python-modules/datafusion { }; + datamodeldict = callPackage ../development/python-modules/datamodeldict { }; dataset = callPackage ../development/python-modules/dataset { };