22 buildPythonPackage rec {
25 format = "setuptools";
27 disabled = pythonOlder "3.8";
29 src = fetchFromGitHub {
30 owner = "huggingface";
33 hash = "sha256-b84Y7PixZUG1VXW11Q4fKxEcsWJjpXEHZIYugf2MSUU=";
36 # Comment out the pyarrow_hotfix import (the workaround for the pyarrow<14.0.1 vulnerability)
38 substituteInPlace src/datasets/features/features.py \
39 --replace "import pyarrow_hotfix" "#import pyarrow_hotfix"
42 propagatedBuildInputs = [
56 ] ++ lib.optionals (pythonOlder "3.8") [ importlib-metadata ];
58 # Tests require pervasive internet access
61 # Module import will attempt to create a cache directory
62 postFixup = "export HF_MODULES_CACHE=$TMPDIR";
64 pythonImportsCheck = [ "datasets" ];
67 description = "Open-access datasets and evaluation metrics for natural language processing";
68 mainProgram = "datasets-cli";
69 homepage = "https://github.com/huggingface/datasets";
70 changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
71 license = licenses.asl20;
72 platforms = platforms.unix;