1 { lib, newScope, fetchFromGitHub, unzip, stdenvNoCC }:
4 version = "0-unstable-2024-07-29";
5 nativeBuildInputs = [ unzip ];
8 description = "NLTK Data";
9 homepage = "https://github.com/nltk/nltk_data";
10 license = licenses.asl20;
11 platforms = platforms.all;
12 maintainers = with maintainers; [ happysalada ];
# makeNltkDataPackage: build a single NLTK data package from the nltk_data repository.
#
# Arguments:
#   pname    - base name of the zip archive (without ".zip"); also the name of the
#              directory that is copied out of the unpacked archive.
#   location - subdirectory of packages/ that holds the archive; the same name is
#              used for the subdirectory created under $out.
#   hash     - NOTE(review): expected to be the fixed-output hash of the sparse
#              checkout performed by fetchFromGitHub — confirm against the fetcher call.
#
# Only packages/${location}/${pname}.zip is requested via sparseCheckout, at a pinned
# revision. The archive is then unzipped and the resulting ${pname}/ directory is
# copied into $out/${location}.
15 makeNltkDataPackage = {pname, location, hash}:
17 src = fetchFromGitHub {
20 rev = "cfe82914f3c2d24363687f1db3b05e8b9f687e2b";
22 sparseCheckout = [ "packages/${location}/${pname}.zip" ];
25 stdenvNoCC.mkDerivation (base // {
27 inherit (base) version;
32 unzip ${src}/packages/${location}/${pname}.zip
33 mkdir -p $out/${location}
34 cp -R ${pname}/ $out/${location}
# Package set: one attribute per NLTK data resource, each produced by
# makeNltkDataPackage and collected into a scope with lib.makeScope.
40 lib.makeScope newScope (self: {
41 punkt = makeNltkDataPackage {
43 location = "tokenizers";
44 hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
# NOTE(review): the hash below is identical to the one used for `punkt` above.
# The fetched path is derived from pname (packages/${location}/${pname}.zip), so,
# assuming the two pnames differ, the two checkouts should not share a hash. This
# looks like a copy-paste; with a fixed-output fetch, a stale hash may silently
# resolve to the other package's source instead of failing.
# TODO: recompute the hash for this package and confirm the output contents.
46 punkt_tab = makeNltkDataPackage {
48 location = "tokenizers";
49 hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
51 averaged_perceptron_tagger = makeNltkDataPackage {
52 pname = "averaged_perceptron_tagger";
54 hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
56 snowball_data = makeNltkDataPackage {
57 pname = "snowball_data";
58 location = "stemmers";
59 hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
61 stopwords = makeNltkDataPackage {
64 hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";