# Python environment used to run the service: the wrapper script created in
# the install phase launches `${pythonEnv}/bin/uvicorn` from it.
# The package list includes unstructured-api-tools and is extended with the
# `local-inference` optional dependencies declared by the `unstructured`
# Python package.
12 pythonEnv = python3.withPackages (packages: with packages; [
13 unstructured-api-tools
23 ] ++ packages.unstructured.optional-dependencies.local-inference);
# Combines the NLTK datasets listed in `paths` (punkt and
# averaged_perceptron_tagger) into a single store path via symlinkJoin.
# The wrapper exports this path to the service as NLTK_DATA.
25 unstructured_api_nltk_data = symlinkJoin {
26 name = "unstructured_api_nltk_data";
28 paths = [ nltk-data.punkt nltk-data.averaged_perceptron_tagger ];
30 in stdenvNoCC.mkDerivation {
31 pname = "unstructured-api";
34 src = fetchFromGitHub {
35 owner = "Unstructured-IO";
36 repo = "unstructured-api";
38 hash = "sha256-Ucd+SKIES9E5WgKJjg8Vihjc1hMrJ9e956Sb7QlQea8=";
41 nativeBuildInputs = [ makeWrapper ];
46 mkdir -p $out $out/bin $out/lib
49 makeWrapper ${pythonEnv}/bin/uvicorn $out/bin/unstructured-api \
50 --set NLTK_DATA ${unstructured_api_nltk_data} \
51 --prefix PYTHONPATH : $out/lib \
52 --add-flags "prepline_general.api.app:app"
58 updateScript = nix-update-script { };
62 description = "Open-source toolkit designed to make it easy to prepare unstructured data like PDFs, HTML and Word Documents for downstream data science tasks";
63 homepage = "https://github.com/Unstructured-IO/unstructured-api";
64 changelog = "https://github.com/Unstructured-IO/unstructured-api/releases/tag/${version}";
65 license = licenses.asl20;
66 maintainers = with maintainers; [ happysalada ];