pkgs/development/python-modules/llama-index-core/default.nix

   1 {
   2   lib,
   3   aiohttp,
   4   buildPythonPackage,
   5   dataclasses-json,
   6   deprecated,
   7   dirtyjson,
   8   fetchFromGitHub,
   9   filetype,
  10   fsspec,
  11   jsonpath-ng,
  12   llamaindex-py-client,
  13   nest-asyncio,
  14   networkx,
  15   nltk,
  16   nltk-data,
  17   numpy,
  18   openai,
  19   pandas,
  20   pillow,
  21   poetry-core,
  22   pytest-asyncio,
  23   pytest-mock,
  24   pytestCheckHook,
  25   pythonOlder,
  26   pyvis,
  27   pyyaml,
  28   requests,
  29   spacy,
  30   sqlalchemy,
  31   tenacity,
  32   tiktoken,
  33   tree-sitter,
  34   typing-inspect,
  35 }:
  36
  37 buildPythonPackage rec {
  38   pname = "llama-index-core";
  39   version = "0.11.23";
  40   pyproject = true;
  41
  42   disabled = pythonOlder "3.8";
  43
  44   src = fetchFromGitHub {
  45     owner = "run-llama";
  46     repo = "llama_index";
  47     rev = "refs/tags/v${version}";
  48     hash = "sha256-DMdU8LT1IGTHM8EsCX44MvGv+luOsKnPSI7yRR5ULPo=";
  49   };
  50
  51   sourceRoot = "${src.name}/${pname}";
  52
  53   # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  54   # download them if they aren't present.
  55   # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  56   # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
  57   # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this
  58   # solution seems more elegant.
  59   postPatch = ''
  60     mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
  61     cp -r ${nltk-data.stopwords}/corpora/stopwords/* llama_index/core/_static/nltk_cache/corpora/stopwords/
  62
  63     mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
  64     cp -r ${nltk-data.punkt}/tokenizers/punkt/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  65   '';
  66
  67   pythonRelaxDeps = [ "tenacity" ];
  68
  69   build-system = [ poetry-core ];
  70
  71   dependencies = [
  72     aiohttp
  73     dataclasses-json
  74     deprecated
  75     dirtyjson
  76     filetype
  77     fsspec
  78     jsonpath-ng
  79     llamaindex-py-client
  80     nest-asyncio
  81     networkx
  82     nltk
  83     numpy
  84     openai
  85     pandas
  86     pillow
  87     pyvis
  88     pyyaml
  89     requests
  90     spacy
  91     sqlalchemy
  92     tenacity
  93     tiktoken
  94     typing-inspect
  95   ];
  96
  97   nativeCheckInputs = [
  98     tree-sitter
  99     pytest-asyncio
 100     pytest-mock
 101     pytestCheckHook
 102   ];
 103
 104   pythonImportsCheck = [ "llama_index" ];
 105
 106   disabledTestPaths = [
 107     # Tests require network access
 108     "tests/agent/"
 109     "tests/callbacks/"
 110     "tests/chat_engine/"
 111     "tests/evaluation/"
 112     "tests/indices/"
 113     "tests/ingestion/"
 114     "tests/memory/"
 115     "tests/node_parser/"
 116     "tests/objects/"
 117     "tests/playground/"
 118     "tests/postprocessor/"
 119     "tests/query_engine/"
 120     "tests/question_gen/"
 121     "tests/response_synthesizers/"
 122     "tests/retrievers/"
 123     "tests/selectors/"
 124     "tests/test_utils.py"
 125     "tests/text_splitter/"
 126     "tests/token_predictor/"
 127     "tests/tools/"
 128   ];
 129
 130   disabledTests = [
 131     # Tests require network access
 132     "test_from_namespaced_persist_dir"
 133     "test_from_persist_dir"
 134     # asyncio.exceptions.InvalidStateError: invalid state
 135     "test_workflow_context_to_dict_mid_run"
 136   ];
 137
 138   meta = with lib; {
 139     description = "Data framework for your LLM applications";
 140     homepage = "https://github.com/run-llama/llama_index/";
 141     changelog = "https://github.com/run-llama/llama_index/blob/${version}/CHANGELOG.md";
 142     license = licenses.mit;
 143     maintainers = with maintainers; [ fab ];
 144   };
 145 }