37 buildPythonPackage rec {
38 pname = "llama-index-core";
42 disabled = pythonOlder "3.8";
44 src = fetchFromGitHub {
47 rev = "refs/tags/v${version}";
48 hash = "sha256-DMdU8LT1IGTHM8EsCX44MvGv+luOsKnPSI7yRR5ULPo=";
51 sourceRoot = "${src.name}/${pname}";
53 # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
54 # download them if they aren't present.
55 # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
56 # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
57 # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this
58 # solution seems more elegant.
60 mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
61 cp -r ${nltk-data.stopwords}/corpora/stopwords/* llama_index/core/_static/nltk_cache/corpora/stopwords/
63 mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
64 cp -r ${nltk-data.punkt}/tokenizers/punkt/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
67 pythonRelaxDeps = [ "tenacity" ];
69 build-system = [ poetry-core ];
104 pythonImportsCheck = [ "llama_index" ];
106 disabledTestPaths = [
107 # Tests require network access
118 "tests/postprocessor/"
119 "tests/query_engine/"
120 "tests/question_gen/"
121 "tests/response_synthesizers/"
124 "tests/test_utils.py"
125 "tests/text_splitter/"
126 "tests/token_predictor/"
131 # Tests require network access
132 "test_from_namespaced_persist_dir"
133 "test_from_persist_dir"
134 # asyncio.exceptions.InvalidStateError: invalid state
135 "test_workflow_context_to_dict_mid_run"
139 description = "Data framework for your LLM applications";
140 homepage = "https://github.com/run-llama/llama_index/";
141 changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md"; # upstream tags are "v"-prefixed, matching src.rev
142 license = licenses.mit;
143 maintainers = with maintainers; [ fab ];