36 buildPythonPackage rec {
37 pname = "llama-index-core";
41 disabled = pythonOlder "3.8";
43 src = fetchFromGitHub {
46 rev = "refs/tags/v${version}";
47 hash = "sha256-r4xedtxoYv6CcxtDrgwau9LY3kOBg3jXlQm1g59L7x4=";
50 sourceRoot = "${src.name}/${pname}";
52 # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
53 # download them if they aren't present, which fails in the build sandbox (no network access).
54 # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
55 # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
56 # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this
57 # solution seems more elegant.
59 mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
60 cp -r ${nltk-data.stopwords}/corpora/stopwords/* llama_index/core/_static/nltk_cache/corpora/stopwords/
62 mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
63 cp -r ${nltk-data.punkt}/tokenizers/punkt/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
66 pythonRelaxDeps = [ "tenacity" ];
68 build-system = [ poetry-core ];
102 pythonImportsCheck = [ "llama_index" ];
104 disabledTestPaths = [
105 # Tests require network access
116 "tests/postprocessor/"
117 "tests/query_engine/"
118 "tests/question_gen/"
119 "tests/response_synthesizers/"
122 "tests/test_utils.py"
123 "tests/text_splitter/"
124 "tests/token_predictor/"
129 # Tests require network access
130 "test_from_namespaced_persist_dir"
131 "test_from_persist_dir"
132 # asyncio.exceptions.InvalidStateError: invalid state
133 "test_workflow_context_to_dict_mid_run"
137 description = "Data framework for your LLM applications";
138 homepage = "https://github.com/run-llama/llama_index/";
139 changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md"; # upstream tags are "v"-prefixed, matching src.rev
140 license = licenses.mit;
141 maintainers = with maintainers; [ fab ];