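# NixOS VM tests for the local-ai package (pkgs/by-name/lo/local-ai/tests.nix).
# The package under test is passed in as `self`; assuming this file is wired up
# as the package's passthru.tests, individual tests can be built with e.g.
# `nix-build -A local-ai.tests.health`.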
{
  self,
  lib,
  testers,
  fetchzip,
  fetchurl,
  writers,
  symlinkJoin,
  jq,
  prom2json,
}:
let
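  # Configuration shared by every test VM: import the local-ai NixOS module from
  # this directory and run the package under test with one thread per virtualisation core.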
  common-config =
    { config, ... }:
    {
      imports = [ ./module.nix ];
      services.local-ai = {
        enable = true;
        package = self;
        threads = config.virtualisation.cores;
        logLevel = "debug";
      };
    };

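  # genModels (from self.lib) turns the model-configs attribute sets defined in the
  # individual tests into a models directory that services.local-ai.models consumes.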
  inherit (self.lib) genModels;
in
{
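  # Sanity check: `local-ai --help` must report the packaged version.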
  version = testers.testVersion {
    package = self;
    version = "v" + self.version;
    command = "local-ai --help";
  };

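  # Bare service start: the readiness endpoint and the Prometheus metrics endpoint
  # must respond with the default configuration.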
  health = testers.runNixOSTest {
    name = self.name + "-health";
    nodes.machine = common-config;
    testScript =
      let
        port = "8080";
      in
      ''
        machine.wait_for_open_port(${port})
        machine.succeed("curl -f http://localhost:${port}/readyz")

        machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
        machine.copy_from_vm("metrics.json")
      '';
  };

  # https://localai.io/features/embeddings/#bert-embeddings
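  # Loads an all-MiniLM-L6-v2 embedding model (f16 GGML) through the bert-embeddings
  # backend and checks that /embeddings returns a result attributed to that model.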
  bert =
    let
      model = "embedding";
      model-configs.${model} = {
        # Note: q4_0 and q4_1 models can not be loaded
        parameters.model = fetchurl {
          url = "https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-f16.bin";
          hash = "sha256-nBlbJFOk/vYKT2vjqIo5IRNmIU32SYpP5IhcniIxT1A=";
        };
        backend = "bert-embeddings";
        embeddings = true;
      };

      models = genModels model-configs;

      requests.request = {
        inherit model;
        input = "Your text string goes here";
      };
    in
    testers.runNixOSTest {
      name = self.name + "-bert";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 2;
        virtualisation.memorySize = 2048;
        services.local-ai.models = models;
      };
      passthru = {
        inherit models requests;
      };
      testScript =
        let
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")

          machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
          machine.copy_from_vm("embeddings.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")

          machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
          machine.copy_from_vm("metrics.json")
        '';
    };
}
// lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
  # https://localai.io/docs/getting-started/manual/
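  # Runs a quantized Llama 3.1 8B Instruct model on the llama-cpp backend and
  # exercises the chat, edit and plain completion endpoints.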
  llama =
    let
      model = "gpt-3.5-turbo";

      # https://localai.io/advanced/#full-config-model-file-reference
      model-configs.${model} = rec {
        context_size = 16 * 1024; # 128k context is possible, but needs 16GB RAM
        backend = "llama-cpp";
        parameters = {
          # https://ai.meta.com/blog/meta-llama-3-1/
          model = fetchurl {
            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf";
            hash = "sha256-8r4+GiOcEsnz8BqWKxH7KAf4Ay/bY7ClUC6kLd71XkQ=";
          };
          # defaults from:
          # https://deepinfra.com/meta-llama/Meta-Llama-3.1-8B-Instruct
          temperature = 0.7;
          top_p = 0.9;
          top_k = 0;
          # the following parameter leads to outputs like: !!!!!!!!!!!!!!!!!!!
          #repeat_penalty = 1;
          presence_penalty = 0;
          frequency_penalty = 0;
          max_tokens = 100;
        };
        stopwords = [ "<|eot_id|>" ];
        template = {
          # Templates implement the following specification:
          # https://github.com/meta-llama/llama3/tree/main?tab=readme-ov-file#instruction-tuned-models
          # ... and are inspired by:
          # https://github.com/mudler/LocalAI/blob/master/embedded/models/llama3-instruct.yaml
          #
          # The rules for template evaluation are defined here:
          # https://pkg.go.dev/text/template
          chat_message = ''
            <|start_header_id|>{{.RoleName}}<|end_header_id|>

            {{.Content}}${builtins.head stopwords}'';

          chat = "{{.Input}}<|start_header_id|>assistant<|end_header_id|>";

          completion = "{{.Input}}";
        };
      };

      models = genModels model-configs;

      requests = {
        # https://localai.io/features/text-generation/#chat-completions
        chat-completions = {
          inherit model;
          messages = [
            {
              role = "user";
              content = "1 + 2 = ?";
            }
          ];
        };
        # https://localai.io/features/text-generation/#edit-completions
        edit-completions = {
          inherit model;
          instruction = "rephrase";
          input = "Black cat jumped out of the window";
          max_tokens = 50;
        };
        # https://localai.io/features/text-generation/#completions
        completions = {
          inherit model;
          prompt = "A long time ago in a galaxy far, far away";
        };
      };
    in
    testers.runNixOSTest {
      name = self.name + "-llama";
      nodes.machine = {
        imports = [ common-config ];
        virtualisation.cores = 4;
        virtualisation.memorySize = 8192;
        services.local-ai.models = models;
        # TODO: Add a test case for parallel requests
        services.local-ai.parallelRequests = 2;
      };
      passthru = {
        inherit models requests;
      };
      testScript =
        let
          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")

          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
          machine.copy_from_vm("chat-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
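          # The chat request asks "1 + 2 = ?"; the last whitespace-separated token of the reply must parse as 3.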
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | split(\" \") | last | tonumber == 3' chat-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
          machine.copy_from_vm("edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString requests.edit-completions.max_tokens}' edit-completions.json")

          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
          machine.copy_from_vm("completions.json")
          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"text_completion\"' completions.json")
          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${
            toString model-configs.${model}.parameters.max_tokens
          }' completions.json")

          machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
          machine.copy_from_vm("metrics.json")
        '';
    };
}
//
  lib.optionalAttrs
    (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas)
    {
      # https://localai.io/features/text-to-audio/#piper
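      # Round trip: synthesize speech from text with the piper backend, transcribe the
      # result with whisper, and expect the original input text back.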
      tts =
        let
          model-stt = "whisper-en";
          model-configs.${model-stt} = {
            backend = "whisper";
            parameters.model = fetchurl {
              url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
              hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
            };
          };

          model-tts = "piper-en";
          model-configs.${model-tts} = {
            backend = "piper";
            parameters.model = "en-us-danny-low.onnx";
          };

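          # Merge the generated model configs with the unpacked piper voice archive so
          # that en-us-danny-low.onnx ends up in the models directory.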
          models =
            let
              models = genModels model-configs;
            in
            symlinkJoin {
              inherit (models) name;
              paths = [
                models
                (fetchzip {
                  url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
                  hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
                  stripRoot = false;
                })
              ];
            };

          requests.request = {
            model = model-tts;
            input = "Hello, how are you?";
          };
        in
        testers.runNixOSTest {
          name = self.name + "-tts";
          nodes.machine = {
            imports = [ common-config ];
            virtualisation.cores = 2;
            services.local-ai.models = models;
          };
          passthru = {
            inherit models requests;
          };
          testScript =
            let
              port = "8080";
            in
            ''
              machine.wait_for_open_port(${port})
              machine.succeed("curl -f http://localhost:${port}/readyz")
              machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
              machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")

              machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
              machine.copy_from_vm("out.wav")

              machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
              machine.copy_from_vm("transcription.json")
              machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")

              machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
              machine.copy_from_vm("metrics.json")
            '';
        };
    }