# NixOS module for the llama.cpp inference server (services.llama-cpp).
# `utils` provides NixOS service helpers (escapeSystemdExecArgs, used below).
1 { config, lib, pkgs, utils, ... }:
# Shorthand for this module's own option values.
4 cfg = config.services.llama-cpp;
# --- Option declarations for services.llama-cpp ---
# NOTE(review): interior lines (closing braces, some defaults, the host/port
# mkOption headers) are missing from this view; surviving lines kept verbatim.
10 enable = lib.mkEnableOption "LLaMA C++ server";
# Package providing the llama-server binary; defaults to pkgs.llama-cpp.
12 package = lib.mkPackageOption pkgs "llama-cpp" { };
# Filesystem path of the GGUF model handed to `llama-server -m`.
14 model = lib.mkOption {
15 type = lib.types.path;
16 example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
17 description = "Model path.";
# Extra CLI arguments appended to the server invocation (escaped via
# utils.escapeSystemdExecArgs in the service's ExecStart).
20 extraFlags = lib.mkOption {
21 type = lib.types.listOf lib.types.str;
22 description = "Extra flags passed to llama-cpp-server.";
23 example = ["-c" "4096" "-ngl" "32" "--numa" "numactl"];
# Bind address — loopback only by default.
29 default = "127.0.0.1";
31 description = "IP address the LLaMA C++ server listens on.";
# TCP listen port; also the port opened when openFirewall is set.
35 type = lib.types.port;
37 description = "Listen port for LLaMA C++ server.";
# Whether to open cfg.port in the firewall for this service.
40 openFirewall = lib.mkOption {
41 type = lib.types.bool;
43 description = "Open ports in the firewall for LLaMA C++ server.";
# --- Service wiring, active only when services.llama-cpp.enable = true ---
49 config = lib.mkIf cfg.enable {
# systemd unit running the llama.cpp HTTP server.
51 systemd.services.llama-cpp = {
52 description = "LLaMA C++ server";
53 after = ["network.target"];
54 wantedBy = ["multi-user.target"];
# NOTE(review): SIGINT presumably triggers llama-server's clean
# shutdown path (vs. default SIGTERM) — confirm against upstream.
58 KillSignal = "SIGINT";
# FIX: escape EVERY argument for systemd's ExecStart parser, not just
# extraFlags — cfg.model is a user-supplied path and cfg.host a string;
# interpolated raw, whitespace or quotes in either would corrupt the
# command line. escapeSystemdExecArgs handles strings, paths and ints
# (the port) uniformly.
59 ExecStart = utils.escapeSystemdExecArgs ([
  "${cfg.package}/bin/llama-server"
  "--log-disable"
  "--host" cfg.host
  "--port" cfg.port
  "-m" cfg.model
] ++ cfg.extraFlags);
60 Restart = "on-failure";
63 # for GPU acceleration
64 PrivateDevices = false;
# --- systemd sandboxing / hardening ---
68 CapabilityBoundingSet = "";
69 RestrictAddressFamilies = [
74 NoNewPrivileges = true;
79 ProtectControlGroups = true;
81 ProtectKernelLogs = true;
82 ProtectKernelModules = true;
83 ProtectKernelTunables = true;
84 ProtectSystem = "strict";
85 MemoryDenyWriteExecute = true;
86 LockPersonality = true;
88 RestrictNamespaces = true;
89 RestrictRealtime = true;
90 RestrictSUIDSGID = true;
91 SystemCallArchitectures = "native";
# Disallowed syscalls fail with EPERM instead of killing the process.
96 SystemCallErrorNumber = "EPERM";
97 ProtectProc = "invisible";
98 ProtectHostname = true;
# Open the listen port only when explicitly requested by the user.
103 networking.firewall = lib.mkIf cfg.openFirewall {
104 allowedTCPPorts = [ cfg.port ];
109 meta.maintainers = with lib.maintainers; [ newam ];