# NixOS module for the llama.cpp inference server (services.llama-cpp).
# `utils` provides NixOS service helpers (escapeSystemdExecArgs, used below).
1 { config, lib, pkgs, utils, ... }:
# Shorthand for this module's own option values.
4 cfg = config.services.llama-cpp;
# --- Option declarations for services.llama-cpp ---
# NOTE(review): interior lines (closing braces, some defaults, the host/port
# mkOption headers) are missing from this view; surviving lines kept verbatim.
10 enable = lib.mkEnableOption "LLaMA C++ server";
# Package providing the llama-server binary; defaults to pkgs.llama-cpp.
12 package = lib.mkPackageOption pkgs "llama-cpp" { };
# Filesystem path of the GGUF model handed to `llama-server -m`.
14 model = lib.mkOption {
15 type = lib.types.path;
16 example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
17 description = "Model path.";
# Extra CLI arguments appended to the server invocation (escaped via
# utils.escapeSystemdExecArgs in the service's ExecStart).
20 extraFlags = lib.mkOption {
21 type = lib.types.listOf lib.types.str;
22 description = "Extra flags passed to llama-cpp-server.";
23 example = ["-c" "4096" "-ngl" "32" "--numa" "numactl"];
# Bind address — loopback only by default.
29 default = "127.0.0.1";
31 description = "IP address the LLaMA C++ server listens on.";
# TCP listen port; also the port opened when openFirewall is set.
35 type = lib.types.port;
37 description = "Listen port for LLaMA C++ server.";
# Whether to open cfg.port in the firewall for this service.
40 openFirewall = lib.mkOption {
41 type = lib.types.bool;
43 description = "Open ports in the firewall for LLaMA C++ server.";
# --- Service wiring, active only when services.llama-cpp.enable = true ---
49 config = lib.mkIf cfg.enable {
# systemd unit running the llama.cpp HTTP server.
51 systemd.services.llama-cpp = {
52 description = "LLaMA C++ server";
53 after = ["network.target"];
54 wantedBy = ["multi-user.target"];
# NOTE(review): SIGINT presumably triggers llama-server's clean
# shutdown path (vs. default SIGTERM) — confirm against upstream.
58 KillSignal = "SIGINT";
# FIX: escape EVERY argument for systemd's ExecStart parser, not just
# extraFlags — cfg.model is a user-supplied path and cfg.host a string;
# interpolated raw, whitespace or quotes in either would corrupt the
# command line. escapeSystemdExecArgs handles strings, paths and ints
# (the port) uniformly.
59 ExecStart = utils.escapeSystemdExecArgs ([
  "${cfg.package}/bin/llama-server"
  "--log-disable"
  "--host" cfg.host
  "--port" cfg.port
  "-m" cfg.model
] ++ cfg.extraFlags);
60 Restart = "on-failure";
63 # for GPU acceleration
64 PrivateDevices = false;
# --- systemd sandboxing / hardening ---
68 CapabilityBoundingSet = "";
69 RestrictAddressFamilies = [
74 NoNewPrivileges = true;
79 ProtectControlGroups = true;
81 ProtectKernelLogs = true;
82 ProtectKernelModules = true;
83 ProtectKernelTunables = true;
84 ProtectSystem = "strict";
85 MemoryDenyWriteExecute = true;
86 LockPersonality = true;
88 RestrictNamespaces = true;
89 RestrictRealtime = true;
90 RestrictSUIDSGID = true;
91 SystemCallArchitectures = "native";
# Disallowed syscalls fail with EPERM instead of killing the process.
96 SystemCallErrorNumber = "EPERM";
97 ProtectProc = "invisible";
98 ProtectHostname = true;
# Open the listen port only when explicitly requested by the user.
103 networking.firewall = lib.mkIf cfg.openFirewall {
104 allowedTCPPorts = [ cfg.port ];
109 meta.maintainers = with lib.maintainers; [ newam ];