nixos/modules/services/misc/ollama.nix

   1 {
   2   config,
   3   lib,
   4   pkgs,
   5   ...
   6 }:
   7 let
   8   inherit (lib) literalExpression types;
   9
  10   cfg = config.services.ollama;
  11   ollamaPackage = cfg.package.override { inherit (cfg) acceleration; };
  12
  13   staticUser = cfg.user != null && cfg.group != null;
  14 in
  15 {
  # Options that were removed from this module, each declared through
  # lib.mkRemovedOptionModule together with its migration message.
  imports =
    let
      # Builds the removed-option module for `services.ollama.<name>`;
      # the message is supplied as the remaining argument.
      removedOption =
        name:
        lib.mkRemovedOptionModule [
          "services"
          "ollama"
          name
        ];
    in
    [
      (removedOption "listenAddress" "Use `services.ollama.host` and `services.ollama.port` instead.")
      (removedOption "sandbox" "Set `services.ollama.user` and `services.ollama.group` instead.")
      (removedOption "writablePaths"
        "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`."
      )
    ];
  36
  37   options = {
  38     services.ollama = {
  39       enable = lib.mkEnableOption "ollama server for local large language models";
  40       package = lib.mkPackageOption pkgs "ollama" { };
  41
  42       user = lib.mkOption {
  43         type = with types; nullOr str;
  44         default = null;
  45         example = "ollama";
  46         description = ''
  47           User account under which to run ollama. Defaults to [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)
  48           when set to `null`.
  49
  50           The user will automatically be created, if this option is set to a non-null value.
  51         '';
  52       };
  53       group = lib.mkOption {
  54         type = with types; nullOr str;
  55         default = cfg.user;
  56         defaultText = literalExpression "config.services.ollama.user";
  57         example = "ollama";
  58         description = ''
  59           Group under which to run ollama. Only used when `services.ollama.user` is set.
  60
  61           The group will automatically be created, if this option is set to a non-null value.
  62         '';
  63       };
  64
  65       home = lib.mkOption {
  66         type = types.str;
  67         default = "/var/lib/ollama";
  68         example = "/home/foo";
  69         description = ''
  70           The home directory that the ollama service is started in.
  71         '';
  72       };
  73       models = lib.mkOption {
  74         type = types.str;
  75         default = "${cfg.home}/models";
  76         defaultText = "\${config.services.ollama.home}/models";
  77         example = "/path/to/ollama/models";
  78         description = ''
  79           The directory that the ollama service will read models from and download new models to.
  80         '';
  81       };
  82
  83       host = lib.mkOption {
  84         type = types.str;
  85         default = "127.0.0.1";
  86         example = "[::]";
  87         description = ''
  88           The host address which the ollama server HTTP interface listens to.
  89         '';
  90       };
  91       port = lib.mkOption {
  92         type = types.port;
  93         default = 11434;
  94         example = 11111;
  95         description = ''
  96           Which port the ollama server listens to.
  97         '';
  98       };
  99
 100       acceleration = lib.mkOption {
 101         type = types.nullOr (
 102           types.enum [
 103             false
 104             "rocm"
 105             "cuda"
 106           ]
 107         );
 108         default = null;
 109         example = "rocm";
 110         description = ''
 111           What interface to use for hardware acceleration.
 112
 113           - `null`: default behavior
 114             - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
 115             - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
 116             - otherwise defaults to `false`
 117           - `false`: disable GPU, only use CPU
 118           - `"rocm"`: supported by most modern AMD GPUs
 119             - may require overriding gpu type with `services.ollama.rocmOverrideGfx`
 120               if rocm doesn't detect your AMD gpu
 121           - `"cuda"`: supported by most modern NVIDIA GPUs
 122         '';
 123       };
 124       rocmOverrideGfx = lib.mkOption {
 125         type = types.nullOr types.str;
 126         default = null;
 127         example = "10.3.0";
 128         description = ''
 129           Override what rocm will detect your gpu model as.
 130           For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).
 131
 132           This sets the value of `HSA_OVERRIDE_GFX_VERSION`. See [ollama's docs](
 133           https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon
 134           ) for details.
 135         '';
 136       };
 137
 138       environmentVariables = lib.mkOption {
 139         type = types.attrsOf types.str;
 140         default = { };
 141         example = {
 142           OLLAMA_LLM_LIBRARY = "cpu";
 143           HIP_VISIBLE_DEVICES = "0,1";
 144         };
 145         description = ''
 146           Set arbitrary environment variables for the ollama service.
 147
 148           Be aware that these are only seen by the ollama server (systemd service),
 149           not normal invocations like `ollama run`.
 150           Since `ollama run` is mostly a shell around the ollama server, this is usually sufficient.
 151         '';
 152       };
 153       loadModels = lib.mkOption {
 154         type = types.listOf types.str;
 155         default = [ ];
 156         description = ''
 157           Download these models using `ollama pull` as soon as `ollama.service` has started.
 158
 159           This creates a systemd unit `ollama-model-loader.service`.
 160
 161           Search for models of your choice from: https://ollama.com/library
 162         '';
 163       };
 164       openFirewall = lib.mkOption {
 165         type = types.bool;
 166         default = false;
 167         description = ''
 168           Whether to open the firewall for ollama.
 169
 170           This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
 171         '';
 172       };
 173     };
 174   };
 175
 176   config = lib.mkIf cfg.enable {
 177     users = lib.mkIf staticUser {
 178       users.${cfg.user} = {
 179         inherit (cfg) home;
 180         isSystemUser = true;
 181         group = cfg.group;
 182       };
 183       groups.${cfg.group} = { };
 184     };
 185
 186     systemd.services.ollama = {
 187       description = "Server for local large language models";
 188       wantedBy = [ "multi-user.target" ];
 189       after = [ "network.target" ];
 190       environment =
 191         cfg.environmentVariables
 192         // {
 193           HOME = cfg.home;
 194           OLLAMA_MODELS = cfg.models;
 195           OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
 196         }
 197         // lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
 198           HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;
 199         };
 200       serviceConfig =
 201         lib.optionalAttrs staticUser {
 202           User = cfg.user;
 203           Group = cfg.group;
 204         }
 205         // {
 206           Type = "exec";
 207           DynamicUser = true;
 208           ExecStart = "${lib.getExe ollamaPackage} serve";
 209           WorkingDirectory = cfg.home;
 210           StateDirectory = [ "ollama" ];
 211           ReadWritePaths = [
 212             cfg.home
 213             cfg.models
 214           ];
 215
 216           CapabilityBoundingSet = [ "" ];
 217           DeviceAllow = [
 218             # CUDA
 219             # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
 220             "char-nvidiactl"
 221             "char-nvidia-caps"
 222             "char-nvidia-frontend"
 223             "char-nvidia-uvm"
 224             # ROCm
 225             "char-drm"
 226             "char-kfd"
 227           ];
 228           DevicePolicy = "closed";
 229           LockPersonality = true;
 230           MemoryDenyWriteExecute = true;
 231           NoNewPrivileges = true;
 232           PrivateDevices = false; # hides acceleration devices
 233           PrivateTmp = true;
 234           PrivateUsers = true;
 235           ProcSubset = "all"; # /proc/meminfo
 236           ProtectClock = true;
 237           ProtectControlGroups = true;
 238           ProtectHome = true;
 239           ProtectHostname = true;
 240           ProtectKernelLogs = true;
 241           ProtectKernelModules = true;
 242           ProtectKernelTunables = true;
 243           ProtectProc = "invisible";
 244           ProtectSystem = "strict";
 245           RemoveIPC = true;
 246           RestrictNamespaces = true;
 247           RestrictRealtime = true;
 248           RestrictSUIDSGID = true;
 249           RestrictAddressFamilies = [
 250             "AF_INET"
 251             "AF_INET6"
 252             "AF_UNIX"
 253           ];
 254           SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
 255           SystemCallArchitectures = "native";
 256           SystemCallFilter = [
 257             "@system-service @resources"
 258             "~@privileged"
 259           ];
 260           UMask = "0077";
 261         };
 262     };
 263
 264     systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
 265       description = "Download ollama models in the background";
 266       wantedBy = [
 267         "multi-user.target"
 268         "ollama.service"
 269       ];
 270       after = [ "ollama.service" ];
 271       bindsTo = [ "ollama.service" ];
 272       environment = config.systemd.services.ollama.environment;
 273       serviceConfig = {
 274         Type = "exec";
 275         DynamicUser = true;
 276         Restart = "on-failure";
 277         # bounded exponential backoff
 278         RestartSec = "1s";
 279         RestartMaxDelaySec = "2h";
 280         RestartSteps = "10";
 281       };
 282
 283       script = ''
 284         total=${toString (builtins.length cfg.loadModels)}
 285         failed=0
 286
 287         for model in ${lib.escapeShellArgs cfg.loadModels}; do
 288           '${lib.getExe ollamaPackage}' pull "$model" &
 289         done
 290
 291         for job in $(jobs -p); do
 292           set +e
 293           wait $job
 294           exit_code=$?
 295           set -e
 296
 297           if [ $exit_code != 0 ]; then
 298             failed=$((failed + 1))
 299           fi
 300         done
 301
 302         if [ $failed != 0 ]; then
 303           echo "error: $failed out of $total attempted model downloads failed" >&2
 304           exit 1
 305         fi
 306       '';
 307     };
 308
 309     networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };
 310
 311     environment.systemPackages = [ ollamaPackage ];
 312   };
 313
 314   meta.maintainers = with lib.maintainers; [
 315     abysssol
 316     onny
 317   ];
 318 }