  inherit (lib) literalExpression types;

  cfg = config.services.ollama;
  ollamaPackage = cfg.package.override { inherit (cfg) acceleration; };

  staticUser = cfg.user != null && cfg.group != null;

    (lib.mkRemovedOptionModule [
    ] "Use `services.ollama.host` and `services.ollama.port` instead.")
    (lib.mkRemovedOptionModule [
    ] "Set `services.ollama.user` and `services.ollama.group` instead.")
    (lib.mkRemovedOptionModule
      "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`."

      enable = lib.mkEnableOption "ollama server for local large language models";
      package = lib.mkPackageOption pkgs "ollama" { };

        type = with types; nullOr str;
          User account under which to run ollama. Defaults to [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)

          The user will automatically be created if this option is set to a non-null value.

      group = lib.mkOption {
        type = with types; nullOr str;
        defaultText = literalExpression "config.services.ollama.user";
          Group under which to run ollama. Only used when `services.ollama.user` is set.

          The group will automatically be created if this option is set to a non-null value.
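      # A minimal sketch of using these two options together (hypothetical user
      # configuration, not part of this module): with both set, `staticUser` above
      # becomes true and the service runs as a fixed account instead of DynamicUser.
      #   services.ollama = {
      #     user = "ollama";
      #     group = "ollama";
      #   };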
        default = "/var/lib/ollama";
        example = "/home/foo";
          The home directory that the ollama service is started in.

      models = lib.mkOption {
        default = "${cfg.home}/models";
        defaultText = "\${config.services.ollama.home}/models";
        example = "/path/to/ollama/models";
          The directory that the ollama service will read models from and download new models to.
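      # A minimal sketch (hypothetical user configuration; the path is illustrative only)
      # of keeping downloaded models outside of `home`:
      #   services.ollama.models = "/srv/ollama/models";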
        default = "127.0.0.1";
          The host address which the ollama server HTTP interface listens to.

          Which port the ollama server listens to.
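      # A minimal sketch (hypothetical user configuration; the port value is illustrative)
      # of listening on all interfaces on a non-default port:
      #   services.ollama = {
      #     host = "0.0.0.0";
      #     port = 11435;
      #   };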
      acceleration = lib.mkOption {
        type = types.nullOr (
          types.enum [
            false
            "rocm"
            "cuda"
          ]
        );
          What interface to use for hardware acceleration.

          - `null`: default behavior
            - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
            - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
            - otherwise defaults to `false`
          - `false`: disable GPU, only use CPU
          - `"rocm"`: supported by most modern AMD GPUs
            - may require overriding the GPU type with `services.ollama.rocmOverrideGfx`
              if ROCm doesn't detect your AMD GPU
          - `"cuda"`: supported by most modern NVIDIA GPUs
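      # A minimal sketch of enabling GPU acceleration (hypothetical user configuration,
      # assuming an AMD GPU; use "cuda" for NVIDIA instead):
      #   services.ollama = {
      #     enable = true;
      #     acceleration = "rocm";
      #   };
      # The chosen value is passed to the package via `cfg.package.override` above.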
      rocmOverrideGfx = lib.mkOption {
        type = types.nullOr types.str;
          Override what ROCm will detect your GPU model as.
          For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).

          This sets the value of `HSA_OVERRIDE_GFX_VERSION`. See [ollama's docs](
          https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon
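      # A minimal sketch for the RX 5700 XT case mentioned above (hypothetical user
      # configuration); the value ends up in the service environment as
      # HSA_OVERRIDE_GFX_VERSION via the `lib.optionalAttrs` block in the config below:
      #   services.ollama.rocmOverrideGfx = "10.1.0";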
      environmentVariables = lib.mkOption {
        type = types.attrsOf types.str;
        example = {
          OLLAMA_LLM_LIBRARY = "cpu";
          HIP_VISIBLE_DEVICES = "0,1";
        };
          Set arbitrary environment variables for the ollama service.

          Be aware that these are only seen by the ollama server (systemd service),
          not normal invocations like `ollama run`.
          Since `ollama run` is mostly a shell around the ollama server, this is usually sufficient.
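      # A minimal sketch (hypothetical user configuration; values copied from the example
      # above) of forcing the CPU backend and restricting the visible HIP devices:
      #   services.ollama.environmentVariables = {
      #     OLLAMA_LLM_LIBRARY = "cpu";
      #     HIP_VISIBLE_DEVICES = "0,1";
      #   };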
      loadModels = lib.mkOption {
        type = types.listOf types.str;
          Download these models using `ollama pull` as soon as `ollama.service` has started.

          This creates a systemd unit `ollama-model-loader.service`.

          Search for models of your choice from: https://ollama.com/library
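      # A minimal sketch (hypothetical user configuration; the model names are
      # illustrative picks from https://ollama.com/library):
      #   services.ollama.loadModels = [
      #     "llama3.2"
      #     "mistral"
      #   ];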
      openFirewall = lib.mkOption {
          Whether to open the firewall for ollama.

          This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
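      # A minimal sketch (hypothetical user configuration) of making the server reachable
      # from other hosts; only useful together with a non-loopback `host`:
      #   services.ollama.openFirewall = true;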
  config = lib.mkIf cfg.enable {
    users = lib.mkIf staticUser {
      users.${cfg.user} = {
      groups.${cfg.group} = { };

    systemd.services.ollama = {
      description = "Server for local large language models";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
        cfg.environmentVariables
          OLLAMA_MODELS = cfg.models;
          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
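          # The ollama server reads OLLAMA_HOST to decide which address and port `serve`
          # binds to, so cfg.host and cfg.port take effect through the environment.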
        // lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
          HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;

        lib.optionalAttrs staticUser {

          ExecStart = "${lib.getExe ollamaPackage} serve";
          WorkingDirectory = cfg.home;
          StateDirectory = [ "ollama" ];

          CapabilityBoundingSet = [ "" ];
            # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
            "char-nvidia-frontend"
          DevicePolicy = "closed";
          LockPersonality = true;
          MemoryDenyWriteExecute = true;
          NoNewPrivileges = true;
          PrivateDevices = false; # setting this to true would hide the acceleration devices
          ProcSubset = "all"; # ollama needs /proc/meminfo
          ProtectControlGroups = true;
          ProtectHostname = true;
          ProtectKernelLogs = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
          ProtectProc = "invisible";
          ProtectSystem = "strict";
          RestrictNamespaces = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          RestrictAddressFamilies = [
          SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
          SystemCallArchitectures = "native";
            "@system-service @resources"

    systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
      description = "Download ollama models in the background";
      after = [ "ollama.service" ];
      bindsTo = [ "ollama.service" ];
      environment = config.systemd.services.ollama.environment;

        Restart = "on-failure";
        # bounded exponential backoff
        RestartMaxDelaySec = "2h";
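        # systemd increases the delay between automatic restarts up to this cap
        # (see systemd.service(5)), so repeatedly failing downloads back off
        # instead of retrying in a tight loop.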
        total=${toString (builtins.length cfg.loadModels)}
        failed=0

        for model in ${lib.escapeShellArgs cfg.loadModels}; do
          '${lib.getExe ollamaPackage}' pull "$model" &
        done

        for job in $(jobs -p); do
          set +e
          wait $job
          exit_code=$?
          set -e

          if [ $exit_code != 0 ]; then
            failed=$((failed + 1))
          fi
        done

        if [ $failed != 0 ]; then
          echo "error: $failed out of $total attempted model downloads failed" >&2
          exit 1
        fi

    networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };

    environment.systemPackages = [ ollamaPackage ];

  meta.maintainers = with lib.maintainers; [