1 { config, lib, pkgs, ... }:
6 cfg = config.services.thanos;
8 nullOpt = type: description: mkOption {
9 type = types.nullOr type;
11 description = lib.mdDoc description;
# Turn a nullable option value into command-line arguments.
# Yields [ ''--<opt>="<value>"'' ] when `v` is set and [ ] when `v` is null.
optionToArgs = opt: v:
  if v == null
  then [ ]
  else [ ''--${opt}="${toString v}"'' ];
# Turn a boolean option into a bare flag.
# Yields [ "--<opt>" ] when `v` is true and [ ] when it is false.
flagToArgs = opt: v:
  if v
  then [ "--${opt}" ]
  else [ ];
# Repeat the same flag once per list element:
# every `v` in `vs` becomes ''--<opt>="<v>"'', in list order.
listToArgs = opt: vs:
  let
    asArg = item: ''--${opt}="${item}"'';
  in
    map asArg vs;
# Repeat the same flag once per attribute of `kvs`:
# every name/value pair becomes `--<opt>=<name>=\"<value>\"`
# (the backslash-escaped quotes are emitted literally), in attribute-name order.
attrsToArgs = opt: kvs:
  let
    asArg = key: ''--${opt}=${key}=\"${kvs.${key}}\"'';
  in
    map asArg (builtins.attrNames kvs);
19 mkParamDef = type: default: description: mkParam type (description + ''
21 Defaults to `${toString default}` in Thanos
25 mkParam = type: description: {
26 toArgs = optionToArgs;
27 option = nullOpt type description;
30 mkFlagParam = description: {
35 description = lib.mdDoc description;
39 mkListParam = opt: description: {
40 toArgs = _opt: listToArgs opt;
42 type = types.listOf types.str;
44 description = lib.mdDoc description;
48 mkAttrsParam = opt: description: {
49 toArgs = _opt: attrsToArgs opt;
51 type = types.attrsOf types.str;
53 description = lib.mdDoc description;
57 mkStateDirParam = opt: default: description: {
58 toArgs = _opt: stateDir: optionToArgs opt "/var/lib/${stateDir}";
62 description = lib.mdDoc description;
# Render the attribute set `attrs` as a YAML file in the Nix store.
#
#   name:  name of the resulting store path; "<name>.json" is used for the
#          intermediate JSON file
#   attrs: attribute set to serialise
#
# The value is serialised with builtins.toJSON and converted with remarshal's
# json2yaml. The intermediate file is created with pkgs.writeText instead of
# builtins.toFile: builtins.toFile refuses strings that refer to derivation
# outputs, so a configuration containing e.g. a "${pkgs.foo}/..." path would
# otherwise fail to evaluate.
toYAML = name: attrs: pkgs.runCommand name {
  preferLocalBuild = true;
  json = pkgs.writeText "${name}.json" (builtins.toJSON attrs);
  nativeBuildInputs = [ pkgs.remarshal ];
} ''json2yaml -i "$json" -o "$out"'';
# Build the command line for `thanos <cmd>`.
# Starts with the binary from `cfg.package` and the sub-command name; when
# `cfg.<cmd>.arguments` is non-empty, each argument is appended after a
# backslash-newline continuation.
thanos = cmd:
  let
    extraArgs = cfg.${cmd}.arguments;
    continuation = " \\\n ";
    base = "${cfg.package}/bin/thanos ${cmd}";
  in
    if length extraArgs == 0
    then base
    else base + continuation + concatStringsSep continuation extraArgs;
# Compute the full argument list for sub-command `cmd`.
# Walks the parameter tree `params.<cmd>`, and for every parameter calls its
# `toArgs` with the dotted option path and the configured value found at the
# same path in `cfg.<cmd>`; the resulting lists are collected and concatenated.
argumentsOf = cmd:
  let
    argsAt = path: param:
      param.toArgs
        (concatStringsSep "." path)
        (getAttrFromPath path cfg.${cmd});
    argTree = mapParamsRecursive argsAt params.${cmd};
  in
    concatLists (collect isList argTree);
83 mkArgumentsOption = cmd: mkOption {
84 type = types.listOf types.str;
85 default = argumentsOf cmd;
86 defaultText = literalMD ''
87 calculated from `config.services.thanos.${cmd}`
89 description = lib.mdDoc ''
90 Arguments to the `thanos ${cmd}` command.
92 Defaults to a list of arguments formed by converting the structured
93 options of {option}`services.thanos.${cmd}` to a list of arguments.
95 Overriding this option will cause none of the structured options to have
96 any effect. So only set this if you know what you're doing!
101 let noParam = attr: !(attr ? toArgs && attr ? option);
102 in mapAttrsRecursiveCond noParam;
# Project a parameter tree onto its option declarations:
# every parameter is replaced by its `option` attribute.
paramsToOptions = paramTree:
  let
    optionOf = _path: param: param.option;
  in
    mapParamsRecursive optionOf paramTree;
110 log.level = mkParamDef (types.enum ["debug" "info" "warn" "error" "fatal"]) "info" ''
114 log.format = mkParam types.str ''
120 tracing.config-file = {
121 toArgs = _opt: path: optionToArgs "tracing.config-file" path;
123 type = with types; nullOr str;
124 default = if cfg.tracing.config == null then null
125 else toString (toYAML "tracing.yaml" cfg.tracing.config);
126 defaultText = literalExpression ''
127 if config.services.thanos.<cmd>.tracing.config == null then null
128 else toString (toYAML "tracing.yaml" config.services.thanos.<cmd>.tracing.config);
130 description = lib.mdDoc ''
131 Path to YAML file that contains tracing configuration.
133 See format details: <https://thanos.io/tracing.md/#configuration>
140 toArgs = _opt: _attrs: [];
141 option = nullOpt types.attrs ''
142 Tracing configuration.
144 When not `null` the attribute set gets converted to
145 a YAML file and stored in the Nix store. The option
146 {option}`tracing.config-file` will default to its path.
148 If {option}`tracing.config-file` is set this option has no effect.
150 See format details: <https://thanos.io/tracing.md/#configuration>
155 common = cfg: params.log // params.tracing cfg // {
157 http-address = mkParamDef types.str "0.0.0.0:10902" ''
158 Listen `host:port` for HTTP endpoints.
161 grpc-address = mkParamDef types.str "0.0.0.0:10901" ''
162 Listen `ip:port` address for gRPC endpoints (StoreAPI).
164 Make sure this address is routable from other components.
167 grpc-server-tls-cert = mkParam types.str ''
168 TLS Certificate for gRPC server, leave blank to disable TLS
171 grpc-server-tls-key = mkParam types.str ''
172 TLS Key for the gRPC server, leave blank to disable TLS
175 grpc-server-tls-client-ca = mkParam types.str ''
176 TLS CA to verify clients against.
178 If no client CA is specified, there is no client verification on server side.
185 objstore.config-file = {
186 toArgs = _opt: path: optionToArgs "objstore.config-file" path;
188 type = with types; nullOr str;
189 default = if cfg.objstore.config == null then null
190 else toString (toYAML "objstore.yaml" cfg.objstore.config);
191 defaultText = literalExpression ''
192 if config.services.thanos.<cmd>.objstore.config == null then null
193 else toString (toYAML "objstore.yaml" config.services.thanos.<cmd>.objstore.config);
195 description = lib.mdDoc ''
196 Path to YAML file that contains object store configuration.
198 See format details: <https://thanos.io/storage.md/#configuration>
205 toArgs = _opt: _attrs: [];
206 option = nullOpt types.attrs ''
207 Object store configuration.
209 When not `null` the attribute set gets converted to
210 a YAML file and stored in the Nix store. The option
211 {option}`objstore.config-file` will default to its path.
213 If {option}`objstore.config-file` is set this option has no effect.
215 See format details: <https://thanos.io/storage.md/#configuration>
220 sidecar = params.common cfg.sidecar // params.objstore cfg.sidecar // {
222 prometheus.url = mkParamDef types.str "http://localhost:9090" ''
223 URL at which to reach Prometheus's API.
225 For better performance use local network.
229 toArgs = optionToArgs;
232 default = "/var/lib/${config.services.prometheus.stateDir}/data";
233 defaultText = literalExpression ''"/var/lib/''${config.services.prometheus.stateDir}/data"'';
234 description = lib.mdDoc ''
235 Data directory of TSDB.
240 reloader.config-file = mkParam types.str ''
241 Config file watched by the reloader.
244 reloader.config-envsubst-file = mkParam types.str ''
245 Output file for environment variable substituted config file.
248 reloader.rule-dirs = mkListParam "reloader.rule-dir" ''
249 Rule directories for the reloader to refresh.
254 store = params.common cfg.store // params.objstore cfg.store // {
256 stateDir = mkStateDirParam "data-dir" "thanos-store" ''
257 Data directory relative to `/var/lib`
258 in which to cache remote blocks.
261 index-cache-size = mkParamDef types.str "250MB" ''
262 Maximum size of items held in the index cache.
265 chunk-pool-size = mkParamDef types.str "2GB" ''
266 Maximum size of concurrently allocatable bytes for chunks.
269 store.grpc.series-sample-limit = mkParamDef types.int 0 ''
270 Maximum amount of samples returned via a single Series call.
274 NOTE: for efficiency we take 120 as the number of samples in chunk (it
275 cannot be bigger than that), so the actual number of samples might be
276 lower, even though the maximum could be hit.
279 store.grpc.series-max-concurrency = mkParamDef types.int 20 ''
280 Maximum number of concurrent Series calls.
283 sync-block-duration = mkParamDef types.str "3m" ''
284 Repeat interval for syncing the blocks between local and remote view.
287 block-sync-concurrency = mkParamDef types.int 20 ''
288 Number of goroutines to use when syncing blocks from object storage.
291 min-time = mkParamDef types.str "0000-01-01T00:00:00Z" ''
292 Start of time range limit to serve.
294 Thanos Store serves only metrics, which happened later than this
295 value. Option can be a constant time in RFC3339 format or time duration
296 relative to current time, such as -1d or 2h45m. Valid duration units are
297 ms, s, m, h, d, w, y.
300 max-time = mkParamDef types.str "9999-12-31T23:59:59Z" ''
301 End of time range limit to serve.
Thanos Store serves only blocks, which happened earlier than this
304 value. Option can be a constant time in RFC3339 format or time duration
305 relative to current time, such as -1d or 2h45m. Valid duration units are
306 ms, s, m, h, d, w, y.
310 query = params.common cfg.query // {
312 grpc-client-tls-secure = mkFlagParam ''
313 Use TLS when talking to the gRPC server
316 grpc-client-tls-cert = mkParam types.str ''
317 TLS Certificates to use to identify this client to the server
320 grpc-client-tls-key = mkParam types.str ''
321 TLS Key for the client's certificate
324 grpc-client-tls-ca = mkParam types.str ''
325 TLS CA Certificates to use to verify gRPC servers
328 grpc-client-server-name = mkParam types.str ''
329 Server name to verify the hostname on the returned gRPC certificates.
330 See <https://tools.ietf.org/html/rfc4366#section-3.1>
333 web.route-prefix = mkParam types.str ''
334 Prefix for API and UI endpoints.
336 This allows thanos UI to be served on a sub-path. This option is
analogous to {option}`web.route-prefix` of Prometheus.
340 web.external-prefix = mkParam types.str ''
341 Static prefix for all HTML links and redirect URLs in the UI query web
344 Actual endpoints are still served on / or the
345 {option}`web.route-prefix`. This allows thanos UI to be served
346 behind a reverse proxy that strips a URL sub-path.
349 web.prefix-header = mkParam types.str ''
350 Name of HTTP request header used for dynamic prefixing of UI links and
353 This option is ignored if the option
354 `web.external-prefix` is set.
356 Security risk: enable this option only if a reverse proxy in front of
357 thanos is resetting the header.
359 The setting `web.prefix-header="X-Forwarded-Prefix"`
360 can be useful, for example, if Thanos UI is served via Traefik reverse
361 proxy with `PathPrefixStrip` option enabled, which
362 sends the stripped prefix value in `X-Forwarded-Prefix`
363 header. This allows thanos UI to be served on a sub-path.
366 query.timeout = mkParamDef types.str "2m" ''
367 Maximum time to process query by query node.
370 query.max-concurrent = mkParamDef types.int 20 ''
371 Maximum number of queries processed concurrently by query node.
374 query.replica-label = mkParam types.str ''
375 Label to treat as a replica indicator along which data is
378 Still you will be able to query without deduplication using
379 `dedup=false` parameter.
382 selector-labels = mkAttrsParam "selector-label" ''
383 Query selector labels that will be exposed in info endpoint.
386 store.addresses = mkListParam "store" ''
387 Addresses of statically configured store API servers.
389 The scheme may be prefixed with `dns+` or
390 `dnssrv+` to detect store API servers through
391 respective DNS lookups.
394 store.sd-files = mkListParam "store.sd-files" ''
395 Path to files that contain addresses of store API servers. The path
396 can be a glob pattern.
399 store.sd-interval = mkParamDef types.str "5m" ''
400 Refresh interval to re-read file SD files. It is used as a resync fallback.
403 store.sd-dns-interval = mkParamDef types.str "30s" ''
404 Interval between DNS resolutions.
407 store.unhealthy-timeout = mkParamDef types.str "5m" ''
408 Timeout before an unhealthy store is cleaned from the store UI page.
411 query.auto-downsampling = mkFlagParam ''
412 Enable automatic adjustment (step / 5) to what source of data should
413 be used in store gateways if no
414 `max_source_resolution` param is specified.
417 query.partial-response = mkFlagParam ''
418 Enable partial response for queries if no
419 `partial_response` param is specified.
422 query.default-evaluation-interval = mkParamDef types.str "1m" ''
423 Set default evaluation interval for sub queries.
426 store.response-timeout = mkParamDef types.str "0ms" ''
427 If a Store doesn't send any data in this specified duration then a
428 Store will be ignored and partial data will be returned if it's
429 enabled. `0` disables timeout.
433 rule = params.common cfg.rule // params.objstore cfg.rule // {
435 labels = mkAttrsParam "label" ''
436 Labels to be applied to all generated metrics.
438 Similar to external labels for Prometheus,
439 used to identify ruler and its blocks as unique source.
442 stateDir = mkStateDirParam "data-dir" "thanos-rule" ''
443 Data directory relative to `/var/lib`.
446 rule-files = mkListParam "rule-file" ''
447 Rule files that should be used by rule manager. Can be in glob format.
450 eval-interval = mkParamDef types.str "30s" ''
451 The default evaluation interval to use.
454 tsdb.block-duration = mkParamDef types.str "2h" ''
455 Block duration for TSDB block.
458 tsdb.retention = mkParamDef types.str "48h" ''
459 Block retention time on local disk.
462 alertmanagers.urls = mkListParam "alertmanagers.url" ''
463 Alertmanager replica URLs to push firing alerts.
465 Ruler claims success if push to at least one alertmanager from
466 discovered succeeds. The scheme may be prefixed with
467 `dns+` or `dnssrv+` to detect
468 Alertmanager IPs through respective DNS lookups. The port defaults to
469 `9093` or the SRV record's value. The URL path is
470 used as a prefix for the regular Alertmanager API path.
473 alertmanagers.send-timeout = mkParamDef types.str "10s" ''
474 Timeout for sending alerts to alertmanager.
477 alert.query-url = mkParam types.str ''
478 The external Thanos Query URL that would be set in all alerts 'Source' field.
481 alert.label-drop = mkListParam "alert.label-drop" ''
482 Labels by name to drop before sending to alertmanager.
484 This allows alert to be deduplicated on replica label.
486 Similar Prometheus alert relabelling
489 web.route-prefix = mkParam types.str ''
490 Prefix for API and UI endpoints.
492 This allows thanos UI to be served on a sub-path.
This option is analogous to `--web.route-prefix` of Prometheus.
497 web.external-prefix = mkParam types.str ''
498 Static prefix for all HTML links and redirect URLs in the UI query web
501 Actual endpoints are still served on / or the
502 {option}`web.route-prefix`. This allows thanos UI to be served
503 behind a reverse proxy that strips a URL sub-path.
506 web.prefix-header = mkParam types.str ''
507 Name of HTTP request header used for dynamic prefixing of UI links and
510 This option is ignored if the option
511 {option}`web.external-prefix` is set.
513 Security risk: enable this option only if a reverse proxy in front of
514 thanos is resetting the header.
516 The header `X-Forwarded-Prefix` can be useful, for
517 example, if Thanos UI is served via Traefik reverse proxy with
518 `PathPrefixStrip` option enabled, which sends the
519 stripped prefix value in `X-Forwarded-Prefix`
520 header. This allows thanos UI to be served on a sub-path.
523 query.addresses = mkListParam "query" ''
524 Addresses of statically configured query API servers.
526 The scheme may be prefixed with `dns+` or
527 `dnssrv+` to detect query API servers through
528 respective DNS lookups.
531 query.sd-files = mkListParam "query.sd-files" ''
532 Path to file that contain addresses of query peers.
533 The path can be a glob pattern.
536 query.sd-interval = mkParamDef types.str "5m" ''
537 Refresh interval to re-read file SD files. (used as a fallback)
540 query.sd-dns-interval = mkParamDef types.str "30s" ''
541 Interval between DNS resolutions.
545 compact = params.log // params.tracing cfg.compact // params.objstore cfg.compact // {
547 http-address = mkParamDef types.str "0.0.0.0:10902" ''
548 Listen `host:port` for HTTP endpoints.
551 stateDir = mkStateDirParam "data-dir" "thanos-compact" ''
552 Data directory relative to `/var/lib`
553 in which to cache blocks and process compactions.
556 consistency-delay = mkParamDef types.str "30m" ''
557 Minimum age of fresh (non-compacted) blocks before they are being
558 processed. Malformed blocks older than the maximum of consistency-delay
559 and 30m0s will be removed.
562 retention.resolution-raw = mkParamDef types.str "0d" ''
563 How long to retain raw samples in bucket.
565 `0d` - disables this retention
568 retention.resolution-5m = mkParamDef types.str "0d" ''
569 How long to retain samples of resolution 1 (5 minutes) in bucket.
571 `0d` - disables this retention
574 retention.resolution-1h = mkParamDef types.str "0d" ''
575 How long to retain samples of resolution 2 (1 hour) in bucket.
577 `0d` - disables this retention
581 toArgs = _opt: startAt: flagToArgs "wait" (startAt == null);
582 option = nullOpt types.str ''
583 When this option is set to a `systemd.time`
584 specification the Thanos compactor will run at the specified period.
586 When this option is `null` the Thanos compactor service
587 will run continuously. So it will not exit after all compactions have
588 been processed but wait for new work.
592 downsampling.disable = mkFlagParam ''
593 Disables downsampling.
595 This is not recommended as querying long time ranges without
596 non-downsampled data is not efficient and useful e.g it is not possible
597 to render all samples for a human eye anyway
600 block-sync-concurrency = mkParamDef types.int 20 ''
601 Number of goroutines to use when syncing block metadata from object storage.
604 compact.concurrency = mkParamDef types.int 1 ''
605 Number of goroutines to use when compacting groups.
609 downsample = params.log // params.tracing cfg.downsample // params.objstore cfg.downsample // {
611 stateDir = mkStateDirParam "data-dir" "thanos-downsample" ''
612 Data directory relative to `/var/lib`
613 in which to cache blocks and process downsamplings.
618 receive = params.common cfg.receive // params.objstore cfg.receive // {
620 remote-write.address = mkParamDef types.str "0.0.0.0:19291" ''
621 Address to listen on for remote write requests.
624 stateDir = mkStateDirParam "tsdb.path" "thanos-receive" ''
625 Data directory relative to `/var/lib` of TSDB.
628 labels = mkAttrsParam "labels" ''
629 External labels to announce.
631 This flag will be removed in the future when handling multiple tsdb
635 tsdb.retention = mkParamDef types.str "15d" ''
636 How long to retain raw samples on local storage.
638 `0d` - disables this retention
644 assertRelativeStateDir = cmd: {
647 assertion = !hasPrefix "/" cfg.${cmd}.stateDir;
649 "The option services.thanos.${cmd}.stateDir should not be an absolute directory." +
650 " It should be a directory relative to /var/lib.";
657 options.services.thanos = {
660 type = types.package;
661 default = pkgs.thanos;
662 defaultText = literalExpression "pkgs.thanos";
663 description = lib.mdDoc ''
664 The thanos package that should be used.
668 sidecar = paramsToOptions params.sidecar // {
669 enable = mkEnableOption
670 (lib.mdDoc "the Thanos sidecar for Prometheus server");
671 arguments = mkArgumentsOption "sidecar";
674 store = paramsToOptions params.store // {
675 enable = mkEnableOption
676 (lib.mdDoc "the Thanos store node giving access to blocks in a bucket provider.");
677 arguments = mkArgumentsOption "store";
680 query = paramsToOptions params.query // {
681 enable = mkEnableOption
682 (lib.mdDoc ("the Thanos query node exposing PromQL enabled Query API " +
683 "with data retrieved from multiple store nodes"));
684 arguments = mkArgumentsOption "query";
687 rule = paramsToOptions params.rule // {
688 enable = mkEnableOption
689 (lib.mdDoc ("the Thanos ruler service which evaluates Prometheus rules against" +
690 " given Query nodes, exposing Store API and storing old blocks in bucket"));
691 arguments = mkArgumentsOption "rule";
694 compact = paramsToOptions params.compact // {
695 enable = mkEnableOption
696 (lib.mdDoc "the Thanos compactor which continuously compacts blocks in an object store bucket");
697 arguments = mkArgumentsOption "compact";
700 downsample = paramsToOptions params.downsample // {
701 enable = mkEnableOption
702 (lib.mdDoc "the Thanos downsampler which continuously downsamples blocks in an object store bucket");
703 arguments = mkArgumentsOption "downsample";
706 receive = paramsToOptions params.receive // {
707 enable = mkEnableOption
708 (lib.mdDoc ("the Thanos receiver which accept Prometheus remote write API requests " +
709 "and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)"));
710 arguments = mkArgumentsOption "receive";
716 (mkIf cfg.sidecar.enable {
719 assertion = config.services.prometheus.enable;
721 "Please enable services.prometheus when enabling services.thanos.sidecar.";
724 assertion = !(config.services.prometheus.globalConfig.external_labels == null ||
725 config.services.prometheus.globalConfig.external_labels == {});
727 "services.thanos.sidecar requires uniquely identifying external labels " +
728 "to be configured in the Prometheus server. " +
729 "Please set services.prometheus.globalConfig.external_labels.";
732 systemd.services.thanos-sidecar = {
733 wantedBy = [ "multi-user.target" ];
734 after = [ "network.target" "prometheus.service" ];
738 ExecStart = thanos "sidecar";
743 (mkIf cfg.store.enable (mkMerge [
744 (assertRelativeStateDir "store")
746 systemd.services.thanos-store = {
747 wantedBy = [ "multi-user.target" ];
748 after = [ "network.target" ];
751 StateDirectory = cfg.store.stateDir;
753 ExecStart = thanos "store";
759 (mkIf cfg.query.enable {
760 systemd.services.thanos-query = {
761 wantedBy = [ "multi-user.target" ];
762 after = [ "network.target" ];
766 ExecStart = thanos "query";
771 (mkIf cfg.rule.enable (mkMerge [
772 (assertRelativeStateDir "rule")
774 systemd.services.thanos-rule = {
775 wantedBy = [ "multi-user.target" ];
776 after = [ "network.target" ];
779 StateDirectory = cfg.rule.stateDir;
781 ExecStart = thanos "rule";
787 (mkIf cfg.compact.enable (mkMerge [
788 (assertRelativeStateDir "compact")
790 systemd.services.thanos-compact =
791 let wait = cfg.compact.startAt == null; in {
792 wantedBy = [ "multi-user.target" ];
793 after = [ "network.target" ];
795 Type = if wait then "simple" else "oneshot";
796 Restart = if wait then "always" else "no";
798 StateDirectory = cfg.compact.stateDir;
799 ExecStart = thanos "compact";
801 } // optionalAttrs (!wait) { inherit (cfg.compact) startAt; };
805 (mkIf cfg.downsample.enable (mkMerge [
806 (assertRelativeStateDir "downsample")
808 systemd.services.thanos-downsample = {
809 wantedBy = [ "multi-user.target" ];
810 after = [ "network.target" ];
813 StateDirectory = cfg.downsample.stateDir;
815 ExecStart = thanos "downsample";
821 (mkIf cfg.receive.enable (mkMerge [
822 (assertRelativeStateDir "receive")
824 systemd.services.thanos-receive = {
825 wantedBy = [ "multi-user.target" ];
826 after = [ "network.target" ];
829 StateDirectory = cfg.receive.stateDir;
831 ExecStart = thanos "receive";