1 { config, lib, pkgs, ... }:
28 cfg = config.services.thanos;
30 nullOpt = type: description: mkOption {
31 type = types.nullOr type;
33 description = description;
# Render a single valued command-line argument of the form `--<opt>="<value>"`.
#
# opt: name of the flag, without the leading dashes
# v:   value of the flag; `null` means "not set"
#
# Returns an empty list when `v` is null (so unset options emit nothing),
# otherwise a one-element list with the value stringified via `toString`.
optionToArgs = opt: v:
  if v == null
  then [ ]
  else [ "--${opt}=\"${toString v}\"" ];
# Render a boolean switch.
#
# opt: name of the flag, without the leading dashes
# v:   boolean; the flag is emitted only when it is true
#
# Returns `[ "--<opt>" ]` when `v` is true and `[ ]` when it is false.
flagToArgs = opt: v:
  if v then [ "--${opt}" ] else [ ];
# Render a repeatable flag: one `--<opt>="<item>"` argument per list element.
#
# opt: name of the flag, without the leading dashes
# (second argument) list of strings, each becoming one argument
#
# An empty list yields an empty list of arguments.
listToArgs = opt:
  map (item: "--${opt}=\"${item}\"");
# Render a key/value flag: one `--<opt>=<key>=\"<value>\"` argument per
# attribute, in attribute-name order.
#
# opt: name of the flag, without the leading dashes
# kvs: attribute set of strings
#
# The backslashes in front of the quotes are part of the emitted text
# (the argument literally contains `\"`), so they are written as `\\\"`
# inside the double-quoted string below.
attrsToArgs = opt: kvs:
  let
    render = key: "--${opt}=${key}=\\\"${kvs.${key}}\\\"";
  in
  map render (builtins.attrNames kvs);
41 mkParamDef = type: default: description: mkParam type (description + ''
43 Defaults to `${toString default}` in Thanos
47 mkParam = type: description: {
48 toArgs = optionToArgs;
49 option = nullOpt type description;
52 mkFlagParam = description: {
57 description = description;
61 mkListParam = opt: description: {
62 toArgs = _opt: listToArgs opt;
64 type = types.listOf types.str;
66 description = description;
70 mkAttrsParam = opt: description: {
71 toArgs = _opt: attrsToArgs opt;
73 type = types.attrsOf types.str;
75 description = description;
79 mkStateDirParam = opt: default: description: {
80 toArgs = _opt: stateDir: optionToArgs opt "/var/lib/${stateDir}";
84 description = description;
# Convert an attribute set to a YAML file in the Nix store.
#
# name:  name of the resulting store path
# attrs: attribute set; serialised with `builtins.toJSON` and converted
#        to YAML by `json2yaml` from `pkgs.remarshal`
#
# Returns the derivation producing the YAML file.
#
# The intermediate JSON file is created with `pkgs.writeText` rather than
# `builtins.toFile`: `builtins.toFile` aborts evaluation when the text
# references a derivation output (e.g. a value such as
# "${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt"), whereas `writeText`
# keeps that string context as a regular build dependency.
toYAML = name: attrs: pkgs.runCommand name {
  preferLocalBuild = true;
  json = pkgs.writeText "${name}.json" (builtins.toJSON attrs);
  nativeBuildInputs = [ pkgs.remarshal ];
} ''json2yaml -i "$json" -o "$out"'';
# Build the command line for `thanos <cmd>`.
#
# cmd: name of the Thanos sub-command; also the attribute of `cfg`
#      whose `arguments` list is appended
#
# Returns the path of the thanos binary from `cfg.package`, followed by the
# sub-command and then every argument, each one preceded by a
# backslash-newline continuation. With no arguments only
# "<binary> <cmd>" is returned.
thanos = cmd:
  let
    binary = "${cfg.package}/bin/thanos";
    continued = arg: " \\\n " + arg;
  in
  "${binary} ${cmd}" + concatMapStrings continued cfg.${cmd}.arguments;
# Compute the full argument list of a Thanos sub-command from its
# structured options.
#
# cmd: name of the sub-command; selects both `params.<cmd>` (the parameter
#      definitions) and `cfg.<cmd>` (the configured values)
#
# Every parameter's `toArgs` is called with its dotted attribute path and
# the value found at that path in `cfg.<cmd>`; the resulting lists are
# collected from the tree and concatenated into one flat list.
argumentsOf = cmd:
  let
    renderAt = path: param:
      param.toArgs
        (concatStringsSep "." path)
        (getAttrFromPath path cfg.${cmd});
    argTree = mapParamsRecursive renderAt params.${cmd};
  in
  concatLists (collect isList argTree);
105 mkArgumentsOption = cmd: mkOption {
106 type = types.listOf types.str;
107 default = argumentsOf cmd;
108 defaultText = literalMD ''
109 calculated from `config.services.thanos.${cmd}`
112 Arguments to the `thanos ${cmd}` command.
114 Defaults to a list of arguments formed by converting the structured
115 options of {option}`services.thanos.${cmd}` to a list of arguments.
117 Overriding this option will cause none of the structured options to have
118 any effect. So only set this if you know what you're doing!
123 let noParam = attr: !(attr ? toArgs && attr ? option);
124 in mapAttrsRecursiveCond noParam;
# Map a tree of parameter definitions to a tree of the same shape that
# holds only each parameter's `option` attribute.
paramsToOptions = paramTree:
  mapParamsRecursive (_: definition: definition.option) paramTree;
132 log.level = mkParamDef (types.enum ["debug" "info" "warn" "error" "fatal"]) "info" ''
136 log.format = mkParam types.str ''
142 tracing.config-file = {
143 toArgs = _opt: path: optionToArgs "tracing.config-file" path;
145 type = with types; nullOr str;
146 default = if cfg.tracing.config == null then null
147 else toString (toYAML "tracing.yaml" cfg.tracing.config);
148 defaultText = literalExpression ''
149 if config.services.thanos.<cmd>.tracing.config == null then null
150 else toString (toYAML "tracing.yaml" config.services.thanos.<cmd>.tracing.config);
153 Path to YAML file that contains tracing configuration.
155 See format details: <https://thanos.io/tip/thanos/tracing.md/#configuration>
162 toArgs = _opt: _attrs: [];
163 option = nullOpt types.attrs ''
164 Tracing configuration.
166 When not `null` the attribute set gets converted to
167 a YAML file and stored in the Nix store. The option
168 {option}`tracing.config-file` will default to its path.
170 If {option}`tracing.config-file` is set this option has no effect.
172 See format details: <https://thanos.io/tip/thanos/tracing.md/#configuration>
177 common = cfg: params.log // params.tracing cfg // {
179 http-address = mkParamDef types.str "0.0.0.0:10902" ''
180 Listen `host:port` for HTTP endpoints.
183 grpc-address = mkParamDef types.str "0.0.0.0:10901" ''
184 Listen `ip:port` address for gRPC endpoints (StoreAPI).
186 Make sure this address is routable from other components.
189 grpc-server-tls-cert = mkParam types.str ''
190 TLS Certificate for gRPC server, leave blank to disable TLS
193 grpc-server-tls-key = mkParam types.str ''
194 TLS Key for the gRPC server, leave blank to disable TLS
197 grpc-server-tls-client-ca = mkParam types.str ''
198 TLS CA to verify clients against.
200 If no client CA is specified, there is no client verification on server side.
207 objstore.config-file = {
208 toArgs = _opt: path: optionToArgs "objstore.config-file" path;
210 type = with types; nullOr str;
211 default = if cfg.objstore.config == null then null
212 else toString (toYAML "objstore.yaml" cfg.objstore.config);
213 defaultText = literalExpression ''
214 if config.services.thanos.<cmd>.objstore.config == null then null
215 else toString (toYAML "objstore.yaml" config.services.thanos.<cmd>.objstore.config);
218 Path to YAML file that contains object store configuration.
220 See format details: <https://thanos.io/tip/thanos/storage.md/#configuring-access-to-object-storage>
227 toArgs = _opt: _attrs: [];
228 option = nullOpt types.attrs ''
229 Object store configuration.
231 When not `null` the attribute set gets converted to
232 a YAML file and stored in the Nix store. The option
233 {option}`objstore.config-file` will default to its path.
235 If {option}`objstore.config-file` is set this option has no effect.
237 See format details: <https://thanos.io/tip/thanos/storage.md/#configuring-access-to-object-storage>
242 sidecar = params.common cfg.sidecar // params.objstore cfg.sidecar // {
244 prometheus.url = mkParamDef types.str "http://localhost:9090" ''
245 URL at which to reach Prometheus's API.
247 For better performance use local network.
251 toArgs = optionToArgs;
254 default = "/var/lib/${config.services.prometheus.stateDir}/data";
255 defaultText = literalExpression ''"/var/lib/''${config.services.prometheus.stateDir}/data"'';
257 Data directory of TSDB.
262 reloader.config-file = mkParam types.str ''
263 Config file watched by the reloader.
266 reloader.config-envsubst-file = mkParam types.str ''
267 Output file for environment variable substituted config file.
270 reloader.rule-dirs = mkListParam "reloader.rule-dir" ''
271 Rule directories for the reloader to refresh.
276 store = params.common cfg.store // params.objstore cfg.store // {
278 stateDir = mkStateDirParam "data-dir" "thanos-store" ''
279 Data directory relative to `/var/lib`
280 in which to cache remote blocks.
283 index-cache-size = mkParamDef types.str "250MB" ''
284 Maximum size of items held in the index cache.
287 chunk-pool-size = mkParamDef types.str "2GB" ''
288 Maximum size of concurrently allocatable bytes for chunks.
291 store.limits.request-samples = mkParamDef types.int 0 ''
292 The maximum samples allowed for a single Series request.
293 The Series call fails if this limit is exceeded.
297 NOTE: For efficiency the limit is internally implemented as 'chunks limit'
298 considering each chunk contains a maximum of 120 samples.
301 store.grpc.series-max-concurrency = mkParamDef types.int 20 ''
302 Maximum number of concurrent Series calls.
305 sync-block-duration = mkParamDef types.str "3m" ''
306 Repeat interval for syncing the blocks between local and remote view.
309 block-sync-concurrency = mkParamDef types.int 20 ''
310 Number of goroutines to use when syncing blocks from object storage.
313 min-time = mkParamDef types.str "0000-01-01T00:00:00Z" ''
314 Start of time range limit to serve.
316 Thanos Store serves only metrics, which happened later than this
317 value. Option can be a constant time in RFC3339 format or time duration
318 relative to current time, such as -1d or 2h45m. Valid duration units are
319 ms, s, m, h, d, w, y.
322 max-time = mkParamDef types.str "9999-12-31T23:59:59Z" ''
323 End of time range limit to serve.
325 Thanos Store serves only blocks, which happened earlier than this
326 value. Option can be a constant time in RFC3339 format or time duration
327 relative to current time, such as -1d or 2h45m. Valid duration units are
328 ms, s, m, h, d, w, y.
332 query = params.common cfg.query // {
334 grpc-client-tls-secure = mkFlagParam ''
335 Use TLS when talking to the gRPC server
338 grpc-client-tls-cert = mkParam types.str ''
339 TLS Certificates to use to identify this client to the server
342 grpc-client-tls-key = mkParam types.str ''
343 TLS Key for the client's certificate
346 grpc-client-tls-ca = mkParam types.str ''
347 TLS CA Certificates to use to verify gRPC servers
350 grpc-client-server-name = mkParam types.str ''
351 Server name to verify the hostname on the returned gRPC certificates.
352 See <https://tools.ietf.org/html/rfc4366#section-3.1>
355 grpc-compression = mkParam types.str ''
356 Compression algorithm to use for gRPC requests to other clients.
359 web.route-prefix = mkParam types.str ''
360 Prefix for API and UI endpoints.
362 This allows thanos UI to be served on a sub-path. This option is
363 analogous to {option}`web.route-prefix` of Promethus.
366 web.external-prefix = mkParam types.str ''
367 Static prefix for all HTML links and redirect URLs in the UI query web
370 Actual endpoints are still served on / or the
371 {option}`web.route-prefix`. This allows thanos UI to be served
372 behind a reverse proxy that strips a URL sub-path.
375 web.prefix-header = mkParam types.str ''
376 Name of HTTP request header used for dynamic prefixing of UI links and
379 This option is ignored if the option
380 `web.external-prefix` is set.
382 Security risk: enable this option only if a reverse proxy in front of
383 thanos is resetting the header.
385 The setting `web.prefix-header="X-Forwarded-Prefix"`
386 can be useful, for example, if Thanos UI is served via Traefik reverse
387 proxy with `PathPrefixStrip` option enabled, which
388 sends the stripped prefix value in `X-Forwarded-Prefix`
389 header. This allows thanos UI to be served on a sub-path.
392 query.timeout = mkParamDef types.str "2m" ''
393 Maximum time to process query by query node.
396 query.max-concurrent = mkParamDef types.int 20 ''
397 Maximum number of queries processed concurrently by query node.
400 query.replica-labels = mkListParam "query.replica-label" ''
401 Labels to treat as a replica indicator along which data is
404 Still you will be able to query without deduplication using
405 'dedup=false' parameter. Data includes time series, recording
406 rules, and alerting rules.
409 selector-labels = mkAttrsParam "selector-label" ''
410 Query selector labels that will be exposed in info endpoint.
413 endpoints = mkListParam "endpoint" ''
414 Addresses of statically configured Thanos API servers (repeatable).
416 The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect
417 Thanos API servers through respective DNS lookups.
420 store.sd-files = mkListParam "store.sd-files" ''
421 Path to files that contain addresses of store API servers. The path
422 can be a glob pattern.
425 store.sd-interval = mkParamDef types.str "5m" ''
426 Refresh interval to re-read file SD files. It is used as a resync fallback.
429 store.sd-dns-interval = mkParamDef types.str "30s" ''
430 Interval between DNS resolutions.
433 store.unhealthy-timeout = mkParamDef types.str "5m" ''
434 Timeout before an unhealthy store is cleaned from the store UI page.
437 query.auto-downsampling = mkFlagParam ''
438 Enable automatic adjustment (step / 5) to what source of data should
439 be used in store gateways if no
440 `max_source_resolution` param is specified.
443 query.partial-response = mkFlagParam ''
444 Enable partial response for queries if no
445 `partial_response` param is specified.
448 query.default-evaluation-interval = mkParamDef types.str "1m" ''
449 Set default evaluation interval for sub queries.
452 store.response-timeout = mkParamDef types.str "0ms" ''
453 If a Store doesn't send any data in this specified duration then a
454 Store will be ignored and partial data will be returned if it's
455 enabled. `0` disables timeout.
459 query-frontend = params.common cfg.query-frontend // {
460 query-frontend.downstream-url = mkParamDef types.str "http://localhost:9090" ''
461 URL of downstream Prometheus Query compatible API.
465 rule = params.common cfg.rule // params.objstore cfg.rule // {
467 labels = mkAttrsParam "label" ''
468 Labels to be applied to all generated metrics.
470 Similar to external labels for Prometheus,
471 used to identify ruler and its blocks as unique source.
474 stateDir = mkStateDirParam "data-dir" "thanos-rule" ''
475 Data directory relative to `/var/lib`.
478 rule-files = mkListParam "rule-file" ''
479 Rule files that should be used by rule manager. Can be in glob format.
482 eval-interval = mkParamDef types.str "1m" ''
483 The default evaluation interval to use.
486 tsdb.block-duration = mkParamDef types.str "2h" ''
487 Block duration for TSDB block.
490 tsdb.retention = mkParamDef types.str "48h" ''
491 Block retention time on local disk.
494 alertmanagers.urls = mkListParam "alertmanagers.url" ''
495 Alertmanager replica URLs to push firing alerts.
497 Ruler claims success if push to at least one alertmanager from
498 discovered succeeds. The scheme may be prefixed with
499 `dns+` or `dnssrv+` to detect
500 Alertmanager IPs through respective DNS lookups. The port defaults to
501 `9093` or the SRV record's value. The URL path is
502 used as a prefix for the regular Alertmanager API path.
505 alertmanagers.send-timeout = mkParamDef types.str "10s" ''
506 Timeout for sending alerts to alertmanager.
509 alert.query-url = mkParam types.str ''
510 The external Thanos Query URL that would be set in all alerts 'Source' field.
513 alert.label-drop = mkListParam "alert.label-drop" ''
514 Labels by name to drop before sending to alertmanager.
516 This allows alert to be deduplicated on replica label.
518 Similar Prometheus alert relabelling
521 web.route-prefix = mkParam types.str ''
522 Prefix for API and UI endpoints.
524 This allows thanos UI to be served on a sub-path.
526 This option is analogous to `--web.route-prefix` of Promethus.
529 web.external-prefix = mkParam types.str ''
530 Static prefix for all HTML links and redirect URLs in the UI query web
533 Actual endpoints are still served on / or the
534 {option}`web.route-prefix`. This allows thanos UI to be served
535 behind a reverse proxy that strips a URL sub-path.
538 web.prefix-header = mkParam types.str ''
539 Name of HTTP request header used for dynamic prefixing of UI links and
542 This option is ignored if the option
543 {option}`web.external-prefix` is set.
545 Security risk: enable this option only if a reverse proxy in front of
546 thanos is resetting the header.
548 The header `X-Forwarded-Prefix` can be useful, for
549 example, if Thanos UI is served via Traefik reverse proxy with
550 `PathPrefixStrip` option enabled, which sends the
551 stripped prefix value in `X-Forwarded-Prefix`
552 header. This allows thanos UI to be served on a sub-path.
555 query.addresses = mkListParam "query" ''
556 Addresses of statically configured query API servers.
558 The scheme may be prefixed with `dns+` or
559 `dnssrv+` to detect query API servers through
560 respective DNS lookups.
563 query.sd-files = mkListParam "query.sd-files" ''
564 Path to file that contain addresses of query peers.
565 The path can be a glob pattern.
568 query.sd-interval = mkParamDef types.str "5m" ''
569 Refresh interval to re-read file SD files. (used as a fallback)
572 query.sd-dns-interval = mkParamDef types.str "30s" ''
573 Interval between DNS resolutions.
577 compact = params.log // params.tracing cfg.compact // params.objstore cfg.compact // {
579 http-address = mkParamDef types.str "0.0.0.0:10902" ''
580 Listen `host:port` for HTTP endpoints.
583 stateDir = mkStateDirParam "data-dir" "thanos-compact" ''
584 Data directory relative to `/var/lib`
585 in which to cache blocks and process compactions.
588 consistency-delay = mkParamDef types.str "30m" ''
589 Minimum age of fresh (non-compacted) blocks before they are being
590 processed. Malformed blocks older than the maximum of consistency-delay
591 and 30m0s will be removed.
594 retention.resolution-raw = mkParamDef types.str "0d" ''
595 How long to retain raw samples in bucket.
597 `0d` - disables this retention
600 retention.resolution-5m = mkParamDef types.str "0d" ''
601 How long to retain samples of resolution 1 (5 minutes) in bucket.
603 `0d` - disables this retention
606 retention.resolution-1h = mkParamDef types.str "0d" ''
607 How long to retain samples of resolution 2 (1 hour) in bucket.
609 `0d` - disables this retention
613 toArgs = _opt: startAt: flagToArgs "wait" (startAt == null);
614 option = nullOpt types.str ''
615 When this option is set to a `systemd.time`
616 specification the Thanos compactor will run at the specified period.
618 When this option is `null` the Thanos compactor service
619 will run continuously. So it will not exit after all compactions have
620 been processed but wait for new work.
624 downsampling.disable = mkFlagParam ''
625 Disables downsampling.
627 This is not recommended as querying long time ranges without
628 non-downsampled data is not efficient and useful e.g it is not possible
629 to render all samples for a human eye anyway
632 compact.concurrency = mkParamDef types.int 1 ''
633 Number of goroutines to use when compacting groups.
637 downsample = params.log // params.tracing cfg.downsample // params.objstore cfg.downsample // {
639 stateDir = mkStateDirParam "data-dir" "thanos-downsample" ''
640 Data directory relative to `/var/lib`
641 in which to cache blocks and process downsamplings.
646 receive = params.common cfg.receive // params.objstore cfg.receive // {
648 receive.grpc-compression = mkParam types.str ''
649 Compression algorithm to use for gRPC requests to other receivers.
652 remote-write.address = mkParamDef types.str "0.0.0.0:19291" ''
653 Address to listen on for remote write requests.
656 stateDir = mkStateDirParam "tsdb.path" "thanos-receive" ''
657 Data directory relative to `/var/lib` of TSDB.
660 labels = mkAttrsParam "label" ''
661 External labels to announce.
663 This flag will be removed in the future when handling multiple tsdb
667 tsdb.retention = mkParamDef types.str "15d" ''
668 How long to retain raw samples on local storage.
670 `0d` - disables this retention
676 assertRelativeStateDir = cmd: {
679 assertion = !hasPrefix "/" cfg.${cmd}.stateDir;
681 "The option services.thanos.${cmd}.stateDir should not be an absolute directory." +
682 " It should be a directory relative to /var/lib.";
689 options.services.thanos = {
691 package = mkPackageOption pkgs "thanos" {};
693 sidecar = paramsToOptions params.sidecar // {
694 enable = mkEnableOption "the Thanos sidecar for Prometheus server";
695 arguments = mkArgumentsOption "sidecar";
698 store = paramsToOptions params.store // {
699 enable = mkEnableOption "the Thanos store node giving access to blocks in a bucket provider";
700 arguments = mkArgumentsOption "store";
703 query = paramsToOptions params.query // {
704 enable = mkEnableOption ("the Thanos query node exposing PromQL enabled Query API " +
705 "with data retrieved from multiple store nodes");
706 arguments = mkArgumentsOption "query";
709 query-frontend = paramsToOptions params.query-frontend // {
710 enable = mkEnableOption ("the Thanos query frontend implements a service deployed in front of queriers to
711 improve query parallelization and caching.");
712 arguments = mkArgumentsOption "query-frontend";
715 rule = paramsToOptions params.rule // {
716 enable = mkEnableOption ("the Thanos ruler service which evaluates Prometheus rules against" +
717 " given Query nodes, exposing Store API and storing old blocks in bucket");
718 arguments = mkArgumentsOption "rule";
721 compact = paramsToOptions params.compact // {
722 enable = mkEnableOption "the Thanos compactor which continuously compacts blocks in an object store bucket";
723 arguments = mkArgumentsOption "compact";
726 downsample = paramsToOptions params.downsample // {
727 enable = mkEnableOption "the Thanos downsampler which continuously downsamples blocks in an object store bucket";
728 arguments = mkArgumentsOption "downsample";
731 receive = paramsToOptions params.receive // {
732 enable = mkEnableOption ("the Thanos receiver which accept Prometheus remote write API requests and write to local tsdb");
733 arguments = mkArgumentsOption "receive";
739 (mkIf cfg.sidecar.enable {
742 assertion = config.services.prometheus.enable;
744 "Please enable services.prometheus when enabling services.thanos.sidecar.";
747 assertion = !(config.services.prometheus.globalConfig.external_labels == null ||
748 config.services.prometheus.globalConfig.external_labels == {});
750 "services.thanos.sidecar requires uniquely identifying external labels " +
751 "to be configured in the Prometheus server. " +
752 "Please set services.prometheus.globalConfig.external_labels.";
755 systemd.services.thanos-sidecar = {
756 wantedBy = [ "multi-user.target" ];
757 after = [ "network.target" "prometheus.service" ];
761 ExecStart = thanos "sidecar";
762 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
767 (mkIf cfg.store.enable (mkMerge [
768 (assertRelativeStateDir "store")
770 systemd.services.thanos-store = {
771 wantedBy = [ "multi-user.target" ];
772 after = [ "network.target" ];
775 StateDirectory = cfg.store.stateDir;
777 ExecStart = thanos "store";
778 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
784 (mkIf cfg.query.enable {
785 systemd.services.thanos-query = {
786 wantedBy = [ "multi-user.target" ];
787 after = [ "network.target" ];
791 ExecStart = thanos "query";
792 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
797 (mkIf cfg.query-frontend.enable {
798 systemd.services.thanos-query-frontend = {
799 wantedBy = [ "multi-user.target" ];
800 after = [ "network.target" ];
804 ExecStart = thanos "query-frontend";
805 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
810 (mkIf cfg.rule.enable (mkMerge [
811 (assertRelativeStateDir "rule")
813 systemd.services.thanos-rule = {
814 wantedBy = [ "multi-user.target" ];
815 after = [ "network.target" ];
818 StateDirectory = cfg.rule.stateDir;
820 ExecStart = thanos "rule";
821 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
827 (mkIf cfg.compact.enable (mkMerge [
828 (assertRelativeStateDir "compact")
830 systemd.services.thanos-compact =
831 let wait = cfg.compact.startAt == null; in {
832 wantedBy = [ "multi-user.target" ];
833 after = [ "network.target" ];
835 Type = if wait then "simple" else "oneshot";
836 Restart = if wait then "always" else "no";
838 StateDirectory = cfg.compact.stateDir;
839 ExecStart = thanos "compact";
840 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
842 } // optionalAttrs (!wait) { inherit (cfg.compact) startAt; };
846 (mkIf cfg.downsample.enable (mkMerge [
847 (assertRelativeStateDir "downsample")
849 systemd.services.thanos-downsample = {
850 wantedBy = [ "multi-user.target" ];
851 after = [ "network.target" ];
854 StateDirectory = cfg.downsample.stateDir;
856 ExecStart = thanos "downsample";
857 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
863 (mkIf cfg.receive.enable (mkMerge [
864 (assertRelativeStateDir "receive")
866 systemd.services.thanos-receive = {
867 wantedBy = [ "multi-user.target" ];
868 after = [ "network.target" ];
871 StateDirectory = cfg.receive.stateDir;
873 ExecStart = thanos "receive";
874 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";