python312Packages.millheater: 0.11.8 -> 0.12.0
[NixPkgs.git] / nixos / tests / prometheus / alertmanager.nix
blob6301db6df62e3c1d65bc75cdb362e58e16415ace
1 import ../make-test-python.nix ({ lib, pkgs, ... }:
4   name = "prometheus-alertmanager";
6   nodes = {
7     prometheus = { config, pkgs, ... }: {
      # Test node that runs the Prometheus server: it scrapes two jobs,
      # evaluates a single alert rule, and forwards alerts to the host named
      # "alertmanager".

      # jq is used by testScript on this node to pick fields out of the JSON
      # returned by the Prometheus HTTP API.
8       environment.systemPackages = [ pkgs.jq ];
      # Open the firewall for the configured Prometheus port.
10       networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
12       services.prometheus = {
13         enable = true;
        # NOTE(review): presumably a short interval so the test converges
        # quickly; the alert rule below uses `for: 5s`.
14         globalConfig.scrape_interval = "2s";
        # Where alerts are sent: plain HTTP to the "alertmanager" host.
        # `config` is this node's own configuration, so the port is the value
        # of services.prometheus.alertmanager.port as evaluated here; the
        # alertmanager node in this file does not set a different port.
16         alertmanagers = [
17           {
18             scheme = "http";
19             static_configs = [
20               {
21                 targets = [
22                   "alertmanager:${toString config.services.prometheus.alertmanager.port}"
23                 ];
24               }
25             ];
26           }
27         ];
        # One rule group ("test") with one alert: InstanceDown is raised for
        # any target whose `up` metric has been 0 for 5s, labelled
        # severity=page.
29         rules = [
30           ''
31             groups:
32               - name: test
33                 rules:
34                   - alert: InstanceDown
35                     expr: up == 0
36                     for: 5s
37                     labels:
38                       severity: page
39                     annotations:
40                       summary: "Instance {{ $labels.instance }} down"
41           ''
42         ];
        # Two scrape jobs:
        #  - "alertmanager": the alertmanager node's own metrics endpoint.
        #  - "node": a host called "node" on the node-exporter port. No node of
        #    that name is defined under `nodes` in this file, and testScript
        #    waits for exactly one target of this job to be not-up, so this
        #    target is presumably meant to be unreachable in order to trigger
        #    InstanceDown.
44         scrapeConfigs = [
45           {
46             job_name = "alertmanager";
47             static_configs = [
48               {
49                 targets = [
50                   "alertmanager:${toString config.services.prometheus.alertmanager.port}"
51                 ];
52               }
53             ];
54           }
55           {
56             job_name = "node";
57             static_configs = [
58               {
59                 targets = [
60                   "node:${toString config.services.prometheus.exporters.node.port}"
61                 ];
62               }
63             ];
64           }
65         ];
66       };
67     };
69     alertmanager = { config, pkgs, ... }: {
      # Test node that runs Alertmanager. Every alert is routed to a single
      # webhook receiver that posts to the "logger" node.
70       services.prometheus.alertmanager = {
71         enable = true;
        # Let the prometheus node reach Alertmanager (it both scrapes it and
        # sends alerts to it).
72         openFirewall = true;
74         configuration = {
75           global = {
76             resolve_timeout = "1m";
77           };
79           route = {
80             # Root route node
81             receiver = "test";
            # "..." is Alertmanager's special value for grouping by all labels
            # (see the upstream configuration reference).
82             group_by = ["..."];
83             continue = false;
84             group_wait = "1s";
85             group_interval = "15s";
86             repeat_interval = "24h";
87           };
89           receivers = [
90             {
91               name = "test";
92               webhook_configs = [
93                 {
                  # Port 6725 matches the TCP port opened in the firewall of
                  # the logger node in this file.
94                   url = "http://logger:6725";
95                   send_resolved = true;
                  # NOTE(review): 0 should mean "no limit on alerts per
                  # webhook message" per the Alertmanager docs; confirm.
96                   max_alerts = 0;
97                 }
98               ];
99             }
100           ];
101         };
102       };
103     };
105     logger = { config, pkgs, ... }: {
      # Test node that receives Alertmanager's webhook notifications.
      # TCP 6725 is the port used in the webhook URL ("http://logger:6725")
      # configured on the alertmanager node, and the port testScript waits on.
106       networking.firewall.allowedTCPPorts = [ 6725 ];
      # testScript later greps this service's journal for the InstanceDown
      # alert to prove the notification arrived.
108       services.prometheus.alertmanagerWebhookLogger.enable = true;
109     };
110   };
# testScript: Python test script, run against the three nodes defined above.
# Steps, in order:
#   1. Wait for Alertmanager (unit, port 9093, /-/ready endpoint).
#   2. Wait for the webhook logger (unit, port 6725).
#   3. Wait for Prometheus (unit, port 9090).
#   4. Poll the Prometheus query API until:
#        - exactly one "alertmanager" target is up,
#        - alertmanager_build_info reports the version of
#          pkgs.prometheus-alertmanager,
#        - exactly one "node" target is not up,
#        - alertmanager_notifications_total for the webhook integration is
#          something other than "0".
#   5. Wait until the logger's journal contains an InstanceDown alert.
#   6. Log the `systemd-analyze security` lines that lack a check mark for
#      both the webhook logger and Alertmanager units.
# The `\{` / `\}` in the query URLs pass through the Nix string unchanged.
# NOTE(review): they presumably escape curl's URL globbing; confirm.
# NOTE(review): the notifications check uses `grep -v '"0"'`, which would also
# accept jq printing `null` (no result series); the later journal check is what
# actually proves a notification was delivered.
112   testScript = ''
113     alertmanager.wait_for_unit("alertmanager")
114     alertmanager.wait_for_open_port(9093)
115     alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")
116     #alertmanager.wait_until_succeeds("journalctl -o cat -u alertmanager.service | grep 'version=${pkgs.prometheus-alertmanager.version}'")
118     logger.wait_for_unit("alertmanager-webhook-logger")
119     logger.wait_for_open_port(6725)
121     prometheus.wait_for_unit("prometheus")
122     prometheus.wait_for_open_port(9090)
124     prometheus.wait_until_succeeds(
125       "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
126       + "jq '.data.result[0].value[1]' | grep '\"1\"'"
127     )
129     prometheus.wait_until_succeeds(
130       "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
131       + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
132     )
134     prometheus.wait_until_succeeds(
135       "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
136       + "jq '.data.result[0].value[1]' | grep '\"1\"'"
137     )
139     prometheus.wait_until_succeeds(
140       "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
141       + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
142     )
144     logger.wait_until_succeeds(
145       "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
146     )
148     logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v '✓'"))
150     alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v '✓'"))
151   '';