    accessKey = "BKIKJAA5BMMU2RHO6IBB";
    secretKey = "V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12";
  };

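  # Object-storage (S3) client configuration used by the Thanos sidecar, store
  # and compact services; the bucket lives on the "s3" machine running minio.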
  objstore.config = {
    type = "S3";
    config = {
      bucket = "thanos-bucket";
      endpoint = "s3:${toString minioPort}";
      access_key = s3.accessKey;
      secret_key = s3.secretKey;
      insecure = true;
      signature_version2 = false;
      put_user_metadata = {};
      http_config = {
        idle_conn_timeout = "0s";
        insecure_skip_verify = false;
      };
    };
  };

in import ./make-test-python.nix {
  name = "prometheus";

  nodes = {
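    # Runs Prometheus itself together with the pushgateway and the Thanos
    # sidecar that ships its TSDB blocks to S3.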
    prometheus = { pkgs, ... }: {
      virtualisation.diskSize = 2 * 1024;
      virtualisation.memorySize = 2048;
      environment.systemPackages = [ pkgs.jq ];
      networking.firewall.allowedTCPPorts = [ grpcPort ];
      services.prometheus = {
        enable = true;
        enableReload = true;
        scrapeConfigs = [
          {
            job_name = "prometheus";
            static_configs = [
              {
                targets = [ "127.0.0.1:${toString queryPort}" ];
                labels = { instance = "localhost"; };
              }
            ];
          }
          {
            job_name = "pushgateway";
            scrape_interval = "1s";
            static_configs = [
              {
                targets = [ "127.0.0.1:${toString pushgwPort}" ];
              }
            ];
          }
        ];
        rules = [
          ''
            groups:
              - name: test
                rules:
                  - record: testrule
                    expr: count(up{job="prometheus"})
          ''
        ];
        globalConfig = {
          external_labels = {
            some_label = "required by thanos";
          };
        };
        extraFlags = [
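          # Cut TSDB blocks after a few seconds instead of the default two hours
          # so that the sidecar uploads blocks to S3 while the test runs.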
82 "--storage.tsdb.min-block-duration=5s"
83 "--storage.tsdb.max-block-duration=5s"
      services.prometheus.pushgateway = {
        enable = true;
        web.listen-address = ":${toString pushgwPort}";
        persistMetrics = true;
        persistence.interval = "1s";
        stateDir = "prometheus-pushgateway";
      };
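      # The sidecar exposes the local Prometheus data over gRPC to the Thanos
      # queriers and uploads finished TSDB blocks to the S3 bucket.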
      services.thanos = {
        sidecar = {
          enable = true;
          grpc-address = "0.0.0.0:${toString grpcPort}";
          inherit objstore;
        };

        # TODO: Add some tests for these services:
        #rule = {
        #  http-address = "0.0.0.0:19194";
        #  grpc-address = "0.0.0.0:19193";
        #  query.addresses = [
        #  ];
        #};
        #receive = {
        #  http-address = "0.0.0.0:19195";
        #};
      };

      # Adds a "specialisation" of the above config which allows us to
      # "switch" to it and see if the services.prometheus.enableReload
      # functionality actually reloads the prometheus service instead of
      # restarting it.
      specialisation = {
        "prometheus-config-change" = {
          configuration = {
            environment.systemPackages = [ pkgs.yq ];

            # This configuration just adds a new prometheus job
            # to scrape the node_exporter metrics of the s3 machine.
            services.prometheus = {
              scrapeConfigs = [
                {
                  job_name = "s3-node_exporter";
                  static_configs = [
                    {
                      targets = [ "s3:9100" ];
                    }
                  ];
                }
              ];
            };
          };
        };
      };
    };

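    # Runs a Thanos querier that fans out to the sidecar on the prometheus
    # machine, plus a query frontend in front of it.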
    query = { pkgs, ... }: {
      environment.systemPackages = [ pkgs.jq ];
      services.thanos.query = {
        enable = true;
        http-address = "0.0.0.0:${toString queryPort}";
        store.addresses = [
          "prometheus:${toString grpcPort}"
        ];
      };
      services.thanos.query-frontend = {
        enable = true;
        http-address = "0.0.0.0:${toString frontPort}";
        query-frontend.downstream-url = "http://127.0.0.1:${toString queryPort}";
      };
    };

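    # Runs the Thanos store gateway (serving the blocks uploaded to S3), a
    # compactor, and its own querier so the metric can be retrieved from
    # object storage alone.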
    store = { pkgs, ... }: {
      virtualisation.diskSize = 2 * 1024;
      virtualisation.memorySize = 2048;
      environment.systemPackages = with pkgs; [ jq thanos ];
      services.thanos.store = {
        enable = true;
        http-address = "0.0.0.0:10902";
        grpc-address = "0.0.0.0:${toString grpcPort}";
        inherit objstore;
        sync-block-duration = "1s";
      };
      services.thanos.compact = {
        enable = true;
        http-address = "0.0.0.0:10903";
        inherit objstore;
        consistency-delay = "5s";
      };
      services.thanos.query = {
        enable = true;
        http-address = "0.0.0.0:${toString queryPort}";
        store.addresses = [
          "localhost:${toString grpcPort}"
        ];
      };
    };

    s3 = { pkgs, ... } : {
      # Minio requires at least 1GiB of free disk space to run.
      virtualisation.diskSize = 2 * 1024;
      networking.firewall.allowedTCPPorts = [ minioPort ];

      services.minio = {
        enable = true;
        inherit (s3) accessKey secretKey;
      };

      environment.systemPackages = [ pkgs.minio-client ];

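      # node_exporter is scraped by the "s3-node_exporter" job that the
      # specialisation above adds.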
      services.prometheus.exporters.node = {
        enable = true;
        openFirewall = true;
      };
    };
  };

  testScript = { nodes, ... } : ''
    # Before starting the other machines we first make sure that our S3 service is online
    # and has a bucket added for thanos:
    s3.start()
    s3.wait_for_unit("minio.service")
    s3.wait_for_open_port(${toString minioPort})
221 "mc config host add minio "
222 + "http://localhost:${toString minioPort} "
223 + "${s3.accessKey} ${s3.secretKey} --api s3v4",
224 "mc mb minio/thanos-bucket",
    # Now that s3 has started we can start the other machines:
    for machine in prometheus, query, store:
        machine.start()

    # Check if prometheus responds to requests:
    prometheus.wait_for_unit("prometheus.service")

    prometheus.wait_for_open_port(${toString queryPort})
    prometheus.succeed("curl -sf http://127.0.0.1:${toString queryPort}/metrics")

    # Let's test if pushing a metric to the pushgateway succeeds:
    prometheus.wait_for_unit("pushgateway.service")
    prometheus.succeed(
        "echo 'some_metric 3.14' | "
        + "curl -f --data-binary \@- "
        + "http://127.0.0.1:${toString pushgwPort}/metrics/job/some_job"
    )

    # Now check whether that metric gets ingested by prometheus.
    # Since we'll check for the metric several times on different machines
    # we abstract the test using the following function:

    # Function to check that the metric "some_metric" has been received and has the correct value.
    def wait_for_metric(machine):
        return machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:${toString queryPort}/api/v1/query?query=some_metric' | "
            + "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
        )

    wait_for_metric(prometheus)

    # Let's test if the pushgateway persists metrics to the configured location.
    prometheus.wait_until_succeeds("test -e /var/lib/prometheus-pushgateway/metrics")

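    # Check that the Thanos sidecar running next to Prometheus has come up: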
    prometheus.wait_for_unit("thanos-sidecar.service")

    # Test if the Thanos query service can correctly retrieve the metric that was sent above.
    query.wait_for_unit("thanos-query.service")
    wait_for_metric(query)

    # Test the Thanos query frontend service:
    query.wait_for_unit("thanos-query-frontend.service")
    query.succeed("curl -sS http://localhost:${toString frontPort}/-/healthy")

    # Test if the Thanos sidecar has correctly uploaded its TSDB to S3, if the
    # Thanos storage service has correctly downloaded it from S3 and if the Thanos
    # query service running on $store can correctly retrieve the metric:
    store.wait_for_unit("thanos-store.service")
    wait_for_metric(store)

    store.wait_for_unit("thanos-compact.service")

    # Test if the Thanos bucket command is able to retrieve blocks from the S3 bucket
    # and check if the blocks have the correct labels:
    store.succeed(
        "thanos tools bucket ls "
        + "--objstore.config-file=${nodes.store.config.services.thanos.store.objstore.config-file} "
        + "--output=json | "
        + "jq .thanos.labels.some_label | "
        + "grep 'required by thanos'"
    )