/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/limits.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>

#include "../kselftest.h"
#include "cgroup_util.h"
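/*
 * All of the cg_*() and alloc_*() helpers used below come from
 * cgroup_util.{h,c} in this directory: cg_name()/cg_create()/cg_destroy()
 * manage cgroup directories, cg_read*()/cg_write() access interface files,
 * and cg_run()/cg_run_nowait() fork a child, attach it to the given cgroup
 * and run a callback in it (cg_run() also waits and returns its status).
 *
 * A minimal sketch of the size helpers and the tolerance check used
 * throughout, assuming the usual cgroup_util definitions (shown here only
 * for illustration):
 *
 *	#define PAGE_SIZE 4096
 *	#define MB(x) ((x) << 20)
 *
 *	// values_close(a, b, err): true if a and b differ by no more than
 *	// err percent of their sum.
 *	static inline int values_close(long a, long b, int err)
 *	{
 *		return labs(a - b) <= (a + b) / 100 * err;
 *	}
 */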
/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
        char *parent, *child, *parent2 = NULL, *child2 = NULL;

        /* Create two nested cgroups with the memory controller enabled */
        parent = cg_name(root, "memcg_test_0");
        child = cg_name(root, "memcg_test_0/memcg_test_1");
        if (!parent || !child)

        if (cg_create(parent))

        if (cg_write(parent, "cgroup.subtree_control", "+memory"))

        if (cg_read_strstr(child, "cgroup.controllers", "memory"))

        /* Create two nested cgroups without enabling memory controller */
        parent2 = cg_name(root, "memcg_test_1");
        child2 = cg_name(root, "memcg_test_1/memcg_test_1");
        if (!parent2 || !child2)

        if (cg_create(parent2))

        if (cg_create(child2))

        if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
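        /*
         * The memory controller was never enabled in parent2's
         * cgroup.subtree_control, so it must not appear in child2's
         * cgroup.controllers.
         */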
        if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
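        /*
         * Touch every page of the buffer so the anonymous memory is actually
         * faulted in and charged to the cgroup rather than merely reserved.
         */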
        for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
        current = cg_read_long(cgroup, "memory.current");

        if (!values_close(size, current, 3))

        anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
        if (!values_close(anon, current, 3))
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
        size_t size = MB(50);

        if (alloc_pagecache(fd, size))

        current = cg_read_long(cgroup, "memory.current");

        file = cg_read_key_long(cgroup, "memory.stat", "file ");

        if (!values_close(file, current, 10))
/*
 * This test creates a memory cgroup, allocates some anonymous memory
 * and some pagecache, and checks memory.current and some memory.stat
 * values.
 */
static int test_memcg_current(const char *root)
{
        memcg = cg_name(root, "memcg_test");

        if (cg_create(memcg))

        current = cg_read_long(memcg, "memory.current");
        if (cg_run(memcg, alloc_anon_50M_check, NULL))

        if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
static int alloc_pagecache_50M(const char *cgroup, void *arg)
{
        return alloc_pagecache(fd, MB(50));
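/*
 * The *_noexit() helpers below keep their allocation alive after the
 * allocation itself succeeds: they wait, polling getppid(), until the test
 * process that forked them exits, so the charged memory stays in the
 * cgroup for the rest of the test.
 */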
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
        int ppid = getppid();

        if (alloc_pagecache(fd, MB(50)))

        while (getppid() == ppid)

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
        int ppid = getppid();

        if (alloc_anon(cgroup, arg))

        while (getppid() == ppid)
/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
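        /*
         * Poll cgroup.procs until it reads back empty, i.e. until every task
         * in the cgroup has been killed and reaped; give up after a bounded
         * number of attempts.
         */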
        for (limit = 10; limit > 0; limit--) {
                if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 50M,  memory.max = 200M
 * A/B     memory.min = 50M,  memory.current = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 500M, memory.current = 0
 * A/B/F   memory.min = 0,    memory.current = 50M
 *
 * Usages are pagecache, but the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that
 * memory.min protects pagecache even in this case.
 */
static int test_memcg_min(const char *root)
{
        char *parent[3] = {NULL};
        char *children[4] = {NULL};

        parent[0] = cg_name(root, "memcg_test_0");

        parent[1] = cg_name(parent[0], "memcg_test_1");

        parent[2] = cg_name(parent[0], "memcg_test_2");

        if (cg_create(parent[0]))

        if (cg_read_long(parent[0], "memory.min")) {

        if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))

        if (cg_write(parent[0], "memory.max", "200M"))

        if (cg_write(parent[0], "memory.swap.max", "0"))

        if (cg_create(parent[1]))

        if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))

        if (cg_create(parent[2]))

        for (i = 0; i < ARRAY_SIZE(children); i++) {
                children[i] = cg_name_indexed(parent[1], "child_memcg", i);

                if (cg_create(children[i]))

                cg_run_nowait(children[i], alloc_pagecache_50M_noexit,

        if (cg_write(parent[0], "memory.min", "50M"))

        if (cg_write(parent[1], "memory.min", "50M"))

        if (cg_write(children[0], "memory.min", "75M"))

        if (cg_write(children[1], "memory.min", "25M"))

        if (cg_write(children[2], "memory.min", "500M"))

        if (cg_write(children[3], "memory.min", "0"))

        while (!values_close(cg_read_long(parent[1], "memory.current"),
        if (cg_run(parent[2], alloc_anon, (void *)MB(148)))

        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))

        for (i = 0; i < ARRAY_SIZE(children); i++)
                c[i] = cg_read_long(children[i], "memory.current");
        if (!values_close(c[0], MB(33), 10))

        if (!values_close(c[1], MB(17), 10))

        if (!values_close(c[2], 0, 1))
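        /*
         * Now try to allocate more (170M) than A can provide without
         * breaking B's protection: the allocation is expected to fail, and
         * B should still hold roughly its protected 50M of pagecache.
         */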
        if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))

        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))

        for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
                cg_destroy(children[i]);

        for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
                cg_destroy(parent[i]);
/*
 * First, this test creates the following hierarchy:
 * A       memory.low = 50M,  memory.max = 200M
 * A/B     memory.low = 50M,  memory.current = 50M
 * A/B/C   memory.low = 75M,  memory.current = 50M
 * A/B/D   memory.low = 25M,  memory.current = 50M
 * A/B/E   memory.low = 500M, memory.current = 0
 * A/B/F   memory.low = 0,    memory.current = 50M
 *
 * Usages are pagecache.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available,
 * and checks low and oom events in memory.events.
 */
static int test_memcg_low(const char *root)
{
        char *parent[3] = {NULL};
        char *children[4] = {NULL};

        parent[0] = cg_name(root, "memcg_test_0");

        parent[1] = cg_name(parent[0], "memcg_test_1");

        parent[2] = cg_name(parent[0], "memcg_test_2");

        if (cg_create(parent[0]))

        if (cg_read_long(parent[0], "memory.low"))

        if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))

        if (cg_write(parent[0], "memory.max", "200M"))

        if (cg_write(parent[0], "memory.swap.max", "0"))

        if (cg_create(parent[1]))

        if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))

        if (cg_create(parent[2]))

        for (i = 0; i < ARRAY_SIZE(children); i++) {
                children[i] = cg_name_indexed(parent[1], "child_memcg", i);

                if (cg_create(children[i]))

                if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))

        if (cg_write(parent[0], "memory.low", "50M"))

        if (cg_write(parent[1], "memory.low", "50M"))

        if (cg_write(children[0], "memory.low", "75M"))

        if (cg_write(children[1], "memory.low", "25M"))

        if (cg_write(children[2], "memory.low", "500M"))

        if (cg_write(children[3], "memory.low", "0"))

        if (cg_run(parent[2], alloc_anon, (void *)MB(148)))

        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))

        for (i = 0; i < ARRAY_SIZE(children); i++)
                c[i] = cg_read_long(children[i], "memory.current");
        if (!values_close(c[0], MB(33), 10))

        if (!values_close(c[1], MB(17), 10))

        if (!values_close(c[2], 0, 1))
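        /*
         * Unlike memory.min, memory.low is a best-effort boundary: when
         * nothing else is left to reclaim, protected pagecache is reclaimed
         * anyway, so this 166M anon allocation is expected to succeed.
         */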
        if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
                        "memory.low prevents from allocating anon memory\n");

        for (i = 0; i < ARRAY_SIZE(children); i++) {
                oom = cg_read_key_long(children[i], "memory.events", "oom ");
                low = cg_read_key_long(children[i], "memory.events", "low ");
                if (i < 2 && low <= 0)

        for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
                cg_destroy(children[i]);

        for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
                cg_destroy(parent[i]);
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
        size_t size = MB(50);

        if (alloc_pagecache(fd, size))

        current = cg_read_long(cgroup, "memory.current");
        if (current <= MB(29) || current > MB(30))
/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
        memcg = cg_name(root, "memcg_test");

        if (cg_create(memcg))

        if (cg_read_strcmp(memcg, "memory.high", "max\n"))

        if (cg_write(memcg, "memory.swap.max", "0"))

        if (cg_write(memcg, "memory.high", "30M"))
        if (cg_run(memcg, alloc_anon, (void *)MB(100)))

        if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))

        if (cg_run(memcg, alloc_pagecache_max_30M, NULL))

        high = cg_read_key_long(memcg, "memory.events", "high ");
/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
        memcg = cg_name(root, "memcg_test");

        if (cg_create(memcg))

        if (cg_read_strcmp(memcg, "memory.max", "max\n"))

        if (cg_write(memcg, "memory.swap.max", "0"))

        if (cg_write(memcg, "memory.max", "30M"))

        /* Should be killed by OOM killer */
        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))

        if (cg_run(memcg, alloc_pagecache_max_30M, NULL))

        current = cg_read_long(memcg, "memory.current");
        if (current > MB(30) || !current)

        max = cg_read_key_long(memcg, "memory.events", "max ");
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
        long mem_max = (long)arg;
        size_t size = MB(50);
        long mem_current, swap_current;

        for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)

        mem_current = cg_read_long(cgroup, "memory.current");
        if (!mem_current || !values_close(mem_current, mem_max, 3))

        swap_current = cg_read_long(cgroup, "memory.swap.current");
            !values_close(mem_current + swap_current, size, 3))
/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
static int test_memcg_swap_max(const char *root)
{
        if (!is_swap_enabled())

        memcg = cg_name(root, "memcg_test");

        if (cg_create(memcg))

        if (cg_read_long(memcg, "memory.swap.current")) {

        if (cg_read_strcmp(memcg, "memory.max", "max\n"))

        if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))

        if (cg_write(memcg, "memory.swap.max", "30M"))

        if (cg_write(memcg, "memory.max", "30M"))

        /* Should be killed by OOM killer */
        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))

        if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)

        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
        if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))

        max = cg_read_key_long(memcg, "memory.events", "max ");
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
        memcg = cg_name(root, "memcg_test");

        if (cg_create(memcg))

        if (cg_write(memcg, "memory.max", "30M"))

        if (cg_write(memcg, "memory.swap.max", "0"))

        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
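        /*
         * The allocator must have been OOM-killed, so the cgroup should have
         * no member processes left and exactly one oom / oom_kill event pair.
         */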
        if (cg_read_strcmp(memcg, "cgroup.procs", ""))

        if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)

        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
/* Arguments handed to the forked TCP server: the port to bind to and a
 * pair of control descriptors used to report the bind status back. */
struct tcp_server_args {
        unsigned short port;
        int ctl[2];
};
static int tcp_server(const char *cgroup, void *arg)
{
        struct tcp_server_args *srv_args = arg;
        struct sockaddr_in6 saddr = { 0 };
        socklen_t slen = sizeof(saddr);
        int sk, client_sk, ctl_fd, yes = 1, ret = -1;

        close(srv_args->ctl[0]);
        ctl_fd = srv_args->ctl[1];
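        /*
         * ctl_fd, the write end of the control descriptors, is used below to
         * report the bind status (and readiness) back to the test process.
         */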
        saddr.sin6_family = AF_INET6;
        saddr.sin6_addr = in6addr_any;
        saddr.sin6_port = htons(srv_args->port);

        sk = socket(AF_INET6, SOCK_STREAM, 0);

        if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)

        if (bind(sk, (struct sockaddr *)&saddr, slen)) {
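                /* Report the bind error (e.g. EADDRINUSE) back to the test
                 * process so it can retry on a different port. */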
                write(ctl_fd, &errno, sizeof(errno));

        if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {

        client_sk = accept(sk, NULL, NULL);
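        /*
         * Keep pushing 1MB chunks to the client; the connection being torn
         * down by the client (ECONNRESET) is the expected, successful way
         * for the server to stop.
         */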
                uint8_t buf[0x100000];

                if (write(client_sk, buf, sizeof(buf)) <= 0) {
                        if (errno == ECONNRESET)
static int tcp_client(const char *cgroup, unsigned short port)
{
        const char server[] = "localhost";
        int retries = 0x10; /* nice round number */

        snprintf(servport, sizeof(servport), "%hu", port);
        ret = getaddrinfo(server, servport, NULL, &ai);

        sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);

        ret = connect(sk, ai->ai_addr, ai->ai_addrlen);

                uint8_t buf[0x100000];

                if (read(sk, buf, sizeof(buf)) <= 0)
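                /*
                 * Interleave the reads with accounting checks: the cgroup's
                 * total charge (memory.current) and the "sock " counter in
                 * memory.stat should converge, and once they agree within
                 * ~10% the client treats socket accounting as working.
                 */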
                current = cg_read_long(cgroup, "memory.current");
                sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

                if (current < 0 || sock < 0)

                if (values_close(current, sock, 10)) {
/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
        int bind_retries = 5, ret = KSFT_FAIL, pid, err;

        memcg = cg_name(root, "memcg_test");

        if (cg_create(memcg))

        while (bind_retries--) {
                struct tcp_server_args args;

                port = args.port = 1000 + rand() % 60000;

                pid = cg_run_nowait(memcg, tcp_server, &args);

                if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
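                /*
                 * The server reports its bind status through the control
                 * descriptors: zero means it is listening, EADDRINUSE means
                 * the random port was already taken, in which case the
                 * failed server is reaped and another port is tried.
                 */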
                if (err != EADDRINUSE)

                waitpid(pid, NULL, 0);

        if (err == EADDRINUSE) {

        if (tcp_client(memcg, port) != KSFT_PASS)

        waitpid(pid, &err, 0);
        if (WEXITSTATUS(err))
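        /*
         * Once both ends have exited, all socket buffers should have been
         * uncharged again: memory.current must still be readable, and the
         * "sock " counter in memory.stat should be back to zero.
         */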
        if (cg_read_long(memcg, "memory.current") < 0)

        if (cg_read_key_long(memcg, "memory.stat", "sock "))
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf (but not the parent) were killed.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
        int ret = KSFT_FAIL;
        char *parent, *child;

        parent = cg_name(root, "memcg_test_0");
        child = cg_name(root, "memcg_test_0/memcg_test_1");

        if (!parent || !child)

        if (cg_create(parent))

        if (cg_create(child))

        if (cg_write(parent, "cgroup.subtree_control", "+memory"))

        if (cg_write(child, "memory.max", "50M"))

        if (cg_write(child, "memory.swap.max", "0"))

        if (cg_write(child, "memory.oom.group", "1"))
        cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
        if (!cg_run(child, alloc_anon, (void *)MB(100)))

        if (cg_test_proc_killed(child))

        if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)

        if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
        int ret = KSFT_FAIL;
        char *parent, *child;

        parent = cg_name(root, "memcg_test_0");
        child = cg_name(root, "memcg_test_0/memcg_test_1");

        if (!parent || !child)

        if (cg_create(parent))

        if (cg_create(child))

        if (cg_write(parent, "memory.max", "80M"))

        if (cg_write(parent, "memory.swap.max", "0"))

        if (cg_write(parent, "memory.oom.group", "1"))
        cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

        if (!cg_run(child, alloc_anon, (void *)MB(100)))

        if (cg_test_proc_killed(child))
        if (cg_test_proc_killed(parent))
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
        int ret = KSFT_FAIL;

        memcg = cg_name(root, "memcg_test_0");

        if (cg_create(memcg))

        if (cg_write(memcg, "memory.max", "50M"))

        if (cg_write(memcg, "memory.swap.max", "0"))

        if (cg_write(memcg, "memory.oom.group", "1"))

        safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
        if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
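        /*
         * Tasks with oom_score_adj set to OOM_SCORE_ADJ_MIN (-1000) are
         * exempt even from a group OOM kill, so safe_pid is expected to
         * survive and has to be killed explicitly at the end of the test.
         */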
        cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))

        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)

        if (kill(safe_pid, SIGKILL))
#define T(x) { x, #x }
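/*
 * Each entry pairs a test function with its stringified name (via the T()
 * macro above) so that main() can report a per-test kselftest result.
 */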
struct memcg_test {
        int (*fn)(const char *root);
        const char *name;
} tests[] = {
        T(test_memcg_subtree_control),
        T(test_memcg_current),
        T(test_memcg_oom_events),
        T(test_memcg_swap_max),
        T(test_memcg_oom_group_leaf_events),
        T(test_memcg_oom_group_parent_events),
        T(test_memcg_oom_group_score_events),
int main(int argc, char **argv)
{
        char root[PATH_MAX];
        int i, ret = EXIT_SUCCESS;

        if (cg_find_unified_root(root, sizeof(root)))
                ksft_exit_skip("cgroup v2 isn't mounted\n");

        /*
         * Check that memory controller is available:
         * memory is listed in cgroup.controllers
         */
        if (cg_read_strstr(root, "cgroup.controllers", "memory"))
                ksft_exit_skip("memory controller isn't available\n");
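        /*
         * If the memory controller is not yet enabled for the root's
         * children, try to enable it here: every test above creates child
         * cgroups that rely on the memory.* interface files being present.
         */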
        if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
                if (cg_write(root, "cgroup.subtree_control", "+memory"))
                        ksft_exit_skip("Failed to set memory controller\n");

        for (i = 0; i < ARRAY_SIZE(tests); i++) {
                switch (tests[i].fn(root)) {
                case KSFT_PASS:
                        ksft_test_result_pass("%s\n", tests[i].name);
                        break;
                case KSFT_SKIP:
                        ksft_test_result_skip("%s\n", tests[i].name);
                        break;
                default:
                        ret = EXIT_FAILURE;
                        ksft_test_result_fail("%s\n", tests[i].name);