/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/limits.h>
#include <linux/oom.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include "../kselftest.h"
#include "cgroup_util.h"

static bool has_localevents;
static bool has_recursiveprot;

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup;

	if (cg_create(parent2))
		goto cleanup;

	if (cg_create(child2))
		goto cleanup;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child2);
	cg_destroy(parent2);
	free(child2);
	free(parent2);

	cg_destroy(child);
	cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;

cleanup:
	free(buf);
	return ret;
}

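/*
 * For reference: the checks above lean on values_close() from
 * cgroup_util.c. A minimal sketch of the assumed semantics (a relative
 * tolerance expressed in percent); the real helper may differ in detail:
 */
static inline bool values_close_sketch(long a, long b, int err_pct)
{
	/* true when |a - b| is within err_pct% of the combined magnitude */
	return labs(a - b) <= (a + b) / 100 * err_pct;
}
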
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	long current, file;
	int ret = -1;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");

	file = cg_read_key_long(cgroup, "memory.stat", "file ");

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

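/*
 * For reference, a sketch of what alloc_pagecache() (cgroup_util.c) is
 * assumed to do: grow the temp file behind @fd and touch every page so
 * the pages are charged to the caller's cgroup as page cache. This is an
 * illustration, not the exact helper.
 */
static inline int alloc_pagecache_sketch(int fd, size_t size)
{
	char buf[PAGE_SIZE];
	size_t i;

	if (ftruncate(fd, size))
		return -1;

	/* reading every page of the (sparse) file populates the page cache */
	for (i = 0; i < size; i += sizeof(buf))
		if (pread(fd, buf, sizeof(buf), i) < 0)
			return -1;

	return 0;
}
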
/*
 * This test creates a memory cgroup, allocates some anonymous memory
 * and some pagecache, and checks memory.current, memory.peak, and some
 * memory.stat values.
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	struct stat ss;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the
	 * free path. We need at least three to be closed in a different order
	 * than the writes occurred, to test the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
	if (peak_fd == -1)
		goto cleanup;

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place (by
	 * checking the writable bit on the file's st_mode).
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (peak_fd != -1)
		close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

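/*
 * The fd-local protocol exercised above, condensed into a sketch (error
 * handling elided; cg_open() and cg_read_long_fd() are the cgroup_util.c
 * helpers the test itself uses): an fd on memory.peak reports the global
 * peak until something is written through it, after which reads through
 * that fd report the peak observed since the write.
 */
static inline long local_peak_sketch(const char *memcg)
{
	int fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
	long peak = -1;

	if (fd < 0)
		return -1;

	/* any non-empty write flips this fd into local-peak tracking */
	if (write(fd, "r\n", 2) == 2)
		peak = cg_read_long_fd(fd); /* peak since the reset above */

	close(fd);
	return peak;
}
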
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

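/*
 * The alloc_*_noexit() helpers above only make sense given how cg_run()
 * and cg_run_nowait() (cgroup_util.c) drive them. A rough sketch of the
 * assumed cg_run() behaviour, for orientation only; the real helper
 * lives in cgroup_util.c:
 */
static inline int cg_run_sketch(const char *cgroup,
				int (*fn)(const char *cgroup, void *arg),
				void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char pidbuf[64];

		/* move the child into the target cgroup, then run the payload */
		snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", pidbuf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/* cg_run_nowait() would return the pid here instead of reaping */
	waitpid(pid, &retcode, 0);
	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);
	return -1;
}
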
/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for the origin of these numbers, see the model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

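/*
 * A sketch of what memcg_prepare_for_wait()/cg_wait_for() (cgroup_util.c)
 * are assumed to do: arm an inotify watch on the cgroup's memory.events
 * file and block until the kernel modifies it. Illustrative only (it
 * would additionally need <sys/inotify.h>); the real helpers live in
 * cgroup_util.c.
 */
static inline int wait_for_memory_events_sketch(const char *memcg)
{
	char path[PATH_MAX];
	char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
	int fd;

	fd = inotify_init1(0);
	if (fd < 0)
		return -1;

	snprintf(path, sizeof(path), "%s/memory.events", memcg);
	if (inotify_add_watch(fd, path, IN_MODIFY) < 0) {
		close(fd);
		return -1;
	}

	/* blocks until memory.events changes (e.g. a "high" event fires) */
	if (read(fd, buf, sizeof(buf)) < 0) {
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}
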
/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M; this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;

cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long peak, max;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place (by checking the writable bit on the file's st_mode).
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode */
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list.
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak)
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		goto cleanup;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	snprintf(servport, sizeof(servport), "%hd", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat.sock stay similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_kill events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current_peak),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max_peak),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

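/*
 * For example, T(test_memcg_sock) expands to
 *
 *	{ test_memcg_sock, "test_memcg_sock" },
 *
 * pairing each test function with its printable name for the ksft
 * reporting loop in main() below.
 */
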
int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that the memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}