drm/panel: panel-himax-hx83102: support for csot-pna957qt1-1 MIPI-DSI panel
[drm/drm-misc.git] / tools / testing / selftests / cgroup / test_memcontrol.c
blob16f5d74ae762edbd4540dcbe51185f38c3408120
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define _GNU_SOURCE
4 #include <linux/limits.h>
5 #include <linux/oom.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <sys/socket.h>
14 #include <sys/wait.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
17 #include <netdb.h>
18 #include <errno.h>
19 #include <sys/mman.h>
21 #include "../kselftest.h"
22 #include "cgroup_util.h"
24 static bool has_localevents;
25 static bool has_recursiveprot;
28 * This test creates two nested cgroups with and without enabling
29 * the memory controller.
31 static int test_memcg_subtree_control(const char *root)
33 char *parent, *child, *parent2 = NULL, *child2 = NULL;
34 int ret = KSFT_FAIL;
35 char buf[PAGE_SIZE];
37 /* Create two nested cgroups with the memory controller enabled */
38 parent = cg_name(root, "memcg_test_0");
39 child = cg_name(root, "memcg_test_0/memcg_test_1");
40 if (!parent || !child)
41 goto cleanup_free;
43 if (cg_create(parent))
44 goto cleanup_free;
46 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
47 goto cleanup_parent;
49 if (cg_create(child))
50 goto cleanup_parent;
52 if (cg_read_strstr(child, "cgroup.controllers", "memory"))
53 goto cleanup_child;
55 /* Create two nested cgroups without enabling memory controller */
56 parent2 = cg_name(root, "memcg_test_1");
57 child2 = cg_name(root, "memcg_test_1/memcg_test_1");
58 if (!parent2 || !child2)
59 goto cleanup_free2;
61 if (cg_create(parent2))
62 goto cleanup_free2;
64 if (cg_create(child2))
65 goto cleanup_parent2;
67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
68 goto cleanup_all;
70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
71 goto cleanup_all;
73 ret = KSFT_PASS;
75 cleanup_all:
76 cg_destroy(child2);
77 cleanup_parent2:
78 cg_destroy(parent2);
79 cleanup_free2:
80 free(parent2);
81 free(child2);
82 cleanup_child:
83 cg_destroy(child);
84 cleanup_parent:
85 cg_destroy(parent);
86 cleanup_free:
87 free(parent);
88 free(child);
90 return ret;
93 static int alloc_anon_50M_check(const char *cgroup, void *arg)
95 size_t size = MB(50);
96 char *buf, *ptr;
97 long anon, current;
98 int ret = -1;
100 buf = malloc(size);
101 if (buf == NULL) {
102 fprintf(stderr, "malloc() failed\n");
103 return -1;
106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
107 *ptr = 0;
109 current = cg_read_long(cgroup, "memory.current");
110 if (current < size)
111 goto cleanup;
113 if (!values_close(size, current, 3))
114 goto cleanup;
116 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
117 if (anon < 0)
118 goto cleanup;
120 if (!values_close(anon, current, 3))
121 goto cleanup;
123 ret = 0;
124 cleanup:
125 free(buf);
126 return ret;
/*
 * Populate 50M of page cache through a temporary file and check that
 * @cgroup's memory.current and the "file " entry of memory.stat both
 * account for it.
 *
 * @cgroup: cgroup whose counters are inspected
 * @arg:    unused
 *
 * Returns 0 when the counters match, -1 on any failure.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	/*
	 * Reject a negative (failed) read explicitly: compared against the
	 * unsigned size it would be promoted to a huge value and pass the
	 * lower-bound check.
	 */
	if (current < 0 || (size_t)current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}
162 * This test create a memory cgroup, allocates
163 * some anonymous memory and some pagecache
164 * and checks memory.current, memory.peak, and some memory.stat values.
166 static int test_memcg_current_peak(const char *root)
168 int ret = KSFT_FAIL;
169 long current, peak, peak_reset;
170 char *memcg;
171 bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
172 int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
173 struct stat ss;
175 memcg = cg_name(root, "memcg_test");
176 if (!memcg)
177 goto cleanup;
179 if (cg_create(memcg))
180 goto cleanup;
182 current = cg_read_long(memcg, "memory.current");
183 if (current != 0)
184 goto cleanup;
186 peak = cg_read_long(memcg, "memory.peak");
187 if (peak != 0)
188 goto cleanup;
190 if (cg_run(memcg, alloc_anon_50M_check, NULL))
191 goto cleanup;
193 peak = cg_read_long(memcg, "memory.peak");
194 if (peak < MB(50))
195 goto cleanup;
198 * We'll open a few FDs for the same memory.peak file to exercise the free-path
199 * We need at least three to be closed in a different order than writes occurred to test
200 * the linked-list handling.
202 peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
204 if (peak_fd == -1) {
205 if (errno == ENOENT)
206 ret = KSFT_SKIP;
207 goto cleanup;
211 * Before we try to use memory.peak's fd, try to figure out whether
212 * this kernel supports writing to that file in the first place. (by
213 * checking the writable bit on the file's st_mode)
215 if (fstat(peak_fd, &ss))
216 goto cleanup;
218 if ((ss.st_mode & S_IWUSR) == 0) {
219 ret = KSFT_SKIP;
220 goto cleanup;
223 peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
225 if (peak_fd2 == -1)
226 goto cleanup;
228 peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
230 if (peak_fd3 == -1)
231 goto cleanup;
233 /* any non-empty string resets, but make it clear */
234 static const char reset_string[] = "reset\n";
236 peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
237 if (peak_reset != sizeof(reset_string))
238 goto cleanup;
240 peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
241 if (peak_reset != sizeof(reset_string))
242 goto cleanup;
244 peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
245 if (peak_reset != sizeof(reset_string))
246 goto cleanup;
248 /* Make sure a completely independent read isn't affected by our FD-local reset above*/
249 peak = cg_read_long(memcg, "memory.peak");
250 if (peak < MB(50))
251 goto cleanup;
253 fd2_closed = true;
254 if (close(peak_fd2))
255 goto cleanup;
257 peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
259 if (peak_fd4 == -1)
260 goto cleanup;
262 peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
263 if (peak_reset != sizeof(reset_string))
264 goto cleanup;
266 peak = cg_read_long_fd(peak_fd);
267 if (peak > MB(30) || peak < 0)
268 goto cleanup;
270 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
271 goto cleanup;
273 peak = cg_read_long(memcg, "memory.peak");
274 if (peak < MB(50))
275 goto cleanup;
277 /* Make sure everything is back to normal */
278 peak = cg_read_long_fd(peak_fd);
279 if (peak < MB(50))
280 goto cleanup;
282 peak = cg_read_long_fd(peak_fd4);
283 if (peak < MB(50))
284 goto cleanup;
286 fd3_closed = true;
287 if (close(peak_fd3))
288 goto cleanup;
290 fd4_closed = true;
291 if (close(peak_fd4))
292 goto cleanup;
294 ret = KSFT_PASS;
296 cleanup:
297 close(peak_fd);
298 if (!fd2_closed)
299 close(peak_fd2);
300 if (!fd3_closed)
301 close(peak_fd3);
302 if (!fd4_closed)
303 close(peak_fd4);
304 cg_destroy(memcg);
305 free(memcg);
307 return ret;
/*
 * Populate 50M of page cache through the descriptor smuggled in via
 * @arg, then idle until getppid() no longer returns the value seen at
 * entry.
 *
 * Returns -1 if alloc_pagecache() reports failure, 0 otherwise.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	const int fd = (long)arg;
	const int original_parent = getppid();

	if (alloc_pagecache(fd, MB(50)) != 0)
		return -1;

	/* Poll once a second for a change of parent. */
	for (;;) {
		if (getppid() != original_parent)
			break;
		sleep(1);
	}

	return 0;
}
324 static int alloc_anon_noexit(const char *cgroup, void *arg)
326 int ppid = getppid();
327 size_t size = (unsigned long)arg;
328 char *buf, *ptr;
330 buf = malloc(size);
331 if (buf == NULL) {
332 fprintf(stderr, "malloc() failed\n");
333 return -1;
336 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
337 *ptr = 0;
339 while (getppid() == ppid)
340 sleep(1);
342 free(buf);
343 return 0;
/*
 * Poll @cgroup's cgroup.procs until it compares equal to the empty
 * string, i.e. until the processes killed asynchronously by the OOM
 * killer are gone.
 *
 * Gives up after ten polls spaced 100ms apart.
 * Returns 0 once cgroup.procs is empty, -1 on timeout.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int polls_left = 10;

	while (polls_left-- > 0) {
		if (!cg_read_strcmp(cgroup, "cgroup.procs", ""))
			return 0;

		usleep(100000);
	}

	return -1;
}
363 static bool reclaim_until(const char *memcg, long goal);
366 * First, this test creates the following hierarchy:
367 * A memory.min = 0, memory.max = 200M
368 * A/B memory.min = 50M
369 * A/B/C memory.min = 75M, memory.current = 50M
370 * A/B/D memory.min = 25M, memory.current = 50M
371 * A/B/E memory.min = 0, memory.current = 50M
372 * A/B/F memory.min = 500M, memory.current = 0
374 * (or memory.low if we test soft protection)
376 * Usages are pagecache and the test keeps a running
377 * process in every leaf cgroup.
378 * Then it creates A/G and creates a significant
379 * memory pressure in A.
381 * Then it checks actual memory usages and expects that:
382 * A/B memory.current ~= 50M
383 * A/B/C memory.current ~= 29M
384 * A/B/D memory.current ~= 21M
385 * A/B/E memory.current ~= 0
386 * A/B/F memory.current = 0
387 * (for origin of the numbers, see model in memcg_protection.m.)
389 * After that it tries to allocate more than there is
390 * unprotected memory in A available, and checks that:
391 * a) memory.min protects pagecache even in this case,
392 * b) memory.low allows reclaiming page cache with low events.
394 * Then we try to reclaim from A/B/C using memory.reclaim until its
395 * usage reaches 10M.
396 * This makes sure that:
397 * (a) We ignore the protection of the reclaim target memcg.
398 * (b) The previously calculated emin value (~29M) should be dismissed.
400 static int test_memcg_protection(const char *root, bool min)
402 int ret = KSFT_FAIL, rc;
403 char *parent[3] = {NULL};
404 char *children[4] = {NULL};
405 const char *attribute = min ? "memory.min" : "memory.low";
406 long c[4];
407 long current;
408 int i, attempts;
409 int fd;
411 fd = get_temp_fd();
412 if (fd < 0)
413 goto cleanup;
415 parent[0] = cg_name(root, "memcg_test_0");
416 if (!parent[0])
417 goto cleanup;
419 parent[1] = cg_name(parent[0], "memcg_test_1");
420 if (!parent[1])
421 goto cleanup;
423 parent[2] = cg_name(parent[0], "memcg_test_2");
424 if (!parent[2])
425 goto cleanup;
427 if (cg_create(parent[0]))
428 goto cleanup;
430 if (cg_read_long(parent[0], attribute)) {
431 /* No memory.min on older kernels is fine */
432 if (min)
433 ret = KSFT_SKIP;
434 goto cleanup;
437 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
438 goto cleanup;
440 if (cg_write(parent[0], "memory.max", "200M"))
441 goto cleanup;
443 if (cg_write(parent[0], "memory.swap.max", "0"))
444 goto cleanup;
446 if (cg_create(parent[1]))
447 goto cleanup;
449 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
450 goto cleanup;
452 if (cg_create(parent[2]))
453 goto cleanup;
455 for (i = 0; i < ARRAY_SIZE(children); i++) {
456 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
457 if (!children[i])
458 goto cleanup;
460 if (cg_create(children[i]))
461 goto cleanup;
463 if (i > 2)
464 continue;
466 cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
467 (void *)(long)fd);
470 if (cg_write(parent[1], attribute, "50M"))
471 goto cleanup;
472 if (cg_write(children[0], attribute, "75M"))
473 goto cleanup;
474 if (cg_write(children[1], attribute, "25M"))
475 goto cleanup;
476 if (cg_write(children[2], attribute, "0"))
477 goto cleanup;
478 if (cg_write(children[3], attribute, "500M"))
479 goto cleanup;
481 attempts = 0;
482 while (!values_close(cg_read_long(parent[1], "memory.current"),
483 MB(150), 3)) {
484 if (attempts++ > 5)
485 break;
486 sleep(1);
489 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
490 goto cleanup;
492 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
493 goto cleanup;
495 for (i = 0; i < ARRAY_SIZE(children); i++)
496 c[i] = cg_read_long(children[i], "memory.current");
498 if (!values_close(c[0], MB(29), 10))
499 goto cleanup;
501 if (!values_close(c[1], MB(21), 10))
502 goto cleanup;
504 if (c[3] != 0)
505 goto cleanup;
507 rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
508 if (min && !rc)
509 goto cleanup;
510 else if (!min && rc) {
511 fprintf(stderr,
512 "memory.low prevents from allocating anon memory\n");
513 goto cleanup;
516 current = min ? MB(50) : MB(30);
517 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
518 goto cleanup;
520 if (!reclaim_until(children[0], MB(10)))
521 goto cleanup;
523 if (min) {
524 ret = KSFT_PASS;
525 goto cleanup;
528 for (i = 0; i < ARRAY_SIZE(children); i++) {
529 int no_low_events_index = 1;
530 long low, oom;
532 oom = cg_read_key_long(children[i], "memory.events", "oom ");
533 low = cg_read_key_long(children[i], "memory.events", "low ");
535 if (oom)
536 goto cleanup;
537 if (i <= no_low_events_index && low <= 0)
538 goto cleanup;
539 if (i > no_low_events_index && low)
540 goto cleanup;
544 ret = KSFT_PASS;
546 cleanup:
547 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
548 if (!children[i])
549 continue;
551 cg_destroy(children[i]);
552 free(children[i]);
555 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
556 if (!parent[i])
557 continue;
559 cg_destroy(parent[i]);
560 free(parent[i]);
562 close(fd);
563 return ret;
/*
 * Run the shared protection scenario against memory.min.
 * Returns whatever test_memcg_protection() returns.
 */
static int test_memcg_min(const char *root)
{
	const bool use_min = true;

	return test_memcg_protection(root, use_min);
}
/*
 * Run the shared protection scenario against memory.low.
 * Returns whatever test_memcg_protection() returns.
 */
static int test_memcg_low(const char *root)
{
	const bool use_min = false;

	return test_memcg_protection(root, use_min);
}
/*
 * Requires that @cgroup has memory.high or memory.max set to 30M, then
 * tries to populate 50M of page cache through a temporary file and
 * checks that memory.current ends up close to 30M.
 *
 * @arg is unused.  Returns 0 on success, -1 on any failure.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	const size_t size = MB(50);
	long high_limit, max_limit;
	int ret = -1;
	int fd;

	high_limit = cg_read_long(cgroup, "memory.high");
	max_limit = cg_read_long(cgroup, "memory.max");
	/* At least one of the two limits has to be the expected 30M. */
	if (!(high_limit == MB(30) || max_limit == MB(30)))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size) == 0 &&
	    values_close(cg_read_long(cgroup, "memory.current"), MB(30), 5))
		ret = 0;

	close(fd);
	return ret;
}
608 * This test checks that memory.high limits the amount of
609 * memory which can be consumed by either anonymous memory
610 * or pagecache.
612 static int test_memcg_high(const char *root)
614 int ret = KSFT_FAIL;
615 char *memcg;
616 long high;
618 memcg = cg_name(root, "memcg_test");
619 if (!memcg)
620 goto cleanup;
622 if (cg_create(memcg))
623 goto cleanup;
625 if (cg_read_strcmp(memcg, "memory.high", "max\n"))
626 goto cleanup;
628 if (cg_write(memcg, "memory.swap.max", "0"))
629 goto cleanup;
631 if (cg_write(memcg, "memory.high", "30M"))
632 goto cleanup;
634 if (cg_run(memcg, alloc_anon, (void *)MB(31)))
635 goto cleanup;
637 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
638 goto cleanup;
640 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
641 goto cleanup;
643 high = cg_read_key_long(memcg, "memory.events", "high ");
644 if (high <= 0)
645 goto cleanup;
647 ret = KSFT_PASS;
649 cleanup:
650 cg_destroy(memcg);
651 free(memcg);
653 return ret;
/*
 * Map an anonymous private region of the size encoded in @arg, try to
 * mlock() it in one go, and unmap it again.
 *
 * @cgroup: unused
 * @arg:    mapping length in bytes, cast to a pointer
 *
 * Returns -1 if mmap() fails, 0 otherwise.  The result of mlock() is
 * deliberately not checked.
 */
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	/*
	 * Anonymous mappings have no backing file: pass fd -1 (not 0, which
	 * is a valid descriptor) as portable callers of mmap() should.
	 */
	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}
672 * This test checks that memory.high is able to throttle big single shot
673 * allocation i.e. large allocation within one kernel entry.
675 static int test_memcg_high_sync(const char *root)
677 int ret = KSFT_FAIL, pid, fd = -1;
678 char *memcg;
679 long pre_high, pre_max;
680 long post_high, post_max;
682 memcg = cg_name(root, "memcg_test");
683 if (!memcg)
684 goto cleanup;
686 if (cg_create(memcg))
687 goto cleanup;
689 pre_high = cg_read_key_long(memcg, "memory.events", "high ");
690 pre_max = cg_read_key_long(memcg, "memory.events", "max ");
691 if (pre_high < 0 || pre_max < 0)
692 goto cleanup;
694 if (cg_write(memcg, "memory.swap.max", "0"))
695 goto cleanup;
697 if (cg_write(memcg, "memory.high", "30M"))
698 goto cleanup;
700 if (cg_write(memcg, "memory.max", "140M"))
701 goto cleanup;
703 fd = memcg_prepare_for_wait(memcg);
704 if (fd < 0)
705 goto cleanup;
707 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
708 if (pid < 0)
709 goto cleanup;
711 cg_wait_for(fd);
713 post_high = cg_read_key_long(memcg, "memory.events", "high ");
714 post_max = cg_read_key_long(memcg, "memory.events", "max ");
715 if (post_high < 0 || post_max < 0)
716 goto cleanup;
718 if (pre_high == post_high || pre_max != post_max)
719 goto cleanup;
721 ret = KSFT_PASS;
723 cleanup:
724 if (fd >= 0)
725 close(fd);
726 cg_destroy(memcg);
727 free(memcg);
729 return ret;
733 * This test checks that memory.max limits the amount of
734 * memory which can be consumed by either anonymous memory
735 * or pagecache.
737 static int test_memcg_max(const char *root)
739 int ret = KSFT_FAIL;
740 char *memcg;
741 long current, max;
743 memcg = cg_name(root, "memcg_test");
744 if (!memcg)
745 goto cleanup;
747 if (cg_create(memcg))
748 goto cleanup;
750 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
751 goto cleanup;
753 if (cg_write(memcg, "memory.swap.max", "0"))
754 goto cleanup;
756 if (cg_write(memcg, "memory.max", "30M"))
757 goto cleanup;
759 /* Should be killed by OOM killer */
760 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
761 goto cleanup;
763 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
764 goto cleanup;
766 current = cg_read_long(memcg, "memory.current");
767 if (current > MB(30) || !current)
768 goto cleanup;
770 max = cg_read_key_long(memcg, "memory.events", "max ");
771 if (max <= 0)
772 goto cleanup;
774 ret = KSFT_PASS;
776 cleanup:
777 cg_destroy(memcg);
778 free(memcg);
780 return ret;
/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 *
 * A failed (negative) read of memory.current is always reported as
 * false, and a write that succeeds on the very last retry is verified
 * with one more read instead of being trusted blindly.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");
		/* A negative value would otherwise look like "below goal". */
		if (current < 0)
			return false;

		if (current < goal || values_close(current, goal, 3))
			return reclaimed;

		/* Did memory.reclaim return 0 incorrectly? */
		if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}

	/* Every attempt ended in -EAGAIN: the goal was never reached. */
	if (!reclaimed)
		return false;

	/* The last attempt claimed success; check that usage agrees. */
	current = cg_read_long(memcg, "memory.current");
	if (current < 0)
		return false;

	return current < goal || values_close(current, goal, 3);
}
826 * This test checks that memory.reclaim reclaims the given
827 * amount of memory (from both anon and file, if possible).
829 static int test_memcg_reclaim(const char *root)
831 int ret = KSFT_FAIL;
832 int fd = -1;
833 int retries;
834 char *memcg;
835 long current, expected_usage;
837 memcg = cg_name(root, "memcg_test");
838 if (!memcg)
839 goto cleanup;
841 if (cg_create(memcg))
842 goto cleanup;
844 current = cg_read_long(memcg, "memory.current");
845 if (current != 0)
846 goto cleanup;
848 fd = get_temp_fd();
849 if (fd < 0)
850 goto cleanup;
852 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
855 * If swap is enabled, try to reclaim from both anon and file, else try
856 * to reclaim from file only.
858 if (is_swap_enabled()) {
859 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
860 expected_usage = MB(100);
861 } else
862 expected_usage = MB(50);
865 * Wait until current usage reaches the expected usage (or we run out of
866 * retries).
868 retries = 5;
869 while (!values_close(cg_read_long(memcg, "memory.current"),
870 expected_usage, 10)) {
871 if (retries--) {
872 sleep(1);
873 continue;
874 } else {
875 fprintf(stderr,
876 "failed to allocate %ld for memcg reclaim test\n",
877 expected_usage);
878 goto cleanup;
883 * Reclaim until current reaches 30M, this makes sure we hit both anon
884 * and file if swap is enabled.
886 if (!reclaim_until(memcg, MB(30)))
887 goto cleanup;
889 ret = KSFT_PASS;
890 cleanup:
891 cg_destroy(memcg);
892 free(memcg);
893 close(fd);
895 return ret;
898 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
900 long mem_max = (long)arg;
901 size_t size = MB(50);
902 char *buf, *ptr;
903 long mem_current, swap_current;
904 int ret = -1;
906 buf = malloc(size);
907 if (buf == NULL) {
908 fprintf(stderr, "malloc() failed\n");
909 return -1;
912 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
913 *ptr = 0;
915 mem_current = cg_read_long(cgroup, "memory.current");
916 if (!mem_current || !values_close(mem_current, mem_max, 3))
917 goto cleanup;
919 swap_current = cg_read_long(cgroup, "memory.swap.current");
920 if (!swap_current ||
921 !values_close(mem_current + swap_current, size, 3))
922 goto cleanup;
924 ret = 0;
925 cleanup:
926 free(buf);
927 return ret;
931 * This test checks that memory.swap.max limits the amount of
932 * anonymous memory which can be swapped out. Additionally, it verifies that
933 * memory.swap.peak reflects the high watermark and can be reset.
935 static int test_memcg_swap_max_peak(const char *root)
937 int ret = KSFT_FAIL;
938 char *memcg;
939 long max, peak;
940 struct stat ss;
941 int swap_peak_fd = -1, mem_peak_fd = -1;
943 /* any non-empty string resets */
944 static const char reset_string[] = "foobarbaz";
946 if (!is_swap_enabled())
947 return KSFT_SKIP;
949 memcg = cg_name(root, "memcg_test");
950 if (!memcg)
951 goto cleanup;
953 if (cg_create(memcg))
954 goto cleanup;
956 if (cg_read_long(memcg, "memory.swap.current")) {
957 ret = KSFT_SKIP;
958 goto cleanup;
961 swap_peak_fd = cg_open(memcg, "memory.swap.peak",
962 O_RDWR | O_APPEND | O_CLOEXEC);
964 if (swap_peak_fd == -1) {
965 if (errno == ENOENT)
966 ret = KSFT_SKIP;
967 goto cleanup;
971 * Before we try to use memory.swap.peak's fd, try to figure out
972 * whether this kernel supports writing to that file in the first
973 * place. (by checking the writable bit on the file's st_mode)
975 if (fstat(swap_peak_fd, &ss))
976 goto cleanup;
978 if ((ss.st_mode & S_IWUSR) == 0) {
979 ret = KSFT_SKIP;
980 goto cleanup;
983 mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
985 if (mem_peak_fd == -1)
986 goto cleanup;
988 if (cg_read_long(memcg, "memory.swap.peak"))
989 goto cleanup;
991 if (cg_read_long_fd(swap_peak_fd))
992 goto cleanup;
994 /* switch the swap and mem fds into local-peak tracking mode*/
995 int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
997 if (peak_reset != sizeof(reset_string))
998 goto cleanup;
1000 if (cg_read_long_fd(swap_peak_fd))
1001 goto cleanup;
1003 if (cg_read_long(memcg, "memory.peak"))
1004 goto cleanup;
1006 if (cg_read_long_fd(mem_peak_fd))
1007 goto cleanup;
1009 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
1010 if (peak_reset != sizeof(reset_string))
1011 goto cleanup;
1013 if (cg_read_long_fd(mem_peak_fd))
1014 goto cleanup;
1016 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
1017 goto cleanup;
1019 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
1020 goto cleanup;
1022 if (cg_write(memcg, "memory.swap.max", "30M"))
1023 goto cleanup;
1025 if (cg_write(memcg, "memory.max", "30M"))
1026 goto cleanup;
1028 /* Should be killed by OOM killer */
1029 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1030 goto cleanup;
1032 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
1033 goto cleanup;
1035 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
1036 goto cleanup;
1038 peak = cg_read_long(memcg, "memory.peak");
1039 if (peak < MB(29))
1040 goto cleanup;
1042 peak = cg_read_long(memcg, "memory.swap.peak");
1043 if (peak < MB(29))
1044 goto cleanup;
1046 peak = cg_read_long_fd(mem_peak_fd);
1047 if (peak < MB(29))
1048 goto cleanup;
1050 peak = cg_read_long_fd(swap_peak_fd);
1051 if (peak < MB(29))
1052 goto cleanup;
1055 * open, reset and close the peak swap on another FD to make sure
1056 * multiple extant fds don't corrupt the linked-list
1058 peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
1059 if (peak_reset)
1060 goto cleanup;
1062 peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
1063 if (peak_reset)
1064 goto cleanup;
1066 /* actually reset on the fds */
1067 peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
1068 if (peak_reset != sizeof(reset_string))
1069 goto cleanup;
1071 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
1072 if (peak_reset != sizeof(reset_string))
1073 goto cleanup;
1075 peak = cg_read_long_fd(swap_peak_fd);
1076 if (peak > MB(10))
1077 goto cleanup;
1080 * The cgroup is now empty, but there may be a page or two associated
1081 * with the open FD accounted to it.
1083 peak = cg_read_long_fd(mem_peak_fd);
1084 if (peak > MB(1))
1085 goto cleanup;
1087 if (cg_read_long(memcg, "memory.peak") < MB(29))
1088 goto cleanup;
1090 if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
1091 goto cleanup;
1093 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
1094 goto cleanup;
1096 max = cg_read_key_long(memcg, "memory.events", "max ");
1097 if (max <= 0)
1098 goto cleanup;
1100 peak = cg_read_long(memcg, "memory.peak");
1101 if (peak < MB(29))
1102 goto cleanup;
1104 peak = cg_read_long(memcg, "memory.swap.peak");
1105 if (peak < MB(29))
1106 goto cleanup;
1108 peak = cg_read_long_fd(mem_peak_fd);
1109 if (peak < MB(29))
1110 goto cleanup;
1112 peak = cg_read_long_fd(swap_peak_fd);
1113 if (peak < MB(19))
1114 goto cleanup;
1116 ret = KSFT_PASS;
1118 cleanup:
1119 if (mem_peak_fd != -1 && close(mem_peak_fd))
1120 ret = KSFT_FAIL;
1121 if (swap_peak_fd != -1 && close(swap_peak_fd))
1122 ret = KSFT_FAIL;
1123 cg_destroy(memcg);
1124 free(memcg);
1126 return ret;
1130 * This test disables swapping and tries to allocate anonymous memory
1131 * up to OOM. Then it checks for oom and oom_kill events in
1132 * memory.events.
1134 static int test_memcg_oom_events(const char *root)
1136 int ret = KSFT_FAIL;
1137 char *memcg;
1139 memcg = cg_name(root, "memcg_test");
1140 if (!memcg)
1141 goto cleanup;
1143 if (cg_create(memcg))
1144 goto cleanup;
1146 if (cg_write(memcg, "memory.max", "30M"))
1147 goto cleanup;
1149 if (cg_write(memcg, "memory.swap.max", "0"))
1150 goto cleanup;
1152 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1153 goto cleanup;
1155 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
1156 goto cleanup;
1158 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
1159 goto cleanup;
1161 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
1162 goto cleanup;
1164 ret = KSFT_PASS;
1166 cleanup:
1167 cg_destroy(memcg);
1168 free(memcg);
1170 return ret;
/* Arguments handed to tcp_server() through its void * parameter. */
struct tcp_server_args {
	/* TCP port the server binds to, in host byte order. */
	unsigned short port;
	/*
	 * Control pipe: the server closes ctl[0] and reports its bind/listen
	 * status by writing an int to ctl[1].
	 */
	int ctl[2];
};
1178 static int tcp_server(const char *cgroup, void *arg)
1180 struct tcp_server_args *srv_args = arg;
1181 struct sockaddr_in6 saddr = { 0 };
1182 socklen_t slen = sizeof(saddr);
1183 int sk, client_sk, ctl_fd, yes = 1, ret = -1;
1185 close(srv_args->ctl[0]);
1186 ctl_fd = srv_args->ctl[1];
1188 saddr.sin6_family = AF_INET6;
1189 saddr.sin6_addr = in6addr_any;
1190 saddr.sin6_port = htons(srv_args->port);
1192 sk = socket(AF_INET6, SOCK_STREAM, 0);
1193 if (sk < 0)
1194 return ret;
1196 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
1197 goto cleanup;
1199 if (bind(sk, (struct sockaddr *)&saddr, slen)) {
1200 write(ctl_fd, &errno, sizeof(errno));
1201 goto cleanup;
1204 if (listen(sk, 1))
1205 goto cleanup;
1207 ret = 0;
1208 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
1209 ret = -1;
1210 goto cleanup;
1213 client_sk = accept(sk, NULL, NULL);
1214 if (client_sk < 0)
1215 goto cleanup;
1217 ret = -1;
1218 for (;;) {
1219 uint8_t buf[0x100000];
1221 if (write(client_sk, buf, sizeof(buf)) <= 0) {
1222 if (errno == ECONNRESET)
1223 ret = 0;
1224 break;
1228 close(client_sk);
1230 cleanup:
1231 close(sk);
1232 return ret;
/*
 * TCP client side of the socket accounting test.
 *
 * Connects to "localhost" on @port and reads from the socket up to 16
 * times.  After each read it compares the growth of @cgroup's
 * memory.current since entry with the "sock " entry of memory.stat.
 *
 * Returns KSFT_PASS as soon as the two values are close, the
 * getaddrinfo() error code if name resolution fails, and KSFT_FAIL for
 * every other failure.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	/*
	 * %hu, not %hd: ports above 32767 must not be printed as negative
	 * numbers (which would also overflow the 5-digit buffer).
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	/*
	 * ret is 0 here, which callers would presumably read as KSFT_PASS;
	 * make every exit below a failure unless the check succeeds.
	 */
	ret = KSFT_FAIL;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	if (connect(sk, ai->ai_addr, ai->ai_addrlen) < 0)
		goto close_sk;

	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
1287 * This test checks socket memory accounting.
1288 * The test forks a TCP server listens on a random port between 1000
1289 * and 61000. Once it gets a client connection, it starts writing to
1290 * its socket.
1291 * The TCP client interleaves reads from the socket with check whether
1292 * memory.current and memory.stat.sock are similar.
1294 static int test_memcg_sock(const char *root)
1296 int bind_retries = 5, ret = KSFT_FAIL, pid, err;
1297 unsigned short port;
1298 char *memcg;
1300 memcg = cg_name(root, "memcg_test");
1301 if (!memcg)
1302 goto cleanup;
1304 if (cg_create(memcg))
1305 goto cleanup;
1307 while (bind_retries--) {
1308 struct tcp_server_args args;
1310 if (pipe(args.ctl))
1311 goto cleanup;
1313 port = args.port = 1000 + rand() % 60000;
1315 pid = cg_run_nowait(memcg, tcp_server, &args);
1316 if (pid < 0)
1317 goto cleanup;
1319 close(args.ctl[1]);
1320 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
1321 goto cleanup;
1322 close(args.ctl[0]);
1324 if (!err)
1325 break;
1326 if (err != EADDRINUSE)
1327 goto cleanup;
1329 waitpid(pid, NULL, 0);
1332 if (err == EADDRINUSE) {
1333 ret = KSFT_SKIP;
1334 goto cleanup;
1337 if (tcp_client(memcg, port) != KSFT_PASS)
1338 goto cleanup;
1340 waitpid(pid, &err, 0);
1341 if (WEXITSTATUS(err))
1342 goto cleanup;
1344 if (cg_read_long(memcg, "memory.current") < 0)
1345 goto cleanup;
1347 if (cg_read_key_long(memcg, "memory.stat", "sock "))
1348 goto cleanup;
1350 ret = KSFT_PASS;
1352 cleanup:
1353 cg_destroy(memcg);
1354 free(memcg);
1356 return ret;
1360 * This test disables swapping and tries to allocate anonymous memory
1361 * up to OOM with memory.group.oom set. Then it checks that all
1362 * processes in the leaf were killed. It also checks that oom_events
1363 * were propagated to the parent level.
1365 static int test_memcg_oom_group_leaf_events(const char *root)
1367 int ret = KSFT_FAIL;
1368 char *parent, *child;
1369 long parent_oom_events;
1371 parent = cg_name(root, "memcg_test_0");
1372 child = cg_name(root, "memcg_test_0/memcg_test_1");
1374 if (!parent || !child)
1375 goto cleanup;
1377 if (cg_create(parent))
1378 goto cleanup;
1380 if (cg_create(child))
1381 goto cleanup;
1383 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1384 goto cleanup;
1386 if (cg_write(child, "memory.max", "50M"))
1387 goto cleanup;
1389 if (cg_write(child, "memory.swap.max", "0"))
1390 goto cleanup;
1392 if (cg_write(child, "memory.oom.group", "1"))
1393 goto cleanup;
1395 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1396 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1397 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1398 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1399 goto cleanup;
1401 if (cg_test_proc_killed(child))
1402 goto cleanup;
1404 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1405 goto cleanup;
1407 parent_oom_events = cg_read_key_long(
1408 parent, "memory.events", "oom_kill ");
1410 * If memory_localevents is not enabled (the default), the parent should
1411 * count OOM events in its children groups. Otherwise, it should not
1412 * have observed any events.
1414 if (has_localevents && parent_oom_events != 0)
1415 goto cleanup;
1416 else if (!has_localevents && parent_oom_events <= 0)
1417 goto cleanup;
1419 ret = KSFT_PASS;
1421 cleanup:
1422 if (child)
1423 cg_destroy(child);
1424 if (parent)
1425 cg_destroy(parent);
1426 free(child);
1427 free(parent);
1429 return ret;
1433 * This test disables swapping and tries to allocate anonymous memory
1434 * up to OOM with memory.group.oom set. Then it checks that all
1435 * processes in the parent and leaf were killed.
1437 static int test_memcg_oom_group_parent_events(const char *root)
1439 int ret = KSFT_FAIL;
1440 char *parent, *child;
1442 parent = cg_name(root, "memcg_test_0");
1443 child = cg_name(root, "memcg_test_0/memcg_test_1");
1445 if (!parent || !child)
1446 goto cleanup;
1448 if (cg_create(parent))
1449 goto cleanup;
1451 if (cg_create(child))
1452 goto cleanup;
1454 if (cg_write(parent, "memory.max", "80M"))
1455 goto cleanup;
1457 if (cg_write(parent, "memory.swap.max", "0"))
1458 goto cleanup;
1460 if (cg_write(parent, "memory.oom.group", "1"))
1461 goto cleanup;
1463 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1464 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1465 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1467 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1468 goto cleanup;
1470 if (cg_test_proc_killed(child))
1471 goto cleanup;
1472 if (cg_test_proc_killed(parent))
1473 goto cleanup;
1475 ret = KSFT_PASS;
1477 cleanup:
1478 if (child)
1479 cg_destroy(child);
1480 if (parent)
1481 cg_destroy(parent);
1482 free(child);
1483 free(parent);
1485 return ret;
1489 * This test disables swapping and tries to allocate anonymous memory
1490 * up to OOM with memory.group.oom set. Then it checks that all
1491 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1493 static int test_memcg_oom_group_score_events(const char *root)
1495 int ret = KSFT_FAIL;
1496 char *memcg;
1497 int safe_pid;
1499 memcg = cg_name(root, "memcg_test_0");
1501 if (!memcg)
1502 goto cleanup;
1504 if (cg_create(memcg))
1505 goto cleanup;
1507 if (cg_write(memcg, "memory.max", "50M"))
1508 goto cleanup;
1510 if (cg_write(memcg, "memory.swap.max", "0"))
1511 goto cleanup;
1513 if (cg_write(memcg, "memory.oom.group", "1"))
1514 goto cleanup;
1516 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1517 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1518 goto cleanup;
1520 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1521 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1522 goto cleanup;
1524 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1525 goto cleanup;
1527 if (kill(safe_pid, SIGKILL))
1528 goto cleanup;
1530 ret = KSFT_PASS;
1532 cleanup:
1533 if (memcg)
1534 cg_destroy(memcg);
1535 free(memcg);
1537 return ret;
1540 #define T(x) { x, #x }
1541 struct memcg_test {
1542 int (*fn)(const char *root);
1543 const char *name;
1544 } tests[] = {
1545 T(test_memcg_subtree_control),
1546 T(test_memcg_current_peak),
1547 T(test_memcg_min),
1548 T(test_memcg_low),
1549 T(test_memcg_high),
1550 T(test_memcg_high_sync),
1551 T(test_memcg_max),
1552 T(test_memcg_reclaim),
1553 T(test_memcg_oom_events),
1554 T(test_memcg_swap_max_peak),
1555 T(test_memcg_sock),
1556 T(test_memcg_oom_group_leaf_events),
1557 T(test_memcg_oom_group_parent_events),
1558 T(test_memcg_oom_group_score_events),
1560 #undef T
1562 int main(int argc, char **argv)
1564 char root[PATH_MAX];
1565 int i, proc_status, ret = EXIT_SUCCESS;
1567 if (cg_find_unified_root(root, sizeof(root), NULL))
1568 ksft_exit_skip("cgroup v2 isn't mounted\n");
1571 * Check that memory controller is available:
1572 * memory is listed in cgroup.controllers
1574 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1575 ksft_exit_skip("memory controller isn't available\n");
1577 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
1578 if (cg_write(root, "cgroup.subtree_control", "+memory"))
1579 ksft_exit_skip("Failed to set memory controller\n");
1581 proc_status = proc_mount_contains("memory_recursiveprot");
1582 if (proc_status < 0)
1583 ksft_exit_skip("Failed to query cgroup mount option\n");
1584 has_recursiveprot = proc_status;
1586 proc_status = proc_mount_contains("memory_localevents");
1587 if (proc_status < 0)
1588 ksft_exit_skip("Failed to query cgroup mount option\n");
1589 has_localevents = proc_status;
1591 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1592 switch (tests[i].fn(root)) {
1593 case KSFT_PASS:
1594 ksft_test_result_pass("%s\n", tests[i].name);
1595 break;
1596 case KSFT_SKIP:
1597 ksft_test_result_skip("%s\n", tests[i].name);
1598 break;
1599 default:
1600 ret = EXIT_FAILURE;
1601 ksft_test_result_fail("%s\n", tests[i].name);
1602 break;
1606 return ret;