/*
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

/* Use t->m[0] to encode the offset */
#define MEMFILE_OFFSET(t, m0)	(((offsetof(t, m0) << 16) | sizeof_field(t, m0)))
#define MEMFILE_OFFSET0(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_FIELD_SIZE(val)	((val) & 0xffff)
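
/*
 * Editorial note (illustration, not in the original source): a cftype's
 * ->private field packs the hstate index into the upper 16 bits and a
 * RES_* attribute into the lower 16 bits, so the two can be recovered
 * independently, e.g.
 *
 *	val  = MEMFILE_PRIVATE(1, RES_LIMIT);
 *	MEMFILE_IDX(val)  == 1
 *	MEMFILE_ATTR(val) == RES_LIMIT
 *
 * MEMFILE_OFFSET()/MEMFILE_OFFSET0()/MEMFILE_FIELD_SIZE() use the same
 * trick to pack a struct member's offset and size for the per-hstate
 * ->file_offset rebuild done in hugetlb_cgroup_cfttypes_init().
 */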
#define DFL_TMPL_SIZE		ARRAY_SIZE(hugetlb_dfl_tmpl)
#define LEGACY_TMPL_SIZE	ARRAY_SIZE(hugetlb_legacy_tmpl)
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
static struct cftype *dfl_files;
static struct cftype *legacy_files;
static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}
static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}
static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}
static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}
static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}
static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	struct hstate *h;

	for_each_hstate(h) {
		if (page_counter_read(
		    hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
			return true;
	}
	return false;
}
static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault_parent = NULL;
		struct page_counter *rsvd_parent = NULL;
		unsigned long limit;
		int ret;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
								      idx),
				  fault_parent, false);
		page_counter_init(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			rsvd_parent, false);

		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
			limit);
		VM_BUG_ON(ret);
	}
}
static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
	int node;

	for_each_node(node)
		kfree(h_cgroup->nodeinfo[node]);
	kfree(h_cgroup);
}
static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;
	int node;

	h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
			   GFP_KERNEL);
	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	/*
	 * TODO: this routine can waste much memory for nodes which will
	 * never be onlined. It's better to use memory hotplug callback
	 * function.
	 */
	for_each_node(node) {
		/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
		int node_to_alloc =
			node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
		h_cgroup->nodeinfo[node] =
			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
				     GFP_KERNEL, node_to_alloc);
		if (!h_cgroup->nodeinfo[node])
			goto fail_alloc_nodeinfo;
	}

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;

fail_alloc_nodeinfo:
	hugetlb_cgroup_free(h_cgroup);
	return ERR_PTR(-ENOMEM);
}
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}
/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, So no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
	struct folio *folio = page_folio(page);

	page_hcg = hugetlb_cgroup_from_folio(folio);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	nr_pages = compound_nr(page);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(folio, parent);
out:
	return;
}
/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct page *page;

	do {
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(hstate_index(h), h_cg, page);

			spin_unlock_irq(&hugetlb_lock);
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}
int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}
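
/*
 * Editorial note (usage sketch, not in the original source; the exact call
 * sites live in mm/hugetlb.c and may differ by kernel version): a charge is
 * typically taken before a folio is allocated and then committed to the
 * folio under hugetlb_lock, roughly:
 *
 *	hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
 *	...allocate/dequeue the folio...
 *	spin_lock_irq(&hugetlb_lock);
 *	hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, folio);
 *	spin_unlock_irq(&hugetlb_lock);
 *
 * A charge that is never committed is undone with
 * hugetlb_cgroup_uncharge_cgroup().
 */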
/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct folio *folio, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;
	lockdep_assert_held(&hugetlb_lock);
	__set_hugetlb_cgroup(folio, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage + nr_pages);
	}
}
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
}
void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
}
/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
					    struct folio *folio, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(folio, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
	else {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage - nr_pages);
	}
}
void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
				   struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
}
void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
					struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
}
static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								    rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}
void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}
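
/*
 * Editorial note: the RES_* values used by the read/write/reset handlers
 * below are defined in the upstream file as a plain enum. They are not part
 * of this excerpt; the definition below is reproduced as an assumption about
 * the elided lines so the switch statements read coherently.
 */
enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};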
static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
	int nid;
	struct cftype *cft = seq_cft(seq);
	int idx = MEMFILE_IDX(cft->private);
	bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	struct cgroup_subsys_state *css;
	unsigned long usage;

	if (legacy) {
		/* Add up usage across all nodes for the non-hierarchical total. */
		usage = 0;
		for_each_node_state(nid, N_MEMORY)
			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

		/* Simply print the per-node usage for the non-hierarchical total. */
		for_each_node_state(nid, N_MEMORY)
			seq_printf(seq, " N%d=%lu", nid,
				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
					   PAGE_SIZE);
		seq_putc(seq, '\n');
	}

	/*
	 * The hierarchical total is pretty much the value recorded by the
	 * counter, so use that.
	 */
	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

	/*
	 * For each node, traverse the css tree to obtain the hierarchical
	 * node usage.
	 */
	for_each_node_state(nid, N_MEMORY) {
		usage = 0;
		rcu_read_lock();
		css_for_each_descendant_pre(css, &h_cg->css) {
			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
						   ->nodeinfo[nid]
						   ->usage[idx]);
		}
		rcu_read_unlock();
		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
	}

	seq_putc(seq, '\n');

	return 0;
}
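
/*
 * Editorial note (illustration, not in the original source): with the
 * handler above, the cgroup v1 numa_stat file prints a non-hierarchical
 * line followed by a hierarchical one, while cgroup v2 prints only the
 * hierarchical line, along the lines of:
 *
 *	total=2097152 N0=2097152 N1=0
 *	hierarchical_total=4194304 N0=2097152 N1=2097152
 *
 * (values are in bytes, one "N<nid>=" pair per node with memory).
 */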
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}
static DEFINE_MUTEX(hugetlb_limit_mutex);
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}
static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}
static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= SZ_1G)
		snprintf(buf, size, "%luGB", hsize / SZ_1G);
	else if (hsize >= SZ_1M)
		snprintf(buf, size, "%luMB", hsize / SZ_1M);
	else
		snprintf(buf, size, "%luKB", hsize / SZ_1K);
	return buf;
}
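
/*
 * Editorial note (illustration, not in the original source): mem_fmt()
 * renders a huge page size in bytes as the label used in control file
 * names, e.g. 2097152 -> "2MB" and 1073741824 -> "1GB", which yields files
 * such as "hugetlb.2MB.max" on cgroup v2.
 */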
static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}
static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}
static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}
static struct cftype hugetlb_dfl_tmpl[] = {
	{
		.name = "max",
		.private = RES_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.max",
		.private = RES_RSVD_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "current",
		.private = RES_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.current",
		.private = RES_RSVD_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events",
		.seq_show = hugetlb_events_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events.local",
		.seq_show = hugetlb_events_local_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	/* don't need terminator here */
};
static struct cftype hugetlb_legacy_tmpl[] = {
	{
		.name = "limit_in_bytes",
		.private = RES_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "rsvd.limit_in_bytes",
		.private = RES_RSVD_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "usage_in_bytes",
		.private = RES_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.usage_in_bytes",
		.private = RES_RSVD_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.max_usage_in_bytes",
		.private = RES_RSVD_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "failcnt",
		.private = RES_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.failcnt",
		.private = RES_RSVD_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
	},
	/* don't need terminator here */
};
static void __init
hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft,
			     struct cftype *tmpl, int tmpl_size)
{
	char buf[32];
	int i, idx = hstate_index(h);

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	for (i = 0; i < tmpl_size; cft++, tmpl++, i++) {
		*cft = *tmpl;
		/* rebuild the name */
		snprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name);
		/* rebuild the private */
		cft->private = MEMFILE_PRIVATE(idx, tmpl->private);
		/* rebuild the file_offset */
		if (tmpl->file_offset) {
			unsigned int offset = tmpl->file_offset;

			cft->file_offset = MEMFILE_OFFSET0(offset) +
					   MEMFILE_FIELD_SIZE(offset) * idx;
		}

		lockdep_register_key(&cft->lockdep_key);
	}
}
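
/*
 * Editorial note (illustration, not in the original source): for a 2MB
 * hstate at index 0, the legacy template entry "limit_in_bytes" is rebuilt
 * here as "2MB.limit_in_bytes" (exposed to userspace as
 * hugetlb.2MB.limit_in_bytes) with ->private set to
 * MEMFILE_PRIVATE(0, RES_LIMIT).
 */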
static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE,
				     hugetlb_dfl_tmpl, DFL_TMPL_SIZE);
}
static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE,
				     hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE);
}
static void __init __hugetlb_cgroup_file_init(struct hstate *h)
{
	__hugetlb_cgroup_file_dfl_init(h);
	__hugetlb_cgroup_file_legacy_init(h);
}
static void __init __hugetlb_cgroup_file_pre_init(void)
{
	int cft_count;

	cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */
	dfl_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL);
	BUG_ON(!dfl_files);
	cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */
	legacy_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL);
	BUG_ON(!legacy_files);
}
static void __init __hugetlb_cgroup_file_post_init(void)
{
	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       dfl_files));
	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  legacy_files));
}
void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	__hugetlb_cgroup_file_pre_init();
	for_each_hstate(h)
		__hugetlb_cgroup_file_init(h);
	__hugetlb_cgroup_file_post_init();
}
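
/*
 * Editorial note (summary, not in the original source): the boot-time
 * sequence above first allocates one cftype slot per hstate per template
 * entry (plus a terminating slot), then fills in the per-size names and
 * private values for each hstate, and finally registers the two arrays
 * with the cgroup core for the v2 (dfl) and v1 (legacy) hierarchies.
 */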
/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = folio_hstate(old_folio);

	if (hugetlb_cgroup_disabled())
		return;

	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_folio(old_folio);
	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
	set_hugetlb_cgroup(old_folio, NULL);
	set_hugetlb_cgroup_rsvd(old_folio, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(new_folio, h_cg);
	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
	list_move(&new_folio->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
}
static struct cftype hugetlb_files[] = {
	{} /* terminate */
};
struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
	.dfl_cftypes	= hugetlb_files,
	.legacy_cftypes	= hugetlb_files,
};