#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mman.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/sysctl.h>
#include <linux/swap.h>
#include <linux/kobject.h>
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include <linux/sysfs.h>
#include <linux/oom.h>
#define MY_NAME "lowmem"

#define LOWMEM_MAX_UIDS 8
enum {
	VM_LOWMEM_DENY_PAGES = 1,
	VM_LOWMEM_NOTIFY_LOW_PAGES,
	VM_LOWMEM_NOTIFY_HIGH_PAGES,
	VM_LOWMEM_NR_DECAY_PAGES,
	VM_LOWMEM_ALLOWED_UIDS,
	VM_LOWMEM_ALLOWED_PAGES,
	VM_LOWMEM_FREE_PAGES,
	VM_LOWMEM_USED_PAGES,
	VM_LOWMEM_LEVEL1_NOTIFY,
	VM_LOWMEM_LEVEL2_NOTIFY,
	VM_LOWMEM_DENY,
};
static long deny_pages;
static long notify_low_pages, notify_high_pages;
static unsigned int nr_decay_pages;
static unsigned long allowed_pages;
static unsigned long lowmem_free_pages;
static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
static unsigned int minuid = 1;
static unsigned int maxuid = 65535;
static unsigned int deny_percentage;
static unsigned int l1_notify, l2_notify;
static long used_pages;
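
/*
 * Tunable state: deny_pages is the hard allocation-denial watermark,
 * notify_low_pages/notify_high_pages are the level 1 and level 2
 * notification watermarks, nr_decay_pages adds hysteresis when the
 * level 2 state is dropped again, and allowed_uids lists UIDs that may
 * keep allocating even below the deny watermark.  deny_percentage,
 * l1_notify and l2_notify are derived percentage views of the same
 * watermarks relative to allowed_pages.
 */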
static int
proc_dointvec_used(ctl_table *table, int write, struct file *filp,
		   void __user *buffer, size_t *lenp, loff_t *ppos);
static int
proc_dointvec_l1_notify(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos);
static int
proc_dointvec_l2_notify(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos);
static int
proc_dointvec_deny(ctl_table *table, int write, struct file *filp,
		   void __user *buffer, size_t *lenp, loff_t *ppos);
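
/*
 * Sysctl table: each entry exposes one watermark or status value.  The
 * custom handlers refresh the derived percentage or usage values before
 * handing the request to proc_dointvec().
 */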
static ctl_table lowmem_table[] = {
	{
		.ctl_name	= VM_LOWMEM_DENY_PAGES,
		.procname	= "lowmem_deny_watermark_pages",
		.data		= &deny_pages,
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_DENY,
		.procname	= "lowmem_deny_watermark",
		.data		= &deny_percentage,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_deny,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_LEVEL1_NOTIFY,
		.procname	= "lowmem_notify_low",
		.data		= &l1_notify,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_l1_notify,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_LEVEL2_NOTIFY,
		.procname	= "lowmem_notify_high",
		.data		= &l2_notify,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_l2_notify,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_USED_PAGES,
		.procname	= "lowmem_used_pages",
		.data		= &used_pages,
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_used,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_NOTIFY_LOW_PAGES,
		.procname	= "lowmem_notify_low_pages",
		.data		= &notify_low_pages,
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_NOTIFY_HIGH_PAGES,
		.procname	= "lowmem_notify_high_pages",
		.data		= &notify_high_pages,
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_NR_DECAY_PAGES,
		.procname	= "lowmem_nr_decay_pages",
		.data		= &nr_decay_pages,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_ALLOWED_UIDS,
		.procname	= "lowmem_allowed_uids",
		.data		= &allowed_uids,
		.maxlen		= LOWMEM_MAX_UIDS * sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &minuid,
		.extra2		= &maxuid,
	}, {
		.ctl_name	= VM_LOWMEM_ALLOWED_PAGES,
		.procname	= "lowmem_allowed_pages",
		.data		= &allowed_pages,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= VM_LOWMEM_FREE_PAGES,
		.procname	= "lowmem_free_pages",
		.data		= &lowmem_free_pages,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
		.strategy	= &sysctl_intvec,
	}, {
		.ctl_name	= 0,
	}
};
static ctl_table lowmem_root_table[] = {
	{
		.ctl_name	= CTL_VM,
		.procname	= "vm",
		.mode		= 0555,
		.child		= lowmem_table,
	}, {
		.ctl_name	= 0,
	}
};
#define KERNEL_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
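
/*
 * KERNEL_ATTR_RO(foo) expands to a read-only kobj_attribute named
 * foo_attr whose show routine is foo_show(); it is used below to create
 * the low_watermark and high_watermark sysfs files.
 */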
static int low_watermark_reached, high_watermark_reached;
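
/*
 * The handlers below refresh the percentage views of the watermarks
 * before delegating to proc_dointvec().  The expression
 * (100 * pages + allowed_pages / 2) / allowed_pages rounds to the
 * nearest percent; e.g. with allowed_pages = 1000 and pages = 255 it
 * yields 26, so the reported value is 100 - 26 = 74.
 */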
static int
proc_dointvec_l1_notify(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	l1_notify =
		100 - (100 * notify_low_pages + allowed_pages / 2) / allowed_pages;
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}
static int
proc_dointvec_l2_notify(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	l2_notify =
		100 - (100 * notify_high_pages + allowed_pages / 2) / allowed_pages;
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}
static int
proc_dointvec_deny(ctl_table *table, int write, struct file *filp,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	deny_percentage =
		100 - (100 * deny_pages + allowed_pages / 2) / allowed_pages;
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}
static int
proc_dointvec_used(ctl_table *table, int write, struct file *filp,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (lowmem_free_pages > 0 && allowed_pages > lowmem_free_pages)
		used_pages = allowed_pages - lowmem_free_pages;
	else
		used_pages = 0;

	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}
static ssize_t low_watermark_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *page)
{
	return sprintf(page, "%u\n", low_watermark_reached);
}

static ssize_t high_watermark_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *page)
{
	return sprintf(page, "%u\n", high_watermark_reached);
}

KERNEL_ATTR_RO(low_watermark);
KERNEL_ATTR_RO(high_watermark);
static void low_watermark_state(int new_state)
{
	if (low_watermark_reached != new_state) {
		low_watermark_reached = new_state;
		sysfs_notify(kernel_kobj, NULL, "low_watermark");
	}
}

static void high_watermark_state(int new_state)
{
	if (high_watermark_reached != new_state) {
		high_watermark_reached = new_state;
		sysfs_notify(kernel_kobj, NULL, "high_watermark");
	}
}
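
/*
 * sysfs_notify() wakes up anyone poll()ing /sys/kernel/low_watermark or
 * /sys/kernel/high_watermark, so user space gets an event whenever a
 * watermark state flips.
 *
 * low_vm_enough_memory() below implements the policy: the pending
 * allocation is accounted, free memory is compared against the notify
 * and deny watermarks, and the request is rejected with -ENOMEM for
 * unprivileged, non-whitelisted processes once free memory drops under
 * deny_pages.
 */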
static int low_vm_enough_memory(struct mm_struct *mm, long pages)
{
	unsigned long free, allowed;
	int cap_sys_admin = 0, notify;

	if (cap_capable(current, CAP_SYS_ADMIN) == 0)
		cap_sys_admin = 1;

	allowed = totalram_pages - hugetlb_total_pages();
	allowed_pages = allowed;

	/* We activate ourselves only after both parameters have been
	 * configured. */
	if (deny_pages == 0 || notify_low_pages == 0 || notify_high_pages == 0)
		return __vm_enough_memory(mm, pages, cap_sys_admin);

	vm_acct_memory(pages);

	/* Easily freed pages when under VM pressure or direct reclaim */
	free = global_page_state(NR_FILE_PAGES);
	free += nr_swap_pages;
	free += global_page_state(NR_SLAB_RECLAIMABLE);

	if (likely(free > notify_low_pages))
		goto enough_memory;

	/* No luck, lets make it more expensive and try again.. */
	free += nr_free_pages();

	if (free < deny_pages) {
		int i;

		lowmem_free_pages = free;
		low_watermark_state(1);
		high_watermark_state(1);
		/* Memory allocations by root are always allowed */
		if (cap_sys_admin)
			return 0;

		/* OOM unkillable process is allowed to consume memory */
		if (current->oomkilladj == OOM_DISABLE)
			return 0;

		/* uids from allowed_uids vector are also allowed no matter what */
		for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
			if (current->uid == allowed_uids[i])
				return 0;

		vm_unacct_memory(pages);
		if (printk_ratelimit()) {
			printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
			       current->pid, current->comm);
		}
		return -ENOMEM;
	}

	/* See if we need to notify level 1 */
	low_watermark_state(free < notify_low_pages);

	/*
	 * In the level 2 notification case things are more complicated,
	 * as the level that we drop the state and send a notification
	 * should be lower than when it is first triggered. Having this
	 * on the same watermark level ends up bouncing back and forth
	 * when applications are being stupid.
	 */
	notify = free < notify_high_pages;
	if (notify || free - nr_decay_pages > notify_high_pages)
		high_watermark_state(notify);

enough_memory:
	/* We have plenty of memory */
	lowmem_free_pages = free;

	return 0;
}
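
/*
 * Note on accounting: vm_acct_memory() charges the pending allocation
 * up front and is undone with vm_unacct_memory() only on the deny path,
 * which mirrors the convention used by the generic __vm_enough_memory()
 * helper that this hook replaces.
 */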
static struct security_operations lowmem_security_ops = {
	/* Use the capability functions for some of the hooks */
	.ptrace_may_access = cap_ptrace_may_access,
	.ptrace_traceme = cap_ptrace_traceme,
	.capget = cap_capget,
	.capset_check = cap_capset_check,
	.capset_set = cap_capset_set,
	.capable = cap_capable,

	.bprm_apply_creds = cap_bprm_apply_creds,
	.bprm_set_security = cap_bprm_set_security,

	.task_post_setuid = cap_task_post_setuid,
	.task_reparent_to_init = cap_task_reparent_to_init,
	.vm_enough_memory = low_vm_enough_memory,
};
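
/*
 * Apart from vm_enough_memory, every hook above is the default
 * capability implementation from commoncap, so this module only changes
 * the memory overcommit decision.
 */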
static struct ctl_table_header *lowmem_table_header;
static struct attribute *lowmem_attrs[] = {
	&low_watermark_attr.attr,
	&high_watermark_attr.attr,
	NULL,
};

static struct attribute_group lowmem_attr_group = {
	.attrs = lowmem_attrs,
};
static int __init lowmem_init(void)
{
	int r;

	/* register ourselves with the security framework */
	if (register_security(&lowmem_security_ops)) {
		printk(KERN_ERR MY_NAME
		       ": Failure registering with the kernel\n");
		return -EINVAL;
	}

	/* initialize the uids vector */
	memset(allowed_uids, 0, sizeof(allowed_uids));

	lowmem_table_header = register_sysctl_table(lowmem_root_table);
	if (unlikely(!lowmem_table_header))
		return -ENOMEM;

	r = sysfs_create_group(kernel_kobj,
			       &lowmem_attr_group);
	if (r)
		return r;

	printk(KERN_INFO MY_NAME ": Module initialized.\n");

	return 0;
}
module_init(lowmem_init);

MODULE_DESCRIPTION("Low watermark LSM module");
MODULE_LICENSE("GPL");
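
/*
 * Usage notes (user space), assuming the root sysctl entry above really
 * registers the table under "vm" and the module is built in or loaded
 * at boot:
 *
 *	cat /sys/kernel/low_watermark /sys/kernel/high_watermark
 *	echo 4096 > /proc/sys/vm/lowmem_notify_low_pages
 *	echo 2048 > /proc/sys/vm/lowmem_notify_high_pages
 *	echo 1024 > /proc/sys/vm/lowmem_deny_watermark_pages
 *
 * A minimal watcher (sketch, not part of this module) opens one of the
 * sysfs files, reads it once, then poll()s for POLLPRI | POLLERR;
 * sysfs_notify() wakes the poller whenever the state flips:
 *
 *	int fd = open("/sys/kernel/low_watermark", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLPRI | POLLERR };
 *	char buf[4];
 *	pread(fd, buf, sizeof(buf), 0);
 *	poll(&pfd, 1, -1);
 *	pread(fd, buf, sizeof(buf), 0);
 */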