omap3 evm, beagle and overo use the generic twl4030 script
[linux-ginger.git] / security / lowmem.c
blob ae78a530af39703e335ad769f1e6f097f63ec6dd
1 #include <linux/module.h>
2 #include <linux/kernel.h>
3 #include <linux/mman.h>
4 #include <linux/init.h>
5 #include <linux/security.h>
6 #include <linux/sysctl.h>
7 #include <linux/swap.h>
8 #include <linux/kobject.h>
9 #include <linux/pagemap.h>
10 #include <linux/hugetlb.h>
11 #include <linux/sysfs.h>
12 #include <linux/oom.h>
/* Prefix used in the printk() messages emitted by this file. */
#define MY_NAME "lowmem"

/* Number of slots in the allowed_uids vector. */
#define LOWMEM_MAX_UIDS 8

/* Binary sysctl identifiers (.ctl_name) for the entries of lowmem_table. */
enum {
	VM_LOWMEM_DENY_PAGES = 1,
	VM_LOWMEM_NOTIFY_LOW_PAGES,
	VM_LOWMEM_NOTIFY_HIGH_PAGES,
	VM_LOWMEM_NR_DECAY_PAGES,
	VM_LOWMEM_ALLOWED_UIDS,
	VM_LOWMEM_ALLOWED_PAGES,
	VM_LOWMEM_FREE_PAGES,
	VM_LOWMEM_DENY,
	VM_LOWMEM_LEVEL1_NOTIFY,
	VM_LOWMEM_LEVEL2_NOTIFY,
	VM_LOWMEM_USED_PAGES
};
32 static long deny_pages;
33 static long notify_low_pages, notify_high_pages;
34 static unsigned int nr_decay_pages;
35 static unsigned long allowed_pages;
36 static unsigned long lowmem_free_pages;
37 static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
38 static unsigned int minuid = 1;
39 static unsigned int maxuid = 65535;
40 static unsigned int deny_percentage;
41 static unsigned int l1_notify, l2_notify;
42 static long used_pages;
44 static int
45 proc_dointvec_used(ctl_table *table, int write, struct file *filp,
46 void __user *buffer, size_t *lenp, loff_t *ppos);
47 static int
48 proc_dointvec_l1_notify(ctl_table *table, int write, struct file *filp,
49 void __user *buffer, size_t *lenp, loff_t *ppos);
50 static int
51 proc_dointvec_l2_notify(ctl_table *table, int write, struct file *filp,
52 void __user *buffer, size_t *lenp, loff_t *ppos);
53 static int
54 proc_dointvec_deny(ctl_table *table, int write, struct file *filp,
55 void __user *buffer, size_t *lenp, loff_t *ppos);
57 static ctl_table lowmem_table[] = {
59 .ctl_name = VM_LOWMEM_DENY_PAGES,
60 .procname = "lowmem_deny_watermark_pages",
61 .data = &deny_pages,
62 .maxlen = sizeof(long),
63 .mode = 0644,
64 .child = NULL,
65 .proc_handler = &proc_dointvec,
66 .strategy = &sysctl_intvec,
67 }, {
68 .ctl_name = VM_LOWMEM_DENY,
69 .procname = "lowmem_deny_watermark",
70 .data = &deny_percentage,
71 .maxlen = sizeof(unsigned int),
72 .mode = 0444,
73 .child = NULL,
74 .proc_handler = &proc_dointvec_deny,
75 .strategy = &sysctl_intvec,
76 }, {
77 .ctl_name = VM_LOWMEM_LEVEL1_NOTIFY,
78 .procname = "lowmem_notify_low",
79 .data = &l1_notify,
80 .maxlen = sizeof(unsigned int),
81 .mode = 0444,
82 .child = NULL,
83 .proc_handler = &proc_dointvec_l1_notify,
84 .strategy = &sysctl_intvec,
85 }, {
86 .ctl_name = VM_LOWMEM_LEVEL2_NOTIFY,
87 .procname = "lowmem_notify_high",
88 .data = &l2_notify,
89 .maxlen = sizeof(unsigned int),
90 .mode = 0444,
91 .child = NULL,
92 .proc_handler = &proc_dointvec_l2_notify,
93 .strategy = &sysctl_intvec,
94 }, {
95 .ctl_name = VM_LOWMEM_USED_PAGES,
96 .procname = "lowmem_used_pages",
97 .data = &used_pages,
98 .maxlen = sizeof(long),
99 .mode = 0444,
100 .child = NULL,
101 .proc_handler = &proc_dointvec_used,
102 .strategy = &sysctl_intvec,
103 }, {
104 .ctl_name = VM_LOWMEM_NOTIFY_LOW_PAGES,
105 .procname = "lowmem_notify_low_pages",
106 .data = &notify_low_pages,
107 .maxlen = sizeof(long),
108 .mode = 0644,
109 .child = NULL,
110 .proc_handler = &proc_dointvec,
111 .strategy = &sysctl_intvec,
112 }, {
113 .ctl_name = VM_LOWMEM_NOTIFY_HIGH_PAGES,
114 .procname = "lowmem_notify_high_pages",
115 .data = &notify_high_pages,
116 .maxlen = sizeof(long),
117 .mode = 0644,
118 .child = NULL,
119 .proc_handler = &proc_dointvec,
120 .strategy = &sysctl_intvec,
121 }, {
122 .ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
123 .procname = "lowmem_nr_decay_pages",
124 .data = &nr_decay_pages,
125 .maxlen = sizeof(unsigned int),
126 .mode = 0644,
127 .child = NULL,
128 .proc_handler = &proc_dointvec,
129 .strategy = &sysctl_intvec,
130 }, {
131 .ctl_name = VM_LOWMEM_ALLOWED_UIDS,
132 .procname = "lowmem_allowed_uids",
133 .data = &allowed_uids,
134 .maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
135 .mode = 0644,
136 .child = NULL,
137 .proc_handler = &proc_dointvec_minmax,
138 .strategy = &sysctl_intvec,
139 .extra1 = &minuid,
140 .extra2 = &maxuid,
141 }, {
142 .ctl_name = VM_LOWMEM_ALLOWED_PAGES,
143 .procname = "lowmem_allowed_pages",
144 .data = &allowed_pages,
145 .maxlen = sizeof(unsigned long),
146 .mode = 0444,
147 .child = NULL,
148 .proc_handler = &proc_dointvec,
149 .strategy = &sysctl_intvec,
150 }, {
151 .ctl_name = VM_LOWMEM_FREE_PAGES,
152 .procname = "lowmem_free_pages",
153 .data = &lowmem_free_pages,
154 .maxlen = sizeof(unsigned long),
155 .mode = 0444,
156 .child = NULL,
157 .proc_handler = &proc_dointvec,
158 .strategy = &sysctl_intvec,
159 }, {
160 .ctl_name = 0
164 static ctl_table lowmem_root_table[] = {
166 .ctl_name = CTL_VM,
167 .procname = "vm",
168 .mode = 0555,
169 .child = lowmem_table,
170 }, {
171 .ctl_name = 0
/*
 * Define a static kobj_attribute called <_name>_attr via __ATTR_RO().
 * NOTE(review): __ATTR_RO is expected to bind it to <_name>_show - confirm
 * against the sysfs headers of the target kernel.
 */
#define KERNEL_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)

/* Current watermark flags; reported through sysfs and updated by the hook. */
static int low_watermark_reached;
static int high_watermark_reached;
180 static int
181 proc_dointvec_l1_notify(ctl_table *table, int write, struct file *filp,
182 void __user *buffer, size_t *lenp, loff_t *ppos)
184 l1_notify =
185 100 - (100 * notify_low_pages + allowed_pages / 2) / allowed_pages;
186 return proc_dointvec(table, write, filp, buffer, lenp, ppos);
189 static int
190 proc_dointvec_l2_notify(ctl_table *table, int write, struct file *filp,
191 void __user *buffer, size_t *lenp, loff_t *ppos)
193 l2_notify =
194 100 - (100 * notify_high_pages + allowed_pages / 2) / allowed_pages;
195 return proc_dointvec(table, write, filp, buffer, lenp, ppos);
198 static int
199 proc_dointvec_deny(ctl_table *table, int write, struct file *filp,
200 void __user *buffer, size_t *lenp, loff_t *ppos)
202 deny_percentage =
203 100 - (100 * deny_pages + allowed_pages / 2) / allowed_pages;
204 return proc_dointvec(table, write, filp, buffer, lenp, ppos);
207 static int
208 proc_dointvec_used(ctl_table *table, int write, struct file *filp,
209 void __user *buffer, size_t *lenp, loff_t *ppos)
211 if (lowmem_free_pages > 0 && allowed_pages > lowmem_free_pages)
212 used_pages = allowed_pages - lowmem_free_pages;
213 else
214 used_pages = 0;
215 return proc_dointvec(table, write, filp, buffer, lenp, ppos);
218 static ssize_t low_watermark_show(struct kobject *kobj,
219 struct kobj_attribute *attr, char *page)
221 return sprintf(page, "%u\n", low_watermark_reached);
224 static ssize_t high_watermark_show(struct kobject *kobj,
225 struct kobj_attribute *attr, char *page)
227 return sprintf(page, "%u\n", high_watermark_reached);
/* Instantiate low_watermark_attr and high_watermark_attr. */
KERNEL_ATTR_RO(low_watermark);
KERNEL_ATTR_RO(high_watermark);
233 static void low_watermark_state(int new_state)
235 if (low_watermark_reached != new_state) {
236 low_watermark_reached = new_state;
237 sysfs_notify(kernel_kobj, NULL, "low_watermark");
241 static void high_watermark_state(int new_state)
243 if (high_watermark_reached != new_state) {
244 high_watermark_reached = new_state;
245 sysfs_notify(kernel_kobj, NULL, "high_watermark");
249 static int low_vm_enough_memory(struct mm_struct *mm, long pages)
251 unsigned long free, allowed;
252 int cap_sys_admin = 0, notify;
254 if (cap_capable(current, CAP_SYS_ADMIN) == 0)
255 cap_sys_admin = 1;
257 allowed = totalram_pages - hugetlb_total_pages();
258 allowed_pages = allowed;
260 /* We activate ourselves only after both parameters have been
261 * configured. */
262 if (deny_pages == 0 || notify_low_pages == 0 || notify_high_pages == 0)
263 return __vm_enough_memory(mm, pages, cap_sys_admin);
265 vm_acct_memory(pages);
267 /* Easily freed pages when under VM pressure or direct reclaim */
268 free = global_page_state(NR_FILE_PAGES);
269 free += nr_swap_pages;
270 free += global_page_state(NR_SLAB_RECLAIMABLE);
272 if (likely(free > notify_low_pages))
273 goto enough_memory;
275 /* No luck, lets make it more expensive and try again.. */
276 free += nr_free_pages();
278 if (free < deny_pages) {
279 int i;
281 lowmem_free_pages = free;
282 low_watermark_state(1);
283 high_watermark_state(1);
284 /* Memory allocations by root are always allowed */
285 if (cap_sys_admin)
286 return 0;
288 /* OOM unkillable process is allowed to consume memory */
289 if (current->oomkilladj == OOM_DISABLE)
290 return 0;
292 /* uids from allowed_uids vector are also allowed no matter what */
293 for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
294 if (current->uid == allowed_uids[i])
295 return 0;
297 vm_unacct_memory(pages);
298 if (printk_ratelimit()) {
299 printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
300 current->pid, current->comm);
302 return -ENOMEM;
305 enough_memory:
306 /* See if we need to notify level 1 */
307 low_watermark_state(free < notify_low_pages);
310 * In the level 2 notification case things are more complicated,
311 * as the level that we drop the state and send a notification
312 * should be lower than when it is first triggered. Having this
313 * on the same watermark level ends up bouncing back and forth
314 * when applications are being stupid.
316 notify = free < notify_high_pages;
317 if (notify || free - nr_decay_pages > notify_high_pages)
318 high_watermark_state(notify);
320 /* We have plenty of memory */
321 lowmem_free_pages = free;
322 return 0;
325 static struct security_operations lowmem_security_ops = {
326 /* Use the capability functions for some of the hooks */
327 .ptrace_may_access = cap_ptrace_may_access,
328 .ptrace_traceme = cap_ptrace_traceme,
329 .capget = cap_capget,
330 .capset_check = cap_capset_check,
331 .capset_set = cap_capset_set,
332 .capable = cap_capable,
334 .bprm_apply_creds = cap_bprm_apply_creds,
335 .bprm_set_security = cap_bprm_set_security,
337 .task_post_setuid = cap_task_post_setuid,
338 .task_reparent_to_init = cap_task_reparent_to_init,
339 .vm_enough_memory = low_vm_enough_memory,
342 static struct ctl_table_header *lowmem_table_header;
344 static struct attribute *lowmem_attrs[] = {
345 &low_watermark_attr.attr,
346 &high_watermark_attr.attr,
347 NULL,
350 static struct attribute_group lowmem_attr_group = {
351 .attrs = lowmem_attrs,
354 static int __init lowmem_init(void)
356 int r;
358 /* register ourselves with the security framework */
359 if (register_security(&lowmem_security_ops)) {
360 printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
361 return -EINVAL;
364 /* initialize the uids vector */
365 memset(allowed_uids, 0, sizeof(allowed_uids));
367 lowmem_table_header = register_sysctl_table(lowmem_root_table);
368 if (unlikely(!lowmem_table_header))
369 return -EPERM;
371 r = sysfs_create_group(kernel_kobj,
372 &lowmem_attr_group);
373 if (unlikely(r))
374 return r;
376 printk(KERN_INFO MY_NAME ": Module initialized.\n");
378 return 0;
381 module_init(lowmem_init);
383 MODULE_DESCRIPTION("Low watermark LSM module");
384 MODULE_LICENSE("GPL");