/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
int sysctl_unprivileged_bpf_disabled __read_mostly;
static LIST_HEAD(bpf_map_types);
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}
/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}
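
/* charge the map's pages against the invoking user's RLIMIT_MEMLOCK;
 * the charge is dropped by bpf_map_uncharge_memlock() when the map is freed
 */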
static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}
static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}
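
/* usercnt counts references held via user-space fds; when it drops to zero
 * a prog_array is cleared, breaking the reference cycle between programs
 * and the maps they are stored in
 */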
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}
/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	bpf_map_put_with_uref(filp->private_data);
	return 0;
}
static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};
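
/* wrap the map in an anonymous inode fd; the fd's private_data holds the
 * reference that bpf_map_release() drops on close
 */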
int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}
/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}
/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}
/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
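
/* copy the key in from user space, look it up under rcu_read_lock() and
 * copy the value back out to user space
 */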
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	rcu_read_lock();
	ptr = map->ops->map_lookup_elem(map, key);
	if (ptr)
		memcpy(value, ptr, map->value_size);
	rcu_read_unlock();

	err = -ENOENT;
	if (!ptr)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, map->value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
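
/* copy key and value in from user space and hand them to the map
 * implementation's update callback
 */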
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF programs that use maps run under rcu_read_lock(),
	 * therefore all map accessors rely on this fact, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value, attr->flags);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
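
/* copy the key in from user space and delete the matching element under
 * rcu_read_lock(), mirroring the lookup and update paths
 */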
static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
static LIST_HEAD(bpf_prog_types);
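
/* bind the registered ops for the given program type to the prog */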
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}
void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}
/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when program has bpf_call instructions
			 * and it passed bpf_check(), means that
			 * ops->get_func_proto must have been supplied, check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have prototype and verifier allowed
			 * programs to call them, must be real in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}
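
/* same RLIMIT_MEMLOCK accounting as bpf_map_charge_memlock(), applied to
 * the program's pages
 */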
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}
static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}
static void __prog_put_common(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}
/* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __prog_put_common);
}
void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		__prog_put_common(&prog->aux->rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put_rcu(prog);
	return 0;
}
static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};
int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}
static struct bpf_prog *__bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}
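
/* take another reference on the prog, refusing to exceed BPF_MAX_REFCNT
 * so the counter cannot be overflowed from user space
 */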
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
/* called by sockets/tracing/seccomp before attaching program to an event
 * pairs with bpf_prog_put()
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = __bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;

	prog = bpf_prog_inc(prog);
	fdput(f);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version
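
/* load pipeline: validate attr and license, copy instructions from user
 * space, bind type-specific ops, run the verifier, fix up helper calls,
 * select interpreter or JIT, and return a new fd on success
 */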
static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	err = bpf_prog_select_runtime(prog);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}
static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}
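
/* single entry point for all eBPF commands; 'union bpf_attr' is versioned
 * by size so the kernel can reject attribute bits it does not understand
 */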
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}