1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/nacl/loader/nonsfi/nonsfi_sandbox.h"
9 #include <linux/futex.h>
10 #include <linux/net.h>
12 #include <sys/prctl.h>
13 #include <sys/ptrace.h>
14 #include <sys/socket.h>
15 #include <sys/syscall.h>
18 #include "base/basictypes.h"
19 #include "base/logging.h"
20 #include "base/time/time.h"
21 #include "build/build_config.h"
22 #include "content/public/common/sandbox_init.h"
23 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
24 #include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
25 #include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
26 #include "sandbox/linux/services/linux_syscalls.h"
28 #if defined(__arm__) && !defined(MAP_STACK)
29 // Chrome OS Daisy (ARM) build environment has old headers.
30 #define MAP_STACK 0x20000
33 #define CASES SANDBOX_BPF_DSL_CASES
35 using sandbox::CrashSIGSYS
;
36 using sandbox::CrashSIGSYSClone
;
37 using sandbox::CrashSIGSYSFutex
;
38 using sandbox::CrashSIGSYSPrctl
;
39 using sandbox::bpf_dsl::Allow
;
40 using sandbox::bpf_dsl::Arg
;
41 using sandbox::bpf_dsl::BoolExpr
;
42 using sandbox::bpf_dsl::Error
;
43 using sandbox::bpf_dsl::If
;
44 using sandbox::bpf_dsl::ResultExpr
;
50 ResultExpr
RestrictFcntlCommands() {
51 const Arg
<int> cmd(1);
52 const Arg
<long> long_arg(2);
54 // We allow following cases:
55 // 1. F_SETFD + FD_CLOEXEC: libevent's epoll_init uses this.
56 // 2. F_GETFL: Used by SetNonBlocking in
57 // message_pump_libevent.cc and Channel::ChannelImpl::CreatePipe
58 // in ipc_channel_posix.cc. Note that the latter does not work
60 // 3. F_SETFL: Used by evutil_make_socket_nonblocking in
61 // libevent and SetNonBlocking. As the latter mix O_NONBLOCK to
62 // the return value of F_GETFL, so we need to allow O_ACCMODE in
63 // addition to O_NONBLOCK.
64 const unsigned long denied_mask
= ~(O_ACCMODE
| O_NONBLOCK
);
65 return If((cmd
== F_SETFD
&& long_arg
== FD_CLOEXEC
) || cmd
== F_GETFL
||
66 (cmd
== F_SETFL
&& (long_arg
& denied_mask
) == 0),
67 Allow()).Else(CrashSIGSYS());
70 ResultExpr
RestrictClone() {
71 // We allow clone only for new thread creation.
72 const Arg
<int> flags(0);
73 return If(flags
== (CLONE_VM
| CLONE_FS
| CLONE_FILES
| CLONE_SIGHAND
|
74 CLONE_THREAD
| CLONE_SYSVSEM
| CLONE_SETTLS
|
75 CLONE_PARENT_SETTID
| CLONE_CHILD_CLEARTID
),
76 Allow()).Else(CrashSIGSYSClone());
// Builds the policy result for futex by dispatching on the operation value
// with the allowed flag bits masked off. Operations that match no arm of the
// Switch fall to CrashSIGSYSFutex().
// NOTE(review): the declaration of `op` and the individual Switch arms are
// not visible in this view of the file.
79 ResultExpr
RestrictFutexOperation() {
80 // TODO(hamaji): Allow only FUTEX_PRIVATE_FLAG futexes.
// Flag bits that may accompany an operation without changing how it is
// matched.
81 const int kAllowedFutexFlags
= FUTEX_PRIVATE_FLAG
| FUTEX_CLOCK_REALTIME
;
// Complement of the allowed flags: ANDing with it clears those flag bits, so
// only the remaining bits take part in the Switch.
82 const int kOperationMask
= ~kAllowedFutexFlags
;
84 return Switch(op
& kOperationMask
)
// Anything not matched by an earlier arm ends up here.
93 .Default(CrashSIGSYSFutex());
96 ResultExpr
RestrictPrctl() {
97 // base::PlatformThread::SetName() uses PR_SET_NAME so we return
98 // EPERM for it. Otherwise, we will raise SIGSYS.
99 const Arg
<int> option(0);
100 return If(option
== PR_SET_NAME
, Error(EPERM
)).Else(CrashSIGSYSPrctl());
103 #if defined(__i386__)
104 ResultExpr
RestrictSocketcall() {
105 // We only allow socketpair, sendmsg, and recvmsg.
106 const Arg
<int> call(0);
107 return If(call
== SYS_SOCKETPAIR
|| call
== SYS_SHUTDOWN
||
108 call
== SYS_SENDMSG
|| call
== SYS_RECVMSG
,
109 Allow()).Else(CrashSIGSYS());
113 ResultExpr
RestrictMprotect() {
114 // TODO(jln, keescook, drewry): Limit the use of mprotect by adding
115 // some features to linux kernel.
116 const uint32_t denied_mask
= ~(PROT_READ
| PROT_WRITE
| PROT_EXEC
);
117 const Arg
<int> prot(2);
118 return If((prot
& denied_mask
) == 0, Allow()).Else(CrashSIGSYS());
121 ResultExpr
RestrictMmap() {
122 const uint32_t denied_flag_mask
= ~(MAP_SHARED
| MAP_PRIVATE
|
123 MAP_ANONYMOUS
| MAP_STACK
| MAP_FIXED
);
124 // When PROT_EXEC is specified, IRT mmap of Non-SFI NaCl helper
125 // calls mmap without PROT_EXEC and then adds PROT_EXEC by mprotect,
126 // so we do not need to allow PROT_EXEC in mmap.
127 const uint32_t denied_prot_mask
= ~(PROT_READ
| PROT_WRITE
);
128 const Arg
<int> prot(2), flags(3);
129 return If((prot
& denied_prot_mask
) == 0 && (flags
& denied_flag_mask
) == 0,
130 Allow()).Else(CrashSIGSYS());
133 #if defined(__x86_64__) || defined(__arm__)
134 ResultExpr
RestrictSocketpair() {
135 // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen.
136 COMPILE_ASSERT(AF_UNIX
== PF_UNIX
, af_unix_pf_unix_different
);
137 const Arg
<int> domain(0);
138 return If(domain
== AF_UNIX
, Allow()).Else(CrashSIGSYS());
// Reports whether |sysno| belongs to the set of syscalls that are denied
// without crashing. EvaluateSyscall() consults this just before its
// CrashSIGSYS() fallback, and the notes below mention EPERM being used
// instead of SIGSYS.
// NOTE(review): the switch header, most case labels and the return
// statements are not visible in this view of the file, so the exact set of
// syscalls cannot be confirmed from here.
142 bool IsGracefullyDenied(int sysno
) {
144 // libevent tries this first and then falls back to poll if
145 // epoll_create fails.
146 case __NR_epoll_create
:
147 // third_party/libevent uses them, but we can just return -1 from
148 // them as it is just checking getuid() != geteuid() and
149 // getgid() != getegid()
150 #if defined(__i386__) || defined(__arm__)
160 // tcmalloc calls madvise in TCMalloc_SystemRelease.
162 // EPERM instead of SIGSYS as glibc tries to open files in /proc.
163 // openat via opendir via get_nprocs_conf and open via get_nprocs.
164 // TODO(hamaji): Remove this when we switch to newlib.
// The ptrace probe made by RunSandboxSanityChecks() expects EPERM.
167 // For RunSandboxSanityChecks().
169 // glibc uses this for its pthread implementation. If we return
170 // EPERM for this, glibc will stop using this.
171 // TODO(hamaji): newlib does not use this. Make this SIGTRAP once
172 // we have switched to newlib.
173 case __NR_set_robust_list
:
174 // This is obsolete in ARM EABI, but x86 glibc indirectly calls
176 #if defined(__i386__) || defined(__x86_64__)
// Probes that the sandbox policy is in effect by issuing a ptrace request
// and requiring, via CHECK_EQ, that it returns -1 with errno equal to EPERM.
// NOTE(review): at least one line between the signature and the ptrace call
// is not visible in this view (possibly an errno reset -- TODO confirm).
186 void RunSandboxSanityChecks() {
188 // Make a ptrace request with an invalid PID.
189 long ptrace_ret
= ptrace(PTRACE_PEEKUSER
, -1 /* pid */, NULL
, NULL
);
// The request must fail...
190 CHECK_EQ(-1, ptrace_ret
);
191 // Without the sandbox on, this ptrace call would ESRCH instead.
// ...and it must fail with EPERM specifically.
192 CHECK_EQ(EPERM
, errno
);
// Maps the syscall number |sysno| to the policy result for that syscall.
// The visible arms return the result of sandbox::RestrictClockID() for the
// clock syscalls and of this file's Restrict*() helpers (clone, fcntl,
// futex, mmap, mprotect, prctl, socketcall, socketpair). Syscalls for which
// IsGracefullyDenied() holds are handled just before the final fallback,
// which is CrashSIGSYS().
// NOTE(review): the switch header, many case labels, the matching #endif
// lines and several return statements are not visible in this view of the
// file.
197 ResultExpr
NaClNonSfiBPFSandboxPolicy::EvaluateSyscall(int sysno
) const {
200 #if defined(__i386__) || defined(__arm__)
202 #elif defined(__x86_64__)
209 case __NR_exit_group
:
210 #if defined(__i386__) || defined(__arm__)
212 #elif defined(__x86_64__)
215 // TODO(hamaji): Remove the need of gettid. Currently, this is
216 // called from PlatformThread::CurrentId().
218 case __NR_gettimeofday
:
221 // TODO(hamaji): Remove the need of pipe. Currently, this is
222 // called from base::MessagePumpLibevent::Init().
228 case __NR_restart_syscall
:
229 case __NR_sched_yield
:
230 // __NR_times needed as clock() is called by CommandBufferHelper, which is
231 // used by NaCl applications that use Pepper's 3D interfaces.
232 // See crbug.com/264856 for details.
236 case __ARM_NR_cacheflush
:
// Clock queries are delegated to the shared clock-id restriction.
240 case __NR_clock_getres
:
241 case __NR_clock_gettime
:
242 return sandbox::RestrictClockID();
// Each of the following returns delegates to the argument-level policy
// built by the correspondingly named helper in this file.
245 return RestrictClone();
247 #if defined(__x86_64__)
250 #if defined(__i386__) || defined(__arm__)
253 return RestrictFcntlCommands();
256 return RestrictFutexOperation();
258 #if defined(__x86_64__)
261 #if defined(__i386__) || defined(__arm__)
264 return RestrictMmap();
266 return RestrictMprotect();
269 return RestrictPrctl();
271 #if defined(__i386__)
272 case __NR_socketcall
:
273 return RestrictSocketcall();
275 #if defined(__x86_64__) || defined(__arm__)
280 case __NR_socketpair
:
281 return RestrictSocketpair();
285 // The behavior of brk on Linux is different from other system
286 // calls. It does not return errno but the current break on
287 // failure. glibc thinks brk failed if the return value of brk
288 // is less than the requested address (i.e., brk(addr) < addr).
289 // So, glibc thinks brk succeeded if we return -EPERM and we
290 // need to return zero instead.
// Syscalls on the graceful-denial list are handled here; the statement this
// condition guards is not visible in this view.
294 if (IsGracefullyDenied(sysno
))
// Final fallback for every syscall not handled above.
296 return CrashSIGSYS();
300 ResultExpr
NaClNonSfiBPFSandboxPolicy::InvalidSyscall() const {
301 return CrashSIGSYS();
// Hands a newly allocated NaClNonSfiBPFSandboxPolicy to
// content::InitializeSandbox() and then runs RunSandboxSanityChecks().
// NOTE(review): the statement guarded by the failure check and the return
// statements are not visible in this view; presumably the function returns
// false when initialization fails and true otherwise -- TODO confirm.
304 bool InitializeBPFSandbox() {
// The scoped_ptr passes the policy object to content::InitializeSandbox().
305 bool sandbox_is_initialized
= content::InitializeSandbox(
306 scoped_ptr
<sandbox::bpf_dsl::SandboxBPFDSLPolicy
>(
307 new nacl::nonsfi::NaClNonSfiBPFSandboxPolicy()));
308 if (!sandbox_is_initialized
)
// Verify that the policy is observably in effect.
310 RunSandboxSanityChecks();
314 } // namespace nonsfi