1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/nacl/loader/nonsfi/nonsfi_sandbox.h"
11 #include <sys/prctl.h>
12 #include <sys/socket.h>
13 #include <sys/syscall.h>
16 #include "base/basictypes.h"
17 #include "base/logging.h"
18 #include "base/time/time.h"
19 #include "build/build_config.h"
20 #include "content/public/common/sandbox_init.h"
21 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
22 #include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
23 #include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
24 #include "sandbox/linux/system_headers/linux_futex.h"
25 #include "sandbox/linux/system_headers/linux_signal.h"
26 #include "sandbox/linux/system_headers/linux_syscalls.h"
// Chrome OS Daisy (ARM) build environment and PNaCl toolchain do not define
// MAP_STACK, so define it here, per architecture, when it is missing.
#if !defined(MAP_STACK)
# if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY)
#  define MAP_STACK 0x20000
# elif defined(ARCH_CPU_MIPS_FAMILY)
#  define MAP_STACK 0x40000
# else
// Note that, on other architectures, MAP_STACK has different value,
// though Non-SFI is not supported on such architectures.
#  error "Unknown platform."
# endif
#endif  // !defined(MAP_STACK)
42 #define CASES SANDBOX_BPF_DSL_CASES
44 using sandbox::CrashSIGSYS
;
45 using sandbox::CrashSIGSYSClone
;
46 using sandbox::CrashSIGSYSFutex
;
47 using sandbox::CrashSIGSYSPrctl
;
48 using sandbox::bpf_dsl::Allow
;
49 using sandbox::bpf_dsl::Arg
;
50 using sandbox::bpf_dsl::BoolExpr
;
51 using sandbox::bpf_dsl::Error
;
52 using sandbox::bpf_dsl::If
;
53 using sandbox::bpf_dsl::ResultExpr
;
59 ResultExpr
RestrictFcntlCommands() {
60 const Arg
<int> cmd(1);
61 const Arg
<long> long_arg(2);
63 // We allow following cases:
64 // 1. F_SETFD + FD_CLOEXEC: libevent's epoll_init uses this.
65 // 2. F_GETFL: Used by SetNonBlocking in
66 // message_pump_libevent.cc and Channel::ChannelImpl::CreatePipe
67 // in ipc_channel_posix.cc. Note that the latter does not work
69 // 3. F_SETFL: Used by evutil_make_socket_nonblocking in
70 // libevent and SetNonBlocking. As the latter mix O_NONBLOCK to
71 // the return value of F_GETFL, so we need to allow O_ACCMODE in
72 // addition to O_NONBLOCK.
73 const uint64_t kAllowedMask
= O_ACCMODE
| O_NONBLOCK
;
74 return If((cmd
== F_SETFD
&& long_arg
== FD_CLOEXEC
) || cmd
== F_GETFL
||
75 (cmd
== F_SETFL
&& (long_arg
& ~kAllowedMask
) == 0),
76 Allow()).Else(CrashSIGSYS());
// Builds the seccomp-bpf rule for clone(): the flags argument (argument 0)
// must be exactly equal to clone_flags; any other value results in
// CrashSIGSYSClone().
// NOTE(review): the statement that declares clone_flags (the left-hand side of
// the flag expression below) and the #endif closing the OS_NACL_NONSFI
// conditional are not visible in this listing -- confirm against the full
// file.
79 ResultExpr
RestrictClone() {
80 // We allow clone only for new thread creation.
82 CLONE_VM
| CLONE_FS
| CLONE_FILES
| CLONE_SIGHAND
|
83 CLONE_THREAD
| CLONE_SYSVSEM
| CLONE_SETTLS
| CLONE_PARENT_SETTID
;
// When OS_NACL_NONSFI is not defined, CLONE_CHILD_CLEARTID is added to the
// required flag set.
84 #if !defined(OS_NACL_NONSFI)
85 clone_flags
|= CLONE_CHILD_CLEARTID
;
// The comparison is an exact match, not a subset test.
87 const Arg
<int> flags(0);
88 return If(flags
== clone_flags
, Allow()).Else(CrashSIGSYSClone());
// Builds the seccomp-bpf rule for futex(): switches on the operation value
// with the FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME bits masked off, and
// falls back to CrashSIGSYSFutex() when no case matches.
// NOTE(review): the declaration of `op` and the list of allowed cases between
// Switch(...) and .Default(...) are not visible in this listing -- confirm
// against the full file.
91 ResultExpr
RestrictFutexOperation() {
92 // TODO(hamaji): Allow only FUTEX_PRIVATE_FLAG futexes.
// Modifier bits that may accompany any operation; they are stripped before
// the operation is matched.
93 const uint64_t kAllowedFutexFlags
= FUTEX_PRIVATE_FLAG
| FUTEX_CLOCK_REALTIME
;
95 return Switch(op
& ~kAllowedFutexFlags
)
104 .Default(CrashSIGSYSFutex());
107 ResultExpr
RestrictPrctl() {
108 // base::PlatformThread::SetName() uses PR_SET_NAME so we return
109 // EPERM for it. Otherwise, we will raise SIGSYS.
110 const Arg
<int> option(0);
111 return If(option
== PR_SET_NAME
, Error(EPERM
)).Else(CrashSIGSYSPrctl());
114 #if defined(__i386__)
// Builds the seccomp-bpf rule for socketcall(), keyed on its first argument
// (`call`): the listed sub-calls are allowed and anything else crashes with
// SIGSYS.
// NOTE(review): the line that opens the If(...) expression and the #endif
// closing the OS_NACL_NONSFI conditional are not visible in this listing --
// confirm against the full file.
115 ResultExpr
RestrictSocketcall() {
116 // We only allow shutdown, sendmsg, and recvmsg, plus socketpair when
// OS_NACL_NONSFI is not defined.
117 const Arg
<int> call(0);
119 #if !defined(OS_NACL_NONSFI)
120 // nacl_helper in Non-SFI mode still uses socketpair() internally
122 // TODO(hidehiko): Remove this when the switching to nacl_helper_nonsfi
124 call
== SYS_SOCKETPAIR
||
126 call
== SYS_SHUTDOWN
|| call
== SYS_SENDMSG
|| call
== SYS_RECVMSG
,
127 Allow()).Else(CrashSIGSYS());
131 ResultExpr
RestrictMprotect() {
132 // TODO(jln, keescook, drewry): Limit the use of mprotect by adding
133 // some features to linux kernel.
134 const uint64_t kAllowedMask
= PROT_READ
| PROT_WRITE
| PROT_EXEC
;
135 const Arg
<int> prot(2);
136 return If((prot
& ~kAllowedMask
) == 0, Allow()).Else(CrashSIGSYS());
139 ResultExpr
RestrictMmap() {
140 const uint64_t kAllowedFlagMask
=
141 MAP_SHARED
| MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_STACK
| MAP_FIXED
;
142 // When PROT_EXEC is specified, IRT mmap of Non-SFI NaCl helper
143 // calls mmap without PROT_EXEC and then adds PROT_EXEC by mprotect,
144 // so we do not need to allow PROT_EXEC in mmap.
145 const uint64_t kAllowedProtMask
= PROT_READ
| PROT_WRITE
;
146 const Arg
<int> prot(2), flags(3);
147 return If((prot
& ~kAllowedProtMask
) == 0 && (flags
& ~kAllowedFlagMask
) == 0,
148 Allow()).Else(CrashSIGSYS());
151 ResultExpr
RestrictTgkill(int policy_pid
) {
152 const Arg
<int> tgid(0), tid(1), signum(2);
153 // Only sending SIGUSR1 to a thread in the same process is allowed.
154 return If(tgid
== policy_pid
&&
155 // Arg does not support a greater-than operator, so two separate
156 // checks are needed to ensure tid is positive.
158 (tid
& (1u << 31)) == 0 && // tid is non-negative.
159 signum
== LINUX_SIGUSR1
,
160 Allow()).Else(CrashSIGSYS());
163 #if !defined(OS_NACL_NONSFI) && (defined(__x86_64__) || defined(__arm__))
164 ResultExpr
RestrictSocketpair() {
165 // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen.
166 static_assert(AF_UNIX
== PF_UNIX
, "AF_UNIX must equal PF_UNIX.");
167 const Arg
<int> domain(0);
168 return If(domain
== AF_UNIX
, Allow()).Else(CrashSIGSYS());
// Predicate over a syscall number. The case labels below name syscalls that,
// per the accompanying comments, are meant to fail with an error instead of
// raising SIGSYS.
// NOTE(review): the switch statement, most of the case labels, the matching
// #endif lines and the return statements are not visible in this listing.
// Presumably listed syscalls yield true and everything else false -- confirm
// against the full file.
172 bool IsGracefullyDenied(int sysno
) {
174 // libevent tries this first and then falls back to poll if
175 // epoll_create fails.
176 case __NR_epoll_create
:
177 // third_party/libevent uses them, but we can just return -1 from
178 // them as it is just checking getuid() != geteuid() and
179 // getgid() != getegid()
180 #if defined(__i386__) || defined(__arm__)
190 // tcmalloc calls madvise in TCMalloc_SystemRelease.
192 // EPERM instead of SIGSYS as glibc tries to open files in /proc.
193 // openat via opendir via get_nprocs_conf and open via get_nprocs.
194 // TODO(hamaji): Remove this when we switch to newlib.
197 // For RunSandboxSanityChecks().
199 // glibc uses this for its pthread implementation. If we return
200 // EPERM for this, glibc will stop using this.
201 // TODO(hamaji): newlib does not use this. Make this SIGTRAP once
202 // we have switched to newlib.
203 case __NR_set_robust_list
:
204 // This is obsolete in ARM EABI, but x86 glibc indirectly calls
206 #if defined(__i386__) || defined(__x86_64__)
// Sanity-checks that the sandbox is in effect: issues a raw ptrace syscall
// and CHECKs that it returns -1 with errno set to EPERM.
// NOTE(review): one original line right after the function header is not
// visible in this listing -- confirm against the full file.
216 void RunSandboxSanityChecks() {
218 // Make a ptrace request with an invalid PID.
219 long ptrace_ret
= syscall(
220 __NR_ptrace
, 3 /* = PTRACE_PEEKUSER */, -1 /* pid */, NULL
, NULL
);
// The call must fail...
221 CHECK_EQ(-1, ptrace_ret
);
222 // Without the sandbox on, this ptrace call would ESRCH instead.
// ...and it must fail with EPERM specifically.
223 CHECK_EQ(EPERM
, errno
);
228 NaClNonSfiBPFSandboxPolicy::NaClNonSfiBPFSandboxPolicy()
229 : policy_pid_(getpid()) {
232 NaClNonSfiBPFSandboxPolicy::~NaClNonSfiBPFSandboxPolicy() {
233 // Make sure that this policy is created, used and destroyed by a single
235 DCHECK_EQ(getpid(), policy_pid_
);
// Maps the syscall number |sysno| to the policy result for that syscall.
// The visible branches delegate to per-syscall helpers
// (sandbox::RestrictClockID, RestrictClone, RestrictFcntlCommands,
// RestrictFutexOperation, RestrictMmap, RestrictMprotect, RestrictPrctl,
// RestrictSocketcall, RestrictSocketpair, RestrictTgkill). Near the end,
// |sysno| is tested with IsGracefullyDenied(), and the last visible statement
// returns CrashSIGSYS().
// NOTE(review): the switch header, many case labels, several return
// statements (including the ones for the unconditionally allowed syscalls, for
// brk, and for the IsGracefullyDenied() branch) and the #endif lines are not
// visible in this listing -- confirm against the full file before relying on
// this summary.
238 ResultExpr
NaClNonSfiBPFSandboxPolicy::EvaluateSyscall(int sysno
) const {
241 #if defined(__i386__) || defined(__arm__)
243 #elif defined(__x86_64__)
250 case __NR_exit_group
:
251 #if defined(__i386__) || defined(__arm__)
253 #elif defined(__x86_64__)
256 // TODO(hamaji): Remove the need of gettid. Currently, this is
257 // called from PlatformThread::CurrentId().
259 case __NR_gettimeofday
:
262 // TODO(hamaji): Remove the need of pipe. Currently, this is
263 // called from base::MessagePumpLibevent::Init().
269 case __NR_restart_syscall
:
270 case __NR_sched_yield
:
271 // __NR_times needed as clock() is called by CommandBufferHelper, which is
272 // used by NaCl applications that use Pepper's 3D interfaces.
273 // See crbug.com/264856 for details.
277 case __ARM_NR_cacheflush
:
// Clock syscalls are filtered by clock id.
281 case __NR_clock_getres
:
282 case __NR_clock_gettime
:
283 return sandbox::RestrictClockID();
286 return RestrictClone();
288 #if defined(__x86_64__)
291 #if defined(__i386__) || defined(__arm__)
294 return RestrictFcntlCommands();
297 return RestrictFutexOperation();
299 #if defined(__x86_64__)
302 #if defined(__i386__) || defined(__arm__)
305 return RestrictMmap();
307 return RestrictMprotect();
310 return RestrictPrctl();
312 #if defined(__i386__)
313 case __NR_socketcall
:
314 return RestrictSocketcall();
316 #if defined(__x86_64__) || defined(__arm__)
321 #if !defined(OS_NACL_NONSFI)
322 // nacl_helper in Non-SFI mode still uses socketpair() internally
324 // TODO(hidehiko): Remove this when the switching to nacl_helper_nonsfi
326 case __NR_socketpair
:
327 return RestrictSocketpair();
// tgkill is checked against the PID stored in policy_pid_.
332 return RestrictTgkill(policy_pid_
);
335 // The behavior of brk on Linux is different from other system
336 // calls. It does not return errno but the current break on
337 // failure. glibc thinks brk failed if the return value of brk
338 // is less than the requested address (i.e., brk(addr) < addr).
339 // So, glibc thinks brk succeeded if we return -EPERM and we
340 // need to return zero instead.
344 if (IsGracefullyDenied(sysno
))
346 return CrashSIGSYS();
350 ResultExpr
NaClNonSfiBPFSandboxPolicy::InvalidSyscall() const {
351 return CrashSIGSYS();
// Installs the Non-SFI seccomp-bpf policy: hands a freshly allocated
// NaClNonSfiBPFSandboxPolicy to content::InitializeSandbox() and afterwards
// runs RunSandboxSanityChecks().
// NOTE(review): the remaining argument(s) of the InitializeSandbox() call
// (presumably |proc_fd|), the body of the failure branch and the return
// statements are not visible in this listing -- confirm against the full file.
354 bool InitializeBPFSandbox(base::ScopedFD proc_fd
) {
355 bool sandbox_is_initialized
= content::InitializeSandbox(
356 scoped_ptr
<sandbox::bpf_dsl::Policy
>(
357 new nacl::nonsfi::NaClNonSfiBPFSandboxPolicy()),
// Checked first: did InitializeSandbox() report failure?
359 if (!sandbox_is_initialized
)
361 RunSandboxSanityChecks();
365 } // namespace nonsfi