Don't preload rarely seen large images
[chromium-blink-merge.git] / components / nacl / loader / nonsfi / nonsfi_sandbox.cc
blob475c28c28c637f4975287402af9c28b65132117c
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/nacl/loader/nonsfi/nonsfi_sandbox.h"
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <linux/net.h>
10 #include <sys/mman.h>
11 #include <sys/prctl.h>
12 #include <sys/socket.h>
13 #include <sys/syscall.h>
14 #include <sys/time.h>
16 #include "base/basictypes.h"
17 #include "base/logging.h"
18 #include "base/time/time.h"
19 #include "build/build_config.h"
20 #include "content/public/common/sandbox_init.h"
21 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
22 #include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
23 #include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
24 #include "sandbox/linux/system_headers/linux_futex.h"
25 #include "sandbox/linux/system_headers/linux_syscalls.h"
27 // Chrome OS Daisy (ARM) build environment and PNaCl toolchain do not define
28 // MAP_STACK.
29 #if !defined(MAP_STACK)
30 # if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY)
31 # define MAP_STACK 0x20000
32 # else
33 // Note that, on other architecture, MAP_STACK has different value (e.g. mips'
34 // MAP_STACK is 0x40000), though Non-SFI is not supported on such
35 // architectures.
36 # error "Unknown platform."
37 # endif
38 #endif // !defined(MAP_STACK)
40 #define CASES SANDBOX_BPF_DSL_CASES
42 using sandbox::CrashSIGSYS;
43 using sandbox::CrashSIGSYSClone;
44 using sandbox::CrashSIGSYSFutex;
45 using sandbox::CrashSIGSYSPrctl;
46 using sandbox::bpf_dsl::Allow;
47 using sandbox::bpf_dsl::Arg;
48 using sandbox::bpf_dsl::BoolExpr;
49 using sandbox::bpf_dsl::Error;
50 using sandbox::bpf_dsl::If;
51 using sandbox::bpf_dsl::ResultExpr;
53 namespace nacl {
54 namespace nonsfi {
55 namespace {
57 ResultExpr RestrictFcntlCommands() {
58 const Arg<int> cmd(1);
59 const Arg<long> long_arg(2);
61 // We allow following cases:
62 // 1. F_SETFD + FD_CLOEXEC: libevent's epoll_init uses this.
63 // 2. F_GETFL: Used by SetNonBlocking in
64 // message_pump_libevent.cc and Channel::ChannelImpl::CreatePipe
65 // in ipc_channel_posix.cc. Note that the latter does not work
66 // with EPERM.
67 // 3. F_SETFL: Used by evutil_make_socket_nonblocking in
68 // libevent and SetNonBlocking. As the latter mix O_NONBLOCK to
69 // the return value of F_GETFL, so we need to allow O_ACCMODE in
70 // addition to O_NONBLOCK.
71 const uint64_t kAllowedMask = O_ACCMODE | O_NONBLOCK;
72 return If((cmd == F_SETFD && long_arg == FD_CLOEXEC) || cmd == F_GETFL ||
73 (cmd == F_SETFL && (long_arg & ~kAllowedMask) == 0),
74 Allow()).Else(CrashSIGSYS());
77 ResultExpr RestrictClone() {
78 // We allow clone only for new thread creation.
79 int clone_flags =
80 CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
81 CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS;
82 #if !defined(OS_NACL_NONSFI)
83 clone_flags |= CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
84 #endif
85 const Arg<int> flags(0);
86 return If(flags == clone_flags, Allow()).Else(CrashSIGSYSClone());
89 ResultExpr RestrictFutexOperation() {
90 // TODO(hamaji): Allow only FUTEX_PRIVATE_FLAG futexes.
91 const uint64_t kAllowedFutexFlags = FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME;
92 const Arg<int> op(1);
93 return Switch(op & ~kAllowedFutexFlags)
94 .CASES((FUTEX_WAIT,
95 FUTEX_WAKE,
96 FUTEX_REQUEUE,
97 FUTEX_CMP_REQUEUE,
98 FUTEX_WAKE_OP,
99 FUTEX_WAIT_BITSET,
100 FUTEX_WAKE_BITSET),
101 Allow())
102 .Default(CrashSIGSYSFutex());
105 ResultExpr RestrictPrctl() {
106 // base::PlatformThread::SetName() uses PR_SET_NAME so we return
107 // EPERM for it. Otherwise, we will raise SIGSYS.
108 const Arg<int> option(0);
109 return If(option == PR_SET_NAME, Error(EPERM)).Else(CrashSIGSYSPrctl());
#if defined(__i386__)
// Builds the filter applied to socketcall(), which is only compiled in for
// i386. The sub-calls let through are shutdown, sendmsg and recvmsg, plus
// socketpair when OS_NACL_NONSFI is not defined. Any other sub-call results
// in CrashSIGSYS().
ResultExpr RestrictSocketcall() {
  const Arg<int> subcall(0);

#if !defined(OS_NACL_NONSFI)
  // nacl_helper in Non-SFI mode still uses socketpair() internally
  // via libevent.
  // TODO(hidehiko): Remove this when the switching to nacl_helper_nonsfi
  // is completed.
  const BoolExpr leading_cases =
      subcall == SYS_SOCKETPAIR || subcall == SYS_SHUTDOWN;
#else
  const BoolExpr leading_cases = subcall == SYS_SHUTDOWN;
#endif

  const BoolExpr is_permitted =
      leading_cases || subcall == SYS_SENDMSG || subcall == SYS_RECVMSG;
  return If(is_permitted, Allow()).Else(CrashSIGSYS());
}
#endif
129 ResultExpr RestrictMprotect() {
130 // TODO(jln, keescook, drewry): Limit the use of mprotect by adding
131 // some features to linux kernel.
132 const uint64_t kAllowedMask = PROT_READ | PROT_WRITE | PROT_EXEC;
133 const Arg<int> prot(2);
134 return If((prot & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS());
137 ResultExpr RestrictMmap() {
138 const uint64_t kAllowedFlagMask =
139 MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_FIXED;
140 // When PROT_EXEC is specified, IRT mmap of Non-SFI NaCl helper
141 // calls mmap without PROT_EXEC and then adds PROT_EXEC by mprotect,
142 // so we do not need to allow PROT_EXEC in mmap.
143 const uint64_t kAllowedProtMask = PROT_READ | PROT_WRITE;
144 const Arg<int> prot(2), flags(3);
145 return If((prot & ~kAllowedProtMask) == 0 && (flags & ~kAllowedFlagMask) == 0,
146 Allow()).Else(CrashSIGSYS());
149 #if !defined(OS_NACL_NONSFI) && (defined(__x86_64__) || defined(__arm__))
150 ResultExpr RestrictSocketpair() {
151 // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen.
152 static_assert(AF_UNIX == PF_UNIX, "AF_UNIX must equal PF_UNIX.");
153 const Arg<int> domain(0);
154 return If(domain == AF_UNIX, Allow()).Else(CrashSIGSYS());
156 #endif
// Returns true when |sysno| is one of the syscalls listed below, and false
// for every other number. EvaluateSyscall() answers the listed syscalls with
// EPERM rather than crashing with SIGSYS.
bool IsGracefullyDenied(int sysno) {
  switch (sysno) {
    // libevent tries epoll_create first and falls back to poll when it
    // fails.
    case __NR_epoll_create:

    // third_party/libevent calls these, but it only checks
    // getuid() != geteuid() and getgid() != getegid(), so returning -1 from
    // all of them is fine.
#if defined(__i386__) || defined(__arm__)
    case __NR_getegid32:
    case __NR_geteuid32:
    case __NR_getgid32:
    case __NR_getuid32:
#endif
    case __NR_getegid:
    case __NR_geteuid:
    case __NR_getgid:
    case __NR_getuid:

    // tcmalloc calls madvise in TCMalloc_SystemRelease.
    case __NR_madvise:

    // EPERM instead of SIGSYS because glibc tries to open files in /proc:
    // openat via opendir via get_nprocs_conf, and open via get_nprocs.
    // TODO(hamaji): Remove this when we switch to newlib.
    case __NR_open:
    case __NR_openat:

    // For RunSandboxSanityChecks().
    case __NR_ptrace:

    // glibc uses this for its pthread implementation and stops using it
    // once it gets EPERM back.
    // TODO(hamaji): newlib does not use this. Make this SIGTRAP once
    // we have switched to newlib.
    case __NR_set_robust_list:

    // Obsolete in ARM EABI, but x86 glibc indirectly calls it in sysconf.
#if defined(__i386__) || defined(__x86_64__)
    case __NR_time:
#endif
      return true;

    default:
      break;
  }
  return false;
}
202 void RunSandboxSanityChecks() {
203 errno = 0;
204 // Make a ptrace request with an invalid PID.
205 long ptrace_ret = syscall(
206 __NR_ptrace, 3 /* = PTRACE_PEEKUSER */, -1 /* pid */, NULL, NULL);
207 CHECK_EQ(-1, ptrace_ret);
208 // Without the sandbox on, this ptrace call would ESRCH instead.
209 CHECK_EQ(EPERM, errno);
212 } // namespace
214 ResultExpr NaClNonSfiBPFSandboxPolicy::EvaluateSyscall(int sysno) const {
215 switch (sysno) {
216 // Allowed syscalls.
217 #if defined(__i386__) || defined(__arm__)
218 case __NR__llseek:
219 #elif defined(__x86_64__)
220 case __NR_lseek:
221 #endif
222 case __NR_close:
223 case __NR_dup:
224 case __NR_dup2:
225 case __NR_exit:
226 case __NR_exit_group:
227 #if defined(__i386__) || defined(__arm__)
228 case __NR_fstat64:
229 #elif defined(__x86_64__)
230 case __NR_fstat:
231 #endif
232 // TODO(hamaji): Remove the need of gettid. Currently, this is
233 // called from PlatformThread::CurrentId().
234 case __NR_gettid:
235 case __NR_gettimeofday:
236 case __NR_munmap:
237 case __NR_nanosleep:
238 // TODO(hamaji): Remove the need of pipe. Currently, this is
239 // called from base::MessagePumpLibevent::Init().
240 case __NR_pipe:
241 case __NR_poll:
242 case __NR_pread64:
243 case __NR_pwrite64:
244 case __NR_read:
245 case __NR_restart_syscall:
246 case __NR_sched_yield:
247 // __NR_times needed as clock() is called by CommandBufferHelper, which is
248 // used by NaCl applications that use Pepper's 3D interfaces.
249 // See crbug.com/264856 for details.
250 case __NR_times:
251 case __NR_write:
252 #if defined(__arm__)
253 case __ARM_NR_cacheflush:
254 #endif
255 return Allow();
257 case __NR_clock_getres:
258 case __NR_clock_gettime:
259 return sandbox::RestrictClockID();
261 case __NR_clone:
262 return RestrictClone();
264 #if defined(__x86_64__)
265 case __NR_fcntl:
266 #endif
267 #if defined(__i386__) || defined(__arm__)
268 case __NR_fcntl64:
269 #endif
270 return RestrictFcntlCommands();
272 case __NR_futex:
273 return RestrictFutexOperation();
275 #if defined(__x86_64__)
276 case __NR_mmap:
277 #endif
278 #if defined(__i386__) || defined(__arm__)
279 case __NR_mmap2:
280 #endif
281 return RestrictMmap();
282 case __NR_mprotect:
283 return RestrictMprotect();
285 case __NR_prctl:
286 return RestrictPrctl();
288 #if defined(__i386__)
289 case __NR_socketcall:
290 return RestrictSocketcall();
291 #endif
292 #if defined(__x86_64__) || defined(__arm__)
293 case __NR_recvmsg:
294 case __NR_sendmsg:
295 case __NR_shutdown:
296 return Allow();
297 #if !defined(OS_NACL_NONSFI)
298 // nacl_helper in Non-SFI mode still uses socketpair() internally
299 // via libevent.
300 // TODO(hidehiko): Remove this when the switching to nacl_helper_nonsfi
301 // is completed.
302 case __NR_socketpair:
303 return RestrictSocketpair();
304 #endif
305 #endif
307 case __NR_brk:
308 // The behavior of brk on Linux is different from other system
309 // calls. It does not return errno but the current break on
310 // failure. glibc thinks brk failed if the return value of brk
311 // is less than the requested address (i.e., brk(addr) < addr).
312 // So, glibc thinks brk succeeded if we return -EPERM and we
313 // need to return zero instead.
314 return Error(0);
316 default:
317 if (IsGracefullyDenied(sysno))
318 return Error(EPERM);
319 return CrashSIGSYS();
323 ResultExpr NaClNonSfiBPFSandboxPolicy::InvalidSyscall() const {
324 return CrashSIGSYS();
327 bool InitializeBPFSandbox(base::ScopedFD proc_fd) {
328 bool sandbox_is_initialized = content::InitializeSandbox(
329 scoped_ptr<sandbox::bpf_dsl::Policy>(
330 new nacl::nonsfi::NaClNonSfiBPFSandboxPolicy()),
331 proc_fd.Pass());
332 if (!sandbox_is_initialized)
333 return false;
334 RunSandboxSanityChecks();
335 return true;
338 } // namespace nonsfi
339 } // namespace nacl