Don't preload rarely seen large images
[chromium-blink-merge.git] / sandbox / linux / services / credentials.cc
blobdc62f1b21392b05012a6dda3a24b50be97bacf91
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/services/credentials.h"
7 #include <errno.h>
8 #include <signal.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <sys/syscall.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 #include <unistd.h>
16 #include "base/bind.h"
17 #include "base/files/file_path.h"
18 #include "base/files/file_util.h"
19 #include "base/logging.h"
20 #include "base/posix/eintr_wrapper.h"
21 #include "base/process/launch.h"
22 #include "base/template_util.h"
23 #include "base/third_party/valgrind/valgrind.h"
24 #include "build/build_config.h"
25 #include "sandbox/linux/services/namespace_utils.h"
26 #include "sandbox/linux/services/proc_util.h"
27 #include "sandbox/linux/services/syscall_wrappers.h"
28 #include "sandbox/linux/services/thread_helpers.h"
29 #include "sandbox/linux/system_headers/capability.h"
30 #include "sandbox/linux/system_headers/linux_signal.h"
32 namespace sandbox {
34 namespace {
36 bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
38 // Checks that the set of RES-uids and the set of RES-gids have
39 // one element each and return that element in |resuid| and |resgid|
40 // respectively. It's ok to pass NULL as one or both of the ids.
41 bool GetRESIds(uid_t* resuid, gid_t* resgid) {
42 uid_t ruid, euid, suid;
43 gid_t rgid, egid, sgid;
44 PCHECK(sys_getresuid(&ruid, &euid, &suid) == 0);
45 PCHECK(sys_getresgid(&rgid, &egid, &sgid) == 0);
46 const bool uids_are_equal = (ruid == euid) && (ruid == suid);
47 const bool gids_are_equal = (rgid == egid) && (rgid == sgid);
48 if (!uids_are_equal || !gids_are_equal) return false;
49 if (resuid) *resuid = euid;
50 if (resgid) *resgid = egid;
51 return true;
54 const int kExitSuccess = 0;
56 int ChrootToSelfFdinfo(void*) {
57 RAW_CHECK(sys_chroot("/proc/self/fdinfo/") == 0);
59 // CWD is essentially an implicit file descriptor, so be careful to not
60 // leave it behind.
61 RAW_CHECK(chdir("/") == 0);
62 _exit(kExitSuccess);
65 // chroot() to an empty dir that is "safe". To be safe, it must not contain
66 // any subdirectory (chroot-ing there would allow a chroot escape) and it must
67 // be impossible to create an empty directory there.
68 // We achieve this by doing the following:
69 // 1. We create a new process sharing file system information.
70 // 2. In the child, we chroot to /proc/self/fdinfo/
71 // This is already "safe", since fdinfo/ does not contain another directory and
72 // one cannot create another directory there.
73 // 3. The process dies
74 // After (3) happens, the directory is not available anymore in /proc.
75 bool ChrootToSafeEmptyDir() {
76 // We need to chroot to a fdinfo that is unique to a process and have that
77 // process die.
78 // 1. We don't want to simply fork() because duplicating the page tables is
79 // slow with a big address space.
80 // 2. We do not use a regular thread (that would unshare CLONE_FILES) because
81 // when we are in a PID namespace, we cannot easily get a handle to the
82 // /proc/tid directory for the thread (since /proc may not be aware of the
83 // PID namespace). With a process, we can just use /proc/self.
84 pid_t pid = -1;
85 char stack_buf[PTHREAD_STACK_MIN];
86 #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
87 defined(ARCH_CPU_MIPS64_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
88 // The stack grows downward.
89 void* stack = stack_buf + sizeof(stack_buf);
90 #else
91 #error "Unsupported architecture"
92 #endif
94 pid = clone(ChrootToSelfFdinfo, stack,
95 CLONE_VM | CLONE_VFORK | CLONE_FS | LINUX_SIGCHLD, nullptr,
96 nullptr, nullptr, nullptr);
97 PCHECK(pid != -1);
99 int status = -1;
100 PCHECK(HANDLE_EINTR(waitpid(pid, &status, 0)) == pid);
102 return WIFEXITED(status) && WEXITSTATUS(status) == kExitSuccess;
105 // CHECK() that an attempt to move to a new user namespace raised an expected
106 // errno.
107 void CheckCloneNewUserErrno(int error) {
108 // EPERM can happen if already in a chroot. EUSERS if too many nested
109 // namespaces are used. EINVAL for kernels that don't support the feature.
110 // Valgrind will ENOSYS unshare().
111 PCHECK(error == EPERM || error == EUSERS || error == EINVAL ||
112 error == ENOSYS);
115 // Converts a Capability to the corresponding Linux CAP_XXX value.
116 int CapabilityToKernelValue(Credentials::Capability cap) {
117 switch (cap) {
118 case Credentials::Capability::SYS_CHROOT:
119 return CAP_SYS_CHROOT;
120 case Credentials::Capability::SYS_ADMIN:
121 return CAP_SYS_ADMIN;
124 LOG(FATAL) << "Invalid Capability: " << static_cast<int>(cap);
125 return 0;
128 } // namespace.
130 // static
131 bool Credentials::DropAllCapabilities(int proc_fd) {
132 if (!SetCapabilities(proc_fd, std::vector<Capability>())) {
133 return false;
136 CHECK(!HasAnyCapability());
137 return true;
140 // static
141 bool Credentials::DropAllCapabilities() {
142 base::ScopedFD proc_fd(ProcUtil::OpenProc());
143 return Credentials::DropAllCapabilities(proc_fd.get());
146 // static
147 bool Credentials::DropAllCapabilitiesOnCurrentThread() {
148 return SetCapabilitiesOnCurrentThread(std::vector<Capability>());
151 // static
152 bool Credentials::SetCapabilitiesOnCurrentThread(
153 const std::vector<Capability>& caps) {
154 struct cap_hdr hdr = {};
155 hdr.version = _LINUX_CAPABILITY_VERSION_3;
156 struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {{}};
158 // Initially, cap has no capability flags set. Enable the effective and
159 // permitted flags only for the requested capabilities.
160 for (const Capability cap : caps) {
161 const int cap_num = CapabilityToKernelValue(cap);
162 const size_t index = CAP_TO_INDEX(cap_num);
163 const uint32_t mask = CAP_TO_MASK(cap_num);
164 data[index].effective |= mask;
165 data[index].permitted |= mask;
168 return sys_capset(&hdr, data) == 0;
171 // static
172 bool Credentials::SetCapabilities(int proc_fd,
173 const std::vector<Capability>& caps) {
174 DCHECK_LE(0, proc_fd);
176 #if !defined(THREAD_SANITIZER)
177 // With TSAN, accept to break the security model as it is a testing
178 // configuration.
179 CHECK(ThreadHelpers::IsSingleThreaded(proc_fd));
180 #endif
182 return SetCapabilitiesOnCurrentThread(caps);
185 bool Credentials::HasAnyCapability() {
186 struct cap_hdr hdr = {};
187 hdr.version = _LINUX_CAPABILITY_VERSION_3;
188 struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {{}};
190 PCHECK(sys_capget(&hdr, data) == 0);
192 for (size_t i = 0; i < arraysize(data); ++i) {
193 if (data[i].effective || data[i].permitted || data[i].inheritable) {
194 return true;
198 return false;
201 bool Credentials::HasCapability(Capability cap) {
202 struct cap_hdr hdr = {};
203 hdr.version = _LINUX_CAPABILITY_VERSION_3;
204 struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {{}};
206 PCHECK(sys_capget(&hdr, data) == 0);
208 const int cap_num = CapabilityToKernelValue(cap);
209 const size_t index = CAP_TO_INDEX(cap_num);
210 const uint32_t mask = CAP_TO_MASK(cap_num);
212 return (data[index].effective | data[index].permitted |
213 data[index].inheritable) &
214 mask;
217 // static
218 bool Credentials::CanCreateProcessInNewUserNS() {
219 // Valgrind will let clone(2) pass-through, but doesn't support unshare(),
220 // so always consider UserNS unsupported there.
221 if (IsRunningOnValgrind()) {
222 return false;
225 #if defined(THREAD_SANITIZER)
226 // With TSAN, processes will always have threads running and can never
227 // enter a new user namespace with MoveToNewUserNS().
228 return false;
229 #endif
231 // This is roughly a fork().
232 const pid_t pid = sys_clone(CLONE_NEWUSER | SIGCHLD, 0, 0, 0, 0);
234 if (pid == -1) {
235 CheckCloneNewUserErrno(errno);
236 return false;
239 // The parent process could have had threads. In the child, these threads
240 // have disappeared. Make sure to not do anything in the child, as this is a
241 // fragile execution environment.
242 if (pid == 0) {
243 _exit(kExitSuccess);
246 // Always reap the child.
247 int status = -1;
248 PCHECK(HANDLE_EINTR(waitpid(pid, &status, 0)) == pid);
249 CHECK(WIFEXITED(status));
250 CHECK_EQ(kExitSuccess, WEXITSTATUS(status));
252 // clone(2) succeeded, we can use CLONE_NEWUSER.
253 return true;
256 bool Credentials::MoveToNewUserNS() {
257 uid_t uid;
258 gid_t gid;
259 if (!GetRESIds(&uid, &gid)) {
260 // If all the uids (or gids) are not equal to each other, the security
261 // model will most likely confuse the caller, abort.
262 DVLOG(1) << "uids or gids differ!";
263 return false;
265 int ret = sys_unshare(CLONE_NEWUSER);
266 if (ret) {
267 const int unshare_errno = errno;
268 VLOG(1) << "Looks like unprivileged CLONE_NEWUSER may not be available "
269 << "on this kernel.";
270 CheckCloneNewUserErrno(unshare_errno);
271 return false;
274 if (NamespaceUtils::KernelSupportsDenySetgroups()) {
275 PCHECK(NamespaceUtils::DenySetgroups());
278 // The current {r,e,s}{u,g}id is now an overflow id (c.f.
279 // /proc/sys/kernel/overflowuid). Setup the uid and gid maps.
280 DCHECK(GetRESIds(NULL, NULL));
281 const char kGidMapFile[] = "/proc/self/gid_map";
282 const char kUidMapFile[] = "/proc/self/uid_map";
283 PCHECK(NamespaceUtils::WriteToIdMapFile(kGidMapFile, gid));
284 PCHECK(NamespaceUtils::WriteToIdMapFile(kUidMapFile, uid));
285 DCHECK(GetRESIds(NULL, NULL));
286 return true;
289 bool Credentials::DropFileSystemAccess(int proc_fd) {
290 CHECK_LE(0, proc_fd);
292 CHECK(ChrootToSafeEmptyDir());
293 CHECK(!base::DirectoryExists(base::FilePath("/proc")));
294 CHECK(!ProcUtil::HasOpenDirectory(proc_fd));
295 // We never let this function fail.
296 return true;
299 pid_t Credentials::ForkAndDropCapabilitiesInChild() {
300 pid_t pid = fork();
301 if (pid != 0) {
302 return pid;
305 // Since we just forked, we are single threaded.
306 PCHECK(DropAllCapabilitiesOnCurrentThread());
307 return 0;
310 } // namespace sandbox.