Re-submission of https://codereview.chromium.org/1041213003/
[chromium-blink-merge.git] / content / browser / zygote_host / zygote_host_impl_linux.cc
blob3506207247e02e91284efb188fcccf7fb2900fa3
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/zygote_host/zygote_host_impl_linux.h"
7 #include <string.h>
8 #include <sys/socket.h>
9 #include <sys/stat.h>
10 #include <sys/types.h>
11 #include <unistd.h>
13 #include "base/base_switches.h"
14 #include "base/command_line.h"
15 #include "base/environment.h"
16 #include "base/files/file_enumerator.h"
17 #include "base/files/file_util.h"
18 #include "base/files/scoped_file.h"
19 #include "base/linux_util.h"
20 #include "base/logging.h"
21 #include "base/memory/linked_ptr.h"
22 #include "base/memory/scoped_ptr.h"
23 #include "base/memory/scoped_vector.h"
24 #include "base/metrics/histogram.h"
25 #include "base/metrics/sparse_histogram.h"
26 #include "base/path_service.h"
27 #include "base/posix/eintr_wrapper.h"
28 #include "base/posix/unix_domain_socket_linux.h"
29 #include "base/process/launch.h"
30 #include "base/process/memory.h"
31 #include "base/process/process_handle.h"
32 #include "base/strings/string_number_conversions.h"
33 #include "base/strings/string_util.h"
34 #include "base/strings/utf_string_conversions.h"
35 #include "base/time/time.h"
36 #include "content/browser/renderer_host/render_sandbox_host_linux.h"
37 #include "content/common/child_process_sandbox_support_impl_linux.h"
38 #include "content/common/zygote_commands_linux.h"
39 #include "content/public/browser/content_browser_client.h"
40 #include "content/public/common/content_switches.h"
41 #include "content/public/common/result_codes.h"
42 #include "sandbox/linux/services/credentials.h"
43 #include "sandbox/linux/services/namespace_sandbox.h"
44 #include "sandbox/linux/services/namespace_utils.h"
45 #include "sandbox/linux/suid/client/setuid_sandbox_host.h"
46 #include "sandbox/linux/suid/common/sandbox.h"
47 #include "ui/base/ui_base_switches.h"
48 #include "ui/gfx/switches.h"
50 #if defined(USE_TCMALLOC)
51 #include "third_party/tcmalloc/chromium/src/gperftools/heap-profiler.h"
52 #endif
54 namespace content {
56 namespace {
58 // Receive a fixed message on fd and return the sender's PID.
59 // Returns true if the message received matches the expected message.
60 bool ReceiveFixedMessage(int fd,
61 const char* expect_msg,
62 size_t expect_len,
63 base::ProcessId* sender_pid) {
64 char buf[expect_len + 1];
65 ScopedVector<base::ScopedFD> fds_vec;
67 const ssize_t len = UnixDomainSocket::RecvMsgWithPid(
68 fd, buf, sizeof(buf), &fds_vec, sender_pid);
69 if (static_cast<size_t>(len) != expect_len)
70 return false;
71 if (memcmp(buf, expect_msg, expect_len) != 0)
72 return false;
73 if (!fds_vec.empty())
74 return false;
75 return true;
78 } // namespace
80 // static
81 ZygoteHost* ZygoteHost::GetInstance() {
82 return ZygoteHostImpl::GetInstance();
85 ZygoteHostImpl::ZygoteHostImpl()
86 : control_fd_(-1),
87 control_lock_(),
88 pid_(-1),
89 init_(false),
90 use_suid_sandbox_for_adj_oom_score_(false),
91 sandbox_binary_(),
92 have_read_sandbox_status_word_(false),
93 sandbox_status_(0),
94 child_tracking_lock_(),
95 list_of_running_zygote_children_(),
96 should_teardown_after_last_child_exits_(false) {}
98 ZygoteHostImpl::~ZygoteHostImpl() { TearDown(); }
100 // static
101 ZygoteHostImpl* ZygoteHostImpl::GetInstance() {
102 return Singleton<ZygoteHostImpl>::get();
105 void ZygoteHostImpl::Init(const std::string& sandbox_cmd) {
106 DCHECK(!init_);
107 init_ = true;
109 base::FilePath chrome_path;
110 CHECK(PathService::Get(base::FILE_EXE, &chrome_path));
111 base::CommandLine cmd_line(chrome_path);
113 cmd_line.AppendSwitchASCII(switches::kProcessType, switches::kZygoteProcess);
115 int fds[2];
116 CHECK(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds) == 0);
117 CHECK(UnixDomainSocket::EnableReceiveProcessId(fds[0]));
118 base::FileHandleMappingVector fds_to_map;
119 fds_to_map.push_back(std::make_pair(fds[1], kZygoteSocketPairFd));
121 base::LaunchOptions options;
122 const base::CommandLine& browser_command_line =
123 *base::CommandLine::ForCurrentProcess();
124 if (browser_command_line.HasSwitch(switches::kZygoteCmdPrefix)) {
125 cmd_line.PrependWrapper(
126 browser_command_line.GetSwitchValueNative(switches::kZygoteCmdPrefix));
128 // Append any switches from the browser process that need to be forwarded on
129 // to the zygote/renderers.
130 // Should this list be obtained from browser_render_process_host.cc?
131 static const char* kForwardSwitches[] = {
132 switches::kAllowSandboxDebugging,
133 switches::kDisableSeccompFilterSandbox,
134 switches::kEnableLogging, // Support, e.g., --enable-logging=stderr.
135 // Zygote process needs to know what resources to have loaded when it
136 // becomes a renderer process.
137 switches::kForceDeviceScaleFactor,
138 switches::kLoggingLevel,
139 switches::kNoSandbox,
140 switches::kPpapiInProcess,
141 switches::kRegisterPepperPlugins,
142 switches::kV,
143 switches::kVModule,
145 cmd_line.CopySwitchesFrom(browser_command_line, kForwardSwitches,
146 arraysize(kForwardSwitches));
148 GetContentClient()->browser()->AppendExtraCommandLineSwitches(&cmd_line, -1);
150 sandbox_binary_ = sandbox_cmd.c_str();
152 const bool using_namespace_sandbox = ShouldUseNamespaceSandbox();
153 // A non empty sandbox_cmd means we want a SUID sandbox.
154 const bool using_suid_sandbox =
155 !sandbox_cmd.empty() && !using_namespace_sandbox;
157 // Use the SUID sandbox for adjusting OOM scores when we are using the setuid
158 // or namespace sandbox. This is needed beacuse the processes are
159 // non-dumpable, so /proc/pid/oom_score_adj can only be written by root.
160 use_suid_sandbox_for_adj_oom_score_ =
161 using_namespace_sandbox || using_suid_sandbox;
163 // Start up the sandbox host process and get the file descriptor for the
164 // renderers to talk to it.
165 const int sfd = RenderSandboxHostLinux::GetInstance()->GetRendererSocket();
166 fds_to_map.push_back(std::make_pair(sfd, GetSandboxFD()));
168 base::ScopedFD dummy_fd;
169 if (using_suid_sandbox) {
170 scoped_ptr<sandbox::SetuidSandboxHost> sandbox_host(
171 sandbox::SetuidSandboxHost::Create());
172 sandbox_host->PrependWrapper(&cmd_line);
173 sandbox_host->SetupLaunchOptions(&options, &fds_to_map, &dummy_fd);
174 sandbox_host->SetupLaunchEnvironment();
177 options.fds_to_remap = &fds_to_map;
178 base::Process process =
179 using_namespace_sandbox
180 ? sandbox::NamespaceSandbox::LaunchProcess(cmd_line, options)
181 : base::LaunchProcess(cmd_line, options);
182 CHECK(process.IsValid()) << "Failed to launch zygote process";
184 dummy_fd.reset();
186 if (using_suid_sandbox || using_namespace_sandbox) {
187 // The SUID sandbox will execute the zygote in a new PID namespace, and
188 // the main zygote process will then fork from there. Watch now our
189 // elaborate dance to find and validate the zygote's PID.
191 // First we receive a message from the zygote boot process.
192 base::ProcessId boot_pid;
193 CHECK(ReceiveFixedMessage(
194 fds[0], kZygoteBootMessage, sizeof(kZygoteBootMessage), &boot_pid));
196 // Within the PID namespace, the zygote boot process thinks it's PID 1,
197 // but its real PID can never be 1. This gives us a reliable test that
198 // the kernel is translating the sender's PID to our namespace.
199 CHECK_GT(boot_pid, 1)
200 << "Received invalid process ID for zygote; kernel might be too old? "
201 "See crbug.com/357670 or try using --"
202 << switches::kDisableSetuidSandbox << " to workaround.";
204 // Now receive the message that the zygote's ready to go, along with the
205 // main zygote process's ID.
206 CHECK(ReceiveFixedMessage(
207 fds[0], kZygoteHelloMessage, sizeof(kZygoteHelloMessage), &pid_));
208 CHECK_GT(pid_, 1);
210 if (process.Pid() != pid_) {
211 // Reap the sandbox.
212 base::EnsureProcessGetsReaped(process.Pid());
214 } else {
215 // Not using the SUID sandbox.
216 // Note that ~base::Process() will reset the internal value, but there's no
217 // real "handle" on POSIX so that is safe.
218 pid_ = process.Pid();
221 close(fds[1]);
222 control_fd_ = fds[0];
224 Pickle pickle;
225 pickle.WriteInt(kZygoteCommandGetSandboxStatus);
226 if (!SendMessage(pickle, NULL))
227 LOG(FATAL) << "Cannot communicate with zygote";
228 // We don't wait for the reply. We'll read it in ReadReply.
231 void ZygoteHostImpl::TearDownAfterLastChild() {
232 bool do_teardown = false;
234 base::AutoLock lock(child_tracking_lock_);
235 should_teardown_after_last_child_exits_ = true;
236 do_teardown = list_of_running_zygote_children_.empty();
238 if (do_teardown) {
239 TearDown();
243 // Note: this is also called from the destructor.
244 void ZygoteHostImpl::TearDown() {
245 base::AutoLock lock(control_lock_);
246 if (control_fd_ > -1) {
247 // Closing the IPC channel will act as a notification to exit
248 // to the Zygote.
249 if (IGNORE_EINTR(close(control_fd_))) {
250 PLOG(ERROR) << "Could not close Zygote control channel.";
251 NOTREACHED();
253 control_fd_ = -1;
257 void ZygoteHostImpl::ZygoteChildBorn(pid_t process) {
258 base::AutoLock lock(child_tracking_lock_);
259 bool new_element_inserted =
260 list_of_running_zygote_children_.insert(process).second;
261 DCHECK(new_element_inserted);
264 void ZygoteHostImpl::ZygoteChildDied(pid_t process) {
265 bool do_teardown = false;
267 base::AutoLock lock(child_tracking_lock_);
268 size_t num_erased = list_of_running_zygote_children_.erase(process);
269 DCHECK_EQ(1U, num_erased);
270 do_teardown = should_teardown_after_last_child_exits_ &&
271 list_of_running_zygote_children_.empty();
273 if (do_teardown) {
274 TearDown();
278 bool ZygoteHostImpl::SendMessage(const Pickle& data,
279 const std::vector<int>* fds) {
280 DCHECK_NE(-1, control_fd_);
281 CHECK(data.size() <= kZygoteMaxMessageLength)
282 << "Trying to send too-large message to zygote (sending " << data.size()
283 << " bytes, max is " << kZygoteMaxMessageLength << ")";
284 CHECK(!fds || fds->size() <= UnixDomainSocket::kMaxFileDescriptors)
285 << "Trying to send message with too many file descriptors to zygote "
286 << "(sending " << fds->size() << ", max is "
287 << UnixDomainSocket::kMaxFileDescriptors << ")";
289 return UnixDomainSocket::SendMsg(control_fd_,
290 data.data(), data.size(),
291 fds ? *fds : std::vector<int>());
294 ssize_t ZygoteHostImpl::ReadReply(void* buf, size_t buf_len) {
295 DCHECK_NE(-1, control_fd_);
296 // At startup we send a kZygoteCommandGetSandboxStatus request to the zygote,
297 // but don't wait for the reply. Thus, the first time that we read from the
298 // zygote, we get the reply to that request.
299 if (!have_read_sandbox_status_word_) {
300 if (HANDLE_EINTR(read(control_fd_, &sandbox_status_,
301 sizeof(sandbox_status_))) !=
302 sizeof(sandbox_status_)) {
303 return -1;
306 have_read_sandbox_status_word_ = true;
307 UMA_HISTOGRAM_SPARSE_SLOWLY("Linux.SandboxStatus", sandbox_status_);
310 return HANDLE_EINTR(read(control_fd_, buf, buf_len));
313 pid_t ZygoteHostImpl::ForkRequest(const std::vector<std::string>& argv,
314 scoped_ptr<FileDescriptorInfo> mapping,
315 const std::string& process_type) {
316 DCHECK(init_);
317 Pickle pickle;
319 int raw_socks[2];
320 PCHECK(0 == socketpair(AF_UNIX, SOCK_SEQPACKET, 0, raw_socks));
321 base::ScopedFD my_sock(raw_socks[0]);
322 base::ScopedFD peer_sock(raw_socks[1]);
323 CHECK(UnixDomainSocket::EnableReceiveProcessId(my_sock.get()));
325 pickle.WriteInt(kZygoteCommandFork);
326 pickle.WriteString(process_type);
327 pickle.WriteInt(argv.size());
328 for (std::vector<std::string>::const_iterator
329 i = argv.begin(); i != argv.end(); ++i)
330 pickle.WriteString(*i);
332 // Fork requests contain one file descriptor for the PID oracle, and one
333 // more for each file descriptor mapping for the child process.
334 const size_t num_fds_to_send = 1 + mapping->GetMappingSize();
335 pickle.WriteInt(num_fds_to_send);
337 std::vector<int> fds;
339 // First FD to send is peer_sock.
340 // TODO(morrita): Ideally, this should be part of the mapping so that
341 // FileDescriptorInfo can manages its lifetime.
342 fds.push_back(peer_sock.get());
344 // The rest come from mapping.
345 for (size_t i = 0; i < mapping->GetMappingSize(); ++i) {
346 pickle.WriteUInt32(mapping->GetIDAt(i));
347 fds.push_back(mapping->GetFDAt(i));
350 // Sanity check that we've populated |fds| correctly.
351 DCHECK_EQ(num_fds_to_send, fds.size());
353 pid_t pid;
355 base::AutoLock lock(control_lock_);
356 if (!SendMessage(pickle, &fds))
357 return base::kNullProcessHandle;
358 mapping.reset();
359 peer_sock.reset();
362 char buf[sizeof(kZygoteChildPingMessage) + 1];
363 ScopedVector<base::ScopedFD> recv_fds;
364 base::ProcessId real_pid;
366 ssize_t n = UnixDomainSocket::RecvMsgWithPid(
367 my_sock.get(), buf, sizeof(buf), &recv_fds, &real_pid);
368 if (n != sizeof(kZygoteChildPingMessage) ||
369 0 != memcmp(buf,
370 kZygoteChildPingMessage,
371 sizeof(kZygoteChildPingMessage))) {
372 // Zygote children should still be trustworthy when they're supposed to
373 // ping us, so something's broken if we don't receive a valid ping.
374 LOG(ERROR) << "Did not receive ping from zygote child";
375 NOTREACHED();
376 real_pid = -1;
378 my_sock.reset();
380 // Always send PID back to zygote.
381 Pickle pid_pickle;
382 pid_pickle.WriteInt(kZygoteCommandForkRealPID);
383 pid_pickle.WriteInt(real_pid);
384 if (!SendMessage(pid_pickle, NULL))
385 return base::kNullProcessHandle;
388 // Read the reply, which pickles the PID and an optional UMA enumeration.
389 static const unsigned kMaxReplyLength = 2048;
390 char buf[kMaxReplyLength];
391 const ssize_t len = ReadReply(buf, sizeof(buf));
393 Pickle reply_pickle(buf, len);
394 PickleIterator iter(reply_pickle);
395 if (len <= 0 || !iter.ReadInt(&pid))
396 return base::kNullProcessHandle;
398 // If there is a nonempty UMA name string, then there is a UMA
399 // enumeration to record.
400 std::string uma_name;
401 int uma_sample;
402 int uma_boundary_value;
403 if (iter.ReadString(&uma_name) &&
404 !uma_name.empty() &&
405 iter.ReadInt(&uma_sample) &&
406 iter.ReadInt(&uma_boundary_value)) {
407 // We cannot use the UMA_HISTOGRAM_ENUMERATION macro here,
408 // because that's only for when the name is the same every time.
409 // Here we're using whatever name we got from the other side.
410 // But since it's likely that the same one will be used repeatedly
411 // (even though it's not guaranteed), we cache it here.
412 static base::HistogramBase* uma_histogram;
413 if (!uma_histogram || uma_histogram->histogram_name() != uma_name) {
414 uma_histogram = base::LinearHistogram::FactoryGet(
415 uma_name, 1,
416 uma_boundary_value,
417 uma_boundary_value + 1,
418 base::HistogramBase::kUmaTargetedHistogramFlag);
420 uma_histogram->Add(uma_sample);
423 if (pid <= 0)
424 return base::kNullProcessHandle;
427 #if !defined(OS_OPENBSD)
428 // This is just a starting score for a renderer or extension (the
429 // only types of processes that will be started this way). It will
430 // get adjusted as time goes on. (This is the same value as
431 // chrome::kLowestRendererOomScore in chrome/chrome_constants.h, but
432 // that's not something we can include here.)
433 const int kLowestRendererOomScore = 300;
434 AdjustRendererOOMScore(pid, kLowestRendererOomScore);
435 #endif
437 ZygoteChildBorn(pid);
438 return pid;
441 #if !defined(OS_OPENBSD)
442 void ZygoteHostImpl::AdjustRendererOOMScore(base::ProcessHandle pid,
443 int score) {
444 // 1) You can't change the oom_score_adj of a non-dumpable process
445 // (EPERM) unless you're root. Because of this, we can't set the
446 // oom_adj from the browser process.
448 // 2) We can't set the oom_score_adj before entering the sandbox
449 // because the zygote is in the sandbox and the zygote is as
450 // critical as the browser process. Its oom_adj value shouldn't
451 // be changed.
453 // 3) A non-dumpable process can't even change its own oom_score_adj
454 // because it's root owned 0644. The sandboxed processes don't
455 // even have /proc, but one could imagine passing in a descriptor
456 // from outside.
458 // So, in the normal case, we use the SUID binary to change it for us.
459 // However, Fedora (and other SELinux systems) don't like us touching other
460 // process's oom_score_adj (or oom_adj) values
461 // (https://bugzilla.redhat.com/show_bug.cgi?id=581256).
463 // The offical way to get the SELinux mode is selinux_getenforcemode, but I
464 // don't want to add another library to the build as it's sure to cause
465 // problems with other, non-SELinux distros.
467 // So we just check for files in /selinux. This isn't foolproof, but it's not
468 // bad and it's easy.
470 static bool selinux;
471 static bool selinux_valid = false;
473 if (!selinux_valid) {
474 const base::FilePath kSelinuxPath("/selinux");
475 base::FileEnumerator en(kSelinuxPath, false, base::FileEnumerator::FILES);
476 bool has_selinux_files = !en.Next().empty();
478 selinux = access(kSelinuxPath.value().c_str(), X_OK) == 0 &&
479 has_selinux_files;
480 selinux_valid = true;
483 if (use_suid_sandbox_for_adj_oom_score_ && !selinux) {
484 #if defined(USE_TCMALLOC)
485 // If heap profiling is running, these processes are not exiting, at least
486 // on ChromeOS. The easiest thing to do is not launch them when profiling.
487 // TODO(stevenjb): Investigate further and fix.
488 if (IsHeapProfilerRunning())
489 return;
490 #endif
491 std::vector<std::string> adj_oom_score_cmdline;
492 adj_oom_score_cmdline.push_back(sandbox_binary_);
493 adj_oom_score_cmdline.push_back(sandbox::kAdjustOOMScoreSwitch);
494 adj_oom_score_cmdline.push_back(base::Int64ToString(pid));
495 adj_oom_score_cmdline.push_back(base::IntToString(score));
497 base::Process sandbox_helper_process;
498 base::LaunchOptions options;
500 // sandbox_helper_process is a setuid binary.
501 options.allow_new_privs = true;
503 sandbox_helper_process =
504 base::LaunchProcess(adj_oom_score_cmdline, options);
505 if (sandbox_helper_process.IsValid())
506 base::EnsureProcessGetsReaped(sandbox_helper_process.Pid());
507 } else if (!use_suid_sandbox_for_adj_oom_score_) {
508 if (!base::AdjustOOMScore(pid, score))
509 PLOG(ERROR) << "Failed to adjust OOM score of renderer with pid " << pid;
512 #endif
514 void ZygoteHostImpl::EnsureProcessTerminated(pid_t process) {
515 DCHECK(init_);
516 Pickle pickle;
518 pickle.WriteInt(kZygoteCommandReap);
519 pickle.WriteInt(process);
520 if (!SendMessage(pickle, NULL))
521 LOG(ERROR) << "Failed to send Reap message to zygote";
522 ZygoteChildDied(process);
525 base::TerminationStatus ZygoteHostImpl::GetTerminationStatus(
526 base::ProcessHandle handle,
527 bool known_dead,
528 int* exit_code) {
529 DCHECK(init_);
530 Pickle pickle;
531 pickle.WriteInt(kZygoteCommandGetTerminationStatus);
532 pickle.WriteBool(known_dead);
533 pickle.WriteInt(handle);
535 static const unsigned kMaxMessageLength = 128;
536 char buf[kMaxMessageLength];
537 ssize_t len;
539 base::AutoLock lock(control_lock_);
540 if (!SendMessage(pickle, NULL))
541 LOG(ERROR) << "Failed to send GetTerminationStatus message to zygote";
542 len = ReadReply(buf, sizeof(buf));
545 // Set this now to handle the error cases.
546 if (exit_code)
547 *exit_code = RESULT_CODE_NORMAL_EXIT;
548 int status = base::TERMINATION_STATUS_NORMAL_TERMINATION;
550 if (len == -1) {
551 LOG(WARNING) << "Error reading message from zygote: " << errno;
552 } else if (len == 0) {
553 LOG(WARNING) << "Socket closed prematurely.";
554 } else {
555 Pickle read_pickle(buf, len);
556 int tmp_status, tmp_exit_code;
557 PickleIterator iter(read_pickle);
558 if (!iter.ReadInt(&tmp_status) || !iter.ReadInt(&tmp_exit_code)) {
559 LOG(WARNING)
560 << "Error parsing GetTerminationStatus response from zygote.";
561 } else {
562 if (exit_code)
563 *exit_code = tmp_exit_code;
564 status = tmp_status;
568 if (status != base::TERMINATION_STATUS_STILL_RUNNING) {
569 ZygoteChildDied(handle);
571 return static_cast<base::TerminationStatus>(status);
574 pid_t ZygoteHostImpl::GetPid() const {
575 return pid_;
578 int ZygoteHostImpl::GetSandboxStatus() const {
579 if (have_read_sandbox_status_word_)
580 return sandbox_status_;
581 return 0;
584 bool ZygoteHostImpl::ShouldUseNamespaceSandbox() {
585 const base::CommandLine& command_line =
586 *base::CommandLine::ForCurrentProcess();
587 if (command_line.HasSwitch(switches::kNoSandbox)) {
588 return false;
591 if (command_line.HasSwitch(switches::kDisableNamespaceSandbox)) {
592 return false;
595 if (!sandbox::Credentials::CanCreateProcessInNewUserNS()) {
596 return false;
599 return true;
602 } // namespace content