Updates.
[glibc/history.git] / nscd / connections.c
blobd975b1818f9404189eade1464d940dd4bc868d62
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* Allocation wrappers with built-in error checking; they are defined
   outside this file.  */
extern void *xmalloc (size_t size);
extern void *xcalloc (size_t nmemb, size_t size);
extern void *xrealloc (void *old, size_t size);
/* State needed to run nscd as an unprivileged user.  A non-NULL
   SERVER_USER makes nscd_init drop privileges; SERVER_UID and
   SERVER_GID are the IDs switched to.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;

/* STAT_UID may request statistics in addition to root.  */
const char *stat_user;
uid_t stat_uid;

/* Supplementary groups of the server user (presumably filled in by
   begin_drop_privileges; its definition is not visible here).  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

/* The privilege drop happens in two steps, both called by nscd_init.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs[lastdb] =
101 [pwddb] = {
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
103 .enabled = 0,
104 .check_file = 1,
105 .persistent = 0,
106 .shared = 0,
107 .max_db_size = DEFAULT_MAX_DB_SIZE,
108 .filename = "/etc/passwd",
109 .db_filename = _PATH_NSCD_PASSWD_DB,
110 .disabled_iov = &pwd_iov_disabled,
111 .postimeout = 3600,
112 .negtimeout = 20,
113 .wr_fd = -1,
114 .ro_fd = -1,
115 .mmap_used = false
117 [grpdb] = {
118 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
119 .enabled = 0,
120 .check_file = 1,
121 .persistent = 0,
122 .shared = 0,
123 .max_db_size = DEFAULT_MAX_DB_SIZE,
124 .filename = "/etc/group",
125 .db_filename = _PATH_NSCD_GROUP_DB,
126 .disabled_iov = &grp_iov_disabled,
127 .postimeout = 3600,
128 .negtimeout = 60,
129 .wr_fd = -1,
130 .ro_fd = -1,
131 .mmap_used = false
133 [hstdb] = {
134 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .shared = 0,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .filename = "/etc/hosts",
141 .db_filename = _PATH_NSCD_HOSTS_DB,
142 .disabled_iov = &hst_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 20,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
152 /* Mapping of request type to database. */
153 static struct database_dyn *const serv2db[LASTREQ] =
155 [GETPWBYNAME] = &dbs[pwddb],
156 [GETPWBYUID] = &dbs[pwddb],
157 [GETGRBYNAME] = &dbs[grpdb],
158 [GETGRBYGID] = &dbs[grpdb],
159 [GETHOSTBYNAME] = &dbs[hstdb],
160 [GETHOSTBYNAMEv6] = &dbs[hstdb],
161 [GETHOSTBYADDR] = &dbs[hstdb],
162 [GETHOSTBYADDRv6] = &dbs[hstdb],
163 [GETFDPW] = &dbs[pwddb],
164 [GETFDGR] = &dbs[grpdb],
165 [GETFDHST] = &dbs[hstdb],
166 [GETAI] = &dbs[hstdb],
167 [INITGROUPS] = &dbs[grpdb]
/* Seconds that pass between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL 15


/* Number of threads to start with; -1 means "not configured", in which
   case nscd_init picks a default.  */
int nthreads = -1;
/* Upper limit for the number of threads.  */
int max_nthreads = 32;

/* Listening socket for incoming connections.  */
static int sock;

/* How often clients had to wait.  */
unsigned long int client_queued;
/* Send LEN bytes from BUF over socket FD, retrying after partial
   transfers.  Returns the negative result of send() on error, otherwise
   the number of bytes actually sent (which is short only if send()
   reported zero bytes).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - left;
}
205 #ifdef HAVE_SENDFILE
/* Transfer LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile(), retrying after partial transfers.  Returns the negative
   result of sendfile() on error, otherwise the number of bytes
   transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  for (;;)
    {
      /* sendfile() advances OFF itself.  */
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (moved <= 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  if (moved < 0)
    return moved;
  return len - left;
}
222 #endif
/* Markers stored in the per-byte usage map built while verifying a
   persistent database.  The low bits name the kind of object occupying
   a byte; the flag bits mark the first/last byte of an object and
   whether a `first' hash entry references it.  */
enum usekey
  {
    /* Byte not claimed by any object.  */
    use_not = 0,

    /* Flag bits.  These three are not really used on their own, they
       are symbolic constants combined with the kinds below.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Key stored outside of its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
249 static int
250 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
251 enum usekey use, ref_t start, size_t len)
253 assert (len >= 2);
255 if (start > first_free || start + len > first_free
256 || (start & BLOCK_ALIGN_M1))
257 return 0;
259 if (usemap[start] == use_not)
261 /* Add the start marker. */
262 usemap[start] = use | use_begin;
263 use &= ~use_first;
265 while (--len > 0)
266 if (usemap[++start] != use_not)
267 return 0;
268 else
269 usemap[start] = use;
271 /* Add the end marker. */
272 usemap[start] = use | use_end;
274 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
276 /* Hash entries can't be shared. */
277 if (use == use_he)
278 return 0;
280 usemap[start] |= (use & use_first);
281 use &= ~use_first;
283 while (--len > 1)
284 if (usemap[++start] != use)
285 return 0;
287 if (usemap[++start] != (use | use_end))
288 return 0;
290 else
291 /* Points to a wrong object or somewhere in the middle. */
292 return 0;
294 return 1;
298 /* Verify data in persistent database. */
299 static int
300 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
302 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
304 time_t now = time (NULL);
306 struct database_pers_head *head = mem;
307 struct database_pers_head head_copy = *head;
309 /* Check that the header that was read matches the head in the database. */
310 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
311 return 0;
313 /* First some easy tests: make sure the database header is sane. */
314 if (head->version != DB_VERSION
315 || head->header_size != sizeof (*head)
316 /* We allow a timestamp to be one hour ahead of the current time.
317 This should cover daylight saving time changes. */
318 || head->timestamp > now + 60 * 60 + 60
319 || (head->gc_cycle & 1)
320 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
321 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
322 || head->first_free < 0
323 || head->first_free > head->data_size
324 || (head->first_free & BLOCK_ALIGN_M1) != 0
325 || head->maxnentries < 0
326 || head->maxnsearched < 0)
327 return 0;
329 uint8_t *usemap = calloc (head->first_free, 1);
330 if (usemap == NULL)
331 return 0;
333 const char *data = (char *) &head->array[roundup (head->module,
334 ALIGN / sizeof (ref_t))];
336 nscd_ssize_t he_cnt = 0;
337 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
339 ref_t work = head->array[cnt];
341 while (work != ENDREF)
343 if (! check_use (data, head->first_free, usemap, use_he, work,
344 sizeof (struct hashentry)))
345 goto fail;
347 /* Now we know we can dereference the record. */
348 struct hashentry *here = (struct hashentry *) (data + work);
350 ++he_cnt;
352 /* Make sure the record is for this type of service. */
353 if (here->type >= LASTREQ
354 || serv2db[here->type] != &dbs[dbnr])
355 goto fail;
357 /* Validate boolean field value. */
358 if (here->first != false && here->first != true)
359 goto fail;
361 if (here->len < 0)
362 goto fail;
364 /* Now the data. */
365 if (here->packet < 0
366 || here->packet > head->first_free
367 || here->packet + sizeof (struct datahead) > head->first_free)
368 goto fail;
370 struct datahead *dh = (struct datahead *) (data + here->packet);
372 if (! check_use (data, head->first_free, usemap,
373 use_data | (here->first ? use_first : 0),
374 here->packet, dh->allocsize))
375 goto fail;
377 if (dh->allocsize < sizeof (struct datahead)
378 || dh->recsize > dh->allocsize
379 || (dh->notfound != false && dh->notfound != true)
380 || (dh->usable != false && dh->usable != true))
381 goto fail;
383 if (here->key < here->packet + sizeof (struct datahead)
384 || here->key > here->packet + dh->allocsize
385 || here->key + here->len > here->packet + dh->allocsize)
387 #if SEPARATE_KEY
388 /* If keys can appear outside of data, this should be done
389 instead. But gc doesn't mark the data in that case. */
390 if (! check_use (data, head->first_free, usemap,
391 use_key | (here->first ? use_first : 0),
392 here->key, here->len))
393 #endif
394 goto fail;
397 work = here->next;
401 if (he_cnt != head->nentries)
402 goto fail;
404 /* See if all data and keys had at least one reference from
405 he->first == true hashentry. */
406 for (ref_t idx = 0; idx < head->first_free; ++idx)
408 #if SEPARATE_KEY
409 if (usemap[idx] == use_key_begin)
410 goto fail;
411 #endif
412 if (usemap[idx] == use_data_begin)
413 goto fail;
416 /* Finally, make sure the database hasn't changed since the first test. */
417 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
418 goto fail;
420 free (usemap);
421 return 1;
423 fail:
424 free (usemap);
425 return 0;
429 /* Initialize database information structures. */
430 void
431 nscd_init (void)
433 /* Look up unprivileged uid/gid/groups before we start listening on the
434 socket */
435 if (server_user != NULL)
436 begin_drop_privileges ();
438 if (nthreads == -1)
439 /* No configuration for this value, assume a default. */
440 nthreads = 2 * lastdb;
442 for (size_t cnt = 0; cnt < lastdb; ++cnt)
443 if (dbs[cnt].enabled)
445 pthread_rwlock_init (&dbs[cnt].lock, NULL);
446 pthread_mutex_init (&dbs[cnt].memlock, NULL);
448 if (dbs[cnt].persistent)
450 /* Try to open the appropriate file on disk. */
451 int fd = open (dbs[cnt].db_filename, O_RDWR);
452 if (fd != -1)
454 struct stat64 st;
455 void *mem;
456 size_t total;
457 struct database_pers_head head;
458 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
459 sizeof (head)));
460 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
462 fail_db:
463 dbg_log (_("invalid persistent database file \"%s\": %s"),
464 dbs[cnt].db_filename, strerror (errno));
465 unlink (dbs[cnt].db_filename);
467 else if (head.module == 0 && head.data_size == 0)
469 /* The file has been created, but the head has not been
470 initialized yet. Remove the old file. */
471 unlink (dbs[cnt].db_filename);
473 else if (head.header_size != (int) sizeof (head))
475 dbg_log (_("invalid persistent database file \"%s\": %s"),
476 dbs[cnt].db_filename,
477 _("header size does not match"));
478 unlink (dbs[cnt].db_filename);
480 else if ((total = (sizeof (head)
481 + roundup (head.module * sizeof (ref_t),
482 ALIGN)
483 + head.data_size))
484 > st.st_size
485 || total < sizeof (head))
487 dbg_log (_("invalid persistent database file \"%s\": %s"),
488 dbs[cnt].db_filename,
489 _("file size does not match"));
490 unlink (dbs[cnt].db_filename);
492 /* Note we map with the maximum size allowed for the
493 database. This is likely much larger than the
494 actual file size. This is OK on most OSes since
495 extensions of the underlying file will
496 automatically translate more pages available for
497 memory access. */
498 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
499 PROT_READ | PROT_WRITE,
500 MAP_SHARED, fd, 0))
501 == MAP_FAILED)
502 goto fail_db;
503 else if (!verify_persistent_db (mem, &head, cnt))
505 munmap (mem, total);
506 dbg_log (_("invalid persistent database file \"%s\": %s"),
507 dbs[cnt].db_filename,
508 _("verification failed"));
509 unlink (dbs[cnt].db_filename);
511 else
513 /* Success. We have the database. */
514 dbs[cnt].head = mem;
515 dbs[cnt].memsize = total;
516 dbs[cnt].data = (char *)
517 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
518 ALIGN / sizeof (ref_t))];
519 dbs[cnt].mmap_used = true;
521 if (dbs[cnt].suggested_module > head.module)
522 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
523 dbnames[cnt]);
525 dbs[cnt].wr_fd = fd;
526 fd = -1;
527 /* We also need a read-only descriptor. */
528 if (dbs[cnt].shared)
530 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
531 if (dbs[cnt].ro_fd == -1)
532 dbg_log (_("\
533 cannot create read-only descriptor for \"%s\"; no mmap"),
534 dbs[cnt].db_filename);
537 // XXX Shall we test whether the descriptors actually
538 // XXX point to the same file?
541 /* Close the file descriptors in case something went
542 wrong in which case the variable have not been
543 assigned -1. */
544 if (fd != -1)
545 close (fd);
549 if (dbs[cnt].head == NULL)
551 /* No database loaded. Allocate the data structure,
552 possibly on disk. */
553 struct database_pers_head head;
554 size_t total = (sizeof (head)
555 + roundup (dbs[cnt].suggested_module
556 * sizeof (ref_t), ALIGN)
557 + (dbs[cnt].suggested_module
558 * DEFAULT_DATASIZE_PER_BUCKET));
560 /* Try to create the database. If we do not need a
561 persistent database create a temporary file. */
562 int fd;
563 int ro_fd = -1;
564 if (dbs[cnt].persistent)
566 fd = open (dbs[cnt].db_filename,
567 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
568 S_IRUSR | S_IWUSR);
569 if (fd != -1 && dbs[cnt].shared)
570 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
572 else
574 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
575 fd = mkstemp (fname);
577 /* We do not need the file name anymore after we
578 opened another file descriptor in read-only mode. */
579 if (fd != -1)
581 if (dbs[cnt].shared)
582 ro_fd = open (fname, O_RDONLY);
584 unlink (fname);
588 if (fd == -1)
590 if (errno == EEXIST)
592 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
593 dbnames[cnt], dbs[cnt].db_filename);
594 // XXX Correct way to terminate?
595 exit (1);
598 if (dbs[cnt].persistent)
599 dbg_log (_("cannot create %s; no persistent database used"),
600 dbs[cnt].db_filename);
601 else
602 dbg_log (_("cannot create %s; no sharing possible"),
603 dbs[cnt].db_filename);
605 dbs[cnt].persistent = 0;
606 // XXX remember: no mmap
608 else
610 /* Tell the user if we could not create the read-only
611 descriptor. */
612 if (ro_fd == -1 && dbs[cnt].shared)
613 dbg_log (_("\
614 cannot create read-only descriptor for \"%s\"; no mmap"),
615 dbs[cnt].db_filename);
617 /* Before we create the header, initialiye the hash
618 table. So that if we get interrupted if writing
619 the header we can recognize a partially initialized
620 database. */
621 size_t ps = sysconf (_SC_PAGESIZE);
622 char tmpbuf[ps];
623 assert (~ENDREF == 0);
624 memset (tmpbuf, '\xff', ps);
626 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
627 off_t offset = sizeof (head);
629 size_t towrite;
630 if (offset % ps != 0)
632 towrite = MIN (remaining, ps - (offset % ps));
633 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
634 goto write_fail;
635 offset += towrite;
636 remaining -= towrite;
639 while (remaining > ps)
641 if (pwrite (fd, tmpbuf, ps, offset) == -1)
642 goto write_fail;
643 offset += ps;
644 remaining -= ps;
647 if (remaining > 0
648 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
649 goto write_fail;
651 /* Create the header of the file. */
652 struct database_pers_head head =
654 .version = DB_VERSION,
655 .header_size = sizeof (head),
656 .module = dbs[cnt].suggested_module,
657 .data_size = (dbs[cnt].suggested_module
658 * DEFAULT_DATASIZE_PER_BUCKET),
659 .first_free = 0
661 void *mem;
663 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
664 != sizeof (head))
665 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
666 != 0)
667 || (mem = mmap (NULL, dbs[cnt].max_db_size,
668 PROT_READ | PROT_WRITE,
669 MAP_SHARED, fd, 0)) == MAP_FAILED)
671 write_fail:
672 unlink (dbs[cnt].db_filename);
673 dbg_log (_("cannot write to database file %s: %s"),
674 dbs[cnt].db_filename, strerror (errno));
675 dbs[cnt].persistent = 0;
677 else
679 /* Success. */
680 dbs[cnt].head = mem;
681 dbs[cnt].data = (char *)
682 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
683 ALIGN / sizeof (ref_t))];
684 dbs[cnt].memsize = total;
685 dbs[cnt].mmap_used = true;
687 /* Remember the descriptors. */
688 dbs[cnt].wr_fd = fd;
689 dbs[cnt].ro_fd = ro_fd;
690 fd = -1;
691 ro_fd = -1;
694 if (fd != -1)
695 close (fd);
696 if (ro_fd != -1)
697 close (ro_fd);
701 if (paranoia
702 && ((dbs[cnt].wr_fd != -1
703 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
704 || (dbs[cnt].ro_fd != -1
705 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
707 dbg_log (_("\
708 cannot set socket to close on exec: %s; disabling paranoia mode"),
709 strerror (errno));
710 paranoia = 0;
713 if (dbs[cnt].head == NULL)
715 /* We do not use the persistent database. Just
716 create an in-memory data structure. */
717 assert (! dbs[cnt].persistent);
719 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
720 + (dbs[cnt].suggested_module
721 * sizeof (ref_t)));
722 memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head));
723 assert (~ENDREF == 0);
724 memset (dbs[cnt].head->array, '\xff',
725 dbs[cnt].suggested_module * sizeof (ref_t));
726 dbs[cnt].head->module = dbs[cnt].suggested_module;
727 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
728 * dbs[cnt].head->module);
729 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
730 dbs[cnt].head->first_free = 0;
732 dbs[cnt].shared = 0;
733 assert (dbs[cnt].ro_fd == -1);
736 if (dbs[cnt].check_file)
738 /* We need the modification date of the file. */
739 struct stat64 st;
741 if (stat64 (dbs[cnt].filename, &st) < 0)
743 /* We cannot stat() the file, disable file checking. */
744 dbg_log (_("cannot stat() file `%s': %s"),
745 dbs[cnt].filename, strerror (errno));
746 dbs[cnt].check_file = 0;
748 else
749 dbs[cnt].file_mtime = st.st_mtime;
753 /* Create the socket. */
754 sock = socket (AF_UNIX, SOCK_STREAM, 0);
755 if (sock < 0)
757 dbg_log (_("cannot open socket: %s"), strerror (errno));
758 exit (errno == EACCES ? 4 : 1);
760 /* Bind a name to the socket. */
761 struct sockaddr_un sock_addr;
762 sock_addr.sun_family = AF_UNIX;
763 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
764 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
766 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
767 exit (errno == EACCES ? 4 : 1);
770 /* We don't want to get stuck on accept. */
771 int fl = fcntl (sock, F_GETFL);
772 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
774 dbg_log (_("cannot change socket to nonblocking mode: %s"),
775 strerror (errno));
776 exit (1);
779 /* The descriptor needs to be closed on exec. */
780 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
782 dbg_log (_("cannot set socket to close on exec: %s"),
783 strerror (errno));
784 exit (1);
787 /* Set permissions for the socket. */
788 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
790 /* Set the socket up to accept connections. */
791 if (listen (sock, SOMAXCONN) < 0)
793 dbg_log (_("cannot enable socket to accept connections: %s"),
794 strerror (errno));
795 exit (1);
798 /* Change to unprivileged uid/gid/groups if specifed in config file */
799 if (server_user != NULL)
800 finish_drop_privileges ();
804 /* Close the connections. */
805 void
806 close_sockets (void)
808 close (sock);
812 static void
813 invalidate_cache (char *key)
815 dbtype number;
817 if (strcmp (key, "passwd") == 0)
818 number = pwddb;
819 else if (strcmp (key, "group") == 0)
820 number = grpdb;
821 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
823 number = hstdb;
825 /* Re-initialize the resolver. resolv.conf might have changed. */
826 res_init ();
828 else
829 return;
831 if (dbs[number].enabled)
832 prune_cache (&dbs[number], LONG_MAX);
836 #ifdef SCM_RIGHTS
837 static void
838 send_ro_fd (struct database_dyn *db, char *key, int fd)
840 /* If we do not have an read-only file descriptor do nothing. */
841 if (db->ro_fd == -1)
842 return;
844 /* We need to send some data along with the descriptor. */
845 struct iovec iov[1];
846 iov[0].iov_base = key;
847 iov[0].iov_len = strlen (key) + 1;
849 /* Prepare the control message to transfer the descriptor. */
850 union
852 struct cmsghdr hdr;
853 char bytes[CMSG_SPACE (sizeof (int))];
854 } buf;
855 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
856 .msg_control = buf.bytes,
857 .msg_controllen = sizeof (buf) };
858 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
860 cmsg->cmsg_level = SOL_SOCKET;
861 cmsg->cmsg_type = SCM_RIGHTS;
862 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
864 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
866 msg.msg_controllen = cmsg->cmsg_len;
868 /* Send the control message. We repeat when we are interrupted but
869 everything else is ignored. */
870 #ifndef MSG_NOSIGNAL
871 # define MSG_NOSIGNAL 0
872 #endif
873 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
875 if (__builtin_expect (debug_level > 0, 0))
876 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
878 #endif /* SCM_RIGHTS */
881 /* Handle new request. */
882 static void
883 handle_request (int fd, request_header *req, void *key, uid_t uid)
885 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
887 if (debug_level > 0)
888 dbg_log (_("\
889 cannot handle old request version %d; current version is %d"),
890 req->version, NSCD_VERSION);
891 return;
894 /* Make the SELinux check before we go on to the standard checks. We
895 need to verify that the request type is valid, since it has not
896 yet been checked at this point. */
897 if (selinux_enabled
898 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
899 && __builtin_expect (req->type, LASTREQ) < LASTREQ
900 && nscd_request_avc_has_perm (fd, req->type) != 0)
901 return;
903 struct database_dyn *db = serv2db[req->type];
905 // XXX Clean up so that each new command need not introduce a
906 // XXX new conditional.
907 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
908 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
909 || req->type == GETAI || req->type == INITGROUPS)
911 if (__builtin_expect (debug_level, 0) > 0)
913 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
915 char buf[INET6_ADDRSTRLEN];
917 dbg_log ("\t%s (%s)", serv2str[req->type],
918 inet_ntop (req->type == GETHOSTBYADDR
919 ? AF_INET : AF_INET6,
920 key, buf, sizeof (buf)));
922 else
923 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
926 /* Is this service enabled? */
927 if (!db->enabled)
929 /* No, sent the prepared record. */
930 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
931 db->disabled_iov->iov_len,
932 MSG_NOSIGNAL))
933 != (ssize_t) db->disabled_iov->iov_len
934 && __builtin_expect (debug_level, 0) > 0)
936 /* We have problems sending the result. */
937 char buf[256];
938 dbg_log (_("cannot write result: %s"),
939 strerror_r (errno, buf, sizeof (buf)));
942 return;
945 /* Be sure we can read the data. */
946 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
948 ++db->head->rdlockdelayed;
949 pthread_rwlock_rdlock (&db->lock);
952 /* See whether we can handle it from the cache. */
953 struct datahead *cached;
954 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
955 db, uid);
956 if (cached != NULL)
958 /* Hurray it's in the cache. */
959 ssize_t nwritten;
961 #ifdef HAVE_SENDFILE
962 if (db->mmap_used || !cached->notfound)
964 assert (db->wr_fd != -1);
965 assert ((char *) cached->data > (char *) db->data);
966 assert ((char *) cached->data - (char *) db->head
967 + cached->recsize
968 <= (sizeof (struct database_pers_head)
969 + db->head->module * sizeof (ref_t)
970 + db->head->data_size));
971 nwritten = sendfileall (fd, db->wr_fd,
972 (char *) cached->data
973 - (char *) db->head, cached->recsize);
974 # ifndef __ASSUME_SENDFILE
975 if (nwritten == -1 && errno == ENOSYS)
976 goto use_write;
977 # endif
979 else
980 # ifndef __ASSUME_SENDFILE
981 use_write:
982 # endif
983 #endif
984 nwritten = writeall (fd, cached->data, cached->recsize);
986 if (nwritten != cached->recsize
987 && __builtin_expect (debug_level, 0) > 0)
989 /* We have problems sending the result. */
990 char buf[256];
991 dbg_log (_("cannot write result: %s"),
992 strerror_r (errno, buf, sizeof (buf)));
995 pthread_rwlock_unlock (&db->lock);
997 return;
1000 pthread_rwlock_unlock (&db->lock);
1002 else if (__builtin_expect (debug_level, 0) > 0)
1004 if (req->type == INVALIDATE)
1005 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1006 else
1007 dbg_log ("\t%s", serv2str[req->type]);
1010 /* Handle the request. */
1011 switch (req->type)
1013 case GETPWBYNAME:
1014 addpwbyname (db, fd, req, key, uid);
1015 break;
1017 case GETPWBYUID:
1018 addpwbyuid (db, fd, req, key, uid);
1019 break;
1021 case GETGRBYNAME:
1022 addgrbyname (db, fd, req, key, uid);
1023 break;
1025 case GETGRBYGID:
1026 addgrbygid (db, fd, req, key, uid);
1027 break;
1029 case GETHOSTBYNAME:
1030 addhstbyname (db, fd, req, key, uid);
1031 break;
1033 case GETHOSTBYNAMEv6:
1034 addhstbynamev6 (db, fd, req, key, uid);
1035 break;
1037 case GETHOSTBYADDR:
1038 addhstbyaddr (db, fd, req, key, uid);
1039 break;
1041 case GETHOSTBYADDRv6:
1042 addhstbyaddrv6 (db, fd, req, key, uid);
1043 break;
1045 case GETAI:
1046 addhstai (db, fd, req, key, uid);
1047 break;
1049 case INITGROUPS:
1050 addinitgroups (db, fd, req, key, uid);
1051 break;
1053 case GETSTAT:
1054 case SHUTDOWN:
1055 case INVALIDATE:
1057 /* Get the callers credentials. */
1058 #ifdef SO_PEERCRED
1059 struct ucred caller;
1060 socklen_t optlen = sizeof (caller);
1062 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1064 char buf[256];
1066 dbg_log (_("error getting callers id: %s"),
1067 strerror_r (errno, buf, sizeof (buf)));
1068 break;
1071 uid = caller.uid;
1072 #else
1073 /* Some systems have no SO_PEERCRED implementation. They don't
1074 care about security so we don't as well. */
1075 uid = 0;
1076 #endif
1079 /* Accept shutdown, getstat and invalidate only from root. For
1080 the stat call also allow the user specified in the config file. */
1081 if (req->type == GETSTAT)
1083 if (uid == 0 || uid == stat_uid)
1084 send_stats (fd, dbs);
1086 else if (uid == 0)
1088 if (req->type == INVALIDATE)
1089 invalidate_cache (key);
1090 else
1091 termination_handler (0);
1093 break;
1095 case GETFDPW:
1096 case GETFDGR:
1097 case GETFDHST:
1098 #ifdef SCM_RIGHTS
1099 send_ro_fd (serv2db[req->type], key, fd);
1100 #endif
1101 break;
1103 default:
1104 /* Ignore the command, it's nothing we know. */
1105 break;
1110 /* Restart the process. */
1111 static void
1112 restart (void)
1114 /* First determine the parameters. We do not use the parameters
1115 passed to main() since in case nscd is started by running the
1116 dynamic linker this will not work. Yes, this is not the usual
1117 case but nscd is part of glibc and we occasionally do this. */
1118 size_t buflen = 1024;
1119 char *buf = alloca (buflen);
1120 size_t readlen = 0;
1121 int fd = open ("/proc/self/cmdline", O_RDONLY);
1122 if (fd == -1)
1124 dbg_log (_("\
1125 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1126 strerror (errno));
1128 paranoia = 0;
1129 return;
1132 while (1)
1134 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1135 buflen - readlen));
1136 if (n == -1)
1138 dbg_log (_("\
1139 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1140 strerror (errno));
1142 close (fd);
1143 paranoia = 0;
1144 return;
1147 readlen += n;
1149 if (readlen < buflen)
1150 break;
1152 /* We might have to extend the buffer. */
1153 size_t old_buflen = buflen;
1154 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1155 buf = memmove (newp, buf, old_buflen);
1158 close (fd);
1160 /* Parse the command line. Worst case scenario: every two
1161 characters form one parameter (one character plus NUL). */
1162 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1163 int argc = 0;
1165 char *cp = buf;
1166 while (cp < buf + readlen)
1168 argv[argc++] = cp;
1169 cp = (char *) rawmemchr (cp, '\0') + 1;
1171 argv[argc] = NULL;
1173 /* Second, change back to the old user if we changed it. */
1174 if (server_user != NULL)
1176 if (setuid (old_uid) != 0)
1178 dbg_log (_("\
1179 cannot change to old UID: %s; disabling paranoia mode"),
1180 strerror (errno));
1182 paranoia = 0;
1183 return;
1186 if (setgid (old_gid) != 0)
1188 dbg_log (_("\
1189 cannot change to old GID: %s; disabling paranoia mode"),
1190 strerror (errno));
1192 setuid (server_uid);
1193 paranoia = 0;
1194 return;
1198 /* Next change back to the old working directory. */
1199 if (chdir (oldcwd) == -1)
1201 dbg_log (_("\
1202 cannot change to old working directory: %s; disabling paranoia mode"),
1203 strerror (errno));
1205 if (server_user != NULL)
1207 setuid (server_uid);
1208 setgid (server_gid);
1210 paranoia = 0;
1211 return;
1214 /* Synchronize memory. */
1215 for (int cnt = 0; cnt < lastdb; ++cnt)
1217 /* Make sure nobody keeps using the database. */
1218 dbs[cnt].head->timestamp = 0;
1220 if (dbs[cnt].persistent)
1221 // XXX async OK?
1222 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1225 /* The preparations are done. */
1226 execv ("/proc/self/exe", argv);
1228 /* If we come here, we will never be able to re-exec. */
1229 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1230 strerror (errno));
1232 if (server_user != NULL)
1234 setuid (server_uid);
1235 setgid (server_gid);
1237 if (chdir ("/") != 0)
1238 dbg_log (_("cannot change current working directory to \"/\": %s"),
1239 strerror (errno));
1240 paranoia = 0;
/* Element of a list of file descriptors.  */
struct fdlist
{
  /* The descriptor itself.  */
  int fd;
  /* Link to the next element.  */
  struct fdlist *next;
};
1250 /* Memory allocated for the list. */
1251 static struct fdlist *fdlist;
1252 /* List of currently ready-to-read file descriptors. */
1253 static struct fdlist *readylist;
1255 /* Conditional variable and mutex to signal availability of entries in
1256 READYLIST. The condvar is initialized dynamically since we might
1257 use a different clock depending on availability. */
1258 static pthread_cond_t readylist_cond;
1259 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1261 /* The clock to use with the condvar. */
1262 static clockid_t timeout_clock = CLOCK_REALTIME;
1264 /* Number of threads ready to handle the READYLIST. */
1265 static unsigned long int nready;
1268 /* This is the main loop. It is replicated in different threads but the
1269 `poll' call makes sure only one thread handles an incoming connection. */
1270 static void *
1271 __attribute__ ((__noreturn__))
1272 nscd_run (void *p)
1274 const long int my_number = (long int) p;
1275 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1276 struct timespec prune_ts;
1277 int to = 0;
1278 char buf[256];
1280 if (run_prune)
1282 setup_thread (&dbs[my_number]);
1284 /* We are running. */
1285 dbs[my_number].head->timestamp = time (NULL);
1287 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1288 /* Should never happen. */
1289 abort ();
1291 /* Compute timeout time. */
1292 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1295 /* Initial locking. */
1296 pthread_mutex_lock (&readylist_lock);
1298 /* One more thread available. */
1299 ++nready;
1301 while (1)
1303 while (readylist == NULL)
1305 if (run_prune)
1307 /* Wait, but not forever. */
1308 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1309 &prune_ts);
1311 /* If we were woken and there is no work to be done,
1312 just start pruning. */
1313 if (readylist == NULL && to == ETIMEDOUT)
1315 --nready;
1316 pthread_mutex_unlock (&readylist_lock);
1317 goto only_prune;
1320 else
1321 /* No need to timeout. */
1322 pthread_cond_wait (&readylist_cond, &readylist_lock);
1325 struct fdlist *it = readylist->next;
1326 if (readylist->next == readylist)
1327 /* Just one entry on the list. */
1328 readylist = NULL;
1329 else
1330 readylist->next = it->next;
1332 /* Extract the information and mark the record ready to be used
1333 again. */
1334 int fd = it->fd;
1335 it->next = NULL;
1337 /* One more thread available. */
1338 --nready;
1340 /* We are done with the list. */
1341 pthread_mutex_unlock (&readylist_lock);
1343 /* We do not want to block on a short read or so. */
1344 int fl = fcntl (fd, F_GETFL);
1345 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1346 goto close_and_out;
1348 /* Now read the request. */
1349 request_header req;
1350 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1351 != sizeof (req), 0))
1353 /* We failed to read data. Note that this also might mean we
1354 failed because we would have blocked. */
1355 if (debug_level > 0)
1356 dbg_log (_("short read while reading request: %s"),
1357 strerror_r (errno, buf, sizeof (buf)));
1358 goto close_and_out;
1361 /* Check whether this is a valid request type. */
1362 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1363 goto close_and_out;
1365 /* Some systems have no SO_PEERCRED implementation. They don't
1366 care about security so we don't as well. */
1367 uid_t uid = -1;
1368 #ifdef SO_PEERCRED
1369 pid_t pid = 0;
1371 if (__builtin_expect (debug_level > 0, 0))
1373 struct ucred caller;
1374 socklen_t optlen = sizeof (caller);
1376 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1377 pid = caller.pid;
1379 #endif
1381 /* It should not be possible to crash the nscd with a silly
1382 request (i.e., a terribly large key). We limit the size to 1kb. */
1383 #define MAXKEYLEN 1024
1384 if (__builtin_expect (req.key_len, 1) < 0
1385 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1387 if (debug_level > 0)
1388 dbg_log (_("key length in request too long: %d"), req.key_len);
1390 else
1392 /* Get the key. */
1393 char keybuf[MAXKEYLEN];
1395 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1396 req.key_len))
1397 != req.key_len, 0))
1399 /* Again, this can also mean we would have blocked. */
1400 if (debug_level > 0)
1401 dbg_log (_("short read while reading request key: %s"),
1402 strerror_r (errno, buf, sizeof (buf)));
1403 goto close_and_out;
1406 if (__builtin_expect (debug_level, 0) > 0)
1408 #ifdef SO_PEERCRED
1409 if (pid != 0)
1410 dbg_log (_("\
1411 handle_request: request received (Version = %d) from PID %ld"),
1412 req.version, (long int) pid);
1413 else
1414 #endif
1415 dbg_log (_("\
1416 handle_request: request received (Version = %d)"), req.version);
1419 /* Phew, we got all the data, now process it. */
1420 handle_request (fd, &req, keybuf, uid);
1423 close_and_out:
1424 /* We are done. */
1425 close (fd);
1427 /* Check whether we should be pruning the cache. */
1428 assert (run_prune || to == 0);
1429 if (to == ETIMEDOUT)
1431 only_prune:
1432 /* The pthread_cond_timedwait() call timed out. It is time
1433 to clean up the cache. */
1434 assert (my_number < lastdb);
1435 prune_cache (&dbs[my_number], time (NULL));
1437 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1438 /* Should never happen. */
1439 abort ();
1441 /* Compute next timeout time. */
1442 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1444 /* In case the list is emtpy we do not want to run the prune
1445 code right away again. */
1446 to = 0;
1449 /* Re-locking. */
1450 pthread_mutex_lock (&readylist_lock);
1452 /* One more thread available. */
1453 ++nready;
/* Number of connection slots; start_threads sets it and sizes the
   FDLIST and STARTTIME arrays with it.  */
static unsigned int nconns;
1460 static void
1461 fd_ready (int fd)
1463 pthread_mutex_lock (&readylist_lock);
1465 /* Find an empty entry in FDLIST. */
1466 size_t inner;
1467 for (inner = 0; inner < nconns; ++inner)
1468 if (fdlist[inner].next == NULL)
1469 break;
1470 assert (inner < nconns);
1472 fdlist[inner].fd = fd;
1474 if (readylist == NULL)
1475 readylist = fdlist[inner].next = &fdlist[inner];
1476 else
1478 fdlist[inner].next = readylist->next;
1479 readylist = readylist->next = &fdlist[inner];
1482 bool do_signal = true;
1483 if (__builtin_expect (nready == 0, 0))
1485 ++client_queued;
1486 do_signal = false;
1488 /* Try to start another thread to help out. */
1489 pthread_t th;
1490 if (nthreads < max_nthreads
1491 && pthread_create (&th, &attr, nscd_run,
1492 (void *) (long int) nthreads) == 0)
1494 /* We got another thread. */
1495 ++nthreads;
1496 /* The new thread might need a kick. */
1497 do_signal = true;
1502 pthread_mutex_unlock (&readylist_lock);
1504 /* Tell one of the worker threads there is work to do. */
1505 if (do_signal)
1506 pthread_cond_signal (&readylist_cond);
1510 /* Check whether restarting should happen. */
1511 static inline int
1512 restart_p (time_t now)
1514 return (paranoia && readylist == NULL && nready == nthreads
1515 && now >= restart_time);
/* Time at which each pending connection was accepted.  The poll loop
   indexes it by slot number, the epoll loop by descriptor; the epoll
   loop uses zero for "no connection pending".  */
static time_t *starttime;
1523 static void
1524 __attribute__ ((__noreturn__))
1525 main_loop_poll (void)
1527 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1528 * sizeof (conns[0]));
1530 conns[0].fd = sock;
1531 conns[0].events = POLLRDNORM;
1532 size_t nused = 1;
1533 size_t firstfree = 1;
1535 while (1)
1537 /* Wait for any event. We wait at most a couple of seconds so
1538 that we can check whether we should close any of the accepted
1539 connections since we have not received a request. */
1540 #define MAX_ACCEPT_TIMEOUT 30
1541 #define MIN_ACCEPT_TIMEOUT 5
1542 #define MAIN_THREAD_TIMEOUT \
1543 (MAX_ACCEPT_TIMEOUT * 1000 \
1544 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1546 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1548 time_t now = time (NULL);
1550 /* If there is a descriptor ready for reading or there is a new
1551 connection, process this now. */
1552 if (n > 0)
1554 if (conns[0].revents != 0)
1556 /* We have a new incoming connection. Accept the connection. */
1557 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1559 /* Use the descriptor if we have not reached the limit. */
1560 if (fd >= 0)
1562 if (firstfree < nconns)
1564 conns[firstfree].fd = fd;
1565 conns[firstfree].events = POLLRDNORM;
1566 starttime[firstfree] = now;
1567 if (firstfree >= nused)
1568 nused = firstfree + 1;
1571 ++firstfree;
1572 while (firstfree < nused && conns[firstfree].fd != -1);
1574 else
1575 /* We cannot use the connection so close it. */
1576 close (fd);
1579 --n;
1582 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1583 if (conns[cnt].revents != 0)
1585 fd_ready (conns[cnt].fd);
1587 /* Clean up the CONNS array. */
1588 conns[cnt].fd = -1;
1589 if (cnt < firstfree)
1590 firstfree = cnt;
1591 if (cnt == nused - 1)
1593 --nused;
1594 while (conns[nused - 1].fd == -1);
1596 --n;
1600 /* Now find entries which have timed out. */
1601 assert (nused > 0);
1603 /* We make the timeout length depend on the number of file
1604 descriptors currently used. */
1605 #define ACCEPT_TIMEOUT \
1606 (MAX_ACCEPT_TIMEOUT \
1607 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1608 time_t laststart = now - ACCEPT_TIMEOUT;
1610 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1612 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1614 /* Remove the entry, it timed out. */
1615 (void) close (conns[cnt].fd);
1616 conns[cnt].fd = -1;
1618 if (cnt < firstfree)
1619 firstfree = cnt;
1620 if (cnt == nused - 1)
1622 --nused;
1623 while (conns[nused - 1].fd == -1);
1627 if (restart_p (now))
1628 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, implemented with the epoll
   descriptor EFD.  It accepts new connections, passes descriptors with
   pending data on to the worker threads, closes connections which
   stayed silent for too long, and triggers the restart if requested.

   The function returns only if the listening socket cannot be added to
   EFD; the caller is then expected to use another implementation.

   The timeouts come from the MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT
   macros, which refer to the local variable NUSED.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors currently watched, including the listening
     socket.  */
  int nused = 1;
  /* Highest descriptor number for which a connection may be pending.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is not watched anymore.  Without this
               adjustment every timed-out connection would inflate
               NUSED permanently and the timeouts computed from it would
               keep shrinking.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1727 /* Start all the threads we want. The initial process is thread no. 1. */
1728 void
1729 start_threads (void)
1731 /* Initialize the conditional variable we will use. The only
1732 non-standard attribute we might use is the clock selection. */
1733 pthread_condattr_t condattr;
1734 pthread_condattr_init (&condattr);
1736 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1737 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1738 /* Determine whether the monotonous clock is available. */
1739 struct timespec dummy;
1740 # if _POSIX_MONOTONIC_CLOCK == 0
1741 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1742 # endif
1743 # if _POSIX_CLOCK_SELECTION == 0
1744 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1745 # endif
1746 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1747 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1748 timeout_clock = CLOCK_MONOTONIC;
1749 #endif
1751 pthread_cond_init (&readylist_cond, &condattr);
1752 pthread_condattr_destroy (&condattr);
1755 /* Create the attribute for the threads. They are all created
1756 detached. */
1757 pthread_attr_init (&attr);
1758 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1759 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1760 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1762 /* We allow less than LASTDB threads only for debugging. */
1763 if (debug_level == 0)
1764 nthreads = MAX (nthreads, lastdb);
1766 int nfailed = 0;
1767 for (long int i = 0; i < nthreads; ++i)
1769 pthread_t th;
1770 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1771 ++nfailed;
1773 if (nthreads - nfailed < lastdb)
1775 /* We could not start enough threads. */
1776 dbg_log (_("could only start %d threads; terminating"),
1777 nthreads - nfailed);
1778 exit (1);
1781 /* Determine how much room for descriptors we should initially
1782 allocate. This might need to change later if we cap the number
1783 with MAXCONN. */
1784 const long int nfds = sysconf (_SC_OPEN_MAX);
1785 #define MINCONN 32
1786 #define MAXCONN 16384
1787 if (nfds == -1 || nfds > MAXCONN)
1788 nconns = MAXCONN;
1789 else if (nfds < MINCONN)
1790 nconns = MINCONN;
1791 else
1792 nconns = nfds;
1794 /* We need memory to pass descriptors on to the worker threads. */
1795 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1796 /* Array to keep track when connection was accepted. */
1797 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1799 /* In the main thread we execute the loop which handles incoming
1800 connections. */
1801 #ifdef HAVE_EPOLL
1802 int efd = epoll_create (100);
1803 if (efd != -1)
1805 main_loop_epoll (efd);
1806 close (efd);
1808 #endif
1810 main_loop_poll ();
1814 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1815 this function is called, we are not listening on the nscd socket yet so
1816 we can just use the ordinary lookup functions without causing a lockup */
1817 static void
1818 begin_drop_privileges (void)
1820 struct passwd *pwd = getpwnam (server_user);
1822 if (pwd == NULL)
1824 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1825 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1826 server_user);
1829 server_uid = pwd->pw_uid;
1830 server_gid = pwd->pw_gid;
1832 /* Save the old UID/GID if we have to change back. */
1833 if (paranoia)
1835 old_uid = getuid ();
1836 old_gid = getgid ();
1839 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1841 /* This really must never happen. */
1842 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1843 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1846 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1848 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1849 == -1)
1851 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1852 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1857 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1858 run nscd as the user specified in the configuration file. */
1859 static void
1860 finish_drop_privileges (void)
1862 if (setgroups (server_ngroups, server_groups) == -1)
1864 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1865 error (EXIT_FAILURE, errno, _("setgroups failed"));
1868 if (setgid (server_gid) == -1)
1870 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1871 perror ("setgid");
1872 exit (4);
1875 if (setuid (server_uid) == -1)
1877 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1878 perror ("setuid");
1879 exit (4);