1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include <arpa/inet.h>
36 # include <sys/epoll.h>
39 # include <sys/inotify.h>
42 #include <sys/param.h>
45 # include <sys/sendfile.h>
47 #include <sys/socket.h>
54 #include <resolv/resolv.h>
56 # include <kernel-features.h>
60 /* Wrapper functions with error checking for standard functions. */
61 extern void *xmalloc (size_t n
);
62 extern void *xcalloc (size_t n
, size_t s
);
63 extern void *xrealloc (void *o
, size_t n
);
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
/* Every slot is selected with a designated initializer keyed by the
   request constant, and the string is the spelling of that constant.
   Request types that get no initializer here are left as null
   pointers.
   NOTE(review): no [GETAI] entry is visible in this listing although
   reqinfo has one; the embedded numbering skips 99, so the line may
   simply be absent from this dump -- confirm against the real file.  */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV"
106 /* The control data structures for the services. */
/* Four entries are visible below.  Each one statically initializes its
   reader/writer lock and prune mutex, takes the default maximum size
   and suggested table size, and names the watched system file, the
   on-disk cache file and the canned reply used when the service is
   disabled.  The index designators and the remaining fields are not
   visible in this listing.  */
107 struct database_dyn dbs
[lastdb
] =
/* Entry whose .filename is /etc/passwd.  */
110 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
111 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
117 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
118 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
120 .filename
= "/etc/passwd",
121 .db_filename
= _PATH_NSCD_PASSWD_DB
,
122 .disabled_iov
= &pwd_iov_disabled
,
/* Entry whose .filename is /etc/group.  */
130 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
131 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
137 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
138 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
140 .filename
= "/etc/group",
141 .db_filename
= _PATH_NSCD_GROUP_DB
,
142 .disabled_iov
= &grp_iov_disabled
,
/* Entry whose .filename is /etc/hosts.  */
150 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
151 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
155 .propagate
= 0, /* Not used. */
157 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
158 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
160 .filename
= "/etc/hosts",
161 .db_filename
= _PATH_NSCD_HOSTS_DB
,
162 .disabled_iov
= &hst_iov_disabled
,
/* Entry whose .filename is /etc/services.  */
170 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
171 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
175 .propagate
= 0, /* Not used. */
177 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
178 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
180 .filename
= "/etc/services",
181 .db_filename
= _PATH_NSCD_SERVICES_DB
,
182 .disabled_iov
= &serv_iov_disabled
,
192 /* Mapping of request type to database. */
/* Each entry pairs a boolean with a pointer into dbs[].  The request
   handler reads the boolean as .data_request (true for lookups that
   are answered from the cache) and the pointer as .db.  Requests that
   only hand out a descriptor carry false together with the database
   whose descriptor is passed; control requests carry false and NULL.
   The opening of the struct declaration is not visible in this
   listing.  */
196 struct database_dyn
*db
;
197 } const reqinfo
[LASTREQ
] =
199 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
200 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
201 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
202 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
203 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
204 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
205 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
206 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
207 [SHUTDOWN
] = { false, NULL
},
208 [GETSTAT
] = { false, NULL
},
/* NOTE(review): SHUTDOWN is designated a second time here while
   INVALIDATE never gets an entry of its own.  The repeated initializer
   just overrides the identical earlier one, and an element without an
   initializer is zero-initialized, which should compare equal to
   { false, NULL } assuming the struct holds only these two members.
   Presumably [INVALIDATE] was intended -- confirm.  */
209 [SHUTDOWN
] = { false, NULL
},
210 [GETFDPW
] = { false, &dbs
[pwddb
] },
211 [GETFDGR
] = { false, &dbs
[grpdb
] },
212 [GETFDHST
] = { false, &dbs
[hstdb
] },
213 [GETAI
] = { true, &dbs
[hstdb
] },
214 [INITGROUPS
] = { true, &dbs
[grpdb
] },
215 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
216 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
217 [GETFDSERV
] = { false, &dbs
[servdb
] }
221 /* Initial number of threads to use. */
223 /* Maximum number of threads to use. */
224 int max_nthreads
= 32;
226 /* Socket for incoming connections. */
230 /* Inotify descriptor. */
231 static int inotify_fd
= -1;
233 /* Watch descriptor for resolver configuration file. */
234 static int resolv_conf_descr
= -1;
237 #ifndef __ASSUME_SOCK_CLOEXEC
238 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
239 before we know the result. */
240 static int have_sock_cloexec
;
241 /* The paccept syscall was introduced at the same time as SOCK_CLOEXEC. */
242 # define have_paccept -1 // XXX For the time being there is no such call
245 /* Number of times clients had to wait. */
246 unsigned long int client_queued
;
248 /* Data structure for recording in-flight memory allocation. */
249 __thread
struct mem_in_flight mem_in_flight attribute_tls_model_ie
;
250 /* Global list of the mem_in_flight variables of all the threads. */
251 struct mem_in_flight
*mem_in_flight_list
;
/* Push the LEN bytes at BUF out over the socket FD with send().

   Each send() is issued with MSG_NOSIGNAL and wrapped in
   TEMP_FAILURE_RETRY; BUF is advanced by the amount the call reports.
   The result is the negative value of a failed send(), otherwise LEN
   minus N.  N presumably counts the bytes still outstanding; its
   declaration, its update and the enclosing loop are not visible in
   this listing -- confirm against the real file.  */
255 writeall (int fd
, const void *buf
, size_t len
)
261 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
/* Step past the part that has been handed to the kernel.  */
264 buf
= (const char *) buf
+ ret
;
/* Report the error as is, else how much of LEN is no longer pending.  */
268 return ret
< 0 ? ret
: len
- n
;
/* Copy LEN bytes starting at offset OFF of descriptor FROMFD to
   descriptor TOFD using sendfile().

   The call is wrapped in TEMP_FAILURE_RETRY and is handed the address
   of OFF, so sendfile() itself maintains the file position between
   calls.  The result follows the same convention as writeall: the
   negative value of a failed call, otherwise LEN minus N.  The
   declaration and update of N and the enclosing loop are not visible
   in this listing.  */
274 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
281 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
287 return ret
< 0 ? ret
: len
- n
;
295 /* The following three are not really used, they are symbolic constants. */
/* Composite markers stored in the usage map: a kind (use_he, use_key
   or use_data) OR'ed with use_begin for the first slot of a region or
   use_end for its last slot.  The *_first variants additionally carry
   the use_first flag on the begin marker.  The base enumerators are
   not visible in this listing.  */
301 use_he_begin
= use_he
| use_begin
,
302 use_he_end
= use_he
| use_end
,
305 use_key_begin
= use_key
| use_begin
,
306 use_key_end
= use_key
| use_end
,
307 use_key_first
= use_key_begin
| use_first
,
310 use_data_begin
= use_data
| use_begin
,
311 use_data_end
= use_data
| use_end
,
312 use_data_first
= use_data_begin
| use_first
/* Record in USEMAP that the region of LEN bytes at offset START is
   used for the purpose USE, or verify that an existing record of the
   region is compatible with USE.

   FIRST_FREE is the upper bound for valid offsets.  DATA is accepted
   but not referenced by any of the lines visible here.  The return
   statements and loop headers are not visible in this listing; the
   callers negate the result, so a nonzero value presumably means the
   region is acceptable -- confirm against the real file.  */
317 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
318 enum usekey use
, ref_t start
, size_t len
)
/* The region must begin and end inside the used part of the data area
   and START must have none of the BLOCK_ALIGN_M1 bits set.  */
322 if (start
> first_free
|| start
+ len
> first_free
323 || (start
& BLOCK_ALIGN_M1
))
/* First reference to this offset: claim the region.  */
326 if (usemap
[start
] == use_not
)
328 /* Add the start marker. */
329 usemap
[start
] = use
| use_begin
;
/* Every following slot of the region has to be unclaimed so far.  */
333 if (usemap
[++start
] != use_not
)
338 /* Add the end marker. */
339 usemap
[start
] = use
| use_end
;
/* The offset already starts a region of the same kind; the use_first
   flag is ignored for this comparison.  */
341 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
343 /* Hash entries can't be shared. */
/* Carry over the use_first flag if this reference has it.  */
347 usemap
[start
] |= (use
& use_first
);
/* The slots after the begin marker must hold the plain kind ...  */
351 if (usemap
[++start
] != use
)
/* ... and the slot tested here must hold the matching end marker.  */
354 if (usemap
[++start
] != (use
| use_end
))
358 /* Points to a wrong object or somewhere in the middle. */
365 /* Verify data in persistent database. */
/* MEM is the mapped database, READHEAD the header that was read from
   the file beforehand, and DBNR the index into dbs[] the content must
   belong to.

   The header at MEM is compared with READHEAD and sanity-checked, then
   every hash chain is walked while check_use records in a usage map
   which parts of the data area are referenced by hash entries, data
   records and keys.  At the end the header is compared with a copy
   taken on entry to detect concurrent modification.

   The return statements are not visible in this listing; the caller
   in the initialization code reports a verification failure when the
   result is zero.  */
367 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
369 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
);
371 time_t now
= time (NULL
);
373 struct database_pers_head
*head
= mem
;
/* Snapshot of the header for the final consistency check.  */
374 struct database_pers_head head_copy
= *head
;
376 /* Check that the header that was read matches the head in the database. */
377 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
380 /* First some easy tests: make sure the database header is sane. */
381 if (head
->version
!= DB_VERSION
382 || head
->header_size
!= sizeof (*head
)
383 /* We allow a timestamp to be one hour ahead of the current time.
384 This should cover daylight saving time changes. */
385 || head
->timestamp
> now
+ 60 * 60 + 60
386 || (head
->gc_cycle
& 1)
388 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
389 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
390 || head
->first_free
< 0
391 || head
->first_free
> head
->data_size
392 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
393 || head
->maxnentries
< 0
394 || head
->maxnsearched
< 0)
/* One zero-initialized map byte per byte of the used data area.  */
397 uint8_t *usemap
= calloc (head
->first_free
, 1);
/* The data area follows the hash table, whose length is rounded up
   to a multiple of ALIGN bytes.  */
401 const char *data
= (char *) &head
->array
[roundup (head
->module
,
402 ALIGN
/ sizeof (ref_t
))];
404 nscd_ssize_t he_cnt
= 0;
/* Walk the chain hanging off every slot of the hash table.  */
405 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
407 ref_t trail
= head
->array
[cnt
];
411 while (work
!= ENDREF
)
413 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
414 sizeof (struct hashentry
)))
417 /* Now we know we can dereference the record. */
418 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
422 /* Make sure the record is for this type of service. */
423 if (here
->type
>= LASTREQ
424 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
427 /* Validate boolean field value. */
428 if (here
->first
!= false && here
->first
!= true)
436 || here
->packet
> head
->first_free
437 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
440 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
442 if (! check_use (data
, head
->first_free
, usemap
,
443 use_data
| (here
->first
? use_first
: 0),
444 here
->packet
, dh
->allocsize
))
447 if (dh
->allocsize
< sizeof (struct datahead
)
448 || dh
->recsize
> dh
->allocsize
449 || (dh
->notfound
!= false && dh
->notfound
!= true)
450 || (dh
->usable
!= false && dh
->usable
!= true))
/* The key has to lie inside the data record, behind its header.  */
453 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
454 || here
->key
> here
->packet
+ dh
->allocsize
455 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
458 /* If keys can appear outside of data, this should be done
459 instead. But gc doesn't mark the data in that case. */
460 if (! check_use (data
, head
->first_free
, usemap
,
461 use_key
| (here
->first
? use_first
: 0),
462 here
->key
, here
->len
))
470 /* A circular list, this must not happen. */
473 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
/* The number of entries found must agree with the header.  */
478 if (he_cnt
!= head
->nentries
)
481 /* See if all data and keys had at least one reference from
482 he->first == true hashentry. */
483 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
486 if (usemap
[idx
] == use_key_begin
)
489 if (usemap
[idx
] == use_data_begin
)
493 /* Finally, make sure the database hasn't changed since the first test. */
494 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
507 # define EXTRA_O_FLAGS O_CLOEXEC
509 # define EXTRA_O_FLAGS 0
513 /* Initialize database information structures. */
517 /* Look up unprivileged uid/gid/groups before we start listening on the
519 if (server_user
!= NULL
)
520 begin_drop_privileges ();
523 /* No configuration for this value, assume a default. */
527 /* Use inotify to recognize changed files. */
528 inotify_fd
= inotify_init1 (IN_NONBLOCK
);
529 # ifndef __ASSUME_IN_NONBLOCK
530 if (inotify_fd
== -1 && errno
== ENOSYS
)
532 inotify_fd
= inotify_init ();
533 if (inotify_fd
!= -1)
534 fcntl (inotify_fd
, F_SETFL
, O_RDONLY
| O_NONBLOCK
);
539 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
540 if (dbs
[cnt
].enabled
)
542 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
543 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
545 if (dbs
[cnt
].persistent
)
547 /* Try to open the appropriate file on disk. */
548 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
555 struct database_pers_head head
;
556 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
558 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
561 /* The code is single-threaded at this point so
562 using strerror is just fine. */
563 msg
= strerror (errno
);
565 dbg_log (_("invalid persistent database file \"%s\": %s"),
566 dbs
[cnt
].db_filename
, msg
);
567 unlink (dbs
[cnt
].db_filename
);
569 else if (head
.module
== 0 && head
.data_size
== 0)
571 /* The file has been created, but the head has not
572 been initialized yet. */
573 msg
= _("uninitialized header");
576 else if (head
.header_size
!= (int) sizeof (head
))
578 msg
= _("header size does not match");
581 else if ((total
= (sizeof (head
)
582 + roundup (head
.module
* sizeof (ref_t
),
586 || total
< sizeof (head
))
588 msg
= _("file size does not match");
591 /* Note we map with the maximum size allowed for the
592 database. This is likely much larger than the
593 actual file size. This is OK on most OSes since
594 extensions of the underlying file will
595 automatically translate more pages available for
597 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
598 PROT_READ
| PROT_WRITE
,
602 else if (!verify_persistent_db (mem
, &head
, cnt
))
605 msg
= _("verification failed");
610 /* Success. We have the database. */
612 dbs
[cnt
].memsize
= total
;
613 dbs
[cnt
].data
= (char *)
614 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
615 ALIGN
/ sizeof (ref_t
))];
616 dbs
[cnt
].mmap_used
= true;
618 if (dbs
[cnt
].suggested_module
> head
.module
)
619 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
624 /* We also need a read-only descriptor. */
627 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
628 O_RDONLY
| EXTRA_O_FLAGS
);
629 if (dbs
[cnt
].ro_fd
== -1)
631 cannot create read-only descriptor for \"%s\"; no mmap"),
632 dbs
[cnt
].db_filename
);
635 // XXX Shall we test whether the descriptors actually
636 // XXX point to the same file?
639 /* Close the file descriptors in case something went
640 wrong in which case the variable have not been
647 if (dbs
[cnt
].head
== NULL
)
649 /* No database loaded. Allocate the data structure,
651 struct database_pers_head head
;
652 size_t total
= (sizeof (head
)
653 + roundup (dbs
[cnt
].suggested_module
654 * sizeof (ref_t
), ALIGN
)
655 + (dbs
[cnt
].suggested_module
656 * DEFAULT_DATASIZE_PER_BUCKET
));
658 /* Try to create the database. If we do not need a
659 persistent database create a temporary file. */
662 if (dbs
[cnt
].persistent
)
664 fd
= open (dbs
[cnt
].db_filename
,
665 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
667 if (fd
!= -1 && dbs
[cnt
].shared
)
668 ro_fd
= open (dbs
[cnt
].db_filename
,
669 O_RDONLY
| EXTRA_O_FLAGS
);
673 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
674 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
676 /* We do not need the file name anymore after we
677 opened another file descriptor in read-only mode. */
681 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
691 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
692 dbnames
[cnt
], dbs
[cnt
].db_filename
);
693 // XXX Correct way to terminate?
697 if (dbs
[cnt
].persistent
)
698 dbg_log (_("cannot create %s; no persistent database used"),
699 dbs
[cnt
].db_filename
);
701 dbg_log (_("cannot create %s; no sharing possible"),
702 dbs
[cnt
].db_filename
);
704 dbs
[cnt
].persistent
= 0;
705 // XXX remember: no mmap
709 /* Tell the user if we could not create the read-only
711 if (ro_fd
== -1 && dbs
[cnt
].shared
)
713 cannot create read-only descriptor for \"%s\"; no mmap"),
714 dbs
[cnt
].db_filename
);
716 /* Before we create the header, initialize the hash
717 table. So that if we get interrupted while writing
718 the header we can recognize a partially initialized
720 size_t ps
= sysconf (_SC_PAGESIZE
);
722 assert (~ENDREF
== 0);
723 memset (tmpbuf
, '\xff', ps
);
725 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
726 off_t offset
= sizeof (head
);
729 if (offset
% ps
!= 0)
731 towrite
= MIN (remaining
, ps
- (offset
% ps
));
732 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
735 remaining
-= towrite
;
738 while (remaining
> ps
)
740 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
747 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
750 /* Create the header of the file. */
751 struct database_pers_head head
=
753 .version
= DB_VERSION
,
754 .header_size
= sizeof (head
),
755 .module
= dbs
[cnt
].suggested_module
,
756 .data_size
= (dbs
[cnt
].suggested_module
757 * DEFAULT_DATASIZE_PER_BUCKET
),
762 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
764 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
766 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
767 PROT_READ
| PROT_WRITE
,
768 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
771 unlink (dbs
[cnt
].db_filename
);
772 dbg_log (_("cannot write to database file %s: %s"),
773 dbs
[cnt
].db_filename
, strerror (errno
));
774 dbs
[cnt
].persistent
= 0;
780 dbs
[cnt
].data
= (char *)
781 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
782 ALIGN
/ sizeof (ref_t
))];
783 dbs
[cnt
].memsize
= total
;
784 dbs
[cnt
].mmap_used
= true;
786 /* Remember the descriptors. */
788 dbs
[cnt
].ro_fd
= ro_fd
;
800 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
801 /* We do not check here whether the O_CLOEXEC provided to the
802 open call was successful or not. The two fcntl calls are
803 only performed once each per process start-up and therefore
804 is not noticeable at all. */
806 && ((dbs
[cnt
].wr_fd
!= -1
807 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
808 || (dbs
[cnt
].ro_fd
!= -1
809 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
812 cannot set socket to close on exec: %s; disabling paranoia mode"),
818 if (dbs
[cnt
].head
== NULL
)
820 /* We do not use the persistent database. Just
821 create an in-memory data structure. */
822 assert (! dbs
[cnt
].persistent
);
824 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
825 + (dbs
[cnt
].suggested_module
827 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
828 assert (~ENDREF
== 0);
829 memset (dbs
[cnt
].head
->array
, '\xff',
830 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
831 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
832 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
833 * dbs
[cnt
].head
->module
);
834 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
835 dbs
[cnt
].head
->first_free
= 0;
838 assert (dbs
[cnt
].ro_fd
== -1);
841 dbs
[cnt
].inotify_descr
= -1;
842 if (dbs
[cnt
].check_file
)
846 || (dbs
[cnt
].inotify_descr
847 = inotify_add_watch (inotify_fd
, dbs
[cnt
].filename
,
848 IN_DELETE_SELF
| IN_MODIFY
)) < 0)
849 /* We cannot notice changes in the main thread. */
852 /* We need the modification date of the file. */
855 if (stat64 (dbs
[cnt
].filename
, &st
) < 0)
857 /* We cannot stat() the file, disable file checking. */
858 dbg_log (_("cannot stat() file `%s': %s"),
859 dbs
[cnt
].filename
, strerror (errno
));
860 dbs
[cnt
].check_file
= 0;
863 dbs
[cnt
].file_mtime
= st
.st_mtime
;
868 if (cnt
== hstdb
&& inotify_fd
>= -1)
869 /* We also monitor the resolver configuration file. */
870 resolv_conf_descr
= inotify_add_watch (inotify_fd
,
872 IN_DELETE_SELF
| IN_MODIFY
);
876 /* Create the socket. */
877 #ifndef __ASSUME_SOCK_CLOEXEC
879 if (have_sock_cloexec
>= 0)
882 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
883 #ifndef __ASSUME_SOCK_CLOEXEC
884 if (have_sock_cloexec
== 0)
885 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
888 #ifndef __ASSUME_SOCK_CLOEXEC
889 if (have_sock_cloexec
< 0)
890 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
894 dbg_log (_("cannot open socket: %s"), strerror (errno
));
895 exit (errno
== EACCES
? 4 : 1);
897 /* Bind a name to the socket. */
898 struct sockaddr_un sock_addr
;
899 sock_addr
.sun_family
= AF_UNIX
;
900 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
901 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
903 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
904 exit (errno
== EACCES
? 4 : 1);
907 #ifndef __ASSUME_SOCK_CLOEXEC
908 if (have_sock_cloexec
< 0)
910 /* We don't want to get stuck on accept. */
911 int fl
= fcntl (sock
, F_GETFL
);
912 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
914 dbg_log (_("cannot change socket to nonblocking mode: %s"),
919 /* The descriptor needs to be closed on exec. */
920 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
922 dbg_log (_("cannot set socket to close on exec: %s"),
929 /* Set permissions for the socket. */
930 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
932 /* Set the socket up to accept connections. */
933 if (listen (sock
, SOMAXCONN
) < 0)
935 dbg_log (_("cannot enable socket to accept connections: %s"),
940 /* Change to unprivileged uid/gid/groups if specified in config file */
941 if (server_user
!= NULL
)
942 finish_drop_privileges ();
946 /* Close the connections. */
/* Handle a request to invalidate the database named by KEY; the
   outcome is reported to the client connected on FD.

   KEY is compared with the entries of dbnames, starting at pwddb.  If
   no name matches, RESP is written to FD.  For an enabled database
   prune_cache is called with LONG_MAX as the time argument and with
   FD, while the database's prune_lock is held.  The final writeall is
   presumably the branch for a database that is not enabled.  The
   declaration and value of RESP, the returns and the branch structure
   are not visible in this listing -- confirm against the real file.  */
955 invalidate_cache (char *key
, int fd
)
/* Find the database whose name equals KEY.  */
960 for (number
= pwddb
; number
< lastdb
; ++number
)
961 if (strcmp (key
, dbnames
[number
]) == 0)
963 if (dbs
[number
].reset_res
)
/* The loop ran to the end: KEY names no known database.  */
969 if (number
== lastdb
)
972 writeall (fd
, &resp
, sizeof (resp
));
976 if (dbs
[number
].enabled
)
978 pthread_mutex_lock (&dbs
[number
].prune_lock
);
979 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
980 pthread_mutex_unlock (&dbs
[number
].prune_lock
);
985 writeall (fd
, &resp
, sizeof (resp
));
/* Pass the read-only descriptor of DB to the client connected on FD.

   The descriptor db->ro_fd travels in an SCM_RIGHTS control message.
   The regular payload consists of KEY including its terminating NUL
   byte, followed by a 64-bit value giving the size of the mapping:
   the data size, plus the hash table size rounded up to ALIGN, plus
   the size of the database header.  The result of sendmsg is
   discarded.  When debug_level is positive the hand-over is logged.
   The early exit for a missing descriptor and the declaration of BUF
   are not visible in this listing.  */
992 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
994 /* If we do not have a read-only file descriptor do nothing. */
998 /* We need to send some data along with the descriptor. */
999 uint64_t mapsize
= (db
->head
->data_size
1000 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1001 + sizeof (struct database_pers_head
));
/* First element: the key; second element: the mapping size.  */
1002 struct iovec iov
[2];
1003 iov
[0].iov_base
= key
;
1004 iov
[0].iov_len
= strlen (key
) + 1;
1005 iov
[1].iov_base
= &mapsize
;
1006 iov
[1].iov_len
= sizeof (mapsize
);
1008 /* Prepare the control message to transfer the descriptor. */
1012 char bytes
[CMSG_SPACE (sizeof (int))];
1014 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1015 .msg_control
= buf
.bytes
,
1016 .msg_controllen
= sizeof (buf
) };
1017 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1019 cmsg
->cmsg_level
= SOL_SOCKET
;
1020 cmsg
->cmsg_type
= SCM_RIGHTS
;
1021 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
/* The single descriptor is the whole content of the control message.  */
1023 *(int *) CMSG_DATA (cmsg
) = db
->ro_fd
;
/* Shrink the control length to what is actually used.  */
1025 msg
.msg_controllen
= cmsg
->cmsg_len
;
1027 /* Send the control message. We repeat when we are interrupted but
1028 everything else is ignored. */
1029 #ifndef MSG_NOSIGNAL
1030 # define MSG_NOSIGNAL 0
1032 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1034 if (__builtin_expect (debug_level
> 0, 0))
1035 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1037 #endif /* SCM_RIGHTS */
1040 /* Handle new request. */
1042 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1044 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1046 if (debug_level
> 0)
1048 cannot handle old request version %d; current version is %d"),
1049 req
->version
, NSCD_VERSION
);
1053 /* Perform the SELinux check before we go on to the standard checks. */
1054 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1056 if (debug_level
> 0)
1065 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1066 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1070 request from %ld not handled due to missing permission"), (long int) pid
);
1075 request from '%s' [%ld] not handled due to missing permission"),
1076 buf
, (long int) pid
);
1079 dbg_log (_("request not handled due to missing permission"));
1085 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1087 /* See whether we can service the request from the cache. */
1088 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1090 if (__builtin_expect (debug_level
, 0) > 0)
1092 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1094 char buf
[INET6_ADDRSTRLEN
];
1096 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1097 inet_ntop (req
->type
== GETHOSTBYADDR
1098 ? AF_INET
: AF_INET6
,
1099 key
, buf
, sizeof (buf
)));
1102 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1105 /* Is this service enabled? */
1106 if (__builtin_expect (!db
->enabled
, 0))
1108 /* No, send the prepared record. */
1109 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1110 db
->disabled_iov
->iov_len
,
1112 != (ssize_t
) db
->disabled_iov
->iov_len
1113 && __builtin_expect (debug_level
, 0) > 0)
1115 /* We have problems sending the result. */
1117 dbg_log (_("cannot write result: %s"),
1118 strerror_r (errno
, buf
, sizeof (buf
)));
1124 /* Be sure we can read the data. */
1125 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1127 ++db
->head
->rdlockdelayed
;
1128 pthread_rwlock_rdlock (&db
->lock
);
1131 /* See whether we can handle it from the cache. */
1132 struct datahead
*cached
;
1133 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1137 /* Hurray it's in the cache. */
1140 #ifdef HAVE_SENDFILE
1141 if (__builtin_expect (db
->mmap_used
, 1))
1143 assert (db
->wr_fd
!= -1);
1144 assert ((char *) cached
->data
> (char *) db
->data
);
1145 assert ((char *) cached
->data
- (char *) db
->head
1147 <= (sizeof (struct database_pers_head
)
1148 + db
->head
->module
* sizeof (ref_t
)
1149 + db
->head
->data_size
));
1150 nwritten
= sendfileall (fd
, db
->wr_fd
,
1151 (char *) cached
->data
1152 - (char *) db
->head
, cached
->recsize
);
1153 # ifndef __ASSUME_SENDFILE
1154 if (nwritten
== -1 && errno
== ENOSYS
)
1159 # ifndef __ASSUME_SENDFILE
1163 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1165 if (nwritten
!= cached
->recsize
1166 && __builtin_expect (debug_level
, 0) > 0)
1168 /* We have problems sending the result. */
1170 dbg_log (_("cannot write result: %s"),
1171 strerror_r (errno
, buf
, sizeof (buf
)));
1174 pthread_rwlock_unlock (&db
->lock
);
1179 pthread_rwlock_unlock (&db
->lock
);
1181 else if (__builtin_expect (debug_level
, 0) > 0)
1183 if (req
->type
== INVALIDATE
)
1184 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1186 dbg_log ("\t%s", serv2str
[req
->type
]);
1189 /* Handle the request. */
1193 addpwbyname (db
, fd
, req
, key
, uid
);
1197 addpwbyuid (db
, fd
, req
, key
, uid
);
1201 addgrbyname (db
, fd
, req
, key
, uid
);
1205 addgrbygid (db
, fd
, req
, key
, uid
);
1209 addhstbyname (db
, fd
, req
, key
, uid
);
1212 case GETHOSTBYNAMEv6
:
1213 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1217 addhstbyaddr (db
, fd
, req
, key
, uid
);
1220 case GETHOSTBYADDRv6
:
1221 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1225 addhstai (db
, fd
, req
, key
, uid
);
1229 addinitgroups (db
, fd
, req
, key
, uid
);
1233 addservbyname (db
, fd
, req
, key
, uid
);
1237 addservbyport (db
, fd
, req
, key
, uid
);
1244 /* Get the caller's credentials. */
1246 struct ucred caller
;
1247 socklen_t optlen
= sizeof (caller
);
1249 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1253 dbg_log (_("error getting caller's id: %s"),
1254 strerror_r (errno
, buf
, sizeof (buf
)));
1260 /* Some systems have no SO_PEERCRED implementation. They don't
1261 care about security so we don't as well. */
1266 /* Accept shutdown, getstat and invalidate only from root. For
1267 the stat call also allow the user specified in the config file. */
1268 if (req
->type
== GETSTAT
)
1270 if (uid
== 0 || uid
== stat_uid
)
1271 send_stats (fd
, dbs
);
1275 if (req
->type
== INVALIDATE
)
1276 invalidate_cache (key
, fd
);
1278 termination_handler (0);
1287 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1292 /* Ignore the command, it's nothing we know. */
1298 /* Restart the process. */
1302 /* First determine the parameters. We do not use the parameters
1303 passed to main() since in case nscd is started by running the
1304 dynamic linker this will not work. Yes, this is not the usual
1305 case but nscd is part of glibc and we occasionally do this. */
1306 size_t buflen
= 1024;
1307 char *buf
= alloca (buflen
);
1309 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1313 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1322 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1327 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1337 if (readlen
< buflen
)
1340 /* We might have to extend the buffer. */
1341 size_t old_buflen
= buflen
;
1342 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1343 buf
= memmove (newp
, buf
, old_buflen
);
1348 /* Parse the command line. Worst case scenario: every two
1349 characters form one parameter (one character plus NUL). */
1350 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1354 while (cp
< buf
+ readlen
)
1357 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1361 /* Second, change back to the old user if we changed it. */
1362 if (server_user
!= NULL
)
1364 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1367 cannot change to old UID: %s; disabling paranoia mode"),
1374 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1377 cannot change to old GID: %s; disabling paranoia mode"),
1380 setuid (server_uid
);
1386 /* Next change back to the old working directory. */
1387 if (chdir (oldcwd
) == -1)
1390 cannot change to old working directory: %s; disabling paranoia mode"),
1393 if (server_user
!= NULL
)
1395 setuid (server_uid
);
1396 setgid (server_gid
);
1402 /* Synchronize memory. */
1403 int32_t certainly
[lastdb
];
1404 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1405 if (dbs
[cnt
].enabled
)
1407 /* Make sure nobody keeps using the database. */
1408 dbs
[cnt
].head
->timestamp
= 0;
1409 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1410 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1412 if (dbs
[cnt
].persistent
)
1414 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1417 /* The preparations are done. */
1418 execv ("/proc/self/exe", argv
);
1420 /* If we come here, we will never be able to re-exec. */
1421 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1424 if (server_user
!= NULL
)
1426 setuid (server_uid
);
1427 setgid (server_gid
);
1429 if (chdir ("/") != 0)
1430 dbg_log (_("cannot change current working directory to \"/\": %s"),
1434 /* Reenable the databases. */
1435 time_t now
= time (NULL
);
1436 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1437 if (dbs
[cnt
].enabled
)
1439 dbs
[cnt
].head
->timestamp
= now
;
1440 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1445 /* List of file descriptors. */
1449 struct fdlist
*next
;
1451 /* Memory allocated for the list. */
1452 static struct fdlist
*fdlist
;
1453 /* List of currently ready-to-read file descriptors. */
1454 static struct fdlist
*readylist
;
1456 /* Conditional variable and mutex to signal availability of entries in
1457 READYLIST. The condvar is initialized dynamically since we might
1458 use a different clock depending on availability. */
1459 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1460 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1462 /* The clock to use with the condvar. */
1463 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1465 /* Number of threads ready to handle the READYLIST. */
1466 static unsigned long int nready
;
1469 /* Function for the clean-up threads.  P is not a real pointer: it
   carries the index into DBS of the database this thread prunes, cast
   to a pointer by the creator.  The function is declared noreturn. */
1471 __attribute__ ((__noreturn__
))
1472 nscd_run_prune (void *p
)
1474 const long int my_number
= (long int) p
;
1475 assert (dbs
[my_number
].enabled
);
/* The result of setup_thread is consulted further down to decide
   whether the mapping timestamp must be refreshed periodically.  */
1477 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1479 time_t now
= time (NULL
);
1481 /* We are running. */
1482 dbs
[my_number
].head
->timestamp
= now
;
1484 struct timespec prune_ts
;
1485 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1486 /* Should never happen. */
1489 /* Compute the initial timeout time. Prevent all the timers from going
1490 off at the same time by adding a db-based value. */
1491 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1492 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1494 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1495 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1497 pthread_mutex_lock (prune_lock
);
1500 /* Wait, but not forever. */
/* The wait is skipped entirely when a cache clear is already
   requested.  */
1502 if (! dbs
[my_number
].clear_cache
)
1503 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1504 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
/* Prune on timeout, when the recorded wakeup time has been reached,
   or when a clear was requested.  */
1508 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1509 || dbs
[my_number
].clear_cache
)
1511 /* We will determine the new timeout values based on the
1512 cache content. Should there be concurrent additions to
1513 the cache which are not accounted for in the cache
1514 pruning we want to know about it. Therefore set the
1515 timeout to the maximum. It will be decreased when adding
1516 new entries to the cache, if necessary. */
/* The maximum is chosen according to the width of time_t.  */
1517 if (sizeof (time_t) == sizeof (long int))
1518 dbs
[my_number
].wakeup_time
= LONG_MAX
;
1520 dbs
[my_number
].wakeup_time
= INT_MAX
;
1522 /* Unconditionally reset the flag. */
/* When a clear was requested LONG_MAX is handed to prune_cache as
   the reference time instead of NOW.  */
1523 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1524 dbs
[my_number
].clear_cache
= 0;
/* The prune lock is released around the prune_cache call and taken
   again afterwards.  */
1526 pthread_mutex_unlock (prune_lock
);
1528 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
/* Never wait less than CACHE_PRUNE_INTERVAL.  */
1530 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1531 /* If clients cannot determine for sure whether nscd is running
1532 we need to wake up occasionally to update the timestamp.
1533 Wait 90% of the update period. */
1534 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1535 if (__builtin_expect (! dont_need_update
, 0))
1537 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1538 dbs
[my_number
].head
->timestamp
= now
;
1541 pthread_mutex_lock (prune_lock
);
1543 /* Make it known when we will wake up again. */
/* WAKEUP_TIME is only ever moved earlier here; if it is already
   earlier, the wait is derived from it instead.  */
1544 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1545 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1547 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1550 /* The cache was just pruned. Do not do it again now. Just
1551 use the new timeout value. */
1552 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1554 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1555 /* Should never happen. */
1558 /* Compute next timeout time. */
1559 prune_ts
.tv_sec
+= next_wait
;
1564 /* This is the main loop. It is replicated in different threads but
1565 the use of the ready list makes sure only one thread handles an
1566 incoming connection. */
1568 __attribute__ ((__noreturn__
))
1569 nscd_run_worker (void *p
)
1573 /* Initialize the memory-in-flight list. */
1574 for (enum in_flight idx
= 0; idx
< IDX_last
; ++idx
)
1575 mem_in_flight
.block
[idx
].dbidx
= -1;
1576 /* And queue this thread's structure.  The compare-and-exchange
   below is repeated for as long as it returns nonzero. */
1578 mem_in_flight
.next
= mem_in_flight_list
;
1579 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list
,
1581 mem_in_flight
.next
) != 0);
1583 /* Initial locking. */
1584 pthread_mutex_lock (&readylist_lock
);
1586 /* One more thread available. */
/* Sleep on the condition variable until the ready list is
   non-empty.  */
1591 while (readylist
== NULL
)
1592 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
/* IT is the entry following READYLIST.  If that is READYLIST itself
   the list holds a single entry; otherwise IT is unlinked by making
   READYLIST point past it.  */
1594 struct fdlist
*it
= readylist
->next
;
1595 if (readylist
->next
== readylist
)
1596 /* Just one entry on the list. */
1599 readylist
->next
= it
->next
;
1601 /* Extract the information and mark the record ready to be used
1606 /* One more thread available. */
1609 /* We are done with the list. */
1610 pthread_mutex_unlock (&readylist_lock
);
1612 #ifndef __ASSUME_SOCK_CLOEXEC
1613 if (have_sock_cloexec
< 0)
1615 /* We do not want to block on a short read or so. */
1616 int fl
= fcntl (fd
, F_GETFL
);
1617 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1622 /* Now read the request. */
/* Anything other than a complete request header counts as a
   failure.  */
1624 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1625 != sizeof (req
), 0))
1627 /* We failed to read data. Note that this also might mean we
1628 failed because we would have blocked. */
1629 if (debug_level
> 0)
1630 dbg_log (_("short read while reading request: %s"),
1631 strerror_r (errno
, buf
, sizeof (buf
)));
1635 /* Check whether this is a valid request type. */
1636 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1639 /* Some systems have no SO_PEERCRED implementation. They don't
1640 care about security so neither do we. */
1645 if (__builtin_expect (debug_level
> 0, 0))
1647 struct ucred caller
;
1648 socklen_t optlen
= sizeof (caller
);
1650 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1654 const pid_t pid
= 0;
1657 /* It should not be possible to crash the nscd with a silly
1658 request (i.e., a terribly large key). We limit the size to 1kb. */
/* Negative lengths and lengths above MAXKEYLEN, the size of KEYBUF
   below, are rejected.  */
1659 if (__builtin_expect (req
.key_len
, 1) < 0
1660 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1662 if (debug_level
> 0)
1663 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1668 char keybuf
[MAXKEYLEN
];
1670 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1674 /* Again, this can also mean we would have blocked. */
1675 if (debug_level
> 0)
1676 dbg_log (_("short read while reading request key: %s"),
1677 strerror_r (errno
, buf
, sizeof (buf
)));
1681 if (__builtin_expect (debug_level
, 0) > 0)
1686 handle_request: request received (Version = %d) from PID %ld"),
1687 req
.version
, (long int) pid
);
1691 handle_request: request received (Version = %d)"), req
.version
);
1694 /* Phew, we got all the data, now process it. */
1695 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1703 pthread_mutex_lock (&readylist_lock
);
1705 /* One more thread available. */
/* Number of elements in the FDLIST and STARTTIME arrays (both are
   allocated with NCONNS elements in start_threads) and in the poll
   array of main_loop_poll.  */
1711 static unsigned int nconns
;
/* NOTE(review): the statements below queue the descriptor FD on the
   ready list and wake a worker.  The enclosing function header is not
   visible here -- presumably this is fd_ready (), which both main
   loops call; confirm against the full file.  */
1716 pthread_mutex_lock (&readylist_lock
);
1718 /* Find an empty entry in FDLIST. */
/* An entry is unused when its NEXT pointer is NULL.  */
1720 for (inner
= 0; inner
< nconns
; ++inner
)
1721 if (fdlist
[inner
].next
== NULL
)
/* A free slot is expected to exist.  */
1723 assert (inner
< nconns
);
1725 fdlist
[inner
].fd
= fd
;
/* Insert into the circular ready list.  The first entry points at
   itself; otherwise the new entry is linked in after the current
   READYLIST entry and becomes the new READYLIST.  */
1727 if (readylist
== NULL
)
1728 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1731 fdlist
[inner
].next
= readylist
->next
;
1732 readylist
= readylist
->next
= &fdlist
[inner
];
1735 bool do_signal
= true;
/* When no worker is currently ready, another worker may be created
   provided NTHREADS is still below MAX_NTHREADS.  */
1736 if (__builtin_expect (nready
== 0, 0))
1741 /* Try to start another thread to help out. */
1743 if (nthreads
< max_nthreads
1744 && pthread_create (&th
, &attr
, nscd_run_worker
,
1745 (void *) (long int) nthreads
) == 0)
1747 /* We got another thread. */
1749 /* The new thread might need a kick. */
1755 pthread_mutex_unlock (&readylist_lock
);
1757 /* Tell one of the worker threads there is work to do. */
1759 pthread_cond_signal (&readylist_cond
);
1763 /* Check whether restarting should happen.  The result is nonzero only
   when PARANOIA is set, the ready list is empty, NREADY equals
   NTHREADS, and NOW has reached RESTART_TIME. */
1765 restart_p (time_t now
)
1767 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1768 && now
>= restart_time
);
1772 /* Array for times a connection was accepted.  main_loop_poll indexes
   it by slot in its poll array, main_loop_epoll by descriptor number;
   the epoll loop stores 0 for a descriptor that is not being timed. */
1773 static time_t *starttime
;
/* Main loop built on poll ().  Slot 0 of CONNS reports new incoming
   connections, slot 1 is used for the inotify descriptor when one is
   open, and the remaining slots hold accepted connections.  The
   function is declared noreturn.  */
1777 __attribute__ ((__noreturn__
))
1778 main_loop_poll (void)
1780 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1781 * sizeof (conns
[0]));
1784 conns
[0].events
= POLLRDNORM
;
1786 size_t firstfree
= 1;
1789 if (inotify_fd
!= -1)
1791 conns
[1].fd
= inotify_fd
;
1792 conns
[1].events
= POLLRDNORM
;
1800 /* Wait for any event. We wait at most a couple of seconds so
1801 that we can check whether we should close any of the accepted
1802 connections since we have not received a request. */
1803 #define MAX_ACCEPT_TIMEOUT 30
1804 #define MIN_ACCEPT_TIMEOUT 5
/* Poll timeout in milliseconds.  It starts at MAX_ACCEPT_TIMEOUT
   seconds and shrinks as NUSED grows relative to NCONNS.  */
1805 #define MAIN_THREAD_TIMEOUT \
1806 (MAX_ACCEPT_TIMEOUT * 1000 \
1807 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1809 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1811 time_t now
= time (NULL
);
1813 /* If there is a descriptor ready for reading or there is a new
1814 connection, process this now. */
1817 if (conns
[0].revents
!= 0)
1819 /* We have a new incoming connection. Accept the connection. */
1822 #ifndef __ASSUME_PACCEPT
1824 if (have_paccept
>= 0)
1828 fd
= TEMP_FAILURE_RETRY (paccept (sock
, NULL
, NULL
, NULL
,
1830 #ifndef __ASSUME_PACCEPT
/* On first use, record whether paccept works: -1 only if the call
   failed with ENOSYS, 1 otherwise.  */
1831 if (have_paccept
== 0)
1832 have_paccept
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
1836 #ifndef __ASSUME_PACCEPT
1837 if (have_paccept
< 0)
1838 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1841 /* Use the descriptor if we have not reached the limit. */
1844 if (firstfree
< nconns
)
1846 conns
[firstfree
].fd
= fd
;
1847 conns
[firstfree
].events
= POLLRDNORM
;
1848 starttime
[firstfree
] = now
;
1849 if (firstfree
>= nused
)
1850 nused
= firstfree
+ 1;
1854 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1857 /* We cannot use the connection so close it. */
1866 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
1868 if (conns
[1].revents
!= 0)
1870 bool to_clear
[lastdb
] = { false, };
1874 # define PATH_MAX 1024
1876 struct inotify_event i
;
1877 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1882 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
1884 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
1886 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
1889 /* Something went wrong when reading the inotify
1890 data. Better disable inotify. */
1892 disabled inotify after read error %d"),
1904 /* Check which of the files changed. */
1905 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1906 if (inev
.i
.wd
== dbs
[dbcnt
].inotify_descr
)
1908 to_clear
[dbcnt
] = true;
/* An event on the RESOLV_CONF_DESCR watch marks the HSTDB database
   for clearing.  */
1912 if (inev
.i
.wd
== resolv_conf_descr
)
1915 to_clear
[hstdb
] = true;
1920 /* Actually perform the cache clearing. */
/* CLEAR_CACHE is set under the database's PRUNE_LOCK and PRUNE_COND
   is signalled; nscd_run_prune waits on that condition and checks
   the flag.  */
1921 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1922 if (to_clear
[dbcnt
])
1924 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1925 dbs
[dbcnt
].clear_cache
= 1;
1926 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1927 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
/* Hand every connection slot with pending events to fd_ready.  */
1937 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
1938 if (conns
[cnt
].revents
!= 0)
1940 fd_ready (conns
[cnt
].fd
);
1942 /* Clean up the CONNS array. */
1944 if (cnt
< firstfree
)
1946 if (cnt
== nused
- 1)
1949 while (conns
[nused
- 1].fd
== -1);
1955 /* Now find entries which have timed out. */
1958 /* We make the timeout length depend on the number of file
1959 descriptors currently used. */
1960 #define ACCEPT_TIMEOUT \
1961 (MAX_ACCEPT_TIMEOUT \
1962 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1963 time_t laststart
= now
- ACCEPT_TIMEOUT
;
/* Scan the used slots from the last one down to slot 1; slot 0 is
   never examined here.  */
1965 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
1967 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
1969 /* Remove the entry, it timed out. */
1970 (void) close (conns
[cnt
].fd
);
1973 if (cnt
< firstfree
)
1975 if (cnt
== nused
- 1)
1978 while (conns
[nused
- 1].fd
== -1);
1982 if (restart_p (now
))
/* Main loop built on epoll; EFD is the epoll descriptor to use.  The
   listening socket and, when open, the inotify descriptor are
   registered first.  Per the comments below, a failure to register
   either one means epoll cannot be used.  */
1990 main_loop_epoll (int efd
)
1992 struct epoll_event ev
= { 0, };
1996 /* Add the socket. */
1997 ev
.events
= EPOLLRDNORM
;
1999 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2000 /* We cannot use epoll. */
2004 if (inotify_fd
!= -1)
2006 ev
.events
= EPOLLRDNORM
;
2007 ev
.data
.fd
= inotify_fd
;
2008 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2009 /* We cannot use epoll. */
/* At most 100 events are collected per epoll_wait call.  */
2017 struct epoll_event revs
[100];
2018 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2020 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2022 time_t now
= time (NULL
);
/* Dispatch each reported event by descriptor: the listening socket,
   the inotify descriptor, or an accepted connection.  */
2024 for (int cnt
= 0; cnt
< n
; ++cnt
)
2025 if (revs
[cnt
].data
.fd
== sock
)
2027 /* A new connection. */
2028 int fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2032 /* Try to add the new descriptor. */
2035 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2036 /* The descriptor is too large or something went
2037 wrong. Close the descriptor. */
2041 /* Remember when we accepted the connection. */
2042 starttime
[fd
] = now
;
2052 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2054 bool to_clear
[lastdb
] = { false, };
2057 struct inotify_event i
;
2058 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
2063 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2065 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2067 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2069 /* Something went wrong when reading the inotify
2070 data. Better disable inotify. */
2071 dbg_log (_("disabled inotify after read error %d"),
2073 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2081 /* Check which of the files changed. */
2082 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2083 if (inev
.i
.wd
== dbs
[dbcnt
].inotify_descr
)
2085 to_clear
[dbcnt
] = true;
/* An event on the RESOLV_CONF_DESCR watch marks the HSTDB database
   for clearing.  */
2089 if (inev
.i
.wd
== resolv_conf_descr
)
2092 to_clear
[hstdb
] = true;
2097 /* Actually perform the cache clearing. */
/* CLEAR_CACHE is set under the database's PRUNE_LOCK and PRUNE_COND
   is signalled; nscd_run_prune waits on that condition and checks
   the flag.  */
2098 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2099 if (to_clear
[dbcnt
])
2101 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2102 dbs
[dbcnt
].clear_cache
= 1;
2103 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2104 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2110 /* Remove the descriptor from the epoll descriptor. */
2111 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2113 /* Get a worker to handle the request. */
2114 fd_ready (revs
[cnt
].data
.fd
);
2116 /* Reset the time. */
2117 starttime
[revs
[cnt
].data
.fd
] = 0;
2118 if (revs
[cnt
].data
.fd
== highest
)
2121 while (highest
> 0 && starttime
[highest
] == 0);
2126 /* Now look for descriptors for accepted connections which have
2127 had no reply for too long. */
2128 time_t laststart
= now
- ACCEPT_TIMEOUT
;
/* Neither the listening socket nor the inotify descriptor is ever
   timed, so their STARTTIME entries must be 0.  */
2129 assert (starttime
[sock
] == 0);
2130 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
/* Walk the descriptor numbers from HIGHEST down to the first one
   above STDERR_FILENO.  */
2131 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2132 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2134 /* We are waiting for this one for too long. Close it. */
2135 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2143 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2146 if (restart_p (now
))
2153 /* Start all the threads we want. The initial process is thread no. 1. */
2155 start_threads (void)
2157 /* Initialize the condition variable attribute we will use. The only
2158 non-standard attribute we might use is the clock selection. */
2159 pthread_condattr_t condattr
;
2160 pthread_condattr_init (&condattr
);
2162 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2163 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2164 /* Determine whether the monotonic clock is available. */
2165 struct timespec dummy
;
2166 # if _POSIX_MONOTONIC_CLOCK == 0
2167 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2169 # if _POSIX_CLOCK_SELECTION == 0
2170 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
/* TIMEOUT_CLOCK is switched to CLOCK_MONOTONIC only if the clock can
   be queried and it can be selected in the condition attribute.  */
2172 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2173 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2174 timeout_clock
= CLOCK_MONOTONIC
;
2177 /* Create the attribute for the threads. They are all created
2179 pthread_attr_init (&attr
);
2180 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2181 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2182 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2184 /* We allow fewer than LASTDB threads only for debugging. */
2185 if (debug_level
== 0)
2186 nthreads
= MAX (nthreads
, lastdb
);
2188 /* Create the threads which prune the databases. */
2189 // XXX Ideally this work would be done by some of the worker threads.
2190 // XXX But this is problematic since we would need to be able to wake
2191 // XXX them up explicitly as well as part of the group handling the
2192 // XXX ready-list. This requires an operation where we can wait on
2193 // XXX two conditional variables at the same time. This operation
2194 // XXX does not exist (yet).
/* The database index I is handed to nscd_run_prune as its argument,
   cast to a pointer.  */
2195 for (long int i
= 0; i
< lastdb
; ++i
)
2197 /* Initialize the condition variable. */
2198 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2200 dbg_log (_("could not initialize conditional variable"));
2206 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2208 dbg_log (_("could not start clean-up thread; terminating"));
2213 pthread_condattr_destroy (&condattr
);
/* Start the worker threads.  */
2215 for (long int i
= 0; i
< nthreads
; ++i
)
2218 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2222 dbg_log (_("could not start any worker thread; terminating"));
2230 /* Determine how much room for descriptors we should initially
2231 allocate. This might need to change later if we cap the number
2233 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2235 #define MAXCONN 16384
2236 if (nfds
== -1 || nfds
> MAXCONN
)
2238 else if (nfds
< MINCONN
)
2243 /* We need memory to pass descriptors on to the worker threads. */
2244 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2245 /* Array to keep track when connection was accepted. */
2246 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2248 /* In the main thread we execute the loop which handles incoming
2251 int efd
= epoll_create (100);
2254 main_loop_epoll (efd
);
2263 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2264 this function is called, we are not listening on the nscd socket yet so
2265 we can just use the ordinary lookup functions without causing a lockup. */
2267 begin_drop_privileges (void)
2269 struct passwd
*pwd
= getpwnam (server_user
);
2273 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2274 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
/* Remember the IDs taken from the passwd entry for the later
   privilege drop.  */
2278 server_uid
= pwd
->pw_uid
;
2279 server_gid
= pwd
->pw_gid
;
2281 /* Save the old UID/GID if we have to change back. */
2284 old_uid
= getuid ();
2285 old_gid
= getgid ();
/* First getgrouplist call, made with a NULL buffer.  The check below
   treats a return value of 0 as a failure; SERVER_NGROUPS is then
   used to size the SERVER_GROUPS allocation.  NOTE(review): this
   presumably relies on getgrouplist storing the required count in
   SERVER_NGROUPS when the buffer is too small -- confirm.  */
2288 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2290 /* This really must never happen. */
2291 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2292 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
2295 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
/* Second call, this time filling the allocated array.  */
2297 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2300 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2301 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2306 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2307 run nscd as the user specified in the configuration file. */
2309 finish_drop_privileges (void)
2311 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2312 /* We need to preserve the capabilities to connect to the audit daemon. */
2313 cap_t new_caps
= preserve_capabilities ();
/* Install the supplementary groups collected earlier; a failure is
   fatal.  */
2316 if (setgroups (server_ngroups
, server_groups
) == -1)
2318 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2319 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
/* Two alternatives are visible for the group ID change: setresgid,
   which passes OLD_GID as its third argument, or plain setgid.
   NOTE(review): the condition selecting between them is not visible
   here.  */
2324 res
= setresgid (server_gid
, server_gid
, old_gid
);
2326 res
= setgid (server_gid
);
2329 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
/* The user ID change offers the same two alternatives, with OLD_UID
   as the third argument of setresuid.  */
2335 res
= setresuid (server_uid
, server_uid
, old_uid
);
2337 res
= setuid (server_uid
);
2340 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2345 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2346 /* Remove the temporary capabilities. */
2347 install_real_capabilities (new_caps
);