/* This file contains a collection of miscellaneous procedures.  Some of them
 * perform simple system calls.  Some others do a little part of system calls
 * that are mostly performed by the Memory Manager.
 *
 * The entry points into this file are
 *   do_dup:	  perform the DUP system call
 *   do_fcntl:	  perform the FCNTL system call
 *   do_sync:	  perform the SYNC system call
 *   do_fsync:	  perform the FSYNC system call
 *   pm_reboot:	  sync disks and prepare for shutdown
 *   pm_fork:	  adjust the tables after PM has performed a FORK system call
 *   do_exec:	  handle files with FD_CLOEXEC on after PM has done an EXEC
 *   do_exit:	  a process has exited; note that in the tables
 *   do_set:	  set uid or gid for some process
 *   do_revive:	  revive a process that was waiting for something (e.g. TTY)
 *   do_svrctl:	  file system control
 *   do_getsysinfo:	request copy of FS data structure
 *   pm_dumpcore: create a core dump
 */
26 #include <minix/callnr.h>
27 #include <minix/safecopies.h>
28 #include <minix/endpoint.h>
29 #include <minix/com.h>
30 #include <minix/sysinfo.h>
31 #include <minix/u64.h>
32 #include <sys/ptrace.h>
33 #include <sys/svrctl.h>
36 #include "scratchpad.h"
38 #include <minix/vfsif.h>
43 #define CORE_NAME "core"
44 #define CORE_MODE 0777 /* mode to use on core image files */
46 #if ENABLE_SYSCALL_STATS
47 unsigned long calls_stats
[NCALLS
];
50 static void free_proc(struct fproc
*freed
, int flags
);
52 static int dumpcore(int proc_e, struct mem_map *seg_ptr);
53 static int write_bytes(struct inode *rip, off_t off, char *buf, size_t
55 static int write_seg(struct inode *rip, off_t off, int proc_e, int seg,
56 off_t seg_off, phys_bytes seg_bytes);
59 /*===========================================================================*
61 *===========================================================================*/
64 vir_bytes src_addr
, dst_addr
;
68 what
= job_m_in
.SI_WHAT
;
69 dst_addr
= (vir_bytes
) job_m_in
.SI_WHERE
;
70 buf_size
= (size_t) job_m_in
.SI_SIZE
;
  /* Only su may call do_getsysinfo. This call may leak information (and is not
   * stable enough to be part of the API/ABI). In the future, requests from
   * non-system processes should be denied.
   */
77 if (!super_user
) return(EPERM
);
81 src_addr
= (vir_bytes
) fproc
;
82 len
= sizeof(struct fproc
) * NR_PROCS
;
85 src_addr
= (vir_bytes
) dmap
;
86 len
= sizeof(struct dmap
) * NR_DEVICES
;
88 #if ENABLE_SYSCALL_STATS
90 src_addr
= (vir_bytes
) calls_stats
;
91 len
= sizeof(calls_stats
);
101 return sys_datacopy(SELF
, src_addr
, who_e
, dst_addr
, len
);
104 /*===========================================================================*
106 *===========================================================================*/
/* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are
 * obsolete.  In fact, it is not even possible to invoke them using the
 * current library because the library routines call fcntl().  They are
 * provided to permit old binary programs to continue to run.
 */
119 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
122 /* Is the file descriptor valid? */
123 rfd
= scratch(fp
).file
.fd_nr
& ~DUP_MASK
; /* kill off dup2 bit, if on */
124 if ((f
= get_filp(rfd
, VNODE_READ
)) == NULL
) return(err_code
);
126 /* Distinguish between dup and dup2. */
127 if (!(scratch(fp
).file
.fd_nr
& DUP_MASK
)) { /* bit not on */
129 r
= get_fd(0, 0, &rfd2
, NULL
);
131 /* dup2(old_fd, new_fd) */
132 if (rfd2
< 0 || rfd2
>= OPEN_MAX
) {
134 } else if (rfd
== rfd2
) { /* ignore the call: dup2(x, x) */
137 /* All is fine, close new_fd if necessary */
138 unlock_filp(f
); /* or it might deadlock on do_close */
139 (void) close_fd(fp
, rfd2
); /* cannot fail */
140 f
= get_filp(rfd
, VNODE_READ
); /* lock old_fd again */
141 if (f
== NULL
) return(err_code
);
146 /* Success. Set up new file descriptors. */
148 fp
->fp_filp
[rfd2
] = f
;
149 FD_SET(rfd2
, &fp
->fp_filp_inuse
);
157 /*===========================================================================*
159 *===========================================================================*/
162 /* Perform the fcntl(fd, request, ...) system call. */
164 register struct filp
*f
;
165 int new_fd
, fl
, r
= OK
, fcntl_req
, fcntl_argx
;
166 tll_access_t locktype
;
168 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
169 scratch(fp
).io
.io_buffer
= job_m_in
.buffer
;
170 scratch(fp
).io
.io_nbytes
= job_m_in
.nbytes
; /* a.k.a. m_in.request */
171 fcntl_req
= job_m_in
.request
;
172 fcntl_argx
= job_m_in
.addr
;
174 /* Is the file descriptor valid? */
175 locktype
= (fcntl_req
== F_FREESP
) ? VNODE_WRITE
: VNODE_READ
;
176 if ((f
= get_filp(scratch(fp
).file
.fd_nr
, locktype
)) == NULL
)
181 /* This replaces the old dup() system call. */
182 if (fcntl_argx
< 0 || fcntl_argx
>= OPEN_MAX
) r
= EINVAL
;
183 else if ((r
= get_fd(fcntl_argx
, 0, &new_fd
, NULL
)) == OK
) {
185 fp
->fp_filp
[new_fd
] = f
;
186 FD_SET(new_fd
, &fp
->fp_filp_inuse
);
192 /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
194 if (FD_ISSET(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
))
199 /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
200 if (fcntl_argx
& FD_CLOEXEC
)
201 FD_SET(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
);
203 FD_CLR(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
);
207 /* Get file status flags (O_NONBLOCK and O_APPEND). */
208 fl
= f
->filp_flags
& (O_NONBLOCK
| O_APPEND
| O_ACCMODE
);
213 /* Set file status flags (O_NONBLOCK and O_APPEND). */
214 fl
= O_NONBLOCK
| O_APPEND
| O_REOPEN
;
215 f
->filp_flags
= (f
->filp_flags
& ~fl
) | (fcntl_argx
& fl
);
221 /* Set or clear a file lock. */
222 r
= lock_op(f
, fcntl_req
);
227 /* Free a section of a file */
229 struct flock flock_arg
;
232 /* Check if it's a regular file. */
233 if (!S_ISREG(f
->filp_vno
->v_mode
)) r
= EINVAL
;
234 else if (!(f
->filp_mode
& W_BIT
)) r
= EBADF
;
236 /* Copy flock data from userspace. */
237 r
= sys_datacopy(who_e
, (vir_bytes
) scratch(fp
).io
.io_buffer
,
238 SELF
, (vir_bytes
) &flock_arg
,
243 /* Convert starting offset to signed. */
244 offset
= (signed long) flock_arg
.l_start
;
246 /* Figure out starting position base. */
247 switch(flock_arg
.l_whence
) {
248 case SEEK_SET
: start
= 0; break;
250 if (ex64hi(f
->filp_pos
) != 0)
251 panic("do_fcntl: position in file too high");
252 start
= ex64lo(f
->filp_pos
);
254 case SEEK_END
: start
= f
->filp_vno
->v_size
; break;
259 /* Check for overflow or underflow. */
260 if (offset
> 0 && start
+ offset
< start
) r
= EINVAL
;
261 else if (offset
< 0 && start
+ offset
> start
) r
= EINVAL
;
264 if (start
< 0) r
= EINVAL
;
268 if (flock_arg
.l_len
!= 0) {
269 if (start
>= f
->filp_vno
->v_size
) r
= EINVAL
;
270 else if ((end
= start
+ flock_arg
.l_len
) <= start
) r
= EINVAL
;
271 else if (end
> f
->filp_vno
->v_size
) end
= f
->filp_vno
->v_size
;
277 r
= req_ftrunc(f
->filp_vno
->v_fs_e
, f
->filp_vno
->v_inode_nr
,start
,end
);
279 if (r
== OK
&& flock_arg
.l_len
== 0)
280 f
->filp_vno
->v_size
= start
;
293 /*===========================================================================*
295 *===========================================================================*/
301 for (vmp
= &vmnt
[0]; vmp
< &vmnt
[NR_MNTS
]; ++vmp
) {
302 if (vmp
->m_dev
!= NO_DEV
&& vmp
->m_fs_e
!= NONE
&&
303 vmp
->m_root_node
!= NULL
) {
304 if ((r
= lock_vmnt(vmp
, VMNT_EXCL
)) != OK
)
306 req_sync(vmp
->m_fs_e
);
314 /*===========================================================================*
316 *===========================================================================*/
319 /* Perform the fsync() system call. */
325 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
327 if ((rfilp
= get_filp(scratch(fp
).file
.fd_nr
, VNODE_READ
)) == NULL
)
329 dev
= rfilp
->filp_vno
->v_dev
;
330 for (vmp
= &vmnt
[0]; vmp
< &vmnt
[NR_MNTS
]; ++vmp
) {
331 if (vmp
->m_dev
!= NO_DEV
&& vmp
->m_dev
== dev
&&
332 vmp
->m_fs_e
!= NONE
&& vmp
->m_root_node
!= NULL
) {
334 if ((r
= lock_vmnt(vmp
, VMNT_EXCL
)) != OK
)
336 req_sync(vmp
->m_fs_e
);
346 /*===========================================================================*
348 *===========================================================================*/
351 /* Perform the VFS side of the reboot call. */
357 /* Do exit processing for all leftover processes and servers,
358 * but don't actually exit them (if they were really gone, PM
359 * will tell us about it).
361 for (i
= 0; i
< NR_PROCS
; i
++) {
364 /* Don't just free the proc right away, but let it finish what it was
367 if (rfp
->fp_endpoint
!= NONE
)
376 /*===========================================================================*
378 *===========================================================================*/
379 void pm_fork(endpoint_t pproc
, endpoint_t cproc
, pid_t cpid
)
381 /* Perform those aspects of the fork() system call that relate to files.
382 * In particular, let the child inherit its parent's file descriptors.
383 * The parent and child parameters tell who forked off whom. The file
384 * system uses the same slot numbers as the kernel. Only PM makes this call.
387 struct fproc
*cp
, *pp
;
388 int i
, parentno
, childno
;
391 /* Check up-to-dateness of fproc. */
392 okendpt(pproc
, &parentno
);
394 /* PM gives child endpoint, which implies process slot information.
395 * Don't call isokendpt, because that will verify if the endpoint
396 * number is correct in fproc, which it won't be.
398 childno
= _ENDPOINT_P(cproc
);
399 if (childno
< 0 || childno
>= NR_PROCS
)
400 panic("VFS: bogus child for forking: %d", cproc
);
401 if (fproc
[childno
].fp_pid
!= PID_FREE
)
402 panic("VFS: forking on top of in-use child: %d", childno
);
404 /* Copy the parent's fproc struct to the child. */
405 /* However, the mutex variables belong to a slot and must stay the same. */
406 c_fp_lock
= fproc
[childno
].fp_lock
;
407 fproc
[childno
] = fproc
[parentno
];
408 fproc
[childno
].fp_lock
= c_fp_lock
;
410 /* Increase the counters in the 'filp' table. */
411 cp
= &fproc
[childno
];
412 pp
= &fproc
[parentno
];
414 for (i
= 0; i
< OPEN_MAX
; i
++)
415 if (cp
->fp_filp
[i
] != NULL
) cp
->fp_filp
[i
]->filp_count
++;
417 /* Fill in new process and endpoint id. */
419 cp
->fp_endpoint
= cproc
;
421 /* A forking process never has an outstanding grant, as it isn't blocking on
423 if (GRANT_VALID(pp
->fp_grant
)) {
424 panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp
->fp_endpoint
,
427 if (GRANT_VALID(cp
->fp_grant
)) {
428 panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp
->fp_endpoint
,
432 /* A child is not a process leader, not being revived, etc. */
433 cp
->fp_flags
= FP_NOFLAGS
;
435 /* Record the fact that both root and working dir have another user. */
436 if (cp
->fp_rd
) dup_vnode(cp
->fp_rd
);
437 if (cp
->fp_wd
) dup_vnode(cp
->fp_wd
);
440 /*===========================================================================*
442 *===========================================================================*/
443 static void free_proc(struct fproc
*exiter
, int flags
)
446 register struct fproc
*rfp
;
447 register struct filp
*rfilp
;
448 register struct vnode
*vp
;
451 if (exiter
->fp_endpoint
== NONE
)
452 panic("free_proc: already free");
454 if (fp_is_blocked(exiter
))
455 unpause(exiter
->fp_endpoint
);
457 /* Loop on file descriptors, closing any that are open. */
458 for (i
= 0; i
< OPEN_MAX
; i
++) {
459 (void) close_fd(exiter
, i
);
462 /* Release root and working directories. */
463 if (exiter
->fp_rd
) { put_vnode(exiter
->fp_rd
); exiter
->fp_rd
= NULL
; }
464 if (exiter
->fp_wd
) { put_vnode(exiter
->fp_wd
); exiter
->fp_wd
= NULL
; }
466 /* The rest of these actions is only done when processes actually exit. */
467 if (!(flags
& FP_EXITING
)) return;
469 exiter
->fp_flags
|= FP_EXITING
;
471 /* Check if any process is SUSPENDed on this driver.
472 * If a driver exits, unmap its entries in the dmap table.
473 * (unmapping has to be done after the first step, because the
474 * dmap table is used in the first step.)
476 unsuspend_by_endpt(exiter
->fp_endpoint
);
477 dmap_unmap_by_endpt(exiter
->fp_endpoint
);
479 worker_stop_by_endpt(exiter
->fp_endpoint
); /* Unblock waiting threads */
480 vmnt_unmap_by_endpt(exiter
->fp_endpoint
); /* Invalidate open files if this
481 * was an active FS */
483 /* Invalidate endpoint number for error and sanity checks. */
484 exiter
->fp_endpoint
= NONE
;
486 /* If a session leader exits and it has a controlling tty, then revoke
487 * access to its controlling tty from all other processes using it.
489 if ((exiter
->fp_flags
& FP_SESLDR
) && exiter
->fp_tty
!= 0) {
490 dev
= exiter
->fp_tty
;
491 for (rfp
= &fproc
[0]; rfp
< &fproc
[NR_PROCS
]; rfp
++) {
492 if(rfp
->fp_pid
== PID_FREE
) continue;
493 if (rfp
->fp_tty
== dev
) rfp
->fp_tty
= 0;
495 for (i
= 0; i
< OPEN_MAX
; i
++) {
496 if ((rfilp
= rfp
->fp_filp
[i
]) == NULL
) continue;
497 if (rfilp
->filp_mode
== FILP_CLOSED
) continue;
498 vp
= rfilp
->filp_vno
;
499 if (!S_ISCHR(vp
->v_mode
)) continue;
500 if ((dev_t
) vp
->v_sdev
!= dev
) continue;
501 lock_filp(rfilp
, VNODE_READ
);
502 (void) dev_close(dev
, rfilp
-filp
); /* Ignore any errors, even
505 rfilp
->filp_mode
= FILP_CLOSED
;
511 /* Exit done. Mark slot as free. */
512 exiter
->fp_pid
= PID_FREE
;
513 if (exiter
->fp_flags
& FP_PENDING
)
514 pending
--; /* No longer pending job, not going to do it */
515 exiter
->fp_flags
= FP_NOFLAGS
;
518 /*===========================================================================*
520 *===========================================================================*/
524 /* Perform the file system portion of the exit(status) system call. */
527 /* Nevertheless, pretend that the call came from the user. */
528 okendpt(proc
, &exitee_p
);
529 fp
= &fproc
[exitee_p
];
530 free_proc(fp
, FP_EXITING
);
533 /*===========================================================================*
535 *===========================================================================*/
536 void pm_setgid(proc_e
, egid
, rgid
)
541 register struct fproc
*tfp
;
544 okendpt(proc_e
, &slot
);
547 tfp
->fp_effgid
= egid
;
548 tfp
->fp_realgid
= rgid
;
552 /*===========================================================================*
554 *===========================================================================*/
555 void pm_setgroups(proc_e
, ngroups
, groups
)
563 okendpt(proc_e
, &slot
);
565 if (ngroups
* sizeof(gid_t
) > sizeof(rfp
->fp_sgroups
))
566 panic("VFS: pm_setgroups: too much data to copy");
567 if (sys_datacopy(who_e
, (vir_bytes
) groups
, SELF
, (vir_bytes
) rfp
->fp_sgroups
,
568 ngroups
* sizeof(gid_t
)) == OK
) {
569 rfp
->fp_ngroups
= ngroups
;
571 panic("VFS: pm_setgroups: datacopy failed");
575 /*===========================================================================*
577 *===========================================================================*/
578 void pm_setuid(proc_e
, euid
, ruid
)
586 okendpt(proc_e
, &slot
);
589 tfp
->fp_effuid
= euid
;
590 tfp
->fp_realuid
= ruid
;
593 /*===========================================================================*
595 *===========================================================================*/
601 svrctl
= job_m_in
.svrctl_req
;
602 ptr
= (vir_bytes
) job_m_in
.svrctl_argp
;
603 if (((svrctl
>> 8) & 0xFF) != 'M') return(EINVAL
);
609 struct sysgetenv sysgetenv
;
614 /* Copy sysgetenv structure to VFS */
615 if (sys_datacopy(who_e
, ptr
, SELF
, (vir_bytes
) &sysgetenv
,
616 sizeof(sysgetenv
)) != OK
)
619 /* Basic sanity checking */
620 if (svrctl
== VFSSETPARAM
) {
621 if (sysgetenv
.keylen
<= 0 ||
622 sysgetenv
.keylen
> (sizeof(search_key
) - 1) ||
623 sysgetenv
.vallen
<= 0 ||
624 sysgetenv
.vallen
>= sizeof(val
)) {
629 /* Copy parameter "key" */
630 if ((s
= sys_datacopy(who_e
, (vir_bytes
) sysgetenv
.key
,
631 SELF
, (vir_bytes
) search_key
,
632 sysgetenv
.keylen
)) != OK
)
634 search_key
[sysgetenv
.keylen
] = '\0'; /* Limit string */
636 /* Is it a parameter we know? */
637 if (svrctl
== VFSSETPARAM
) {
638 if (!strcmp(search_key
, "verbose")) {
640 if ((s
= sys_datacopy(who_e
,
641 (vir_bytes
) sysgetenv
.val
, SELF
,
642 (vir_bytes
) &val
, sysgetenv
.vallen
)) != OK
)
644 val
[sysgetenv
.vallen
] = '\0'; /* Limit string */
645 verbose_val
= atoi(val
);
646 if (verbose_val
< 0 || verbose_val
> 4) {
649 verbose
= verbose_val
;
654 } else { /* VFSGETPARAM */
658 if (!strcmp(search_key
, "print_traces")) {
659 mthread_stacktraces();
661 sysgetenv
.vallen
= 0;
663 } else if (!strcmp(search_key
, "active_threads")) {
664 int active
= NR_WTHREADS
- worker_available();
665 snprintf(small_buf
, sizeof(small_buf
) - 1,
667 sysgetenv
.vallen
= strlen(small_buf
);
672 if ((s
= sys_datacopy(SELF
,
673 (vir_bytes
) &sysgetenv
, who_e
, ptr
,
674 sizeof(sysgetenv
))) != OK
)
676 if (sysgetenv
.val
!= 0) {
677 if ((s
= sys_datacopy(SELF
,
678 (vir_bytes
) small_buf
, who_e
,
679 (vir_bytes
) sysgetenv
.val
,
680 sysgetenv
.vallen
)) != OK
)
693 /*===========================================================================*
695 *===========================================================================*/
696 int pm_dumpcore(endpoint_t proc_e
, int csig
, vir_bytes exe_name
)
698 int slot
, r
= OK
, core_fd
;
700 char core_path
[PATH_MAX
];
701 char proc_name
[PROC_NAME_LEN
];
703 okendpt(proc_e
, &slot
);
707 snprintf(core_path
, PATH_MAX
, "%s.%d", CORE_NAME
, fp
->fp_pid
);
708 core_fd
= common_open(core_path
, O_WRONLY
| O_CREAT
| O_TRUNC
, CORE_MODE
);
709 if (core_fd
< 0) { r
= core_fd
; goto core_exit
; }
711 /* get process' name */
712 r
= sys_datacopy(PM_PROC_NR
, exe_name
, VFS_PROC_NR
, (vir_bytes
) proc_name
,
714 if (r
!= OK
) goto core_exit
;
715 proc_name
[PROC_NAME_LEN
- 1] = '\0';
717 if ((f
= get_filp(core_fd
, VNODE_WRITE
)) == NULL
) { r
=EBADF
; goto core_exit
; }
718 write_elf_core_file(f
, csig
, proc_name
);
720 (void) close_fd(fp
, core_fd
); /* ignore failure, we're exiting anyway */
724 free_proc(fp
, FP_EXITING
);
728 /*===========================================================================*
730 *===========================================================================*/
734 char key
[DS_MAX_KEYLEN
];
735 char *blkdrv_prefix
= "drv.blk.";
736 char *chrdrv_prefix
= "drv.chr.";
739 endpoint_t owner_endpoint
;
743 my_job
= *((struct job
*) arg
);
746 /* Get the event and the owner from DS. */
747 while ((r
= ds_check(key
, &type
, &owner_endpoint
)) == OK
) {
748 /* Only check for block and character driver up events. */
749 if (!strncmp(key
, blkdrv_prefix
, strlen(blkdrv_prefix
))) {
751 } else if (!strncmp(key
, chrdrv_prefix
, strlen(chrdrv_prefix
))) {
757 if ((r
= ds_retrieve_u32(key
, &value
)) != OK
) {
758 printf("VFS: ds_event: ds_retrieve_u32 failed\n");
761 if (value
!= DS_DRIVER_UP
) continue;
764 dmap_endpt_up(owner_endpoint
, is_blk
);
767 if (r
!= ENOENT
) printf("VFS: ds_event: ds_check failed: %d\n", r
);
769 thread_cleanup(NULL
);