1 /* This file contains a collection of miscellaneous procedures. Some of them
2 * perform simple system calls. Some others do a small part of system calls
3 * that are mostly performed by the Process Manager (PM).
5 * The entry points into this file are
6 * do_dup: perform the DUP system call
7 * do_fcntl: perform the FCNTL system call
8 * do_sync: perform the SYNC system call
9 * do_fsync: perform the FSYNC system call
10 * pm_reboot: sync disks and prepare for shutdown
11 * pm_fork: adjust the tables after PM has performed a FORK system call
12 * do_exec: handle files with FD_CLOEXEC on after PM has done an EXEC
13 * do_exit: a process has exited; note that in the tables
14 * do_set: set uid or gid for some process
15 * do_revive: revive a process that was waiting for something (e.g. TTY)
16 * do_svrctl: file system control
17 * do_getsysinfo: request copy of FS data structure
18 * pm_dumpcore: create a core dump
26 #include <minix/callnr.h>
27 #include <minix/safecopies.h>
28 #include <minix/endpoint.h>
29 #include <minix/com.h>
30 #include <minix/sysinfo.h>
31 #include <minix/u64.h>
32 #include <sys/ptrace.h>
33 #include <sys/svrctl.h>
36 #include "scratchpad.h"
38 #include <minix/vfsif.h>
43 #define CORE_NAME "core"
44 #define CORE_MODE 0777 /* mode to use on core image files */
46 #if ENABLE_SYSCALL_STATS
47 unsigned long calls_stats
[NCALLS
];
50 static void free_proc(struct fproc
*freed
, int flags
);
52 static int dumpcore(int proc_e, struct mem_map *seg_ptr);
53 static int write_bytes(struct inode *rip, off_t off, char *buf, size_t
55 static int write_seg(struct inode *rip, off_t off, int proc_e, int seg,
56 off_t seg_off, phys_bytes seg_bytes);
59 /*===========================================================================*
61 *===========================================================================*/
64 vir_bytes src_addr
, dst_addr
;
68 what
= job_m_in
.SI_WHAT
;
69 dst_addr
= (vir_bytes
) job_m_in
.SI_WHERE
;
70 buf_size
= (size_t) job_m_in
.SI_SIZE
;
72 /* Only the superuser may call do_getsysinfo. This call may leak information (and is not
73 * stable enough to be part of the API/ABI). In the future, requests from
74 * non-system processes should be denied.
77 if (!super_user
) return(EPERM
);
81 src_addr
= (vir_bytes
) fproc
;
82 len
= sizeof(struct fproc
) * NR_PROCS
;
85 src_addr
= (vir_bytes
) dmap
;
86 len
= sizeof(struct dmap
) * NR_DEVICES
;
88 #if ENABLE_SYSCALL_STATS
90 src_addr
= (vir_bytes
) calls_stats
;
91 len
= sizeof(calls_stats
);
96 src_addr
= (vir_bytes
) vmnt
;
97 len
= sizeof(struct vmnt
) * NR_MNTS
;
106 return sys_datacopy(SELF
, src_addr
, who_e
, dst_addr
, len
);
109 /*===========================================================================*
111 *===========================================================================*/
114 /* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are
115 * obsolete. In fact, it is not even possible to invoke them using the
116 * current library because the library routines call fcntl(). They are
117 * provided to permit old binary programs to continue to run.
124 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
127 /* Is the file descriptor valid? */
128 rfd
= scratch(fp
).file
.fd_nr
& ~DUP_MASK
; /* kill off dup2 bit, if on */
129 if ((f
= get_filp(rfd
, VNODE_READ
)) == NULL
) return(err_code
);
131 /* Distinguish between dup and dup2. */
132 if (!(scratch(fp
).file
.fd_nr
& DUP_MASK
)) { /* bit not on */
134 r
= get_fd(0, 0, &rfd2
, NULL
);
136 /* dup2(old_fd, new_fd) */
137 if (rfd2
< 0 || rfd2
>= OPEN_MAX
) {
139 } else if (rfd
== rfd2
) { /* ignore the call: dup2(x, x) */
142 /* All is fine, close new_fd if necessary */
143 unlock_filp(f
); /* or it might deadlock on do_close */
144 (void) close_fd(fp
, rfd2
); /* cannot fail */
145 f
= get_filp(rfd
, VNODE_READ
); /* lock old_fd again */
146 if (f
== NULL
) return(err_code
);
151 /* Success. Set up new file descriptors. */
153 fp
->fp_filp
[rfd2
] = f
;
154 FD_SET(rfd2
, &fp
->fp_filp_inuse
);
162 /*===========================================================================*
164 *===========================================================================*/
167 /* Perform the fcntl(fd, request, ...) system call. */
169 register struct filp
*f
;
170 int new_fd
, fl
, r
= OK
, fcntl_req
, fcntl_argx
;
171 tll_access_t locktype
;
173 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
174 scratch(fp
).io
.io_buffer
= job_m_in
.buffer
;
175 scratch(fp
).io
.io_nbytes
= job_m_in
.nbytes
; /* a.k.a. m_in.request */
176 fcntl_req
= job_m_in
.request
;
177 fcntl_argx
= job_m_in
.addr
;
179 /* Is the file descriptor valid? */
180 locktype
= (fcntl_req
== F_FREESP
) ? VNODE_WRITE
: VNODE_READ
;
181 if ((f
= get_filp(scratch(fp
).file
.fd_nr
, locktype
)) == NULL
)
186 /* This replaces the old dup() system call. */
187 if (fcntl_argx
< 0 || fcntl_argx
>= OPEN_MAX
) r
= EINVAL
;
188 else if ((r
= get_fd(fcntl_argx
, 0, &new_fd
, NULL
)) == OK
) {
190 fp
->fp_filp
[new_fd
] = f
;
191 FD_SET(new_fd
, &fp
->fp_filp_inuse
);
197 /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
199 if (FD_ISSET(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
))
204 /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
205 if (fcntl_argx
& FD_CLOEXEC
)
206 FD_SET(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
);
208 FD_CLR(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
);
212 /* Get file status flags (O_NONBLOCK and O_APPEND) and the access mode (O_ACCMODE). */
213 fl
= f
->filp_flags
& (O_NONBLOCK
| O_APPEND
| O_ACCMODE
);
218 /* Set file status flags (O_NONBLOCK, O_APPEND and O_REOPEN). */
219 fl
= O_NONBLOCK
| O_APPEND
| O_REOPEN
;
220 f
->filp_flags
= (f
->filp_flags
& ~fl
) | (fcntl_argx
& fl
);
226 /* Set or clear a file lock. */
227 r
= lock_op(f
, fcntl_req
);
232 /* Free a section of a file */
234 struct flock flock_arg
;
237 /* Check if it's a regular file. */
238 if (!S_ISREG(f
->filp_vno
->v_mode
)) r
= EINVAL
;
239 else if (!(f
->filp_mode
& W_BIT
)) r
= EBADF
;
241 /* Copy flock data from userspace. */
242 r
= sys_datacopy(who_e
, (vir_bytes
) scratch(fp
).io
.io_buffer
,
243 SELF
, (vir_bytes
) &flock_arg
,
248 /* Convert starting offset to signed. */
249 offset
= (signed long) flock_arg
.l_start
;
251 /* Figure out starting position base. */
252 switch(flock_arg
.l_whence
) {
253 case SEEK_SET
: start
= 0; break;
255 if (ex64hi(f
->filp_pos
) != 0)
256 panic("do_fcntl: position in file too high");
257 start
= ex64lo(f
->filp_pos
);
259 case SEEK_END
: start
= f
->filp_vno
->v_size
; break;
264 /* Check for overflow or underflow. */
265 if (offset
> 0 && start
+ offset
< start
) r
= EINVAL
;
266 else if (offset
< 0 && start
+ offset
> start
) r
= EINVAL
;
269 if (start
< 0) r
= EINVAL
;
273 if (flock_arg
.l_len
!= 0) {
274 if (start
>= f
->filp_vno
->v_size
) r
= EINVAL
;
275 else if ((end
= start
+ flock_arg
.l_len
) <= start
) r
= EINVAL
;
276 else if (end
> f
->filp_vno
->v_size
) end
= f
->filp_vno
->v_size
;
282 r
= req_ftrunc(f
->filp_vno
->v_fs_e
, f
->filp_vno
->v_inode_nr
,start
,end
);
284 if (r
== OK
&& flock_arg
.l_len
== 0)
285 f
->filp_vno
->v_size
= start
;
298 /*===========================================================================*
300 *===========================================================================*/
306 for (vmp
= &vmnt
[0]; vmp
< &vmnt
[NR_MNTS
]; ++vmp
) {
307 if ((r
= lock_vmnt(vmp
, VMNT_READ
)) != OK
)
309 if (vmp
->m_dev
!= NO_DEV
&& vmp
->m_fs_e
!= NONE
&&
310 vmp
->m_root_node
!= NULL
) {
311 req_sync(vmp
->m_fs_e
);
319 /*===========================================================================*
321 *===========================================================================*/
324 /* Perform the fsync() system call. */
330 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
332 if ((rfilp
= get_filp(scratch(fp
).file
.fd_nr
, VNODE_READ
)) == NULL
)
335 dev
= rfilp
->filp_vno
->v_dev
;
338 for (vmp
= &vmnt
[0]; vmp
< &vmnt
[NR_MNTS
]; ++vmp
) {
339 if (vmp
->m_dev
!= dev
) continue;
340 if ((r
= lock_vmnt(vmp
, VMNT_READ
)) != OK
)
342 if (vmp
->m_dev
!= NO_DEV
&& vmp
->m_dev
== dev
&&
343 vmp
->m_fs_e
!= NONE
&& vmp
->m_root_node
!= NULL
) {
345 req_sync(vmp
->m_fs_e
);
353 /*===========================================================================*
355 *===========================================================================*/
358 /* Perform the VFS side of the reboot call. */
364 /* Do exit processing for all leftover processes and servers, but don't
365 * actually exit them (if they were really gone, PM will tell us about it).
366 * Skip processes that handle parts of the file system; we first need to give
367 * them the chance to unmount (which should be possible as all normal
368 * processes have no open files anymore).
370 for (i
= 0; i
< NR_PROCS
; i
++) {
373 /* Don't just free the proc right away, but let it finish what it was
376 if (rfp
->fp_endpoint
!= NONE
&& find_vmnt(rfp
->fp_endpoint
) == NULL
)
382 unmount_all(0 /* Don't force */);
384 /* Try to exit all processes again including File Servers */
385 for (i
= 0; i
< NR_PROCS
; i
++) {
388 /* Don't just free the proc right away, but let it finish what it was
391 if (rfp
->fp_endpoint
!= NONE
)
397 unmount_all(1 /* Force */);
401 /*===========================================================================*
403 *===========================================================================*/
404 void pm_fork(endpoint_t pproc
, endpoint_t cproc
, pid_t cpid
)
406 /* Perform those aspects of the fork() system call that relate to files.
407 * In particular, let the child inherit its parent's file descriptors.
408 * The parent and child parameters tell who forked off whom. The file
409 * system uses the same slot numbers as the kernel. Only PM makes this call.
412 struct fproc
*cp
, *pp
;
413 int i
, parentno
, childno
;
416 /* Check up-to-dateness of fproc. */
417 okendpt(pproc
, &parentno
);
419 /* PM gives child endpoint, which implies process slot information.
420 * Don't call isokendpt, because that will verify if the endpoint
421 * number is correct in fproc, which it won't be.
423 childno
= _ENDPOINT_P(cproc
);
424 if (childno
< 0 || childno
>= NR_PROCS
)
425 panic("VFS: bogus child for forking: %d", cproc
);
426 if (fproc
[childno
].fp_pid
!= PID_FREE
)
427 panic("VFS: forking on top of in-use child: %d", childno
);
429 /* Copy the parent's fproc struct to the child. */
430 /* However, the mutex variables belong to a slot and must stay the same. */
431 c_fp_lock
= fproc
[childno
].fp_lock
;
432 fproc
[childno
] = fproc
[parentno
];
433 fproc
[childno
].fp_lock
= c_fp_lock
;
435 /* Increase the counters in the 'filp' table. */
436 cp
= &fproc
[childno
];
437 pp
= &fproc
[parentno
];
439 for (i
= 0; i
< OPEN_MAX
; i
++)
440 if (cp
->fp_filp
[i
] != NULL
) cp
->fp_filp
[i
]->filp_count
++;
442 /* Fill in new process and endpoint id. */
444 cp
->fp_endpoint
= cproc
;
446 /* A forking process never has an outstanding grant, as it isn't blocking on
448 if (GRANT_VALID(pp
->fp_grant
)) {
449 panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp
->fp_endpoint
,
452 if (GRANT_VALID(cp
->fp_grant
)) {
453 panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp
->fp_endpoint
,
457 /* A child is not a process leader, not being revived, etc. */
458 cp
->fp_flags
= FP_NOFLAGS
;
460 /* Record the fact that both root and working dir have another user. */
461 if (cp
->fp_rd
) dup_vnode(cp
->fp_rd
);
462 if (cp
->fp_wd
) dup_vnode(cp
->fp_wd
);
465 /*===========================================================================*
467 *===========================================================================*/
468 static void free_proc(struct fproc
*exiter
, int flags
)
471 register struct fproc
*rfp
;
472 register struct filp
*rfilp
;
473 register struct vnode
*vp
;
476 if (exiter
->fp_endpoint
== NONE
)
477 panic("free_proc: already free");
479 if (fp_is_blocked(exiter
))
480 unpause(exiter
->fp_endpoint
);
482 /* Loop on file descriptors, closing any that are open. */
483 for (i
= 0; i
< OPEN_MAX
; i
++) {
484 (void) close_fd(exiter
, i
);
487 /* Release root and working directories. */
488 if (exiter
->fp_rd
) { put_vnode(exiter
->fp_rd
); exiter
->fp_rd
= NULL
; }
489 if (exiter
->fp_wd
) { put_vnode(exiter
->fp_wd
); exiter
->fp_wd
= NULL
; }
491 /* The rest of these actions are only done when processes actually exit. */
492 if (!(flags
& FP_EXITING
)) return;
494 exiter
->fp_flags
|= FP_EXITING
;
496 /* Check if any process is SUSPENDed on this driver.
497 * If a driver exits, unmap its entries in the dmap table.
498 * (unmapping has to be done after the first step, because the
499 * dmap table is used in the first step.)
501 unsuspend_by_endpt(exiter
->fp_endpoint
);
502 dmap_unmap_by_endpt(exiter
->fp_endpoint
);
504 worker_stop_by_endpt(exiter
->fp_endpoint
); /* Unblock waiting threads */
505 vmnt_unmap_by_endpt(exiter
->fp_endpoint
); /* Invalidate open files if this
506 * was an active FS */
508 /* Invalidate endpoint number for error and sanity checks. */
509 exiter
->fp_endpoint
= NONE
;
511 /* If a session leader exits and it has a controlling tty, then revoke
512 * access to its controlling tty from all other processes using it.
514 if ((exiter
->fp_flags
& FP_SESLDR
) && exiter
->fp_tty
!= 0) {
515 dev
= exiter
->fp_tty
;
516 for (rfp
= &fproc
[0]; rfp
< &fproc
[NR_PROCS
]; rfp
++) {
517 if(rfp
->fp_pid
== PID_FREE
) continue;
518 if (rfp
->fp_tty
== dev
) rfp
->fp_tty
= 0;
520 for (i
= 0; i
< OPEN_MAX
; i
++) {
521 if ((rfilp
= rfp
->fp_filp
[i
]) == NULL
) continue;
522 if (rfilp
->filp_mode
== FILP_CLOSED
) continue;
523 vp
= rfilp
->filp_vno
;
524 if (!S_ISCHR(vp
->v_mode
)) continue;
525 if ((dev_t
) vp
->v_sdev
!= dev
) continue;
526 lock_filp(rfilp
, VNODE_READ
);
527 (void) dev_close(dev
, rfilp
-filp
); /* Ignore any errors, even
530 rfilp
->filp_mode
= FILP_CLOSED
;
536 /* Exit done. Mark slot as free. */
537 exiter
->fp_pid
= PID_FREE
;
538 if (exiter
->fp_flags
& FP_PENDING
)
539 pending
--; /* No longer pending job, not going to do it */
540 exiter
->fp_flags
= FP_NOFLAGS
;
543 /*===========================================================================*
545 *===========================================================================*/
549 /* Perform the file system portion of the exit(status) system call. */
552 /* Nevertheless, pretend that the call came from the user. */
553 okendpt(proc
, &exitee_p
);
554 fp
= &fproc
[exitee_p
];
555 free_proc(fp
, FP_EXITING
);
558 /*===========================================================================*
560 *===========================================================================*/
561 void pm_setgid(proc_e
, egid
, rgid
)
566 register struct fproc
*tfp
;
569 okendpt(proc_e
, &slot
);
572 tfp
->fp_effgid
= egid
;
573 tfp
->fp_realgid
= rgid
;
577 /*===========================================================================*
579 *===========================================================================*/
580 void pm_setgroups(proc_e
, ngroups
, groups
)
588 okendpt(proc_e
, &slot
);
590 if (ngroups
* sizeof(gid_t
) > sizeof(rfp
->fp_sgroups
))
591 panic("VFS: pm_setgroups: too much data to copy");
592 if (sys_datacopy(who_e
, (vir_bytes
) groups
, SELF
, (vir_bytes
) rfp
->fp_sgroups
,
593 ngroups
* sizeof(gid_t
)) == OK
) {
594 rfp
->fp_ngroups
= ngroups
;
596 panic("VFS: pm_setgroups: datacopy failed");
600 /*===========================================================================*
602 *===========================================================================*/
603 void pm_setuid(proc_e
, euid
, ruid
)
611 okendpt(proc_e
, &slot
);
614 tfp
->fp_effuid
= euid
;
615 tfp
->fp_realuid
= ruid
;
618 /*===========================================================================*
620 *===========================================================================*/
626 svrctl
= job_m_in
.svrctl_req
;
627 ptr
= (vir_bytes
) job_m_in
.svrctl_argp
;
628 if (((svrctl
>> 8) & 0xFF) != 'M') return(EINVAL
);
634 struct sysgetenv sysgetenv
;
639 /* Copy sysgetenv structure to VFS */
640 if (sys_datacopy(who_e
, ptr
, SELF
, (vir_bytes
) &sysgetenv
,
641 sizeof(sysgetenv
)) != OK
)
644 /* Basic sanity checking */
645 if (svrctl
== VFSSETPARAM
) {
646 if (sysgetenv
.keylen
<= 0 ||
647 sysgetenv
.keylen
> (sizeof(search_key
) - 1) ||
648 sysgetenv
.vallen
<= 0 ||
649 sysgetenv
.vallen
>= sizeof(val
)) {
654 /* Copy parameter "key" */
655 if ((s
= sys_datacopy(who_e
, (vir_bytes
) sysgetenv
.key
,
656 SELF
, (vir_bytes
) search_key
,
657 sysgetenv
.keylen
)) != OK
)
659 search_key
[sysgetenv
.keylen
] = '\0'; /* Limit string */
661 /* Is it a parameter we know? */
662 if (svrctl
== VFSSETPARAM
) {
663 if (!strcmp(search_key
, "verbose")) {
665 if ((s
= sys_datacopy(who_e
,
666 (vir_bytes
) sysgetenv
.val
, SELF
,
667 (vir_bytes
) &val
, sysgetenv
.vallen
)) != OK
)
669 val
[sysgetenv
.vallen
] = '\0'; /* Limit string */
670 verbose_val
= atoi(val
);
671 if (verbose_val
< 0 || verbose_val
> 4) {
674 verbose
= verbose_val
;
679 } else { /* VFSGETPARAM */
683 if (!strcmp(search_key
, "print_traces")) {
684 mthread_stacktraces();
686 sysgetenv
.vallen
= 0;
688 } else if (!strcmp(search_key
, "active_threads")) {
689 int active
= NR_WTHREADS
- worker_available();
690 snprintf(small_buf
, sizeof(small_buf
) - 1,
692 sysgetenv
.vallen
= strlen(small_buf
);
697 if ((s
= sys_datacopy(SELF
,
698 (vir_bytes
) &sysgetenv
, who_e
, ptr
,
699 sizeof(sysgetenv
))) != OK
)
701 if (sysgetenv
.val
!= 0) {
702 if ((s
= sys_datacopy(SELF
,
703 (vir_bytes
) small_buf
, who_e
,
704 (vir_bytes
) sysgetenv
.val
,
705 sysgetenv
.vallen
)) != OK
)
718 /*===========================================================================*
720 *===========================================================================*/
721 int pm_dumpcore(endpoint_t proc_e
, int csig
, vir_bytes exe_name
)
723 int slot
, r
= OK
, core_fd
;
725 char core_path
[PATH_MAX
];
726 char proc_name
[PROC_NAME_LEN
];
728 okendpt(proc_e
, &slot
);
731 /* If a process is blocked, scratch(fp).file.fd_nr holds the fd it's blocked
732 * on. Free it up for use by common_open().
734 if (fp_is_blocked(fp
))
735 unpause(fp
->fp_endpoint
);
738 snprintf(core_path
, PATH_MAX
, "%s.%d", CORE_NAME
, fp
->fp_pid
);
739 core_fd
= common_open(core_path
, O_WRONLY
| O_CREAT
| O_TRUNC
, CORE_MODE
);
740 if (core_fd
< 0) { r
= core_fd
; goto core_exit
; }
742 /* get process' name */
743 r
= sys_datacopy(PM_PROC_NR
, exe_name
, VFS_PROC_NR
, (vir_bytes
) proc_name
,
745 if (r
!= OK
) goto core_exit
;
746 proc_name
[PROC_NAME_LEN
- 1] = '\0';
748 if ((f
= get_filp(core_fd
, VNODE_WRITE
)) == NULL
) { r
=EBADF
; goto core_exit
; }
749 write_elf_core_file(f
, csig
, proc_name
);
751 (void) close_fd(fp
, core_fd
); /* ignore failure, we're exiting anyway */
755 free_proc(fp
, FP_EXITING
);
759 /*===========================================================================*
761 *===========================================================================*/
765 char key
[DS_MAX_KEYLEN
];
766 char *blkdrv_prefix
= "drv.blk.";
767 char *chrdrv_prefix
= "drv.chr.";
770 endpoint_t owner_endpoint
;
774 my_job
= *((struct job
*) arg
);
777 /* Get the event and the owner from DS. */
778 while ((r
= ds_check(key
, &type
, &owner_endpoint
)) == OK
) {
779 /* Only check for block and character driver up events. */
780 if (!strncmp(key
, blkdrv_prefix
, strlen(blkdrv_prefix
))) {
782 } else if (!strncmp(key
, chrdrv_prefix
, strlen(chrdrv_prefix
))) {
788 if ((r
= ds_retrieve_u32(key
, &value
)) != OK
) {
789 printf("VFS: ds_event: ds_retrieve_u32 failed\n");
792 if (value
!= DS_DRIVER_UP
) continue;
795 dmap_endpt_up(owner_endpoint
, is_blk
);
798 if (r
!= ENOENT
) printf("VFS: ds_event: ds_check failed: %d\n", r
);
800 thread_cleanup(NULL
);