1 /* This file contains a collection of miscellaneous procedures. Some of them
2 * perform simple system calls. Others handle the small file-system part of
3 * system calls that are mostly performed by PM.
5 * The entry points into this file are
6 * do_dup: perform the DUP system call
7 * do_fcntl: perform the FCNTL system call
8 * do_sync: perform the SYNC system call
9 * do_fsync: perform the FSYNC system call
10 * pm_reboot: sync disks and prepare for shutdown
11 * pm_fork: adjust the tables after PM has performed a FORK system call
12 * do_exec: handle files with FD_CLOEXEC on after PM has done an EXEC
13 * do_exit: a process has exited; note that in the tables
14 * pm_setuid, pm_setgid, pm_setgroups: set uid, gid or groups for some process
15 * do_revive: revive a process that was waiting for something (e.g. TTY)
16 * do_svrctl: file system control
17 * do_getsysinfo: request copy of FS data structure
18 * pm_dumpcore: create a core dump
26 #include <minix/callnr.h>
27 #include <minix/safecopies.h>
28 #include <minix/endpoint.h>
29 #include <minix/com.h>
30 #include <minix/sysinfo.h>
31 #include <minix/u64.h>
32 #include <sys/ptrace.h>
33 #include <sys/svrctl.h>
36 #include "scratchpad.h"
38 #include <minix/vfsif.h>
43 #define CORE_NAME "core"
44 #define CORE_MODE 0777 /* mode to use on core image files */
46 #if ENABLE_SYSCALL_STATS
47 unsigned long calls_stats
[NCALLS
];
50 static void free_proc(struct fproc
*freed
, int flags
);
52 static int dumpcore(int proc_e, struct mem_map *seg_ptr);
53 static int write_bytes(struct inode *rip, off_t off, char *buf, size_t
55 static int write_seg(struct inode *rip, off_t off, int proc_e, int seg,
56 off_t seg_off, phys_bytes seg_bytes);
59 /*===========================================================================*
61 *===========================================================================*/
64 vir_bytes src_addr
, dst_addr
;
68 what
= job_m_in
.SI_WHAT
;
69 dst_addr
= (vir_bytes
) job_m_in
.SI_WHERE
;
70 buf_size
= (size_t) job_m_in
.SI_SIZE
;
72 /* Only su may call do_getsysinfo. This call may leak information (and is not
73 * stable enough to be part of the API/ABI). In the future, requests from
74 * non-system processes should be denied.
77 if (!super_user
) return(EPERM
);
81 src_addr
= (vir_bytes
) fproc
;
82 len
= sizeof(struct fproc
) * NR_PROCS
;
85 src_addr
= (vir_bytes
) dmap
;
86 len
= sizeof(struct dmap
) * NR_DEVICES
;
88 #if ENABLE_SYSCALL_STATS
90 src_addr
= (vir_bytes
) calls_stats
;
91 len
= sizeof(calls_stats
);
101 return sys_datacopy(SELF
, src_addr
, who_e
, dst_addr
, len
);
104 /*===========================================================================*
106 *===========================================================================*/
109 /* Perform the dup(fd) or dup2(fd,fd2) system call. These system calls are
110 * obsolete. In fact, it is not even possible to invoke them using the
111 * current library because the library routines call fcntl(). They are
112 * provided to permit old binary programs to continue to run.
119 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
122 /* Is the file descriptor valid? */
123 rfd
= scratch(fp
).file
.fd_nr
& ~DUP_MASK
; /* kill off dup2 bit, if on */
124 if ((f
= get_filp(rfd
, VNODE_READ
)) == NULL
) return(err_code
);
126 /* Distinguish between dup and dup2. */
127 if (!(scratch(fp
).file
.fd_nr
& DUP_MASK
)) { /* bit not on */
129 r
= get_fd(0, 0, &rfd2
, NULL
);
131 /* dup2(old_fd, new_fd) */
132 if (rfd2
< 0 || rfd2
>= OPEN_MAX
) {
134 } else if (rfd
== rfd2
) { /* ignore the call: dup2(x, x) */
137 /* All is fine, close new_fd if necessary */
138 unlock_filp(f
); /* or it might deadlock on do_close */
139 (void) close_fd(fp
, rfd2
); /* cannot fail */
140 f
= get_filp(rfd
, VNODE_READ
); /* lock old_fd again */
141 if (f
== NULL
) return(err_code
);
146 /* Success. Set up new file descriptors. */
148 fp
->fp_filp
[rfd2
] = f
;
149 FD_SET(rfd2
, &fp
->fp_filp_inuse
);
157 /*===========================================================================*
159 *===========================================================================*/
162 /* Perform the fcntl(fd, request, ...) system call. */
164 register struct filp
*f
;
165 int new_fd
, fl
, r
= OK
, fcntl_req
, fcntl_argx
;
166 tll_access_t locktype
;
168 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
169 scratch(fp
).io
.io_buffer
= job_m_in
.buffer
;
170 scratch(fp
).io
.io_nbytes
= job_m_in
.nbytes
; /* a.k.a. m_in.request */
171 fcntl_req
= job_m_in
.request
;
172 fcntl_argx
= job_m_in
.addr
;
174 /* Is the file descriptor valid? */
175 locktype
= (fcntl_req
== F_FREESP
) ? VNODE_WRITE
: VNODE_READ
;
176 if ((f
= get_filp(scratch(fp
).file
.fd_nr
, locktype
)) == NULL
)
181 /* This replaces the old dup() system call. */
182 if (fcntl_argx
< 0 || fcntl_argx
>= OPEN_MAX
) r
= EINVAL
;
183 else if ((r
= get_fd(fcntl_argx
, 0, &new_fd
, NULL
)) == OK
) {
185 fp
->fp_filp
[new_fd
] = f
;
186 FD_SET(new_fd
, &fp
->fp_filp_inuse
);
192 /* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
194 if (FD_ISSET(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
))
199 /* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
200 if (fcntl_argx
& FD_CLOEXEC
)
201 FD_SET(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
);
203 FD_CLR(scratch(fp
).file
.fd_nr
, &fp
->fp_cloexec_set
);
207 /* Get file status flags (O_NONBLOCK, O_APPEND and the O_ACCMODE bits). */
208 fl
= f
->filp_flags
& (O_NONBLOCK
| O_APPEND
| O_ACCMODE
);
213 /* Set file status flags (only O_NONBLOCK, O_APPEND and O_REOPEN change). */
214 fl
= O_NONBLOCK
| O_APPEND
| O_REOPEN
;
215 f
->filp_flags
= (f
->filp_flags
& ~fl
) | (fcntl_argx
& fl
);
221 /* Set or clear a file lock. */
222 r
= lock_op(f
, fcntl_req
);
227 /* Free a section of a file */
229 struct flock flock_arg
;
232 /* Check if it's a regular file. */
233 if (!S_ISREG(f
->filp_vno
->v_mode
)) r
= EINVAL
;
234 else if (!(f
->filp_mode
& W_BIT
)) r
= EBADF
;
236 /* Copy flock data from userspace. */
237 r
= sys_datacopy(who_e
, (vir_bytes
) scratch(fp
).io
.io_buffer
,
238 SELF
, (vir_bytes
) &flock_arg
,
243 /* Convert starting offset to signed. */
244 offset
= (signed long) flock_arg
.l_start
;
246 /* Figure out starting position base. */
247 switch(flock_arg
.l_whence
) {
248 case SEEK_SET
: start
= 0; break;
250 if (ex64hi(f
->filp_pos
) != 0)
251 panic("do_fcntl: position in file too high");
252 start
= ex64lo(f
->filp_pos
);
254 case SEEK_END
: start
= f
->filp_vno
->v_size
; break;
259 /* Check for overflow or underflow. */
260 if (offset
> 0 && start
+ offset
< start
) r
= EINVAL
;
261 else if (offset
< 0 && start
+ offset
> start
) r
= EINVAL
;
264 if (start
< 0) r
= EINVAL
;
268 if (flock_arg
.l_len
!= 0) {
269 if (start
>= f
->filp_vno
->v_size
) r
= EINVAL
;
270 else if ((end
= start
+ flock_arg
.l_len
) <= start
) r
= EINVAL
;
271 else if (end
> f
->filp_vno
->v_size
) end
= f
->filp_vno
->v_size
;
277 r
= req_ftrunc(f
->filp_vno
->v_fs_e
, f
->filp_vno
->v_inode_nr
,start
,end
);
279 if (r
== OK
&& flock_arg
.l_len
== 0)
280 f
->filp_vno
->v_size
= start
;
293 /*===========================================================================*
295 *===========================================================================*/
301 for (vmp
= &vmnt
[0]; vmp
< &vmnt
[NR_MNTS
]; ++vmp
) {
302 if ((r
= lock_vmnt(vmp
, VMNT_EXCL
)) != OK
)
304 if (vmp
->m_dev
!= NO_DEV
&& vmp
->m_fs_e
!= NONE
&&
305 vmp
->m_root_node
!= NULL
) {
306 req_sync(vmp
->m_fs_e
);
314 /*===========================================================================*
316 *===========================================================================*/
319 /* Perform the fsync() system call. */
325 scratch(fp
).file
.fd_nr
= job_m_in
.fd
;
327 if ((rfilp
= get_filp(scratch(fp
).file
.fd_nr
, VNODE_READ
)) == NULL
)
329 dev
= rfilp
->filp_vno
->v_dev
;
330 for (vmp
= &vmnt
[0]; vmp
< &vmnt
[NR_MNTS
]; ++vmp
) {
331 if (vmp
->m_dev
!= NO_DEV
&& vmp
->m_dev
== dev
&&
332 vmp
->m_fs_e
!= NONE
&& vmp
->m_root_node
!= NULL
) {
334 if ((r
= lock_vmnt(vmp
, VMNT_EXCL
)) != OK
)
336 req_sync(vmp
->m_fs_e
);
346 /*===========================================================================*
348 *===========================================================================*/
351 /* Perform the VFS side of the reboot call. */
357 /* Do exit processing for all leftover processes and servers, but don't
358 * actually exit them (if they were really gone, PM will tell us about it).
359 * Skip processes that handle parts of the file system; we first need to give
360 * them the chance to unmount (which should be possible as all normal
361 * processes have no open files anymore).
363 for (i
= 0; i
< NR_PROCS
; i
++) {
366 /* Don't just free the proc right away, but let it finish what it was
369 if (rfp
->fp_endpoint
!= NONE
&& find_vmnt(rfp
->fp_endpoint
) == NULL
)
375 unmount_all(0 /* Don't force */);
377 /* Try to exit all processes again including File Servers */
378 for (i
= 0; i
< NR_PROCS
; i
++) {
381 /* Don't just free the proc right away, but let it finish what it was
384 if (rfp
->fp_endpoint
!= NONE
)
390 unmount_all(1 /* Force */);
394 /*===========================================================================*
396 *===========================================================================*/
397 void pm_fork(endpoint_t pproc
, endpoint_t cproc
, pid_t cpid
)
399 /* Perform those aspects of the fork() system call that relate to files.
400 * In particular, let the child inherit its parent's file descriptors.
401 * The parent and child parameters tell who forked off whom. The file
402 * system uses the same slot numbers as the kernel. Only PM makes this call.
405 struct fproc
*cp
, *pp
;
406 int i
, parentno
, childno
;
409 /* Check up-to-dateness of fproc. */
410 okendpt(pproc
, &parentno
);
412 /* PM gives child endpoint, which implies process slot information.
413 * Don't call isokendpt, because that will verify if the endpoint
414 * number is correct in fproc, which it won't be.
416 childno
= _ENDPOINT_P(cproc
);
417 if (childno
< 0 || childno
>= NR_PROCS
)
418 panic("VFS: bogus child for forking: %d", cproc
);
419 if (fproc
[childno
].fp_pid
!= PID_FREE
)
420 panic("VFS: forking on top of in-use child: %d", childno
);
422 /* Copy the parent's fproc struct to the child. */
423 /* However, the mutex variables belong to a slot and must stay the same. */
424 c_fp_lock
= fproc
[childno
].fp_lock
;
425 fproc
[childno
] = fproc
[parentno
];
426 fproc
[childno
].fp_lock
= c_fp_lock
;
428 /* Increase the counters in the 'filp' table. */
429 cp
= &fproc
[childno
];
430 pp
= &fproc
[parentno
];
432 for (i
= 0; i
< OPEN_MAX
; i
++)
433 if (cp
->fp_filp
[i
] != NULL
) cp
->fp_filp
[i
]->filp_count
++;
435 /* Fill in new process and endpoint id. */
437 cp
->fp_endpoint
= cproc
;
439 /* A forking process never has an outstanding grant, as it isn't blocking on
441 if (GRANT_VALID(pp
->fp_grant
)) {
442 panic("VFS: fork: pp (endpoint %d) has grant %d\n", pp
->fp_endpoint
,
445 if (GRANT_VALID(cp
->fp_grant
)) {
446 panic("VFS: fork: cp (endpoint %d) has grant %d\n", cp
->fp_endpoint
,
450 /* A child is not a process leader, not being revived, etc. */
451 cp
->fp_flags
= FP_NOFLAGS
;
453 /* Record the fact that both root and working dir have another user. */
454 if (cp
->fp_rd
) dup_vnode(cp
->fp_rd
);
455 if (cp
->fp_wd
) dup_vnode(cp
->fp_wd
);
458 /*===========================================================================*
460 *===========================================================================*/
461 static void free_proc(struct fproc
*exiter
, int flags
)
464 register struct fproc
*rfp
;
465 register struct filp
*rfilp
;
466 register struct vnode
*vp
;
469 if (exiter
->fp_endpoint
== NONE
)
470 panic("free_proc: already free");
472 if (fp_is_blocked(exiter
))
473 unpause(exiter
->fp_endpoint
);
475 /* Loop on file descriptors, closing any that are open. */
476 for (i
= 0; i
< OPEN_MAX
; i
++) {
477 (void) close_fd(exiter
, i
);
480 /* Release root and working directories. */
481 if (exiter
->fp_rd
) { put_vnode(exiter
->fp_rd
); exiter
->fp_rd
= NULL
; }
482 if (exiter
->fp_wd
) { put_vnode(exiter
->fp_wd
); exiter
->fp_wd
= NULL
; }
484 /* The rest of these actions are only done when processes actually exit. */
485 if (!(flags
& FP_EXITING
)) return;
487 exiter
->fp_flags
|= FP_EXITING
;
489 /* Check if any process is SUSPENDed on this driver.
490 * If a driver exits, unmap its entries in the dmap table.
491 * (unmapping has to be done after the first step, because the
492 * dmap table is used in the first step.)
494 unsuspend_by_endpt(exiter
->fp_endpoint
);
495 dmap_unmap_by_endpt(exiter
->fp_endpoint
);
497 worker_stop_by_endpt(exiter
->fp_endpoint
); /* Unblock waiting threads */
498 vmnt_unmap_by_endpt(exiter
->fp_endpoint
); /* Invalidate open files if this
499 * was an active FS */
501 /* Invalidate endpoint number for error and sanity checks. */
502 exiter
->fp_endpoint
= NONE
;
504 /* If a session leader exits and it has a controlling tty, then revoke
505 * access to its controlling tty from all other processes using it.
507 if ((exiter
->fp_flags
& FP_SESLDR
) && exiter
->fp_tty
!= 0) {
508 dev
= exiter
->fp_tty
;
509 for (rfp
= &fproc
[0]; rfp
< &fproc
[NR_PROCS
]; rfp
++) {
510 if(rfp
->fp_pid
== PID_FREE
) continue;
511 if (rfp
->fp_tty
== dev
) rfp
->fp_tty
= 0;
513 for (i
= 0; i
< OPEN_MAX
; i
++) {
514 if ((rfilp
= rfp
->fp_filp
[i
]) == NULL
) continue;
515 if (rfilp
->filp_mode
== FILP_CLOSED
) continue;
516 vp
= rfilp
->filp_vno
;
517 if (!S_ISCHR(vp
->v_mode
)) continue;
518 if ((dev_t
) vp
->v_sdev
!= dev
) continue;
519 lock_filp(rfilp
, VNODE_READ
);
520 (void) dev_close(dev
, rfilp
-filp
); /* Ignore any errors, even
523 rfilp
->filp_mode
= FILP_CLOSED
;
529 /* Exit done. Mark slot as free. */
530 exiter
->fp_pid
= PID_FREE
;
531 if (exiter
->fp_flags
& FP_PENDING
)
532 pending
--; /* No longer pending job, not going to do it */
533 exiter
->fp_flags
= FP_NOFLAGS
;
536 /*===========================================================================*
538 *===========================================================================*/
542 /* Perform the file system portion of the exit(status) system call. */
545 /* Nevertheless, pretend that the call came from the user. */
546 okendpt(proc
, &exitee_p
);
547 fp
= &fproc
[exitee_p
];
548 free_proc(fp
, FP_EXITING
);
551 /*===========================================================================*
553 *===========================================================================*/
554 void pm_setgid(proc_e
, egid
, rgid
)
559 register struct fproc
*tfp
;
562 okendpt(proc_e
, &slot
);
565 tfp
->fp_effgid
= egid
;
566 tfp
->fp_realgid
= rgid
;
570 /*===========================================================================*
572 *===========================================================================*/
573 void pm_setgroups(proc_e
, ngroups
, groups
)
581 okendpt(proc_e
, &slot
);
583 if (ngroups
* sizeof(gid_t
) > sizeof(rfp
->fp_sgroups
))
584 panic("VFS: pm_setgroups: too much data to copy");
585 if (sys_datacopy(who_e
, (vir_bytes
) groups
, SELF
, (vir_bytes
) rfp
->fp_sgroups
,
586 ngroups
* sizeof(gid_t
)) == OK
) {
587 rfp
->fp_ngroups
= ngroups
;
589 panic("VFS: pm_setgroups: datacopy failed");
593 /*===========================================================================*
595 *===========================================================================*/
596 void pm_setuid(proc_e
, euid
, ruid
)
604 okendpt(proc_e
, &slot
);
607 tfp
->fp_effuid
= euid
;
608 tfp
->fp_realuid
= ruid
;
611 /*===========================================================================*
613 *===========================================================================*/
619 svrctl
= job_m_in
.svrctl_req
;
620 ptr
= (vir_bytes
) job_m_in
.svrctl_argp
;
621 if (((svrctl
>> 8) & 0xFF) != 'M') return(EINVAL
);
627 struct sysgetenv sysgetenv
;
632 /* Copy sysgetenv structure to VFS */
633 if (sys_datacopy(who_e
, ptr
, SELF
, (vir_bytes
) &sysgetenv
,
634 sizeof(sysgetenv
)) != OK
)
637 /* Basic sanity checking */
638 if (svrctl
== VFSSETPARAM
) {
639 if (sysgetenv
.keylen
<= 0 ||
640 sysgetenv
.keylen
> (sizeof(search_key
) - 1) ||
641 sysgetenv
.vallen
<= 0 ||
642 sysgetenv
.vallen
>= sizeof(val
)) {
647 /* Copy parameter "key" */
648 if ((s
= sys_datacopy(who_e
, (vir_bytes
) sysgetenv
.key
,
649 SELF
, (vir_bytes
) search_key
,
650 sysgetenv
.keylen
)) != OK
)
652 search_key
[sysgetenv
.keylen
] = '\0'; /* Limit string */
654 /* Is it a parameter we know? */
655 if (svrctl
== VFSSETPARAM
) {
656 if (!strcmp(search_key
, "verbose")) {
658 if ((s
= sys_datacopy(who_e
,
659 (vir_bytes
) sysgetenv
.val
, SELF
,
660 (vir_bytes
) &val
, sysgetenv
.vallen
)) != OK
)
662 val
[sysgetenv
.vallen
] = '\0'; /* Limit string */
663 verbose_val
= atoi(val
);
664 if (verbose_val
< 0 || verbose_val
> 4) {
667 verbose
= verbose_val
;
672 } else { /* VFSGETPARAM */
676 if (!strcmp(search_key
, "print_traces")) {
677 mthread_stacktraces();
679 sysgetenv
.vallen
= 0;
681 } else if (!strcmp(search_key
, "active_threads")) {
682 int active
= NR_WTHREADS
- worker_available();
683 snprintf(small_buf
, sizeof(small_buf
) - 1,
685 sysgetenv
.vallen
= strlen(small_buf
);
690 if ((s
= sys_datacopy(SELF
,
691 (vir_bytes
) &sysgetenv
, who_e
, ptr
,
692 sizeof(sysgetenv
))) != OK
)
694 if (sysgetenv
.val
!= 0) {
695 if ((s
= sys_datacopy(SELF
,
696 (vir_bytes
) small_buf
, who_e
,
697 (vir_bytes
) sysgetenv
.val
,
698 sysgetenv
.vallen
)) != OK
)
711 /*===========================================================================*
713 *===========================================================================*/
714 int pm_dumpcore(endpoint_t proc_e
, int csig
, vir_bytes exe_name
)
716 int slot
, r
= OK
, core_fd
;
718 char core_path
[PATH_MAX
];
719 char proc_name
[PROC_NAME_LEN
];
721 okendpt(proc_e
, &slot
);
725 snprintf(core_path
, PATH_MAX
, "%s.%d", CORE_NAME
, fp
->fp_pid
);
726 core_fd
= common_open(core_path
, O_WRONLY
| O_CREAT
| O_TRUNC
, CORE_MODE
);
727 if (core_fd
< 0) { r
= core_fd
; goto core_exit
; }
729 /* get process' name */
730 r
= sys_datacopy(PM_PROC_NR
, exe_name
, VFS_PROC_NR
, (vir_bytes
) proc_name
,
732 if (r
!= OK
) goto core_exit
;
733 proc_name
[PROC_NAME_LEN
- 1] = '\0';
735 if ((f
= get_filp(core_fd
, VNODE_WRITE
)) == NULL
) { r
=EBADF
; goto core_exit
; }
736 write_elf_core_file(f
, csig
, proc_name
);
738 (void) close_fd(fp
, core_fd
); /* ignore failure, we're exiting anyway */
742 free_proc(fp
, FP_EXITING
);
746 /*===========================================================================*
748 *===========================================================================*/
752 char key
[DS_MAX_KEYLEN
];
753 char *blkdrv_prefix
= "drv.blk.";
754 char *chrdrv_prefix
= "drv.chr.";
757 endpoint_t owner_endpoint
;
761 my_job
= *((struct job
*) arg
);
764 /* Get the event and the owner from DS. */
765 while ((r
= ds_check(key
, &type
, &owner_endpoint
)) == OK
) {
766 /* Only check for block and character driver up events. */
767 if (!strncmp(key
, blkdrv_prefix
, strlen(blkdrv_prefix
))) {
769 } else if (!strncmp(key
, chrdrv_prefix
, strlen(chrdrv_prefix
))) {
775 if ((r
= ds_retrieve_u32(key
, &value
)) != OK
) {
776 printf("VFS: ds_event: ds_retrieve_u32 failed\n");
779 if (value
!= DS_DRIVER_UP
) continue;
782 dmap_endpt_up(owner_endpoint
, is_blk
);
785 if (r
!= ENOENT
) printf("VFS: ds_event: ds_check failed: %d\n", r
);
787 thread_cleanup(NULL
);