2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
22 /* $NetBSD: procfs_vnops.c,v 1.32 1995/02/03 16:18:55 mycroft Exp $ */
25 * Copyright (c) 1993 Jan-Simon Pendry
27 * The Regents of the University of California. All rights reserved.
29 * This code is derived from software contributed to Berkeley by
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)procfs_vnops.c 8.8 (Berkeley) 6/15/94
64 * procfs vnode interface
67 #include <sys/param.h>
68 #include <sys/systm.h>
70 #include <sys/kernel.h>
73 #include <sys/vnode.h>
74 #include <sys/namei.h>
75 #include <sys/malloc.h>
76 #include <sys/dirent.h>
77 #include <sys/resourcevar.h>
78 #include <sys/ptrace.h>
79 #include <vm/vm.h> /* for PAGE_SIZE */
80 #include <machine/reg.h>
81 #include <vfs/vfs_support.h>
82 #include <miscfs/procfs/procfs.h>
90 * This is a list of the valid names in the
91 * process-specific sub-directories. It is
92 * used in procfs_lookup and procfs_readdir
99 int (*pt_valid
) __P((struct proc
*p
));
/*
 * N(s) expands to two comma-separated initializer fields: the length of
 * the string literal s without its terminating NUL (sizeof(s)-1), then s
 * itself.  It cannot be wrapped in parentheses as a whole because it has
 * to supply two separate fields of each table entry.
 */
101 #define N(s) sizeof(s)-1, s
102 /* name type validp */
103 { DT_DIR
, N("."), Pproc
, NULL
},
104 { DT_DIR
, N(".."), Proot
, NULL
},
105 { DT_REG
, N("file"), Pfile
, procfs_validfile
},
106 { DT_REG
, N("mem"), Pmem
, NULL
},
107 { DT_REG
, N("regs"), Pregs
, procfs_validregs
},
108 { DT_REG
, N("fpregs"), Pfpregs
, procfs_validfpregs
},
109 { DT_REG
, N("ctl"), Pctl
, NULL
},
110 { DT_REG
, N("status"), Pstatus
, NULL
},
111 { DT_REG
, N("note"), Pnote
, NULL
},
112 { DT_REG
, N("notepg"), Pnotepg
, NULL
},
/*
 * Number of entries in proc_targets[], computed from the array's own size
 * so it follows the table when entries are added or removed.
 */
115 static int nproc_targets
= sizeof(proc_targets
) / sizeof(proc_targets
[0]);
/*
 * Forward declaration of atopid(): takes a character pointer and a u_int
 * and returns a pid_t.  The lookup code calls it as
 * atopid(pname, cnp->cn_namelen), i.e. with a component name and its length.
 */
117 static pid_t atopid
__P((const char *, u_int
));
120 * set things up for doing i/o on
121 * the pfsnode (vp). (vp) is locked
122 * on entry, and should be left locked
125 * for procfs we don't need to do anything
126 * in particular for i/o. all that is done
127 * is to support exclusive open on process
131 struct vop_open_args
/* {
134 struct ucred *a_cred;
138 struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
140 switch (pfs
->pfs_type
) {
142 if (PFIND(pfs
->pfs_pid
) == 0)
143 return (ENOENT
); /* was ESRCH, jsp */
145 if ((pfs
->pfs_flags
& FWRITE
) && (ap
->a_mode
& O_EXCL
) ||
146 (pfs
->pfs_flags
& O_EXCL
) && (ap
->a_mode
& FWRITE
))
149 if (ap
->a_mode
& FWRITE
)
150 pfs
->pfs_flags
= ap
->a_mode
& (FWRITE
|O_EXCL
);
162 * close the pfsnode (vp) after doing i/o.
163 * (vp) is not locked on entry or exit.
165 * nothing to do for procfs other than undo
166 * any exclusive open flag (see _open above).
169 struct vop_close_args
/* {
172 struct ucred *a_cred;
176 struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
178 switch (pfs
->pfs_type
) {
180 if ((ap
->a_fflag
& FWRITE
) && (pfs
->pfs_flags
& O_EXCL
))
181 pfs
->pfs_flags
&= ~(FWRITE
|O_EXCL
);
189 * do an ioctl operation on pfsnode (vp).
190 * (vp) is not locked on entry or exit.
193 struct vop_ioctl_args
/* {
198 struct ucred *a_cred;
207 * do block mapping for pfsnode (vp).
208 * since we don't use the buffer cache
209 * for procfs this function should never
210 * be called. in any case, it's not clear
211 * what part of the kernel ever makes use
212 * of this function. for sanity, this is the
213 * usual no-op bmap, although returning
214 * (EIO) would be a reasonable alternative.
217 struct vop_bmap_args
/* {
220 struct vnode **a_vpp;
225 if (ap
->a_vpp
!= NULL
)
226 *ap
->a_vpp
= ap
->a_vp
;
227 if (ap
->a_bnp
!= NULL
)
228 *ap
->a_bnp
= ap
->a_bn
;
233 * _inactive is called when the pfsnode
234 * is vrele'd and the reference count goes
235 * to zero. (vp) will be on the vnode free
236 * list, so to get it back vget() must be
239 * for procfs, check if the process is still
240 * alive and if it isn't then just throw away
241 * the vnode by calling vgone(). this may
242 * be overkill and a waste of time since the
243 * chances are that the process will still be
244 * there and PFIND is not free.
246 * (vp) is not locked on entry or exit.
249 struct vop_inactive_args
/* {
253 struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
255 if (PFIND(pfs
->pfs_pid
) == 0)
262 * _reclaim is called when getnewvnode()
263 * wants to make use of an entry on the vnode
264 * free list. at this time the filesystem needs
265 * to free any private data and remove the node
266 * from any private lists.
269 struct vop_reclaim_args
/* {
274 return (procfs_freevp(ap
->a_vp
));
278 * Return POSIX pathconf information applicable to special devices.
281 struct vop_pathconf_args
/* {
284 register_t *a_retval;
288 switch (ap
->a_name
) {
290 *ap
->a_retval
= LINK_MAX
;
293 *ap
->a_retval
= MAX_CANON
;
296 *ap
->a_retval
= MAX_INPUT
;
299 *ap
->a_retval
= PIPE_BUF
;
301 case _PC_CHOWN_RESTRICTED
:
305 *ap
->a_retval
= _POSIX_VDISABLE
;
314 * _print is used for debugging.
315 * just print a readable description
319 struct vop_print_args
/* {
323 struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
325 printf("tag VT_PROCFS, type %s, pid %d, mode %x, flags %x\n",
326 pfs
->pfs_type
, pfs
->pfs_pid
, pfs
->pfs_mode
, pfs
->pfs_flags
);
330 * _abortop is called when operations such as
331 * rename and create fail. this entry is responsible
332 * for undoing any side-effects caused by the lookup.
333 * this will always include freeing the pathname buffer.
336 struct vop_abortop_args
/* {
338 struct componentname *a_cnp;
342 if ((ap
->a_cnp
->cn_flags
& (HASBUF
| SAVESTART
)) == HASBUF
)
343 FREE_ZONE(ap
->a_cnp
->cn_pnbuf
, ap
->a_cnp
->cn_pnlen
, M_NAMEI
);
348 * generic entry point for unsupported operations
357 * Invent attributes for pfsnode (vp) and store
359 * Directory lengths are returned as zero since
360 * any real length would require the genuine size
361 * to be computed, and nothing cares anyway.
363 * this is relatively minimal for procfs.
366 struct vop_getattr_args
/* {
369 struct ucred *a_cred;
373 struct pfsnode
*pfs
= VTOPFS(ap
->a_vp
);
374 struct vattr
*vap
= ap
->a_vap
;
378 /* first check the process still exists */
379 switch (pfs
->pfs_type
) {
386 procp
= PFIND(pfs
->pfs_pid
);
393 /* start by zeroing out the attributes */
396 /* next do all the common fields */
397 vap
->va_type
= ap
->a_vp
->v_type
;
398 vap
->va_mode
= pfs
->pfs_mode
;
399 vap
->va_fileid
= pfs
->pfs_fileno
;
401 vap
->va_blocksize
= PAGE_SIZE
;
402 vap
->va_bytes
= vap
->va_size
= 0;
405 * Make all times be current TOD.
406 * It would be possible to get the process start
407 * time from the p_stat structure, but there's
408 * no "file creation" time stamp anyway, and the
409 * p_stat structure is not addressable if u. gets
410 * swapped out for that process.
413 * Note that microtime() returns a timeval, not a timespec.
415 microtime(&vap
->va_ctime
);
416 vap
->va_atime
= vap
->va_mtime
= vap
->va_ctime
;
419 * If the process has exercised some setuid or setgid
420 * privilege, then rip away read/write permission so
421 * that only root can gain access.
423 switch (pfs
->pfs_type
) {
427 if (procp
->p_flag
& P_SUGID
)
428 vap
->va_mode
&= ~((VREAD
|VWRITE
)|
430 ((VREAD
|VWRITE
)>>6));
436 vap
->va_uid
= procp
->p_ucred
->cr_uid
;
437 vap
->va_gid
= procp
->p_ucred
->cr_gid
;
442 * now do the object specific fields
444 * The size could be set from struct reg, but it's hardly
445 * worth the trouble, and it puts some (potentially) machine
446 * dependent data into this machine-independent code. If it
447 * becomes important then this function should break out into
448 * a per-file stat function in the corresponding .c file.
451 switch (pfs
->pfs_type
) {
454 * Set nlink to 1 to tell fts(3) we don't actually know.
459 vap
->va_size
= vap
->va_bytes
= DEV_BSIZE
;
463 char buf
[16]; /* should be enough */
467 vap
->va_size
= vap
->va_bytes
=
468 sprintf(buf
, "%ld", (long)curproc
->p_pid
);
474 vap
->va_uid
= procp
->p_ucred
->cr_uid
;
475 vap
->va_gid
= procp
->p_ucred
->cr_gid
;
476 vap
->va_size
= vap
->va_bytes
= DEV_BSIZE
;
484 vap
->va_bytes
= vap
->va_size
=
485 ctob(procp
->p_vmspace
->vm_tsize
+
486 procp
->p_vmspace
->vm_dsize
+
487 procp
->p_vmspace
->vm_ssize
);
490 #if defined(PT_GETREGS) || defined(PT_SETREGS)
492 vap
->va_bytes
= vap
->va_size
= sizeof(struct reg
);
496 #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
498 vap
->va_bytes
= vap
->va_size
= sizeof(struct fpreg
);
509 panic("procfs_getattr");
516 struct vop_setattr_args
/* {
519 struct ucred *a_cred;
524 * just fake out attribute setting
525 * it's not good to generate an error
526 * return, otherwise things like creat()
527 * will fail when they try to set the
528 * file length to 0. worse, this means
529 * that echo $note > /proc/$pid/note will fail.
536 * implement access checking.
538 * actually, the check for super-user is slightly
539 * broken since it will allow read access to write-only
540 * objects. this doesn't cause any particular trouble
541 * but does mean that the i/o entry points need to check
542 * that the operation really does make sense.
545 struct vop_access_args
/* {
548 struct ucred *a_cred;
555 if (error
= VOP_GETATTR(ap
->a_vp
, &va
, ap
->a_cred
, ap
->a_p
))
558 return (vaccess(va
.va_mode
, va
.va_uid
, va
.va_gid
, ap
->a_mode
,
563 * lookup. this is incredibly complicated in the
564 * general case, however for most pseudo-filesystems
565 * very little needs to be done.
567 * unless you want to get a migraine, just make sure your
568 * filesystem doesn't do any locking of its own. otherwise
569 * read and inwardly digest ufs_lookup().
572 struct vop_lookup_args
/* {
573 struct vnode * a_dvp;
574 struct vnode ** a_vpp;
575 struct componentname * a_cnp;
578 struct componentname
*cnp
= ap
->a_cnp
;
579 struct vnode
**vpp
= ap
->a_vpp
;
580 struct vnode
*dvp
= ap
->a_dvp
;
581 char *pname
= cnp
->cn_nameptr
;
582 struct proc_target
*pt
;
591 if (cnp
->cn_nameiop
== DELETE
|| cnp
->cn_nameiop
== RENAME
)
594 if (cnp
->cn_namelen
== 1 && *pname
== '.') {
602 switch (pfs
->pfs_type
) {
604 if (cnp
->cn_flags
& ISDOTDOT
)
607 if (CNEQ(cnp
, "curproc", 7))
608 return (procfs_allocvp(dvp
->v_mount
, vpp
, 0, Pcurproc
));
610 pid
= atopid(pname
, cnp
->cn_namelen
);
618 return (procfs_allocvp(dvp
->v_mount
, vpp
, pid
, Pproc
));
621 if (cnp
->cn_flags
& ISDOTDOT
)
622 return (procfs_root(dvp
->v_mount
, vpp
));
624 p
= PFIND(pfs
->pfs_pid
);
628 for (pt
= proc_targets
, i
= 0; i
< nproc_targets
; pt
++, i
++) {
629 if (cnp
->cn_namelen
== pt
->pt_namlen
&&
630 bcmp(pt
->pt_name
, pname
, cnp
->cn_namelen
) == 0 &&
631 (pt
->pt_valid
== NULL
|| (*pt
->pt_valid
)(p
)))
637 if (pt
->pt_pfstype
== Pfile
) {
638 fvp
= procfs_findtextvp(p
);
639 /* We already checked that it exists. */
646 return (procfs_allocvp(dvp
->v_mount
, vpp
, pfs
->pfs_pid
,
653 return (cnp
->cn_nameiop
== LOOKUP
? ENOENT
: EROFS
);
661 return (procfs_findtextvp(p
) != NULLVP
);
665 * readdir returns directory entries from pfsnode (vp).
667 * the strategy here with procfs is to generate a single
668 * directory entry at a time (struct pfsdent) and then
669 * copy that out to userland using uiomove. a more efficient
670 * though more complex implementation, would try to minimize
671 * the number of calls to uiomove(). for procfs, this is
672 * hardly worth the added code complexity.
674 * this should just be done through read()
677 struct vop_readdir_args
/* {
680 struct ucred *a_cred;
686 struct uio
*uio
= ap
->a_uio
;
688 struct pfsdent
*dp
= &d
;
695 * We don't allow exporting procfs mounts, and currently local
696 * requests do not need cookies.
699 panic("procfs_readdir: not hungry");
701 pfs
= VTOPFS(ap
->a_vp
);
703 if (uio
->uio_resid
< UIO_MX
)
705 if (uio
->uio_offset
& (UIO_MX
-1))
707 if (uio
->uio_offset
< 0)
712 i
= uio
->uio_offset
/ UIO_MX
;
714 switch (pfs
->pfs_type
) {
716 * this is for the process-specific sub-directories.
717 * all that is needed is to copy out all the entries
718 * from the proc_targets[] table (top of this file).
722 struct proc_target
*pt
;
724 p
= PFIND(pfs
->pfs_pid
);
728 for (pt
= &proc_targets
[i
];
729 uio
->uio_resid
>= UIO_MX
&& i
< nproc_targets
; pt
++, i
++) {
730 if (pt
->pt_valid
&& (*pt
->pt_valid
)(p
) == 0)
733 dp
->d_reclen
= UIO_MX
;
734 dp
->d_fileno
= PROCFS_FILENO(pfs
->pfs_pid
, pt
->pt_pfstype
);
735 dp
->d_namlen
= pt
->pt_namlen
;
736 bcopy(pt
->pt_name
, dp
->d_name
, pt
->pt_namlen
+ 1);
737 dp
->d_type
= pt
->pt_type
;
739 if (error
= uiomove((caddr_t
)dp
, UIO_MX
, uio
))
747 * this is for the root of the procfs filesystem
748 * what is needed is a special entry for "curproc"
749 * followed by an entry for each process on allproc
760 volatile struct proc
*p
= allproc
.lh_first
;
763 for (; p
&& uio
->uio_resid
>= UIO_MX
; i
++, pcnt
++) {
764 bzero((char *) dp
, UIO_MX
);
765 dp
->d_reclen
= UIO_MX
;
770 dp
->d_fileno
= PROCFS_FILENO(0, Proot
);
771 dp
->d_namlen
= i
+ 1;
772 bcopy("..", dp
->d_name
, dp
->d_namlen
);
773 dp
->d_name
[i
+ 1] = '\0';
778 dp
->d_fileno
= PROCFS_FILENO(0, Pcurproc
);
780 bcopy("curproc", dp
->d_name
, 8);
787 p
= p
->p_list
.le_next
;
791 dp
->d_fileno
= PROCFS_FILENO(p
->p_pid
, Pproc
);
792 dp
->d_namlen
= sprintf(dp
->d_name
, "%ld",
795 p
= p
->p_list
.le_next
;
799 if (error
= uiomove((caddr_t
)dp
, UIO_MX
, uio
))
805 if (p
== 0 && doingzomb
== 0) {
807 p
= zombproc
.lh_first
;
821 uio
->uio_offset
= i
* UIO_MX
;
827 * readlink reads the link of `curproc'
830 struct vop_readlink_args
*ap
;
832 struct uio
*uio
= ap
->a_uio
;
833 char buf
[16]; /* should be enough */
836 if (VTOPFS(ap
->a_vp
)->pfs_fileno
!= PROCFS_FILENO(0, Pcurproc
))
839 len
= sprintf(buf
, "%ld", (long)curproc
->p_pid
);
841 return (uiomove((caddr_t
)buf
, len
, ap
->a_uio
));
845 * convert decimal ascii to pid_t
856 if (c
< '0' || c
> '9')
858 p
= 10 * p
+ (c
- '0');
867 * procfs vnode operations.
/*
 * VOPFUNC is the function-pointer type (pointer to function taking void *
 * and returning int) that every handler in procfs_vnodeop_entries[] is
 * cast to.
 */
870 #define VOPFUNC int (*)(void *)
/*
 * Pointer to an array of int (*)(void *) operation handlers.  Its address
 * is recorded in procfs_vnodeop_opv_desc.
 * NOTE(review): presumably filled in by the VFS layer from
 * procfs_vnodeop_entries[] at initialization time -- confirm.
 */
872 int (**procfs_vnodeop_p
)(void *);
873 struct vnodeopv_entry_desc procfs_vnodeop_entries
[] = {
874 { &vop_default_desc
, (VOPFUNC
)vn_default_error
},
875 { &vop_lookup_desc
, (VOPFUNC
)procfs_lookup
}, /* lookup */
876 { &vop_create_desc
, (VOPFUNC
)procfs_create
}, /* create */
877 { &vop_mknod_desc
, (VOPFUNC
)procfs_mknod
}, /* mknod */
878 { &vop_open_desc
, (VOPFUNC
)procfs_open
}, /* open */
879 { &vop_close_desc
, (VOPFUNC
)procfs_close
}, /* close */
880 { &vop_access_desc
, (VOPFUNC
)procfs_access
}, /* access */
881 { &vop_getattr_desc
, (VOPFUNC
)procfs_getattr
}, /* getattr */
882 { &vop_setattr_desc
, (VOPFUNC
)procfs_setattr
}, /* setattr */
883 { &vop_read_desc
, (VOPFUNC
)procfs_read
}, /* read */
884 { &vop_write_desc
, (VOPFUNC
)procfs_write
}, /* write */
885 { &vop_ioctl_desc
, (VOPFUNC
)procfs_ioctl
}, /* ioctl */
886 { &vop_select_desc
, (VOPFUNC
)procfs_select
}, /* select */
887 { &vop_mmap_desc
, (VOPFUNC
)procfs_mmap
}, /* mmap */
888 { &vop_fsync_desc
, (VOPFUNC
)procfs_fsync
}, /* fsync */
889 { &vop_seek_desc
, (VOPFUNC
)procfs_seek
}, /* seek */
890 { &vop_remove_desc
, (VOPFUNC
)procfs_remove
}, /* remove */
891 { &vop_link_desc
, (VOPFUNC
)procfs_link
}, /* link */
892 { &vop_rename_desc
, (VOPFUNC
)procfs_rename
}, /* rename */
893 { &vop_mkdir_desc
, (VOPFUNC
)procfs_mkdir
}, /* mkdir */
894 { &vop_rmdir_desc
, (VOPFUNC
)procfs_rmdir
}, /* rmdir */
895 { &vop_symlink_desc
, (VOPFUNC
)procfs_symlink
}, /* symlink */
896 { &vop_readdir_desc
, (VOPFUNC
)procfs_readdir
}, /* readdir */
897 { &vop_readlink_desc
, (VOPFUNC
)procfs_readlink
}, /* readlink */
898 { &vop_abortop_desc
, (VOPFUNC
)procfs_abortop
}, /* abortop */
899 { &vop_inactive_desc
, (VOPFUNC
)procfs_inactive
}, /* inactive */
900 { &vop_reclaim_desc
, (VOPFUNC
)procfs_reclaim
}, /* reclaim */
901 { &vop_lock_desc
, (VOPFUNC
)procfs_lock
}, /* lock */
902 { &vop_unlock_desc
, (VOPFUNC
)procfs_unlock
}, /* unlock */
903 { &vop_bmap_desc
, (VOPFUNC
)procfs_bmap
}, /* bmap */
904 { &vop_strategy_desc
, (VOPFUNC
)procfs_strategy
}, /* strategy */
905 { &vop_print_desc
, (VOPFUNC
)procfs_print
}, /* print */
906 { &vop_islocked_desc
, (VOPFUNC
)procfs_islocked
}, /* islocked */
907 { &vop_pathconf_desc
, (VOPFUNC
)procfs_pathconf
}, /* pathconf */
908 { &vop_advlock_desc
, (VOPFUNC
)procfs_advlock
}, /* advlock */
909 { &vop_blkatoff_desc
, (VOPFUNC
)procfs_blkatoff
}, /* blkatoff */
910 { &vop_valloc_desc
, (VOPFUNC
)procfs_valloc
}, /* valloc */
911 { &vop_vfree_desc
, (VOPFUNC
)procfs_vfree
}, /* vfree */
912 { &vop_truncate_desc
, (VOPFUNC
)procfs_truncate
}, /* truncate */
913 { &vop_update_desc
, (VOPFUNC
)procfs_update
}, /* update */
914 { &vop_copyfile_desc
, (VOPFUNC
)err_copyfile
}, /* Copyfile */
915 { (struct vnodeop_desc
*)NULL
, (int(*)())NULL
}
/*
 * Descriptor pairing the procfs operations vector pointer
 * (procfs_vnodeop_p) with the procfs_vnodeop_entries[] table of
 * (operation descriptor, handler) pairs.
 */
917 struct vnodeopv_desc procfs_vnodeop_opv_desc
=
918 { &procfs_vnodeop_p
, procfs_vnodeop_entries
};