2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 * Copyright (c) 1990, 1996-1998 Apple Computer, Inc.
24 * All Rights Reserved.
27 * posix_shm.c : Support for POSIX shared memory APIs
30 * Author: Ananthakrishna Ramesh
38 #include <sys/cdefs.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
43 #include <sys/filedesc.h>
47 #include <sys/mount.h>
48 #include <sys/namei.h>
49 #include <sys/vnode.h>
50 #include <sys/ioctl.h>
52 #include <sys/malloc.h>
55 #include <bsm/audit_kernel.h>
57 #include <mach/mach_types.h>
58 #include <mach/vm_prot.h>
59 #include <mach/vm_inherit.h>
60 #include <mach/kern_return.h>
61 #include <mach/memory_object_control.h>
64 #define PSHMNAMLEN 31 /* maximum name segment length we bother with */
67 unsigned int pshm_flags
;
68 unsigned int pshm_usecount
;
73 char pshm_name
[PSHMNAMLEN
+ 1]; /* segment name */
74 void * pshm_memobject
;
76 unsigned int pshm_readcount
;
77 unsigned int pshm_writecount
;
78 struct proc
* pshm_proc
;
79 #endif /* DIAGNOSTIC */
81 #define PSHMINFO_NULL (struct pshminfo *)0
84 #define PSHM_DEFINED 2
85 #define PSHM_ALLOCATED 4
87 #define PSHM_INUSE 0x10
88 #define PSHM_REMOVED 0x20
89 #define PSHM_INCREATE 0x40
90 #define PSHM_INDELETE 0x80
93 LIST_ENTRY(pshmcache
) pshm_hash
; /* hash chain */
94 struct pshminfo
*pshminfo
; /* vnode the name refers to */
95 int pshm_nlen
; /* length of name */
96 char pshm_name
[PSHMNAMLEN
+ 1]; /* segment name */
98 #define PSHMCACHE_NULL (struct pshmcache *)0
101 long goodhits
; /* hits that we can really use */
102 long neghits
; /* negative hits that we can use */
103 long badhits
; /* hits we must drop */
104 long falsehits
; /* hits with id mismatch */
105 long miss
; /* misses */
106 long longnames
; /* long names that ignore cache */
110 char *pshm_nameptr
; /* pointer to looked up name */
111 long pshm_namelen
; /* length of looked up component */
112 u_long pshm_hash
; /* hash value of looked up name */
118 struct pshminfo
*pinfo
;
119 unsigned int pshm_usecount
;
121 unsigned int readcnt
;
122 unsigned int writecnt
;
125 #define PSHMNODE_NULL (struct pshmnode *)0
128 #define PSHMHASH(pnp) \
129 (&pshmhashtbl[(pnp)->pshm_hash & pshmhash])
130 LIST_HEAD(pshmhashhead
, pshmcache
) *pshmhashtbl
; /* Hash Table */
131 u_long pshmhash
; /* size of hash table - 1 */
132 long pshmnument
; /* number of cache entries allocated */
133 struct pshmstats pshmstats
; /* cache effectiveness statistics */
135 static int pshm_read
__P((struct file
*fp
, struct uio
*uio
,
136 struct ucred
*cred
, int flags
, struct proc
*p
));
137 static int pshm_write
__P((struct file
*fp
, struct uio
*uio
,
138 struct ucred
*cred
, int flags
, struct proc
*p
));
139 static int pshm_ioctl
__P((struct file
*fp
, u_long com
,
140 caddr_t data
, struct proc
*p
));
141 static int pshm_select
__P((struct file
*fp
, int which
, void *wql
,
143 static int pshm_closefile
__P((struct file
*fp
, struct proc
*p
));
145 static int pshm_kqfilter
__P((struct file
*fp
, struct knote
*kn
, struct proc
*p
));
147 struct fileops pshmops
=
148 { pshm_read
, pshm_write
, pshm_ioctl
, pshm_select
, pshm_closefile
, pshm_kqfilter
};
151 * Lookup an entry in the cache
154 * A status of -1 is returned if a matching entry is found.
155 * If the lookup determines that the name does not exist
156 * (negative caching), a status of ENOENT is returned. If the lookup
157 * fails, a status of zero is returned.
161 pshm_cache_search(pshmp
, pnp
, pcache
)
162 struct pshminfo
**pshmp
;
163 struct pshmname
*pnp
;
164 struct pshmcache
**pcache
;
166 register struct pshmcache
*pcp
, *nnp
;
167 register struct pshmhashhead
*pcpp
;
169 if (pnp
->pshm_namelen
> PSHMNAMLEN
) {
170 pshmstats
.longnames
++;
174 pcpp
= PSHMHASH(pnp
);
175 for (pcp
= pcpp
->lh_first
; pcp
!= 0; pcp
= nnp
) {
176 nnp
= pcp
->pshm_hash
.le_next
;
177 if (pcp
->pshm_nlen
== pnp
->pshm_namelen
&&
178 !bcmp(pcp
->pshm_name
, pnp
->pshm_nameptr
, (u_int
)pcp
-> pshm_nlen
))
187 /* We found a "positive" match, return the vnode */
189 pshmstats
.goodhits
++;
191 *pshmp
= pcp
->pshminfo
;
197 * We found a "negative" match, ENOENT notifies client of this match.
198 * The nc_vpid field records whether this is a whiteout.
205 * Add an entry to the cache.
208 pshm_cache_add(pshmp
, pnp
)
209 struct pshminfo
*pshmp
;
210 struct pshmname
*pnp
;
212 register struct pshmcache
*pcp
;
213 register struct pshmhashhead
*pcpp
;
214 struct pshminfo
*dpinfo
;
215 struct pshmcache
*dpcp
;
218 if (pnp
->pshm_namelen
> NCHNAMLEN
)
219 panic("cache_enter: name too long");
223 * We allocate a new entry if we are less than the maximum
224 * allowed and the one at the front of the LRU list is in use.
225 * Otherwise we use the one at the front of the LRU list.
227 pcp
= (struct pshmcache
*)_MALLOC(sizeof(struct pshmcache
), M_SHM
, M_WAITOK
);
228 /* if the entry has already been added by someone else, return */
229 if (pshm_cache_search(&dpinfo
, pnp
, &dpcp
) == -1) {
235 bzero(pcp
, sizeof(struct pshmcache
));
237 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
238 * For negative entries, we have to record whether it is a whiteout.
239 * the whiteout flag is stored in the nc_vpid field which is
242 pcp
->pshminfo
= pshmp
;
243 pcp
->pshm_nlen
= pnp
->pshm_namelen
;
244 bcopy(pnp
->pshm_nameptr
, pcp
->pshm_name
, (unsigned)pcp
->pshm_nlen
);
245 pcpp
= PSHMHASH(pnp
);
248 register struct pshmcache
*p
;
250 for (p
= pcpp
->lh_first
; p
!= 0; p
= p
->pshm_hash
.le_next
)
252 panic("cache_enter: duplicate");
255 LIST_INSERT_HEAD(pcpp
, pcp
, pshm_hash
);
260 * Name cache initialization, from vfs_init() when we are booting
265 pshmhashtbl
= hashinit(desiredvnodes
, M_SHM
, &pshmhash
);
269 * Invalidate all entries to a particular vnode.
271 * We actually just increment the v_id, that will do it. The entries will
272 * be purged by lookup as they get found. If the v_id wraps around, we
273 * need to ditch the entire cache, to avoid confusion. No valid vnode will
274 * ever have (v_id == 0).
277 pshm_cache_purge(void)
279 struct pshmcache
*pcp
;
280 struct pshmhashhead
*pcpp
;
282 for (pcpp
= &pshmhashtbl
[pshmhash
]; pcpp
>= pshmhashtbl
; pcpp
--) {
283 while (pcp
= pcpp
->lh_first
)
284 pshm_cache_delete(pcp
);
288 pshm_cache_delete(pcp
)
289 struct pshmcache
*pcp
;
292 if (pcp
->pshm_hash
.le_prev
== 0)
293 panic("namecache purge le_prev");
294 if (pcp
->pshm_hash
.le_next
== pcp
)
295 panic("namecache purge le_next");
296 #endif /* DIAGNOSTIC */
297 LIST_REMOVE(pcp
, pshm_hash
);
298 pcp
->pshm_hash
.le_prev
= 0;
303 struct shm_open_args
{
310 shm_open(p
, uap
, retval
)
312 register struct shm_open_args
*uap
;
315 register struct filedesc
*fdp
= p
->p_fd
;
316 register struct file
*fp
;
317 register struct vnode
*vp
;
320 int type
, indx
, error
;
322 struct pshminfo
*pinfo
;
323 extern struct fileops pshmops
;
327 size_t pathlen
, plen
;
329 int cmode
= uap
->mode
;
331 struct pshmnode
* pnode
= PSHMNODE_NULL
;
332 struct pshmcache
* pcache
= PSHMCACHE_NULL
;
335 AUDIT_ARG(fflags
, uap
->oflag
);
336 AUDIT_ARG(mode
, uap
->mode
);
337 pinfo
= PSHMINFO_NULL
;
339 MALLOC_ZONE(pnbuf
, caddr_t
,
340 MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
341 pathlen
= MAXPATHLEN
;
342 error
= copyinstr((void *)uap
->name
, (void *)pnbuf
,
343 MAXPATHLEN
, &pathlen
);
347 AUDIT_ARG(text
, pnbuf
);
348 if (pathlen
> PSHMNAMLEN
) {
349 error
= ENAMETOOLONG
;
354 #ifdef PSXSHM_NAME_RESTRICT
356 if (*nameptr
== '/') {
357 while (*(nameptr
++) == '/') {
366 #endif /* PSXSHM_NAME_RESTRICT */
370 nd
.pshm_nameptr
= nameptr
;
371 nd
.pshm_namelen
= plen
;
374 for (cp
= nameptr
, i
=1; *cp
!= 0 && i
<= plen
; i
++, cp
++) {
375 nd
.pshm_hash
+= (unsigned char)*cp
* i
;
378 error
= pshm_cache_search(&pinfo
, &nd
, &pcache
);
380 if (error
== ENOENT
) {
389 fmode
= FFLAGS(uap
->oflag
);
390 if ((fmode
& (FREAD
| FWRITE
))==0) {
395 if (error
= falloc(p
, &nfp
, &indx
))
401 if (fmode
& O_CREAT
) {
402 if ((fmode
& O_EXCL
) && incache
) {
403 AUDIT_ARG(posix_ipc_perm
, pinfo
->pshm_uid
,
404 pinfo
->pshm_gid
, pinfo
->pshm_mode
);
406 /* shm obj exists and opened O_EXCL */
408 if (pinfo
->pshm_flags
& PSHM_INDELETE
) {
415 /* create a new one */
416 pinfo
= (struct pshminfo
*)_MALLOC(sizeof(struct pshminfo
), M_SHM
, M_WAITOK
);
417 bzero(pinfo
, sizeof(struct pshminfo
));
419 pinfo
->pshm_flags
= PSHM_DEFINED
| PSHM_INCREATE
;
420 pinfo
->pshm_usecount
= 1;
421 pinfo
->pshm_mode
= cmode
;
422 pinfo
->pshm_uid
= p
->p_ucred
->cr_uid
;
423 pinfo
->pshm_gid
= p
->p_ucred
->cr_gid
;
426 if( pinfo
->pshm_flags
& PSHM_INDELETE
) {
430 AUDIT_ARG(posix_ipc_perm
, pinfo
->pshm_uid
,
431 pinfo
->pshm_gid
, pinfo
->pshm_mode
);
432 if (error
= pshm_access(pinfo
, fmode
, p
->p_ucred
, p
))
437 /* O_CREAT is not set and the shm object does not exist */
441 if( pinfo
->pshm_flags
& PSHM_INDELETE
) {
445 if (error
= pshm_access(pinfo
, fmode
, p
->p_ucred
, p
))
448 if (fmode
& O_TRUNC
) {
454 pinfo
->pshm_writecount
++;
456 pinfo
->pshm_readcount
++;
458 pnode
= (struct pshmnode
*)_MALLOC(sizeof(struct pshmnode
), M_SHM
, M_WAITOK
);
459 bzero(pnode
, sizeof(struct pshmnode
));
462 if (error
= pshm_cache_add(pinfo
, &nd
)) {
466 pinfo
->pshm_flags
&= ~PSHM_INCREATE
;
467 pinfo
->pshm_usecount
++;
468 pnode
->pinfo
= pinfo
;
469 fp
->f_flag
= fmode
& FMASK
;
470 fp
->f_type
= DTYPE_PSXSHM
;
471 fp
->f_ops
= &pshmops
;
472 fp
->f_data
= (caddr_t
)pnode
;
473 *fdflags(p
, indx
) &= ~UF_RESERVED
;
475 FREE_ZONE(pnbuf
, MAXPATHLEN
, M_NAMEI
);
487 FREE_ZONE(pnbuf
, MAXPATHLEN
, M_NAMEI
);
494 pshm_truncate(p
, fp
, fd
, length
, retval
)
501 struct pshminfo
* pinfo
;
502 struct pshmnode
* pnode
;
504 vm_offset_t user_addr
;
508 if (fp
->f_type
!= DTYPE_PSXSHM
) {
513 if (((pnode
= (struct pshmnode
*)fp
->f_data
)) == PSHMNODE_NULL
)
516 if ((pinfo
= pnode
->pinfo
) == PSHMINFO_NULL
)
518 if ((pinfo
->pshm_flags
& (PSHM_DEFINED
| PSHM_ALLOCATED
))
523 size
= round_page_64(length
);
524 kret
= vm_allocate(current_map(), &user_addr
, size
, TRUE
);
525 if (kret
!= KERN_SUCCESS
)
528 kret
= mach_make_memory_entry (current_map(), &size
,
529 user_addr
, VM_PROT_DEFAULT
, &mem_object
, 0);
531 if (kret
!= KERN_SUCCESS
)
534 vm_deallocate(current_map(), user_addr
, size
);
536 pinfo
->pshm_flags
&= ~PSHM_DEFINED
;
537 pinfo
->pshm_flags
= PSHM_ALLOCATED
;
538 pinfo
->pshm_memobject
= mem_object
;
539 pinfo
->pshm_length
= size
;
544 case KERN_INVALID_ADDRESS
:
547 case KERN_PROTECTION_FAILURE
:
557 struct pshmnode
*pnode
;
560 struct pshminfo
*pinfo
;
562 if ((pinfo
= pnode
->pinfo
) == PSHMINFO_NULL
)
565 bzero(sb
, sizeof(struct stat
));
566 sb
->st_mode
= pinfo
->pshm_mode
;
567 sb
->st_uid
= pinfo
->pshm_uid
;
568 sb
->st_gid
= pinfo
->pshm_gid
;
569 sb
->st_size
= pinfo
->pshm_length
;
575 pshm_access(struct pshminfo
*pinfo
, int mode
, struct ucred
*cred
, struct proc
*p
)
581 /* Otherwise, user id 0 always gets access. */
582 if (cred
->cr_uid
== 0)
587 /* Otherwise, check the owner. */
588 if (cred
->cr_uid
== pinfo
->pshm_uid
) {
593 return ((pinfo
->pshm_mode
& mask
) == mask
? 0 : EACCES
);
596 /* Otherwise, check the groups. */
597 for (i
= 0, gp
= cred
->cr_groups
; i
< cred
->cr_ngroups
; i
++, gp
++)
598 if (pinfo
->pshm_gid
== *gp
) {
603 return ((pinfo
->pshm_mode
& mask
) == mask
? 0 : EACCES
);
606 /* Otherwise, check everyone else. */
611 return ((pinfo
->pshm_mode
& mask
) == mask
? 0 : EACCES
);
620 #ifdef DOUBLE_ALIGN_PARAMS
627 pshm_mmap(struct proc
*p
, struct mmap_args
*uap
, register_t
*retval
, struct file
*fp
, vm_size_t pageoff
)
629 vm_offset_t user_addr
= (vm_offset_t
)uap
->addr
;
630 vm_size_t user_size
= (vm_size_t
)uap
->len
;
631 int prot
= uap
->prot
;
632 int flags
= uap
->flags
;
633 vm_object_offset_t file_pos
= (vm_object_offset_t
)uap
->pos
;
636 boolean_t find_space
,docow
;
638 struct pshminfo
* pinfo
;
639 struct pshmnode
* pnode
;
645 if ((flags
& MAP_SHARED
) == 0)
649 if ((prot
& PROT_WRITE
) && ((fp
->f_flag
& FWRITE
) == 0)) {
653 if (((pnode
= (struct pshmnode
*)fp
->f_data
)) == PSHMNODE_NULL
)
656 if ((pinfo
= pnode
->pinfo
) == PSHMINFO_NULL
)
659 if ((pinfo
->pshm_flags
& PSHM_ALLOCATED
) != PSHM_ALLOCATED
) {
662 if (user_size
> pinfo
->pshm_length
) {
665 if ((off_t
)user_size
+ file_pos
> pinfo
->pshm_length
) {
668 if ((mem_object
= pinfo
->pshm_memobject
) == NULL
) {
673 user_map
= current_map();
675 if ((flags
& MAP_FIXED
) == 0) {
677 user_addr
= round_page_32(user_addr
);
679 if (user_addr
!= trunc_page_32(user_addr
))
682 (void) vm_deallocate(user_map
, user_addr
, user_size
);
686 kret
= vm_map_64(user_map
, &user_addr
, user_size
,
687 0, find_space
, pinfo
->pshm_memobject
, file_pos
, docow
,
688 prot
, VM_PROT_DEFAULT
,
691 if (kret
!= KERN_SUCCESS
)
693 kret
= vm_inherit(user_map
, user_addr
, user_size
,
695 if (kret
!= KERN_SUCCESS
) {
696 (void) vm_deallocate(user_map
, user_addr
, user_size
);
699 pnode
->mapp_addr
= user_addr
;
700 pnode
->map_size
= user_size
;
701 pinfo
->pshm_flags
|= (PSHM_MAPPED
| PSHM_INUSE
);
705 *fdflags(p
, fd
) |= UF_MAPPED
;
706 *retval
= (register_t
)(user_addr
+ pageoff
);
708 case KERN_INVALID_ADDRESS
:
711 case KERN_PROTECTION_FAILURE
:
719 struct shm_unlink_args
{
724 shm_unlink(p
, uap
, retval
)
726 register struct shm_unlink_args
*uap
;
729 register struct filedesc
*fdp
= p
->p_fd
;
730 register struct file
*fp
;
734 struct pshminfo
*pinfo
;
735 extern struct fileops pshmops
;
739 size_t pathlen
, plen
;
742 struct pshmnode
* pnode
= PSHMNODE_NULL
;
743 struct pshmcache
*pcache
= PSHMCACHE_NULL
;
746 pinfo
= PSHMINFO_NULL
;
748 MALLOC_ZONE(pnbuf
, caddr_t
,
749 MAXPATHLEN
, M_NAMEI
, M_WAITOK
);
750 pathlen
= MAXPATHLEN
;
751 error
= copyinstr((void *)uap
->name
, (void *)pnbuf
,
752 MAXPATHLEN
, &pathlen
);
756 AUDIT_ARG(text
, pnbuf
);
757 if (pathlen
> PSHMNAMLEN
) {
758 error
= ENAMETOOLONG
;
763 #ifdef PSXSHM_NAME_RESTRICT
765 if (*nameptr
== '/') {
766 while (*(nameptr
++) == '/') {
775 #endif /* PSXSHM_NAME_RESTRICT */
779 nd
.pshm_nameptr
= nameptr
;
780 nd
.pshm_namelen
= plen
;
783 for (cp
= nameptr
, i
=1; *cp
!= 0 && i
<= plen
; i
++, cp
++) {
784 nd
.pshm_hash
+= (unsigned char)*cp
* i
;
787 error
= pshm_cache_search(&pinfo
, &nd
, &pcache
);
789 if (error
== ENOENT
) {
800 if ((pinfo
->pshm_flags
& (PSHM_DEFINED
| PSHM_ALLOCATED
))==0) {
804 if (pinfo
->pshm_flags
& PSHM_INDELETE
) {
809 if (pinfo
->pshm_memobject
== NULL
) {
814 AUDIT_ARG(posix_ipc_perm
, pinfo
->pshm_uid
, pinfo
->pshm_gid
,
816 pinfo
->pshm_flags
|= PSHM_INDELETE
;
817 pinfo
->pshm_usecount
--;
818 kret
= mach_destroy_memory_entry(pinfo
->pshm_memobject
);
819 pshm_cache_delete(pcache
);
820 _FREE(pcache
, M_SHM
);
821 pinfo
->pshm_flags
|= PSHM_REMOVED
;
824 FREE_ZONE(pnbuf
, MAXPATHLEN
, M_NAMEI
);
828 case KERN_INVALID_ADDRESS
:
829 case KERN_PROTECTION_FAILURE
:
837 pshm_close(pnode
, flags
, cred
, p
)
838 register struct pshmnode
*pnode
;
845 register struct pshminfo
*pinfo
;
847 if ((pinfo
= pnode
->pinfo
) == PSHMINFO_NULL
)
850 if ((pinfo
->pshm_flags
& PSHM_ALLOCATED
) != PSHM_ALLOCATED
) {
854 if(!pinfo
->pshm_usecount
) {
855 kprintf("negative usecount in pshm_close\n");
857 #endif /* DIAGNOSTIC */
858 pinfo
->pshm_usecount
--;
860 if ((pinfo
->pshm_flags
& PSHM_REMOVED
) && !pinfo
->pshm_usecount
) {
868 pshm_closefile(fp
, p
)
872 return (pshm_close(((struct pshmnode
*)fp
->f_data
), fp
->f_flag
,
877 pshm_read(fp
, uio
, cred
, flags
, p
)
888 pshm_write(fp
, uio
, cred
, flags
, p
)
899 pshm_ioctl(fp
, com
, data
, p
)
909 pshm_select(fp
, which
, wql
, p
)
919 pshm_kqfilter(fp
, kn
, p
)