/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <mach/mach_traps.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/port.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/sysctl.h>

#include <bsm/audit_kernel.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <machine/spl.h>

#include <mach/shared_memory_server.h>
#include <vm/vm_shared_memory_server.h>

#include <vm/vm_protos.h>
log_nx_failure(addr64_t vaddr, vm_prot_t prot)
	printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
	return (vm_map_check_protection(
		vm_map_trunc_page(addr), vm_map_round_page(addr+len),
		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
			vm_map_round_page(addr+len),
			VM_PROT_READ | VM_PROT_WRITE, FALSE);

	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	__unused int dirtied)
	vm_map_offset_t	vaddr;

	pmap = get_task_pmap(current_task());
	for (vaddr = vm_map_trunc_page(addr);
	     vaddr < vm_map_round_page(addr+len);
	     vaddr += PAGE_SIZE) {
		paddr = pmap_extract(pmap, vaddr);
		pg = PHYS_TO_VM_PAGE(paddr);
		vm_page_set_modified(pg);
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
			vm_map_round_page(addr+len), FALSE);

	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);

int fubyte(user_addr_t addr)
	if (copyin(addr, (void *) &byte, sizeof(char)))

int fuibyte(user_addr_t addr)
	if (copyin(addr, (void *) &(byte), sizeof(char)))

	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);

long fuword(user_addr_t addr)
	if (copyin(addr, (void *) &word, sizeof(int)))

/* suiword and fuiword are the same as suword and fuword, respectively */

	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);

long fuiword(user_addr_t addr)
	if (copyin(addr, (void *) &word, sizeof(int)))
/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
sulong(user_addr_t addr, int64_t word)
	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
		return(suiword(addr, (long)word));

fulong(user_addr_t addr)
	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
		return((int64_t)fuiword(addr));

suulong(user_addr_t addr, uint64_t uword)
	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
		return(suiword(addr, (u_long)uword));

fuulong(user_addr_t addr)
	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
		return((uint64_t)fuiword(addr));
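/*
 * Illustrative sketch (not part of the original file): a kernel caller that
 * must store a pointer-sized value for either a 32-bit or a 64-bit process
 * would use sulong()/fulong() rather than suword()/fuword() so the value
 * round-trips at the right width.  "slot" and "value" are hypothetical names
 * used only for this example:
 *
 *	user_addr_t slot = ...;			// user-space slot to fill
 *	if (sulong(slot, (int64_t)value) != 0)	// writes 8 or 4 bytes as needed
 *		return (EFAULT);
 *	int64_t back = fulong(slot);		// reads back at the same width
 */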
swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
	struct pid_for_task_args *args)
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	kern_return_t		err = KERN_SUCCESS;
	boolean_t		funnel_state;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {

	p = get_bsdtask_info(t1);

	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	thread_funnel_set(kernel_flock, funnel_state);
	AUDIT_MACH_SYSCALL_EXIT(err);
/*
 * Routine:	task_for_pid
 *
 *	Get the task port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: none allowed except for self
 * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: all permissive; related or privileged ones allowed
 * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; setgid (to tfp_group) processes are allowed access to other tasks
 */
static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
/* the group is inited to the kmem group and is modifiable by sysctl */
static int tfp_group_inited = 0;	/* policy groups are loaded ... */
static gid_t tfp_group_ronly = 0;	/* procview group */
static gid_t tfp_group_rw = 0;		/* procmod group */
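/*
 * Illustrative sketch (not part of the original file): from user space the
 * trap is reached through the libsystem wrapper, roughly:
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	// kr is KERN_FAILURE unless the policy checks below permit the caller
 *
 * Whether this succeeds for "pid" depends on tfp_policy and the group
 * settings declared above.
 */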
	struct task_for_pid_args *args)
	mach_port_name_t	target_tport = args->target_tport;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	mach_port_name_t	tret;
	boolean_t		funnel_state;
	boolean_t		ispermitted = FALSE;
	char			procname[MAXCOMLEN+1];
#endif /* DIAGNOSTIC */

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	if (uthread->uu_ucred != p1->p_ucred &&
	    (uthread->uu_flag & UT_SETUID) == 0) {
		kauth_cred_t old = uthread->uu_ucred;
		kauth_cred_ref(p1->p_ucred);
		uthread->uu_ucred = p1->p_ucred;
		if (IS_VALID_CRED(old))
			kauth_cred_unref(&old);

	AUDIT_ARG(process, p);
	/*
	 * XXX p_ucred check can be bogus in multithreaded processes,
	 * XXX unless the funnel is held.
	 */
	switch (tfp_policy) {

	case KERN_TFP_POLICY_PERMISSIVE:
		/* self or suser or related ones */
		if ((p != (struct proc *) 0)
			&& (p->p_stat != SZOMB)
			&& (p1 != (struct proc *) 0)
			|| !(suser(kauth_cred_get(), 0))
			|| ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
				((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
				&& ((p->p_flag & P_SUGID) == 0))

	case KERN_TFP_POLICY_RESTRICTED:
		/* self or suser or setgid and related ones only */
		if ((p != (struct proc *) 0)
			&& (p1 != (struct proc *) 0)
			&& (p->p_stat != SZOMB)
			|| !(suser(kauth_cred_get(), 0))
			|| (((tfp_group_inited != 0) &&
				((kauth_cred_ismember_gid(kauth_cred_get(),
					tfp_group_ronly, &is_member) == 0) && is_member)
				|| ((kauth_cred_ismember_gid(kauth_cred_get(),
					tfp_group_rw, &is_member) == 0) && is_member)
				&& ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
					((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
					&& ((p->p_flag & P_SUGID) == 0))

	case KERN_TFP_POLICY_DENY:
		/* self or suser only */
		/* do not return task port of other task at all */
		if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
			&& ((p1 == p) || !(suser(kauth_cred_get(), 0))))
	if (ispermitted == TRUE) {
		if (p->task != TASK_NULL) {
			task_reference(p->task);
			sright = (void *)convert_task_to_port(p->task);
			tret = ipc_port_copyout_send(
				get_task_ipcspace(current_task()));
			tret = MACH_PORT_NULL;
		AUDIT_ARG(mach_port2, tret);
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_SUCCESS;
		/*
		 * There is no guarantee that p_comm is null-terminated, and the
		 * kernel's string functions are not complete either, so bzero
		 * the buffer to ensure stale info is not leaked out.
		 */
		bzero(&procname[0], MAXCOMLEN+1);
		strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
		if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
			log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
				((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
				((p != PROC_NULL)?(p->p_pid):0));
#endif /* DIAGNOSTIC */
		tret = MACH_PORT_NULL;
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_FAILURE;

	thread_funnel_set(kernel_flock, funnel_state);
	AUDIT_MACH_SYSCALL_EXIT(error);
/*
 * Routine:	task_name_for_pid
 *
 *	Get the task name port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
	struct task_name_for_pid_args *args)
	mach_port_name_t	target_tport = args->target_tport;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	mach_port_name_t	tret;
	boolean_t		funnel_state;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 *
	 * XXX p_ucred check can be bogus in multithreaded processes,
	 * XXX unless the funnel is held.
	 */
	uthread = get_bsdthread_info(current_thread());
	if (uthread->uu_ucred != p1->p_ucred &&
	    (uthread->uu_flag & UT_SETUID) == 0) {
		kauth_cred_t old = uthread->uu_ucred;
		kauth_cred_ref(p1->p_ucred);
		uthread->uu_ucred = p1->p_ucred;
		if (IS_VALID_CRED(old))
			kauth_cred_unref(&old);

	AUDIT_ARG(process, p);

	if ((p != (struct proc *) 0)
		&& (p->p_stat != SZOMB)
		&& (p1 != (struct proc *) 0)
		|| !(suser(kauth_cred_get(), 0))
		|| ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
			((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))

		if (p->task != TASK_NULL)
			task_reference(p->task);
			sright = (void *)convert_task_name_to_port(p->task);
			tret = ipc_port_copyout_send(
				get_task_ipcspace(current_task()));
			tret = MACH_PORT_NULL;
		AUDIT_ARG(mach_port2, tret);
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_SUCCESS;

		tret = MACH_PORT_NULL;
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_FAILURE;

	thread_funnel_set(kernel_flock, funnel_state);
	AUDIT_MACH_SYSCALL_EXIT(error);
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {

	if ((new_value == KERN_TFP_POLICY_DENY)
		|| (new_value == KERN_TFP_POLICY_PERMISSIVE)
		|| (new_value == KERN_TFP_POLICY_RESTRICTED))
		tfp_policy = new_value;
sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)

	/*
	 * Once set, this cannot be reset until the next boot.  launchd sets it
	 * during its pid 1 init and no one can set it after that.
	 */
	if (tfp_group_inited != 0)

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {

	if (new_value >= 100)

	if (arg1 == &tfp_group_ronly)
		tfp_group_ronly = new_value;
	else if (arg1 == &tfp_group_rw)
		tfp_group_rw = new_value;

	if ((tfp_group_ronly != 0) && (tfp_group_rw != 0))
		tfp_group_inited = 1;
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups, "I", "read_group");
SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups, "I", "rw_group");
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
    &shared_region_trace_level, 0, "");
/*
 * Try to cap the number of mappings the user might be trying to deal with,
 * so that we don't end up allocating insane amounts of wired memory in the
 * kernel based on bogus user arguments.
 * There are 2 shared regions (TEXT and DATA).  The size of each submap
 * is SHARED_TEXT_REGION_SIZE and we can have at most 1 VM map entry per page,
 * so the maximum number of mappings we could ever have to deal with is...
 */
#define SHARED_REGION_MAX_MAPPINGS ((2 * SHARED_TEXT_REGION_SIZE) >> PAGE_SHIFT)
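/*
 * Worked example (illustrative, assuming 4 KB pages, i.e. PAGE_SHIFT == 12):
 * each submap covers 0x10000000 bytes (256 MB, per the address ranges quoted
 * in the map_file_np comment below), so the cap works out to
 * (2 * 0x10000000) >> 12 = 131072 possible mappings.
 */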
/*
 * shared_region_make_private_np:
 *
 * This system call is for "dyld" only.
 *
 * It creates a private copy of the current process's "shared region" for
 * split libraries.  "dyld" uses this when the shared region is full or
 * it needs to load a split library that conflicts with an already loaded one
 * that this process doesn't need.  "dyld" specifies a set of address ranges
 * that it wants to keep in the now-private "shared region".  These cover
 * the set of split libraries that the process needs so far.  The kernel needs
 * to deallocate the rest of the shared region, so that it's available for
 * more libraries for this process.
 */
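/*
 * Illustrative sketch (not part of the original file) of how dyld would be
 * expected to call this: it passes an array of the address ranges it wants
 * to keep, plus a count.  The exact field names of struct
 * shared_region_range_np are declared elsewhere; treat the layout below as
 * an assumption:
 *
 *	struct shared_region_range_np keep[2];	// ranges of still-needed libs
 *	// fill in keep[i] with the address/size pairs of the split libraries
 *	// this process must retain, then:
 *	shared_region_make_private_np(2, keep);
 *
 * Everything else in the (now private) shared region gets deallocated.
 */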
shared_region_make_private_np(
	struct shared_region_make_private_np_args *uap,
	__unused int *retvalp)

	boolean_t		using_shared_regions;
	user_addr_t		user_ranges;
	unsigned int		range_count;
	vm_size_t		ranges_size;
	struct shared_region_range_np	*ranges;
	shared_region_mapping_t	shared_region;
	struct shared_region_task_mappings	task_mapping_info;
	shared_region_mapping_t	next;

	range_count = uap->rangeCount;
	user_ranges = uap->ranges;
	ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));

		SHARED_REGION_TRACE_INFO,
		("shared_region: %p [%d(%s)] "
		 "make_private(rangecount=%d)\n",
		 current_thread(), p->p_pid, p->p_comm, range_count));

	/* allocate kernel space for the "ranges" */
	if (range_count != 0) {
		if (range_count > SHARED_REGION_MAX_MAPPINGS) {
		if ((mach_vm_size_t) ranges_size !=
		    (mach_vm_size_t) range_count * sizeof (ranges[0])) {
			/* 32-bit integer overflow */
		kr = kmem_alloc(kernel_map,
				(vm_offset_t *) &ranges,
		if (kr != KERN_SUCCESS) {

		/* copy "ranges" from user-space */
		error = copyin(user_ranges,

	if (p->p_flag & P_NOSHLIB) {
		/* no split library has been mapped for this process so far */
		using_shared_regions = FALSE;
		/* this process has already mapped some split libraries */
		using_shared_regions = TRUE;

	/*
	 * Get a private copy of the current shared region.
	 * Do not chain it to the system-wide shared region, as we'll want
	 * to map other split libraries in place of the old ones.  We want
	 * to completely detach from the system-wide shared region and go our
	 * own way after this point, not sharing anything with other processes.
	 */
	error = clone_system_shared_regions(using_shared_regions,
					    FALSE, /* chain_regions */

	/* get info on the newly allocated shared region */
	vm_get_shared_region(current_task(), &shared_region);
	task_mapping_info.self = (vm_offset_t) shared_region;
	shared_region_mapping_info(shared_region,
				   &(task_mapping_info.text_region),
				   &(task_mapping_info.text_size),
				   &(task_mapping_info.data_region),
				   &(task_mapping_info.data_size),
				   &(task_mapping_info.region_mappings),
				   &(task_mapping_info.client_base),
				   &(task_mapping_info.alternate_base),
				   &(task_mapping_info.alternate_next),
				   &(task_mapping_info.fs_base),
				   &(task_mapping_info.system),
				   &(task_mapping_info.flags),

	/*
	 * We now have our private copy of the shared region, as it was before
	 * the call to clone_system_shared_regions().  We now need to clean it
	 * up and keep only the memory areas described by the "ranges" array.
	 */
	kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);

	if (ranges != NULL) {
		kmem_free(kernel_map,
			  (vm_offset_t) ranges,

		SHARED_REGION_TRACE_INFO,
		("shared_region: %p [%d(%s)] "
		 "make_private(rangecount=%d) -> %d "
		 "shared_region=%p[%x,%x,%x]\n",
		 current_thread(), p->p_pid, p->p_comm,
		 range_count, error, shared_region,
		 task_mapping_info.fs_base,
		 task_mapping_info.system,
		 task_mapping_info.flags));
/*
 * shared_region_map_file_np:
 *
 * This system call is for "dyld" only.
 *
 * "dyld" wants to map parts of a split library in the shared region.
 * We get a file descriptor on the split library to be mapped and a set
 * of mapping instructions, describing which parts of the file to map in
 * which areas of the shared segment and with what protection.
 * The "shared region" is split in 2 areas:
 * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
 * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
 */
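/*
 * Illustrative sketch (not part of the original file) of one mapping
 * instruction dyld hands in.  sfm_address, sfm_size and sfm_max_prot are the
 * fields used by the code below; sfm_file_offset and sfm_init_prot are
 * assumed field names shown only to make the example complete:
 *
 *	struct shared_file_mapping_np m;
 *	m.sfm_address     = 0x90000000;		// where in the shared region
 *	m.sfm_size        = 0x1000;		// how much of the file
 *	m.sfm_file_offset = 0;			// where in the file (assumed)
 *	m.sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE;
 *	m.sfm_init_prot   = VM_PROT_READ;	// (assumed)
 *
 * dyld passes an array of these plus a count and, optionally, a pointer
 * through which the kernel reports by how much it had to slide them.
 */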
shared_region_map_file_np(
	struct shared_region_map_file_np_args *uap,
	__unused int *retvalp)

	unsigned int		mapping_count;
	user_addr_t		user_mappings;		/* 64-bit */
	user_addr_t		user_slide_p;		/* 64-bit */
	struct shared_file_mapping_np	*mappings;
	vm_size_t		mappings_size;
	mach_vm_offset_t	slide;
	struct vfs_context	context;
	memory_object_control_t	file_control;
	memory_object_size_t	file_size;
	shared_region_mapping_t	shared_region;
	struct shared_region_task_mappings	task_mapping_info;
	shared_region_mapping_t	next;
	shared_region_mapping_t	default_shared_region;
	boolean_t		using_default_region;
	mach_vm_offset_t	base_offset, end_offset;
	mach_vm_offset_t	original_base_offset;
	boolean_t		mappings_in_segment;
#define SFM_MAX_STACK	6
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	/* get file descriptor for split library from arguments */

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));

	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));

	context.vc_ucred = kauth_cred_get();
	if ((error = vnode_size(vp, &fs, &context)) != 0) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] "
			 "map_file(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp));
	/*
	 * Get the list of mappings the caller wants us to establish.
	 */
	mapping_count = uap->mappingCount;	/* the number of mappings */
	mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
	if (mapping_count == 0) {
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_INFO,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 current_thread(), p->p_pid, p->p_comm,
		error = 0;	/* no mappings: we're done ! */
	} else if (mapping_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
		if (mapping_count > SHARED_REGION_MAX_MAPPINGS) {
		if ((mach_vm_size_t) mappings_size !=
		    (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
			/* 32-bit integer overflow */
		kr = kmem_alloc(kernel_map,
				(vm_offset_t *) &mappings,
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE(
				SHARED_REGION_TRACE_ERROR,
				("shared_region: %p [%d(%s)] "
				 "map_file(%p:'%s'): "
				 "failed to allocate %d mappings (kr=0x%x)\n",
				 current_thread(), p->p_pid, p->p_comm,
				 vp, vp->v_name, mapping_count, kr));

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 "failed to copyin %d mappings (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mapping_count, error));

	/*
	 * If the caller provides a "slide" pointer, it means they're OK
	 * with us moving the mappings around to make them fit.
	 */
	user_slide_p = uap->slide_p;
	/*
	 * Make each mapping address relative to the beginning of the
	 * shared region.  Check that all mappings are in the shared region.
	 * Compute the maximum set of protections required to tell the
	 * buffer cache how we mapped the file (see call to ubc_map() below).
	 */
	max_prot = VM_PROT_NONE;
	mappings_in_segment = TRUE;
	for (j = 0; j < mapping_count; j++) {
		mach_vm_offset_t segment;
		segment = (mappings[j].sfm_address &
			   GLOBAL_SHARED_SEGMENT_MASK);
		if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
		    segment != GLOBAL_SHARED_DATA_SEGMENT) {
			/* this mapping is not in the shared region... */
			if (user_slide_p == NULL) {
				/* ... and we can't slide it in: fail */
				SHARED_REGION_TRACE(
					SHARED_REGION_TRACE_CONFLICT,
					("shared_region: %p [%d(%s)] "
					 "map_file(%p:'%s'): "
					 "mapping %p not in shared segment & "
					 current_thread(), p->p_pid, p->p_comm,
					 mappings[j].sfm_address));
				/* expect all mappings to be outside */
				mappings_in_segment = FALSE;
			} else if (mappings_in_segment != FALSE) {
				/* other mappings were not outside: fail */
				SHARED_REGION_TRACE(
					SHARED_REGION_TRACE_CONFLICT,
					("shared_region: %p [%d(%s)] "
					 "map_file(%p:'%s'): "
					 "mapping %p not in shared segment & "
					 "other mappings in shared segment\n",
					 current_thread(), p->p_pid, p->p_comm,
					 mappings[j].sfm_address));
			/* we'll try and slide that mapping in the segments */
			/* expect all mappings to be inside */
			mappings_in_segment = TRUE;
		} else if (mappings_in_segment != TRUE) {
			/* other mappings were not inside: fail */
			SHARED_REGION_TRACE(
				SHARED_REGION_TRACE_CONFLICT,
				("shared_region: %p [%d(%s)] "
				 "map_file(%p:'%s'): "
				 "mapping %p in shared segment & "
				 "others in shared segment\n",
				 current_thread(), p->p_pid, p->p_comm,
				 mappings[j].sfm_address));
		/* get a relative offset inside the shared segments */
		mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;

		if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
			base_offset = (mappings[j].sfm_address &
				       SHARED_TEXT_REGION_MASK);
		if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
		    mappings[j].sfm_size > end_offset) {
				(mappings[j].sfm_address &
				 SHARED_TEXT_REGION_MASK) +
				mappings[j].sfm_size;
		max_prot |= mappings[j].sfm_max_prot;

	/* Make all mappings relative to the base_offset */
	base_offset = vm_map_trunc_page(base_offset);
	end_offset = vm_map_round_page(end_offset);
	for (j = 0; j < mapping_count; j++) {
		mappings[j].sfm_address -= base_offset;
	original_base_offset = base_offset;
	if (mappings_in_segment == FALSE) {
		/*
		 * We're trying to map a library that was not pre-bound to
		 * be in the shared segments.  We want to try and slide it
		 * back into the shared segments but as far back as possible,
		 * so that it doesn't clash with pre-bound libraries.  Set
		 * the base_offset to the end of the region, so that it can't
		 * possibly fit there and will have to be slid.
		 */
		base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
	/* get the file's memory object handle */
	UBCINFOCHECK("shared_region_map_file_np", vp);
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 "ubc_getobject() failed\n",
			 current_thread(), p->p_pid, p->p_comm,

	/*
	 * Get info about the current process's shared region.
	 * This might change if we decide we need to clone the shared region.
	 */
	vm_get_shared_region(current_task(), &shared_region);
	task_mapping_info.self = (vm_offset_t) shared_region;
	shared_region_mapping_info(shared_region,
				   &(task_mapping_info.text_region),
				   &(task_mapping_info.text_size),
				   &(task_mapping_info.data_region),
				   &(task_mapping_info.data_size),
				   &(task_mapping_info.region_mappings),
				   &(task_mapping_info.client_base),
				   &(task_mapping_info.alternate_base),
				   &(task_mapping_info.alternate_next),
				   &(task_mapping_info.fs_base),
				   &(task_mapping_info.system),
				   &(task_mapping_info.flags),

	/*
	 * Are we using the system's current shared region
	 * for this environment ?
	 */
	default_shared_region =
		lookup_default_shared_region(ENV_DEFAULT_ROOT,
					     task_mapping_info.system);
	if (shared_region == default_shared_region) {
		using_default_region = TRUE;
		using_default_region = FALSE;
	shared_region_mapping_dealloc(default_shared_region);

	if (vp->v_mount != rootvnode->v_mount &&
	    using_default_region) {
		/*
		 * The split library is not on the root filesystem.  We don't
		 * want to pollute the system-wide ("default") shared region.
		 * Reject the mapping.  The caller (dyld) should "privatize"
		 * (via shared_region_make_private()) the shared region and
		 * try to establish the mapping privately for this process.
		 */
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_CONFLICT,
			("shared_region: %p [%d(%s)] "
			 "map_file(%p:'%s'): "
			 "not on root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
	/*
	 * Map the split library.
	 */
	kr = map_shared_file(mapping_count,
			     (user_slide_p) ? &slide : NULL);

	if (kr == KERN_SUCCESS) {
		/*
		 * The mapping was successful.  Let the buffer cache know
		 * that we've mapped that file with these protections.  This
		 * prevents the vnode from getting recycled while it's mapped.
		 */
		(void) ubc_map(vp, max_prot);
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_CONFLICT,
			("shared_region: %p [%d(%s)] "
			 "map_file(%p:'%s'): "
			 "map_shared_file failed, kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		case KERN_INVALID_ADDRESS:
		case KERN_PROTECTION_FAILURE:
		case KERN_INVALID_ARGUMENT:

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		p->p_flag &= ~P_NOSHLIB;

		/*
		 * The caller provided a pointer to a "slide" offset.  Let
		 * them know by how much we slid the mappings.
		 */
		if (mappings_in_segment == FALSE) {
			/*
			 * We faked the base_offset earlier, so undo that
			 * and take into account the real base_offset.
			 */
			slide += SHARED_TEXT_REGION_SIZE - end_offset;
			slide -= original_base_offset;
			/*
			 * The mappings were slid into the shared segments
			 * and "slide" is relative to the beginning of the
			 * shared segments.  Adjust it to be absolute.
			 */
			slide += GLOBAL_SHARED_TEXT_SEGMENT;
		error = copyout(&slide,
			SHARED_REGION_TRACE(
				SHARED_REGION_TRACE_CONFLICT,
				("shared_region: %p [%d(%s)] "
				 "map_file(%p:'%s'): "
				 current_thread(), p->p_pid, p->p_comm,
				 vp, vp->v_name, slide));

	/*
	 * release the vnode...
	 * ubc_map() still holds it for us in the non-error case
	 */
	(void) vnode_put(vp);

	/* release the file descriptor */
	fp_drop(p, fd, fp, 0);
	if (mappings != NULL &&
	    mappings != &stack_mappings[0]) {
		kmem_free(kernel_map,
			  (vm_offset_t) mappings,

	__unused struct proc *p,
	__unused struct load_shared_file_args *uap,
	__unused int *retval)

	__unused struct proc *p,
	__unused struct reset_shared_file_args *uap,
	__unused int *retval)

new_system_shared_regions(
	__unused struct proc *p,
	__unused struct new_system_shared_regions_args *uap,
	__unused int *retval)
clone_system_shared_regions(
	int shared_regions_active,

	shared_region_mapping_t	new_shared_region;
	shared_region_mapping_t	next;
	shared_region_mapping_t	old_shared_region;
	struct shared_region_task_mappings old_info;
	struct shared_region_task_mappings new_info;

	vm_get_shared_region(current_task(), &old_shared_region);
	old_info.self = (vm_offset_t)old_shared_region;
	shared_region_mapping_info(old_shared_region,
				   &(old_info.text_region),
				   &(old_info.text_size),
				   &(old_info.data_region),
				   &(old_info.data_size),
				   &(old_info.region_mappings),
				   &(old_info.client_base),
				   &(old_info.alternate_base),
				   &(old_info.alternate_next),
				   &(old_info.fs_base),
				   &(old_info.flags), &next);

	if (shared_regions_active ||
	    base_vnode == ENV_DEFAULT_ROOT) {
		if (shared_file_create_system_region(&new_shared_region,
		if (old_shared_region &&
		    base_vnode == ENV_DEFAULT_ROOT) {
			base_vnode = old_info.fs_base;
			lookup_default_shared_region(base_vnode,
		if (new_shared_region == NULL) {
			shared_file_boot_time_init(base_vnode,
			vm_get_shared_region(current_task(),
					     &new_shared_region);
		vm_set_shared_region(current_task(), new_shared_region);
	if (old_shared_region)
		shared_region_mapping_dealloc(old_shared_region);

	new_info.self = (vm_offset_t)new_shared_region;
	shared_region_mapping_info(new_shared_region,
				   &(new_info.text_region),
				   &(new_info.text_size),
				   &(new_info.data_region),
				   &(new_info.data_size),
				   &(new_info.region_mappings),
				   &(new_info.client_base),
				   &(new_info.alternate_base),
				   &(new_info.alternate_next),
				   &(new_info.fs_base),
				   &(new_info.flags), &next);

	if(shared_regions_active) {
		if(vm_region_clone(old_info.text_region, new_info.text_region)) {
			panic("clone_system_shared_regions: shared region mis-alignment 1");
			shared_region_mapping_dealloc(new_shared_region);
		if (vm_region_clone(old_info.data_region, new_info.data_region)) {
			panic("clone_system_shared_regions: shared region mis-alignment 2");
			shared_region_mapping_dealloc(new_shared_region);
		if (chain_regions) {
			/*
			 * We want a "shadowed" clone, a private superset of the old
			 * shared region.  The info about the old mappings is still
			 */
			shared_region_object_chain_attach(
				new_shared_region, old_shared_region);

	if (!chain_regions) {
		/*
		 * We want a completely detached clone with no link to
		 * the old shared region.  We'll be removing some mappings
		 * in our private, cloned, shared region, so the old mappings
		 * will become irrelevant to us.  Since we have a private
		 * "shared region" now, it isn't going to be shared with
		 * anyone else and we won't need to maintain mappings info.
		 */
		shared_region_object_chain_detached(new_shared_region);

	if (vm_map_region_replace(current_map(), old_info.text_region,
				  new_info.text_region, old_info.client_base,
				  old_info.client_base+old_info.text_size)) {
		panic("clone_system_shared_regions: shared region mis-alignment 3");
		shared_region_mapping_dealloc(new_shared_region);
	if(vm_map_region_replace(current_map(), old_info.data_region,
				 new_info.data_region,
				 old_info.client_base + old_info.text_size,
				 old_info.client_base
				 + old_info.text_size + old_info.data_size)) {
		panic("clone_system_shared_regions: shared region mis-alignment 4");
		shared_region_mapping_dealloc(new_shared_region);
	vm_set_shared_region(current_task(), new_shared_region);

	/* consume the reference which wasn't accounted for in object */
	if (!shared_regions_active || !chain_regions)
		shared_region_mapping_dealloc(old_shared_region);

	SHARED_REGION_TRACE(
		SHARED_REGION_TRACE_INFO,
		("shared_region: %p task=%p "
		 "clone(active=%d, base=0x%x,chain=%d) "
		 "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
		 current_thread(), current_task(),
		 shared_regions_active, base_vnode, chain_regions,
/* header for the profile name file.  The profiled app info is held */
/* in the data file and pointed to by elements in the name file     */

struct profile_names_header {
	unsigned int	number_of_profiles;
	unsigned int	user_id;
	unsigned int	version;
	off_t		element_array;
	unsigned int	spare1;
	unsigned int	spare2;
	unsigned int	spare3;

struct profile_element {
	unsigned int	mod_date;

struct global_profile {
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	vm_offset_t	buf_ptr;

struct global_profile_cache {
	struct global_profile	profiles[3];

/* forward declarations */
int bsd_open_page_cache_files(unsigned int user,
			      struct global_profile **profile);
void bsd_close_page_cache_files(struct global_profile *profile);
int bsd_search_page_cache_data_base(
	struct profile_names_header *database,
	unsigned int mod_date,
	unsigned int *profile_size);

struct global_profile_cache global_user_profile_cache =
	{3, 0, {{NULL, NULL, 0, 0, 0, 0},
		{NULL, NULL, 0, 0, 0, 0},
		{NULL, NULL, 0, 0, 0, 0}} };
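/*
 * Illustrative sketch (not part of the original file) of the on-disk layout
 * implied by the structures above: the "<uid>_names" file starts with a
 * profile_names_header whose element_array offset points at an array of
 * profile_element entries; each entry identifies an application (name,
 * inode, mod_date) and gives the offset/size of its profile in the
 * "<uid>_data" file.  Fields not listed above (name, inode, addr, size) are
 * the ones the lookup code below relies on.
 */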
/* BSD_OPEN_PAGE_CACHE_FILES:					*/
/* Caller provides a user id.  This id was used in		*/
/* prepare_profile_database to create two unique absolute	*/
/* file paths to the associated profile files.  These files	*/
/* are either opened or bsd_open_page_cache_files returns an	*/
/* error.  The header of the names file is then consulted.	*/
/* The header and the vnodes for the names and data files are	*/

bsd_open_page_cache_files(
	struct global_profile **profile)

	const char	*cache_path = "/var/vm/app_profile/";
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	vm_offset_t	names_buf;
	vm_offset_t	buf_ptr;
	int		profile_names_length;
	int		profile_data_length;
	char		*profile_data_string;
	char		*profile_names_string;
	struct vfs_context	context;
	struct nameidata	nd_names;
	struct nameidata	nd_data;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();
	for(i = 0; i < global_user_profile_cache.max_ele; i++) {
		if((global_user_profile_cache.profiles[i].user == user)
			&& (global_user_profile_cache.profiles[i].data_vp
			*profile = &global_user_profile_cache.profiles[i];
			/* already in cache, we're done */
			if ((*profile)->busy) {
				/*
				 * drop funnel and wait
				 */
				(void)tsleep((void *)
					PRIBIO, "app_profile", 0);
			(*profile)->busy = 1;
			(*profile)->age = global_user_profile_cache.age;

			/*
			 * entries in cache are held with a valid
			 * usecount... take an iocount which will
			 * be dropped in "bsd_close_page_cache_files"
			 * which is called after the read or writes to
			 * these files are done
			 */
			if ( (vnode_getwithref((*profile)->data_vp)) ) {
				vnode_rele((*profile)->data_vp);
				vnode_rele((*profile)->names_vp);
				(*profile)->data_vp = NULL;
				(*profile)->busy = 0;

			if ( (vnode_getwithref((*profile)->names_vp)) ) {
				vnode_put((*profile)->data_vp);
				vnode_rele((*profile)->data_vp);
				vnode_rele((*profile)->names_vp);
				(*profile)->data_vp = NULL;
				(*profile)->busy = 0;

			global_user_profile_cache.age += 1;
	lru = global_user_profile_cache.age;

	for(i = 0; i < global_user_profile_cache.max_ele; i++) {
		/* Skip entry if it is in the process of being reused */
		if(global_user_profile_cache.profiles[i].data_vp ==
		   (struct vnode *)0xFFFFFFFF)

		/* Otherwise grab the first empty entry */
		if(global_user_profile_cache.profiles[i].data_vp == NULL) {
			*profile = &global_user_profile_cache.profiles[i];
			(*profile)->age = global_user_profile_cache.age;

		/* Otherwise grab the oldest entry */
		if(global_user_profile_cache.profiles[i].age < lru) {
			lru = global_user_profile_cache.profiles[i].age;
			*profile = &global_user_profile_cache.profiles[i];

	/* Did we set it? */
	if (*profile == NULL) {
		/*
		 * No entries are available; this can only happen if all
		 * of them are currently in the process of being reused;
		 * if this happens, we sleep on the address of the first
		 * element, and restart.  This is less than ideal, but we
		 * know it will work because we know that there will be a
		 * wakeup on any entry currently in the process of being
		 *
		 * XXX Recommend a two-handed clock and more than 3 total
		 * XXX cache entries at some point in the future.
		 */
		/*
		 * drop funnel and wait
		 */
		(void)tsleep((void *)
			&global_user_profile_cache.profiles[0],
			PRIBIO, "app_profile", 0);
	/*
	 * If it's currently busy, we've picked the one at the end of the
	 * LRU list, but it's currently being actively used.  We sleep on
	 * its address and restart.
	 */
	if ((*profile)->busy) {
		/*
		 * drop funnel and wait
		 */
		(void)tsleep((void *)
			PRIBIO, "app_profile", 0);
	(*profile)->busy = 1;
	(*profile)->user = user;

	/*
	 * put dummy value in for now to get competing request to wait
	 * above until we are finished
	 *
	 * Save the data_vp before setting it, so we can set it before
	 * we kmem_free() or vrele().  If we don't do this, then we
	 * have a potential funnel race condition we have to deal with.
	 */
	data_vp = (*profile)->data_vp;
	(*profile)->data_vp = (struct vnode *)0xFFFFFFFF;

	/*
	 * Age the cache here in all cases; this guarantees that we won't
	 * be reusing only one entry over and over, once the system reaches
	 */
	global_user_profile_cache.age += 1;

	if(data_vp != NULL) {
		kmem_free(kernel_map,
			(*profile)->buf_ptr, 4 * PAGE_SIZE);
		if ((*profile)->names_vp) {
			vnode_rele((*profile)->names_vp);
			(*profile)->names_vp = NULL;
		vnode_rele(data_vp);
	/* Try to open the appropriate user's profile files	*/
	/* If neither file is present, try to create them	*/
	/* If one file is present and the other not, fail.	*/
	/* If the files do exist, check them for the app_file	*/
	/* requested and read it in if present			*/

	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&profile_data_string, PATH_MAX);

		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	/* Split the buffer in half since we know the size of	*/
	/* our file path and our allocation is adequate for	*/
	/* both file path names					*/
	profile_names_string = profile_data_string + (PATH_MAX/2);

	strcpy(profile_data_string, cache_path);
	strcpy(profile_names_string, cache_path);
	profile_names_length = profile_data_length
		= strlen(profile_data_string);
	substring = profile_data_string + profile_data_length;
	sprintf(substring, "%x_data", user);
	substring = profile_names_string + profile_names_length;
	sprintf(substring, "%x_names", user);

	/* We now have the absolute file names */
	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);
		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
	NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);

	if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
		printf("bsd_open_page_cache_files: CacheData file not found %s\n",
			profile_data_string);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);
		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	data_vp = nd_data.ni_vp;

	if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
		printf("bsd_open_page_cache_files: NamesData file not found %s\n",
			profile_data_string);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

		vnode_rele(data_vp);

		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	names_vp = nd_names.ni_vp;

	if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
		printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);

		vnode_rele(names_vp);
		vnode_put(names_vp);
		vnode_rele(data_vp);

		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	if(size > 4 * PAGE_SIZE)
		size = 4 * PAGE_SIZE;

	buf_ptr = names_buf;

		error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
			UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
		resid = (vm_size_t) resid_int;
		if((error) || (size == resid)) {
			kmem_free(kernel_map,
				(vm_offset_t)profile_data_string, PATH_MAX);
			kmem_free(kernel_map,
				(vm_offset_t)names_buf, 4 * PAGE_SIZE);

			vnode_rele(names_vp);
			vnode_put(names_vp);
			vnode_rele(data_vp);

			(*profile)->data_vp = NULL;
			(*profile)->busy = 0;

		buf_ptr += size-resid;
		resid_off += size-resid;

	kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);

	(*profile)->names_vp = names_vp;
	(*profile)->data_vp = data_vp;
	(*profile)->buf_ptr = names_buf;

	/*
	 * at this point, both the names_vp and the data_vp have
	 * a valid usecount and an iocount held
	 */
bsd_close_page_cache_files(
	struct global_profile *profile)

	vnode_put(profile->data_vp);
	vnode_put(profile->names_vp);
bsd_read_page_cache_file(
	struct vnode	*app_vp,
	vm_offset_t	*buffer,
	vm_offset_t	*bufsize)

	boolean_t	funnel_state;
	unsigned int	profile_size;
	vm_offset_t	names_buf;
	struct vnode_attr	va;
	struct vfs_context	context;
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	struct global_profile	*uid_files;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/* Try to open the appropriate user's profile files	*/
	/* If neither file is present, try to create them	*/
	/* If one file is present and the other not, fail.	*/
	/* If the files do exist, check them for the app_file	*/
	/* requested and read it in if present			*/

	error = bsd_open_page_cache_files(user, &uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	names_vp = uid_files->names_vp;
	data_vp = uid_files->data_vp;
	names_buf = uid_files->buf_ptr;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	VATTR_WANTED(&va, va_fileid);
	VATTR_WANTED(&va, va_modify_time);

	if ((error = vnode_getattr(app_vp, &va, &context))) {
		printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	*fid = (u_long)va.va_fileid;
	*mod = va.va_modify_time.tv_sec;

	if (bsd_search_page_cache_data_base(
		(struct profile_names_header *)names_buf,
		(unsigned int) va.va_modify_time.tv_sec,
		(u_long)va.va_fileid, &profile, &profile_size) == 0) {
		/* profile is an offset in the profile data base */
		/* It is zero if no profile data was found */

		if(profile_size == 0) {
			bsd_close_page_cache_files(uid_files);
			thread_funnel_set(kernel_flock, funnel_state);

		ret = (vm_offset_t)(kmem_alloc(kernel_map, buffer, profile_size));
			bsd_close_page_cache_files(uid_files);
			thread_funnel_set(kernel_flock, funnel_state);

		*bufsize = profile_size;
		while(profile_size) {
			error = vn_rdwr(UIO_READ, data_vp,
				(caddr_t) *buffer, profile_size,
				profile, UIO_SYSSPACE32, IO_NODELOCKED,
				kauth_cred_get(), &resid_int, p);
			resid = (vm_size_t) resid_int;
			if((error) || (profile_size == resid)) {
				bsd_close_page_cache_files(uid_files);
				kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
				thread_funnel_set(kernel_flock, funnel_state);
			profile += profile_size - resid;
			profile_size = resid;
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	bsd_close_page_cache_files(uid_files);
	thread_funnel_set(kernel_flock, funnel_state);
bsd_search_page_cache_data_base(
	struct profile_names_header *database,
	unsigned int	mod_date,
	unsigned int	*profile_size)

	struct profile_element	*element;
	unsigned int	ele_total;
	unsigned int	extended_list = 0;
	vm_offset_t	local_buf = 0;

	if(((vm_offset_t)database->element_array) !=
		sizeof(struct profile_names_header)) {

	element = (struct profile_element *)(
		(vm_offset_t)database->element_array +
		(vm_offset_t)database);

	ele_total = database->number_of_profiles;

	/* note: code assumes header + n*ele comes out on a page boundary */
	if(((local_buf == 0) && (sizeof(struct profile_names_header) +
		(ele_total * sizeof(struct profile_element)))
		> (PAGE_SIZE * 4)) ||
		((local_buf != 0) &&
		(ele_total * sizeof(struct profile_element))
		> (PAGE_SIZE * 4))) {
		extended_list = ele_total;
		if(element == (struct profile_element *)
			((vm_offset_t)database->element_array +
			(vm_offset_t)database)) {
			ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
			ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
		extended_list -= ele_total;

	for (i = 0; i < ele_total; i++) {
		if((mod_date == element[i].mod_date)
			&& (inode == element[i].inode)) {
			if(strncmp(element[i].name, app_name, 12) == 0) {
				*profile = element[i].addr;
				*profile_size = element[i].size;
				if(local_buf != 0) {
					kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);

	if(extended_list == 0)

	if(local_buf == 0) {
		ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
		if(ret != KERN_SUCCESS) {

	element = (struct profile_element *)local_buf;
	ele_total = extended_list;
	file_off += 4 * PAGE_SIZE;
	if((ele_total * sizeof(struct profile_element)) >
		size = PAGE_SIZE * 4;
		size = ele_total * sizeof(struct profile_element);

		error = vn_rdwr(UIO_READ, vp,
			CAST_DOWN(caddr_t, (local_buf + resid_off)),
			size, file_off + resid_off, UIO_SYSSPACE32,
			IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
		resid = (vm_size_t) resid_int;
		if((error) || (size == resid)) {
			if(local_buf != 0) {
				kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
		resid_off += size-resid;

	if(local_buf != 0) {
		kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
bsd_write_page_cache_file(

	boolean_t	funnel_state;
	struct vfs_context	context;
	unsigned int	profile_size;
	vm_offset_t	names_buf;
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	struct profile_names_header	*profile_header;
	struct global_profile	*uid_files;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	error = bsd_open_page_cache_files(user, &uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	names_vp = uid_files->names_vp;
	data_vp = uid_files->data_vp;
	names_buf = uid_files->buf_ptr;

	/* Stat data file for size */

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
		printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	if (bsd_search_page_cache_data_base(names_vp,
			(struct profile_names_header *)names_buf,
			file_name, (unsigned int) mod,
			fid, &profile, &profile_size) == 0) {
		/* profile is an offset in the profile data base */
		/* It is zero if no profile data was found */

		if(profile_size == 0) {
			unsigned int	header_size;
			vm_offset_t	buf_ptr;

			/* Our Write case */

			/* read header for last entry */
				(struct profile_names_header *)names_buf;
			name_offset = sizeof(struct profile_names_header) +
				(sizeof(struct profile_element)
					* profile_header->number_of_profiles);
			profile_header->number_of_profiles += 1;

			if(name_offset < PAGE_SIZE * 4) {
				struct profile_element	*name;
				/* write new entry */
				name = (struct profile_element *)
					(names_buf + (vm_offset_t)name_offset);
				name->addr = file_size;
				name->mod_date = mod;
				strncpy (name->name, file_name, 12);
				unsigned int	ele_size;
				struct profile_element	name;
				/* write new entry */
				name.addr = file_size;
				name.mod_date = mod;
				strncpy (name.name, file_name, 12);
				/* write element out separately */
				ele_size = sizeof(struct profile_element);
				buf_ptr = (vm_offset_t)&name;
				resid_off = name_offset;

					error = vn_rdwr(UIO_WRITE, names_vp,
							ele_size, resid_off,
							UIO_SYSSPACE32, IO_NODELOCKED,
							kauth_cred_get(), &resid, p);
						printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
						bsd_close_page_cache_files(
					buf_ptr += (vm_offset_t)
					resid_off += ele_size-resid;

			if(name_offset < PAGE_SIZE * 4) {
				header_size = name_offset +
					sizeof(struct profile_element);
					sizeof(struct profile_names_header);
			buf_ptr = (vm_offset_t)profile_header;

			/* write names file header */
			while(header_size) {
				error = vn_rdwr(UIO_WRITE, names_vp,
						header_size, resid_off,
						UIO_SYSSPACE32, IO_NODELOCKED,
						kauth_cred_get(), &resid, p);
					printf("bsd_write_page_cache_file: Can't write header %x\n", user);
					bsd_close_page_cache_files(
						kernel_flock, funnel_state);
				buf_ptr += (vm_offset_t)header_size-resid;
				resid_off += header_size-resid;
				header_size = resid;

			/* write profile to data file */
			resid_off = file_size;
				error = vn_rdwr(UIO_WRITE, data_vp,
					(caddr_t)buffer, size, resid_off,
					UIO_SYSSPACE32, IO_NODELOCKED,
					kauth_cred_get(), &resid, p);
					printf("bsd_write_page_cache_file: Can't write header %x\n", user);
					bsd_close_page_cache_files(
						kernel_flock, funnel_state);
				buffer += size-resid;
				resid_off += size-resid;

			bsd_close_page_cache_files(uid_files);
			thread_funnel_set(kernel_flock, funnel_state);

		/* Someone else wrote a twin profile before us */
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	bsd_close_page_cache_files(uid_files);
	thread_funnel_set(kernel_flock, funnel_state);
prepare_profile_database(int user)

	const char	*cache_path = "/var/vm/app_profile/";
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	vm_offset_t	names_buf;
	vm_offset_t	buf_ptr;
	int		profile_names_length;
	int		profile_data_length;
	char		*profile_data_string;
	char		*profile_names_string;
	struct vnode_attr	va;
	struct vfs_context	context;
	struct profile_names_header	*profile_header;
	struct nameidata	nd_names;
	struct nameidata	nd_data;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&profile_data_string, PATH_MAX);

	/* Split the buffer in half since we know the size of	*/
	/* our file path and our allocation is adequate for	*/
	/* both file path names					*/
	profile_names_string = profile_data_string + (PATH_MAX/2);

	strcpy(profile_data_string, cache_path);
	strcpy(profile_names_string, cache_path);
	profile_names_length = profile_data_length
		= strlen(profile_data_string);
	substring = profile_data_string + profile_data_length;
	sprintf(substring, "%x_data", user);
	substring = profile_names_string + profile_names_length;
	sprintf(substring, "%x_names", user);

	/* We now have the absolute file names */

	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

	NDINIT(&nd_names, LOOKUP, FOLLOW,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
	NDINIT(&nd_data, LOOKUP, FOLLOW,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);

	if ( (error = vn_open(&nd_data,
			      O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

	data_vp = nd_data.ni_vp;

	if ( (error = vn_open(&nd_names,
			      O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
		printf("prepare_profile_database: Can't create CacheNames %s\n",
			profile_data_string);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

		vnode_rele(data_vp);

	names_vp = nd_names.ni_vp;

	/* Write Header for new names file */

	profile_header = (struct profile_names_header *)names_buf;

	profile_header->number_of_profiles = 0;
	profile_header->user_id = user;
	profile_header->version = 1;
	profile_header->element_array =
		sizeof(struct profile_names_header);
	profile_header->spare1 = 0;
	profile_header->spare2 = 0;
	profile_header->spare3 = 0;

	size = sizeof(struct profile_names_header);
	buf_ptr = (vm_offset_t)profile_header;

		error = vn_rdwr(UIO_WRITE, names_vp,
				(caddr_t)buf_ptr, size, resid_off,
				UIO_SYSSPACE32, IO_NODELOCKED,
				kauth_cred_get(), &resid, p);
			printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
			kmem_free(kernel_map,
				(vm_offset_t)names_buf, 4 * PAGE_SIZE);
			kmem_free(kernel_map,
				(vm_offset_t)profile_data_string,
			vnode_rele(names_vp);
			vnode_put(names_vp);
			vnode_rele(data_vp);

		buf_ptr += size-resid;
		resid_off += size-resid;

	VATTR_SET(&va, va_uid, user);
	error = vnode_setattr(names_vp, &va, &context);
		printf("prepare_profile_database: "
			"Can't set user %s\n", profile_names_string);

	vnode_rele(names_vp);
	vnode_put(names_vp);

	VATTR_SET(&va, va_uid, user);
	error = vnode_setattr(data_vp, &va, &context);
		printf("prepare_profile_database: "
			"Can't set user %s\n", profile_data_string);
	vnode_rele(data_vp);

	kmem_free(kernel_map,
		(vm_offset_t)profile_data_string, PATH_MAX);
	kmem_free(kernel_map,
		(vm_offset_t)names_buf, 4 * PAGE_SIZE);