apple/xnu.git: bsd/vm/vm_unix.c (blob d60fc526cb196113e74771dba0e02bf6ea4d911c)
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34
35 /*
36 */
37
38
39 #include <meta_features.h>
40
41 #include <kern/task.h>
42 #include <kern/thread.h>
43 #include <kern/debug.h>
44 #include <kern/lock.h>
45 #include <mach/mach_traps.h>
46 #include <mach/time_value.h>
47 #include <mach/vm_map.h>
48 #include <mach/vm_param.h>
49 #include <mach/vm_prot.h>
50 #include <mach/port.h>
51
52 #include <sys/file_internal.h>
53 #include <sys/param.h>
54 #include <sys/systm.h>
55 #include <sys/dir.h>
56 #include <sys/namei.h>
57 #include <sys/proc_internal.h>
58 #include <sys/kauth.h>
59 #include <sys/vm.h>
60 #include <sys/file.h>
61 #include <sys/vnode_internal.h>
62 #include <sys/mount.h>
63 #include <sys/trace.h>
64 #include <sys/kernel.h>
65 #include <sys/ubc_internal.h>
66 #include <sys/user.h>
67 #include <sys/syslog.h>
68 #include <sys/stat.h>
69 #include <sys/sysproto.h>
70 #include <sys/mman.h>
71 #include <sys/sysctl.h>
72
73 #include <bsm/audit_kernel.h>
74 #include <bsm/audit_kevents.h>
75
76 #include <kern/kalloc.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79
80 #include <machine/spl.h>
81
82 #include <mach/shared_memory_server.h>
83 #include <vm/vm_shared_memory_server.h>
84
85 #include <vm/vm_protos.h>
86
87 void
88 log_nx_failure(addr64_t vaddr, vm_prot_t prot)
89 {
90 printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
91 }
92
93
94 int
95 useracc(
96 user_addr_t addr,
97 user_size_t len,
98 int prot)
99 {
100 return (vm_map_check_protection(
101 current_map(),
102 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
103 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
104 }
105
106 int
107 vslock(
108 user_addr_t addr,
109 user_size_t len)
110 {
111 kern_return_t kret;
112 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
113 vm_map_round_page(addr+len),
114 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
115
116 switch (kret) {
117 case KERN_SUCCESS:
118 return (0);
119 case KERN_INVALID_ADDRESS:
120 case KERN_NO_SPACE:
121 return (ENOMEM);
122 case KERN_PROTECTION_FAILURE:
123 return (EACCES);
124 default:
125 return (EINVAL);
126 }
127 }
128
129 int
130 vsunlock(
131 user_addr_t addr,
132 user_size_t len,
133 __unused int dirtied)
134 {
135 #if FIXME /* [ */
136 pmap_t pmap;
137 vm_page_t pg;
138 vm_map_offset_t vaddr;
139 ppnum_t paddr;
140 #endif /* FIXME ] */
141 kern_return_t kret;
142
143 #if FIXME /* [ */
144 if (dirtied) {
145 pmap = get_task_pmap(current_task());
146 for (vaddr = vm_map_trunc_page(addr);
147 vaddr < vm_map_round_page(addr+len);
148 vaddr += PAGE_SIZE) {
149 paddr = pmap_extract(pmap, vaddr);
150 pg = PHYS_TO_VM_PAGE(paddr);
151 vm_page_set_modified(pg);
152 }
153 }
154 #endif /* FIXME ] */
155 #ifdef lint
156 dirtied++;
157 #endif /* lint */
158 kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
159 vm_map_round_page(addr+len), FALSE);
160 switch (kret) {
161 case KERN_SUCCESS:
162 return (0);
163 case KERN_INVALID_ADDRESS:
164 case KERN_NO_SPACE:
165 return (ENOMEM);
166 case KERN_PROTECTION_FAILURE:
167 return (EACCES);
168 default:
169 return (EINVAL);
170 }
171 }
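/*
 * Illustrative usage (not part of the original file): code that copies
 * data into a user buffer while the pages must stay resident typically
 * brackets the copy with vslock()/vsunlock().  The helper name and the
 * source data below are hypothetical.
 *
 *	int
 *	example_fill_user_buffer(user_addr_t uaddr, user_size_t ulen)
 *	{
 *		char kbuf[64] = { 0 };		// hypothetical kernel data
 *		int error;
 *
 *		if (ulen > sizeof(kbuf))
 *			ulen = sizeof(kbuf);
 *		error = vslock(uaddr, ulen);	// wire the user pages
 *		if (error)
 *			return (error);
 *		error = copyout(kbuf, uaddr, ulen);
 *		vsunlock(uaddr, ulen, 1);	// unwire; pages were written
 *		return (error);
 *	}
 */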
172
173 int
174 subyte(
175 user_addr_t addr,
176 int byte)
177 {
178 char character;
179
180 character = (char)byte;
181 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
182 }
183
184 int
185 suibyte(
186 user_addr_t addr,
187 int byte)
188 {
189 char character;
190
191 character = (char)byte;
192 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
193 }
194
195 int fubyte(user_addr_t addr)
196 {
197 unsigned char byte;
198
199 if (copyin(addr, (void *) &byte, sizeof(char)))
200 return(-1);
201 return(byte);
202 }
203
204 int fuibyte(user_addr_t addr)
205 {
206 unsigned char byte;
207
208 if (copyin(addr, (void *) &(byte), sizeof(char)))
209 return(-1);
210 return(byte);
211 }
212
213 int
214 suword(
215 user_addr_t addr,
216 long word)
217 {
218 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
219 }
220
221 long fuword(user_addr_t addr)
222 {
223 long word;
224
225 if (copyin(addr, (void *) &word, sizeof(int)))
226 return(-1);
227 return(word);
228 }
229
230 /* suiword and fuiword are the same as suword and fuword, respectively */
231
232 int
233 suiword(
234 user_addr_t addr,
235 long word)
236 {
237 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
238 }
239
240 long fuiword(user_addr_t addr)
241 {
242 long word;
243
244 if (copyin(addr, (void *) &word, sizeof(int)))
245 return(-1);
246 return(word);
247 }
248
249 /*
250 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
251 * fetching and setting of process-sized size_t and pointer values.
252 */
253 int
254 sulong(user_addr_t addr, int64_t word)
255 {
256
257 if (IS_64BIT_PROCESS(current_proc())) {
258 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
259 } else {
260 return(suiword(addr, (long)word));
261 }
262 }
263
264 int64_t
265 fulong(user_addr_t addr)
266 {
267 int64_t longword;
268
269 if (IS_64BIT_PROCESS(current_proc())) {
270 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
271 return(-1);
272 return(longword);
273 } else {
274 return((int64_t)fuiword(addr));
275 }
276 }
277
278 int
279 suulong(user_addr_t addr, uint64_t uword)
280 {
281
282 if (IS_64BIT_PROCESS(current_proc())) {
283 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
284 } else {
285 return(suiword(addr, (u_long)uword));
286 }
287 }
288
289 uint64_t
290 fuulong(user_addr_t addr)
291 {
292 uint64_t ulongword;
293
294 if (IS_64BIT_PROCESS(current_proc())) {
295 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
296 return(-1ULL);
297 return(ulongword);
298 } else {
299 return((uint64_t)fuiword(addr));
300 }
301 }
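/*
 * Illustrative example (not part of the original file): with the helpers
 * above, reading and updating a pointer-sized user value works the same
 * way for 32-bit and 64-bit tasks.  "uptr" is a hypothetical user address.
 *
 *	user_addr_t uptr;		// hypothetical user address
 *	int64_t val;
 *
 *	val = fulong(uptr);		// copies 4 bytes for a 32-bit task,
 *					// 8 bytes for a 64-bit task
 *	if (val == -1)
 *		;			// may be an error or a legitimate -1
 *	if (sulong(uptr, val + 1) != 0)
 *		;			// copyout failed
 */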
302
303 int
304 swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
305 {
306 return(ENOTSUP);
307 }
308
309
310 kern_return_t
311 pid_for_task(
312 struct pid_for_task_args *args)
313 {
314 mach_port_name_t t = args->t;
315 user_addr_t pid_addr = args->pid;
316 struct proc * p;
317 task_t t1;
318 int pid = -1;
319 kern_return_t err = KERN_SUCCESS;
320 boolean_t funnel_state;
321
322 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
323 AUDIT_ARG(mach_port1, t);
324
325 funnel_state = thread_funnel_set(kernel_flock, TRUE);
326 t1 = port_name_to_task(t);
327
328 if (t1 == TASK_NULL) {
329 err = KERN_FAILURE;
330 goto pftout;
331 } else {
332 p = get_bsdtask_info(t1);
333 if (p) {
334 pid = proc_pid(p);
335 err = KERN_SUCCESS;
336 } else {
337 err = KERN_FAILURE;
338 }
339 }
340 task_deallocate(t1);
341 pftout:
342 AUDIT_ARG(pid, pid);
343 (void) copyout((char *) &pid, pid_addr, sizeof(int));
344 thread_funnel_set(kernel_flock, funnel_state);
345 AUDIT_MACH_SYSCALL_EXIT(err);
346 return(err);
347 }
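/*
 * Illustrative user-space usage (not part of the original file), assuming
 * the standard trap prototype kern_return_t pid_for_task(mach_port_name_t,
 * int *) from <mach/mach_traps.h>:
 *
 *	mach_port_name_t task;		// e.g. obtained via task_for_pid()
 *	int pid;
 *
 *	if (pid_for_task(task, &pid) == KERN_SUCCESS)
 *		printf("task maps to pid %d\n", pid);
 */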
348
349 /*
350 * Routine: task_for_pid
351 * Purpose:
352 * Get the task port for another "process", named by its
353 * process ID on the same host as "target_task".
354 *
355 * Only permitted to privileged processes, or processes
356 * with the same user ID.
357 *
358 * XXX This should be a BSD system call, not a Mach trap!!!
359 */
360 /*
361 *
362 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: no access allowed except to one's own task
363 * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: access to self, to related (same uid) processes, or with privilege
364 * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; other tasks require privilege or membership in a tfp group (setgid to tfp_group)
365 *
366 */
367 static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
369 /* the group is initialized to the kmem group and is modifiable by sysctl */
369 static int tfp_group_inited = 0; /* policy groups are loaded ... */
370 static gid_t tfp_group_ronly = 0; /* procview group */
371 static gid_t tfp_group_rw = 0; /* procmod group */
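/*
 * Illustrative user-space usage (not part of the original file), assuming
 * the standard trap prototype kern_return_t task_for_pid(mach_port_name_t
 * target_tport, int pid, mach_port_name_t *t) from <mach/mach_traps.h>.
 * Whether the call succeeds is governed by the tfp_policy checks below.
 *
 *	mach_port_name_t target;
 *	kern_return_t kr;
 *
 *	kr = task_for_pid(mach_task_self(), pid, &target);
 *	if (kr != KERN_SUCCESS)
 *		;	// denied by policy, no such pid, or insufficient rights
 */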
372
373 kern_return_t
374 task_for_pid(
375 struct task_for_pid_args *args)
376 {
377 mach_port_name_t target_tport = args->target_tport;
378 int pid = args->pid;
379 user_addr_t task_addr = args->t;
380 struct uthread *uthread;
381 struct proc *p;
382 struct proc *p1;
383 task_t t1;
384 mach_port_name_t tret;
385 void * sright;
386 int error = 0;
387 int is_member = 0;
388 boolean_t funnel_state;
389 boolean_t ispermitted = FALSE;
390 #if DIAGNOSTIC
391 char procname[MAXCOMLEN+1];
392 #endif /* DIAGNOSTIC */
393
394 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
395 AUDIT_ARG(pid, pid);
396 AUDIT_ARG(mach_port1, target_tport);
397
398 t1 = port_name_to_task(target_tport);
399 if (t1 == TASK_NULL) {
400 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
401 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
402 return(KERN_FAILURE);
403 }
404
405 funnel_state = thread_funnel_set(kernel_flock, TRUE);
406
407 p1 = current_proc();
408
409 /*
410 * Delayed binding of thread credential to process credential, if we
411 * are not running with an explicitly set thread credential.
412 */
413 uthread = get_bsdthread_info(current_thread());
414 if (uthread->uu_ucred != p1->p_ucred &&
415 (uthread->uu_flag & UT_SETUID) == 0) {
416 kauth_cred_t old = uthread->uu_ucred;
417 proc_lock(p1);
418 kauth_cred_ref(p1->p_ucred);
419 uthread->uu_ucred = p1->p_ucred;
420 proc_unlock(p1);
421 if (IS_VALID_CRED(old))
422 kauth_cred_unref(&old);
423 }
424
425 p = pfind(pid);
426 AUDIT_ARG(process, p);
427
428 /*
429 * XXX p_ucred check can be bogus in multithreaded processes,
430 * XXX unless the funnel is held.
431 */
432 switch (tfp_policy) {
433
434 case KERN_TFP_POLICY_PERMISSIVE:
435 /* self or suser or related ones */
436 if ((p != (struct proc *) 0)
437 && (p->p_stat != SZOMB)
438 && (p1 != (struct proc *) 0)
439 && (
440 (p1 == p)
441 || !(suser(kauth_cred_get(), 0))
442 || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
443 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
444 && ((p->p_flag & P_SUGID) == 0))
445 )
446 )
447 ispermitted = TRUE;
448 break;
449
450 case KERN_TFP_POLICY_RESTRICTED:
451 /* self or suser or setgid and related ones only */
452 if ((p != (struct proc *) 0)
453 && (p1 != (struct proc *) 0)
454 && (p->p_stat != SZOMB)
455 && (
456 (p1 == p)
457 || !(suser(kauth_cred_get(), 0))
458 || (((tfp_group_inited != 0) &&
459 (
460 ((kauth_cred_ismember_gid(kauth_cred_get(),
461 tfp_group_ronly, &is_member) == 0) && is_member)
462 ||((kauth_cred_ismember_gid(kauth_cred_get(),
463 tfp_group_rw, &is_member) == 0) && is_member)
464 )
465 )
466 && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
467 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
468 && ((p->p_flag & P_SUGID) == 0))
469 )
470 )
471 )
472 ispermitted = TRUE;
473
474 break;
475
476 case KERN_TFP_POLICY_DENY:
477 /* self or suser only */
478 default:
479 /* do not return task port of other task at all */
480 if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
481 && ((p1 == p) || !(suser(kauth_cred_get(), 0))))
482 ispermitted = TRUE;
483 else
484 ispermitted = FALSE;
485 break;
486 };
487
488
489 if (ispermitted == TRUE) {
490 if (p->task != TASK_NULL) {
491 task_reference(p->task);
492 sright = (void *)convert_task_to_port(p->task);
493 tret = ipc_port_copyout_send(
494 sright,
495 get_task_ipcspace(current_task()));
496 } else
497 tret = MACH_PORT_NULL;
498 AUDIT_ARG(mach_port2, tret);
499 (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
500 task_deallocate(t1);
501 error = KERN_SUCCESS;
502 goto tfpout;
503 }
504 #if DIAGNOSTIC
505 else {
506 /*
507 * There is no guarantee that p_comm is null-terminated, nor that the
508 * kernel implementation of the string functions is complete. So, to
509 * ensure stale info is not leaked out, bzero the buffer.
510 */
511 bzero(&procname[0], MAXCOMLEN+1);
512 strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
513 if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
514 log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
515 ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
516 ((p != PROC_NULL)?(p->p_pid):0));
517 }
518 #endif /* DIAGNOSTIC */
519
520 task_deallocate(t1);
521 tret = MACH_PORT_NULL;
522 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
523 error = KERN_FAILURE;
524 tfpout:
525 thread_funnel_set(kernel_flock, funnel_state);
526 AUDIT_MACH_SYSCALL_EXIT(error);
527 return(error);
528 }
529
530 /*
531 * Routine: task_name_for_pid
532 * Purpose:
533 * Get the task name port for another "process", named by its
534 * process ID on the same host as "target_task".
535 *
536 * Only permitted to privileged processes, or processes
537 * with the same user ID.
538 *
539 * XXX This should be a BSD system call, not a Mach trap!!!
540 */
541
542 kern_return_t
543 task_name_for_pid(
544 struct task_name_for_pid_args *args)
545 {
546 mach_port_name_t target_tport = args->target_tport;
547 int pid = args->pid;
548 user_addr_t task_addr = args->t;
549 struct uthread *uthread;
550 struct proc *p;
551 struct proc *p1;
552 task_t t1;
553 mach_port_name_t tret;
554 void * sright;
555 int error = 0;
556 boolean_t funnel_state;
557
558 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
559 AUDIT_ARG(pid, pid);
560 AUDIT_ARG(mach_port1, target_tport);
561
562 t1 = port_name_to_task(target_tport);
563 if (t1 == TASK_NULL) {
564 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
565 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
566 return(KERN_FAILURE);
567 }
568
569 funnel_state = thread_funnel_set(kernel_flock, TRUE);
570
571 p1 = current_proc();
572
573 /*
574 * Delayed binding of thread credential to process credential, if we
575 * are not running with an explicitly set thread credential.
576 */
577 /*
578 * XXX p_ucred check can be bogus in multithreaded processes,
579 * XXX unless the funnel is held.
580 */
581 uthread = get_bsdthread_info(current_thread());
582 if (uthread->uu_ucred != p1->p_ucred &&
583 (uthread->uu_flag & UT_SETUID) == 0) {
584 kauth_cred_t old = uthread->uu_ucred;
585 proc_lock(p1);
586 kauth_cred_ref(p1->p_ucred);
587 uthread->uu_ucred = p1->p_ucred;
588 proc_unlock(p1);
589 if (IS_VALID_CRED(old))
590 kauth_cred_unref(&old);
591 }
592
593 p = pfind(pid);
594 AUDIT_ARG(process, p);
595
596 if ((p != (struct proc *) 0)
597 && (p->p_stat != SZOMB)
598 && (p1 != (struct proc *) 0)
599 && ((p1 == p)
600 || !(suser(kauth_cred_get(), 0))
601 || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
602 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))
603 {
604 if (p->task != TASK_NULL)
605 {
606 task_reference(p->task);
607 sright = (void *)convert_task_name_to_port(p->task);
608 tret = ipc_port_copyout_send(
609 sright,
610 get_task_ipcspace(current_task()));
611 } else
612 tret = MACH_PORT_NULL;
613 AUDIT_ARG(mach_port2, tret);
614 (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
615 task_deallocate(t1);
616 error = KERN_SUCCESS;
617 goto tnfpout;
618 }
619
620 task_deallocate(t1);
621 tret = MACH_PORT_NULL;
622 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
623 error = KERN_FAILURE;
624 tnfpout:
625 thread_funnel_set(kernel_flock, funnel_state);
626 AUDIT_MACH_SYSCALL_EXIT(error);
627 return(error);
628 }
629
630 static int
631 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
632 __unused int arg2, struct sysctl_req *req)
633 {
634 int error = 0;
635 int new_value;
636
637 error = SYSCTL_OUT(req, arg1, sizeof(int));
638 if (error || req->newptr == USER_ADDR_NULL)
639 return(error);
640
641 if (!is_suser())
642 return(EPERM);
643
644 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
645 goto out;
646 }
647 if ((new_value == KERN_TFP_POLICY_DENY)
648 || (new_value == KERN_TFP_POLICY_PERMISSIVE)
649 || (new_value == KERN_TFP_POLICY_RESTRICTED))
650 tfp_policy = new_value;
651 else
652 error = EINVAL;
653 out:
654 return(error);
655
656 }
657
658 static int
659 sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
660 __unused int arg2, struct sysctl_req *req)
661 {
662 int error = 0;
663 int new_value;
664
665 error = SYSCTL_OUT(req, arg1, sizeof(int));
666 if (error || req->newptr == USER_ADDR_NULL)
667 return(error);
668
669 if (!is_suser())
670 return(EPERM);
671
672 /*
673 * Once set, this cannot be reset until the next boot. launchd sets it
674 * during its pid 1 init and no one can change it after that.
675 */
676 if (tfp_group_inited != 0)
677 return(EPERM);
678
679 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
680 goto out;
681 }
682
683 if (new_value >= 100)
684 error = EINVAL;
685 else {
686 if (arg1 == &tfp_group_ronly)
687 tfp_group_ronly = new_value;
688 else if (arg1 == &tfp_group_rw)
689 tfp_group_rw = new_value;
690 else
691 error = EINVAL;
692 if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 ))
693 tfp_group_inited = 1;
694 }
695
696 out:
697 return(error);
698 }
699
700 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
701 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
702 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
703 SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
704 &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group");
705 SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
706 &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group");
707
708
709 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, "");
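/*
 * Illustrative example (not part of the original file): the sysctls above
 * are the user-visible knobs.  From user space, using sysctlbyname(3):
 *
 *	int policy = KERN_TFP_POLICY_PERMISSIVE;
 *	sysctlbyname("kern.tfp.policy", NULL, NULL, &policy, sizeof(policy));
 *
 *	int level;
 *	size_t len = sizeof(level);
 *	sysctlbyname("vm.shared_region_trace_level", &level, &len, NULL, 0);
 *
 * Setting kern.tfp.read_group / kern.tfp.rw_group works the same way but,
 * per sysctl_settfp_groups() above, only the superuser may do it and only
 * once per boot.
 */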
710
711 /*
712 * Try to cap the number of mappings the user might be trying to deal with,
713 * so that we don't end up allocating insane amounts of wired memory in the
714 * kernel based on bogus user arguments.
715 * There are 2 shared regions (TEXT and DATA). The size of each submap
716 * is SHARED_TEXT_REGION_SIZE and we can have at most 1 VM map entry per page,
717 * so the maximum number of mappings we could ever have to deal with is...
718 */
719 #define SHARED_REGION_MAX_MAPPINGS ((2 *SHARED_TEXT_REGION_SIZE) >> PAGE_SHIFT)
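/*
 * For example, assuming SHARED_TEXT_REGION_SIZE = 0x10000000 (256MB) and
 * PAGE_SHIFT = 12 (4KB pages) -- values used here purely for illustration --
 * this works out to (2 * 0x10000000) >> 12 = 0x20000 = 131072 mappings,
 * far more than any legitimate caller should ever pass in.
 */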
720
721 /*
722 * shared_region_make_private_np:
723 *
724 * This system call is for "dyld" only.
725 *
726 * It creates a private copy of the current process's "shared region" for
727 * split libraries. "dyld" uses this when the shared region is full or
728 * it needs to load a split library that conflicts with an already loaded one
729 * that this process doesn't need. "dyld" specifies a set of address ranges
730 * that it wants to keep in the now-private "shared region". These cover
731 * the set of split libraries that the process needs so far. The kernel needs
732 * to deallocate the rest of the shared region, so that it's available for
733 * more libraries for this process.
734 */
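/*
 * Illustrative caller's view (not part of the original file): dyld passes
 * an array describing the address ranges it wants to keep; everything else
 * in the now-private region is deallocated.  The field names srr_address
 * and srr_size and the user-level wrapper shown here are assumptions, and
 * the values are made up.
 *
 *	struct shared_region_range_np ranges[1];
 *
 *	ranges[0].srr_address = text_start;	// a split library's mapped range
 *	ranges[0].srr_size    = text_size;
 *	shared_region_make_private_np(1, ranges);	// rangeCount, ranges
 */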
735 int
736 shared_region_make_private_np(
737 struct proc *p,
738 struct shared_region_make_private_np_args *uap,
739 __unused int *retvalp)
740 {
741 int error;
742 kern_return_t kr;
743 boolean_t using_shared_regions;
744 user_addr_t user_ranges;
745 unsigned int range_count;
746 vm_size_t ranges_size;
747 struct shared_region_range_np *ranges;
748 shared_region_mapping_t shared_region;
749 struct shared_region_task_mappings task_mapping_info;
750 shared_region_mapping_t next;
751
752 ranges = NULL;
753
754 range_count = uap->rangeCount;
755 user_ranges = uap->ranges;
756 ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));
757
758 SHARED_REGION_TRACE(
759 SHARED_REGION_TRACE_INFO,
760 ("shared_region: %p [%d(%s)] "
761 "make_private(rangecount=%d)\n",
762 current_thread(), p->p_pid, p->p_comm, range_count));
763
764 /* allocate kernel space for the "ranges" */
765 if (range_count != 0) {
766 if (range_count > SHARED_REGION_MAX_MAPPINGS) {
767 error = EINVAL;
768 goto done;
769 }
770 if ((mach_vm_size_t) ranges_size !=
771 (mach_vm_size_t) range_count * sizeof (ranges[0])) {
772 /* 32-bit integer overflow */
773 error = EINVAL;
774 goto done;
775 }
776 kr = kmem_alloc(kernel_map,
777 (vm_offset_t *) &ranges,
778 ranges_size);
779 if (kr != KERN_SUCCESS) {
780 error = ENOMEM;
781 goto done;
782 }
783
784 /* copy "ranges" from user-space */
785 error = copyin(user_ranges,
786 ranges,
787 ranges_size);
788 if (error) {
789 goto done;
790 }
791 }
792
793 if (p->p_flag & P_NOSHLIB) {
794 /* no split library has been mapped for this process so far */
795 using_shared_regions = FALSE;
796 } else {
797 /* this process has already mapped some split libraries */
798 using_shared_regions = TRUE;
799 }
800
801 /*
802 * Get a private copy of the current shared region.
803 * Do not chain it to the system-wide shared region, as we'll want
804 * to map other split libraries in place of the old ones. We want
805 * to completely detach from the system-wide shared region and go our
806 * own way after this point, not sharing anything with other processes.
807 */
808 error = clone_system_shared_regions(using_shared_regions,
809 FALSE, /* chain_regions */
810 ENV_DEFAULT_ROOT);
811 if (error) {
812 goto done;
813 }
814
815 /* get info on the newly allocated shared region */
816 vm_get_shared_region(current_task(), &shared_region);
817 task_mapping_info.self = (vm_offset_t) shared_region;
818 shared_region_mapping_info(shared_region,
819 &(task_mapping_info.text_region),
820 &(task_mapping_info.text_size),
821 &(task_mapping_info.data_region),
822 &(task_mapping_info.data_size),
823 &(task_mapping_info.region_mappings),
824 &(task_mapping_info.client_base),
825 &(task_mapping_info.alternate_base),
826 &(task_mapping_info.alternate_next),
827 &(task_mapping_info.fs_base),
828 &(task_mapping_info.system),
829 &(task_mapping_info.flags),
830 &next);
831
832 /*
833 * We now have our private copy of the shared region, as it was before
834 * the call to clone_system_shared_regions(). We now need to clean it
835 * up and keep only the memory areas described by the "ranges" array.
836 */
837 kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
838 switch (kr) {
839 case KERN_SUCCESS:
840 error = 0;
841 break;
842 default:
843 error = EINVAL;
844 goto done;
845 }
846
847 done:
848 if (ranges != NULL) {
849 kmem_free(kernel_map,
850 (vm_offset_t) ranges,
851 ranges_size);
852 ranges = NULL;
853 }
854
855 SHARED_REGION_TRACE(
856 SHARED_REGION_TRACE_INFO,
857 ("shared_region: %p [%d(%s)] "
858 "make_private(rangecount=%d) -> %d "
859 "shared_region=%p[%x,%x,%x]\n",
860 current_thread(), p->p_pid, p->p_comm,
861 range_count, error, shared_region,
862 task_mapping_info.fs_base,
863 task_mapping_info.system,
864 task_mapping_info.flags));
865
866 return error;
867 }
868
869
870 /*
871 * shared_region_map_file_np:
872 *
873 * This system call is for "dyld" only.
874 *
875 * "dyld" wants to map parts of a split library in the shared region.
876 * We get a file descriptor on the split library to be mapped and a set
877 * of mapping instructions, describing which parts of the file to map in
878 * which areas of the shared segment and with what protection.
879 * The "shared region" is split into 2 areas:
880 * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
881 * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
882 *
883 */
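/*
 * Illustrative caller's view (not part of the original file): dyld opens
 * the split library and describes where each piece should land.  The
 * sfm_file_offset and sfm_init_prot field names and the user-level wrapper
 * are assumptions; addresses, sizes and the fd are made up.
 *
 *	struct shared_file_mapping_np m[2];
 *	uint64_t slide;
 *
 *	m[0].sfm_address	= 0x90001000;	// read-only (TEXT) area
 *	m[0].sfm_size		= 0x2000;
 *	m[0].sfm_file_offset	= 0;
 *	m[0].sfm_max_prot	= VM_PROT_READ | VM_PROT_EXECUTE;
 *	m[0].sfm_init_prot	= VM_PROT_READ | VM_PROT_EXECUTE;
 *
 *	m[1].sfm_address	= 0xa0001000;	// writable (DATA) area
 *	m[1].sfm_size		= 0x1000;
 *	m[1].sfm_file_offset	= 0x2000;
 *	m[1].sfm_max_prot	= VM_PROT_READ | VM_PROT_WRITE;
 *	m[1].sfm_init_prot	= VM_PROT_READ | VM_PROT_WRITE;
 *
 *	shared_region_map_file_np(fd, 2, m, &slide);
 */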
884 int
885 shared_region_map_file_np(
886 struct proc *p,
887 struct shared_region_map_file_np_args *uap,
888 __unused int *retvalp)
889 {
890 int error;
891 kern_return_t kr;
892 int fd;
893 unsigned int mapping_count;
894 user_addr_t user_mappings; /* 64-bit */
895 user_addr_t user_slide_p; /* 64-bit */
896 struct shared_file_mapping_np *mappings;
897 vm_size_t mappings_size;
898 struct fileproc *fp;
899 mach_vm_offset_t slide;
900 struct vnode *vp;
901 struct vfs_context context;
902 memory_object_control_t file_control;
903 memory_object_size_t file_size;
904 shared_region_mapping_t shared_region;
905 struct shared_region_task_mappings task_mapping_info;
906 shared_region_mapping_t next;
907 shared_region_mapping_t default_shared_region;
908 boolean_t using_default_region;
909 unsigned int j;
910 vm_prot_t max_prot;
911 mach_vm_offset_t base_offset, end_offset;
912 mach_vm_offset_t original_base_offset;
913 boolean_t mappings_in_segment;
914 #define SFM_MAX_STACK 6
915 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
916
917 mappings_size = 0;
918 mappings = NULL;
919 mapping_count = 0;
920 fp = NULL;
921 vp = NULL;
922
923 /* get file descriptor for split library from arguments */
924 fd = uap->fd;
925
926 /* get file structure from file descriptor */
927 error = fp_lookup(p, fd, &fp, 0);
928 if (error) {
929 SHARED_REGION_TRACE(
930 SHARED_REGION_TRACE_ERROR,
931 ("shared_region: %p [%d(%s)] map_file: "
932 "fd=%d lookup failed (error=%d)\n",
933 current_thread(), p->p_pid, p->p_comm, fd, error));
934 goto done;
935 }
936
937 /* make sure we're attempting to map a vnode */
938 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
939 SHARED_REGION_TRACE(
940 SHARED_REGION_TRACE_ERROR,
941 ("shared_region: %p [%d(%s)] map_file: "
942 "fd=%d not a vnode (type=%d)\n",
943 current_thread(), p->p_pid, p->p_comm,
944 fd, fp->f_fglob->fg_type));
945 error = EINVAL;
946 goto done;
947 }
948
949 /* we need at least read permission on the file */
950 if (! (fp->f_fglob->fg_flag & FREAD)) {
951 SHARED_REGION_TRACE(
952 SHARED_REGION_TRACE_ERROR,
953 ("shared_region: %p [%d(%s)] map_file: "
954 "fd=%d not readable\n",
955 current_thread(), p->p_pid, p->p_comm, fd));
956 error = EPERM;
957 goto done;
958 }
959
960 /* get vnode from file structure */
961 error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
962 if (error) {
963 SHARED_REGION_TRACE(
964 SHARED_REGION_TRACE_ERROR,
965 ("shared_region: %p [%d(%s)] map_file: "
966 "fd=%d getwithref failed (error=%d)\n",
967 current_thread(), p->p_pid, p->p_comm, fd, error));
968 goto done;
969 }
970 vp = (struct vnode *) fp->f_fglob->fg_data;
971
972 /* make sure the vnode is a regular file */
973 if (vp->v_type != VREG) {
974 SHARED_REGION_TRACE(
975 SHARED_REGION_TRACE_ERROR,
976 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
977 "not a file (type=%d)\n",
978 current_thread(), p->p_pid, p->p_comm,
979 vp, vp->v_name, vp->v_type));
980 error = EINVAL;
981 goto done;
982 }
983
984 /* get vnode size */
985 {
986 off_t fs;
987
988 context.vc_proc = p;
989 context.vc_ucred = kauth_cred_get();
990 if ((error = vnode_size(vp, &fs, &context)) != 0) {
991 SHARED_REGION_TRACE(
992 SHARED_REGION_TRACE_ERROR,
993 ("shared_region: %p [%d(%s)] "
994 "map_file(%p:'%s'): "
995 "vnode_size(%p) failed (error=%d)\n",
996 current_thread(), p->p_pid, p->p_comm,
997 vp, vp->v_name, vp, error));
998 goto done;
999 }
1000 file_size = fs;
1001 }
1002
1003 /*
1004 * Get the list of mappings the caller wants us to establish.
1005 */
1006 mapping_count = uap->mappingCount; /* the number of mappings */
1007 mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
1008 if (mapping_count == 0) {
1009 SHARED_REGION_TRACE(
1010 SHARED_REGION_TRACE_INFO,
1011 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1012 "no mappings\n",
1013 current_thread(), p->p_pid, p->p_comm,
1014 vp, vp->v_name));
1015 error = 0; /* no mappings: we're done ! */
1016 goto done;
1017 } else if (mapping_count <= SFM_MAX_STACK) {
1018 mappings = &stack_mappings[0];
1019 } else {
1020 if (mapping_count > SHARED_REGION_MAX_MAPPINGS) {
1021 error = EINVAL;
1022 goto done;
1023 }
1024 if ((mach_vm_size_t) mappings_size !=
1025 (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
1026 /* 32-bit integer overflow */
1027 error = EINVAL;
1028 goto done;
1029 }
1030 kr = kmem_alloc(kernel_map,
1031 (vm_offset_t *) &mappings,
1032 mappings_size);
1033 if (kr != KERN_SUCCESS) {
1034 SHARED_REGION_TRACE(
1035 SHARED_REGION_TRACE_ERROR,
1036 ("shared_region: %p [%d(%s)] "
1037 "map_file(%p:'%s'): "
1038 "failed to allocate %d mappings (kr=0x%x)\n",
1039 current_thread(), p->p_pid, p->p_comm,
1040 vp, vp->v_name, mapping_count, kr));
1041 error = ENOMEM;
1042 goto done;
1043 }
1044 }
1045
1046 user_mappings = uap->mappings; /* the mappings, in user space */
1047 error = copyin(user_mappings,
1048 mappings,
1049 mappings_size);
1050 if (error != 0) {
1051 SHARED_REGION_TRACE(
1052 SHARED_REGION_TRACE_ERROR,
1053 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1054 "failed to copyin %d mappings (error=%d)\n",
1055 current_thread(), p->p_pid, p->p_comm,
1056 vp, vp->v_name, mapping_count, error));
1057 goto done;
1058 }
1059
1060 /*
1061 * If the caller provides a "slide" pointer, it means they're OK
1062 * with us moving the mappings around to make them fit.
1063 */
1064 user_slide_p = uap->slide_p;
1065
1066 /*
1067 * Make each mapping address relative to the beginning of the
1068 * shared region. Check that all mappings are in the shared region.
1069 * Compute the maximum set of protections required to tell the
1070 * buffer cache how we mapped the file (see call to ubc_map() below).
1071 */
1072 max_prot = VM_PROT_NONE;
1073 base_offset = -1LL;
1074 end_offset = 0;
1075 mappings_in_segment = TRUE;
1076 for (j = 0; j < mapping_count; j++) {
1077 mach_vm_offset_t segment;
1078 segment = (mappings[j].sfm_address &
1079 GLOBAL_SHARED_SEGMENT_MASK);
1080 if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
1081 segment != GLOBAL_SHARED_DATA_SEGMENT) {
1082 /* this mapping is not in the shared region... */
1083 if (user_slide_p == NULL) {
1084 /* ... and we can't slide it in: fail */
1085 SHARED_REGION_TRACE(
1086 SHARED_REGION_TRACE_CONFLICT,
1087 ("shared_region: %p [%d(%s)] "
1088 "map_file(%p:'%s'): "
1089 "mapping %p not in shared segment & "
1090 "no sliding\n",
1091 current_thread(), p->p_pid, p->p_comm,
1092 vp, vp->v_name,
1093 mappings[j].sfm_address));
1094 error = EINVAL;
1095 goto done;
1096 }
1097 if (j == 0) {
1098 /* expect all mappings to be outside */
1099 mappings_in_segment = FALSE;
1100 } else if (mappings_in_segment != FALSE) {
1101 /* other mappings were not outside: fail */
1102 SHARED_REGION_TRACE(
1103 SHARED_REGION_TRACE_CONFLICT,
1104 ("shared_region: %p [%d(%s)] "
1105 "map_file(%p:'%s'): "
1106 "mapping %p not in shared segment & "
1107 "other mappings in shared segment\n",
1108 current_thread(), p->p_pid, p->p_comm,
1109 vp, vp->v_name,
1110 mappings[j].sfm_address));
1111 error = EINVAL;
1112 goto done;
1113 }
1114 /* we'll try and slide that mapping in the segments */
1115 } else {
1116 if (j == 0) {
1117 /* expect all mappings to be inside */
1118 mappings_in_segment = TRUE;
1119 } else if (mappings_in_segment != TRUE) {
1120 /* other mappings were not inside: fail */
1121 SHARED_REGION_TRACE(
1122 SHARED_REGION_TRACE_CONFLICT,
1123 ("shared_region: %p [%d(%s)] "
1124 "map_file(%p:'%s'): "
1125 "mapping %p in shared segment & "
1126 "others in shared segment\n",
1127 current_thread(), p->p_pid, p->p_comm,
1128 vp, vp->v_name,
1129 mappings[j].sfm_address));
1130 error = EINVAL;
1131 goto done;
1132 }
1133 /* get a relative offset inside the shared segments */
1134 mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
1135 }
1136 if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
1137 < base_offset) {
1138 base_offset = (mappings[j].sfm_address &
1139 SHARED_TEXT_REGION_MASK);
1140 }
1141 if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
1142 mappings[j].sfm_size > end_offset) {
1143 end_offset =
1144 (mappings[j].sfm_address &
1145 SHARED_TEXT_REGION_MASK) +
1146 mappings[j].sfm_size;
1147 }
1148 max_prot |= mappings[j].sfm_max_prot;
1149 }
1150 /* Make all mappings relative to the base_offset */
1151 base_offset = vm_map_trunc_page(base_offset);
1152 end_offset = vm_map_round_page(end_offset);
1153 for (j = 0; j < mapping_count; j++) {
1154 mappings[j].sfm_address -= base_offset;
1155 }
1156 original_base_offset = base_offset;
1157 if (mappings_in_segment == FALSE) {
1158 /*
1159 * We're trying to map a library that was not pre-bound to
1160 * be in the shared segments. We want to try and slide it
1161 * back into the shared segments but as far back as possible,
1162 * so that it doesn't clash with pre-bound libraries. Set
1163 * the base_offset to the end of the region, so that it can't
1164 * possibly fit there and will have to be slid.
1165 */
1166 base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
1167 }
1168
1169 /* get the file's memory object handle */
1170 UBCINFOCHECK("shared_region_map_file_np", vp);
1171 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1172 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1173 SHARED_REGION_TRACE(
1174 SHARED_REGION_TRACE_ERROR,
1175 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1176 "ubc_getobject() failed\n",
1177 current_thread(), p->p_pid, p->p_comm,
1178 vp, vp->v_name));
1179 error = EINVAL;
1180 goto done;
1181 }
1182
1183 /*
1184 * Get info about the current process's shared region.
1185 * This might change if we decide we need to clone the shared region.
1186 */
1187 vm_get_shared_region(current_task(), &shared_region);
1188 task_mapping_info.self = (vm_offset_t) shared_region;
1189 shared_region_mapping_info(shared_region,
1190 &(task_mapping_info.text_region),
1191 &(task_mapping_info.text_size),
1192 &(task_mapping_info.data_region),
1193 &(task_mapping_info.data_size),
1194 &(task_mapping_info.region_mappings),
1195 &(task_mapping_info.client_base),
1196 &(task_mapping_info.alternate_base),
1197 &(task_mapping_info.alternate_next),
1198 &(task_mapping_info.fs_base),
1199 &(task_mapping_info.system),
1200 &(task_mapping_info.flags),
1201 &next);
1202
1203 /*
1204 * Are we using the system's current shared region
1205 * for this environment ?
1206 */
1207 default_shared_region =
1208 lookup_default_shared_region(ENV_DEFAULT_ROOT,
1209 task_mapping_info.system);
1210 if (shared_region == default_shared_region) {
1211 using_default_region = TRUE;
1212 } else {
1213 using_default_region = FALSE;
1214 }
1215 shared_region_mapping_dealloc(default_shared_region);
1216
1217 if (vp->v_mount != rootvnode->v_mount &&
1218 using_default_region) {
1219 /*
1220 * The split library is not on the root filesystem. We don't
1221 * want to pollute the system-wide ("default") shared region
1222 * with it.
1223 * Reject the mapping. The caller (dyld) should "privatize"
1224 * (via shared_region_make_private()) the shared region and
1225 * try to establish the mapping privately for this process.
1226 */
1227 SHARED_REGION_TRACE(
1228 SHARED_REGION_TRACE_CONFLICT,
1229 ("shared_region: %p [%d(%s)] "
1230 "map_file(%p:'%s'): "
1231 "not on root volume\n",
1232 current_thread(), p->p_pid, p->p_comm,
1233 vp->v_name));
1234 error = EXDEV;
1235 goto done;
1236 }
1237
1238
1239 /*
1240 * Map the split library.
1241 */
1242 kr = map_shared_file(mapping_count,
1243 mappings,
1244 file_control,
1245 file_size,
1246 &task_mapping_info,
1247 base_offset,
1248 (user_slide_p) ? &slide : NULL);
1249
1250 if (kr == KERN_SUCCESS) {
1251 /*
1252 * The mapping was successful. Let the buffer cache know
1253 * that we've mapped that file with these protections. This
1254 * prevents the vnode from getting recycled while it's mapped.
1255 */
1256 (void) ubc_map(vp, max_prot);
1257 error = 0;
1258 } else {
1259 SHARED_REGION_TRACE(
1260 SHARED_REGION_TRACE_CONFLICT,
1261 ("shared_region: %p [%d(%s)] "
1262 "map_file(%p:'%s'): "
1263 "map_shared_file failed, kr=0x%x\n",
1264 current_thread(), p->p_pid, p->p_comm,
1265 vp, vp->v_name, kr));
1266 switch (kr) {
1267 case KERN_INVALID_ADDRESS:
1268 error = EFAULT;
1269 goto done;
1270 case KERN_PROTECTION_FAILURE:
1271 error = EPERM;
1272 goto done;
1273 case KERN_NO_SPACE:
1274 error = ENOMEM;
1275 goto done;
1276 case KERN_FAILURE:
1277 case KERN_INVALID_ARGUMENT:
1278 default:
1279 error = EINVAL;
1280 goto done;
1281 }
1282 }
1283
1284 if (p->p_flag & P_NOSHLIB) {
1285 /* signal that this process is now using split libraries */
1286 p->p_flag &= ~P_NOSHLIB;
1287 }
1288
1289 if (user_slide_p) {
1290 /*
1291 * The caller provided a pointer to a "slide" offset. Let
1292 * them know by how much we slid the mappings.
1293 */
1294 if (mappings_in_segment == FALSE) {
1295 /*
1296 * We faked the base_offset earlier, so undo that
1297 * and take into account the real base_offset.
1298 */
1299 slide += SHARED_TEXT_REGION_SIZE - end_offset;
1300 slide -= original_base_offset;
1301 /*
1302 * The mappings were slid into the shared segments
1303 * and "slide" is relative to the beginning of the
1304 * shared segments. Adjust it to be absolute.
1305 */
1306 slide += GLOBAL_SHARED_TEXT_SEGMENT;
1307 }
1308 error = copyout(&slide,
1309 user_slide_p,
1310 sizeof (slide));
1311 if (slide != 0) {
1312 SHARED_REGION_TRACE(
1313 SHARED_REGION_TRACE_CONFLICT,
1314 ("shared_region: %p [%d(%s)] "
1315 "map_file(%p:'%s'): "
1316 "slid by 0x%llx\n",
1317 current_thread(), p->p_pid, p->p_comm,
1318 vp, vp->v_name, slide));
1319 }
1320 }
1321
1322 done:
1323 if (vp != NULL) {
1324 /*
1325 * release the vnode...
1326 * ubc_map() still holds it for us in the non-error case
1327 */
1328 (void) vnode_put(vp);
1329 vp = NULL;
1330 }
1331 if (fp != NULL) {
1332 /* release the file descriptor */
1333 fp_drop(p, fd, fp, 0);
1334 fp = NULL;
1335 }
1336 if (mappings != NULL &&
1337 mappings != &stack_mappings[0]) {
1338 kmem_free(kernel_map,
1339 (vm_offset_t) mappings,
1340 mappings_size);
1341 }
1342 mappings = NULL;
1343
1344 return error;
1345 }
1346
1347 int
1348 load_shared_file(
1349 __unused struct proc *p,
1350 __unused struct load_shared_file_args *uap,
1351 __unused int *retval)
1352 {
1353 return ENOSYS;
1354 }
1355
1356 int
1357 reset_shared_file(
1358 __unused struct proc *p,
1359 __unused struct reset_shared_file_args *uap,
1360 __unused int *retval)
1361 {
1362 return ENOSYS;
1363 }
1364
1365 int
1366 new_system_shared_regions(
1367 __unused struct proc *p,
1368 __unused struct new_system_shared_regions_args *uap,
1369 __unused int *retval)
1370 {
1371 return ENOSYS;
1372 }
1373
1374
1375
1376 int
1377 clone_system_shared_regions(
1378 int shared_regions_active,
1379 int chain_regions,
1380 int base_vnode)
1381 {
1382 shared_region_mapping_t new_shared_region;
1383 shared_region_mapping_t next;
1384 shared_region_mapping_t old_shared_region;
1385 struct shared_region_task_mappings old_info;
1386 struct shared_region_task_mappings new_info;
1387
1388 vm_get_shared_region(current_task(), &old_shared_region);
1389 old_info.self = (vm_offset_t)old_shared_region;
1390 shared_region_mapping_info(old_shared_region,
1391 &(old_info.text_region),
1392 &(old_info.text_size),
1393 &(old_info.data_region),
1394 &(old_info.data_size),
1395 &(old_info.region_mappings),
1396 &(old_info.client_base),
1397 &(old_info.alternate_base),
1398 &(old_info.alternate_next),
1399 &(old_info.fs_base),
1400 &(old_info.system),
1401 &(old_info.flags), &next);
1402
1403 if (shared_regions_active ||
1404 base_vnode == ENV_DEFAULT_ROOT) {
1405 if (shared_file_create_system_region(&new_shared_region,
1406 old_info.fs_base,
1407 old_info.system))
1408 return ENOMEM;
1409 } else {
1410 if (old_shared_region &&
1411 base_vnode == ENV_DEFAULT_ROOT) {
1412 base_vnode = old_info.fs_base;
1413 }
1414 new_shared_region =
1415 lookup_default_shared_region(base_vnode,
1416 old_info.system);
1417 if (new_shared_region == NULL) {
1418 shared_file_boot_time_init(base_vnode,
1419 old_info.system);
1420 vm_get_shared_region(current_task(),
1421 &new_shared_region);
1422 } else {
1423 vm_set_shared_region(current_task(), new_shared_region);
1424 }
1425 if (old_shared_region)
1426 shared_region_mapping_dealloc(old_shared_region);
1427 }
1428 new_info.self = (vm_offset_t)new_shared_region;
1429 shared_region_mapping_info(new_shared_region,
1430 &(new_info.text_region),
1431 &(new_info.text_size),
1432 &(new_info.data_region),
1433 &(new_info.data_size),
1434 &(new_info.region_mappings),
1435 &(new_info.client_base),
1436 &(new_info.alternate_base),
1437 &(new_info.alternate_next),
1438 &(new_info.fs_base),
1439 &(new_info.system),
1440 &(new_info.flags), &next);
1441 if(shared_regions_active) {
1442 if(vm_region_clone(old_info.text_region, new_info.text_region)) {
1443 panic("clone_system_shared_regions: shared region mis-alignment 1");
1444 shared_region_mapping_dealloc(new_shared_region);
1445 return(EINVAL);
1446 }
1447 if (vm_region_clone(old_info.data_region, new_info.data_region)) {
1448 panic("clone_system_shared_regions: shared region mis-alignment 2");
1449 shared_region_mapping_dealloc(new_shared_region);
1450 return(EINVAL);
1451 }
1452 if (chain_regions) {
1453 /*
1454 * We want a "shadowed" clone, a private superset of the old
1455 * shared region. The info about the old mappings is still
1456 * valid for us.
1457 */
1458 shared_region_object_chain_attach(
1459 new_shared_region, old_shared_region);
1460 }
1461 }
1462 if (!chain_regions) {
1463 /*
1464 * We want a completely detached clone with no link to
1465 * the old shared region. We'll be removing some mappings
1466 * in our private, cloned, shared region, so the old mappings
1467 * will become irrelevant to us. Since we have a private
1468 * "shared region" now, it isn't going to be shared with
1469 * anyone else and we won't need to maintain mappings info.
1470 */
1471 shared_region_object_chain_detached(new_shared_region);
1472 }
1473 if (vm_map_region_replace(current_map(), old_info.text_region,
1474 new_info.text_region, old_info.client_base,
1475 old_info.client_base+old_info.text_size)) {
1476 panic("clone_system_shared_regions: shared region mis-alignment 3");
1477 shared_region_mapping_dealloc(new_shared_region);
1478 return(EINVAL);
1479 }
1480 if(vm_map_region_replace(current_map(), old_info.data_region,
1481 new_info.data_region,
1482 old_info.client_base + old_info.text_size,
1483 old_info.client_base
1484 + old_info.text_size + old_info.data_size)) {
1485 panic("clone_system_shared_regions: shared region mis-alignment 4");
1486 shared_region_mapping_dealloc(new_shared_region);
1487 return(EINVAL);
1488 }
1489 vm_set_shared_region(current_task(), new_shared_region);
1490
1491 /* consume the reference which wasn't accounted for in object */
1492 /* chain attach */
1493 if (!shared_regions_active || !chain_regions)
1494 shared_region_mapping_dealloc(old_shared_region);
1495
1496 SHARED_REGION_TRACE(
1497 SHARED_REGION_TRACE_INFO,
1498 ("shared_region: %p task=%p "
1499 "clone(active=%d, base=0x%x,chain=%d) "
1500 "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
1501 current_thread(), current_task(),
1502 shared_regions_active, base_vnode, chain_regions,
1503 old_shared_region,
1504 old_info.fs_base,
1505 old_info.system,
1506 old_info.flags,
1507 new_shared_region,
1508 new_info.fs_base,
1509 new_info.system,
1510 new_info.flags));
1511
1512 return(0);
1513
1514 }
1515
1516 /* header for the profile name file. The profiled app info is held */
1517 /* in the data file and pointed to by elements in the name file */
1518
1519 struct profile_names_header {
1520 unsigned int number_of_profiles;
1521 unsigned int user_id;
1522 unsigned int version;
1523 off_t element_array;
1524 unsigned int spare1;
1525 unsigned int spare2;
1526 unsigned int spare3;
1527 };
1528
1529 struct profile_element {
1530 off_t addr;
1531 vm_size_t size;
1532 unsigned int mod_date;
1533 unsigned int inode;
1534 char name[12];
1535 };
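/*
 * Rough on-disk layout of the "names" file implied by the structures above
 * (illustrative; bsd_search_page_cache_data_base() below checks that the
 * element array starts right after the header):
 *
 *	offset 0                                    struct profile_names_header
 *	offset sizeof(struct profile_names_header)  profile_element[0]
 *	...                                          profile_element[number_of_profiles - 1]
 *
 * Each element's (addr, size) pair points into the separate "data" file,
 * which holds the actual per-application profile blobs.
 */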
1536
1537 struct global_profile {
1538 struct vnode *names_vp;
1539 struct vnode *data_vp;
1540 vm_offset_t buf_ptr;
1541 unsigned int user;
1542 unsigned int age;
1543 unsigned int busy;
1544 };
1545
1546 struct global_profile_cache {
1547 int max_ele;
1548 unsigned int age;
1549 struct global_profile profiles[3];
1550 };
1551
1552 /* forward declarations */
1553 int bsd_open_page_cache_files(unsigned int user,
1554 struct global_profile **profile);
1555 void bsd_close_page_cache_files(struct global_profile *profile);
1556 int bsd_search_page_cache_data_base(
1557 struct vnode *vp,
1558 struct profile_names_header *database,
1559 char *app_name,
1560 unsigned int mod_date,
1561 unsigned int inode,
1562 off_t *profile,
1563 unsigned int *profile_size);
1564
1565 struct global_profile_cache global_user_profile_cache =
1566 {3, 0, {{NULL, NULL, 0, 0, 0, 0},
1567 {NULL, NULL, 0, 0, 0, 0},
1568 {NULL, NULL, 0, 0, 0, 0}} };
1569
1570 /* BSD_OPEN_PAGE_CACHE_FILES: */
1571 /* Caller provides a user id. This id was used in */
1572 /* prepare_profile_database to create two unique absolute */
1573 /* file paths to the associated profile files. These files */
1574 /* are either opened or bsd_open_page_cache_files returns an */
1575 /* error. The header of the names file is then consulted. */
1576 /* The header and the vnodes for the names and data files are */
1577 /* returned. */
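/*
 * Typical call pattern (illustrative, mirroring bsd_read_page_cache_file()
 * further below):
 *
 *	struct global_profile *uid_files;
 *
 *	if (bsd_open_page_cache_files(user, &uid_files) == 0) {
 *		// use uid_files->names_vp, uid_files->data_vp and
 *		// uid_files->buf_ptr (the cached start of the names file)
 *		bsd_close_page_cache_files(uid_files);
 *	}
 */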
1578
1579 int
1580 bsd_open_page_cache_files(
1581 unsigned int user,
1582 struct global_profile **profile)
1583 {
1584 const char *cache_path = "/var/vm/app_profile/";
1585 struct proc *p;
1586 int error;
1587 vm_size_t resid;
1588 off_t resid_off;
1589 unsigned int lru;
1590 vm_size_t size;
1591
1592 struct vnode *names_vp;
1593 struct vnode *data_vp;
1594 vm_offset_t names_buf;
1595 vm_offset_t buf_ptr;
1596
1597 int profile_names_length;
1598 int profile_data_length;
1599 char *profile_data_string;
1600 char *profile_names_string;
1601 char *substring;
1602
1603 off_t file_size;
1604 struct vfs_context context;
1605
1606 kern_return_t ret;
1607
1608 struct nameidata nd_names;
1609 struct nameidata nd_data;
1610 int i;
1611
1612
1613 p = current_proc();
1614
1615 context.vc_proc = p;
1616 context.vc_ucred = kauth_cred_get();
1617
1618 restart:
1619 for(i = 0; i<global_user_profile_cache.max_ele; i++) {
1620 if((global_user_profile_cache.profiles[i].user == user)
1621 && (global_user_profile_cache.profiles[i].data_vp
1622 != NULL)) {
1623 *profile = &global_user_profile_cache.profiles[i];
1624 /* already in cache, we're done */
1625 if ((*profile)->busy) {
1626 /*
1627 * drop funnel and wait
1628 */
1629 (void)tsleep((void *)
1630 *profile,
1631 PRIBIO, "app_profile", 0);
1632 goto restart;
1633 }
1634 (*profile)->busy = 1;
1635 (*profile)->age = global_user_profile_cache.age;
1636
1637 /*
1638 * entries in cache are held with a valid
1639 * usecount... take an iocount which will
1640 * be dropped in "bsd_close_page_cache_files"
1641 * which is called after the read or writes to
1642 * these files are done
1643 */
1644 if ( (vnode_getwithref((*profile)->data_vp)) ) {
1645
1646 vnode_rele((*profile)->data_vp);
1647 vnode_rele((*profile)->names_vp);
1648
1649 (*profile)->data_vp = NULL;
1650 (*profile)->busy = 0;
1651 wakeup(*profile);
1652
1653 goto restart;
1654 }
1655 if ( (vnode_getwithref((*profile)->names_vp)) ) {
1656
1657 vnode_put((*profile)->data_vp);
1658 vnode_rele((*profile)->data_vp);
1659 vnode_rele((*profile)->names_vp);
1660
1661 (*profile)->data_vp = NULL;
1662 (*profile)->busy = 0;
1663 wakeup(*profile);
1664
1665 goto restart;
1666 }
1667 global_user_profile_cache.age+=1;
1668 return 0;
1669 }
1670 }
1671
1672 lru = global_user_profile_cache.age;
1673 *profile = NULL;
1674 for(i = 0; i<global_user_profile_cache.max_ele; i++) {
1675 /* Skip entry if it is in the process of being reused */
1676 if(global_user_profile_cache.profiles[i].data_vp ==
1677 (struct vnode *)0xFFFFFFFF)
1678 continue;
1679 /* Otherwise grab the first empty entry */
1680 if(global_user_profile_cache.profiles[i].data_vp == NULL) {
1681 *profile = &global_user_profile_cache.profiles[i];
1682 (*profile)->age = global_user_profile_cache.age;
1683 break;
1684 }
1685 /* Otherwise grab the oldest entry */
1686 if(global_user_profile_cache.profiles[i].age < lru) {
1687 lru = global_user_profile_cache.profiles[i].age;
1688 *profile = &global_user_profile_cache.profiles[i];
1689 }
1690 }
1691
1692 /* Did we set it? */
1693 if (*profile == NULL) {
1694 /*
1695 * No entries are available; this can only happen if all
1696 * of them are currently in the process of being reused;
1697 * if this happens, we sleep on the address of the first
1698 * element, and restart. This is less than ideal, but we
1699 * know it will work because we know that there will be a
1700 * wakeup on any entry currently in the process of being
1701 * reused.
1702 *
1703 * XXX Recommend a two-handed clock and more than 3 total
1704 * XXX cache entries at some point in the future.
1705 */
1706 /*
1707 * drop funnel and wait
1708 */
1709 (void)tsleep((void *)
1710 &global_user_profile_cache.profiles[0],
1711 PRIBIO, "app_profile", 0);
1712 goto restart;
1713 }
1714
1715 /*
1716 * If it's currently busy, we've picked the one at the end of the
1717 * LRU list, but it's currently being actively used. We sleep on
1718 * its address and restart.
1719 */
1720 if ((*profile)->busy) {
1721 /*
1722 * drop funnel and wait
1723 */
1724 (void)tsleep((void *)
1725 *profile,
1726 PRIBIO, "app_profile", 0);
1727 goto restart;
1728 }
1729 (*profile)->busy = 1;
1730 (*profile)->user = user;
1731
1732 /*
1733 * put a dummy value in for now to get competing requests to wait
1734 * above until we are finished
1735 *
1736 * Save the old data_vp before overwriting it, so the dummy value is
1737 * in place before we kmem_free() or vnode_rele() the old entry. If
1738 * we don't do this, we have a potential funnel race condition to deal with.
1739 */
1740 data_vp = (*profile)->data_vp;
1741 (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
1742
1743 /*
1744 * Age the cache here in all cases; this guarantees that we won't
1745 * be reusing only one entry over and over, once the system reaches
1746 * steady-state.
1747 */
1748 global_user_profile_cache.age+=1;
1749
1750 if(data_vp != NULL) {
1751 kmem_free(kernel_map,
1752 (*profile)->buf_ptr, 4 * PAGE_SIZE);
1753 if ((*profile)->names_vp) {
1754 vnode_rele((*profile)->names_vp);
1755 (*profile)->names_vp = NULL;
1756 }
1757 vnode_rele(data_vp);
1758 }
1759
1760 /* Try to open the appropriate user's profile files */
1761 /* If neither file is present, try to create them */
1762 /* If one file is present and the other not, fail. */
1763 /* If the files do exist, check them for the app_file */
1764 /* requested and read it in if present */
1765
1766 ret = kmem_alloc(kernel_map,
1767 (vm_offset_t *)&profile_data_string, PATH_MAX);
1768
1769 if(ret) {
1770 (*profile)->data_vp = NULL;
1771 (*profile)->busy = 0;
1772 wakeup(*profile);
1773 return ENOMEM;
1774 }
1775
1776 /* Split the buffer in half since we know the size of */
1777 /* our file path and our allocation is adequate for */
1778 /* both file path names */
1779 profile_names_string = profile_data_string + (PATH_MAX/2);
1780
1781
1782 strcpy(profile_data_string, cache_path);
1783 strcpy(profile_names_string, cache_path);
1784 profile_names_length = profile_data_length
1785 = strlen(profile_data_string);
1786 substring = profile_data_string + profile_data_length;
1787 sprintf(substring, "%x_data", user);
1788 substring = profile_names_string + profile_names_length;
1789 sprintf(substring, "%x_names", user);
1790
1791 /* We now have the absolute file names */
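	/*
	 * For example, for user id 501 (0x1f5) the two paths built above are
	 * "/var/vm/app_profile/1f5_data" and "/var/vm/app_profile/1f5_names".
	 */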
1792
1793 ret = kmem_alloc(kernel_map,
1794 (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
1795 if(ret) {
1796 kmem_free(kernel_map,
1797 (vm_offset_t)profile_data_string, PATH_MAX);
1798 (*profile)->data_vp = NULL;
1799 (*profile)->busy = 0;
1800 wakeup(*profile);
1801 return ENOMEM;
1802 }
1803
1804 NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
1805 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
1806 NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
1807 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
1808
1809 if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
1810 #ifdef notdef
1811 printf("bsd_open_page_cache_files: CacheData file not found %s\n",
1812 profile_data_string);
1813 #endif
1814 kmem_free(kernel_map,
1815 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1816 kmem_free(kernel_map,
1817 (vm_offset_t)profile_data_string, PATH_MAX);
1818 (*profile)->data_vp = NULL;
1819 (*profile)->busy = 0;
1820 wakeup(*profile);
1821 return error;
1822 }
1823 data_vp = nd_data.ni_vp;
1824
1825 if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
1826 printf("bsd_open_page_cache_files: NamesData file not found %s\n",
1827 profile_names_string);
1828 kmem_free(kernel_map,
1829 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1830 kmem_free(kernel_map,
1831 (vm_offset_t)profile_data_string, PATH_MAX);
1832
1833 vnode_rele(data_vp);
1834 vnode_put(data_vp);
1835
1836 (*profile)->data_vp = NULL;
1837 (*profile)->busy = 0;
1838 wakeup(*profile);
1839 return error;
1840 }
1841 names_vp = nd_names.ni_vp;
1842
1843 if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
1844 printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
1845 kmem_free(kernel_map,
1846 (vm_offset_t)profile_data_string, PATH_MAX);
1847 kmem_free(kernel_map,
1848 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1849
1850 vnode_rele(names_vp);
1851 vnode_put(names_vp);
1852 vnode_rele(data_vp);
1853 vnode_put(data_vp);
1854
1855 (*profile)->data_vp = NULL;
1856 (*profile)->busy = 0;
1857 wakeup(*profile);
1858 return error;
1859 }
1860
1861 size = file_size;
1862 if(size > 4 * PAGE_SIZE)
1863 size = 4 * PAGE_SIZE;
1864 buf_ptr = names_buf;
1865 resid_off = 0;
1866
1867 while(size) {
1868 int resid_int;
1869 error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
1870 size, resid_off,
1871 UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
1872 &resid_int, p);
1873 resid = (vm_size_t) resid_int;
1874 if((error) || (size == resid)) {
1875 if(!error) {
1876 error = EINVAL;
1877 }
1878 kmem_free(kernel_map,
1879 (vm_offset_t)profile_data_string, PATH_MAX);
1880 kmem_free(kernel_map,
1881 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1882
1883 vnode_rele(names_vp);
1884 vnode_put(names_vp);
1885 vnode_rele(data_vp);
1886 vnode_put(data_vp);
1887
1888 (*profile)->data_vp = NULL;
1889 (*profile)->busy = 0;
1890 wakeup(*profile);
1891 return error;
1892 }
1893 buf_ptr += size-resid;
1894 resid_off += size-resid;
1895 size = resid;
1896 }
1897 kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
1898
1899 (*profile)->names_vp = names_vp;
1900 (*profile)->data_vp = data_vp;
1901 (*profile)->buf_ptr = names_buf;
1902
1903 /*
1904 * at this point, both the names_vp and the data_vp have
1905 * a valid usecount and an iocount held
1906 */
1907 return 0;
1908
1909 }
1910
1911 void
1912 bsd_close_page_cache_files(
1913 struct global_profile *profile)
1914 {
1915 vnode_put(profile->data_vp);
1916 vnode_put(profile->names_vp);
1917
1918 profile->busy = 0;
1919 wakeup(profile);
1920 }
1921
1922 int
1923 bsd_read_page_cache_file(
1924 unsigned int user,
1925 int *fid,
1926 int *mod,
1927 char *app_name,
1928 struct vnode *app_vp,
1929 vm_offset_t *buffer,
1930 vm_offset_t *bufsize)
1931 {
1932
1933 boolean_t funnel_state;
1934
1935 struct proc *p;
1936 int error;
1937 unsigned int resid;
1938
1939 off_t profile;
1940 unsigned int profile_size;
1941
1942 vm_offset_t names_buf;
1943 struct vnode_attr va;
1944 struct vfs_context context;
1945
1946 kern_return_t ret;
1947
1948 struct vnode *names_vp;
1949 struct vnode *data_vp;
1950
1951 struct global_profile *uid_files;
1952
1953 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1954
1955 /* Try to open the appropriate user's profile files */
1956 /* If neither file is present, try to create them */
1957 /* If one file is present and the other not, fail. */
1958 /* If the files do exist, check them for the app_file */
1959 /* requested and read it in if present */
1960
1961
1962 error = bsd_open_page_cache_files(user, &uid_files);
1963 if(error) {
1964 thread_funnel_set(kernel_flock, funnel_state);
1965 return EINVAL;
1966 }
1967
1968 p = current_proc();
1969
1970 names_vp = uid_files->names_vp;
1971 data_vp = uid_files->data_vp;
1972 names_buf = uid_files->buf_ptr;
1973
1974 context.vc_proc = p;
1975 context.vc_ucred = kauth_cred_get();
1976
1977 VATTR_INIT(&va);
1978 VATTR_WANTED(&va, va_fileid);
1979 VATTR_WANTED(&va, va_modify_time);
1980
1981 if ((error = vnode_getattr(app_vp, &va, &context))) {
1982 printf("bsd_read_page_cache_file: Can't stat app file %s\n", app_name);
1983 bsd_close_page_cache_files(uid_files);
1984 thread_funnel_set(kernel_flock, funnel_state);
1985 return error;
1986 }
1987
1988 *fid = (u_long)va.va_fileid;
1989 *mod = va.va_modify_time.tv_sec;
1990
1991 if (bsd_search_page_cache_data_base(
1992 names_vp,
1993 (struct profile_names_header *)names_buf,
1994 app_name,
1995 (unsigned int) va.va_modify_time.tv_sec,
1996 (u_long)va.va_fileid, &profile, &profile_size) == 0) {
1997 /* profile is an offset in the profile data base */
1998 /* It is zero if no profile data was found */
1999
2000 if(profile_size == 0) {
2001 *buffer = 0;
2002 *bufsize = 0;
2003 bsd_close_page_cache_files(uid_files);
2004 thread_funnel_set(kernel_flock, funnel_state);
2005 return 0;
2006 }
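/*
 * A profile was found: allocate a kernel buffer for it and read it from
 * the data file in one or more passes, advancing the file offset by the
 * bytes actually transferred (profile_size - resid) until nothing is left.
 */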
2007 ret = kmem_alloc(kernel_map, buffer, profile_size);
2008 if(ret) {
2009 bsd_close_page_cache_files(uid_files);
2010 thread_funnel_set(kernel_flock, funnel_state);
2011 return ENOMEM;
2012 }
2013 *bufsize = profile_size;
2014 while(profile_size) {
2015 int resid_int;
2016 error = vn_rdwr(UIO_READ, data_vp,
2017 (caddr_t) *buffer, profile_size,
2018 profile, UIO_SYSSPACE32, IO_NODELOCKED,
2019 kauth_cred_get(), &resid_int, p);
2020 resid = (vm_size_t) resid_int;
2021 if((error) || (profile_size == resid)) {
2022 bsd_close_page_cache_files(uid_files);
2023 kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
2024 thread_funnel_set(kernel_flock, funnel_state);
2025 return EINVAL;
2026 }
2027 profile += profile_size - resid;
2028 profile_size = resid;
2029 }
2030 bsd_close_page_cache_files(uid_files);
2031 thread_funnel_set(kernel_flock, funnel_state);
2032 return 0;
2033 } else {
2034 bsd_close_page_cache_files(uid_files);
2035 thread_funnel_set(kernel_flock, funnel_state);
2036 return EINVAL;
2037 }
2038
2039 }
2040
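/*
 * bsd_search_page_cache_data_base:
 *   Scans the names database for an entry whose mod_date, inode and first
 *   12 name characters match.  The header and leading elements are expected
 *   in the resident 4-page buffer passed by the caller; if the element array
 *   runs past 4 pages, the remainder is read from the names vnode in further
 *   4-page chunks.  On a match *profile and *profile_size give the offset
 *   and length of the profile in the data file; both stay zero when nothing
 *   matches.
 */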
2041 int
2042 bsd_search_page_cache_data_base(
2043 struct vnode *vp,
2044 struct profile_names_header *database,
2045 char *app_name,
2046 unsigned int mod_date,
2047 unsigned int inode,
2048 off_t *profile,
2049 unsigned int *profile_size)
2050 {
2051
2052 struct proc *p;
2053
2054 unsigned int i;
2055 struct profile_element *element;
2056 unsigned int ele_total;
2057 unsigned int extended_list = 0;
2058 off_t file_off = 0;
2059 unsigned int size;
2060 off_t resid_off;
2061 unsigned int resid;
2062 vm_offset_t local_buf = 0;
2063
2064 int error;
2065 kern_return_t ret;
2066
2067 p = current_proc();
2068
2069 if(((vm_offset_t)database->element_array) !=
2070 sizeof(struct profile_names_header)) {
2071 return EINVAL;
2072 }
2073 element = (struct profile_element *)(
2074 (vm_offset_t)database->element_array +
2075 (vm_offset_t)database);
2076
2077 ele_total = database->number_of_profiles;
2078
2079 *profile = 0;
2080 *profile_size = 0;
2081 while(ele_total) {
2082 /* note: code assumes header + n*ele comes out on a page boundary */
2083 if(((local_buf == 0) && (sizeof(struct profile_names_header) +
2084 (ele_total * sizeof(struct profile_element)))
2085 > (PAGE_SIZE * 4)) ||
2086 ((local_buf != 0) &&
2087 (ele_total * sizeof(struct profile_element))
2088 > (PAGE_SIZE * 4))) {
2089 extended_list = ele_total;
2090 if(element == (struct profile_element *)
2091 ((vm_offset_t)database->element_array +
2092 (vm_offset_t)database)) {
2093 ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
2094 } else {
2095 ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
2096 }
2097 extended_list -= ele_total;
2098 }
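/*
 * ele_total now counts only the elements resident in the current 4-page
 * window; extended_list holds the count still to be read from the names
 * file on a later pass around the loop.
 */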
2099 for (i=0; i<ele_total; i++) {
2100 if((mod_date == element[i].mod_date)
2101 && (inode == element[i].inode)) {
2102 if(strncmp(element[i].name, app_name, 12) == 0) {
2103 *profile = element[i].addr;
2104 *profile_size = element[i].size;
2105 if(local_buf != 0) {
2106 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2107 }
2108 return 0;
2109 }
2110 }
2111 }
2112 if(extended_list == 0)
2113 break;
2114 if(local_buf == 0) {
2115 ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
2116 if(ret != KERN_SUCCESS) {
2117 return ENOMEM;
2118 }
2119 }
2120 element = (struct profile_element *)local_buf;
2121 ele_total = extended_list;
2122 extended_list = 0;
2123 file_off += 4 * PAGE_SIZE;
2124 if((ele_total * sizeof(struct profile_element)) >
2125 (PAGE_SIZE * 4)) {
2126 size = PAGE_SIZE * 4;
2127 } else {
2128 size = ele_total * sizeof(struct profile_element);
2129 }
2130 resid_off = 0;
2131 while(size) {
2132 int resid_int;
2133 error = vn_rdwr(UIO_READ, vp,
2134 CAST_DOWN(caddr_t, (local_buf + resid_off)),
2135 size, file_off + resid_off, UIO_SYSSPACE32,
2136 IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
2137 resid = (vm_size_t) resid_int;
2138 if((error) || (size == resid)) {
2139 if(local_buf != 0) {
2140 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2141 }
2142 return EINVAL;
2143 }
2144 resid_off += size-resid;
2145 size = resid;
2146 }
2147 }
2148 if(local_buf != 0) {
2149 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2150 }
2151 return 0;
2152 }
2153
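/*
 * bsd_write_page_cache_file:
 *   Records a new profile for file_name.  When the names database has no
 *   matching entry, a profile_element describing the new profile is appended
 *   to the names file, the header's profile count is bumped and rewritten,
 *   and the profile itself is appended at the current end of the data file.
 *   When a matching entry already exists (a "twin" written by someone else),
 *   the buffer is silently dropped and 0 is returned.
 */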
2154 int
2155 bsd_write_page_cache_file(
2156 unsigned int user,
2157 char *file_name,
2158 caddr_t buffer,
2159 vm_size_t size,
2160 int mod,
2161 int fid)
2162 {
2163 struct proc *p;
2164 int resid;
2165 off_t resid_off;
2166 int error;
2167 boolean_t funnel_state;
2168 off_t file_size;
2169 struct vfs_context context;
2170 off_t profile;
2171 unsigned int profile_size;
2172
2173 vm_offset_t names_buf;
2174 struct vnode *names_vp;
2175 struct vnode *data_vp;
2176 struct profile_names_header *profile_header;
2177 off_t name_offset;
2178 struct global_profile *uid_files;
2179
2180
2181 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2182
2183
2184 error = bsd_open_page_cache_files(user, &uid_files);
2185 if(error) {
2186 thread_funnel_set(kernel_flock, funnel_state);
2187 return EINVAL;
2188 }
2189
2190 p = current_proc();
2191
2192 names_vp = uid_files->names_vp;
2193 data_vp = uid_files->data_vp;
2194 names_buf = uid_files->buf_ptr;
2195
2196 /* Stat data file for size */
2197
2198 context.vc_proc = p;
2199 context.vc_ucred = kauth_cred_get();
2200
2201 if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
2202 printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
2203 bsd_close_page_cache_files(uid_files);
2204 thread_funnel_set(kernel_flock, funnel_state);
2205 return error;
2206 }
2207
2208 if (bsd_search_page_cache_data_base(names_vp,
2209 (struct profile_names_header *)names_buf,
2210 file_name, (unsigned int) mod,
2211 fid, &profile, &profile_size) == 0) {
2212 /* profile is an offset in the profile data base */
2213 /* It is zero if no profile data was found */
2214
2215 if(profile_size == 0) {
2216 unsigned int header_size;
2217 vm_offset_t buf_ptr;
2218
2219 /* Our Write case */
2220
2221 /* read header for last entry */
2222 profile_header =
2223 (struct profile_names_header *)names_buf;
2224 name_offset = sizeof(struct profile_names_header) +
2225 (sizeof(struct profile_element)
2226 * profile_header->number_of_profiles);
2227 profile_header->number_of_profiles += 1;
2228
2229 if(name_offset < PAGE_SIZE * 4) {
2230 struct profile_element *name;
2231 /* write new entry */
2232 name = (struct profile_element *)
2233 (names_buf + (vm_offset_t)name_offset);
2234 name->addr = file_size;
2235 name->size = size;
2236 name->mod_date = mod;
2237 name->inode = fid;
2238 strncpy (name->name, file_name, 12);
2239 } else {
2240 unsigned int ele_size;
2241 struct profile_element name;
2242 /* write new entry */
2243 name.addr = file_size;
2244 name.size = size;
2245 name.mod_date = mod;
2246 name.inode = fid;
2247 strncpy (name.name, file_name, 12);
2248 /* write element out separately */
2249 ele_size = sizeof(struct profile_element);
2250 buf_ptr = (vm_offset_t)&name;
2251 resid_off = name_offset;
2252
2253 while(ele_size) {
2254 error = vn_rdwr(UIO_WRITE, names_vp,
2255 (caddr_t)buf_ptr,
2256 ele_size, resid_off,
2257 UIO_SYSSPACE32, IO_NODELOCKED,
2258 kauth_cred_get(), &resid, p);
2259 if(error) {
2260 printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
2261 bsd_close_page_cache_files(
2262 uid_files);
2263 thread_funnel_set(
2264 kernel_flock,
2265 funnel_state);
2266 return error;
2267 }
2268 buf_ptr += (vm_offset_t)
2269 ele_size-resid;
2270 resid_off += ele_size-resid;
2271 ele_size = resid;
2272 }
2273 }
2274
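/*
 * Rewrite the names file header.  If the new element landed inside the
 * resident 4-page buffer, the header and every element up to and including
 * the new one are written back in one pass; otherwise only the header
 * (carrying the bumped profile count) is rewritten, since the element was
 * already written out separately above.
 */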
2275 if(name_offset < PAGE_SIZE * 4) {
2276 header_size = name_offset +
2277 sizeof(struct profile_element);
2278
2279 } else {
2280 header_size =
2281 sizeof(struct profile_names_header);
2282 }
2283 buf_ptr = (vm_offset_t)profile_header;
2284 resid_off = 0;
2285
2286 /* write names file header */
2287 while(header_size) {
2288 error = vn_rdwr(UIO_WRITE, names_vp,
2289 (caddr_t)buf_ptr,
2290 header_size, resid_off,
2291 UIO_SYSSPACE32, IO_NODELOCKED,
2292 kauth_cred_get(), &resid, p);
2293 if(error) {
2294 printf("bsd_write_page_cache_file: Can't write header %x\n", user);
2295 bsd_close_page_cache_files(
2296 uid_files);
2297 thread_funnel_set(
2298 kernel_flock, funnel_state);
2299 return error;
2300 }
2301 buf_ptr += (vm_offset_t)header_size-resid;
2302 resid_off += header_size-resid;
2303 header_size = resid;
2304 }
2305 /* write profile to data file */
2306 resid_off = file_size;
2307 while(size) {
2308 error = vn_rdwr(UIO_WRITE, data_vp,
2309 (caddr_t)buffer, size, resid_off,
2310 UIO_SYSSPACE32, IO_NODELOCKED,
2311 kauth_cred_get(), &resid, p);
2312 if(error) {
2313 printf("bsd_write_page_cache_file: Can't write profile data %x\n", user);
2314 bsd_close_page_cache_files(
2315 uid_files);
2316 thread_funnel_set(
2317 kernel_flock, funnel_state);
2318 return error;
2319 }
2320 buffer += size-resid;
2321 resid_off += size-resid;
2322 size = resid;
2323 }
2324 bsd_close_page_cache_files(uid_files);
2325 thread_funnel_set(kernel_flock, funnel_state);
2326 return 0;
2327 }
2328 /* Someone else wrote a twin profile before us */
2329 bsd_close_page_cache_files(uid_files);
2330 thread_funnel_set(kernel_flock, funnel_state);
2331 return 0;
2332 } else {
2333 bsd_close_page_cache_files(uid_files);
2334 thread_funnel_set(kernel_flock, funnel_state);
2335 return EINVAL;
2336 }
2337
2338 }
2339
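/*
 * prepare_profile_database:
 *   Creates the per-user profile files under /var/vm/app_profile/
 *   ("<uid-in-hex>_data" and "<uid-in-hex>_names") with O_CREAT | O_EXCL,
 *   writes an empty profile_names_header into the names file, and sets the
 *   owner of both files to the requesting user.  If the data file already
 *   exists the exclusive open fails and 0 is returned, presumably because
 *   the database is already in place.
 */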
2340 int
2341 prepare_profile_database(int user)
2342 {
2343 const char *cache_path = "/var/vm/app_profile/";
2344 struct proc *p;
2345 int error;
2346 int resid;
2347 off_t resid_off;
2348 vm_size_t size;
2349
2350 struct vnode *names_vp;
2351 struct vnode *data_vp;
2352 vm_offset_t names_buf;
2353 vm_offset_t buf_ptr;
2354
2355 int profile_names_length;
2356 int profile_data_length;
2357 char *profile_data_string;
2358 char *profile_names_string;
2359 char *substring;
2360
2361 struct vnode_attr va;
2362 struct vfs_context context;
2363
2364 struct profile_names_header *profile_header;
2365 kern_return_t ret;
2366
2367 struct nameidata nd_names;
2368 struct nameidata nd_data;
2369
2370 p = current_proc();
2371
2372 context.vc_proc = p;
2373 context.vc_ucred = kauth_cred_get();
2374
2375 ret = kmem_alloc(kernel_map,
2376 (vm_offset_t *)&profile_data_string, PATH_MAX);
2377
2378 if(ret) {
2379 return ENOMEM;
2380 }
2381
2382 /* Split the buffer in half since we know the size of */
2383 /* our file path and our allocation is adequate for */
2384 /* both file path names */
2385 profile_names_string = profile_data_string + (PATH_MAX/2);
2386
2387
2388 strcpy(profile_data_string, cache_path);
2389 strcpy(profile_names_string, cache_path);
2390 profile_names_length = profile_data_length
2391 = strlen(profile_data_string);
2392 substring = profile_data_string + profile_data_length;
2393 sprintf(substring, "%x_data", user);
2394 substring = profile_names_string + profile_names_length;
2395 sprintf(substring, "%x_names", user);
2396
2397 /* We now have the absolute file names */
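/* e.g. for uid 501 this yields /var/vm/app_profile/1f5_data */
/* and /var/vm/app_profile/1f5_names */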
2398
2399 ret = kmem_alloc(kernel_map,
2400 (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
2401 if(ret) {
2402 kmem_free(kernel_map,
2403 (vm_offset_t)profile_data_string, PATH_MAX);
2404 return ENOMEM;
2405 }
2406
2407 NDINIT(&nd_names, LOOKUP, FOLLOW,
2408 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
2409 NDINIT(&nd_data, LOOKUP, FOLLOW,
2410 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
2411
2412 if ( (error = vn_open(&nd_data,
2413 O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
2414 kmem_free(kernel_map,
2415 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2416 kmem_free(kernel_map,
2417 (vm_offset_t)profile_data_string, PATH_MAX);
2418
2419 return 0;
2420 }
2421 data_vp = nd_data.ni_vp;
2422
2423 if ( (error = vn_open(&nd_names,
2424 O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
2425 printf("prepare_profile_database: Can't create CacheNames %s\n",
2426 profile_names_string);
2427 kmem_free(kernel_map,
2428 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2429 kmem_free(kernel_map,
2430 (vm_offset_t)profile_data_string, PATH_MAX);
2431
2432 vnode_rele(data_vp);
2433 vnode_put(data_vp);
2434
2435 return error;
2436 }
2437 names_vp = nd_names.ni_vp;
2438
2439 /* Write Header for new names file */
2440
2441 profile_header = (struct profile_names_header *)names_buf;
2442
2443 profile_header->number_of_profiles = 0;
2444 profile_header->user_id = user;
2445 profile_header->version = 1;
2446 profile_header->element_array =
2447 sizeof(struct profile_names_header);
2448 profile_header->spare1 = 0;
2449 profile_header->spare2 = 0;
2450 profile_header->spare3 = 0;
2451
2452 size = sizeof(struct profile_names_header);
2453 buf_ptr = (vm_offset_t)profile_header;
2454 resid_off = 0;
2455
2456 while(size) {
2457 error = vn_rdwr(UIO_WRITE, names_vp,
2458 (caddr_t)buf_ptr, size, resid_off,
2459 UIO_SYSSPACE32, IO_NODELOCKED,
2460 kauth_cred_get(), &resid, p);
2461 if(error) {
2462 printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
2463 kmem_free(kernel_map,
2464 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2465 kmem_free(kernel_map,
2466 (vm_offset_t)profile_data_string,
2467 PATH_MAX);
2468
2469 vnode_rele(names_vp);
2470 vnode_put(names_vp);
2471 vnode_rele(data_vp);
2472 vnode_put(data_vp);
2473
2474 return error;
2475 }
2476 buf_ptr += size-resid;
2477 resid_off += size-resid;
2478 size = resid;
2479 }
2480 VATTR_INIT(&va);
2481 VATTR_SET(&va, va_uid, user);
2482
2483 error = vnode_setattr(names_vp, &va, &context);
2484 if(error) {
2485 printf("prepare_profile_database: "
2486 "Can't set user %s\n", profile_names_string);
2487 }
2488 vnode_rele(names_vp);
2489 vnode_put(names_vp);
2490
2491 VATTR_INIT(&va);
2492 VATTR_SET(&va, va_uid, user);
2493 error = vnode_setattr(data_vp, &va, &context);
2494 if(error) {
2495 printf("prepare_profile_database: "
2496 "Can't set user %s\n", profile_data_string);
2497 }
2498 vnode_rele(data_vp);
2499 vnode_put(data_vp);
2500
2501 kmem_free(kernel_map,
2502 (vm_offset_t)profile_data_string, PATH_MAX);
2503 kmem_free(kernel_map,
2504 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2505 return 0;
2506
2507 }