apple/xnu.git: bsd/vm/vm_unix.c (blob 7d1ad97a7dd571e0ed4456526d025154e0b28ee4)
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * Mach Operating System
32 * Copyright (c) 1987 Carnegie-Mellon University
33 * All rights reserved. The CMU software License Agreement specifies
34 * the terms and conditions for use and redistribution.
35 */
36
37 /*
38 */
39
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/time_value.h>
49 #include <mach/vm_map.h>
50 #include <mach/vm_param.h>
51 #include <mach/vm_prot.h>
52 #include <mach/port.h>
53
54 #include <sys/file_internal.h>
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/dir.h>
58 #include <sys/namei.h>
59 #include <sys/proc_internal.h>
60 #include <sys/kauth.h>
61 #include <sys/vm.h>
62 #include <sys/file.h>
63 #include <sys/vnode_internal.h>
64 #include <sys/mount.h>
65 #include <sys/trace.h>
66 #include <sys/kernel.h>
67 #include <sys/ubc_internal.h>
68 #include <sys/user.h>
69 #include <sys/syslog.h>
70 #include <sys/stat.h>
71 #include <sys/sysproto.h>
72 #include <sys/mman.h>
73 #include <sys/sysctl.h>
74
75 #include <bsm/audit_kernel.h>
76 #include <bsm/audit_kevents.h>
77
78 #include <kern/kalloc.h>
79 #include <vm/vm_map.h>
80 #include <vm/vm_kern.h>
81
82 #include <machine/spl.h>
83
84 #include <mach/shared_memory_server.h>
85 #include <vm/vm_shared_memory_server.h>
86
87 #include <vm/vm_protos.h>
88
89 void
90 log_nx_failure(addr64_t vaddr, vm_prot_t prot)
91 {
92 printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
93 }
94
95
96 int
97 useracc(
98 user_addr_t addr,
99 user_size_t len,
100 int prot)
101 {
102 return (vm_map_check_protection(
103 current_map(),
104 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
105 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
106 }
107
108 int
109 vslock(
110 user_addr_t addr,
111 user_size_t len)
112 {
113 kern_return_t kret;
114 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
115 vm_map_round_page(addr+len),
116 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
117
118 switch (kret) {
119 case KERN_SUCCESS:
120 return (0);
121 case KERN_INVALID_ADDRESS:
122 case KERN_NO_SPACE:
123 return (ENOMEM);
124 case KERN_PROTECTION_FAILURE:
125 return (EACCES);
126 default:
127 return (EINVAL);
128 }
129 }
130
131 int
132 vsunlock(
133 user_addr_t addr,
134 user_size_t len,
135 __unused int dirtied)
136 {
137 #if FIXME /* [ */
138 pmap_t pmap;
139 vm_page_t pg;
140 vm_map_offset_t vaddr;
141 ppnum_t paddr;
142 #endif /* FIXME ] */
143 kern_return_t kret;
144
145 #if FIXME /* [ */
146 if (dirtied) {
147 pmap = get_task_pmap(current_task());
148 for (vaddr = vm_map_trunc_page(addr);
149 vaddr < vm_map_round_page(addr+len);
150 vaddr += PAGE_SIZE) {
151 paddr = pmap_extract(pmap, vaddr);
152 pg = PHYS_TO_VM_PAGE(paddr);
153 vm_page_set_modified(pg);
154 }
155 }
156 #endif /* FIXME ] */
157 #ifdef lint
158 dirtied++;
159 #endif /* lint */
160 kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
161 vm_map_round_page(addr+len), FALSE);
162 switch (kret) {
163 case KERN_SUCCESS:
164 return (0);
165 case KERN_INVALID_ADDRESS:
166 case KERN_NO_SPACE:
167 return (ENOMEM);
168 case KERN_PROTECTION_FAILURE:
169 return (EACCES);
170 default:
171 return (EINVAL);
172 }
173 }
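/*
 * Illustrative sketch (not part of the original file): a typical caller
 * wires a user buffer with vslock() before touching it from an I/O path
 * and unwires it with vsunlock() afterwards.  The address "uaddr" and
 * length "len" below are hypothetical.
 *
 *	int err = vslock(uaddr, len);
 *	if (err == 0) {
 *		... perform the transfer into/out of the wired pages ...
 *		(void) vsunlock(uaddr, len, 1);	// 1 => pages were dirtied
 *	}
 */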
174
175 int
176 subyte(
177 user_addr_t addr,
178 int byte)
179 {
180 char character;
181
182 character = (char)byte;
183 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
184 }
185
186 int
187 suibyte(
188 user_addr_t addr,
189 int byte)
190 {
191 char character;
192
193 character = (char)byte;
194 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
195 }
196
197 int fubyte(user_addr_t addr)
198 {
199 unsigned char byte;
200
201 if (copyin(addr, (void *) &byte, sizeof(char)))
202 return(-1);
203 return(byte);
204 }
205
206 int fuibyte(user_addr_t addr)
207 {
208 unsigned char byte;
209
210 if (copyin(addr, (void *) &(byte), sizeof(char)))
211 return(-1);
212 return(byte);
213 }
214
215 int
216 suword(
217 user_addr_t addr,
218 long word)
219 {
220 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
221 }
222
223 long fuword(user_addr_t addr)
224 {
225 long word;
226
227 if (copyin(addr, (void *) &word, sizeof(int)))
228 return(-1);
229 return(word);
230 }
231
232 /* suiword and fuiword are the same as suword and fuword, respectively */
233
234 int
235 suiword(
236 user_addr_t addr,
237 long word)
238 {
239 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
240 }
241
242 long fuiword(user_addr_t addr)
243 {
244 long word;
245
246 if (copyin(addr, (void *) &word, sizeof(int)))
247 return(-1);
248 return(word);
249 }
250
251 /*
252 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
253 * fetching and setting of process-sized size_t and pointer values.
254 */
255 int
256 sulong(user_addr_t addr, int64_t word)
257 {
258
259 if (IS_64BIT_PROCESS(current_proc())) {
260 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
261 } else {
262 return(suiword(addr, (long)word));
263 }
264 }
265
266 int64_t
267 fulong(user_addr_t addr)
268 {
269 int64_t longword;
270
271 if (IS_64BIT_PROCESS(current_proc())) {
272 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
273 return(-1);
274 return(longword);
275 } else {
276 return((int64_t)fuiword(addr));
277 }
278 }
279
280 int
281 suulong(user_addr_t addr, uint64_t uword)
282 {
283
284 if (IS_64BIT_PROCESS(current_proc())) {
285 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
286 } else {
287 return(suiword(addr, (u_long)uword));
288 }
289 }
290
291 uint64_t
292 fuulong(user_addr_t addr)
293 {
294 uint64_t ulongword;
295
296 if (IS_64BIT_PROCESS(current_proc())) {
297 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
298 return(-1ULL);
299 return(ulongword);
300 } else {
301 return((uint64_t)fuiword(addr));
302 }
303 }
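/*
 * Illustrative sketch (not part of the original file): with the interface
 * described above, a caller can fetch or store a pointer-sized user value
 * without caring whether the current process is 32- or 64-bit.  "uaddr"
 * is a hypothetical user address holding a user pointer.
 *
 *	int64_t uptr = fulong(uaddr);	// reads 4 or 8 bytes, as appropriate
 *	if (sulong(uaddr, uptr) != 0)	// writes it back at the same width
 *		... handle the copyout failure ...
 */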
304
305 int
306 swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
307 {
308 return(ENOTSUP);
309 }
310
311
312 kern_return_t
313 pid_for_task(
314 struct pid_for_task_args *args)
315 {
316 mach_port_name_t t = args->t;
317 user_addr_t pid_addr = args->pid;
318 struct proc * p;
319 task_t t1;
320 int pid = -1;
321 kern_return_t err = KERN_SUCCESS;
322 boolean_t funnel_state;
323
324 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
325 AUDIT_ARG(mach_port1, t);
326
327 funnel_state = thread_funnel_set(kernel_flock, TRUE);
328 t1 = port_name_to_task(t);
329
330 if (t1 == TASK_NULL) {
331 err = KERN_FAILURE;
332 goto pftout;
333 } else {
334 p = get_bsdtask_info(t1);
335 if (p) {
336 pid = proc_pid(p);
337 err = KERN_SUCCESS;
338 } else {
339 err = KERN_FAILURE;
340 }
341 }
342 task_deallocate(t1);
343 pftout:
344 AUDIT_ARG(pid, pid);
345 (void) copyout((char *) &pid, pid_addr, sizeof(int));
346 thread_funnel_set(kernel_flock, funnel_state);
347 AUDIT_MACH_SYSCALL_EXIT(err);
348 return(err);
349 }
350
351 /*
352 * Routine: task_for_pid
353 * Purpose:
354 * Get the task port for another "process", named by its
355 * process ID on the same host as "target_task".
356 *
357 * Only permitted to privileged processes, or processes
358 * with the same user ID.
359 *
360 * XXX This should be a BSD system call, not a Mach trap!!!
361 */
362 /*
363 *
364  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: only self (or the superuser) is allowed
365  * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: self, the superuser, or related (same-uid, non-sugid) tasks are allowed
366  * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self and the superuser are allowed; members of the tfp groups may access related tasks
367 *
368 */
369 static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
370 /* the group is initialized to the kmem group and is modifiable by sysctl */
371 static int tfp_group_inited = 0; /* policy groups are loaded ... */
372 static gid_t tfp_group_ronly = 0; /* procview group */
373 static gid_t tfp_group_rw = 0; /* procmod group */
374
375 kern_return_t
376 task_for_pid(
377 struct task_for_pid_args *args)
378 {
379 mach_port_name_t target_tport = args->target_tport;
380 int pid = args->pid;
381 user_addr_t task_addr = args->t;
382 struct uthread *uthread;
383 struct proc *p;
384 struct proc *p1;
385 task_t t1;
386 mach_port_name_t tret;
387 void * sright;
388 int error = 0;
389 int is_member = 0;
390 boolean_t funnel_state;
391 boolean_t ispermitted = FALSE;
392 char procname[MAXCOMLEN+1];
393
394 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
395 AUDIT_ARG(pid, pid);
396 AUDIT_ARG(mach_port1, target_tport);
397
398 t1 = port_name_to_task(target_tport);
399 if (t1 == TASK_NULL) {
400 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
401 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
402 return(KERN_FAILURE);
403 }
404
405 funnel_state = thread_funnel_set(kernel_flock, TRUE);
406
407 p1 = current_proc();
408
409 /*
410 * Delayed binding of thread credential to process credential, if we
411 * are not running with an explicitly set thread credential.
412 */
413 uthread = get_bsdthread_info(current_thread());
414 if (uthread->uu_ucred != p1->p_ucred &&
415 (uthread->uu_flag & UT_SETUID) == 0) {
416 kauth_cred_t old = uthread->uu_ucred;
417 proc_lock(p1);
418 uthread->uu_ucred = p1->p_ucred;
419 kauth_cred_ref(uthread->uu_ucred);
420 proc_unlock(p1);
421 if (old != NOCRED)
422 kauth_cred_rele(old);
423 }
424
425 p = pfind(pid);
426 AUDIT_ARG(process, p);
427
428 switch (tfp_policy) {
429
430 case KERN_TFP_POLICY_PERMISSIVE:
431 /* self or suser or related ones */
432 if ((p != (struct proc *) 0)
433 && (p1 != (struct proc *) 0)
434 && (
435 (p1 == p)
436 || !(suser(kauth_cred_get(), 0))
437 || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
438 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
439 && ((p->p_flag & P_SUGID) == 0))
440 )
441 && (p->p_stat != SZOMB)
442 )
443 ispermitted = TRUE;
444 break;
445
446 case KERN_TFP_POLICY_RESTRICTED:
447 /* self or suser or setgid and related ones only */
448 if ((p != (struct proc *) 0)
449 && (p1 != (struct proc *) 0)
450 && (
451 (p1 == p)
452 || !(suser(kauth_cred_get(), 0))
453 || (((tfp_group_inited != 0) &&
454 (
455 ((kauth_cred_ismember_gid(kauth_cred_get(),
456 tfp_group_ronly, &is_member) == 0) && is_member)
457 ||((kauth_cred_ismember_gid(kauth_cred_get(),
458 tfp_group_rw, &is_member) == 0) && is_member)
459 )
460 )
461 && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
462 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
463 && ((p->p_flag & P_SUGID) == 0))
464 )
465 )
466 && (p->p_stat != SZOMB)
467 )
468 ispermitted = TRUE;
469
470 break;
471
472 case KERN_TFP_POLICY_DENY:
473 /* self or suser only */
474 default:
475 /* do not return task port of other task at all */
476 if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
477 && ((p1 == p) || !(suser(kauth_cred_get(), 0))))
478 ispermitted = TRUE;
479 else
480 ispermitted = FALSE;
481 break;
482 };
483
484
485 if (ispermitted == TRUE) {
486 if (p->task != TASK_NULL) {
487 task_reference(p->task);
488 sright = (void *)convert_task_to_port(p->task);
489 tret = ipc_port_copyout_send(
490 sright,
491 get_task_ipcspace(current_task()));
492 } else
493 tret = MACH_PORT_NULL;
494 AUDIT_ARG(mach_port2, tret);
495 (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
496 task_deallocate(t1);
497 error = KERN_SUCCESS;
498 goto tfpout;
499 } else {
500 /*
501  * There is no guarantee that p_comm is null-terminated and the
502  * kernel implementations of the string functions are incomplete.
503  * So, to ensure stale info is not leaked out, bzero the buffer.
504 */
505 bzero(&procname[0], MAXCOMLEN+1);
506 strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
507 if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
508 log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
509 ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
510 ((p != PROC_NULL)?(p->p_pid):0));
511 }
512
513 task_deallocate(t1);
514 tret = MACH_PORT_NULL;
515 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
516 error = KERN_FAILURE;
517 tfpout:
518 thread_funnel_set(kernel_flock, funnel_state);
519 AUDIT_MACH_SYSCALL_EXIT(error);
520 return(error);
521 }
522
523 /*
524 * Routine: task_name_for_pid
525 * Purpose:
526 * Get the task name port for another "process", named by its
527 * process ID on the same host as "target_task".
528 *
529 * Only permitted to privileged processes, or processes
530 * with the same user ID.
531 *
532 * XXX This should be a BSD system call, not a Mach trap!!!
533 */
534
535 kern_return_t
536 task_name_for_pid(
537 struct task_name_for_pid_args *args)
538 {
539 mach_port_name_t target_tport = args->target_tport;
540 int pid = args->pid;
541 user_addr_t task_addr = args->t;
542 struct uthread *uthread;
543 struct proc *p;
544 struct proc *p1;
545 task_t t1;
546 mach_port_name_t tret;
547 void * sright;
548 int error = 0;
549 boolean_t funnel_state;
550
551 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
552 AUDIT_ARG(pid, pid);
553 AUDIT_ARG(mach_port1, target_tport);
554
555 t1 = port_name_to_task(target_tport);
556 if (t1 == TASK_NULL) {
557 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
558 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
559 return(KERN_FAILURE);
560 }
561
562 funnel_state = thread_funnel_set(kernel_flock, TRUE);
563
564 p1 = current_proc();
565
566 /*
567 * Delayed binding of thread credential to process credential, if we
568 * are not running with an explicitly set thread credential.
569 */
570 uthread = get_bsdthread_info(current_thread());
571 if (uthread->uu_ucred != p1->p_ucred &&
572 (uthread->uu_flag & UT_SETUID) == 0) {
573 kauth_cred_t old = uthread->uu_ucred;
574 proc_lock(p1);
575 uthread->uu_ucred = p1->p_ucred;
576 kauth_cred_ref(uthread->uu_ucred);
577 proc_unlock(p1);
578 if (old != NOCRED)
579 kauth_cred_rele(old);
580 }
581
582 p = pfind(pid);
583 AUDIT_ARG(process, p);
584
585 if ((p != (struct proc *) 0)
586 && (p->p_stat != SZOMB)
587 && (p1 != (struct proc *) 0)
588 && ((p1 == p)
589 || !(suser(kauth_cred_get(), 0))
590 || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
591 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))
592 {
593 if (p->task != TASK_NULL)
594 {
595 task_reference(p->task);
596 sright = (void *)convert_task_name_to_port(p->task);
597 tret = ipc_port_copyout_send(
598 sright,
599 get_task_ipcspace(current_task()));
600 } else
601 tret = MACH_PORT_NULL;
602 AUDIT_ARG(mach_port2, tret);
603 (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
604 task_deallocate(t1);
605 error = KERN_SUCCESS;
606 goto tnfpout;
607 }
608
609 task_deallocate(t1);
610 tret = MACH_PORT_NULL;
611 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
612 error = KERN_FAILURE;
613 tnfpout:
614 thread_funnel_set(kernel_flock, funnel_state);
615 AUDIT_MACH_SYSCALL_EXIT(error);
616 return(error);
617 }
618
619 static int
620 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
621 __unused int arg2, struct sysctl_req *req)
622 {
623 int error = 0;
624 int new_value;
625
626 error = SYSCTL_OUT(req, arg1, sizeof(int));
627 if (error || req->newptr == USER_ADDR_NULL)
628 return(error);
629
630 if (!is_suser())
631 return(EPERM);
632
633 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
634 goto out;
635 }
636 if ((new_value == KERN_TFP_POLICY_DENY)
637 || (new_value == KERN_TFP_POLICY_PERMISSIVE)
638 || (new_value == KERN_TFP_POLICY_RESTRICTED))
639 tfp_policy = new_value;
640 else
641 error = EINVAL;
642 out:
643 return(error);
644
645 }
646
647 static int
648 sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
649 __unused int arg2, struct sysctl_req *req)
650 {
651 int error = 0;
652 int new_value;
653
654 error = SYSCTL_OUT(req, arg1, sizeof(int));
655 if (error || req->newptr == USER_ADDR_NULL)
656 return(error);
657
658 if (!is_suser())
659 return(EPERM);
660
661 /*
662  * Once set, this cannot be reset until the next boot. launchd sets it
663  * during its pid 1 init and no one can set it after that.
664 */
665 if (tfp_group_inited != 0)
666 return(EPERM);
667
668 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
669 goto out;
670 }
671
672 if (new_value >= 100)
673 error = EINVAL;
674 else {
675 if (arg1 == &tfp_group_ronly)
676 tfp_group_ronly = new_value;
677 else if (arg1 == &tfp_group_rw)
678 tfp_group_rw = new_value;
679 else
680 error = EINVAL;
681 if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 ))
682 tfp_group_inited = 1;
683 }
684
685 out:
686 return(error);
687 }
688
689 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
690 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
691 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
692 SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
693 &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group");
694 SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
695 &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group");
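/*
 * Illustrative sketch (not part of the original file): the knobs exported
 * above appear in user space as "kern.tfp.policy", "kern.tfp.read_group"
 * and "kern.tfp.rw_group".  Assuming the standard sysctlbyname(3)
 * interface, a process could query or (as root) change the policy:
 *
 *	int policy;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
 *		printf("task_for_pid policy = %d\n", policy);
 *	// root only; must be one of the KERN_TFP_POLICY_* values:
 *	// sysctlbyname("kern.tfp.policy", NULL, NULL, &policy, sizeof(policy));
 *
 * The two group ids may be set until both are non-zero (launchd does this
 * during its pid 1 init); after that sysctl_settfp_groups() refuses
 * further changes, and each value must be < 100.
 */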
696
697
698 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, "");
699
700 /*
701 * shared_region_make_private_np:
702 *
703 * This system call is for "dyld" only.
704 *
705 * It creates a private copy of the current process's "shared region" for
706 * split libraries. "dyld" uses this when the shared region is full or
707 * it needs to load a split library that conflicts with an already loaded one
708 * that this process doesn't need. "dyld" specifies a set of address ranges
709 * that it wants to keep in the now-private "shared region". These cover
710 * the set of split libraries that the process needs so far. The kernel needs
711 * to deallocate the rest of the shared region, so that it's available for
712 * more libraries for this process.
713 */
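/*
 * Illustrative sketch (not part of the original file): "dyld" would call
 * this with uap->rangeCount set to the number of address ranges it wants
 * to preserve and uap->ranges pointing at a user-space array of that many
 * struct shared_region_range_np entries describing the parts of the
 * region to keep; the kernel deallocates the rest of the now-private
 * shared region.
 */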
714 int
715 shared_region_make_private_np(
716 struct proc *p,
717 struct shared_region_make_private_np_args *uap,
718 __unused int *retvalp)
719 {
720 int error;
721 kern_return_t kr;
722 boolean_t using_shared_regions;
723 user_addr_t user_ranges;
724 unsigned int range_count;
725 vm_size_t ranges_size;
726 struct shared_region_range_np *ranges;
727 shared_region_mapping_t shared_region;
728 struct shared_region_task_mappings task_mapping_info;
729 shared_region_mapping_t next;
730
731 ranges = NULL;
732
733 range_count = uap->rangeCount;
734 user_ranges = uap->ranges;
735 ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));
736
737 SHARED_REGION_TRACE(
738 SHARED_REGION_TRACE_INFO,
739 ("shared_region: %p [%d(%s)] "
740 "make_private(rangecount=%d)\n",
741 current_thread(), p->p_pid, p->p_comm, range_count));
742
743 /* allocate kernel space for the "ranges" */
744 if (range_count != 0) {
745 if ((mach_vm_size_t) ranges_size !=
746 (mach_vm_size_t) range_count * sizeof (ranges[0])) {
747 /* 32-bit integer overflow */
748 error = EINVAL;
749 goto done;
750 }
751 kr = kmem_alloc(kernel_map,
752 (vm_offset_t *) &ranges,
753 ranges_size);
754 if (kr != KERN_SUCCESS) {
755 error = ENOMEM;
756 goto done;
757 }
758
759 /* copy "ranges" from user-space */
760 error = copyin(user_ranges,
761 ranges,
762 ranges_size);
763 if (error) {
764 goto done;
765 }
766 }
767
768 if (p->p_flag & P_NOSHLIB) {
769 /* no split library has been mapped for this process so far */
770 using_shared_regions = FALSE;
771 } else {
772 /* this process has already mapped some split libraries */
773 using_shared_regions = TRUE;
774 }
775
776 /*
777 * Get a private copy of the current shared region.
778 * Do not chain it to the system-wide shared region, as we'll want
779 * to map other split libraries in place of the old ones. We want
780 * to completely detach from the system-wide shared region and go our
781 * own way after this point, not sharing anything with other processes.
782 */
783 error = clone_system_shared_regions(using_shared_regions,
784 FALSE, /* chain_regions */
785 ENV_DEFAULT_ROOT);
786 if (error) {
787 goto done;
788 }
789
790 /* get info on the newly allocated shared region */
791 vm_get_shared_region(current_task(), &shared_region);
792 task_mapping_info.self = (vm_offset_t) shared_region;
793 shared_region_mapping_info(shared_region,
794 &(task_mapping_info.text_region),
795 &(task_mapping_info.text_size),
796 &(task_mapping_info.data_region),
797 &(task_mapping_info.data_size),
798 &(task_mapping_info.region_mappings),
799 &(task_mapping_info.client_base),
800 &(task_mapping_info.alternate_base),
801 &(task_mapping_info.alternate_next),
802 &(task_mapping_info.fs_base),
803 &(task_mapping_info.system),
804 &(task_mapping_info.flags),
805 &next);
806
807 /*
808 * We now have our private copy of the shared region, as it was before
809 * the call to clone_system_shared_regions(). We now need to clean it
810 * up and keep only the memory areas described by the "ranges" array.
811 */
812 kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
813 switch (kr) {
814 case KERN_SUCCESS:
815 error = 0;
816 break;
817 default:
818 error = EINVAL;
819 goto done;
820 }
821
822 done:
823 if (ranges != NULL) {
824 kmem_free(kernel_map,
825 (vm_offset_t) ranges,
826 ranges_size);
827 ranges = NULL;
828 }
829
830 SHARED_REGION_TRACE(
831 SHARED_REGION_TRACE_INFO,
832 ("shared_region: %p [%d(%s)] "
833 "make_private(rangecount=%d) -> %d "
834 "shared_region=%p[%x,%x,%x]\n",
835 current_thread(), p->p_pid, p->p_comm,
836 range_count, error, shared_region,
837 task_mapping_info.fs_base,
838 task_mapping_info.system,
839 task_mapping_info.flags));
840
841 return error;
842 }
843
844
845 /*
846 * shared_region_map_file_np:
847 *
848 * This system call is for "dyld" only.
849 *
850 * "dyld" wants to map parts of a split library in the shared region.
851 * We get a file descriptor on the split library to be mapped and a set
852  * of mapping instructions, describing which parts of the file to map into
853  * which areas of the shared segment and with what protection.
854  * The "shared region" is split into 2 areas:
855 * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
856 * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
857 *
858 */
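/*
 * Illustrative sketch (not part of the original file): each element of the
 * "mappings" array is a struct shared_file_mapping_np.  For a split
 * library's TEXT segment, dyld might pass something like:
 *
 *	mapping.sfm_address  = 0x90000000 + <offset in the read-only area>;
 *	mapping.sfm_size     = <size of the TEXT segment>;
 *	mapping.sfm_max_prot = VM_PROT_READ | VM_PROT_EXECUTE;
 *
 * (The structure also describes the corresponding file offset and initial
 * protection, which map_shared_file() below consumes.)
 */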
859 int
860 shared_region_map_file_np(
861 struct proc *p,
862 struct shared_region_map_file_np_args *uap,
863 __unused int *retvalp)
864 {
865 int error;
866 kern_return_t kr;
867 int fd;
868 unsigned int mapping_count;
869 user_addr_t user_mappings; /* 64-bit */
870 user_addr_t user_slide_p; /* 64-bit */
871 struct shared_file_mapping_np *mappings;
872 vm_size_t mappings_size;
873 struct fileproc *fp;
874 mach_vm_offset_t slide;
875 struct vnode *vp;
876 struct vfs_context context;
877 memory_object_control_t file_control;
878 memory_object_size_t file_size;
879 shared_region_mapping_t shared_region;
880 struct shared_region_task_mappings task_mapping_info;
881 shared_region_mapping_t next;
882 shared_region_mapping_t default_shared_region;
883 boolean_t using_default_region;
884 unsigned int j;
885 vm_prot_t max_prot;
886 mach_vm_offset_t base_offset, end_offset;
887 mach_vm_offset_t original_base_offset;
888 boolean_t mappings_in_segment;
889 #define SFM_MAX_STACK 6
890 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
891
892 mappings_size = 0;
893 mappings = NULL;
894 mapping_count = 0;
895 fp = NULL;
896 vp = NULL;
897
898 /* get file descriptor for split library from arguments */
899 fd = uap->fd;
900
901 /* get file structure from file descriptor */
902 error = fp_lookup(p, fd, &fp, 0);
903 if (error) {
904 SHARED_REGION_TRACE(
905 SHARED_REGION_TRACE_ERROR,
906 ("shared_region: %p [%d(%s)] map_file: "
907 "fd=%d lookup failed (error=%d)\n",
908 current_thread(), p->p_pid, p->p_comm, fd, error));
909 goto done;
910 }
911
912 /* make sure we're attempting to map a vnode */
913 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
914 SHARED_REGION_TRACE(
915 SHARED_REGION_TRACE_ERROR,
916 ("shared_region: %p [%d(%s)] map_file: "
917 "fd=%d not a vnode (type=%d)\n",
918 current_thread(), p->p_pid, p->p_comm,
919 fd, fp->f_fglob->fg_type));
920 error = EINVAL;
921 goto done;
922 }
923
924 /* we need at least read permission on the file */
925 if (! (fp->f_fglob->fg_flag & FREAD)) {
926 SHARED_REGION_TRACE(
927 SHARED_REGION_TRACE_ERROR,
928 ("shared_region: %p [%d(%s)] map_file: "
929 "fd=%d not readable\n",
930 current_thread(), p->p_pid, p->p_comm, fd));
931 error = EPERM;
932 goto done;
933 }
934
935 /* get vnode from file structure */
936 error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
937 if (error) {
938 SHARED_REGION_TRACE(
939 SHARED_REGION_TRACE_ERROR,
940 ("shared_region: %p [%d(%s)] map_file: "
941 "fd=%d getwithref failed (error=%d)\n",
942 current_thread(), p->p_pid, p->p_comm, fd, error));
943 goto done;
944 }
945 vp = (struct vnode *) fp->f_fglob->fg_data;
946
947 /* make sure the vnode is a regular file */
948 if (vp->v_type != VREG) {
949 SHARED_REGION_TRACE(
950 SHARED_REGION_TRACE_ERROR,
951 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
952 "not a file (type=%d)\n",
953 current_thread(), p->p_pid, p->p_comm,
954 vp, vp->v_name, vp->v_type));
955 error = EINVAL;
956 goto done;
957 }
958
959 /* get vnode size */
960 {
961 off_t fs;
962
963 context.vc_proc = p;
964 context.vc_ucred = kauth_cred_get();
965 if ((error = vnode_size(vp, &fs, &context)) != 0) {
966 SHARED_REGION_TRACE(
967 SHARED_REGION_TRACE_ERROR,
968 ("shared_region: %p [%d(%s)] "
969 "map_file(%p:'%s'): "
970 "vnode_size(%p) failed (error=%d)\n",
971 current_thread(), p->p_pid, p->p_comm,
972 				vp, vp->v_name, vp, error));
973 goto done;
974 }
975 file_size = fs;
976 }
977
978 /*
979 * Get the list of mappings the caller wants us to establish.
980 */
981 mapping_count = uap->mappingCount; /* the number of mappings */
982 mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
983 if (mapping_count == 0) {
984 SHARED_REGION_TRACE(
985 SHARED_REGION_TRACE_INFO,
986 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
987 "no mappings\n",
988 current_thread(), p->p_pid, p->p_comm,
989 vp, vp->v_name));
990 error = 0; /* no mappings: we're done ! */
991 goto done;
992 } else if (mapping_count <= SFM_MAX_STACK) {
993 mappings = &stack_mappings[0];
994 } else {
995 if ((mach_vm_size_t) mappings_size !=
996 (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
997 /* 32-bit integer overflow */
998 error = EINVAL;
999 goto done;
1000 }
1001 kr = kmem_alloc(kernel_map,
1002 (vm_offset_t *) &mappings,
1003 mappings_size);
1004 if (kr != KERN_SUCCESS) {
1005 SHARED_REGION_TRACE(
1006 SHARED_REGION_TRACE_ERROR,
1007 ("shared_region: %p [%d(%s)] "
1008 "map_file(%p:'%s'): "
1009 "failed to allocate %d mappings (kr=0x%x)\n",
1010 current_thread(), p->p_pid, p->p_comm,
1011 vp, vp->v_name, mapping_count, kr));
1012 error = ENOMEM;
1013 goto done;
1014 }
1015 }
1016
1017 user_mappings = uap->mappings; /* the mappings, in user space */
1018 error = copyin(user_mappings,
1019 mappings,
1020 mappings_size);
1021 if (error != 0) {
1022 SHARED_REGION_TRACE(
1023 SHARED_REGION_TRACE_ERROR,
1024 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1025 "failed to copyin %d mappings (error=%d)\n",
1026 current_thread(), p->p_pid, p->p_comm,
1027 vp, vp->v_name, mapping_count, error));
1028 goto done;
1029 }
1030
1031 /*
1032 * If the caller provides a "slide" pointer, it means they're OK
1033 * with us moving the mappings around to make them fit.
1034 */
1035 user_slide_p = uap->slide_p;
1036
1037 /*
1038 * Make each mapping address relative to the beginning of the
1039 * shared region. Check that all mappings are in the shared region.
1040 * Compute the maximum set of protections required to tell the
1041 * buffer cache how we mapped the file (see call to ubc_map() below).
1042 */
1043 max_prot = VM_PROT_NONE;
1044 base_offset = -1LL;
1045 end_offset = 0;
1046 mappings_in_segment = TRUE;
1047 for (j = 0; j < mapping_count; j++) {
1048 mach_vm_offset_t segment;
1049 segment = (mappings[j].sfm_address &
1050 GLOBAL_SHARED_SEGMENT_MASK);
1051 if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
1052 segment != GLOBAL_SHARED_DATA_SEGMENT) {
1053 /* this mapping is not in the shared region... */
1054 if (user_slide_p == NULL) {
1055 /* ... and we can't slide it in: fail */
1056 SHARED_REGION_TRACE(
1057 SHARED_REGION_TRACE_CONFLICT,
1058 ("shared_region: %p [%d(%s)] "
1059 "map_file(%p:'%s'): "
1060 "mapping %p not in shared segment & "
1061 "no sliding\n",
1062 current_thread(), p->p_pid, p->p_comm,
1063 vp, vp->v_name,
1064 mappings[j].sfm_address));
1065 error = EINVAL;
1066 goto done;
1067 }
1068 if (j == 0) {
1069 /* expect all mappings to be outside */
1070 mappings_in_segment = FALSE;
1071 } else if (mappings_in_segment != FALSE) {
1072 /* other mappings were not outside: fail */
1073 SHARED_REGION_TRACE(
1074 SHARED_REGION_TRACE_CONFLICT,
1075 ("shared_region: %p [%d(%s)] "
1076 "map_file(%p:'%s'): "
1077 "mapping %p not in shared segment & "
1078 "other mappings in shared segment\n",
1079 current_thread(), p->p_pid, p->p_comm,
1080 vp, vp->v_name,
1081 mappings[j].sfm_address));
1082 error = EINVAL;
1083 goto done;
1084 }
1085 /* we'll try and slide that mapping in the segments */
1086 } else {
1087 if (j == 0) {
1088 /* expect all mappings to be inside */
1089 mappings_in_segment = TRUE;
1090 } else if (mappings_in_segment != TRUE) {
1091 /* other mappings were not inside: fail */
1092 SHARED_REGION_TRACE(
1093 SHARED_REGION_TRACE_CONFLICT,
1094 ("shared_region: %p [%d(%s)] "
1095 "map_file(%p:'%s'): "
1096 "mapping %p in shared segment & "
1097 "others in shared segment\n",
1098 current_thread(), p->p_pid, p->p_comm,
1099 vp, vp->v_name,
1100 mappings[j].sfm_address));
1101 error = EINVAL;
1102 goto done;
1103 }
1104 /* get a relative offset inside the shared segments */
1105 mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
1106 }
1107 if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
1108 < base_offset) {
1109 base_offset = (mappings[j].sfm_address &
1110 SHARED_TEXT_REGION_MASK);
1111 }
1112 if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
1113 mappings[j].sfm_size > end_offset) {
1114 end_offset =
1115 (mappings[j].sfm_address &
1116 SHARED_TEXT_REGION_MASK) +
1117 mappings[j].sfm_size;
1118 }
1119 max_prot |= mappings[j].sfm_max_prot;
1120 }
1121 /* Make all mappings relative to the base_offset */
1122 base_offset = vm_map_trunc_page(base_offset);
1123 end_offset = vm_map_round_page(end_offset);
1124 for (j = 0; j < mapping_count; j++) {
1125 mappings[j].sfm_address -= base_offset;
1126 }
1127 original_base_offset = base_offset;
1128 if (mappings_in_segment == FALSE) {
1129 /*
1130 * We're trying to map a library that was not pre-bound to
1131 * be in the shared segments. We want to try and slide it
1132 * back into the shared segments but as far back as possible,
1133 * so that it doesn't clash with pre-bound libraries. Set
1134 * the base_offset to the end of the region, so that it can't
1135 * possibly fit there and will have to be slid.
1136 */
1137 base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
1138 }
1139
1140 /* get the file's memory object handle */
1141 UBCINFOCHECK("shared_region_map_file_np", vp);
1142 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1143 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1144 SHARED_REGION_TRACE(
1145 SHARED_REGION_TRACE_ERROR,
1146 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1147 "ubc_getobject() failed\n",
1148 current_thread(), p->p_pid, p->p_comm,
1149 vp, vp->v_name));
1150 error = EINVAL;
1151 goto done;
1152 }
1153
1154 /*
1155 * Get info about the current process's shared region.
1156 * This might change if we decide we need to clone the shared region.
1157 */
1158 vm_get_shared_region(current_task(), &shared_region);
1159 task_mapping_info.self = (vm_offset_t) shared_region;
1160 shared_region_mapping_info(shared_region,
1161 &(task_mapping_info.text_region),
1162 &(task_mapping_info.text_size),
1163 &(task_mapping_info.data_region),
1164 &(task_mapping_info.data_size),
1165 &(task_mapping_info.region_mappings),
1166 &(task_mapping_info.client_base),
1167 &(task_mapping_info.alternate_base),
1168 &(task_mapping_info.alternate_next),
1169 &(task_mapping_info.fs_base),
1170 &(task_mapping_info.system),
1171 &(task_mapping_info.flags),
1172 &next);
1173
1174 /*
1175 * Are we using the system's current shared region
1176 * for this environment ?
1177 */
1178 default_shared_region =
1179 lookup_default_shared_region(ENV_DEFAULT_ROOT,
1180 task_mapping_info.system);
1181 if (shared_region == default_shared_region) {
1182 using_default_region = TRUE;
1183 } else {
1184 using_default_region = FALSE;
1185 }
1186 shared_region_mapping_dealloc(default_shared_region);
1187
1188 if (vp->v_mount != rootvnode->v_mount &&
1189 using_default_region) {
1190 /*
1191 * The split library is not on the root filesystem. We don't
1192 		 * want to pollute the system-wide ("default") shared region
1193 * with it.
1194 * Reject the mapping. The caller (dyld) should "privatize"
1195 * (via shared_region_make_private()) the shared region and
1196 * try to establish the mapping privately for this process.
1197 */
1198 SHARED_REGION_TRACE(
1199 SHARED_REGION_TRACE_CONFLICT,
1200 ("shared_region: %p [%d(%s)] "
1201 "map_file(%p:'%s'): "
1202 "not on root volume\n",
1203 current_thread(), p->p_pid, p->p_comm,
1204 			 vp, vp->v_name));
1205 error = EXDEV;
1206 goto done;
1207 }
1208
1209
1210 /*
1211 * Map the split library.
1212 */
1213 kr = map_shared_file(mapping_count,
1214 mappings,
1215 file_control,
1216 file_size,
1217 &task_mapping_info,
1218 base_offset,
1219 (user_slide_p) ? &slide : NULL);
1220
1221 if (kr == KERN_SUCCESS) {
1222 /*
1223 * The mapping was successful. Let the buffer cache know
1224 * that we've mapped that file with these protections. This
1225 * prevents the vnode from getting recycled while it's mapped.
1226 */
1227 (void) ubc_map(vp, max_prot);
1228 error = 0;
1229 } else {
1230 SHARED_REGION_TRACE(
1231 SHARED_REGION_TRACE_CONFLICT,
1232 ("shared_region: %p [%d(%s)] "
1233 "map_file(%p:'%s'): "
1234 "map_shared_file failed, kr=0x%x\n",
1235 current_thread(), p->p_pid, p->p_comm,
1236 vp, vp->v_name, kr));
1237 switch (kr) {
1238 case KERN_INVALID_ADDRESS:
1239 error = EFAULT;
1240 goto done;
1241 case KERN_PROTECTION_FAILURE:
1242 error = EPERM;
1243 goto done;
1244 case KERN_NO_SPACE:
1245 error = ENOMEM;
1246 goto done;
1247 case KERN_FAILURE:
1248 case KERN_INVALID_ARGUMENT:
1249 default:
1250 error = EINVAL;
1251 goto done;
1252 }
1253 }
1254
1255 if (p->p_flag & P_NOSHLIB) {
1256 /* signal that this process is now using split libraries */
1257 p->p_flag &= ~P_NOSHLIB;
1258 }
1259
1260 if (user_slide_p) {
1261 /*
1262 * The caller provided a pointer to a "slide" offset. Let
1263 * them know by how much we slid the mappings.
1264 */
1265 if (mappings_in_segment == FALSE) {
1266 /*
1267 * We faked the base_offset earlier, so undo that
1268 * and take into account the real base_offset.
1269 */
1270 slide += SHARED_TEXT_REGION_SIZE - end_offset;
1271 slide -= original_base_offset;
1272 /*
1273 * The mappings were slid into the shared segments
1274 * and "slide" is relative to the beginning of the
1275 * shared segments. Adjust it to be absolute.
1276 */
1277 slide += GLOBAL_SHARED_TEXT_SEGMENT;
1278 }
1279 error = copyout(&slide,
1280 user_slide_p,
1281 sizeof (slide));
1282 if (slide != 0) {
1283 SHARED_REGION_TRACE(
1284 SHARED_REGION_TRACE_CONFLICT,
1285 ("shared_region: %p [%d(%s)] "
1286 "map_file(%p:'%s'): "
1287 "slid by 0x%llx\n",
1288 current_thread(), p->p_pid, p->p_comm,
1289 vp, vp->v_name, slide));
1290 }
1291 }
1292
1293 done:
1294 if (vp != NULL) {
1295 /*
1296 * release the vnode...
1297 * ubc_map() still holds it for us in the non-error case
1298 */
1299 (void) vnode_put(vp);
1300 vp = NULL;
1301 }
1302 if (fp != NULL) {
1303 /* release the file descriptor */
1304 fp_drop(p, fd, fp, 0);
1305 fp = NULL;
1306 }
1307 if (mappings != NULL &&
1308 mappings != &stack_mappings[0]) {
1309 kmem_free(kernel_map,
1310 (vm_offset_t) mappings,
1311 mappings_size);
1312 }
1313 mappings = NULL;
1314
1315 return error;
1316 }
1317
1318 int
1319 load_shared_file(
1320 __unused struct proc *p,
1321 __unused struct load_shared_file_args *uap,
1322 __unused int *retval)
1323 {
1324 return ENOSYS;
1325 }
1326
1327 int
1328 reset_shared_file(
1329 __unused struct proc *p,
1330 __unused struct reset_shared_file_args *uap,
1331 __unused int *retval)
1332 {
1333 return ENOSYS;
1334 }
1335
1336 int
1337 new_system_shared_regions(
1338 __unused struct proc *p,
1339 __unused struct new_system_shared_regions_args *uap,
1340 __unused int *retval)
1341 {
1342 return ENOSYS;
1343 }
1344
1345
1346
1347 int
1348 clone_system_shared_regions(
1349 int shared_regions_active,
1350 int chain_regions,
1351 int base_vnode)
1352 {
1353 shared_region_mapping_t new_shared_region;
1354 shared_region_mapping_t next;
1355 shared_region_mapping_t old_shared_region;
1356 struct shared_region_task_mappings old_info;
1357 struct shared_region_task_mappings new_info;
1358
1359 vm_get_shared_region(current_task(), &old_shared_region);
1360 old_info.self = (vm_offset_t)old_shared_region;
1361 shared_region_mapping_info(old_shared_region,
1362 &(old_info.text_region),
1363 &(old_info.text_size),
1364 &(old_info.data_region),
1365 &(old_info.data_size),
1366 &(old_info.region_mappings),
1367 &(old_info.client_base),
1368 &(old_info.alternate_base),
1369 &(old_info.alternate_next),
1370 &(old_info.fs_base),
1371 &(old_info.system),
1372 &(old_info.flags), &next);
1373
1374 if (shared_regions_active ||
1375 base_vnode == ENV_DEFAULT_ROOT) {
1376 if (shared_file_create_system_region(&new_shared_region,
1377 old_info.fs_base,
1378 old_info.system))
1379 return ENOMEM;
1380 } else {
1381 if (old_shared_region &&
1382 base_vnode == ENV_DEFAULT_ROOT) {
1383 base_vnode = old_info.fs_base;
1384 }
1385 new_shared_region =
1386 lookup_default_shared_region(base_vnode,
1387 old_info.system);
1388 if (new_shared_region == NULL) {
1389 shared_file_boot_time_init(base_vnode,
1390 old_info.system);
1391 vm_get_shared_region(current_task(),
1392 &new_shared_region);
1393 } else {
1394 vm_set_shared_region(current_task(), new_shared_region);
1395 }
1396 if (old_shared_region)
1397 shared_region_mapping_dealloc(old_shared_region);
1398 }
1399 new_info.self = (vm_offset_t)new_shared_region;
1400 shared_region_mapping_info(new_shared_region,
1401 &(new_info.text_region),
1402 &(new_info.text_size),
1403 &(new_info.data_region),
1404 &(new_info.data_size),
1405 &(new_info.region_mappings),
1406 &(new_info.client_base),
1407 &(new_info.alternate_base),
1408 &(new_info.alternate_next),
1409 &(new_info.fs_base),
1410 &(new_info.system),
1411 &(new_info.flags), &next);
1412 if(shared_regions_active) {
1413 if(vm_region_clone(old_info.text_region, new_info.text_region)) {
1414 panic("clone_system_shared_regions: shared region mis-alignment 1");
1415 shared_region_mapping_dealloc(new_shared_region);
1416 return(EINVAL);
1417 }
1418 if (vm_region_clone(old_info.data_region, new_info.data_region)) {
1419 panic("clone_system_shared_regions: shared region mis-alignment 2");
1420 shared_region_mapping_dealloc(new_shared_region);
1421 return(EINVAL);
1422 }
1423 if (chain_regions) {
1424 /*
1425 * We want a "shadowed" clone, a private superset of the old
1426 * shared region. The info about the old mappings is still
1427 * valid for us.
1428 */
1429 shared_region_object_chain_attach(
1430 new_shared_region, old_shared_region);
1431 } else {
1432 /*
1433 * We want a completely detached clone with no link to
1434 * the old shared region. We'll be removing some mappings
1435 * in our private, cloned, shared region, so the old mappings
1436 * will become irrelevant to us. Since we have a private
1437 * "shared region" now, it isn't going to be shared with
1438 * anyone else and we won't need to maintain mappings info.
1439 */
1440 shared_region_object_chain_detached(new_shared_region);
1441 }
1442 }
1443 if (vm_map_region_replace(current_map(), old_info.text_region,
1444 new_info.text_region, old_info.client_base,
1445 old_info.client_base+old_info.text_size)) {
1446 panic("clone_system_shared_regions: shared region mis-alignment 3");
1447 shared_region_mapping_dealloc(new_shared_region);
1448 return(EINVAL);
1449 }
1450 if(vm_map_region_replace(current_map(), old_info.data_region,
1451 new_info.data_region,
1452 old_info.client_base + old_info.text_size,
1453 old_info.client_base
1454 + old_info.text_size + old_info.data_size)) {
1455 panic("clone_system_shared_regions: shared region mis-alignment 4");
1456 shared_region_mapping_dealloc(new_shared_region);
1457 return(EINVAL);
1458 }
1459 vm_set_shared_region(current_task(), new_shared_region);
1460
1461 /* consume the reference which wasn't accounted for in object */
1462 /* chain attach */
1463 if (!shared_regions_active || !chain_regions)
1464 shared_region_mapping_dealloc(old_shared_region);
1465
1466 SHARED_REGION_TRACE(
1467 SHARED_REGION_TRACE_INFO,
1468 ("shared_region: %p task=%p "
1469 "clone(active=%d, base=0x%x,chain=%d) "
1470 "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
1471 current_thread(), current_task(),
1472 shared_regions_active, base_vnode, chain_regions,
1473 old_shared_region,
1474 old_info.fs_base,
1475 old_info.system,
1476 old_info.flags,
1477 new_shared_region,
1478 new_info.fs_base,
1479 new_info.system,
1480 new_info.flags));
1481
1482 return(0);
1483
1484 }
1485
1486 /* header for the profile name file. The profiled app info is held */
1487 /* in the data file and pointed to by elements in the name file */
1488
1489 struct profile_names_header {
1490 unsigned int number_of_profiles;
1491 unsigned int user_id;
1492 unsigned int version;
1493 off_t element_array;
1494 unsigned int spare1;
1495 unsigned int spare2;
1496 unsigned int spare3;
1497 };
1498
1499 struct profile_element {
1500 off_t addr;
1501 vm_size_t size;
1502 unsigned int mod_date;
1503 unsigned int inode;
1504 char name[12];
1505 };
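/*
 * On-disk layout (a sketch inferred from the code below, not part of the
 * original file): the "names" file begins with a profile_names_header
 * whose element_array field is the file offset of the element array; the
 * search code requires it to equal sizeof(struct profile_names_header),
 * i.e. the array of number_of_profiles profile_element entries follows
 * the header immediately.  Each element identifies an application by
 * name/inode/mod_date, and its addr/size fields locate that app's profile
 * inside the separate "data" file.
 */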
1506
1507 struct global_profile {
1508 struct vnode *names_vp;
1509 struct vnode *data_vp;
1510 vm_offset_t buf_ptr;
1511 unsigned int user;
1512 unsigned int age;
1513 unsigned int busy;
1514 };
1515
1516 struct global_profile_cache {
1517 int max_ele;
1518 unsigned int age;
1519 struct global_profile profiles[3];
1520 };
1521
1522 /* forward declarations */
1523 int bsd_open_page_cache_files(unsigned int user,
1524 struct global_profile **profile);
1525 void bsd_close_page_cache_files(struct global_profile *profile);
1526 int bsd_search_page_cache_data_base(
1527 struct vnode *vp,
1528 struct profile_names_header *database,
1529 char *app_name,
1530 unsigned int mod_date,
1531 unsigned int inode,
1532 off_t *profile,
1533 unsigned int *profile_size);
1534
1535 struct global_profile_cache global_user_profile_cache =
1536 {3, 0, {{NULL, NULL, 0, 0, 0, 0},
1537 {NULL, NULL, 0, 0, 0, 0},
1538 {NULL, NULL, 0, 0, 0, 0}} };
1539
1540 /* BSD_OPEN_PAGE_CACHE_FILES: */
1541 /* Caller provides a user id. This id was used in */
1542 /* prepare_profile_database to create two unique absolute */
1543 /* file paths to the associated profile files. These files */
1544 /* are either opened or bsd_open_page_cache_files returns an */
1545 /* error. The header of the names file is then consulted. */
1546 /* The header and the vnodes for the names and data files are */
1547 /* returned. */
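/*
 * Example (a sketch, not part of the original file): for user id 501
 * (0x1f5), the two paths built below come out as
 * "/var/vm/app_profile/1f5_data" and "/var/vm/app_profile/1f5_names".
 */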
1548
1549 int
1550 bsd_open_page_cache_files(
1551 unsigned int user,
1552 struct global_profile **profile)
1553 {
1554 const char *cache_path = "/var/vm/app_profile/";
1555 struct proc *p;
1556 int error;
1557 vm_size_t resid;
1558 off_t resid_off;
1559 unsigned int lru;
1560 vm_size_t size;
1561
1562 struct vnode *names_vp;
1563 struct vnode *data_vp;
1564 vm_offset_t names_buf;
1565 vm_offset_t buf_ptr;
1566
1567 int profile_names_length;
1568 int profile_data_length;
1569 char *profile_data_string;
1570 char *profile_names_string;
1571 char *substring;
1572
1573 off_t file_size;
1574 struct vfs_context context;
1575
1576 kern_return_t ret;
1577
1578 struct nameidata nd_names;
1579 struct nameidata nd_data;
1580 int i;
1581
1582
1583 p = current_proc();
1584
1585 context.vc_proc = p;
1586 context.vc_ucred = kauth_cred_get();
1587
1588 restart:
1589 for(i = 0; i<global_user_profile_cache.max_ele; i++) {
1590 if((global_user_profile_cache.profiles[i].user == user)
1591 && (global_user_profile_cache.profiles[i].data_vp
1592 != NULL)) {
1593 *profile = &global_user_profile_cache.profiles[i];
1594 /* already in cache, we're done */
1595 if ((*profile)->busy) {
1596 /*
1597 * drop funnel and wait
1598 */
1599 (void)tsleep((void *)
1600 *profile,
1601 PRIBIO, "app_profile", 0);
1602 goto restart;
1603 }
1604 (*profile)->busy = 1;
1605 (*profile)->age = global_user_profile_cache.age;
1606
1607 /*
1608 * entries in cache are held with a valid
1609 * usecount... take an iocount which will
1610 * be dropped in "bsd_close_page_cache_files"
1611 * which is called after the read or writes to
1612 * these files are done
1613 */
1614 if ( (vnode_getwithref((*profile)->data_vp)) ) {
1615
1616 vnode_rele((*profile)->data_vp);
1617 vnode_rele((*profile)->names_vp);
1618
1619 (*profile)->data_vp = NULL;
1620 (*profile)->busy = 0;
1621 wakeup(*profile);
1622
1623 goto restart;
1624 }
1625 if ( (vnode_getwithref((*profile)->names_vp)) ) {
1626
1627 vnode_put((*profile)->data_vp);
1628 vnode_rele((*profile)->data_vp);
1629 vnode_rele((*profile)->names_vp);
1630
1631 (*profile)->data_vp = NULL;
1632 (*profile)->busy = 0;
1633 wakeup(*profile);
1634
1635 goto restart;
1636 }
1637 global_user_profile_cache.age+=1;
1638 return 0;
1639 }
1640 }
1641
1642 lru = global_user_profile_cache.age;
1643 *profile = NULL;
1644 for(i = 0; i<global_user_profile_cache.max_ele; i++) {
1645 /* Skip entry if it is in the process of being reused */
1646 if(global_user_profile_cache.profiles[i].data_vp ==
1647 (struct vnode *)0xFFFFFFFF)
1648 continue;
1649 /* Otherwise grab the first empty entry */
1650 if(global_user_profile_cache.profiles[i].data_vp == NULL) {
1651 *profile = &global_user_profile_cache.profiles[i];
1652 (*profile)->age = global_user_profile_cache.age;
1653 break;
1654 }
1655 /* Otherwise grab the oldest entry */
1656 if(global_user_profile_cache.profiles[i].age < lru) {
1657 lru = global_user_profile_cache.profiles[i].age;
1658 *profile = &global_user_profile_cache.profiles[i];
1659 }
1660 }
1661
1662 /* Did we set it? */
1663 if (*profile == NULL) {
1664 /*
1665 * No entries are available; this can only happen if all
1666 * of them are currently in the process of being reused;
1667 * if this happens, we sleep on the address of the first
1668 * element, and restart. This is less than ideal, but we
1669 * know it will work because we know that there will be a
1670 * wakeup on any entry currently in the process of being
1671 * reused.
1672 *
1673 		 * XXX Recommend a two-handed clock and more than 3 total
1674 		 * XXX cache entries at some point in the future.
1675 */
1676 /*
1677 * drop funnel and wait
1678 */
1679 (void)tsleep((void *)
1680 &global_user_profile_cache.profiles[0],
1681 PRIBIO, "app_profile", 0);
1682 goto restart;
1683 }
1684
1685 /*
1686 * If it's currently busy, we've picked the one at the end of the
1687 * LRU list, but it's currently being actively used. We sleep on
1688 * its address and restart.
1689 */
1690 if ((*profile)->busy) {
1691 /*
1692 * drop funnel and wait
1693 */
1694 (void)tsleep((void *)
1695 *profile,
1696 PRIBIO, "app_profile", 0);
1697 goto restart;
1698 }
1699 (*profile)->busy = 1;
1700 (*profile)->user = user;
1701
1702 /*
1703 	 * put a dummy value in for now to get competing requests to wait
1704 	 * above until we are finished
1705 *
1706 * Save the data_vp before setting it, so we can set it before
1707 * we kmem_free() or vrele(). If we don't do this, then we
1708 * have a potential funnel race condition we have to deal with.
1709 */
1710 data_vp = (*profile)->data_vp;
1711 (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
1712
1713 /*
1714 * Age the cache here in all cases; this guarantees that we won't
1715 * be reusing only one entry over and over, once the system reaches
1716 * steady-state.
1717 */
1718 global_user_profile_cache.age+=1;
1719
1720 if(data_vp != NULL) {
1721 kmem_free(kernel_map,
1722 (*profile)->buf_ptr, 4 * PAGE_SIZE);
1723 if ((*profile)->names_vp) {
1724 vnode_rele((*profile)->names_vp);
1725 (*profile)->names_vp = NULL;
1726 }
1727 vnode_rele(data_vp);
1728 }
1729
1730 	/* Try to open the appropriate user's profile files */
1731 /* If neither file is present, try to create them */
1732 /* If one file is present and the other not, fail. */
1733 /* If the files do exist, check them for the app_file */
1734 /* requested and read it in if present */
1735
1736 ret = kmem_alloc(kernel_map,
1737 (vm_offset_t *)&profile_data_string, PATH_MAX);
1738
1739 if(ret) {
1740 (*profile)->data_vp = NULL;
1741 (*profile)->busy = 0;
1742 wakeup(*profile);
1743 return ENOMEM;
1744 }
1745
1746 /* Split the buffer in half since we know the size of */
1747 /* our file path and our allocation is adequate for */
1748 /* both file path names */
1749 profile_names_string = profile_data_string + (PATH_MAX/2);
1750
1751
1752 strcpy(profile_data_string, cache_path);
1753 strcpy(profile_names_string, cache_path);
1754 profile_names_length = profile_data_length
1755 = strlen(profile_data_string);
1756 substring = profile_data_string + profile_data_length;
1757 sprintf(substring, "%x_data", user);
1758 substring = profile_names_string + profile_names_length;
1759 sprintf(substring, "%x_names", user);
1760
1761 /* We now have the absolute file names */
1762
1763 ret = kmem_alloc(kernel_map,
1764 (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
1765 if(ret) {
1766 kmem_free(kernel_map,
1767 (vm_offset_t)profile_data_string, PATH_MAX);
1768 (*profile)->data_vp = NULL;
1769 (*profile)->busy = 0;
1770 wakeup(*profile);
1771 return ENOMEM;
1772 }
1773
1774 NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
1775 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
1776 NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
1777 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
1778
1779 if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
1780 #ifdef notdef
1781 printf("bsd_open_page_cache_files: CacheData file not found %s\n",
1782 profile_data_string);
1783 #endif
1784 kmem_free(kernel_map,
1785 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1786 kmem_free(kernel_map,
1787 (vm_offset_t)profile_data_string, PATH_MAX);
1788 (*profile)->data_vp = NULL;
1789 (*profile)->busy = 0;
1790 wakeup(*profile);
1791 return error;
1792 }
1793 data_vp = nd_data.ni_vp;
1794
1795 if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
1796 printf("bsd_open_page_cache_files: NamesData file not found %s\n",
1797 			profile_names_string);
1798 kmem_free(kernel_map,
1799 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1800 kmem_free(kernel_map,
1801 (vm_offset_t)profile_data_string, PATH_MAX);
1802
1803 vnode_rele(data_vp);
1804 vnode_put(data_vp);
1805
1806 (*profile)->data_vp = NULL;
1807 (*profile)->busy = 0;
1808 wakeup(*profile);
1809 return error;
1810 }
1811 names_vp = nd_names.ni_vp;
1812
1813 if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
1814 printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
1815 kmem_free(kernel_map,
1816 (vm_offset_t)profile_data_string, PATH_MAX);
1817 kmem_free(kernel_map,
1818 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1819
1820 vnode_rele(names_vp);
1821 vnode_put(names_vp);
1822 vnode_rele(data_vp);
1823 vnode_put(data_vp);
1824
1825 (*profile)->data_vp = NULL;
1826 (*profile)->busy = 0;
1827 wakeup(*profile);
1828 return error;
1829 }
1830
1831 size = file_size;
1832 if(size > 4 * PAGE_SIZE)
1833 size = 4 * PAGE_SIZE;
1834 buf_ptr = names_buf;
1835 resid_off = 0;
1836
1837 while(size) {
1838 int resid_int;
1839 error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
1840 size, resid_off,
1841 UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
1842 &resid_int, p);
1843 resid = (vm_size_t) resid_int;
1844 if((error) || (size == resid)) {
1845 if(!error) {
1846 error = EINVAL;
1847 }
1848 kmem_free(kernel_map,
1849 (vm_offset_t)profile_data_string, PATH_MAX);
1850 kmem_free(kernel_map,
1851 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1852
1853 vnode_rele(names_vp);
1854 vnode_put(names_vp);
1855 vnode_rele(data_vp);
1856 vnode_put(data_vp);
1857
1858 (*profile)->data_vp = NULL;
1859 (*profile)->busy = 0;
1860 wakeup(*profile);
1861 return error;
1862 }
1863 buf_ptr += size-resid;
1864 resid_off += size-resid;
1865 size = resid;
1866 }
1867 kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
1868
1869 (*profile)->names_vp = names_vp;
1870 (*profile)->data_vp = data_vp;
1871 (*profile)->buf_ptr = names_buf;
1872
1873 /*
1874 	 * at this point, both the names_vp and the data_vp have
1875 	 * a valid usecount and an iocount held
1876 */
1877 return 0;
1878
1879 }
1880
1881 void
1882 bsd_close_page_cache_files(
1883 struct global_profile *profile)
1884 {
1885 vnode_put(profile->data_vp);
1886 vnode_put(profile->names_vp);
1887
1888 profile->busy = 0;
1889 wakeup(profile);
1890 }
1891
1892 int
1893 bsd_read_page_cache_file(
1894 unsigned int user,
1895 int *fid,
1896 int *mod,
1897 char *app_name,
1898 struct vnode *app_vp,
1899 vm_offset_t *buffer,
1900 vm_offset_t *bufsize)
1901 {
1902
1903 boolean_t funnel_state;
1904
1905 struct proc *p;
1906 int error;
1907 unsigned int resid;
1908
1909 off_t profile;
1910 unsigned int profile_size;
1911
1912 vm_offset_t names_buf;
1913 struct vnode_attr va;
1914 struct vfs_context context;
1915
1916 kern_return_t ret;
1917
1918 struct vnode *names_vp;
1919 struct vnode *data_vp;
1920
1921 struct global_profile *uid_files;
1922
1923 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1924
1925 	/* Try to open the appropriate user's profile files */
1926 /* If neither file is present, try to create them */
1927 /* If one file is present and the other not, fail. */
1928 /* If the files do exist, check them for the app_file */
1929 /* requested and read it in if present */
1930
1931
1932 error = bsd_open_page_cache_files(user, &uid_files);
1933 if(error) {
1934 thread_funnel_set(kernel_flock, funnel_state);
1935 return EINVAL;
1936 }
1937
1938 p = current_proc();
1939
1940 names_vp = uid_files->names_vp;
1941 data_vp = uid_files->data_vp;
1942 names_buf = uid_files->buf_ptr;
1943
1944 context.vc_proc = p;
1945 context.vc_ucred = kauth_cred_get();
1946
1947 VATTR_INIT(&va);
1948 VATTR_WANTED(&va, va_fileid);
1949 VATTR_WANTED(&va, va_modify_time);
1950
1951 if ((error = vnode_getattr(app_vp, &va, &context))) {
1952 printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
1953 bsd_close_page_cache_files(uid_files);
1954 thread_funnel_set(kernel_flock, funnel_state);
1955 return error;
1956 }
1957
1958 *fid = (u_long)va.va_fileid;
1959 *mod = va.va_modify_time.tv_sec;
1960
1961 if (bsd_search_page_cache_data_base(
1962 names_vp,
1963 (struct profile_names_header *)names_buf,
1964 app_name,
1965 (unsigned int) va.va_modify_time.tv_sec,
1966 (u_long)va.va_fileid, &profile, &profile_size) == 0) {
1967 /* profile is an offset in the profile data base */
1968 /* It is zero if no profile data was found */
1969
1970 if(profile_size == 0) {
1971 *buffer = 0;
1972 *bufsize = 0;
1973 bsd_close_page_cache_files(uid_files);
1974 thread_funnel_set(kernel_flock, funnel_state);
1975 return 0;
1976 }
1977 ret = kmem_alloc(kernel_map, buffer, profile_size);
1978 if(ret) {
1979 bsd_close_page_cache_files(uid_files);
1980 thread_funnel_set(kernel_flock, funnel_state);
1981 return ENOMEM;
1982 }
1983 *bufsize = profile_size;
1984 while(profile_size) {
1985 int resid_int;
1986 error = vn_rdwr(UIO_READ, data_vp,
1987 (caddr_t) *buffer, profile_size,
1988 profile, UIO_SYSSPACE32, IO_NODELOCKED,
1989 kauth_cred_get(), &resid_int, p);
1990 resid = (vm_size_t) resid_int;
1991 if((error) || (profile_size == resid)) {
1992 bsd_close_page_cache_files(uid_files);
1993 kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
1994 thread_funnel_set(kernel_flock, funnel_state);
1995 return EINVAL;
1996 }
1997 profile += profile_size - resid;
1998 profile_size = resid;
1999 }
2000 bsd_close_page_cache_files(uid_files);
2001 thread_funnel_set(kernel_flock, funnel_state);
2002 return 0;
2003 } else {
2004 bsd_close_page_cache_files(uid_files);
2005 thread_funnel_set(kernel_flock, funnel_state);
2006 return EINVAL;
2007 }
2008
2009 }
2010
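/*
 * bsd_search_page_cache_data_base:
 * Scan the names database for an element whose inode, mod_date and
 * first 12 characters of name match the supplied application.  The
 * first 4 pages of the database are already resident in 'database';
 * any additional elements are paged in from vp in 4-page chunks.
 * On a match, *profile / *profile_size describe the entry's location
 * in the data file; both are left 0 when nothing matches.  Returns
 * EINVAL on a malformed header or read error, 0 otherwise.
 */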
2011 int
2012 bsd_search_page_cache_data_base(
2013 struct vnode *vp,
2014 struct profile_names_header *database,
2015 char *app_name,
2016 unsigned int mod_date,
2017 unsigned int inode,
2018 off_t *profile,
2019 unsigned int *profile_size)
2020 {
2021
2022 struct proc *p;
2023
2024 unsigned int i;
2025 struct profile_element *element;
2026 unsigned int ele_total;
2027 unsigned int extended_list = 0;
2028 off_t file_off = 0;
2029 unsigned int size;
2030 off_t resid_off;
2031 unsigned int resid;
2032 vm_offset_t local_buf = 0;
2033
2034 int error;
2035 kern_return_t ret;
2036
2037 p = current_proc();
2038
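/*
 * element_array is an offset from the start of the header; the layout
 * written by prepare_profile_database places the element array
 * immediately after the header, so anything else indicates an
 * incompatible or corrupt names file.
 */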
2039 if(((vm_offset_t)database->element_array) !=
2040 sizeof(struct profile_names_header)) {
2041 return EINVAL;
2042 }
2043 element = (struct profile_element *)(
2044 (vm_offset_t)database->element_array +
2045 (vm_offset_t)database);
2046
2047 ele_total = database->number_of_profiles;
2048
2049 *profile = 0;
2050 *profile_size = 0;
2051 while(ele_total) {
2052 /* note: code assumes header + n*ele comes out on a page boundary */
2053 if(((local_buf == 0) && (sizeof(struct profile_names_header) +
2054 (ele_total * sizeof(struct profile_element)))
2055 > (PAGE_SIZE * 4)) ||
2056 ((local_buf != 0) &&
2057 (ele_total * sizeof(struct profile_element))
2058 > (PAGE_SIZE * 4))) {
2059 extended_list = ele_total;
2060 if(element == (struct profile_element *)
2061 ((vm_offset_t)database->element_array +
2062 (vm_offset_t)database)) {
2063 ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
2064 } else {
2065 ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
2066 }
2067 extended_list -= ele_total;
2068 }
2069 for (i=0; i<ele_total; i++) {
2070 if((mod_date == element[i].mod_date)
2071 && (inode == element[i].inode)) {
2072 if(strncmp(element[i].name, app_name, 12) == 0) {
2073 *profile = element[i].addr;
2074 *profile_size = element[i].size;
2075 if(local_buf != 0) {
2076 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2077 }
2078 return 0;
2079 }
2080 }
2081 }
2082 if(extended_list == 0)
2083 break;
2084 if(local_buf == 0) {
2085 ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
2086 if(ret != KERN_SUCCESS) {
2087 return ENOMEM;
2088 }
2089 }
2090 element = (struct profile_element *)local_buf;
2091 ele_total = extended_list;
2092 extended_list = 0;
2093 file_off += 4 * PAGE_SIZE;
2094 if((ele_total * sizeof(struct profile_element)) >
2095 (PAGE_SIZE * 4)) {
2096 size = PAGE_SIZE * 4;
2097 } else {
2098 size = ele_total * sizeof(struct profile_element);
2099 }
2100 resid_off = 0;
2101 while(size) {
2102 int resid_int;
2103 error = vn_rdwr(UIO_READ, vp,
2104 CAST_DOWN(caddr_t, (local_buf + resid_off)),
2105 size, file_off + resid_off, UIO_SYSSPACE32,
2106 IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
2107 resid = (vm_size_t) resid_int;
2108 if((error) || (size == resid)) {
2109 if(local_buf != 0) {
2110 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2111 }
2112 return EINVAL;
2113 }
2114 resid_off += size-resid;
2115 size = resid;
2116 }
2117 }
2118 if(local_buf != 0) {
2119 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2120 }
2121 return 0;
2122 }
2123
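/*
 * bsd_write_page_cache_file:
 * Append a page-cache profile of 'size' bytes for file_name (keyed by
 * inode 'fid' and modification time 'mod') to the end of the per-user
 * profile data file, and record a matching element in the names file.
 * If a profile for this application already exists the call is a
 * no-op; if the names database cannot be searched, EINVAL is returned.
 */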
2124 int
2125 bsd_write_page_cache_file(
2126 unsigned int user,
2127 char *file_name,
2128 caddr_t buffer,
2129 vm_size_t size,
2130 int mod,
2131 int fid)
2132 {
2133 struct proc *p;
2134 int resid;
2135 off_t resid_off;
2136 int error;
2137 boolean_t funnel_state;
2138 off_t file_size;
2139 struct vfs_context context;
2140 off_t profile;
2141 unsigned int profile_size;
2142
2143 vm_offset_t names_buf;
2144 struct vnode *names_vp;
2145 struct vnode *data_vp;
2146 struct profile_names_header *profile_header;
2147 off_t name_offset;
2148 struct global_profile *uid_files;
2149
2150
2151 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2152
2153
2154 error = bsd_open_page_cache_files(user, &uid_files);
2155 if(error) {
2156 thread_funnel_set(kernel_flock, funnel_state);
2157 return EINVAL;
2158 }
2159
2160 p = current_proc();
2161
2162 names_vp = uid_files->names_vp;
2163 data_vp = uid_files->data_vp;
2164 names_buf = uid_files->buf_ptr;
2165
2166 /* Stat data file for size */
2167
2168 context.vc_proc = p;
2169 context.vc_ucred = kauth_cred_get();
2170
2171 if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
2172 printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
2173 bsd_close_page_cache_files(uid_files);
2174 thread_funnel_set(kernel_flock, funnel_state);
2175 return error;
2176 }
2177
2178 if (bsd_search_page_cache_data_base(names_vp,
2179 (struct profile_names_header *)names_buf,
2180 file_name, (unsigned int) mod,
2181 fid, &profile, &profile_size) == 0) {
2182 /* profile is an offset in the profile data base */
2183 /* It is zero if no profile data was found */
2184
2185 if(profile_size == 0) {
2186 unsigned int header_size;
2187 vm_offset_t buf_ptr;
2188
2189 /* Our Write case */
2190
2191 /* read header for last entry */
2192 profile_header =
2193 (struct profile_names_header *)names_buf;
2194 name_offset = sizeof(struct profile_names_header) +
2195 (sizeof(struct profile_element)
2196 * profile_header->number_of_profiles);
2197 profile_header->number_of_profiles += 1;
2198
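/*
 * Only the first 4 pages of the names file are cached in names_buf;
 * a new element that falls inside them is added to the cached copy
 * (and flushed together with the header below), while one that falls
 * beyond them is written directly to the names file.
 */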
2199 if(name_offset < PAGE_SIZE * 4) {
2200 struct profile_element *name;
2201 /* write new entry */
2202 name = (struct profile_element *)
2203 (names_buf + (vm_offset_t)name_offset);
2204 name->addr = file_size;
2205 name->size = size;
2206 name->mod_date = mod;
2207 name->inode = fid;
2208 strncpy (name->name, file_name, 12);
2209 } else {
2210 unsigned int ele_size;
2211 struct profile_element name;
2212 /* write new entry */
2213 name.addr = file_size;
2214 name.size = size;
2215 name.mod_date = mod;
2216 name.inode = fid;
2217 strncpy (name.name, file_name, 12);
2218 /* write element out separately */
2219 ele_size = sizeof(struct profile_element);
2220 buf_ptr = (vm_offset_t)&name;
2221 resid_off = name_offset;
2222
2223 while(ele_size) {
2224 error = vn_rdwr(UIO_WRITE, names_vp,
2225 (caddr_t)buf_ptr,
2226 ele_size, resid_off,
2227 UIO_SYSSPACE32, IO_NODELOCKED,
2228 kauth_cred_get(), &resid, p);
2229 if(error) {
2230 printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
2231 bsd_close_page_cache_files(
2232 uid_files);
2233 thread_funnel_set(
2234 kernel_flock,
2235 funnel_state);
2236 return error;
2237 }
2238 buf_ptr += (vm_offset_t)
2239 ele_size-resid;
2240 resid_off += ele_size-resid;
2241 ele_size = resid;
2242 }
2243 }
2244
2245 if(name_offset < PAGE_SIZE * 4) {
2246 header_size = name_offset +
2247 sizeof(struct profile_element);
2248
2249 } else {
2250 header_size =
2251 sizeof(struct profile_names_header);
2252 }
2253 buf_ptr = (vm_offset_t)profile_header;
2254 resid_off = 0;
2255
2256 /* write names file header */
2257 while(header_size) {
2258 error = vn_rdwr(UIO_WRITE, names_vp,
2259 (caddr_t)buf_ptr,
2260 header_size, resid_off,
2261 UIO_SYSSPACE32, IO_NODELOCKED,
2262 kauth_cred_get(), &resid, p);
2263 if(error) {
2264 printf("bsd_write_page_cache_file: Can't write header %x\n", user);
2265 bsd_close_page_cache_files(
2266 uid_files);
2267 thread_funnel_set(
2268 kernel_flock, funnel_state);
2269 return error;
2270 }
2271 buf_ptr += (vm_offset_t)header_size-resid;
2272 resid_off += header_size-resid;
2273 header_size = resid;
2274 }
2275 /* write profile to data file */
2276 resid_off = file_size;
2277 while(size) {
2278 error = vn_rdwr(UIO_WRITE, data_vp,
2279 (caddr_t)buffer, size, resid_off,
2280 UIO_SYSSPACE32, IO_NODELOCKED,
2281 kauth_cred_get(), &resid, p);
2282 if(error) {
2283 printf("bsd_write_page_cache_file: Can't write profile data %x\n", user);
2284 bsd_close_page_cache_files(
2285 uid_files);
2286 thread_funnel_set(
2287 kernel_flock, funnel_state);
2288 return error;
2289 }
2290 buffer += size-resid;
2291 resid_off += size-resid;
2292 size = resid;
2293 }
2294 bsd_close_page_cache_files(uid_files);
2295 thread_funnel_set(kernel_flock, funnel_state);
2296 return 0;
2297 }
2298 /* Someone else wrote a twin profile before us */
2299 bsd_close_page_cache_files(uid_files);
2300 thread_funnel_set(kernel_flock, funnel_state);
2301 return 0;
2302 } else {
2303 bsd_close_page_cache_files(uid_files);
2304 thread_funnel_set(kernel_flock, funnel_state);
2305 return EINVAL;
2306 }
2307
2308 }
2309
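/*
 * prepare_profile_database:
 * Create the per-user profile files <cache_path><uid-in-hex>_data and
 * <cache_path><uid-in-hex>_names, write an initial names-file header
 * (version 1, no profiles), and chown both files to 'user'.  Returns
 * 0 without error if the data file cannot be created exclusively,
 * which normally means the database already exists.
 */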
2310 int
2311 prepare_profile_database(int user)
2312 {
2313 const char *cache_path = "/var/vm/app_profile/";
2314 struct proc *p;
2315 int error;
2316 int resid;
2317 off_t resid_off;
2318 vm_size_t size;
2319
2320 struct vnode *names_vp;
2321 struct vnode *data_vp;
2322 vm_offset_t names_buf;
2323 vm_offset_t buf_ptr;
2324
2325 int profile_names_length;
2326 int profile_data_length;
2327 char *profile_data_string;
2328 char *profile_names_string;
2329 char *substring;
2330
2331 struct vnode_attr va;
2332 struct vfs_context context;
2333
2334 struct profile_names_header *profile_header;
2335 kern_return_t ret;
2336
2337 struct nameidata nd_names;
2338 struct nameidata nd_data;
2339
2340 p = current_proc();
2341
2342 context.vc_proc = p;
2343 context.vc_ucred = kauth_cred_get();
2344
2345 ret = kmem_alloc(kernel_map,
2346 (vm_offset_t *)&profile_data_string, PATH_MAX);
2347
2348 if(ret) {
2349 return ENOMEM;
2350 }
2351
2352 /* Split the buffer in half since we know the size of */
2353 /* our file path and our allocation is adequate for */
2354 /* both file path names */
2355 profile_names_string = profile_data_string + (PATH_MAX/2);
2356
2357
2358 strcpy(profile_data_string, cache_path);
2359 strcpy(profile_names_string, cache_path);
2360 profile_names_length = profile_data_length
2361 = strlen(profile_data_string);
2362 substring = profile_data_string + profile_data_length;
2363 sprintf(substring, "%x_data", user);
2364 substring = profile_names_string + profile_names_length;
2365 sprintf(substring, "%x_names", user);
2366
2367 /* We now have the absolute file names */
2368
2369 ret = kmem_alloc(kernel_map,
2370 (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
2371 if(ret) {
2372 kmem_free(kernel_map,
2373 (vm_offset_t)profile_data_string, PATH_MAX);
2374 return ENOMEM;
2375 }
2376
2377 NDINIT(&nd_names, LOOKUP, FOLLOW,
2378 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
2379 NDINIT(&nd_data, LOOKUP, FOLLOW,
2380 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
2381
2382 if ( (error = vn_open(&nd_data,
2383 O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
2384 kmem_free(kernel_map,
2385 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2386 kmem_free(kernel_map,
2387 (vm_offset_t)profile_data_string, PATH_MAX);
2388
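/*
 * The data file is opened O_CREAT|O_EXCL, so this path is normally
 * taken when the database already exists; returning 0 treats it as
 * already prepared (any other open failure is ignored the same way).
 */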
2389 return 0;
2390 }
2391 data_vp = nd_data.ni_vp;
2392
2393 if ( (error = vn_open(&nd_names,
2394 O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
2395 printf("prepare_profile_database: Can't create CacheNames %s\n",
2396 profile_data_string);
2397 kmem_free(kernel_map,
2398 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2399 kmem_free(kernel_map,
2400 (vm_offset_t)profile_data_string, PATH_MAX);
2401
2402 vnode_rele(data_vp);
2403 vnode_put(data_vp);
2404
2405 return error;
2406 }
2407 names_vp = nd_names.ni_vp;
2408
2409 /* Write Header for new names file */
2410
2411 profile_header = (struct profile_names_header *)names_buf;
2412
2413 profile_header->number_of_profiles = 0;
2414 profile_header->user_id = user;
2415 profile_header->version = 1;
2416 profile_header->element_array =
2417 sizeof(struct profile_names_header);
2418 profile_header->spare1 = 0;
2419 profile_header->spare2 = 0;
2420 profile_header->spare3 = 0;
2421
2422 size = sizeof(struct profile_names_header);
2423 buf_ptr = (vm_offset_t)profile_header;
2424 resid_off = 0;
2425
2426 while(size) {
2427 error = vn_rdwr(UIO_WRITE, names_vp,
2428 (caddr_t)buf_ptr, size, resid_off,
2429 UIO_SYSSPACE32, IO_NODELOCKED,
2430 kauth_cred_get(), &resid, p);
2431 if(error) {
2432 printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
2433 kmem_free(kernel_map,
2434 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2435 kmem_free(kernel_map,
2436 (vm_offset_t)profile_data_string,
2437 PATH_MAX);
2438
2439 vnode_rele(names_vp);
2440 vnode_put(names_vp);
2441 vnode_rele(data_vp);
2442 vnode_put(data_vp);
2443
2444 return error;
2445 }
2446 buf_ptr += size-resid;
2447 resid_off += size-resid;
2448 size = resid;
2449 }
2450 VATTR_INIT(&va);
2451 VATTR_SET(&va, va_uid, user);
2452
2453 error = vnode_setattr(names_vp, &va, &context);
2454 if(error) {
2455 printf("prepare_profile_database: "
2456 "Can't set user %s\n", profile_names_string);
2457 }
2458 vnode_rele(names_vp);
2459 vnode_put(names_vp);
2460
2461 VATTR_INIT(&va);
2462 VATTR_SET(&va, va_uid, user);
2463 error = vnode_setattr(data_vp, &va, &context);
2464 if(error) {
2465 printf("prepare_profile_database: "
2466 "Can't set user %s\n", profile_data_string);
2467 }
2468 vnode_rele(data_vp);
2469 vnode_put(data_vp);
2470
2471 kmem_free(kernel_map,
2472 (vm_offset_t)profile_data_string, PATH_MAX);
2473 kmem_free(kernel_map,
2474 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2475 return 0;
2476
2477 }