/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <mach/mach_traps.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/port.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/sysctl.h>

#include <bsm/audit_kernel.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <machine/spl.h>

#include <mach/shared_memory_server.h>
#include <vm/vm_shared_memory_server.h>

#include <vm/vm_protos.h>
log_nx_failure(addr64_t vaddr, vm_prot_t prot)
	printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
	return (vm_map_check_protection(
		vm_map_trunc_page(addr), vm_map_round_page(addr+len),
		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
			vm_map_round_page(addr+len),
			VM_PROT_READ | VM_PROT_WRITE, FALSE);

	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	__unused int dirtied)
	vm_map_offset_t	vaddr;

	pmap = get_task_pmap(current_task());
	for (vaddr = vm_map_trunc_page(addr);
	     vaddr < vm_map_round_page(addr+len);
	     vaddr += PAGE_SIZE) {
		paddr = pmap_extract(pmap, vaddr);
		pg = PHYS_TO_VM_PAGE(paddr);
		vm_page_set_modified(pg);
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
			vm_map_round_page(addr+len), FALSE);

	case KERN_INVALID_ADDRESS:
	case KERN_PROTECTION_FAILURE:
	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);

int fubyte(user_addr_t addr)
	if (copyin(addr, (void *) &byte, sizeof(char)))

int fuibyte(user_addr_t addr)
	if (copyin(addr, (void *) &(byte), sizeof(char)))

	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);

long fuword(user_addr_t addr)
	if (copyin(addr, (void *) &word, sizeof(int)))

/* suiword and fuiword are the same as suword and fuword, respectively */

	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);

long fuiword(user_addr_t addr)
	if (copyin(addr, (void *) &word, sizeof(int)))
/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
sulong(user_addr_t addr, int64_t word)
	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
		return(suiword(addr, (long)word));

fulong(user_addr_t addr)
	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
		return((int64_t)fuiword(addr));

suulong(user_addr_t addr, uint64_t uword)
	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
		return(suiword(addr, (u_long)uword));

fuulong(user_addr_t addr)
	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
		return((uint64_t)fuiword(addr));
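/*
 * Illustrative sketch (not part of the original file): a kernel caller that
 * must store a pointer-sized value for either a 32-bit or a 64-bit process
 * would use sulong()/fulong() rather than suword()/fuword() so the value
 * round-trips at the right width.  "slot" and "value" are hypothetical names
 * used only for this example:
 *
 *	user_addr_t slot = ...;			// user-space slot to fill
 *	if (sulong(slot, (int64_t)value) != 0)	// writes 8 or 4 bytes as needed
 *		return (EFAULT);
 *	int64_t back = fulong(slot);		// reads back at the same width
 */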
swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
	struct pid_for_task_args *args)
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	kern_return_t		err = KERN_SUCCESS;
	boolean_t		funnel_state;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {

	p = get_bsdtask_info(t1);

	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	thread_funnel_set(kernel_flock, funnel_state);
	AUDIT_MACH_SYSCALL_EXIT(err);
/*
 * Routine:	task_for_pid
 *
 *	Get the task port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: none allowed except for self
 * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: all permissive; related or privileged ones allowed
 * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; setgid (to tfp_group) processes are allowed access to other tasks
 */
static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
/* the group is inited to the kmem group and is modifiable by sysctl */
static int tfp_group_inited = 0;	/* policy groups are loaded ... */
static gid_t tfp_group_ronly = 0;	/* procview group */
static gid_t tfp_group_rw = 0;		/* procmod group */
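/*
 * Illustrative sketch (not part of the original file): from user space the
 * trap is reached through the libsystem wrapper, roughly:
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	// kr is KERN_FAILURE unless the policy checks below permit the caller
 *
 * Whether this succeeds for "pid" depends on tfp_policy and the group
 * settings declared above.
 */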
	struct task_for_pid_args *args)
	mach_port_name_t	target_tport = args->target_tport;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	mach_port_name_t	tret;
	boolean_t		funnel_state;
	boolean_t		ispermitted = FALSE;
	char			procname[MAXCOMLEN+1];
#endif /* DIAGNOSTIC */

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	if (uthread->uu_ucred != p1->p_ucred &&
	    (uthread->uu_flag & UT_SETUID) == 0) {
		kauth_cred_t old = uthread->uu_ucred;
		kauth_cred_ref(p1->p_ucred);
		uthread->uu_ucred = p1->p_ucred;
		if (IS_VALID_CRED(old))
			kauth_cred_unref(&old);

	AUDIT_ARG(process, p);
	/*
	 * XXX p_ucred check can be bogus in multithreaded processes,
	 * XXX unless the funnel is held.
	 */
	switch (tfp_policy) {

	case KERN_TFP_POLICY_PERMISSIVE:
		/* self or suser or related ones */
		if ((p != (struct proc *) 0)
			&& (p->p_stat != SZOMB)
			&& (p1 != (struct proc *) 0)
			|| !(suser(kauth_cred_get(), 0))
			|| ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
				((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
				&& ((p->p_flag & P_SUGID) == 0))

	case KERN_TFP_POLICY_RESTRICTED:
		/* self or suser or setgid and related ones only */
		if ((p != (struct proc *) 0)
			&& (p1 != (struct proc *) 0)
			&& (p->p_stat != SZOMB)
			|| !(suser(kauth_cred_get(), 0))
			|| (((tfp_group_inited != 0) &&
				((kauth_cred_ismember_gid(kauth_cred_get(),
					tfp_group_ronly, &is_member) == 0) && is_member)
				|| ((kauth_cred_ismember_gid(kauth_cred_get(),
					tfp_group_rw, &is_member) == 0) && is_member)
				&& ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
					((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
					&& ((p->p_flag & P_SUGID) == 0))

	case KERN_TFP_POLICY_DENY:
		/* self or suser only */
		/* do not return task port of other task at all */
		if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
			&& ((p1 == p) || !(suser(kauth_cred_get(), 0))))
	if (ispermitted == TRUE) {
		if (p->task != TASK_NULL) {
			task_reference(p->task);
			sright = (void *)convert_task_to_port(p->task);
			tret = ipc_port_copyout_send(
				get_task_ipcspace(current_task()));
			tret = MACH_PORT_NULL;
		AUDIT_ARG(mach_port2, tret);
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_SUCCESS;
		/*
		 * There is no guarantee that p_comm is null-terminated, and the
		 * kernel's string functions are not complete either, so bzero
		 * the buffer to ensure stale info is not leaked out.
		 */
		bzero(&procname[0], MAXCOMLEN+1);
		strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
		if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
			log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
				((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
				((p != PROC_NULL)?(p->p_pid):0));
#endif /* DIAGNOSTIC */
		tret = MACH_PORT_NULL;
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_FAILURE;

	thread_funnel_set(kernel_flock, funnel_state);
	AUDIT_MACH_SYSCALL_EXIT(error);
/*
 * Routine:	task_name_for_pid
 *
 *	Get the task name port for another "process", named by its
 *	process ID on the same host as "target_task".
 *
 *	Only permitted to privileged processes, or processes
 *	with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
	struct task_name_for_pid_args *args)
	mach_port_name_t	target_tport = args->target_tport;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	mach_port_name_t	tret;
	boolean_t		funnel_state;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 *
	 * XXX p_ucred check can be bogus in multithreaded processes,
	 * XXX unless the funnel is held.
	 */
	uthread = get_bsdthread_info(current_thread());
	if (uthread->uu_ucred != p1->p_ucred &&
	    (uthread->uu_flag & UT_SETUID) == 0) {
		kauth_cred_t old = uthread->uu_ucred;
		kauth_cred_ref(p1->p_ucred);
		uthread->uu_ucred = p1->p_ucred;
		if (IS_VALID_CRED(old))
			kauth_cred_unref(&old);

	AUDIT_ARG(process, p);

	if ((p != (struct proc *) 0)
		&& (p->p_stat != SZOMB)
		&& (p1 != (struct proc *) 0)
		|| !(suser(kauth_cred_get(), 0))
		|| ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
			((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))

		if (p->task != TASK_NULL)
			task_reference(p->task);
			sright = (void *)convert_task_name_to_port(p->task);
			tret = ipc_port_copyout_send(
				get_task_ipcspace(current_task()));
			tret = MACH_PORT_NULL;
		AUDIT_ARG(mach_port2, tret);
		(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_SUCCESS;

		tret = MACH_PORT_NULL;
		(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
		error = KERN_FAILURE;

	thread_funnel_set(kernel_flock, funnel_state);
	AUDIT_MACH_SYSCALL_EXIT(error);
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {

	if ((new_value == KERN_TFP_POLICY_DENY)
		|| (new_value == KERN_TFP_POLICY_PERMISSIVE)
		|| (new_value == KERN_TFP_POLICY_RESTRICTED))
		tfp_policy = new_value;
sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)

	/*
	 * Once set, this cannot be reset until the next boot.  launchd sets it
	 * during its pid 1 init and no one can set it after that.
	 */
	if (tfp_group_inited != 0)

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {

	if (new_value >= 100)

	if (arg1 == &tfp_group_ronly)
		tfp_group_ronly = new_value;
	else if (arg1 == &tfp_group_rw)
		tfp_group_rw = new_value;

	if ((tfp_group_ronly != 0) && (tfp_group_rw != 0))
		tfp_group_inited = 1;
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups, "I", "read_group");
SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups, "I", "rw_group");
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
    &shared_region_trace_level, 0, "");
/*
 * Try to cap the number of mappings the user might be trying to deal with,
 * so that we don't end up allocating insane amounts of wired memory in the
 * kernel based on bogus user arguments.
 * There are 2 shared regions (TEXT and DATA).  The size of each submap
 * is SHARED_TEXT_REGION_SIZE and we can have at most 1 VM map entry per page,
 * so the maximum number of mappings we could ever have to deal with is...
 */
#define SHARED_REGION_MAX_MAPPINGS ((2 * SHARED_TEXT_REGION_SIZE) >> PAGE_SHIFT)
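/*
 * Worked example (illustrative, assuming 4 KB pages, i.e. PAGE_SHIFT == 12):
 * each submap covers 0x10000000 bytes (256 MB, per the address ranges quoted
 * in the map_file_np comment below), so the cap works out to
 * (2 * 0x10000000) >> 12 = 131072 possible mappings.
 */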
/*
 * shared_region_make_private_np:
 *
 * This system call is for "dyld" only.
 *
 * It creates a private copy of the current process's "shared region" for
 * split libraries.  "dyld" uses this when the shared region is full or
 * it needs to load a split library that conflicts with an already loaded one
 * that this process doesn't need.  "dyld" specifies a set of address ranges
 * that it wants to keep in the now-private "shared region".  These cover
 * the set of split libraries that the process needs so far.  The kernel needs
 * to deallocate the rest of the shared region, so that it's available for
 * more libraries for this process.
 */
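/*
 * Illustrative sketch (not part of the original file) of how dyld would be
 * expected to call this: it passes an array of the address ranges it wants
 * to keep, plus a count.  The exact field names of struct
 * shared_region_range_np are declared elsewhere; treat the layout below as
 * an assumption:
 *
 *	struct shared_region_range_np keep[2];	// ranges of still-needed libs
 *	// fill in keep[i] with the address/size pairs of the split libraries
 *	// this process must retain, then:
 *	shared_region_make_private_np(2, keep);
 *
 * Everything else in the (now private) shared region gets deallocated.
 */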
shared_region_make_private_np(
	struct shared_region_make_private_np_args *uap,
	__unused int *retvalp)

	boolean_t		using_shared_regions;
	user_addr_t		user_ranges;
	unsigned int		range_count;
	vm_size_t		ranges_size;
	struct shared_region_range_np	*ranges;
	shared_region_mapping_t	shared_region;
	struct shared_region_task_mappings	task_mapping_info;
	shared_region_mapping_t	next;

	range_count = uap->rangeCount;
	user_ranges = uap->ranges;
	ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));

		SHARED_REGION_TRACE_INFO,
		("shared_region: %p [%d(%s)] "
		 "make_private(rangecount=%d)\n",
		 current_thread(), p->p_pid, p->p_comm, range_count));

	/* allocate kernel space for the "ranges" */
	if (range_count != 0) {
		if (range_count > SHARED_REGION_MAX_MAPPINGS) {
		if ((mach_vm_size_t) ranges_size !=
		    (mach_vm_size_t) range_count * sizeof (ranges[0])) {
			/* 32-bit integer overflow */
		kr = kmem_alloc(kernel_map,
				(vm_offset_t *) &ranges,
		if (kr != KERN_SUCCESS) {

		/* copy "ranges" from user-space */
		error = copyin(user_ranges,

	if (p->p_flag & P_NOSHLIB) {
		/* no split library has been mapped for this process so far */
		using_shared_regions = FALSE;
		/* this process has already mapped some split libraries */
		using_shared_regions = TRUE;

	/*
	 * Get a private copy of the current shared region.
	 * Do not chain it to the system-wide shared region, as we'll want
	 * to map other split libraries in place of the old ones.  We want
	 * to completely detach from the system-wide shared region and go our
	 * own way after this point, not sharing anything with other processes.
	 */
	error = clone_system_shared_regions(using_shared_regions,
					    FALSE, /* chain_regions */

	/* get info on the newly allocated shared region */
	vm_get_shared_region(current_task(), &shared_region);
	task_mapping_info.self = (vm_offset_t) shared_region;
	shared_region_mapping_info(shared_region,
				   &(task_mapping_info.text_region),
				   &(task_mapping_info.text_size),
				   &(task_mapping_info.data_region),
				   &(task_mapping_info.data_size),
				   &(task_mapping_info.region_mappings),
				   &(task_mapping_info.client_base),
				   &(task_mapping_info.alternate_base),
				   &(task_mapping_info.alternate_next),
				   &(task_mapping_info.fs_base),
				   &(task_mapping_info.system),
				   &(task_mapping_info.flags),

	/*
	 * We now have our private copy of the shared region, as it was before
	 * the call to clone_system_shared_regions().  We now need to clean it
	 * up and keep only the memory areas described by the "ranges" array.
	 */
	kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);

	if (ranges != NULL) {
		kmem_free(kernel_map,
			  (vm_offset_t) ranges,

		SHARED_REGION_TRACE_INFO,
		("shared_region: %p [%d(%s)] "
		 "make_private(rangecount=%d) -> %d "
		 "shared_region=%p[%x,%x,%x]\n",
		 current_thread(), p->p_pid, p->p_comm,
		 range_count, error, shared_region,
		 task_mapping_info.fs_base,
		 task_mapping_info.system,
		 task_mapping_info.flags));
/*
 * shared_region_map_file_np:
 *
 * This system call is for "dyld" only.
 *
 * "dyld" wants to map parts of a split library in the shared region.
 * We get a file descriptor on the split library to be mapped and a set
 * of mapping instructions, describing which parts of the file to map in
 * which areas of the shared segment and with what protection.
 * The "shared region" is split in 2 areas:
 * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
 * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
 */
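/*
 * Illustrative sketch (not part of the original file) of one mapping
 * instruction dyld hands in.  sfm_address, sfm_size and sfm_max_prot are the
 * fields used by the code below; sfm_file_offset and sfm_init_prot are
 * assumed field names shown only to make the example complete:
 *
 *	struct shared_file_mapping_np m;
 *	m.sfm_address     = 0x90000000;		// where in the shared region
 *	m.sfm_size        = 0x1000;		// how much of the file
 *	m.sfm_file_offset = 0;			// where in the file (assumed)
 *	m.sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE;
 *	m.sfm_init_prot   = VM_PROT_READ;	// (assumed)
 *
 * dyld passes an array of these plus a count and, optionally, a pointer
 * through which the kernel reports by how much it had to slide them.
 */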
shared_region_map_file_np(
	struct shared_region_map_file_np_args *uap,
	__unused int *retvalp)

	unsigned int		mapping_count;
	user_addr_t		user_mappings;		/* 64-bit */
	user_addr_t		user_slide_p;		/* 64-bit */
	struct shared_file_mapping_np	*mappings;
	vm_size_t		mappings_size;
	mach_vm_offset_t	slide;
	struct vfs_context	context;
	memory_object_control_t	file_control;
	memory_object_size_t	file_size;
	shared_region_mapping_t	shared_region;
	struct shared_region_task_mappings	task_mapping_info;
	shared_region_mapping_t	next;
	shared_region_mapping_t	default_shared_region;
	boolean_t		using_default_region;
	mach_vm_offset_t	base_offset, end_offset;
	mach_vm_offset_t	original_base_offset;
	boolean_t		mappings_in_segment;
#define SFM_MAX_STACK	6
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	/* get file descriptor for split library from arguments */

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));

	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));

	context.vc_ucred = kauth_cred_get();
	if ((error = vnode_size(vp, &fs, &context)) != 0) {
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] "
			 "map_file(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp));
	/*
	 * Get the list of mappings the caller wants us to establish.
	 */
	mapping_count = uap->mappingCount;	/* the number of mappings */
	mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
	if (mapping_count == 0) {
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_INFO,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 current_thread(), p->p_pid, p->p_comm,
		error = 0;	/* no mappings: we're done ! */
	} else if (mapping_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
		if (mapping_count > SHARED_REGION_MAX_MAPPINGS) {
		if ((mach_vm_size_t) mappings_size !=
		    (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
			/* 32-bit integer overflow */
		kr = kmem_alloc(kernel_map,
				(vm_offset_t *) &mappings,
		if (kr != KERN_SUCCESS) {
			SHARED_REGION_TRACE(
				SHARED_REGION_TRACE_ERROR,
				("shared_region: %p [%d(%s)] "
				 "map_file(%p:'%s'): "
				 "failed to allocate %d mappings (kr=0x%x)\n",
				 current_thread(), p->p_pid, p->p_comm,
				 vp, vp->v_name, mapping_count, kr));

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 "failed to copyin %d mappings (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mapping_count, error));

	/*
	 * If the caller provides a "slide" pointer, it means they're OK
	 * with us moving the mappings around to make them fit.
	 */
	user_slide_p = uap->slide_p;
	/*
	 * Make each mapping address relative to the beginning of the
	 * shared region.  Check that all mappings are in the shared region.
	 * Compute the maximum set of protections required to tell the
	 * buffer cache how we mapped the file (see call to ubc_map() below).
	 */
	max_prot = VM_PROT_NONE;
	mappings_in_segment = TRUE;
	for (j = 0; j < mapping_count; j++) {
		mach_vm_offset_t segment;
		segment = (mappings[j].sfm_address &
			   GLOBAL_SHARED_SEGMENT_MASK);
		if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
		    segment != GLOBAL_SHARED_DATA_SEGMENT) {
			/* this mapping is not in the shared region... */
			if (user_slide_p == NULL) {
				/* ... and we can't slide it in: fail */
				SHARED_REGION_TRACE(
					SHARED_REGION_TRACE_CONFLICT,
					("shared_region: %p [%d(%s)] "
					 "map_file(%p:'%s'): "
					 "mapping %p not in shared segment & "
					 current_thread(), p->p_pid, p->p_comm,
					 mappings[j].sfm_address));
				/* expect all mappings to be outside */
				mappings_in_segment = FALSE;
			} else if (mappings_in_segment != FALSE) {
				/* other mappings were not outside: fail */
				SHARED_REGION_TRACE(
					SHARED_REGION_TRACE_CONFLICT,
					("shared_region: %p [%d(%s)] "
					 "map_file(%p:'%s'): "
					 "mapping %p not in shared segment & "
					 "other mappings in shared segment\n",
					 current_thread(), p->p_pid, p->p_comm,
					 mappings[j].sfm_address));
			/* we'll try and slide that mapping in the segments */
			/* expect all mappings to be inside */
			mappings_in_segment = TRUE;
		} else if (mappings_in_segment != TRUE) {
			/* other mappings were not inside: fail */
			SHARED_REGION_TRACE(
				SHARED_REGION_TRACE_CONFLICT,
				("shared_region: %p [%d(%s)] "
				 "map_file(%p:'%s'): "
				 "mapping %p in shared segment & "
				 "others in shared segment\n",
				 current_thread(), p->p_pid, p->p_comm,
				 mappings[j].sfm_address));
		/* get a relative offset inside the shared segments */
		mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;

		if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
			base_offset = (mappings[j].sfm_address &
				       SHARED_TEXT_REGION_MASK);
		if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
		    mappings[j].sfm_size > end_offset) {
				(mappings[j].sfm_address &
				 SHARED_TEXT_REGION_MASK) +
				mappings[j].sfm_size;
		max_prot |= mappings[j].sfm_max_prot;

	/* Make all mappings relative to the base_offset */
	base_offset = vm_map_trunc_page(base_offset);
	end_offset = vm_map_round_page(end_offset);
	for (j = 0; j < mapping_count; j++) {
		mappings[j].sfm_address -= base_offset;
	original_base_offset = base_offset;
	if (mappings_in_segment == FALSE) {
		/*
		 * We're trying to map a library that was not pre-bound to
		 * be in the shared segments.  We want to try and slide it
		 * back into the shared segments but as far back as possible,
		 * so that it doesn't clash with pre-bound libraries.  Set
		 * the base_offset to the end of the region, so that it can't
		 * possibly fit there and will have to be slid.
		 */
		base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
	/* get the file's memory object handle */
	UBCINFOCHECK("shared_region_map_file_np", vp);
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_ERROR,
			("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
			 "ubc_getobject() failed\n",
			 current_thread(), p->p_pid, p->p_comm,

	/*
	 * Get info about the current process's shared region.
	 * This might change if we decide we need to clone the shared region.
	 */
	vm_get_shared_region(current_task(), &shared_region);
	task_mapping_info.self = (vm_offset_t) shared_region;
	shared_region_mapping_info(shared_region,
				   &(task_mapping_info.text_region),
				   &(task_mapping_info.text_size),
				   &(task_mapping_info.data_region),
				   &(task_mapping_info.data_size),
				   &(task_mapping_info.region_mappings),
				   &(task_mapping_info.client_base),
				   &(task_mapping_info.alternate_base),
				   &(task_mapping_info.alternate_next),
				   &(task_mapping_info.fs_base),
				   &(task_mapping_info.system),
				   &(task_mapping_info.flags),

	/*
	 * Are we using the system's current shared region
	 * for this environment ?
	 */
	default_shared_region =
		lookup_default_shared_region(ENV_DEFAULT_ROOT,
					     task_mapping_info.system);
	if (shared_region == default_shared_region) {
		using_default_region = TRUE;
		using_default_region = FALSE;
	shared_region_mapping_dealloc(default_shared_region);

	if (vp->v_mount != rootvnode->v_mount &&
	    using_default_region) {
		/*
		 * The split library is not on the root filesystem.  We don't
		 * want to pollute the system-wide ("default") shared region.
		 * Reject the mapping.  The caller (dyld) should "privatize"
		 * (via shared_region_make_private()) the shared region and
		 * try to establish the mapping privately for this process.
		 */
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_CONFLICT,
			("shared_region: %p [%d(%s)] "
			 "map_file(%p:'%s'): "
			 "not on root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
	/*
	 * Map the split library.
	 */
	kr = map_shared_file(mapping_count,
			     (user_slide_p) ? &slide : NULL);

	if (kr == KERN_SUCCESS) {
		/*
		 * The mapping was successful.  Let the buffer cache know
		 * that we've mapped that file with these protections.  This
		 * prevents the vnode from getting recycled while it's mapped.
		 */
		(void) ubc_map(vp, max_prot);
		SHARED_REGION_TRACE(
			SHARED_REGION_TRACE_CONFLICT,
			("shared_region: %p [%d(%s)] "
			 "map_file(%p:'%s'): "
			 "map_shared_file failed, kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		case KERN_INVALID_ADDRESS:
		case KERN_PROTECTION_FAILURE:
		case KERN_INVALID_ARGUMENT:

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		p->p_flag &= ~P_NOSHLIB;

		/*
		 * The caller provided a pointer to a "slide" offset.  Let
		 * them know by how much we slid the mappings.
		 */
		if (mappings_in_segment == FALSE) {
			/*
			 * We faked the base_offset earlier, so undo that
			 * and take into account the real base_offset.
			 */
			slide += SHARED_TEXT_REGION_SIZE - end_offset;
			slide -= original_base_offset;
			/*
			 * The mappings were slid into the shared segments
			 * and "slide" is relative to the beginning of the
			 * shared segments.  Adjust it to be absolute.
			 */
			slide += GLOBAL_SHARED_TEXT_SEGMENT;
		error = copyout(&slide,
			SHARED_REGION_TRACE(
				SHARED_REGION_TRACE_CONFLICT,
				("shared_region: %p [%d(%s)] "
				 "map_file(%p:'%s'): "
				 current_thread(), p->p_pid, p->p_comm,
				 vp, vp->v_name, slide));

	/*
	 * release the vnode...
	 * ubc_map() still holds it for us in the non-error case
	 */
	(void) vnode_put(vp);

	/* release the file descriptor */
	fp_drop(p, fd, fp, 0);
	if (mappings != NULL &&
	    mappings != &stack_mappings[0]) {
		kmem_free(kernel_map,
			  (vm_offset_t) mappings,

	__unused struct proc *p,
	__unused struct load_shared_file_args *uap,
	__unused int *retval)

	__unused struct proc *p,
	__unused struct reset_shared_file_args *uap,
	__unused int *retval)

new_system_shared_regions(
	__unused struct proc *p,
	__unused struct new_system_shared_regions_args *uap,
	__unused int *retval)
clone_system_shared_regions(
	int shared_regions_active,

	shared_region_mapping_t	new_shared_region;
	shared_region_mapping_t	next;
	shared_region_mapping_t	old_shared_region;
	struct shared_region_task_mappings old_info;
	struct shared_region_task_mappings new_info;

	vm_get_shared_region(current_task(), &old_shared_region);
	old_info.self = (vm_offset_t)old_shared_region;
	shared_region_mapping_info(old_shared_region,
				   &(old_info.text_region),
				   &(old_info.text_size),
				   &(old_info.data_region),
				   &(old_info.data_size),
				   &(old_info.region_mappings),
				   &(old_info.client_base),
				   &(old_info.alternate_base),
				   &(old_info.alternate_next),
				   &(old_info.fs_base),
				   &(old_info.flags), &next);

	if (shared_regions_active ||
	    base_vnode == ENV_DEFAULT_ROOT) {
		if (shared_file_create_system_region(&new_shared_region,
		if (old_shared_region &&
		    base_vnode == ENV_DEFAULT_ROOT) {
			base_vnode = old_info.fs_base;
			lookup_default_shared_region(base_vnode,
		if (new_shared_region == NULL) {
			shared_file_boot_time_init(base_vnode,
			vm_get_shared_region(current_task(),
					     &new_shared_region);
		vm_set_shared_region(current_task(), new_shared_region);
	if (old_shared_region)
		shared_region_mapping_dealloc(old_shared_region);

	new_info.self = (vm_offset_t)new_shared_region;
	shared_region_mapping_info(new_shared_region,
				   &(new_info.text_region),
				   &(new_info.text_size),
				   &(new_info.data_region),
				   &(new_info.data_size),
				   &(new_info.region_mappings),
				   &(new_info.client_base),
				   &(new_info.alternate_base),
				   &(new_info.alternate_next),
				   &(new_info.fs_base),
				   &(new_info.flags), &next);

	if(shared_regions_active) {
		if(vm_region_clone(old_info.text_region, new_info.text_region)) {
			panic("clone_system_shared_regions: shared region mis-alignment 1");
			shared_region_mapping_dealloc(new_shared_region);
		if (vm_region_clone(old_info.data_region, new_info.data_region)) {
			panic("clone_system_shared_regions: shared region mis-alignment 2");
			shared_region_mapping_dealloc(new_shared_region);
		if (chain_regions) {
			/*
			 * We want a "shadowed" clone, a private superset of the old
			 * shared region.  The info about the old mappings is still
			 */
			shared_region_object_chain_attach(
				new_shared_region, old_shared_region);

	if (!chain_regions) {
		/*
		 * We want a completely detached clone with no link to
		 * the old shared region.  We'll be removing some mappings
		 * in our private, cloned, shared region, so the old mappings
		 * will become irrelevant to us.  Since we have a private
		 * "shared region" now, it isn't going to be shared with
		 * anyone else and we won't need to maintain mappings info.
		 */
		shared_region_object_chain_detached(new_shared_region);

	if (vm_map_region_replace(current_map(), old_info.text_region,
				  new_info.text_region, old_info.client_base,
				  old_info.client_base+old_info.text_size)) {
		panic("clone_system_shared_regions: shared region mis-alignment 3");
		shared_region_mapping_dealloc(new_shared_region);
	if(vm_map_region_replace(current_map(), old_info.data_region,
				 new_info.data_region,
				 old_info.client_base + old_info.text_size,
				 old_info.client_base
				 + old_info.text_size + old_info.data_size)) {
		panic("clone_system_shared_regions: shared region mis-alignment 4");
		shared_region_mapping_dealloc(new_shared_region);
	vm_set_shared_region(current_task(), new_shared_region);

	/* consume the reference which wasn't accounted for in object */
	if (!shared_regions_active || !chain_regions)
		shared_region_mapping_dealloc(old_shared_region);

	SHARED_REGION_TRACE(
		SHARED_REGION_TRACE_INFO,
		("shared_region: %p task=%p "
		 "clone(active=%d, base=0x%x,chain=%d) "
		 "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
		 current_thread(), current_task(),
		 shared_regions_active, base_vnode, chain_regions,
/* header for the profile name file.  The profiled app info is held */
/* in the data file and pointed to by elements in the name file     */

struct profile_names_header {
	unsigned int	number_of_profiles;
	unsigned int	user_id;
	unsigned int	version;
	off_t		element_array;
	unsigned int	spare1;
	unsigned int	spare2;
	unsigned int	spare3;

struct profile_element {
	unsigned int	mod_date;

struct global_profile {
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	vm_offset_t	buf_ptr;

struct global_profile_cache {
	struct global_profile	profiles[3];

/* forward declarations */
int bsd_open_page_cache_files(unsigned int user,
			      struct global_profile **profile);
void bsd_close_page_cache_files(struct global_profile *profile);
int bsd_search_page_cache_data_base(
	struct profile_names_header *database,
	unsigned int mod_date,
	unsigned int *profile_size);

struct global_profile_cache global_user_profile_cache =
	{3, 0, {{NULL, NULL, 0, 0, 0, 0},
		{NULL, NULL, 0, 0, 0, 0},
		{NULL, NULL, 0, 0, 0, 0}} };
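/*
 * Illustrative sketch (not part of the original file) of the on-disk layout
 * implied by the structures above: the "<uid>_names" file starts with a
 * profile_names_header whose element_array offset points at an array of
 * profile_element entries; each entry identifies an application (name,
 * inode, mod_date) and gives the offset/size of its profile in the
 * "<uid>_data" file.  Fields not listed above (name, inode, addr, size) are
 * the ones the lookup code below relies on.
 */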
/* BSD_OPEN_PAGE_CACHE_FILES:					*/
/* Caller provides a user id.  This id was used in		*/
/* prepare_profile_database to create two unique absolute	*/
/* file paths to the associated profile files.  These files	*/
/* are either opened or bsd_open_page_cache_files returns an	*/
/* error.  The header of the names file is then consulted.	*/
/* The header and the vnodes for the names and data files are	*/

bsd_open_page_cache_files(
	struct global_profile **profile)

	const char	*cache_path = "/var/vm/app_profile/";
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	vm_offset_t	names_buf;
	vm_offset_t	buf_ptr;
	int		profile_names_length;
	int		profile_data_length;
	char		*profile_data_string;
	char		*profile_names_string;
	struct vfs_context	context;
	struct nameidata	nd_names;
	struct nameidata	nd_data;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();
	for(i = 0; i < global_user_profile_cache.max_ele; i++) {
		if((global_user_profile_cache.profiles[i].user == user)
			&& (global_user_profile_cache.profiles[i].data_vp
			*profile = &global_user_profile_cache.profiles[i];
			/* already in cache, we're done */
			if ((*profile)->busy) {
				/*
				 * drop funnel and wait
				 */
				(void)tsleep((void *)
					PRIBIO, "app_profile", 0);
			(*profile)->busy = 1;
			(*profile)->age = global_user_profile_cache.age;

			/*
			 * entries in cache are held with a valid
			 * usecount... take an iocount which will
			 * be dropped in "bsd_close_page_cache_files"
			 * which is called after the read or writes to
			 * these files are done
			 */
			if ( (vnode_getwithref((*profile)->data_vp)) ) {
				vnode_rele((*profile)->data_vp);
				vnode_rele((*profile)->names_vp);
				(*profile)->data_vp = NULL;
				(*profile)->busy = 0;

			if ( (vnode_getwithref((*profile)->names_vp)) ) {
				vnode_put((*profile)->data_vp);
				vnode_rele((*profile)->data_vp);
				vnode_rele((*profile)->names_vp);
				(*profile)->data_vp = NULL;
				(*profile)->busy = 0;

			global_user_profile_cache.age += 1;
	lru = global_user_profile_cache.age;

	for(i = 0; i < global_user_profile_cache.max_ele; i++) {
		/* Skip entry if it is in the process of being reused */
		if(global_user_profile_cache.profiles[i].data_vp ==
		   (struct vnode *)0xFFFFFFFF)

		/* Otherwise grab the first empty entry */
		if(global_user_profile_cache.profiles[i].data_vp == NULL) {
			*profile = &global_user_profile_cache.profiles[i];
			(*profile)->age = global_user_profile_cache.age;

		/* Otherwise grab the oldest entry */
		if(global_user_profile_cache.profiles[i].age < lru) {
			lru = global_user_profile_cache.profiles[i].age;
			*profile = &global_user_profile_cache.profiles[i];

	/* Did we set it? */
	if (*profile == NULL) {
		/*
		 * No entries are available; this can only happen if all
		 * of them are currently in the process of being reused;
		 * if this happens, we sleep on the address of the first
		 * element, and restart.  This is less than ideal, but we
		 * know it will work because we know that there will be a
		 * wakeup on any entry currently in the process of being
		 *
		 * XXX Recommend a two-handed clock and more than 3 total
		 * XXX cache entries at some point in the future.
		 */
		/*
		 * drop funnel and wait
		 */
		(void)tsleep((void *)
			&global_user_profile_cache.profiles[0],
			PRIBIO, "app_profile", 0);
	/*
	 * If it's currently busy, we've picked the one at the end of the
	 * LRU list, but it's currently being actively used.  We sleep on
	 * its address and restart.
	 */
	if ((*profile)->busy) {
		/*
		 * drop funnel and wait
		 */
		(void)tsleep((void *)
			PRIBIO, "app_profile", 0);
	(*profile)->busy = 1;
	(*profile)->user = user;

	/*
	 * put dummy value in for now to get competing request to wait
	 * above until we are finished
	 *
	 * Save the data_vp before setting it, so we can set it before
	 * we kmem_free() or vrele().  If we don't do this, then we
	 * have a potential funnel race condition we have to deal with.
	 */
	data_vp = (*profile)->data_vp;
	(*profile)->data_vp = (struct vnode *)0xFFFFFFFF;

	/*
	 * Age the cache here in all cases; this guarantees that we won't
	 * be reusing only one entry over and over, once the system reaches
	 */
	global_user_profile_cache.age += 1;

	if(data_vp != NULL) {
		kmem_free(kernel_map,
			(*profile)->buf_ptr, 4 * PAGE_SIZE);
		if ((*profile)->names_vp) {
			vnode_rele((*profile)->names_vp);
			(*profile)->names_vp = NULL;
		vnode_rele(data_vp);
	/* Try to open the appropriate user's profile files	*/
	/* If neither file is present, try to create them	*/
	/* If one file is present and the other not, fail.	*/
	/* If the files do exist, check them for the app_file	*/
	/* requested and read it in if present			*/

	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&profile_data_string, PATH_MAX);

		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	/* Split the buffer in half since we know the size of	*/
	/* our file path and our allocation is adequate for	*/
	/* both file path names					*/
	profile_names_string = profile_data_string + (PATH_MAX/2);

	strcpy(profile_data_string, cache_path);
	strcpy(profile_names_string, cache_path);
	profile_names_length = profile_data_length
		= strlen(profile_data_string);
	substring = profile_data_string + profile_data_length;
	sprintf(substring, "%x_data", user);
	substring = profile_names_string + profile_names_length;
	sprintf(substring, "%x_names", user);

	/* We now have the absolute file names */
	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);
		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
	NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);

	if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
		printf("bsd_open_page_cache_files: CacheData file not found %s\n",
			profile_data_string);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);
		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	data_vp = nd_data.ni_vp;

	if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
		printf("bsd_open_page_cache_files: NamesData file not found %s\n",
			profile_data_string);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

		vnode_rele(data_vp);

		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	names_vp = nd_names.ni_vp;

	if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
		printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);

		vnode_rele(names_vp);
		vnode_put(names_vp);
		vnode_rele(data_vp);

		(*profile)->data_vp = NULL;
		(*profile)->busy = 0;

	if(size > 4 * PAGE_SIZE)
		size = 4 * PAGE_SIZE;

	buf_ptr = names_buf;

		error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
			UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
		resid = (vm_size_t) resid_int;
		if((error) || (size == resid)) {
			kmem_free(kernel_map,
				(vm_offset_t)profile_data_string, PATH_MAX);
			kmem_free(kernel_map,
				(vm_offset_t)names_buf, 4 * PAGE_SIZE);

			vnode_rele(names_vp);
			vnode_put(names_vp);
			vnode_rele(data_vp);

			(*profile)->data_vp = NULL;
			(*profile)->busy = 0;

		buf_ptr += size-resid;
		resid_off += size-resid;

	kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);

	(*profile)->names_vp = names_vp;
	(*profile)->data_vp = data_vp;
	(*profile)->buf_ptr = names_buf;

	/*
	 * at this point, both the names_vp and the data_vp have
	 * a valid usecount and an iocount held
	 */
bsd_close_page_cache_files(
	struct global_profile *profile)

	vnode_put(profile->data_vp);
	vnode_put(profile->names_vp);
bsd_read_page_cache_file(
	struct vnode	*app_vp,
	vm_offset_t	*buffer,
	vm_offset_t	*bufsize)

	boolean_t	funnel_state;
	unsigned int	profile_size;
	vm_offset_t	names_buf;
	struct vnode_attr	va;
	struct vfs_context	context;
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	struct global_profile	*uid_files;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/* Try to open the appropriate user's profile files	*/
	/* If neither file is present, try to create them	*/
	/* If one file is present and the other not, fail.	*/
	/* If the files do exist, check them for the app_file	*/
	/* requested and read it in if present			*/

	error = bsd_open_page_cache_files(user, &uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	names_vp = uid_files->names_vp;
	data_vp = uid_files->data_vp;
	names_buf = uid_files->buf_ptr;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	VATTR_WANTED(&va, va_fileid);
	VATTR_WANTED(&va, va_modify_time);

	if ((error = vnode_getattr(app_vp, &va, &context))) {
		printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	*fid = (u_long)va.va_fileid;
	*mod = va.va_modify_time.tv_sec;

	if (bsd_search_page_cache_data_base(
		(struct profile_names_header *)names_buf,
		(unsigned int) va.va_modify_time.tv_sec,
		(u_long)va.va_fileid, &profile, &profile_size) == 0) {
		/* profile is an offset in the profile data base */
		/* It is zero if no profile data was found */

		if(profile_size == 0) {
			bsd_close_page_cache_files(uid_files);
			thread_funnel_set(kernel_flock, funnel_state);

		ret = (vm_offset_t)(kmem_alloc(kernel_map, buffer, profile_size));
			bsd_close_page_cache_files(uid_files);
			thread_funnel_set(kernel_flock, funnel_state);

		*bufsize = profile_size;
		while(profile_size) {
			error = vn_rdwr(UIO_READ, data_vp,
				(caddr_t) *buffer, profile_size,
				profile, UIO_SYSSPACE32, IO_NODELOCKED,
				kauth_cred_get(), &resid_int, p);
			resid = (vm_size_t) resid_int;
			if((error) || (profile_size == resid)) {
				bsd_close_page_cache_files(uid_files);
				kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
				thread_funnel_set(kernel_flock, funnel_state);
			profile += profile_size - resid;
			profile_size = resid;
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	bsd_close_page_cache_files(uid_files);
	thread_funnel_set(kernel_flock, funnel_state);
bsd_search_page_cache_data_base(
	struct profile_names_header *database,
	unsigned int	mod_date,
	unsigned int	*profile_size)

	struct profile_element	*element;
	unsigned int	ele_total;
	unsigned int	extended_list = 0;
	vm_offset_t	local_buf = 0;

	if(((vm_offset_t)database->element_array) !=
		sizeof(struct profile_names_header)) {

	element = (struct profile_element *)(
		(vm_offset_t)database->element_array +
		(vm_offset_t)database);

	ele_total = database->number_of_profiles;

	/* note: code assumes header + n*ele comes out on a page boundary */
	if(((local_buf == 0) && (sizeof(struct profile_names_header) +
		(ele_total * sizeof(struct profile_element)))
		> (PAGE_SIZE * 4)) ||
		((local_buf != 0) &&
		(ele_total * sizeof(struct profile_element))
		> (PAGE_SIZE * 4))) {
		extended_list = ele_total;
		if(element == (struct profile_element *)
			((vm_offset_t)database->element_array +
			(vm_offset_t)database)) {
			ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
			ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
		extended_list -= ele_total;

	for (i = 0; i < ele_total; i++) {
		if((mod_date == element[i].mod_date)
			&& (inode == element[i].inode)) {
			if(strncmp(element[i].name, app_name, 12) == 0) {
				*profile = element[i].addr;
				*profile_size = element[i].size;
				if(local_buf != 0) {
					kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);

	if(extended_list == 0)

	if(local_buf == 0) {
		ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
		if(ret != KERN_SUCCESS) {

	element = (struct profile_element *)local_buf;
	ele_total = extended_list;
	file_off += 4 * PAGE_SIZE;
	if((ele_total * sizeof(struct profile_element)) >
		size = PAGE_SIZE * 4;
		size = ele_total * sizeof(struct profile_element);

		error = vn_rdwr(UIO_READ, vp,
			CAST_DOWN(caddr_t, (local_buf + resid_off)),
			size, file_off + resid_off, UIO_SYSSPACE32,
			IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
		resid = (vm_size_t) resid_int;
		if((error) || (size == resid)) {
			if(local_buf != 0) {
				kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
		resid_off += size-resid;

	if(local_buf != 0) {
		kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
bsd_write_page_cache_file(

	boolean_t	funnel_state;
	struct vfs_context	context;
	unsigned int	profile_size;
	vm_offset_t	names_buf;
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	struct profile_names_header	*profile_header;
	struct global_profile	*uid_files;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	error = bsd_open_page_cache_files(user, &uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	names_vp = uid_files->names_vp;
	data_vp = uid_files->data_vp;
	names_buf = uid_files->buf_ptr;

	/* Stat data file for size */

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
		printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	if (bsd_search_page_cache_data_base(names_vp,
			(struct profile_names_header *)names_buf,
			file_name, (unsigned int) mod,
			fid, &profile, &profile_size) == 0) {
		/* profile is an offset in the profile data base */
		/* It is zero if no profile data was found */

		if(profile_size == 0) {
			unsigned int	header_size;
			vm_offset_t	buf_ptr;

			/* Our Write case */

			/* read header for last entry */
				(struct profile_names_header *)names_buf;
			name_offset = sizeof(struct profile_names_header) +
				(sizeof(struct profile_element)
					* profile_header->number_of_profiles);
			profile_header->number_of_profiles += 1;

			if(name_offset < PAGE_SIZE * 4) {
				struct profile_element	*name;
				/* write new entry */
				name = (struct profile_element *)
					(names_buf + (vm_offset_t)name_offset);
				name->addr = file_size;
				name->mod_date = mod;
				strncpy (name->name, file_name, 12);
				unsigned int	ele_size;
				struct profile_element	name;
				/* write new entry */
				name.addr = file_size;
				name.mod_date = mod;
				strncpy (name.name, file_name, 12);
				/* write element out separately */
				ele_size = sizeof(struct profile_element);
				buf_ptr = (vm_offset_t)&name;
				resid_off = name_offset;

					error = vn_rdwr(UIO_WRITE, names_vp,
							ele_size, resid_off,
							UIO_SYSSPACE32, IO_NODELOCKED,
							kauth_cred_get(), &resid, p);
						printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
						bsd_close_page_cache_files(
					buf_ptr += (vm_offset_t)
					resid_off += ele_size-resid;

			if(name_offset < PAGE_SIZE * 4) {
				header_size = name_offset +
					sizeof(struct profile_element);
					sizeof(struct profile_names_header);
			buf_ptr = (vm_offset_t)profile_header;

			/* write names file header */
			while(header_size) {
				error = vn_rdwr(UIO_WRITE, names_vp,
						header_size, resid_off,
						UIO_SYSSPACE32, IO_NODELOCKED,
						kauth_cred_get(), &resid, p);
					printf("bsd_write_page_cache_file: Can't write header %x\n", user);
					bsd_close_page_cache_files(
						kernel_flock, funnel_state);
				buf_ptr += (vm_offset_t)header_size-resid;
				resid_off += header_size-resid;
				header_size = resid;

			/* write profile to data file */
			resid_off = file_size;
				error = vn_rdwr(UIO_WRITE, data_vp,
					(caddr_t)buffer, size, resid_off,
					UIO_SYSSPACE32, IO_NODELOCKED,
					kauth_cred_get(), &resid, p);
					printf("bsd_write_page_cache_file: Can't write header %x\n", user);
					bsd_close_page_cache_files(
						kernel_flock, funnel_state);
				buffer += size-resid;
				resid_off += size-resid;

			bsd_close_page_cache_files(uid_files);
			thread_funnel_set(kernel_flock, funnel_state);

		/* Someone else wrote a twin profile before us */
		bsd_close_page_cache_files(uid_files);
		thread_funnel_set(kernel_flock, funnel_state);

	bsd_close_page_cache_files(uid_files);
	thread_funnel_set(kernel_flock, funnel_state);
prepare_profile_database(int user)

	const char	*cache_path = "/var/vm/app_profile/";
	struct vnode	*names_vp;
	struct vnode	*data_vp;
	vm_offset_t	names_buf;
	vm_offset_t	buf_ptr;
	int		profile_names_length;
	int		profile_data_length;
	char		*profile_data_string;
	char		*profile_names_string;
	struct vnode_attr	va;
	struct vfs_context	context;
	struct profile_names_header	*profile_header;
	struct nameidata	nd_names;
	struct nameidata	nd_data;

	context.vc_proc = p;
	context.vc_ucred = kauth_cred_get();

	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&profile_data_string, PATH_MAX);

	/* Split the buffer in half since we know the size of	*/
	/* our file path and our allocation is adequate for	*/
	/* both file path names					*/
	profile_names_string = profile_data_string + (PATH_MAX/2);

	strcpy(profile_data_string, cache_path);
	strcpy(profile_names_string, cache_path);
	profile_names_length = profile_data_length
		= strlen(profile_data_string);
	substring = profile_data_string + profile_data_length;
	sprintf(substring, "%x_data", user);
	substring = profile_names_string + profile_names_length;
	sprintf(substring, "%x_names", user);

	/* We now have the absolute file names */

	ret = kmem_alloc(kernel_map,
		(vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

	NDINIT(&nd_names, LOOKUP, FOLLOW,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
	NDINIT(&nd_data, LOOKUP, FOLLOW,
		UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);

	if ( (error = vn_open(&nd_data,
			      O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

	data_vp = nd_data.ni_vp;

	if ( (error = vn_open(&nd_names,
			      O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
		printf("prepare_profile_database: Can't create CacheNames %s\n",
			profile_data_string);
		kmem_free(kernel_map,
			(vm_offset_t)names_buf, 4 * PAGE_SIZE);
		kmem_free(kernel_map,
			(vm_offset_t)profile_data_string, PATH_MAX);

		vnode_rele(data_vp);

	names_vp = nd_names.ni_vp;

	/* Write Header for new names file */

	profile_header = (struct profile_names_header *)names_buf;

	profile_header->number_of_profiles = 0;
	profile_header->user_id = user;
	profile_header->version = 1;
	profile_header->element_array =
		sizeof(struct profile_names_header);
	profile_header->spare1 = 0;
	profile_header->spare2 = 0;
	profile_header->spare3 = 0;

	size = sizeof(struct profile_names_header);
	buf_ptr = (vm_offset_t)profile_header;

		error = vn_rdwr(UIO_WRITE, names_vp,
				(caddr_t)buf_ptr, size, resid_off,
				UIO_SYSSPACE32, IO_NODELOCKED,
				kauth_cred_get(), &resid, p);
			printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
			kmem_free(kernel_map,
				(vm_offset_t)names_buf, 4 * PAGE_SIZE);
			kmem_free(kernel_map,
				(vm_offset_t)profile_data_string,
			vnode_rele(names_vp);
			vnode_put(names_vp);
			vnode_rele(data_vp);

		buf_ptr += size-resid;
		resid_off += size-resid;

	VATTR_SET(&va, va_uid, user);
	error = vnode_setattr(names_vp, &va, &context);
		printf("prepare_profile_database: "
			"Can't set user %s\n", profile_names_string);

	vnode_rele(names_vp);
	vnode_put(names_vp);

	VATTR_SET(&va, va_uid, user);
	error = vnode_setattr(data_vp, &va, &context);
		printf("prepare_profile_database: "
			"Can't set user %s\n", profile_data_string);
	vnode_rele(data_vp);

	kmem_free(kernel_map,
		(vm_offset_t)profile_data_string, PATH_MAX);
	kmem_free(kernel_map,
		(vm_offset_t)names_buf, 4 * PAGE_SIZE);