apple/xnu.git: bsd/vm/vm_unix.c (blob 7d1ad97a7dd571e0ed4456526d025154e0b28ee4)
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * Mach Operating System
32 * Copyright (c) 1987 Carnegie-Mellon University
33 * All rights reserved. The CMU software License Agreement specifies
34 * the terms and conditions for use and redistribution.
35 */
36
37 /*
38 */
39
40
41 #include <meta_features.h>
42
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/debug.h>
46 #include <kern/lock.h>
47 #include <mach/mach_traps.h>
48 #include <mach/time_value.h>
49 #include <mach/vm_map.h>
50 #include <mach/vm_param.h>
51 #include <mach/vm_prot.h>
52 #include <mach/port.h>
53
54 #include <sys/file_internal.h>
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/dir.h>
58 #include <sys/namei.h>
59 #include <sys/proc_internal.h>
60 #include <sys/kauth.h>
61 #include <sys/vm.h>
62 #include <sys/file.h>
63 #include <sys/vnode_internal.h>
64 #include <sys/mount.h>
65 #include <sys/trace.h>
66 #include <sys/kernel.h>
67 #include <sys/ubc_internal.h>
68 #include <sys/user.h>
69 #include <sys/syslog.h>
70 #include <sys/stat.h>
71 #include <sys/sysproto.h>
72 #include <sys/mman.h>
73 #include <sys/sysctl.h>
74
75 #include <bsm/audit_kernel.h>
76 #include <bsm/audit_kevents.h>
77
78 #include <kern/kalloc.h>
79 #include <vm/vm_map.h>
80 #include <vm/vm_kern.h>
81
82 #include <machine/spl.h>
83
84 #include <mach/shared_memory_server.h>
85 #include <vm/vm_shared_memory_server.h>
86
87 #include <vm/vm_protos.h>
88
89 void
90 log_nx_failure(addr64_t vaddr, vm_prot_t prot)
91 {
92 printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot);
93 }
94
95
96 int
97 useracc(
98 user_addr_t addr,
99 user_size_t len,
100 int prot)
101 {
102 return (vm_map_check_protection(
103 current_map(),
104 vm_map_trunc_page(addr), vm_map_round_page(addr+len),
105 prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
106 }
107
108 int
109 vslock(
110 user_addr_t addr,
111 user_size_t len)
112 {
113 kern_return_t kret;
114 kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
115 vm_map_round_page(addr+len),
116 VM_PROT_READ | VM_PROT_WRITE ,FALSE);
117
118 switch (kret) {
119 case KERN_SUCCESS:
120 return (0);
121 case KERN_INVALID_ADDRESS:
122 case KERN_NO_SPACE:
123 return (ENOMEM);
124 case KERN_PROTECTION_FAILURE:
125 return (EACCES);
126 default:
127 return (EINVAL);
128 }
129 }
130
131 int
132 vsunlock(
133 user_addr_t addr,
134 user_size_t len,
135 __unused int dirtied)
136 {
137 #if FIXME /* [ */
138 pmap_t pmap;
139 vm_page_t pg;
140 vm_map_offset_t vaddr;
141 ppnum_t paddr;
142 #endif /* FIXME ] */
143 kern_return_t kret;
144
145 #if FIXME /* [ */
146 if (dirtied) {
147 pmap = get_task_pmap(current_task());
148 for (vaddr = vm_map_trunc_page(addr);
149 vaddr < vm_map_round_page(addr+len);
150 vaddr += PAGE_SIZE) {
151 paddr = pmap_extract(pmap, vaddr);
152 pg = PHYS_TO_VM_PAGE(paddr);
153 vm_page_set_modified(pg);
154 }
155 }
156 #endif /* FIXME ] */
157 #ifdef lint
158 dirtied++;
159 #endif /* lint */
160 kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
161 vm_map_round_page(addr+len), FALSE);
162 switch (kret) {
163 case KERN_SUCCESS:
164 return (0);
165 case KERN_INVALID_ADDRESS:
166 case KERN_NO_SPACE:
167 return (ENOMEM);
168 case KERN_PROTECTION_FAILURE:
169 return (EACCES);
170 default:
171 return (EINVAL);
172 }
173 }
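/*
 * Illustrative sketch (not part of the original file): a typical caller
 * wires a user buffer with vslock() before touching it from an I/O path
 * and unwires it with vsunlock() afterwards.  The address "uaddr" and
 * length "len" below are hypothetical.
 *
 *	int err = vslock(uaddr, len);
 *	if (err == 0) {
 *		... perform the transfer into/out of the wired pages ...
 *		(void) vsunlock(uaddr, len, 1);	// 1 => pages were dirtied
 *	}
 */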
174
175 int
176 subyte(
177 user_addr_t addr,
178 int byte)
179 {
180 char character;
181
182 character = (char)byte;
183 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
184 }
185
186 int
187 suibyte(
188 user_addr_t addr,
189 int byte)
190 {
191 char character;
192
193 character = (char)byte;
194 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
195 }
196
197 int fubyte(user_addr_t addr)
198 {
199 unsigned char byte;
200
201 if (copyin(addr, (void *) &byte, sizeof(char)))
202 return(-1);
203 return(byte);
204 }
205
206 int fuibyte(user_addr_t addr)
207 {
208 unsigned char byte;
209
210 if (copyin(addr, (void *) &(byte), sizeof(char)))
211 return(-1);
212 return(byte);
213 }
214
215 int
216 suword(
217 user_addr_t addr,
218 long word)
219 {
220 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
221 }
222
223 long fuword(user_addr_t addr)
224 {
225 long word;
226
227 if (copyin(addr, (void *) &word, sizeof(int)))
228 return(-1);
229 return(word);
230 }
231
232 /* suiword and fuiword are the same as suword and fuword, respectively */
233
234 int
235 suiword(
236 user_addr_t addr,
237 long word)
238 {
239 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
240 }
241
242 long fuiword(user_addr_t addr)
243 {
244 long word;
245
246 if (copyin(addr, (void *) &word, sizeof(int)))
247 return(-1);
248 return(word);
249 }
250
251 /*
252 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
253 * fetching and setting of process-sized size_t and pointer values.
254 */
255 int
256 sulong(user_addr_t addr, int64_t word)
257 {
258
259 if (IS_64BIT_PROCESS(current_proc())) {
260 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
261 } else {
262 return(suiword(addr, (long)word));
263 }
264 }
265
266 int64_t
267 fulong(user_addr_t addr)
268 {
269 int64_t longword;
270
271 if (IS_64BIT_PROCESS(current_proc())) {
272 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
273 return(-1);
274 return(longword);
275 } else {
276 return((int64_t)fuiword(addr));
277 }
278 }
279
280 int
281 suulong(user_addr_t addr, uint64_t uword)
282 {
283
284 if (IS_64BIT_PROCESS(current_proc())) {
285 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
286 } else {
287 return(suiword(addr, (u_long)uword));
288 }
289 }
290
291 uint64_t
292 fuulong(user_addr_t addr)
293 {
294 uint64_t ulongword;
295
296 if (IS_64BIT_PROCESS(current_proc())) {
297 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
298 return(-1ULL);
299 return(ulongword);
300 } else {
301 return((uint64_t)fuiword(addr));
302 }
303 }
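/*
 * Illustrative sketch (not part of the original file): with the interface
 * described above, a caller can fetch or store a pointer-sized user value
 * without caring whether the current process is 32- or 64-bit.  "uaddr"
 * is a hypothetical user address holding a user pointer.
 *
 *	int64_t uptr = fulong(uaddr);	// reads 4 or 8 bytes, as appropriate
 *	if (sulong(uaddr, uptr) != 0)	// writes it back at the same width
 *		... handle the copyout failure ...
 */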
304
305 int
306 swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval)
307 {
308 return(ENOTSUP);
309 }
310
311
312 kern_return_t
313 pid_for_task(
314 struct pid_for_task_args *args)
315 {
316 mach_port_name_t t = args->t;
317 user_addr_t pid_addr = args->pid;
318 struct proc * p;
319 task_t t1;
320 int pid = -1;
321 kern_return_t err = KERN_SUCCESS;
322 boolean_t funnel_state;
323
324 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
325 AUDIT_ARG(mach_port1, t);
326
327 funnel_state = thread_funnel_set(kernel_flock, TRUE);
328 t1 = port_name_to_task(t);
329
330 if (t1 == TASK_NULL) {
331 err = KERN_FAILURE;
332 goto pftout;
333 } else {
334 p = get_bsdtask_info(t1);
335 if (p) {
336 pid = proc_pid(p);
337 err = KERN_SUCCESS;
338 } else {
339 err = KERN_FAILURE;
340 }
341 }
342 task_deallocate(t1);
343 pftout:
344 AUDIT_ARG(pid, pid);
345 (void) copyout((char *) &pid, pid_addr, sizeof(int));
346 thread_funnel_set(kernel_flock, funnel_state);
347 AUDIT_MACH_SYSCALL_EXIT(err);
348 return(err);
349 }
350
351 /*
352 * Routine: task_for_pid
353 * Purpose:
354 * Get the task port for another "process", named by its
355 * process ID on the same host as "target_task".
356 *
357 * Only permitted to privileged processes, or processes
358 * with the same user ID.
359 *
360 * XXX This should be a BSD system call, not a Mach trap!!!
361 */
362 /*
363 *
364  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: only self (or the superuser) is allowed
365  * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: self, the superuser, or related (same-uid, non-sugid) tasks are allowed
366  * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self and the superuser are allowed; members of the tfp groups may access related tasks
367 *
368 */
369 static int tfp_policy = KERN_TFP_POLICY_RESTRICTED;
370 /* the group is initialized to the kmem group and is modifiable by sysctl */
371 static int tfp_group_inited = 0; /* policy groups are loaded ... */
372 static gid_t tfp_group_ronly = 0; /* procview group */
373 static gid_t tfp_group_rw = 0; /* procmod group */
374
375 kern_return_t
376 task_for_pid(
377 struct task_for_pid_args *args)
378 {
379 mach_port_name_t target_tport = args->target_tport;
380 int pid = args->pid;
381 user_addr_t task_addr = args->t;
382 struct uthread *uthread;
383 struct proc *p;
384 struct proc *p1;
385 task_t t1;
386 mach_port_name_t tret;
387 void * sright;
388 int error = 0;
389 int is_member = 0;
390 boolean_t funnel_state;
391 boolean_t ispermitted = FALSE;
392 char procname[MAXCOMLEN+1];
393
394 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
395 AUDIT_ARG(pid, pid);
396 AUDIT_ARG(mach_port1, target_tport);
397
398 t1 = port_name_to_task(target_tport);
399 if (t1 == TASK_NULL) {
400 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
401 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
402 return(KERN_FAILURE);
403 }
404
405 funnel_state = thread_funnel_set(kernel_flock, TRUE);
406
407 p1 = current_proc();
408
409 /*
410 * Delayed binding of thread credential to process credential, if we
411 * are not running with an explicitly set thread credential.
412 */
413 uthread = get_bsdthread_info(current_thread());
414 if (uthread->uu_ucred != p1->p_ucred &&
415 (uthread->uu_flag & UT_SETUID) == 0) {
416 kauth_cred_t old = uthread->uu_ucred;
417 proc_lock(p1);
418 uthread->uu_ucred = p1->p_ucred;
419 kauth_cred_ref(uthread->uu_ucred);
420 proc_unlock(p1);
421 if (old != NOCRED)
422 kauth_cred_rele(old);
423 }
424
425 p = pfind(pid);
426 AUDIT_ARG(process, p);
427
428 switch (tfp_policy) {
429
430 case KERN_TFP_POLICY_PERMISSIVE:
431 /* self or suser or related ones */
432 if ((p != (struct proc *) 0)
433 && (p1 != (struct proc *) 0)
434 && (
435 (p1 == p)
436 || !(suser(kauth_cred_get(), 0))
437 || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
438 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
439 && ((p->p_flag & P_SUGID) == 0))
440 )
441 && (p->p_stat != SZOMB)
442 )
443 ispermitted = TRUE;
444 break;
445
446 case KERN_TFP_POLICY_RESTRICTED:
447 /* self or suser or setgid and related ones only */
448 if ((p != (struct proc *) 0)
449 && (p1 != (struct proc *) 0)
450 && (
451 (p1 == p)
452 || !(suser(kauth_cred_get(), 0))
453 || (((tfp_group_inited != 0) &&
454 (
455 ((kauth_cred_ismember_gid(kauth_cred_get(),
456 tfp_group_ronly, &is_member) == 0) && is_member)
457 ||((kauth_cred_ismember_gid(kauth_cred_get(),
458 tfp_group_rw, &is_member) == 0) && is_member)
459 )
460 )
461 && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
462 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))
463 && ((p->p_flag & P_SUGID) == 0))
464 )
465 )
466 && (p->p_stat != SZOMB)
467 )
468 ispermitted = TRUE;
469
470 break;
471
472 case KERN_TFP_POLICY_DENY:
473 /* self or suser only */
474 default:
475 /* do not return task port of other task at all */
476 if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB)
477 && ((p1 == p) || !(suser(kauth_cred_get(), 0))))
478 ispermitted = TRUE;
479 else
480 ispermitted = FALSE;
481 break;
482 };
483
484
485 if (ispermitted == TRUE) {
486 if (p->task != TASK_NULL) {
487 task_reference(p->task);
488 sright = (void *)convert_task_to_port(p->task);
489 tret = ipc_port_copyout_send(
490 sright,
491 get_task_ipcspace(current_task()));
492 } else
493 tret = MACH_PORT_NULL;
494 AUDIT_ARG(mach_port2, tret);
495 (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
496 task_deallocate(t1);
497 error = KERN_SUCCESS;
498 goto tfpout;
499 } else {
500 /*
501  * There is no guarantee that p_comm is null-terminated and the
502  * kernel implementations of the string functions are incomplete.
503  * So, to ensure stale info is not leaked out, bzero the buffer.
504 */
505 bzero(&procname[0], MAXCOMLEN+1);
506 strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN);
507 if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE)
508 log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n",
509 ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0],
510 ((p != PROC_NULL)?(p->p_pid):0));
511 }
512
513 task_deallocate(t1);
514 tret = MACH_PORT_NULL;
515 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
516 error = KERN_FAILURE;
517 tfpout:
518 thread_funnel_set(kernel_flock, funnel_state);
519 AUDIT_MACH_SYSCALL_EXIT(error);
520 return(error);
521 }
522
523 /*
524 * Routine: task_name_for_pid
525 * Purpose:
526 * Get the task name port for another "process", named by its
527 * process ID on the same host as "target_task".
528 *
529 * Only permitted to privileged processes, or processes
530 * with the same user ID.
531 *
532 * XXX This should be a BSD system call, not a Mach trap!!!
533 */
534
535 kern_return_t
536 task_name_for_pid(
537 struct task_name_for_pid_args *args)
538 {
539 mach_port_name_t target_tport = args->target_tport;
540 int pid = args->pid;
541 user_addr_t task_addr = args->t;
542 struct uthread *uthread;
543 struct proc *p;
544 struct proc *p1;
545 task_t t1;
546 mach_port_name_t tret;
547 void * sright;
548 int error = 0;
549 boolean_t funnel_state;
550
551 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
552 AUDIT_ARG(pid, pid);
553 AUDIT_ARG(mach_port1, target_tport);
554
555 t1 = port_name_to_task(target_tport);
556 if (t1 == TASK_NULL) {
557 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
558 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
559 return(KERN_FAILURE);
560 }
561
562 funnel_state = thread_funnel_set(kernel_flock, TRUE);
563
564 p1 = current_proc();
565
566 /*
567 * Delayed binding of thread credential to process credential, if we
568 * are not running with an explicitly set thread credential.
569 */
570 uthread = get_bsdthread_info(current_thread());
571 if (uthread->uu_ucred != p1->p_ucred &&
572 (uthread->uu_flag & UT_SETUID) == 0) {
573 kauth_cred_t old = uthread->uu_ucred;
574 proc_lock(p1);
575 uthread->uu_ucred = p1->p_ucred;
576 kauth_cred_ref(uthread->uu_ucred);
577 proc_unlock(p1);
578 if (old != NOCRED)
579 kauth_cred_rele(old);
580 }
581
582 p = pfind(pid);
583 AUDIT_ARG(process, p);
584
585 if ((p != (struct proc *) 0)
586 && (p->p_stat != SZOMB)
587 && (p1 != (struct proc *) 0)
588 && ((p1 == p)
589 || !(suser(kauth_cred_get(), 0))
590 || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) &&
591 ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)))))
592 {
593 if (p->task != TASK_NULL)
594 {
595 task_reference(p->task);
596 sright = (void *)convert_task_name_to_port(p->task);
597 tret = ipc_port_copyout_send(
598 sright,
599 get_task_ipcspace(current_task()));
600 } else
601 tret = MACH_PORT_NULL;
602 AUDIT_ARG(mach_port2, tret);
603 (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
604 task_deallocate(t1);
605 error = KERN_SUCCESS;
606 goto tnfpout;
607 }
608
609 task_deallocate(t1);
610 tret = MACH_PORT_NULL;
611 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
612 error = KERN_FAILURE;
613 tnfpout:
614 thread_funnel_set(kernel_flock, funnel_state);
615 AUDIT_MACH_SYSCALL_EXIT(error);
616 return(error);
617 }
618
619 static int
620 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
621 __unused int arg2, struct sysctl_req *req)
622 {
623 int error = 0;
624 int new_value;
625
626 error = SYSCTL_OUT(req, arg1, sizeof(int));
627 if (error || req->newptr == USER_ADDR_NULL)
628 return(error);
629
630 if (!is_suser())
631 return(EPERM);
632
633 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
634 goto out;
635 }
636 if ((new_value == KERN_TFP_POLICY_DENY)
637 || (new_value == KERN_TFP_POLICY_PERMISSIVE)
638 || (new_value == KERN_TFP_POLICY_RESTRICTED))
639 tfp_policy = new_value;
640 else
641 error = EINVAL;
642 out:
643 return(error);
644
645 }
646
647 static int
648 sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1,
649 __unused int arg2, struct sysctl_req *req)
650 {
651 int error = 0;
652 int new_value;
653
654 error = SYSCTL_OUT(req, arg1, sizeof(int));
655 if (error || req->newptr == USER_ADDR_NULL)
656 return(error);
657
658 if (!is_suser())
659 return(EPERM);
660
661 /*
662  * Once set, this cannot be reset until the next boot. launchd sets it
663  * during its pid 1 init and no one can set it after that.
664 */
665 if (tfp_group_inited != 0)
666 return(EPERM);
667
668 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
669 goto out;
670 }
671
672 if (new_value >= 100)
673 error = EINVAL;
674 else {
675 if (arg1 == &tfp_group_ronly)
676 tfp_group_ronly = new_value;
677 else if (arg1 == &tfp_group_rw)
678 tfp_group_rw = new_value;
679 else
680 error = EINVAL;
681 if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 ))
682 tfp_group_inited = 1;
683 }
684
685 out:
686 return(error);
687 }
688
689 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp");
690 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
691 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
692 SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW,
693 &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group");
694 SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW,
695 &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group");
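/*
 * Illustrative sketch (not part of the original file): the knobs exported
 * above appear in user space as "kern.tfp.policy", "kern.tfp.read_group"
 * and "kern.tfp.rw_group".  Assuming the standard sysctlbyname(3)
 * interface, a process could query or (as root) change the policy:
 *
 *	int policy;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
 *		printf("task_for_pid policy = %d\n", policy);
 *	// root only; must be one of the KERN_TFP_POLICY_* values:
 *	// sysctlbyname("kern.tfp.policy", NULL, NULL, &policy, sizeof(policy));
 *
 * The two group ids may be set until both are non-zero (launchd does this
 * during its pid 1 init); after that sysctl_settfp_groups() refuses
 * further changes, and each value must be < 100.
 */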
696
697
698 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, "");
699
700 /*
701 * shared_region_make_private_np:
702 *
703 * This system call is for "dyld" only.
704 *
705 * It creates a private copy of the current process's "shared region" for
706 * split libraries. "dyld" uses this when the shared region is full or
707 * it needs to load a split library that conflicts with an already loaded one
708 * that this process doesn't need. "dyld" specifies a set of address ranges
709 * that it wants to keep in the now-private "shared region". These cover
710 * the set of split libraries that the process needs so far. The kernel needs
711 * to deallocate the rest of the shared region, so that it's available for
712 * more libraries for this process.
713 */
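/*
 * Illustrative sketch (not part of the original file): "dyld" would call
 * this with uap->rangeCount set to the number of address ranges it wants
 * to preserve and uap->ranges pointing at a user-space array of that many
 * struct shared_region_range_np entries describing the parts of the
 * region to keep; the kernel deallocates the rest of the now-private
 * shared region.
 */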
714 int
715 shared_region_make_private_np(
716 struct proc *p,
717 struct shared_region_make_private_np_args *uap,
718 __unused int *retvalp)
719 {
720 int error;
721 kern_return_t kr;
722 boolean_t using_shared_regions;
723 user_addr_t user_ranges;
724 unsigned int range_count;
725 vm_size_t ranges_size;
726 struct shared_region_range_np *ranges;
727 shared_region_mapping_t shared_region;
728 struct shared_region_task_mappings task_mapping_info;
729 shared_region_mapping_t next;
730
731 ranges = NULL;
732
733 range_count = uap->rangeCount;
734 user_ranges = uap->ranges;
735 ranges_size = (vm_size_t) (range_count * sizeof (ranges[0]));
736
737 SHARED_REGION_TRACE(
738 SHARED_REGION_TRACE_INFO,
739 ("shared_region: %p [%d(%s)] "
740 "make_private(rangecount=%d)\n",
741 current_thread(), p->p_pid, p->p_comm, range_count));
742
743 /* allocate kernel space for the "ranges" */
744 if (range_count != 0) {
745 if ((mach_vm_size_t) ranges_size !=
746 (mach_vm_size_t) range_count * sizeof (ranges[0])) {
747 /* 32-bit integer overflow */
748 error = EINVAL;
749 goto done;
750 }
751 kr = kmem_alloc(kernel_map,
752 (vm_offset_t *) &ranges,
753 ranges_size);
754 if (kr != KERN_SUCCESS) {
755 error = ENOMEM;
756 goto done;
757 }
758
759 /* copy "ranges" from user-space */
760 error = copyin(user_ranges,
761 ranges,
762 ranges_size);
763 if (error) {
764 goto done;
765 }
766 }
767
768 if (p->p_flag & P_NOSHLIB) {
769 /* no split library has been mapped for this process so far */
770 using_shared_regions = FALSE;
771 } else {
772 /* this process has already mapped some split libraries */
773 using_shared_regions = TRUE;
774 }
775
776 /*
777 * Get a private copy of the current shared region.
778 * Do not chain it to the system-wide shared region, as we'll want
779 * to map other split libraries in place of the old ones. We want
780 * to completely detach from the system-wide shared region and go our
781 * own way after this point, not sharing anything with other processes.
782 */
783 error = clone_system_shared_regions(using_shared_regions,
784 FALSE, /* chain_regions */
785 ENV_DEFAULT_ROOT);
786 if (error) {
787 goto done;
788 }
789
790 /* get info on the newly allocated shared region */
791 vm_get_shared_region(current_task(), &shared_region);
792 task_mapping_info.self = (vm_offset_t) shared_region;
793 shared_region_mapping_info(shared_region,
794 &(task_mapping_info.text_region),
795 &(task_mapping_info.text_size),
796 &(task_mapping_info.data_region),
797 &(task_mapping_info.data_size),
798 &(task_mapping_info.region_mappings),
799 &(task_mapping_info.client_base),
800 &(task_mapping_info.alternate_base),
801 &(task_mapping_info.alternate_next),
802 &(task_mapping_info.fs_base),
803 &(task_mapping_info.system),
804 &(task_mapping_info.flags),
805 &next);
806
807 /*
808 * We now have our private copy of the shared region, as it was before
809 * the call to clone_system_shared_regions(). We now need to clean it
810 * up and keep only the memory areas described by the "ranges" array.
811 */
812 kr = shared_region_cleanup(range_count, ranges, &task_mapping_info);
813 switch (kr) {
814 case KERN_SUCCESS:
815 error = 0;
816 break;
817 default:
818 error = EINVAL;
819 goto done;
820 }
821
822 done:
823 if (ranges != NULL) {
824 kmem_free(kernel_map,
825 (vm_offset_t) ranges,
826 ranges_size);
827 ranges = NULL;
828 }
829
830 SHARED_REGION_TRACE(
831 SHARED_REGION_TRACE_INFO,
832 ("shared_region: %p [%d(%s)] "
833 "make_private(rangecount=%d) -> %d "
834 "shared_region=%p[%x,%x,%x]\n",
835 current_thread(), p->p_pid, p->p_comm,
836 range_count, error, shared_region,
837 task_mapping_info.fs_base,
838 task_mapping_info.system,
839 task_mapping_info.flags));
840
841 return error;
842 }
843
844
845 /*
846 * shared_region_map_file_np:
847 *
848 * This system call is for "dyld" only.
849 *
850 * "dyld" wants to map parts of a split library in the shared region.
851 * We get a file descriptor on the split library to be mapped and a set
852  * of mapping instructions, describing which parts of the file to map into
853  * which areas of the shared segment and with what protection.
854  * The "shared region" is split into 2 areas:
855 * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections),
856 * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
857 *
858 */
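/*
 * Illustrative sketch (not part of the original file): each element of the
 * "mappings" array is a struct shared_file_mapping_np.  For a split
 * library's TEXT segment, dyld might pass something like:
 *
 *	mapping.sfm_address  = 0x90000000 + <offset in the read-only area>;
 *	mapping.sfm_size     = <size of the TEXT segment>;
 *	mapping.sfm_max_prot = VM_PROT_READ | VM_PROT_EXECUTE;
 *
 * (The structure also describes the corresponding file offset and initial
 * protection, which map_shared_file() below consumes.)
 */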
859 int
860 shared_region_map_file_np(
861 struct proc *p,
862 struct shared_region_map_file_np_args *uap,
863 __unused int *retvalp)
864 {
865 int error;
866 kern_return_t kr;
867 int fd;
868 unsigned int mapping_count;
869 user_addr_t user_mappings; /* 64-bit */
870 user_addr_t user_slide_p; /* 64-bit */
871 struct shared_file_mapping_np *mappings;
872 vm_size_t mappings_size;
873 struct fileproc *fp;
874 mach_vm_offset_t slide;
875 struct vnode *vp;
876 struct vfs_context context;
877 memory_object_control_t file_control;
878 memory_object_size_t file_size;
879 shared_region_mapping_t shared_region;
880 struct shared_region_task_mappings task_mapping_info;
881 shared_region_mapping_t next;
882 shared_region_mapping_t default_shared_region;
883 boolean_t using_default_region;
884 unsigned int j;
885 vm_prot_t max_prot;
886 mach_vm_offset_t base_offset, end_offset;
887 mach_vm_offset_t original_base_offset;
888 boolean_t mappings_in_segment;
889 #define SFM_MAX_STACK 6
890 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
891
892 mappings_size = 0;
893 mappings = NULL;
894 mapping_count = 0;
895 fp = NULL;
896 vp = NULL;
897
898 /* get file descriptor for split library from arguments */
899 fd = uap->fd;
900
901 /* get file structure from file descriptor */
902 error = fp_lookup(p, fd, &fp, 0);
903 if (error) {
904 SHARED_REGION_TRACE(
905 SHARED_REGION_TRACE_ERROR,
906 ("shared_region: %p [%d(%s)] map_file: "
907 "fd=%d lookup failed (error=%d)\n",
908 current_thread(), p->p_pid, p->p_comm, fd, error));
909 goto done;
910 }
911
912 /* make sure we're attempting to map a vnode */
913 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
914 SHARED_REGION_TRACE(
915 SHARED_REGION_TRACE_ERROR,
916 ("shared_region: %p [%d(%s)] map_file: "
917 "fd=%d not a vnode (type=%d)\n",
918 current_thread(), p->p_pid, p->p_comm,
919 fd, fp->f_fglob->fg_type));
920 error = EINVAL;
921 goto done;
922 }
923
924 /* we need at least read permission on the file */
925 if (! (fp->f_fglob->fg_flag & FREAD)) {
926 SHARED_REGION_TRACE(
927 SHARED_REGION_TRACE_ERROR,
928 ("shared_region: %p [%d(%s)] map_file: "
929 "fd=%d not readable\n",
930 current_thread(), p->p_pid, p->p_comm, fd));
931 error = EPERM;
932 goto done;
933 }
934
935 /* get vnode from file structure */
936 error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data);
937 if (error) {
938 SHARED_REGION_TRACE(
939 SHARED_REGION_TRACE_ERROR,
940 ("shared_region: %p [%d(%s)] map_file: "
941 "fd=%d getwithref failed (error=%d)\n",
942 current_thread(), p->p_pid, p->p_comm, fd, error));
943 goto done;
944 }
945 vp = (struct vnode *) fp->f_fglob->fg_data;
946
947 /* make sure the vnode is a regular file */
948 if (vp->v_type != VREG) {
949 SHARED_REGION_TRACE(
950 SHARED_REGION_TRACE_ERROR,
951 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
952 "not a file (type=%d)\n",
953 current_thread(), p->p_pid, p->p_comm,
954 vp, vp->v_name, vp->v_type));
955 error = EINVAL;
956 goto done;
957 }
958
959 /* get vnode size */
960 {
961 off_t fs;
962
963 context.vc_proc = p;
964 context.vc_ucred = kauth_cred_get();
965 if ((error = vnode_size(vp, &fs, &context)) != 0) {
966 SHARED_REGION_TRACE(
967 SHARED_REGION_TRACE_ERROR,
968 ("shared_region: %p [%d(%s)] "
969 "map_file(%p:'%s'): "
970 "vnode_size(%p) failed (error=%d)\n",
971 current_thread(), p->p_pid, p->p_comm,
972 				vp, vp->v_name, vp, error));
973 goto done;
974 }
975 file_size = fs;
976 }
977
978 /*
979 * Get the list of mappings the caller wants us to establish.
980 */
981 mapping_count = uap->mappingCount; /* the number of mappings */
982 mappings_size = (vm_size_t) (mapping_count * sizeof (mappings[0]));
983 if (mapping_count == 0) {
984 SHARED_REGION_TRACE(
985 SHARED_REGION_TRACE_INFO,
986 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
987 "no mappings\n",
988 current_thread(), p->p_pid, p->p_comm,
989 vp, vp->v_name));
990 error = 0; /* no mappings: we're done ! */
991 goto done;
992 } else if (mapping_count <= SFM_MAX_STACK) {
993 mappings = &stack_mappings[0];
994 } else {
995 if ((mach_vm_size_t) mappings_size !=
996 (mach_vm_size_t) mapping_count * sizeof (mappings[0])) {
997 /* 32-bit integer overflow */
998 error = EINVAL;
999 goto done;
1000 }
1001 kr = kmem_alloc(kernel_map,
1002 (vm_offset_t *) &mappings,
1003 mappings_size);
1004 if (kr != KERN_SUCCESS) {
1005 SHARED_REGION_TRACE(
1006 SHARED_REGION_TRACE_ERROR,
1007 ("shared_region: %p [%d(%s)] "
1008 "map_file(%p:'%s'): "
1009 "failed to allocate %d mappings (kr=0x%x)\n",
1010 current_thread(), p->p_pid, p->p_comm,
1011 vp, vp->v_name, mapping_count, kr));
1012 error = ENOMEM;
1013 goto done;
1014 }
1015 }
1016
1017 user_mappings = uap->mappings; /* the mappings, in user space */
1018 error = copyin(user_mappings,
1019 mappings,
1020 mappings_size);
1021 if (error != 0) {
1022 SHARED_REGION_TRACE(
1023 SHARED_REGION_TRACE_ERROR,
1024 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1025 "failed to copyin %d mappings (error=%d)\n",
1026 current_thread(), p->p_pid, p->p_comm,
1027 vp, vp->v_name, mapping_count, error));
1028 goto done;
1029 }
1030
1031 /*
1032 * If the caller provides a "slide" pointer, it means they're OK
1033 * with us moving the mappings around to make them fit.
1034 */
1035 user_slide_p = uap->slide_p;
1036
1037 /*
1038 * Make each mapping address relative to the beginning of the
1039 * shared region. Check that all mappings are in the shared region.
1040 * Compute the maximum set of protections required to tell the
1041 * buffer cache how we mapped the file (see call to ubc_map() below).
1042 */
1043 max_prot = VM_PROT_NONE;
1044 base_offset = -1LL;
1045 end_offset = 0;
1046 mappings_in_segment = TRUE;
1047 for (j = 0; j < mapping_count; j++) {
1048 mach_vm_offset_t segment;
1049 segment = (mappings[j].sfm_address &
1050 GLOBAL_SHARED_SEGMENT_MASK);
1051 if (segment != GLOBAL_SHARED_TEXT_SEGMENT &&
1052 segment != GLOBAL_SHARED_DATA_SEGMENT) {
1053 /* this mapping is not in the shared region... */
1054 if (user_slide_p == NULL) {
1055 /* ... and we can't slide it in: fail */
1056 SHARED_REGION_TRACE(
1057 SHARED_REGION_TRACE_CONFLICT,
1058 ("shared_region: %p [%d(%s)] "
1059 "map_file(%p:'%s'): "
1060 "mapping %p not in shared segment & "
1061 "no sliding\n",
1062 current_thread(), p->p_pid, p->p_comm,
1063 vp, vp->v_name,
1064 mappings[j].sfm_address));
1065 error = EINVAL;
1066 goto done;
1067 }
1068 if (j == 0) {
1069 /* expect all mappings to be outside */
1070 mappings_in_segment = FALSE;
1071 } else if (mappings_in_segment != FALSE) {
1072 /* other mappings were not outside: fail */
1073 SHARED_REGION_TRACE(
1074 SHARED_REGION_TRACE_CONFLICT,
1075 ("shared_region: %p [%d(%s)] "
1076 "map_file(%p:'%s'): "
1077 "mapping %p not in shared segment & "
1078 "other mappings in shared segment\n",
1079 current_thread(), p->p_pid, p->p_comm,
1080 vp, vp->v_name,
1081 mappings[j].sfm_address));
1082 error = EINVAL;
1083 goto done;
1084 }
1085 /* we'll try and slide that mapping in the segments */
1086 } else {
1087 if (j == 0) {
1088 /* expect all mappings to be inside */
1089 mappings_in_segment = TRUE;
1090 } else if (mappings_in_segment != TRUE) {
1091 /* other mappings were not inside: fail */
1092 SHARED_REGION_TRACE(
1093 SHARED_REGION_TRACE_CONFLICT,
1094 ("shared_region: %p [%d(%s)] "
1095 "map_file(%p:'%s'): "
1096 "mapping %p in shared segment & "
1097 "others in shared segment\n",
1098 current_thread(), p->p_pid, p->p_comm,
1099 vp, vp->v_name,
1100 mappings[j].sfm_address));
1101 error = EINVAL;
1102 goto done;
1103 }
1104 /* get a relative offset inside the shared segments */
1105 mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT;
1106 }
1107 if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK)
1108 < base_offset) {
1109 base_offset = (mappings[j].sfm_address &
1110 SHARED_TEXT_REGION_MASK);
1111 }
1112 if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) +
1113 mappings[j].sfm_size > end_offset) {
1114 end_offset =
1115 (mappings[j].sfm_address &
1116 SHARED_TEXT_REGION_MASK) +
1117 mappings[j].sfm_size;
1118 }
1119 max_prot |= mappings[j].sfm_max_prot;
1120 }
1121 /* Make all mappings relative to the base_offset */
1122 base_offset = vm_map_trunc_page(base_offset);
1123 end_offset = vm_map_round_page(end_offset);
1124 for (j = 0; j < mapping_count; j++) {
1125 mappings[j].sfm_address -= base_offset;
1126 }
1127 original_base_offset = base_offset;
1128 if (mappings_in_segment == FALSE) {
1129 /*
1130 * We're trying to map a library that was not pre-bound to
1131 * be in the shared segments. We want to try and slide it
1132 * back into the shared segments but as far back as possible,
1133 * so that it doesn't clash with pre-bound libraries. Set
1134 * the base_offset to the end of the region, so that it can't
1135 * possibly fit there and will have to be slid.
1136 */
1137 base_offset = SHARED_TEXT_REGION_SIZE - end_offset;
1138 }
1139
1140 /* get the file's memory object handle */
1141 UBCINFOCHECK("shared_region_map_file_np", vp);
1142 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
1143 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
1144 SHARED_REGION_TRACE(
1145 SHARED_REGION_TRACE_ERROR,
1146 ("shared_region: %p [%d(%s)] map_file(%p:'%s'): "
1147 "ubc_getobject() failed\n",
1148 current_thread(), p->p_pid, p->p_comm,
1149 vp, vp->v_name));
1150 error = EINVAL;
1151 goto done;
1152 }
1153
1154 /*
1155 * Get info about the current process's shared region.
1156 * This might change if we decide we need to clone the shared region.
1157 */
1158 vm_get_shared_region(current_task(), &shared_region);
1159 task_mapping_info.self = (vm_offset_t) shared_region;
1160 shared_region_mapping_info(shared_region,
1161 &(task_mapping_info.text_region),
1162 &(task_mapping_info.text_size),
1163 &(task_mapping_info.data_region),
1164 &(task_mapping_info.data_size),
1165 &(task_mapping_info.region_mappings),
1166 &(task_mapping_info.client_base),
1167 &(task_mapping_info.alternate_base),
1168 &(task_mapping_info.alternate_next),
1169 &(task_mapping_info.fs_base),
1170 &(task_mapping_info.system),
1171 &(task_mapping_info.flags),
1172 &next);
1173
1174 /*
1175 * Are we using the system's current shared region
1176 * for this environment ?
1177 */
1178 default_shared_region =
1179 lookup_default_shared_region(ENV_DEFAULT_ROOT,
1180 task_mapping_info.system);
1181 if (shared_region == default_shared_region) {
1182 using_default_region = TRUE;
1183 } else {
1184 using_default_region = FALSE;
1185 }
1186 shared_region_mapping_dealloc(default_shared_region);
1187
1188 if (vp->v_mount != rootvnode->v_mount &&
1189 using_default_region) {
1190 /*
1191 * The split library is not on the root filesystem. We don't
1192 		 * want to pollute the system-wide ("default") shared region
1193 * with it.
1194 * Reject the mapping. The caller (dyld) should "privatize"
1195 * (via shared_region_make_private()) the shared region and
1196 * try to establish the mapping privately for this process.
1197 */
1198 SHARED_REGION_TRACE(
1199 SHARED_REGION_TRACE_CONFLICT,
1200 ("shared_region: %p [%d(%s)] "
1201 "map_file(%p:'%s'): "
1202 "not on root volume\n",
1203 current_thread(), p->p_pid, p->p_comm,
1204 			 vp, vp->v_name));
1205 error = EXDEV;
1206 goto done;
1207 }
1208
1209
1210 /*
1211 * Map the split library.
1212 */
1213 kr = map_shared_file(mapping_count,
1214 mappings,
1215 file_control,
1216 file_size,
1217 &task_mapping_info,
1218 base_offset,
1219 (user_slide_p) ? &slide : NULL);
1220
1221 if (kr == KERN_SUCCESS) {
1222 /*
1223 * The mapping was successful. Let the buffer cache know
1224 * that we've mapped that file with these protections. This
1225 * prevents the vnode from getting recycled while it's mapped.
1226 */
1227 (void) ubc_map(vp, max_prot);
1228 error = 0;
1229 } else {
1230 SHARED_REGION_TRACE(
1231 SHARED_REGION_TRACE_CONFLICT,
1232 ("shared_region: %p [%d(%s)] "
1233 "map_file(%p:'%s'): "
1234 "map_shared_file failed, kr=0x%x\n",
1235 current_thread(), p->p_pid, p->p_comm,
1236 vp, vp->v_name, kr));
1237 switch (kr) {
1238 case KERN_INVALID_ADDRESS:
1239 error = EFAULT;
1240 goto done;
1241 case KERN_PROTECTION_FAILURE:
1242 error = EPERM;
1243 goto done;
1244 case KERN_NO_SPACE:
1245 error = ENOMEM;
1246 goto done;
1247 case KERN_FAILURE:
1248 case KERN_INVALID_ARGUMENT:
1249 default:
1250 error = EINVAL;
1251 goto done;
1252 }
1253 }
1254
1255 if (p->p_flag & P_NOSHLIB) {
1256 /* signal that this process is now using split libraries */
1257 p->p_flag &= ~P_NOSHLIB;
1258 }
1259
1260 if (user_slide_p) {
1261 /*
1262 * The caller provided a pointer to a "slide" offset. Let
1263 * them know by how much we slid the mappings.
1264 */
1265 if (mappings_in_segment == FALSE) {
1266 /*
1267 * We faked the base_offset earlier, so undo that
1268 * and take into account the real base_offset.
1269 */
1270 slide += SHARED_TEXT_REGION_SIZE - end_offset;
1271 slide -= original_base_offset;
1272 /*
1273 * The mappings were slid into the shared segments
1274 * and "slide" is relative to the beginning of the
1275 * shared segments. Adjust it to be absolute.
1276 */
1277 slide += GLOBAL_SHARED_TEXT_SEGMENT;
1278 }
1279 error = copyout(&slide,
1280 user_slide_p,
1281 sizeof (slide));
1282 if (slide != 0) {
1283 SHARED_REGION_TRACE(
1284 SHARED_REGION_TRACE_CONFLICT,
1285 ("shared_region: %p [%d(%s)] "
1286 "map_file(%p:'%s'): "
1287 "slid by 0x%llx\n",
1288 current_thread(), p->p_pid, p->p_comm,
1289 vp, vp->v_name, slide));
1290 }
1291 }
1292
1293 done:
1294 if (vp != NULL) {
1295 /*
1296 * release the vnode...
1297 * ubc_map() still holds it for us in the non-error case
1298 */
1299 (void) vnode_put(vp);
1300 vp = NULL;
1301 }
1302 if (fp != NULL) {
1303 /* release the file descriptor */
1304 fp_drop(p, fd, fp, 0);
1305 fp = NULL;
1306 }
1307 if (mappings != NULL &&
1308 mappings != &stack_mappings[0]) {
1309 kmem_free(kernel_map,
1310 (vm_offset_t) mappings,
1311 mappings_size);
1312 }
1313 mappings = NULL;
1314
1315 return error;
1316 }
1317
1318 int
1319 load_shared_file(
1320 __unused struct proc *p,
1321 __unused struct load_shared_file_args *uap,
1322 __unused int *retval)
1323 {
1324 return ENOSYS;
1325 }
1326
1327 int
1328 reset_shared_file(
1329 __unused struct proc *p,
1330 __unused struct reset_shared_file_args *uap,
1331 __unused int *retval)
1332 {
1333 return ENOSYS;
1334 }
1335
1336 int
1337 new_system_shared_regions(
1338 __unused struct proc *p,
1339 __unused struct new_system_shared_regions_args *uap,
1340 __unused int *retval)
1341 {
1342 return ENOSYS;
1343 }
1344
1345
1346
1347 int
1348 clone_system_shared_regions(
1349 int shared_regions_active,
1350 int chain_regions,
1351 int base_vnode)
1352 {
1353 shared_region_mapping_t new_shared_region;
1354 shared_region_mapping_t next;
1355 shared_region_mapping_t old_shared_region;
1356 struct shared_region_task_mappings old_info;
1357 struct shared_region_task_mappings new_info;
1358
1359 vm_get_shared_region(current_task(), &old_shared_region);
1360 old_info.self = (vm_offset_t)old_shared_region;
1361 shared_region_mapping_info(old_shared_region,
1362 &(old_info.text_region),
1363 &(old_info.text_size),
1364 &(old_info.data_region),
1365 &(old_info.data_size),
1366 &(old_info.region_mappings),
1367 &(old_info.client_base),
1368 &(old_info.alternate_base),
1369 &(old_info.alternate_next),
1370 &(old_info.fs_base),
1371 &(old_info.system),
1372 &(old_info.flags), &next);
1373
1374 if (shared_regions_active ||
1375 base_vnode == ENV_DEFAULT_ROOT) {
1376 if (shared_file_create_system_region(&new_shared_region,
1377 old_info.fs_base,
1378 old_info.system))
1379 return ENOMEM;
1380 } else {
1381 if (old_shared_region &&
1382 base_vnode == ENV_DEFAULT_ROOT) {
1383 base_vnode = old_info.fs_base;
1384 }
1385 new_shared_region =
1386 lookup_default_shared_region(base_vnode,
1387 old_info.system);
1388 if (new_shared_region == NULL) {
1389 shared_file_boot_time_init(base_vnode,
1390 old_info.system);
1391 vm_get_shared_region(current_task(),
1392 &new_shared_region);
1393 } else {
1394 vm_set_shared_region(current_task(), new_shared_region);
1395 }
1396 if (old_shared_region)
1397 shared_region_mapping_dealloc(old_shared_region);
1398 }
1399 new_info.self = (vm_offset_t)new_shared_region;
1400 shared_region_mapping_info(new_shared_region,
1401 &(new_info.text_region),
1402 &(new_info.text_size),
1403 &(new_info.data_region),
1404 &(new_info.data_size),
1405 &(new_info.region_mappings),
1406 &(new_info.client_base),
1407 &(new_info.alternate_base),
1408 &(new_info.alternate_next),
1409 &(new_info.fs_base),
1410 &(new_info.system),
1411 &(new_info.flags), &next);
1412 if(shared_regions_active) {
1413 if(vm_region_clone(old_info.text_region, new_info.text_region)) {
1414 panic("clone_system_shared_regions: shared region mis-alignment 1");
1415 shared_region_mapping_dealloc(new_shared_region);
1416 return(EINVAL);
1417 }
1418 if (vm_region_clone(old_info.data_region, new_info.data_region)) {
1419 panic("clone_system_shared_regions: shared region mis-alignment 2");
1420 shared_region_mapping_dealloc(new_shared_region);
1421 return(EINVAL);
1422 }
1423 if (chain_regions) {
1424 /*
1425 * We want a "shadowed" clone, a private superset of the old
1426 * shared region. The info about the old mappings is still
1427 * valid for us.
1428 */
1429 shared_region_object_chain_attach(
1430 new_shared_region, old_shared_region);
1431 } else {
1432 /*
1433 * We want a completely detached clone with no link to
1434 * the old shared region. We'll be removing some mappings
1435 * in our private, cloned, shared region, so the old mappings
1436 * will become irrelevant to us. Since we have a private
1437 * "shared region" now, it isn't going to be shared with
1438 * anyone else and we won't need to maintain mappings info.
1439 */
1440 shared_region_object_chain_detached(new_shared_region);
1441 }
1442 }
1443 if (vm_map_region_replace(current_map(), old_info.text_region,
1444 new_info.text_region, old_info.client_base,
1445 old_info.client_base+old_info.text_size)) {
1446 panic("clone_system_shared_regions: shared region mis-alignment 3");
1447 shared_region_mapping_dealloc(new_shared_region);
1448 return(EINVAL);
1449 }
1450 if(vm_map_region_replace(current_map(), old_info.data_region,
1451 new_info.data_region,
1452 old_info.client_base + old_info.text_size,
1453 old_info.client_base
1454 + old_info.text_size + old_info.data_size)) {
1455 panic("clone_system_shared_regions: shared region mis-alignment 4");
1456 shared_region_mapping_dealloc(new_shared_region);
1457 return(EINVAL);
1458 }
1459 vm_set_shared_region(current_task(), new_shared_region);
1460
1461 /* consume the reference which wasn't accounted for in object */
1462 /* chain attach */
1463 if (!shared_regions_active || !chain_regions)
1464 shared_region_mapping_dealloc(old_shared_region);
1465
1466 SHARED_REGION_TRACE(
1467 SHARED_REGION_TRACE_INFO,
1468 ("shared_region: %p task=%p "
1469 "clone(active=%d, base=0x%x,chain=%d) "
1470 "old=%p[%x,%x,%x] new=%p[%x,%x,%x]\n",
1471 current_thread(), current_task(),
1472 shared_regions_active, base_vnode, chain_regions,
1473 old_shared_region,
1474 old_info.fs_base,
1475 old_info.system,
1476 old_info.flags,
1477 new_shared_region,
1478 new_info.fs_base,
1479 new_info.system,
1480 new_info.flags));
1481
1482 return(0);
1483
1484 }
1485
1486 /* header for the profile name file. The profiled app info is held */
1487 /* in the data file and pointed to by elements in the name file */
1488
1489 struct profile_names_header {
1490 unsigned int number_of_profiles;
1491 unsigned int user_id;
1492 unsigned int version;
1493 off_t element_array;
1494 unsigned int spare1;
1495 unsigned int spare2;
1496 unsigned int spare3;
1497 };
1498
1499 struct profile_element {
1500 off_t addr;
1501 vm_size_t size;
1502 unsigned int mod_date;
1503 unsigned int inode;
1504 char name[12];
1505 };
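/*
 * On-disk layout (a sketch inferred from the code below, not part of the
 * original file): the "names" file begins with a profile_names_header
 * whose element_array field is the file offset of the element array; the
 * search code requires it to equal sizeof(struct profile_names_header),
 * i.e. the array of number_of_profiles profile_element entries follows
 * the header immediately.  Each element identifies an application by
 * name/inode/mod_date, and its addr/size fields locate that app's profile
 * inside the separate "data" file.
 */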
1506
1507 struct global_profile {
1508 struct vnode *names_vp;
1509 struct vnode *data_vp;
1510 vm_offset_t buf_ptr;
1511 unsigned int user;
1512 unsigned int age;
1513 unsigned int busy;
1514 };
1515
1516 struct global_profile_cache {
1517 int max_ele;
1518 unsigned int age;
1519 struct global_profile profiles[3];
1520 };
1521
1522 /* forward declarations */
1523 int bsd_open_page_cache_files(unsigned int user,
1524 struct global_profile **profile);
1525 void bsd_close_page_cache_files(struct global_profile *profile);
1526 int bsd_search_page_cache_data_base(
1527 struct vnode *vp,
1528 struct profile_names_header *database,
1529 char *app_name,
1530 unsigned int mod_date,
1531 unsigned int inode,
1532 off_t *profile,
1533 unsigned int *profile_size);
1534
1535 struct global_profile_cache global_user_profile_cache =
1536 {3, 0, {{NULL, NULL, 0, 0, 0, 0},
1537 {NULL, NULL, 0, 0, 0, 0},
1538 {NULL, NULL, 0, 0, 0, 0}} };
1539
1540 /* BSD_OPEN_PAGE_CACHE_FILES: */
1541 /* Caller provides a user id. This id was used in */
1542 /* prepare_profile_database to create two unique absolute */
1543 /* file paths to the associated profile files. These files */
1544 /* are either opened or bsd_open_page_cache_files returns an */
1545 /* error. The header of the names file is then consulted. */
1546 /* The header and the vnodes for the names and data files are */
1547 /* returned. */
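/*
 * Example (a sketch, not part of the original file): for user id 501
 * (0x1f5), the two paths built below come out as
 * "/var/vm/app_profile/1f5_data" and "/var/vm/app_profile/1f5_names".
 */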
1548
1549 int
1550 bsd_open_page_cache_files(
1551 unsigned int user,
1552 struct global_profile **profile)
1553 {
1554 const char *cache_path = "/var/vm/app_profile/";
1555 struct proc *p;
1556 int error;
1557 vm_size_t resid;
1558 off_t resid_off;
1559 unsigned int lru;
1560 vm_size_t size;
1561
1562 struct vnode *names_vp;
1563 struct vnode *data_vp;
1564 vm_offset_t names_buf;
1565 vm_offset_t buf_ptr;
1566
1567 int profile_names_length;
1568 int profile_data_length;
1569 char *profile_data_string;
1570 char *profile_names_string;
1571 char *substring;
1572
1573 off_t file_size;
1574 struct vfs_context context;
1575
1576 kern_return_t ret;
1577
1578 struct nameidata nd_names;
1579 struct nameidata nd_data;
1580 int i;
1581
1582
1583 p = current_proc();
1584
1585 context.vc_proc = p;
1586 context.vc_ucred = kauth_cred_get();
1587
1588 restart:
1589 for(i = 0; i<global_user_profile_cache.max_ele; i++) {
1590 if((global_user_profile_cache.profiles[i].user == user)
1591 && (global_user_profile_cache.profiles[i].data_vp
1592 != NULL)) {
1593 *profile = &global_user_profile_cache.profiles[i];
1594 /* already in cache, we're done */
1595 if ((*profile)->busy) {
1596 /*
1597 * drop funnel and wait
1598 */
1599 (void)tsleep((void *)
1600 *profile,
1601 PRIBIO, "app_profile", 0);
1602 goto restart;
1603 }
1604 (*profile)->busy = 1;
1605 (*profile)->age = global_user_profile_cache.age;
1606
1607 /*
1608 * entries in cache are held with a valid
1609 * usecount... take an iocount which will
1610 * be dropped in "bsd_close_page_cache_files"
1611 * which is called after the read or writes to
1612 * these files are done
1613 */
1614 if ( (vnode_getwithref((*profile)->data_vp)) ) {
1615
1616 vnode_rele((*profile)->data_vp);
1617 vnode_rele((*profile)->names_vp);
1618
1619 (*profile)->data_vp = NULL;
1620 (*profile)->busy = 0;
1621 wakeup(*profile);
1622
1623 goto restart;
1624 }
1625 if ( (vnode_getwithref((*profile)->names_vp)) ) {
1626
1627 vnode_put((*profile)->data_vp);
1628 vnode_rele((*profile)->data_vp);
1629 vnode_rele((*profile)->names_vp);
1630
1631 (*profile)->data_vp = NULL;
1632 (*profile)->busy = 0;
1633 wakeup(*profile);
1634
1635 goto restart;
1636 }
1637 global_user_profile_cache.age+=1;
1638 return 0;
1639 }
1640 }
1641
1642 lru = global_user_profile_cache.age;
1643 *profile = NULL;
1644 for(i = 0; i<global_user_profile_cache.max_ele; i++) {
1645 /* Skip entry if it is in the process of being reused */
1646 if(global_user_profile_cache.profiles[i].data_vp ==
1647 (struct vnode *)0xFFFFFFFF)
1648 continue;
1649 /* Otherwise grab the first empty entry */
1650 if(global_user_profile_cache.profiles[i].data_vp == NULL) {
1651 *profile = &global_user_profile_cache.profiles[i];
1652 (*profile)->age = global_user_profile_cache.age;
1653 break;
1654 }
1655 /* Otherwise grab the oldest entry */
1656 if(global_user_profile_cache.profiles[i].age < lru) {
1657 lru = global_user_profile_cache.profiles[i].age;
1658 *profile = &global_user_profile_cache.profiles[i];
1659 }
1660 }
1661
1662 /* Did we set it? */
1663 if (*profile == NULL) {
1664 /*
1665 * No entries are available; this can only happen if all
1666 * of them are currently in the process of being reused;
1667 * if this happens, we sleep on the address of the first
1668 * element, and restart. This is less than ideal, but we
1669 * know it will work because we know that there will be a
1670 * wakeup on any entry currently in the process of being
1671 * reused.
1672 *
1673 		 * XXX Recommend a two-handed clock and more than 3 total
1674 		 * XXX cache entries at some point in the future.
1675 */
1676 /*
1677 * drop funnel and wait
1678 */
1679 (void)tsleep((void *)
1680 &global_user_profile_cache.profiles[0],
1681 PRIBIO, "app_profile", 0);
1682 goto restart;
1683 }
1684
1685 /*
1686 * If it's currently busy, we've picked the one at the end of the
1687 * LRU list, but it's currently being actively used. We sleep on
1688 * its address and restart.
1689 */
1690 if ((*profile)->busy) {
1691 /*
1692 * drop funnel and wait
1693 */
1694 (void)tsleep((void *)
1695 *profile,
1696 PRIBIO, "app_profile", 0);
1697 goto restart;
1698 }
1699 (*profile)->busy = 1;
1700 (*profile)->user = user;
1701
1702 /*
1703 	 * put a dummy value in for now to get competing requests to wait
1704 	 * above until we are finished
1705 *
1706 * Save the data_vp before setting it, so we can set it before
1707 * we kmem_free() or vrele(). If we don't do this, then we
1708 * have a potential funnel race condition we have to deal with.
1709 */
1710 data_vp = (*profile)->data_vp;
1711 (*profile)->data_vp = (struct vnode *)0xFFFFFFFF;
1712
1713 /*
1714 * Age the cache here in all cases; this guarantees that we won't
1715 * be reusing only one entry over and over, once the system reaches
1716 * steady-state.
1717 */
1718 global_user_profile_cache.age+=1;
1719
1720 if(data_vp != NULL) {
1721 kmem_free(kernel_map,
1722 (*profile)->buf_ptr, 4 * PAGE_SIZE);
1723 if ((*profile)->names_vp) {
1724 vnode_rele((*profile)->names_vp);
1725 (*profile)->names_vp = NULL;
1726 }
1727 vnode_rele(data_vp);
1728 }
1729
1730 	/* Try to open the appropriate user's profile files */
1731 /* If neither file is present, try to create them */
1732 /* If one file is present and the other not, fail. */
1733 /* If the files do exist, check them for the app_file */
1734 /* requested and read it in if present */
1735
1736 ret = kmem_alloc(kernel_map,
1737 (vm_offset_t *)&profile_data_string, PATH_MAX);
1738
1739 if(ret) {
1740 (*profile)->data_vp = NULL;
1741 (*profile)->busy = 0;
1742 wakeup(*profile);
1743 return ENOMEM;
1744 }
1745
1746 /* Split the buffer in half since we know the size of */
1747 /* our file path and our allocation is adequate for */
1748 /* both file path names */
1749 profile_names_string = profile_data_string + (PATH_MAX/2);
1750
1751
1752 strcpy(profile_data_string, cache_path);
1753 strcpy(profile_names_string, cache_path);
1754 profile_names_length = profile_data_length
1755 = strlen(profile_data_string);
1756 substring = profile_data_string + profile_data_length;
1757 sprintf(substring, "%x_data", user);
1758 substring = profile_names_string + profile_names_length;
1759 sprintf(substring, "%x_names", user);
1760
1761 /* We now have the absolute file names */
1762
1763 ret = kmem_alloc(kernel_map,
1764 (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
1765 if(ret) {
1766 kmem_free(kernel_map,
1767 (vm_offset_t)profile_data_string, PATH_MAX);
1768 (*profile)->data_vp = NULL;
1769 (*profile)->busy = 0;
1770 wakeup(*profile);
1771 return ENOMEM;
1772 }
1773
1774 NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF,
1775 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
1776 NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF,
1777 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
1778
1779 if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) {
1780 #ifdef notdef
1781 printf("bsd_open_page_cache_files: CacheData file not found %s\n",
1782 profile_data_string);
1783 #endif
1784 kmem_free(kernel_map,
1785 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1786 kmem_free(kernel_map,
1787 (vm_offset_t)profile_data_string, PATH_MAX);
1788 (*profile)->data_vp = NULL;
1789 (*profile)->busy = 0;
1790 wakeup(*profile);
1791 return error;
1792 }
1793 data_vp = nd_data.ni_vp;
1794
1795 if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) {
1796 printf("bsd_open_page_cache_files: NamesData file not found %s\n",
1797 			profile_names_string);
1798 kmem_free(kernel_map,
1799 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1800 kmem_free(kernel_map,
1801 (vm_offset_t)profile_data_string, PATH_MAX);
1802
1803 vnode_rele(data_vp);
1804 vnode_put(data_vp);
1805
1806 (*profile)->data_vp = NULL;
1807 (*profile)->busy = 0;
1808 wakeup(*profile);
1809 return error;
1810 }
1811 names_vp = nd_names.ni_vp;
1812
1813 if ((error = vnode_size(names_vp, &file_size, &context)) != 0) {
1814 printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string);
1815 kmem_free(kernel_map,
1816 (vm_offset_t)profile_data_string, PATH_MAX);
1817 kmem_free(kernel_map,
1818 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1819
1820 vnode_rele(names_vp);
1821 vnode_put(names_vp);
1822 vnode_rele(data_vp);
1823 vnode_put(data_vp);
1824
1825 (*profile)->data_vp = NULL;
1826 (*profile)->busy = 0;
1827 wakeup(*profile);
1828 return error;
1829 }
1830
1831 size = file_size;
1832 if(size > 4 * PAGE_SIZE)
1833 size = 4 * PAGE_SIZE;
1834 buf_ptr = names_buf;
1835 resid_off = 0;
1836
1837 while(size) {
1838 int resid_int;
1839 error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr,
1840 size, resid_off,
1841 UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(),
1842 &resid_int, p);
1843 resid = (vm_size_t) resid_int;
1844 if((error) || (size == resid)) {
1845 if(!error) {
1846 error = EINVAL;
1847 }
1848 kmem_free(kernel_map,
1849 (vm_offset_t)profile_data_string, PATH_MAX);
1850 kmem_free(kernel_map,
1851 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
1852
1853 vnode_rele(names_vp);
1854 vnode_put(names_vp);
1855 vnode_rele(data_vp);
1856 vnode_put(data_vp);
1857
1858 (*profile)->data_vp = NULL;
1859 (*profile)->busy = 0;
1860 wakeup(*profile);
1861 return error;
1862 }
1863 buf_ptr += size-resid;
1864 resid_off += size-resid;
1865 size = resid;
1866 }
1867 kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX);
1868
1869 (*profile)->names_vp = names_vp;
1870 (*profile)->data_vp = data_vp;
1871 (*profile)->buf_ptr = names_buf;
1872
1873 /*
1874 	 * at this point, both the names_vp and the data_vp have
1875 	 * a valid usecount and an iocount held
1876 */
1877 return 0;
1878
1879 }
1880
1881 void
1882 bsd_close_page_cache_files(
1883 struct global_profile *profile)
1884 {
1885 vnode_put(profile->data_vp);
1886 vnode_put(profile->names_vp);
1887
1888 profile->busy = 0;
1889 wakeup(profile);
1890 }
1891
1892 int
1893 bsd_read_page_cache_file(
1894 unsigned int user,
1895 int *fid,
1896 int *mod,
1897 char *app_name,
1898 struct vnode *app_vp,
1899 vm_offset_t *buffer,
1900 vm_offset_t *bufsize)
1901 {
1902
1903 boolean_t funnel_state;
1904
1905 struct proc *p;
1906 int error;
1907 unsigned int resid;
1908
1909 off_t profile;
1910 unsigned int profile_size;
1911
1912 vm_offset_t names_buf;
1913 struct vnode_attr va;
1914 struct vfs_context context;
1915
1916 kern_return_t ret;
1917
1918 struct vnode *names_vp;
1919 struct vnode *data_vp;
1920
1921 struct global_profile *uid_files;
1922
1923 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1924
1925 	/* Try to open the appropriate user's profile files */
1926 /* If neither file is present, try to create them */
1927 /* If one file is present and the other not, fail. */
1928 /* If the files do exist, check them for the app_file */
1929 /* requested and read it in if present */
1930
1931
1932 error = bsd_open_page_cache_files(user, &uid_files);
1933 if(error) {
1934 thread_funnel_set(kernel_flock, funnel_state);
1935 return EINVAL;
1936 }
1937
1938 p = current_proc();
1939
1940 names_vp = uid_files->names_vp;
1941 data_vp = uid_files->data_vp;
1942 names_buf = uid_files->buf_ptr;
1943
1944 context.vc_proc = p;
1945 context.vc_ucred = kauth_cred_get();
1946
1947 VATTR_INIT(&va);
1948 VATTR_WANTED(&va, va_fileid);
1949 VATTR_WANTED(&va, va_modify_time);
1950
1951 if ((error = vnode_getattr(app_vp, &va, &context))) {
1952 printf("bsd_read_cache_file: Can't stat app file %s\n", app_name);
1953 bsd_close_page_cache_files(uid_files);
1954 thread_funnel_set(kernel_flock, funnel_state);
1955 return error;
1956 }
1957
1958 *fid = (u_long)va.va_fileid;
1959 *mod = va.va_modify_time.tv_sec;
1960
1961 if (bsd_search_page_cache_data_base(
1962 names_vp,
1963 (struct profile_names_header *)names_buf,
1964 app_name,
1965 (unsigned int) va.va_modify_time.tv_sec,
1966 (u_long)va.va_fileid, &profile, &profile_size) == 0) {
1967 /* profile is an offset in the profile data base */
1968 /* It is zero if no profile data was found */
1969
1970 if(profile_size == 0) {
1971 *buffer = 0;
1972 *bufsize = 0;
1973 bsd_close_page_cache_files(uid_files);
1974 thread_funnel_set(kernel_flock, funnel_state);
1975 return 0;
1976 }
1977 ret = kmem_alloc(kernel_map, buffer, profile_size);
1978 if(ret) {
1979 bsd_close_page_cache_files(uid_files);
1980 thread_funnel_set(kernel_flock, funnel_state);
1981 return ENOMEM;
1982 }
1983 *bufsize = profile_size;
1984 while(profile_size) {
1985 int resid_int;
1986 error = vn_rdwr(UIO_READ, data_vp,
1987 (caddr_t) *buffer, profile_size,
1988 profile, UIO_SYSSPACE32, IO_NODELOCKED,
1989 kauth_cred_get(), &resid_int, p);
1990 resid = (vm_size_t) resid_int;
1991 if((error) || (profile_size == resid)) {
1992 bsd_close_page_cache_files(uid_files);
1993 kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size);
1994 thread_funnel_set(kernel_flock, funnel_state);
1995 return EINVAL;
1996 }
1997 profile += profile_size - resid;
1998 profile_size = resid;
1999 }
2000 bsd_close_page_cache_files(uid_files);
2001 thread_funnel_set(kernel_flock, funnel_state);
2002 return 0;
2003 } else {
2004 bsd_close_page_cache_files(uid_files);
2005 thread_funnel_set(kernel_flock, funnel_state);
2006 return EINVAL;
2007 }
2008
2009 }
2010
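/*
 * bsd_search_page_cache_data_base:
 * Scan the names database for an element whose inode, mod_date and
 * first 12 characters of name match the supplied application.  The
 * first 4 pages of the database are already resident in 'database';
 * any additional elements are paged in from vp in 4-page chunks.
 * On a match, *profile / *profile_size describe the entry's location
 * in the data file; both are left 0 when nothing matches.  Returns
 * EINVAL on a malformed header or read error, 0 otherwise.
 */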
2011 int
2012 bsd_search_page_cache_data_base(
2013 struct vnode *vp,
2014 struct profile_names_header *database,
2015 char *app_name,
2016 unsigned int mod_date,
2017 unsigned int inode,
2018 off_t *profile,
2019 unsigned int *profile_size)
2020 {
2021
2022 struct proc *p;
2023
2024 unsigned int i;
2025 struct profile_element *element;
2026 unsigned int ele_total;
2027 unsigned int extended_list = 0;
2028 off_t file_off = 0;
2029 unsigned int size;
2030 off_t resid_off;
2031 unsigned int resid;
2032 vm_offset_t local_buf = 0;
2033
2034 int error;
2035 kern_return_t ret;
2036
2037 p = current_proc();
2038
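/*
 * element_array is an offset from the start of the header; the layout
 * written by prepare_profile_database places the element array
 * immediately after the header, so anything else indicates an
 * incompatible or corrupt names file.
 */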
2039 if(((vm_offset_t)database->element_array) !=
2040 sizeof(struct profile_names_header)) {
2041 return EINVAL;
2042 }
2043 element = (struct profile_element *)(
2044 (vm_offset_t)database->element_array +
2045 (vm_offset_t)database);
2046
2047 ele_total = database->number_of_profiles;
2048
2049 *profile = 0;
2050 *profile_size = 0;
2051 while(ele_total) {
2052 /* note: code assumes header + n*ele comes out on a page boundary */
2053 if(((local_buf == 0) && (sizeof(struct profile_names_header) +
2054 (ele_total * sizeof(struct profile_element)))
2055 > (PAGE_SIZE * 4)) ||
2056 ((local_buf != 0) &&
2057 (ele_total * sizeof(struct profile_element))
2058 > (PAGE_SIZE * 4))) {
2059 extended_list = ele_total;
2060 if(element == (struct profile_element *)
2061 ((vm_offset_t)database->element_array +
2062 (vm_offset_t)database)) {
2063 ele_total = ((PAGE_SIZE * 4)/sizeof(struct profile_element)) - 1;
2064 } else {
2065 ele_total = (PAGE_SIZE * 4)/sizeof(struct profile_element);
2066 }
2067 extended_list -= ele_total;
2068 }
2069 for (i=0; i<ele_total; i++) {
2070 if((mod_date == element[i].mod_date)
2071 && (inode == element[i].inode)) {
2072 if(strncmp(element[i].name, app_name, 12) == 0) {
2073 *profile = element[i].addr;
2074 *profile_size = element[i].size;
2075 if(local_buf != 0) {
2076 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2077 }
2078 return 0;
2079 }
2080 }
2081 }
2082 if(extended_list == 0)
2083 break;
2084 if(local_buf == 0) {
2085 ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE);
2086 if(ret != KERN_SUCCESS) {
2087 return ENOMEM;
2088 }
2089 }
2090 element = (struct profile_element *)local_buf;
2091 ele_total = extended_list;
2092 extended_list = 0;
2093 file_off += 4 * PAGE_SIZE;
2094 if((ele_total * sizeof(struct profile_element)) >
2095 (PAGE_SIZE * 4)) {
2096 size = PAGE_SIZE * 4;
2097 } else {
2098 size = ele_total * sizeof(struct profile_element);
2099 }
2100 resid_off = 0;
2101 while(size) {
2102 int resid_int;
2103 error = vn_rdwr(UIO_READ, vp,
2104 CAST_DOWN(caddr_t, (local_buf + resid_off)),
2105 size, file_off + resid_off, UIO_SYSSPACE32,
2106 IO_NODELOCKED, kauth_cred_get(), &resid_int, p);
2107 resid = (vm_size_t) resid_int;
2108 if((error) || (size == resid)) {
2109 if(local_buf != 0) {
2110 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2111 }
2112 return EINVAL;
2113 }
2114 resid_off += size-resid;
2115 size = resid;
2116 }
2117 }
2118 if(local_buf != 0) {
2119 kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE);
2120 }
2121 return 0;
2122 }
2123
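/*
 * bsd_write_page_cache_file:
 * Append a page-cache profile of 'size' bytes for file_name (keyed by
 * inode 'fid' and modification time 'mod') to the end of the per-user
 * profile data file, and record a matching element in the names file.
 * If a profile for this application already exists the call is a
 * no-op; if the names database cannot be searched, EINVAL is returned.
 */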
2124 int
2125 bsd_write_page_cache_file(
2126 unsigned int user,
2127 char *file_name,
2128 caddr_t buffer,
2129 vm_size_t size,
2130 int mod,
2131 int fid)
2132 {
2133 struct proc *p;
2134 int resid;
2135 off_t resid_off;
2136 int error;
2137 boolean_t funnel_state;
2138 off_t file_size;
2139 struct vfs_context context;
2140 off_t profile;
2141 unsigned int profile_size;
2142
2143 vm_offset_t names_buf;
2144 struct vnode *names_vp;
2145 struct vnode *data_vp;
2146 struct profile_names_header *profile_header;
2147 off_t name_offset;
2148 struct global_profile *uid_files;
2149
2150
2151 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2152
2153
2154 error = bsd_open_page_cache_files(user, &uid_files);
2155 if(error) {
2156 thread_funnel_set(kernel_flock, funnel_state);
2157 return EINVAL;
2158 }
2159
2160 p = current_proc();
2161
2162 names_vp = uid_files->names_vp;
2163 data_vp = uid_files->data_vp;
2164 names_buf = uid_files->buf_ptr;
2165
2166 /* Stat data file for size */
2167
2168 context.vc_proc = p;
2169 context.vc_ucred = kauth_cred_get();
2170
2171 if ((error = vnode_size(data_vp, &file_size, &context)) != 0) {
2172 printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name);
2173 bsd_close_page_cache_files(uid_files);
2174 thread_funnel_set(kernel_flock, funnel_state);
2175 return error;
2176 }
2177
2178 if (bsd_search_page_cache_data_base(names_vp,
2179 (struct profile_names_header *)names_buf,
2180 file_name, (unsigned int) mod,
2181 fid, &profile, &profile_size) == 0) {
2182 /* profile is an offset in the profile data base */
2183 /* It is zero if no profile data was found */
2184
2185 if(profile_size == 0) {
2186 unsigned int header_size;
2187 vm_offset_t buf_ptr;
2188
2189 /* Our Write case */
2190
2191 /* read header for last entry */
2192 profile_header =
2193 (struct profile_names_header *)names_buf;
2194 name_offset = sizeof(struct profile_names_header) +
2195 (sizeof(struct profile_element)
2196 * profile_header->number_of_profiles);
2197 profile_header->number_of_profiles += 1;
2198
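/*
 * Only the first 4 pages of the names file are cached in names_buf;
 * a new element that falls inside them is added to the cached copy
 * (and flushed together with the header below), while one that falls
 * beyond them is written directly to the names file.
 */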
2199 if(name_offset < PAGE_SIZE * 4) {
2200 struct profile_element *name;
2201 /* write new entry */
2202 name = (struct profile_element *)
2203 (names_buf + (vm_offset_t)name_offset);
2204 name->addr = file_size;
2205 name->size = size;
2206 name->mod_date = mod;
2207 name->inode = fid;
2208 strncpy (name->name, file_name, 12);
2209 } else {
2210 unsigned int ele_size;
2211 struct profile_element name;
2212 /* write new entry */
2213 name.addr = file_size;
2214 name.size = size;
2215 name.mod_date = mod;
2216 name.inode = fid;
2217 strncpy (name.name, file_name, 12);
2218 /* write element out separately */
2219 ele_size = sizeof(struct profile_element);
2220 buf_ptr = (vm_offset_t)&name;
2221 resid_off = name_offset;
2222
2223 while(ele_size) {
2224 error = vn_rdwr(UIO_WRITE, names_vp,
2225 (caddr_t)buf_ptr,
2226 ele_size, resid_off,
2227 UIO_SYSSPACE32, IO_NODELOCKED,
2228 kauth_cred_get(), &resid, p);
2229 if(error) {
2230 printf("bsd_write_page_cache_file: Can't write name_element %x\n", user);
2231 bsd_close_page_cache_files(
2232 uid_files);
2233 thread_funnel_set(
2234 kernel_flock,
2235 funnel_state);
2236 return error;
2237 }
2238 buf_ptr += (vm_offset_t)
2239 ele_size-resid;
2240 resid_off += ele_size-resid;
2241 ele_size = resid;
2242 }
2243 }
2244
2245 if(name_offset < PAGE_SIZE * 4) {
2246 header_size = name_offset +
2247 sizeof(struct profile_element);
2248
2249 } else {
2250 header_size =
2251 sizeof(struct profile_names_header);
2252 }
2253 buf_ptr = (vm_offset_t)profile_header;
2254 resid_off = 0;
2255
2256 /* write names file header */
2257 while(header_size) {
2258 error = vn_rdwr(UIO_WRITE, names_vp,
2259 (caddr_t)buf_ptr,
2260 header_size, resid_off,
2261 UIO_SYSSPACE32, IO_NODELOCKED,
2262 kauth_cred_get(), &resid, p);
2263 if(error) {
2264 printf("bsd_write_page_cache_file: Can't write header %x\n", user);
2265 bsd_close_page_cache_files(
2266 uid_files);
2267 thread_funnel_set(
2268 kernel_flock, funnel_state);
2269 return error;
2270 }
2271 buf_ptr += (vm_offset_t)header_size-resid;
2272 resid_off += header_size-resid;
2273 header_size = resid;
2274 }
2275 /* write profile to data file */
2276 resid_off = file_size;
2277 while(size) {
2278 error = vn_rdwr(UIO_WRITE, data_vp,
2279 (caddr_t)buffer, size, resid_off,
2280 UIO_SYSSPACE32, IO_NODELOCKED,
2281 kauth_cred_get(), &resid, p);
2282 if(error) {
2283 printf("bsd_write_page_cache_file: Can't write profile data %x\n", user);
2284 bsd_close_page_cache_files(
2285 uid_files);
2286 thread_funnel_set(
2287 kernel_flock, funnel_state);
2288 return error;
2289 }
2290 buffer += size-resid;
2291 resid_off += size-resid;
2292 size = resid;
2293 }
2294 bsd_close_page_cache_files(uid_files);
2295 thread_funnel_set(kernel_flock, funnel_state);
2296 return 0;
2297 }
2298 /* Someone else wrote a twin profile before us */
2299 bsd_close_page_cache_files(uid_files);
2300 thread_funnel_set(kernel_flock, funnel_state);
2301 return 0;
2302 } else {
2303 bsd_close_page_cache_files(uid_files);
2304 thread_funnel_set(kernel_flock, funnel_state);
2305 return EINVAL;
2306 }
2307
2308 }
2309
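/*
 * prepare_profile_database:
 * Create the per-user profile files <cache_path><uid-in-hex>_data and
 * <cache_path><uid-in-hex>_names, write an initial names-file header
 * (version 1, no profiles), and chown both files to 'user'.  Returns
 * 0 without error if the data file cannot be created exclusively,
 * which normally means the database already exists.
 */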
2310 int
2311 prepare_profile_database(int user)
2312 {
2313 const char *cache_path = "/var/vm/app_profile/";
2314 struct proc *p;
2315 int error;
2316 int resid;
2317 off_t resid_off;
2318 vm_size_t size;
2319
2320 struct vnode *names_vp;
2321 struct vnode *data_vp;
2322 vm_offset_t names_buf;
2323 vm_offset_t buf_ptr;
2324
2325 int profile_names_length;
2326 int profile_data_length;
2327 char *profile_data_string;
2328 char *profile_names_string;
2329 char *substring;
2330
2331 struct vnode_attr va;
2332 struct vfs_context context;
2333
2334 struct profile_names_header *profile_header;
2335 kern_return_t ret;
2336
2337 struct nameidata nd_names;
2338 struct nameidata nd_data;
2339
2340 p = current_proc();
2341
2342 context.vc_proc = p;
2343 context.vc_ucred = kauth_cred_get();
2344
2345 ret = kmem_alloc(kernel_map,
2346 (vm_offset_t *)&profile_data_string, PATH_MAX);
2347
2348 if(ret) {
2349 return ENOMEM;
2350 }
2351
2352 /* Split the buffer in half since we know the size of */
2353 /* our file path and our allocation is adequate for */
2354 /* both file path names */
2355 profile_names_string = profile_data_string + (PATH_MAX/2);
2356
2357
2358 strcpy(profile_data_string, cache_path);
2359 strcpy(profile_names_string, cache_path);
2360 profile_names_length = profile_data_length
2361 = strlen(profile_data_string);
2362 substring = profile_data_string + profile_data_length;
2363 sprintf(substring, "%x_data", user);
2364 substring = profile_names_string + profile_names_length;
2365 sprintf(substring, "%x_names", user);
2366
2367 /* We now have the absolute file names */
2368
2369 ret = kmem_alloc(kernel_map,
2370 (vm_offset_t *)&names_buf, 4 * PAGE_SIZE);
2371 if(ret) {
2372 kmem_free(kernel_map,
2373 (vm_offset_t)profile_data_string, PATH_MAX);
2374 return ENOMEM;
2375 }
2376
2377 NDINIT(&nd_names, LOOKUP, FOLLOW,
2378 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context);
2379 NDINIT(&nd_data, LOOKUP, FOLLOW,
2380 UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context);
2381
2382 if ( (error = vn_open(&nd_data,
2383 O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
2384 kmem_free(kernel_map,
2385 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2386 kmem_free(kernel_map,
2387 (vm_offset_t)profile_data_string, PATH_MAX);
2388
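/*
 * The data file is opened O_CREAT|O_EXCL, so this path is normally
 * taken when the database already exists; returning 0 treats it as
 * already prepared (any other open failure is ignored the same way).
 */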
2389 return 0;
2390 }
2391 data_vp = nd_data.ni_vp;
2392
2393 if ( (error = vn_open(&nd_names,
2394 O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) {
2395 printf("prepare_profile_database: Can't create CacheNames %s\n",
2396 profile_data_string);
2397 kmem_free(kernel_map,
2398 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2399 kmem_free(kernel_map,
2400 (vm_offset_t)profile_data_string, PATH_MAX);
2401
2402 vnode_rele(data_vp);
2403 vnode_put(data_vp);
2404
2405 return error;
2406 }
2407 names_vp = nd_names.ni_vp;
2408
2409 /* Write Header for new names file */
2410
2411 profile_header = (struct profile_names_header *)names_buf;
2412
2413 profile_header->number_of_profiles = 0;
2414 profile_header->user_id = user;
2415 profile_header->version = 1;
2416 profile_header->element_array =
2417 sizeof(struct profile_names_header);
2418 profile_header->spare1 = 0;
2419 profile_header->spare2 = 0;
2420 profile_header->spare3 = 0;
2421
2422 size = sizeof(struct profile_names_header);
2423 buf_ptr = (vm_offset_t)profile_header;
2424 resid_off = 0;
2425
2426 while(size) {
2427 error = vn_rdwr(UIO_WRITE, names_vp,
2428 (caddr_t)buf_ptr, size, resid_off,
2429 UIO_SYSSPACE32, IO_NODELOCKED,
2430 kauth_cred_get(), &resid, p);
2431 if(error) {
2432 printf("prepare_profile_database: Can't write header %s\n", profile_names_string);
2433 kmem_free(kernel_map,
2434 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2435 kmem_free(kernel_map,
2436 (vm_offset_t)profile_data_string,
2437 PATH_MAX);
2438
2439 vnode_rele(names_vp);
2440 vnode_put(names_vp);
2441 vnode_rele(data_vp);
2442 vnode_put(data_vp);
2443
2444 return error;
2445 }
2446 buf_ptr += size-resid;
2447 resid_off += size-resid;
2448 size = resid;
2449 }
2450 VATTR_INIT(&va);
2451 VATTR_SET(&va, va_uid, user);
2452
2453 error = vnode_setattr(names_vp, &va, &context);
2454 if(error) {
2455 printf("prepare_profile_database: "
2456 "Can't set user %s\n", profile_names_string);
2457 }
2458 vnode_rele(names_vp);
2459 vnode_put(names_vp);
2460
2461 VATTR_INIT(&va);
2462 VATTR_SET(&va, va_uid, user);
2463 error = vnode_setattr(data_vp, &va, &context);
2464 if(error) {
2465 printf("prepare_profile_database: "
2466 "Can't set user %s\n", profile_data_string);
2467 }
2468 vnode_rele(data_vp);
2469 vnode_put(data_vp);
2470
2471 kmem_free(kernel_map,
2472 (vm_offset_t)profile_data_string, PATH_MAX);
2473 kmem_free(kernel_map,
2474 (vm_offset_t)names_buf, 4 * PAGE_SIZE);
2475 return 0;
2476
2477 }