bsd/kern/ubc_subr.c

   1 /*
   2  * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  *      File:   ubc_subr.c
  30  *      Author: Umesh Vaishampayan [umeshv@apple.com]
  31  *              05-Aug-1999     umeshv  Created.
  32  *
  33  *      Functions related to Unified Buffer cache.
  34  *
  35  * Caller of UBC functions MUST have a valid reference on the vnode.
  36  *
  37  */
  38
  39 #include <sys/types.h>
  40 #include <sys/param.h>
  41 #include <sys/systm.h>
  42 #include <sys/lock.h>
  43 #include <sys/mman.h>
  44 #include <sys/mount_internal.h>
  45 #include <sys/vnode_internal.h>
  46 #include <sys/ubc_internal.h>
  47 #include <sys/ucred.h>
  48 #include <sys/proc_internal.h>
  49 #include <sys/kauth.h>
  50 #include <sys/buf.h>
  51 #include <sys/user.h>
  52 #include <sys/codesign.h>
  53
  54 #include <mach/mach_types.h>
  55 #include <mach/memory_object_types.h>
  56 #include <mach/memory_object_control.h>
  57 #include <mach/vm_map.h>
  58 #include <mach/mach_vm.h>
  59 #include <mach/upl.h>
  60
  61 #include <kern/kern_types.h>
  62 #include <kern/kalloc.h>
  63 #include <kern/zalloc.h>
  64 #include <kern/thread.h>
  65 #include <vm/vm_kern.h>
  66 #include <vm/vm_protos.h> /* last */
  67
  68 #include <libkern/crypto/sha1.h>
  69
  70 #include <security/mac_framework.h>
  71
  72 /* XXX These should be in a BSD accessible Mach header, but aren't. */
  73 extern kern_return_t memory_object_pages_resident(memory_object_control_t,
  74                                                         boolean_t *);
  75 extern kern_return_t    memory_object_signed(memory_object_control_t control,
  76                                              boolean_t is_signed);
  77 extern boolean_t        memory_object_is_slid(memory_object_control_t   control);
  78
  79 extern void Debugger(const char *message);
  80
  81
  82 /* XXX no one uses this interface! */
  83 kern_return_t ubc_page_op_with_control(
  84         memory_object_control_t  control,
  85         off_t                    f_offset,
  86         int                      ops,
  87         ppnum_t                  *phys_entryp,
  88         int                      *flagsp);
  89
  90
  91 #if DIAGNOSTIC
  92 #if defined(assert)
  93 #undef assert
  94 #endif
  95 #define assert(cond)    \
  96     ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
  97 #else
  98 #include <kern/assert.h>
  99 #endif /* DIAGNOSTIC */
 100
 101 static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
 102 static int ubc_umcallback(vnode_t, void *);
 103 static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
 104 static void ubc_cs_free(struct ubc_info *uip);
 105
 106 struct zone     *ubc_info_zone;
 107
 108
 109 /*
 110  * CODESIGNING
 111  * Routines to navigate code signing data structures in the kernel...
 112  */
 113
 114 extern int cs_debug;
 115
 116 static boolean_t
 117 cs_valid_range(
 118         const void *start,
 119         const void *end,
 120         const void *lower_bound,
 121         const void *upper_bound)
 122 {
 123         if (upper_bound < lower_bound ||
 124             end < start) {
 125                 return FALSE;
 126         }
 127
 128         if (start < lower_bound ||
 129             end > upper_bound) {
 130                 return FALSE;
 131         }
 132
 133         return TRUE;
 134 }
 135
 136 /*
 137  * Magic numbers used by Code Signing
 138  */
 139 enum {
 140         CSMAGIC_REQUIREMENT = 0xfade0c00,               /* single Requirement blob */
 141         CSMAGIC_REQUIREMENTS = 0xfade0c01,              /* Requirements vector (internal requirements) */
 142         CSMAGIC_CODEDIRECTORY = 0xfade0c02,             /* CodeDirectory blob */
 143         CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */
 144         CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02,    /* XXX */
 145         CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xfade7171,     /* embedded entitlements */
 146         CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */
 147
 148         CSSLOT_CODEDIRECTORY = 0,                               /* slot index for CodeDirectory */
 149         CSSLOT_ENTITLEMENTS = 5
 150 };
 151
 152 static const uint32_t supportsScatter = 0x20100;        // first version to support scatter option
 153
 154 /*
 155  * Structure of an embedded-signature SuperBlob
 156  */
 157 typedef struct __BlobIndex {
 158         uint32_t type;                                  /* type of entry */
 159         uint32_t offset;                                /* offset of entry */
 160 } CS_BlobIndex;
 161
 162 typedef struct __SuperBlob {
 163         uint32_t magic;                                 /* magic number */
 164         uint32_t length;                                /* total length of SuperBlob */
 165         uint32_t count;                                 /* number of index entries following */
 166         CS_BlobIndex index[];                   /* (count) entries */
 167         /* followed by Blobs in no particular order as indicated by offsets in index */
 168 } CS_SuperBlob;
 169
 170 typedef struct __GenericBlob {
 171         uint32_t magic;                         /* magic number */
 172         uint32_t length;                        /* total length of blob */
 173         char data[];
 174 } CS_GenericBlob;
 175
 176 struct Scatter {
 177         uint32_t count;                 // number of pages; zero for sentinel (only)
 178         uint32_t base;                  // first page number
 179         uint64_t targetOffset;          // offset in target
 180         uint64_t spare;                 // reserved
 181 };
 182
 183 /*
 184  * C form of a CodeDirectory.
 185  */
 186 typedef struct __CodeDirectory {
 187         uint32_t magic;                                 /* magic number (CSMAGIC_CODEDIRECTORY) */
 188         uint32_t length;                                /* total length of CodeDirectory blob */
 189         uint32_t version;                               /* compatibility version */
 190         uint32_t flags;                                 /* setup and mode flags */
 191         uint32_t hashOffset;                    /* offset of hash slot element at index zero */
 192         uint32_t identOffset;                   /* offset of identifier string */
 193         uint32_t nSpecialSlots;                 /* number of special hash slots */
 194         uint32_t nCodeSlots;                    /* number of ordinary (code) hash slots */
 195         uint32_t codeLimit;                             /* limit to main image signature range */
 196         uint8_t hashSize;                               /* size of each hash in bytes */
 197         uint8_t hashType;                               /* type of hash (cdHashType* constants) */
 198         uint8_t spare1;                                 /* unused (must be zero) */
 199         uint8_t pageSize;                               /* log2(page size in bytes); 0 => infinite */
 200         uint32_t spare2;                                /* unused (must be zero) */
 201         /* Version 0x20100 */
 202         uint32_t scatterOffset;                         /* offset of optional scatter vector */
 203         /* followed by dynamic content as located by offset fields above */
 204 } CS_CodeDirectory;
 205
 206
 207 /*
 208  * Locate the CodeDirectory from an embedded signature blob
 209  */
 210 static const
 211 CS_CodeDirectory *findCodeDirectory(
 212         const CS_SuperBlob *embedded,
 213         char *lower_bound,
 214         char *upper_bound)
 215 {
 216         const CS_CodeDirectory *cd = NULL;
 217
 218         if (embedded &&
 219             cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
 220             ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
 221                 const CS_BlobIndex *limit;
 222                 const CS_BlobIndex *p;
 223
 224                 limit = &embedded->index[ntohl(embedded->count)];
 225                 if (!cs_valid_range(&embedded->index[0], limit,
 226                                     lower_bound, upper_bound)) {
 227                         return NULL;
 228                 }
 229                 for (p = embedded->index; p < limit; ++p) {
 230                         if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
 231                                 const unsigned char *base;
 232
 233                                 base = (const unsigned char *)embedded;
 234                                 cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
 235                                 break;
 236                         }
 237                 }
 238         } else {
 239                 /*
 240                  * Detached signatures come as a bare CS_CodeDirectory,
 241                  * without a blob.
 242                  */
 243                 cd = (const CS_CodeDirectory *) embedded;
 244         }
 245
 246         if (cd &&
 247             cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
 248             cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
 249                            lower_bound, upper_bound) &&
 250             cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
 251                            lower_bound, upper_bound) &&
 252             cs_valid_range(cd, (const char *) cd +
 253                            ntohl(cd->hashOffset) +
 254                            (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
 255                            lower_bound, upper_bound) &&
 256
 257             ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
 258                 return cd;
 259         }
 260
 261         // not found or not a valid code directory
 262         return NULL;
 263 }
 264
 265
 266 /*
 267  * Locating a page hash
 268  */
 269 static const unsigned char *
 270 hashes(
 271         const CS_CodeDirectory *cd,
 272         unsigned page,
 273         char *lower_bound,
 274         char *upper_bound)
 275 {
 276         const unsigned char *base, *top, *hash;
 277         uint32_t nCodeSlots = ntohl(cd->nCodeSlots);
 278
 279         assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));
 280
 281         if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
 282                 /* Get first scatter struct */
 283                 const struct Scatter *scatter = (const struct Scatter*)
 284                         ((const char*)cd + ntohl(cd->scatterOffset));
 285                 uint32_t hashindex=0, scount, sbase=0;
 286                 /* iterate all scatter structs */
 287                 do {
 288                         if((const char*)scatter > (const char*)cd + ntohl(cd->length)) {
 289                                 if(cs_debug) {
 290                                         printf("CODE SIGNING: Scatter extends past Code Directory\n");
 291                                 }
 292                                 return NULL;
 293                         }
 294
 295                         scount = ntohl(scatter->count);
 296                         uint32_t new_base = ntohl(scatter->base);
 297
 298                         /* last scatter? */
 299                         if (scount == 0) {
 300                                 return NULL;
 301                         }
 302
 303                         if((hashindex > 0) && (new_base <= sbase)) {
 304                                 if(cs_debug) {
 305                                         printf("CODE SIGNING: unordered Scatter, prev base %d, cur base %d\n",
 306                                         sbase, new_base);
 307                                 }
 308                                 return NULL;    /* unordered scatter array */
 309                         }
 310                         sbase = new_base;
 311
 312                         /* this scatter beyond page we're looking for? */
 313                         if (sbase > page) {
 314                                 return NULL;
 315                         }
 316
 317                         if (sbase+scount >= page) {
 318                                 /* Found the scatter struct that is
 319                                  * referencing our page */
 320
 321                                 /* base = address of first hash covered by scatter */
 322                                 base = (const unsigned char *)cd + ntohl(cd->hashOffset) +
 323                                         hashindex * SHA1_RESULTLEN;
 324                                 /* top = address of first hash after this scatter */
 325                                 top = base + scount * SHA1_RESULTLEN;
 326                                 if (!cs_valid_range(base, top, lower_bound,
 327                                                     upper_bound) ||
 328                                     hashindex > nCodeSlots) {
 329                                         return NULL;
 330                                 }
 331
 332                                 break;
 333                         }
 334
 335                         /* this scatter struct is before the page we're looking
 336                          * for. Iterate. */
 337                         hashindex+=scount;
 338                         scatter++;
 339                 } while(1);
 340
 341                 hash = base + (page - sbase) * SHA1_RESULTLEN;
 342         } else {
 343                 base = (const unsigned char *)cd + ntohl(cd->hashOffset);
 344                 top = base + nCodeSlots * SHA1_RESULTLEN;
 345                 if (!cs_valid_range(base, top, lower_bound, upper_bound) ||
 346                     page > nCodeSlots) {
 347                         return NULL;
 348                 }
 349                 assert(page < nCodeSlots);
 350
 351                 hash = base + page * SHA1_RESULTLEN;
 352         }
 353
 354         if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
 355                             lower_bound, upper_bound)) {
 356                 hash = NULL;
 357         }
 358
 359         return hash;
 360 }
 361 /*
 362  * CODESIGNING
 363  * End of routines to navigate code signing data structures in the kernel.
 364  */
 365
 366 /*
 367  * ENTITLEMENTS
 368  * Routines to navigate entitlements in the kernel.
 369  */
 370
 371 /* Retrieve the entitlements blob for a process.
 372  * Returns:
 373  *   EINVAL     no text vnode associated with the process
 374  *   EBADEXEC   invalid code signing data
 375  *   ENOMEM     you should reboot
 376  *   0          no error occurred
 377  *
 378  * On success, out_start and out_length will point to the
 379  * entitlements blob if found; or will be set to NULL/zero
 380  * if there were no entitlements.
 381  */
 382 int
 383 cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
 384 {
 385         SHA1_CTX context;                       /* XXX hash agility */
 386         int error = 0;
 387         struct cs_blob *blob_list_entry;
 388         CS_SuperBlob *super_blob;
 389         CS_BlobIndex *blob_index;
 390         CS_GenericBlob *blob;
 391         CS_CodeDirectory *code_dir;
 392         unsigned char *computed_hash = NULL;
 393         unsigned char *embedded_hash = NULL;
 394         void *start = NULL;
 395         size_t length = 0;
 396         size_t hash_size = 0;
 397         unsigned int i, count;
 398
 399         if (NULL == p->p_textvp) {
 400                 error = EINVAL;
 401                 goto out;
 402         }
 403         if (NULL == (blob_list_entry = ubc_cs_blob_get(p->p_textvp, -1,
 404             p->p_textoff)))
 405                 goto out;
 406         super_blob = (void *)blob_list_entry->csb_mem_kaddr;
 407         if (CSMAGIC_EMBEDDED_SIGNATURE != ntohl(super_blob->magic)) {
 408                 error = EBADEXEC;
 409                 goto out;
 410         }
 411         count = ntohl(super_blob->count);
 412         for (i = 0; i < count; ++i) {
 413                 blob_index = &super_blob->index[i];
 414                 blob = (void *)((char *)super_blob + ntohl(blob_index->offset));
 415                 switch (ntohl(blob_index->type)) {
 416                 case CSSLOT_CODEDIRECTORY:
 417                         if (CSMAGIC_CODEDIRECTORY != ntohl(blob->magic))
 418                                 break;
 419                         code_dir = (void *)blob;
 420                         hash_size = code_dir->hashSize;
 421                         if (CSSLOT_ENTITLEMENTS <=
 422                             ntohl(code_dir->nSpecialSlots)) {
 423                                 embedded_hash = (void *)((char *)code_dir +
 424                                     ntohl(code_dir->hashOffset) -
 425                                     (hash_size * CSSLOT_ENTITLEMENTS));
 426                         }
 427                         break;
 428                 case CSSLOT_ENTITLEMENTS:
 429                         if (CSMAGIC_EMBEDDED_ENTITLEMENTS != ntohl(blob->magic))
 430                                 break;
 431                         start = (void *)blob;
 432                         length = ntohl(blob->length);
 433                         break;
 434                 default:
 435                         break;
 436                 }
 437         }
 438         if (NULL == start && NULL == embedded_hash) {
 439                 error = 0;
 440                 goto out;
 441         } else if (NULL == start || NULL == embedded_hash) {
 442                 error = EBADEXEC;
 443                 goto out;
 444         }
 445         if (NULL == (computed_hash = kalloc(hash_size))) {
 446                 error = ENOMEM;
 447                 goto out;
 448         }
 449         SHA1Init(&context);
 450         SHA1Update(&context, start, length);
 451         SHA1Final(computed_hash, &context);
 452         if (0 != memcmp(computed_hash, embedded_hash, hash_size)) {
 453                 error = EBADEXEC;
 454                 goto out;
 455         }
 456         error = 0;
 457 out:
 458         if (NULL != computed_hash)
 459                 kfree(computed_hash, hash_size);
 460         if (0 == error) {
 461                 *out_start = start;
 462                 *out_length = length;
 463         }
 464         return error;
 465 }
 466
 467 /*
 468  * ENTITLEMENTS
 469  * End of routines to navigate entitlements in the kernel.
 470  */
 471
 472
 473
 474 /*
 475  * ubc_init
 476  *
 477  * Initialization of the zone for Unified Buffer Cache.
 478  *
 479  * Parameters:  (void)
 480  *
 481  * Returns:     (void)
 482  *
 483  * Implicit returns:
 484  *              ubc_info_zone(global)   initialized for subsequent allocations
 485  */
 486 __private_extern__ void
 487 ubc_init(void)
 488 {
 489         int     i;
 490
 491         i = (vm_size_t) sizeof (struct ubc_info);
 492
 493         ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
 494
 495         zone_change(ubc_info_zone, Z_NOENCRYPT, TRUE);
 496 }
 497
 498
 499 /*
 500  * ubc_info_init
 501  *
 502  * Allocate and attach an empty ubc_info structure to a vnode
 503  *
 504  * Parameters:  vp                      Pointer to the vnode
 505  *
 506  * Returns:     0                       Success
 507  *      vnode_size:ENOMEM               Not enough space
 508  *      vnode_size:???                  Other error from vnode_getattr
 509  *
 510  */
 511 int
 512 ubc_info_init(struct vnode *vp)
 513 {
 514         return(ubc_info_init_internal(vp, 0, 0));
 515 }
 516
 517
 518 /*
 519  * ubc_info_init_withsize
 520  *
 521  * Allocate and attach a sized ubc_info structure to a vnode
 522  *
 523  * Parameters:  vp                      Pointer to the vnode
 524  *              filesize                The size of the file
 525  *
 526  * Returns:     0                       Success
 527  *      vnode_size:ENOMEM               Not enough space
 528  *      vnode_size:???                  Other error from vnode_getattr
 529  */
 530 int
 531 ubc_info_init_withsize(struct vnode *vp, off_t filesize)
 532 {
 533         return(ubc_info_init_internal(vp, 1, filesize));
 534 }
 535
 536
 537 /*
 538  * ubc_info_init_internal
 539  *
 540  * Allocate and attach a ubc_info structure to a vnode
 541  *
 542  * Parameters:  vp                      Pointer to the vnode
 543  *              withfsize{0,1}          Zero if the size should be obtained
 544  *                                      from the vnode; otherwise, use filesize
 545  *              filesize                The size of the file, if withfsize == 1
 546  *
 547  * Returns:     0                       Success
 548  *      vnode_size:ENOMEM               Not enough space
 549  *      vnode_size:???                  Other error from vnode_getattr
 550  *
 551  * Notes:       We call a blocking zalloc(), and the zone was created as an
 552  *              expandable and collectable zone, so if no memory is available,
 553  *              it is possible for zalloc() to block indefinitely.  zalloc()
 554  *              may also panic if the zone of zones is exhausted, since it's
 555  *              NOT expandable.
 556  *
 557  *              We unconditionally call vnode_pager_setup(), even if this is
 558  *              a reuse of a ubc_info; in that case, we should probably assert
 559  *              that it does not already have a pager association, but do not.
 560  *
 561  *              Since memory_object_create_named() can only fail from receiving
 562  *              an invalid pager argument, the explicit check and panic is
 563  *              merely precautionary.
 564  */
 565 static int
 566 ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
 567 {
 568         register struct ubc_info        *uip;
 569         void *  pager;
 570         int error = 0;
 571         kern_return_t kret;
 572         memory_object_control_t control;
 573
 574         uip = vp->v_ubcinfo;
 575
 576         /*
 577          * If there is not already a ubc_info attached to the vnode, we
 578          * attach one; otherwise, we will reuse the one that's there.
 579          */
 580         if (uip == UBC_INFO_NULL) {
 581
 582                 uip = (struct ubc_info *) zalloc(ubc_info_zone);
 583                 bzero((char *)uip, sizeof(struct ubc_info));
 584
 585                 uip->ui_vnode = vp;
 586                 uip->ui_flags = UI_INITED;
 587                 uip->ui_ucred = NOCRED;
 588         }
 589         assert(uip->ui_flags != UI_NONE);
 590         assert(uip->ui_vnode == vp);
 591
 592         /* now set this ubc_info in the vnode */
 593         vp->v_ubcinfo = uip;
 594
 595         /*
 596          * Allocate a pager object for this vnode
 597          *
 598          * XXX The value of the pager parameter is currently ignored.
 599          * XXX Presumably, this API changed to avoid the race between
 600          * XXX setting the pager and the UI_HASPAGER flag.
 601          */
 602         pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
 603         assert(pager);
 604
 605         /*
 606          * Explicitly set the pager into the ubc_info, after setting the
 607          * UI_HASPAGER flag.
 608          */
 609         SET(uip->ui_flags, UI_HASPAGER);
 610         uip->ui_pager = pager;
 611
 612         /*
 613          * Note: We can not use VNOP_GETATTR() to get accurate
 614          * value of ui_size because this may be an NFS vnode, and
 615          * nfs_getattr() can call vinvalbuf(); if this happens,
 616          * ubc_info is not set up to deal with that event.
 617          * So use bogus size.
 618          */
 619
 620         /*
 621          * create a vnode - vm_object association
 622          * memory_object_create_named() creates a "named" reference on the
 623          * memory object we hold this reference as long as the vnode is
 624          * "alive."  Since memory_object_create_named() took its own reference
 625          * on the vnode pager we passed it, we can drop the reference
 626          * vnode_pager_setup() returned here.
 627          */
 628         kret = memory_object_create_named(pager,
 629                 (memory_object_size_t)uip->ui_size, &control);
 630         vnode_pager_deallocate(pager);
 631         if (kret != KERN_SUCCESS)
 632                 panic("ubc_info_init: memory_object_create_named returned %d", kret);
 633
 634         assert(control);
 635         uip->ui_control = control;      /* cache the value of the mo control */
 636         SET(uip->ui_flags, UI_HASOBJREF);       /* with a named reference */
 637
 638         if (withfsize == 0) {
 639                 /* initialize the size */
 640                 error = vnode_size(vp, &uip->ui_size, vfs_context_current());
 641                 if (error)
 642                         uip->ui_size = 0;
 643         } else {
 644                 uip->ui_size = filesize;
 645         }
 646         vp->v_lflag |= VNAMED_UBC;      /* vnode has a named ubc reference */
 647
 648         return (error);
 649 }
 650
 651
 652 /*
 653  * ubc_info_free
 654  *
 655  * Free a ubc_info structure
 656  *
 657  * Parameters:  uip                     A pointer to the ubc_info to free
 658  *
 659  * Returns:     (void)
 660  *
 661  * Notes:       If there is a credential that has subsequently been associated
 662  *              with the ubc_info via a call to ubc_setcred(), the reference
 663  *              to the credential is dropped.
 664  *
 665  *              It's actually impossible for a ubc_info.ui_control to take the
 666  *              value MEMORY_OBJECT_CONTROL_NULL.
 667  */
 668 static void
 669 ubc_info_free(struct ubc_info *uip)
 670 {
 671         if (IS_VALID_CRED(uip->ui_ucred)) {
 672                 kauth_cred_unref(&uip->ui_ucred);
 673         }
 674
 675         if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
 676                 memory_object_control_deallocate(uip->ui_control);
 677
 678         cluster_release(uip);
 679         ubc_cs_free(uip);
 680
 681         zfree(ubc_info_zone, uip);
 682         return;
 683 }
 684
 685
 686 void
 687 ubc_info_deallocate(struct ubc_info *uip)
 688 {
 689         ubc_info_free(uip);
 690 }
 691
 692
 693 /*
 694  * ubc_setsize
 695  *
  696  * Tell the VM that the size of the file represented by the vnode has
 697  * changed
 698  *
 699  * Parameters:  vp                      The vp whose backing file size is
 700  *                                      being changed
 701  *              nsize                   The new size of the backing file
 702  *
 703  * Returns:     1                       Success
 704  *              0                       Failure
 705  *
 706  * Notes:       This function will indicate failure if the new size that's
 707  *              being attempted to be set is negative.
 708  *
 709  *              This function will fail if there is no ubc_info currently
 710  *              associated with the vnode.
 711  *
  712  *              This function will indicate success if the new size is the
 713  *              same or larger than the old size (in this case, the remainder
 714  *              of the file will require modification or use of an existing upl
 715  *              to access successfully).
 716  *
 717  *              This function will fail if the new file size is smaller, and
 718  *              the memory region being invalidated was unable to actually be
 719  *              invalidated and/or the last page could not be flushed, if the
 720  *              new size is not aligned to a page boundary.  This is usually
 721  *              indicative of an I/O error.
 722  */
 723 int
 724 ubc_setsize(struct vnode *vp, off_t nsize)
 725 {
 726         off_t osize;    /* ui_size before change */
 727         off_t lastpg, olastpgend, lastoff;
 728         struct ubc_info *uip;
 729         memory_object_control_t control;
 730         kern_return_t kret = KERN_SUCCESS;
 731
 732         if (nsize < (off_t)0)
 733                 return (0);
 734
 735         if (!UBCINFOEXISTS(vp))
 736                 return (0);
 737
 738         uip = vp->v_ubcinfo;
 739         osize = uip->ui_size;
 740         /*
 741          * Update the size before flushing the VM
 742          */
 743         uip->ui_size = nsize;
 744
 745         if (nsize >= osize) {   /* Nothing more to do */
 746                 if (nsize > osize) {
 747                         lock_vnode_and_post(vp, NOTE_EXTEND);
 748                 }
 749
 750                 return (1);             /* return success */
 751         }
 752
 753         /*
 754          * When the file shrinks, invalidate the pages beyond the
 755          * new size. Also get rid of garbage beyond nsize on the
 756          * last page. The ui_size already has the nsize, so any
 757          * subsequent page-in will zero-fill the tail properly
 758          */
 759         lastpg = trunc_page_64(nsize);
 760         olastpgend = round_page_64(osize);
 761         control = uip->ui_control;
 762         assert(control);
 763         lastoff = (nsize & PAGE_MASK_64);
 764
 765         if (lastoff) {
 766                 upl_t           upl;
 767                 upl_page_info_t *pl;
 768
 769
 770                 /*
 771                  * new EOF ends up in the middle of a page
 772                  * zero the tail of this page if its currently
 773                  * present in the cache
 774                  */
 775                 kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);
 776
 777                 if (kret != KERN_SUCCESS)
 778                         panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);
 779
 780                 if (upl_valid_page(pl, 0))
 781                         cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);
 782
 783                 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
 784
 785                 lastpg += PAGE_SIZE_64;
 786         }
 787         if (olastpgend > lastpg) {
 788                 int     flags;
 789
 790                 if (lastpg == 0)
 791                         flags = MEMORY_OBJECT_DATA_FLUSH_ALL;
 792                 else
 793                         flags = MEMORY_OBJECT_DATA_FLUSH;
 794                 /*
 795                  * invalidate the pages beyond the new EOF page
 796                  *
 797                  */
 798                 kret = memory_object_lock_request(control,
 799                                                   (memory_object_offset_t)lastpg,
 800                                                   (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
 801                                                   MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE);
 802                 if (kret != KERN_SUCCESS)
 803                         printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
 804         }
 805         return ((kret == KERN_SUCCESS) ? 1 : 0);
 806 }
 807
 808
 809 /*
 810  * ubc_getsize
 811  *
 812  * Get the size of the file assocated with the specified vnode
 813  *
 814  * Parameters:  vp                      The vnode whose size is of interest
 815  *
 816  * Returns:     0                       There is no ubc_info associated with
 817  *                                      this vnode, or the size is zero
 818  *              !0                      The size of the file
 819  *
 820  * Notes:       Using this routine, it is not possible for a caller to
 821  *              successfully distinguish between a vnode associate with a zero
 822  *              length file, and a vnode with no associated ubc_info.  The
 823  *              caller therefore needs to not care, or needs to ensure that
 824  *              they have previously successfully called ubc_info_init() or
 825  *              ubc_info_init_withsize().
 826  */
 827 off_t
 828 ubc_getsize(struct vnode *vp)
 829 {
 830         /* people depend on the side effect of this working this way
 831          * as they call this for directory
 832          */
 833         if (!UBCINFOEXISTS(vp))
 834                 return ((off_t)0);
 835         return (vp->v_ubcinfo->ui_size);
 836 }
 837
 838
 839 /*
 840  * ubc_umount
 841  *
 842  * Call ubc_sync_range(vp, 0, EOF, UBC_PUSHALL) on all the vnodes for this
 843  * mount point
 844  *
 845  * Parameters:  mp                      The mount point
 846  *
 847  * Returns:     0                       Success
 848  *
 849  * Notes:       There is no failure indication for this function.
 850  *
 851  *              This function is used in the unmount path; since it may block
 852  *              I/O indefinitely, it should not be used in the forced unmount
 853  *              path, since a device unavailability could also block that
 854  *              indefinitely.
 855  *
 856  *              Because there is no device ejection interlock on USB, FireWire,
 857  *              or similar devices, it's possible that an ejection that begins
 858  *              subsequent to the vnode_iterate() completing, either on one of
 859  *              those devices, or a network mount for which the server quits
 860  *              responding, etc., may cause the caller to block indefinitely.
 861  */
 862 __private_extern__ int
 863 ubc_umount(struct mount *mp)
 864 {
 865         vnode_iterate(mp, 0, ubc_umcallback, 0);
 866         return(0);
 867 }
 868
 869
 870 /*
 871  * ubc_umcallback
 872  *
 873  * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
 874  * and vnode_iterate() for details of implementation.
 875  */
 876 static int
 877 ubc_umcallback(vnode_t vp, __unused void * args)
 878 {
 879
 880         if (UBCINFOEXISTS(vp)) {
 881
 882                 (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
 883         }
 884         return (VNODE_RETURNED);
 885 }
 886
 887
 888 /*
 889  * ubc_getcred
 890  *
 891  * Get the credentials currently active for the ubc_info associated with the
 892  * vnode.
 893  *
 894  * Parameters:  vp                      The vnode whose ubc_info credentials
 895  *                                      are to be retrieved
 896  *
 897  * Returns:     !NOCRED                 The credentials
 898  *              NOCRED                  If there is no ubc_info for the vnode,
 899  *                                      or if there is one, but it has not had
 900  *                                      any credentials associated with it via
 901  *                                      a call to ubc_setcred()
 902  */
 903 kauth_cred_t
 904 ubc_getcred(struct vnode *vp)
 905 {
 906         if (UBCINFOEXISTS(vp))
 907                 return (vp->v_ubcinfo->ui_ucred);
 908
 909         return (NOCRED);
 910 }
 911
 912
 913 /*
 914  * ubc_setthreadcred
 915  *
 916  * If they are not already set, set the credentials of the ubc_info structure
 917  * associated with the vnode to those of the supplied thread; otherwise leave
 918  * them alone.
 919  *
 920  * Parameters:  vp                      The vnode whose ubc_info creds are to
 921  *                                      be set
 922  *              p                       The process whose credentials are to
 923  *                                      be used, if not running on an assumed
 924  *                                      credential
 925  *              thread                  The thread whose credentials are to
 926  *                                      be used
 927  *
 928  * Returns:     1                       This vnode has no associated ubc_info
 929  *              0                       Success
 930  *
 931  * Notes:       This function takes a proc parameter to account for bootstrap
 932  *              issues where a task or thread may call this routine, either
 933  *              before credentials have been initialized by bsd_init(), or if
 934  *              there is no BSD info asscoiate with a mach thread yet.  This
 935  *              is known to happen in both the initial swap and memory mapping
 936  *              calls.
 937  *
 938  *              This function is generally used only in the following cases:
 939  *
 940  *              o       a memory mapped file via the mmap() system call
 941  *              o       a memory mapped file via the deprecated map_fd() call
 942  *              o       a swap store backing file
 943  *              o       subsequent to a successful write via vn_write()
 944  *
 945  *              The information is then used by the NFS client in order to
 946  *              cons up a wire message in either the page-in or page-out path.
 947  *
 948  *              There are two potential problems with the use of this API:
 949  *
 950  *              o       Because the write path only set it on a successful
 951  *                      write, there is a race window between setting the
 952  *                      credential and its use to evict the pages to the
 953  *                      remote file server
 954  *
 955  *              o       Because a page-in may occur prior to a write, the
 956  *                      credential may not be set at this time, if the page-in
 957  *                      is not the result of a mapping established via mmap()
 958  *                      or map_fd().
 959  *
 960  *              In both these cases, this will be triggered from the paging
 961  *              path, which will instead use the credential of the current
 962  *              process, which in this case is either the dynamic_pager or
 963  *              the kernel task, both of which utilize "root" credentials.
 964  *
 965  *              This may potentially permit operations to occur which should
 966  *              be denied, or it may cause to be denied operations which
 967  *              should be permitted, depending on the configuration of the NFS
 968  *              server.
 969  */
 970 int
 971 ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
 972 {
 973         struct ubc_info *uip;
 974         kauth_cred_t credp;
 975         struct uthread  *uthread = get_bsdthread_info(thread);
 976
 977         if (!UBCINFOEXISTS(vp))
 978                 return (1);
 979
 980         vnode_lock(vp);
 981
 982         uip = vp->v_ubcinfo;
 983         credp = uip->ui_ucred;
 984
 985         if (!IS_VALID_CRED(credp)) {
 986                 /* use per-thread cred, if assumed identity, else proc cred */
 987                 if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
 988                         uip->ui_ucred = kauth_cred_proc_ref(p);
 989                 } else {
 990                         uip->ui_ucred = uthread->uu_ucred;
 991                         kauth_cred_ref(uip->ui_ucred);
 992                 }
 993         }
 994         vnode_unlock(vp);
 995
 996         return (0);
 997 }
 998
 999
1000 /*
1001  * ubc_setcred
1002  *
1003  * If they are not already set, set the credentials of the ubc_info structure
1004  * associated with the vnode to those of the process; otherwise leave them
1005  * alone.
1006  *
1007  * Parameters:  vp                      The vnode whose ubc_info creds are to
1008  *                                      be set
1009  *              p                       The process whose credentials are to
1010  *                                      be used
1011  *
1012  * Returns:     0                       This vnode has no associated ubc_info
1013  *              1                       Success
1014  *
1015  * Notes:       The return values for this function are inverted from nearly
1016  *              all other uses in the kernel.
1017  *
1018  *              See also ubc_setthreadcred(), above.
1019  *
1020  *              This function is considered deprecated, and generally should
1021  *              not be used, as it is incompatible with per-thread credentials;
1022  *              it exists for legacy KPI reasons.
1023  *
1024  * DEPRECATION: ubc_setcred() is being deprecated. Please use
1025  *              ubc_setthreadcred() instead.
1026  */
1027 int
1028 ubc_setcred(struct vnode *vp, proc_t p)
1029 {
1030         struct ubc_info *uip;
1031         kauth_cred_t credp;
1032
1033         /* If there is no ubc_info, deny the operation */
1034         if ( !UBCINFOEXISTS(vp))
1035                 return (0);
1036
1037         /*
1038          * Check to see if there is already a credential reference in the
1039          * ubc_info; if there is not, take one on the supplied credential.
1040          */
1041         vnode_lock(vp);
1042         uip = vp->v_ubcinfo;
1043         credp = uip->ui_ucred;
1044         if (!IS_VALID_CRED(credp)) {
1045                 uip->ui_ucred = kauth_cred_proc_ref(p);
1046         }
1047         vnode_unlock(vp);
1048
1049         return (1);
1050 }
1051
1052 /*
1053  * ubc_getpager
1054  *
1055  * Get the pager associated with the ubc_info associated with the vnode.
1056  *
1057  * Parameters:  vp                      The vnode to obtain the pager from
1058  *
1059  * Returns:     !VNODE_PAGER_NULL       The memory_object_t for the pager
1060  *              VNODE_PAGER_NULL        There is no ubc_info for this vnode
1061  *
1062  * Notes:       For each vnode that has a ubc_info associated with it, that
1063  *              ubc_info SHALL have a pager associated with it, so in the
1064  *              normal case, it's impossible to return VNODE_PAGER_NULL for
1065  *              a vnode with an associated ubc_info.
1066  */
1067 __private_extern__ memory_object_t
1068 ubc_getpager(struct vnode *vp)
1069 {
1070         if (UBCINFOEXISTS(vp))
1071                 return (vp->v_ubcinfo->ui_pager);
1072
1073         return (0);
1074 }
1075
1076
1077 /*
1078  * ubc_getobject
1079  *
1080  * Get the memory object control associated with the ubc_info associated with
1081  * the vnode
1082  *
1083  * Parameters:  vp                      The vnode to obtain the memory object
1084  *                                      from
1085  *              flags                   DEPRECATED
1086  *
1087  * Returns:     !MEMORY_OBJECT_CONTROL_NULL
1088  *              MEMORY_OBJECT_CONTROL_NULL
1089  *
1090  * Notes:       Historically, if the flags were not "do not reactivate", this
1091  *              function would look up the memory object using the pager if
1092  *              it did not exist (this could be the case if the vnode had
1093  *              been previously reactivated).  The flags would also permit a
1094  *              hold to be requested, which would have created an object
1095  *              reference, if one had not already existed.  This usage is
1096  *              deprecated, as it would permit a race between finding and
1097  *              taking the reference vs. a single reference being dropped in
1098  *              another thread.
1099  */
1100 memory_object_control_t
1101 ubc_getobject(struct vnode *vp, __unused int flags)
1102 {
1103         if (UBCINFOEXISTS(vp))
1104                 return((vp->v_ubcinfo->ui_control));
1105
1106         return (MEMORY_OBJECT_CONTROL_NULL);
1107 }
1108
1109 boolean_t
1110 ubc_strict_uncached_IO(struct vnode *vp)
1111 {
1112         boolean_t result = FALSE;
1113
1114         if (UBCINFOEXISTS(vp)) {
1115                 result = memory_object_is_slid(vp->v_ubcinfo->ui_control);
1116         }
1117         return result;
1118 }
1119
1120 /*
1121  * ubc_blktooff
1122  *
1123  * Convert a given block number to a memory backing object (file) offset for a
1124  * given vnode
1125  *
1126  * Parameters:  vp                      The vnode in which the block is located
1127  *              blkno                   The block number to convert
1128  *
1129  * Returns:     !-1                     The offset into the backing object
1130  *              -1                      There is no ubc_info associated with
1131  *                                      the vnode
1132  *              -1                      An error occurred in the underlying VFS
1133  *                                      while translating the block to an
1134  *                                      offset; the most likely cause is that
1135  *                                      the caller specified a block past the
1136  *                                      end of the file, but this could also be
1137  *                                      any other error from VNOP_BLKTOOFF().
1138  *
1139  * Note:        Representing the error in band loses some information, but does
1140  *              not occlude a valid offset, since an off_t of -1 is normally
1141  *              used to represent EOF.  If we had a more reliable constant in
1142  *              our header files for it (i.e. explicitly cast to an off_t), we
1143  *              would use it here instead.
1144  */
1145 off_t
1146 ubc_blktooff(vnode_t vp, daddr64_t blkno)
1147 {
1148         off_t file_offset = -1;
1149         int error;
1150
1151         if (UBCINFOEXISTS(vp)) {
1152                 error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
1153                 if (error)
1154                         file_offset = -1;
1155         }
1156
1157         return (file_offset);
1158 }
1159
1160
1161 /*
1162  * ubc_offtoblk
1163  *
1164  * Convert a given offset in a memory backing object into a block number for a
1165  * given vnode
1166  *
1167  * Parameters:  vp                      The vnode in which the offset is
1168  *                                      located
1169  *              offset                  The offset into the backing object
1170  *
1171  * Returns:     !-1                     The returned block number
1172  *              -1                      There is no ubc_info associated with
1173  *                                      the vnode
1174  *              -1                      An error occurred in the underlying VFS
1175  *                                      while translating the block to an
1176  *                                      offset; the most likely cause is that
1177  *                                      the caller specified a block past the
1178  *                                      end of the file, but this could also be
1179  *                                      any other error from VNOP_OFFTOBLK().
1180  *
1181  * Note:        Representing the error in band loses some information, but does
1182  *              not occlude a valid block number, since block numbers exceed
1183  *              the valid range for offsets, due to their relative sizes.  If
1184  *              we had a more reliable constant than -1 in our header files
1185  *              for it (i.e. explicitly cast to an daddr64_t), we would use it
1186  *              here instead.
1187  */
1188 daddr64_t
1189 ubc_offtoblk(vnode_t vp, off_t offset)
1190 {
1191         daddr64_t blkno = -1;
1192         int error = 0;
1193
1194         if (UBCINFOEXISTS(vp)) {
1195                 error = VNOP_OFFTOBLK(vp, offset, &blkno);
1196                 if (error)
1197                         blkno = -1;
1198         }
1199
1200         return (blkno);
1201 }
1202
1203
1204 /*
1205  * ubc_pages_resident
1206  *
1207  * Determine whether or not a given vnode has pages resident via the memory
1208  * object control associated with the ubc_info associated with the vnode
1209  *
1210  * Parameters:  vp                      The vnode we want to know about
1211  *
1212  * Returns:     1                       Yes
1213  *              0                       No
1214  */
1215 int
1216 ubc_pages_resident(vnode_t vp)
1217 {
1218         kern_return_t           kret;
1219         boolean_t                       has_pages_resident;
1220
1221         if (!UBCINFOEXISTS(vp))
1222                 return (0);
1223
1224         /*
1225          * The following call may fail if an invalid ui_control is specified,
1226          * or if there is no VM object associated with the control object.  In
1227          * either case, reacting to it as if there were no pages resident will
1228          * result in correct behavior.
1229          */
1230         kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);
1231
1232         if (kret != KERN_SUCCESS)
1233                 return (0);
1234
1235         if (has_pages_resident == TRUE)
1236                 return (1);
1237
1238         return (0);
1239 }
1240
1241
1242 /*
1243  * ubc_sync_range
1244  *
1245  * Clean and/or invalidate a range in the memory object that backs this vnode
1246  *
1247  * Parameters:  vp                      The vnode whose associated ubc_info's
1248  *                                      associated memory object is to have a
1249  *                                      range invalidated within it
1250  *              beg_off                 The start of the range, as an offset
1251  *              end_off                 The end of the range, as an offset
1252  *              flags                   See ubc_msync_internal()
1253  *
1254  * Returns:     1                       Success
1255  *              0                       Failure
1256  *
1257  * Notes:       see ubc_msync_internal() for more detailed information.
1258  *
1259  * DEPRECATED:  This interface is obsolete due to a failure to return error
1260  *              information needed in order to correct failures.  The currently
1261  *              recommended interface is ubc_msync().
1262  */
1263 int
1264 ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
1265 {
1266         return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
1267 }
1268
1269
1270 /*
1271  * ubc_msync
1272  *
1273  * Clean and/or invalidate a range in the memory object that backs this vnode
1274  *
1275  * Parameters:  vp                      The vnode whose associated ubc_info's
1276  *                                      associated memory object is to have a
1277  *                                      range invalidated within it
1278  *              beg_off                 The start of the range, as an offset
1279  *              end_off                 The end of the range, as an offset
1280  *              resid_off               The address of an off_t supplied by the
1281  *                                      caller; may be set to NULL to ignore
1282  *              flags                   See ubc_msync_internal()
1283  *
1284  * Returns:     0                       Success
1285  *              !0                      Failure; an errno is returned
1286  *
1287  * Implicit Returns:
1288  *              *resid_off, modified    If non-NULL, the  contents are ALWAYS
1289  *                                      modified; they are initialized to the
1290  *                                      beg_off, and in case of an I/O error,
1291  *                                      the difference between beg_off and the
1292  *                                      current value will reflect what was
1293  *                                      able to be written before the error
1294  *                                      occurred.  If no error is returned, the
1295  *                                      value of the resid_off is undefined; do
1296  *                                      NOT use it in place of end_off if you
1297  *                                      intend to increment from the end of the
1298  *                                      last call and call iteratively.
1299  *
1300  * Notes:       see ubc_msync_internal() for more detailed information.
1301  *
1302  */
1303 errno_t
1304 ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
1305 {
1306         int retval;
1307         int io_errno = 0;
1308
1309         if (resid_off)
1310                 *resid_off = beg_off;
1311
1312         retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);
1313
1314         if (retval == 0 && io_errno == 0)
1315                 return (EINVAL);
1316         return (io_errno);
1317 }
1318
1319
1320 /*
1321  * Clean and/or invalidate a range in the memory object that backs this vnode
1322  *
1323  * Parameters:  vp                      The vnode whose associated ubc_info's
1324  *                                      associated memory object is to have a
1325  *                                      range invalidated within it
1326  *              beg_off                 The start of the range, as an offset
1327  *              end_off                 The end of the range, as an offset
1328  *              resid_off               The address of an off_t supplied by the
1329  *                                      caller; may be set to NULL to ignore
1330  *              flags                   MUST contain at least one of the flags
1331  *                                      UBC_INVALIDATE, UBC_PUSHDIRTY, or
1332  *                                      UBC_PUSHALL; if UBC_PUSHDIRTY is used,
1333  *                                      UBC_SYNC may also be specified to cause
1334  *                                      this function to block until the
1335  *                                      operation is complete.  The behavior
1336  *                                      of UBC_SYNC is otherwise undefined.
1337  *              io_errno                The address of an int to contain the
1338  *                                      errno from a failed I/O operation, if
1339  *                                      one occurs; may be set to NULL to
1340  *                                      ignore
1341  *
1342  * Returns:     1                       Success
1343  *              0                       Failure
1344  *
1345  * Implicit Returns:
1346  *              *resid_off, modified    The contents of this offset MAY be
1347  *                                      modified; in case of an I/O error, the
1348  *                                      difference between beg_off and the
1349  *                                      current value will reflect what was
1350  *                                      able to be written before the error
1351  *                                      occurred.
1352  *              *io_errno, modified     The contents of this offset are set to
1353  *                                      an errno, if an error occurs; if the
1354  *                                      caller supplies an io_errno parameter,
1355  *                                      they should be careful to initialize it
1356  *                                      to 0 before calling this function to
1357  *                                      enable them to distinguish an error
1358  *                                      with a valid *resid_off from an invalid
1359  *                                      one, and to avoid potentially falsely
1360  *                                      reporting an error, depending on use.
1361  *
1362  * Notes:       If there is no ubc_info associated with the vnode supplied,
1363  *              this function immediately returns success.
1364  *
1365  *              If the value of end_off is less than or equal to beg_off, this
1366  *              function immediately returns success; that is, end_off is NOT
1367  *              inclusive.
1368  *
1369  *              IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
1370  *              UBC_PUSHALL MUST be specified; that is, it is NOT possible to
1371  *              attempt to block on in-progress I/O by calling this function
1372  *              with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
1373  *              in order to block pending on the I/O already in progress.
1374  *
1375  *              The start offset is truncated to the page boundary and the
1376  *              size is adjusted to include the last page in the range; that
1377  *              is, end_off on exactly a page boundary will not change if it
1378  *              is rounded, and the range of bytes written will be from the
1379  *              truncate beg_off to the rounded (end_off - 1).
1380  */
1381 static int
1382 ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
1383 {
1384         memory_object_size_t    tsize;
1385         kern_return_t           kret;
1386         int request_flags = 0;
1387         int flush_flags   = MEMORY_OBJECT_RETURN_NONE;
1388
1389         if ( !UBCINFOEXISTS(vp))
1390                 return (0);
1391         if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
1392                 return (0);
1393         if (end_off <= beg_off)
1394                 return (1);
1395
1396         if (flags & UBC_INVALIDATE)
1397                 /*
1398                  * discard the resident pages
1399                  */
1400                 request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);
1401
1402         if (flags & UBC_SYNC)
1403                 /*
1404                  * wait for all the I/O to complete before returning
1405                  */
1406                 request_flags |= MEMORY_OBJECT_IO_SYNC;
1407
1408         if (flags & UBC_PUSHDIRTY)
1409                 /*
1410                  * we only return the dirty pages in the range
1411                  */
1412                 flush_flags = MEMORY_OBJECT_RETURN_DIRTY;
1413
1414         if (flags & UBC_PUSHALL)
1415                 /*
1416                  * then return all the interesting pages in the range (both
1417                  * dirty and precious) to the pager
1418                  */
1419                 flush_flags = MEMORY_OBJECT_RETURN_ALL;
1420
1421         beg_off = trunc_page_64(beg_off);
1422         end_off = round_page_64(end_off);
1423         tsize   = (memory_object_size_t)end_off - beg_off;
1424
1425         /* flush and/or invalidate pages in the range requested */
1426         kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
1427                                           beg_off, tsize,
1428                                           (memory_object_offset_t *)resid_off,
1429                                           io_errno, flush_flags, request_flags,
1430                                           VM_PROT_NO_CHANGE);
1431
1432         return ((kret == KERN_SUCCESS) ? 1 : 0);
1433 }
1434
1435
1436 /*
1437  * ubc_msync_internal
1438  *
1439  * Explicitly map a vnode that has an associate ubc_info, and add a reference
1440  * to it for the ubc system, if there isn't one already, so it will not be
1441  * recycled while it's in use, and set flags on the ubc_info to indicate that
1442  * we have done this
1443  *
1444  * Parameters:  vp                      The vnode to map
1445  *              flags                   The mapping flags for the vnode; this
1446  *                                      will be a combination of one or more of
1447  *                                      PROT_READ, PROT_WRITE, and PROT_EXEC
1448  *
1449  * Returns:     0                       Success
1450  *              EPERM                   Permission was denied
1451  *
1452  * Notes:       An I/O reference on the vnode must already be held on entry
1453  *
1454  *              If there is no ubc_info associated with the vnode, this function
1455  *              will return success.
1456  *
1457  *              If a permission error occurs, this function will return
1458  *              failure; all other failures will cause this function to return
1459  *              success.
1460  *
1461  *              IMPORTANT: This is an internal use function, and its symbols
1462  *              are not exported, hence its error checking is not very robust.
1463  *              It is primarily used by:
1464  *
1465  *              o       mmap(), when mapping a file
1466  *              o       The deprecated map_fd() interface, when mapping a file
1467  *              o       When mapping a shared file (a shared library in the
1468  *                      shared segment region)
1469  *              o       When loading a program image during the exec process
1470  *
1471  *              ...all of these uses ignore the return code, and any fault that
1472  *              results later because of a failure is handled in the fix-up path
1473  *              of the fault handler.  The interface exists primarily as a
1474  *              performance hint.
1475  *
1476  *              Given that third party implementation of the type of interfaces
1477  *              that would use this function, such as alternative executable
1478  *              formats, etc., are unsupported, this function is not exported
1479  *              for general use.
1480  *
1481  *              The extra reference is held until the VM system unmaps the
1482  *              vnode from its own context to maintain a vnode reference in
1483  *              cases like open()/mmap()/close(), which leave the backing
1484  *              object referenced by a mapped memory region in a process
1485  *              address space.
1486  */
1487 __private_extern__ int
1488 ubc_map(vnode_t vp, int flags)
1489 {
1490         struct ubc_info *uip;
1491         int error = 0;
1492         int need_ref = 0;
1493         int need_wakeup = 0;
1494
1495         if (UBCINFOEXISTS(vp)) {
1496
1497                 vnode_lock(vp);
1498                 uip = vp->v_ubcinfo;
1499
1500                 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1501                         SET(uip->ui_flags, UI_MAPWAITING);
1502                         (void) msleep(&uip->ui_flags, &vp->v_lock,
1503                                       PRIBIO, "ubc_map", NULL);
1504                 }
1505                 SET(uip->ui_flags, UI_MAPBUSY);
1506                 vnode_unlock(vp);
1507
1508                 error = VNOP_MMAP(vp, flags, vfs_context_current());
1509
1510                 if (error != EPERM)
1511                         error = 0;
1512
1513                 vnode_lock_spin(vp);
1514
1515                 if (error == 0) {
1516                         if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
1517                                 need_ref = 1;
1518                         SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
1519                 }
1520                 CLR(uip->ui_flags, UI_MAPBUSY);
1521
1522                 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1523                         CLR(uip->ui_flags, UI_MAPWAITING);
1524                         need_wakeup = 1;
1525                 }
1526                 vnode_unlock(vp);
1527
1528                 if (need_wakeup)
1529                         wakeup(&uip->ui_flags);
1530
1531                 if (need_ref)
1532                         vnode_ref(vp);
1533         }
1534         return (error);
1535 }
1536
1537
1538 /*
1539  * ubc_destroy_named
1540  *
1541  * Destroy the named memory object associated with the ubc_info control object
1542  * associated with the designated vnode, if there is a ubc_info associated
1543  * with the vnode, and a control object is associated with it
1544  *
1545  * Parameters:  vp                      The designated vnode
1546  *
1547  * Returns:     (void)
1548  *
1549  * Notes:       This function is called on vnode termination for all vnodes,
1550  *              and must therefore not assume that there is a ubc_info that is
1551  *              associated with the vnode, nor that there is a control object
1552  *              associated with the ubc_info.
1553  *
1554  *              If all the conditions necessary are present, this function
1555  *              calls memory_object_destroy(), which will in turn end up
1556  *              calling ubc_unmap() to release any vnode references that were
1557  *              established via ubc_map().
1558  *
1559  *              IMPORTANT: This is an internal use function that is used
1560  *              exclusively by the internal use function vclean().
1561  */
1562 __private_extern__ void
1563 ubc_destroy_named(vnode_t vp)
1564 {
1565         memory_object_control_t control;
1566         struct ubc_info *uip;
1567         kern_return_t kret;
1568
1569         if (UBCINFOEXISTS(vp)) {
1570                 uip = vp->v_ubcinfo;
1571
1572                 /* Terminate the memory object  */
1573                 control = ubc_getobject(vp, UBC_HOLDOBJECT);
1574                 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1575                         kret = memory_object_destroy(control, 0);
1576                         if (kret != KERN_SUCCESS)
1577                                 panic("ubc_destroy_named: memory_object_destroy failed");
1578                 }
1579         }
1580 }
1581
1582
1583 /*
1584  * ubc_isinuse
1585  *
1586  * Determine whether or not a vnode is currently in use by ubc at a level in
1587  * excess of the requested busycount
1588  *
1589  * Parameters:  vp                      The vnode to check
1590  *              busycount               The threshold busy count, used to bias
1591  *                                      the count usually already held by the
1592  *                                      caller to avoid races
1593  *
1594  * Returns:     1                       The vnode is in use over the threshold
1595  *              0                       The vnode is not in use over the
1596  *                                      threshold
1597  *
1598  * Notes:       Because the vnode is only held locked while actually asking
1599  *              the use count, this function only represents a snapshot of the
1600  *              current state of the vnode.  If more accurate information is
1601  *              required, an additional busycount should be held by the caller
1602  *              and a non-zero busycount used.
1603  *
1604  *              If there is no ubc_info associated with the vnode, this
1605  *              function will report that the vnode is not in use by ubc.
1606  */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	/*
	 * With a ubc_info present, defer to the locked variant and tell it
	 * (locked == 0) that the vnode lock is not held by this caller.
	 */
	if (UBCINFOEXISTS(vp))
		return (ubc_isinuse_locked(vp, busycount, 0));

	/* no ubc_info: reported as not in use */
	return (0);
}
1614
1615
1616 /*
1617  * ubc_isinuse_locked
1618  *
1619  * Determine whether or not a vnode is currently in use by ubc at a level in
1620  * excess of the requested busycount
1621  *
1622  * Parameters:  vp                      The vnode to check
1623  *              busycount               The threshold busy count, used to bias
1624  *                                      the count usually already held by the
1625  *                                      caller to avoid races
1626  *              locked                  True if the vnode is already locked by
1627  *                                      the caller
1628  *
1629  * Returns:     1                       The vnode is in use over the threshold
1630  *              0                       The vnode is not in use over the
1631  *                                      threshold
1632  *
1633  * Notes:       If the vnode is not locked on entry, it is locked while
1634  *              actually asking the use count.  If this is the case, this
1635  *              function only represents a snapshot of the current state of
1636  *              the vnode.  If more accurate information is required, the
1637  *              vnode lock should be held by the caller, otherwise an
1638  *              additional busycount should be held by the caller and a
1639  *              non-zero busycount used.
1640  *
1641  *              If there is no ubc_info associated with the vnode, this
1642  *              function will report that the vnode is not in use by ubc.
1643  */
1644 int
1645 ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
1646 {
1647         int retval = 0;
1648
1649
1650         if (!locked)
1651                 vnode_lock_spin(vp);
1652
1653         if ((vp->v_usecount - vp->v_kusecount) > busycount)
1654                 retval = 1;
1655
1656         if (!locked)
1657                 vnode_unlock(vp);
1658         return (retval);
1659 }
1660
1661
1662 /*
1663  * ubc_unmap
1664  *
1665  * Reverse the effects of a ubc_map() call for a given vnode
1666  *
1667  * Parameters:  vp                      vnode to unmap from ubc
1668  *
1669  * Returns:     (void)
1670  *
1671  * Notes:       This is an internal use function used by vnode_pager_unmap().
1672  *              It will attempt to obtain a reference on the supplied vnode,
1673  *              and if it can do so, and there is an associated ubc_info, and
1674  *              the flags indicate that it was mapped via ubc_map(), then the
1675  *              flag is cleared, the mapping removed, and the reference taken
1676  *              by ubc_map() is released.
1677  *
1678  *              IMPORTANT: This MUST only be called by the VM
1679  *              to prevent race conditions.
1680  */
1681 __private_extern__ void
1682 ubc_unmap(struct vnode *vp)
1683 {
1684         struct ubc_info *uip;
1685         int     need_rele = 0;
1686         int     need_wakeup = 0;
1687
1688         if (vnode_getwithref(vp))
1689                 return;
1690
1691         if (UBCINFOEXISTS(vp)) {
1692                 vnode_lock(vp);
1693                 uip = vp->v_ubcinfo;
1694
1695                 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1696                         SET(uip->ui_flags, UI_MAPWAITING);
1697                         (void) msleep(&uip->ui_flags, &vp->v_lock,
1698                                       PRIBIO, "ubc_unmap", NULL);
1699                 }
1700                 SET(uip->ui_flags, UI_MAPBUSY);
1701
1702                 if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
1703                         CLR(uip->ui_flags, UI_ISMAPPED);
1704                         need_rele = 1;
1705                 }
1706                 vnode_unlock(vp);
1707
1708                 if (need_rele) {
1709                         (void)VNOP_MNOMAP(vp, vfs_context_current());
1710                         vnode_rele(vp);
1711                 }
1712
1713                 vnode_lock_spin(vp);
1714
1715                 CLR(uip->ui_flags, UI_MAPBUSY);
1716                 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1717                         CLR(uip->ui_flags, UI_MAPWAITING);
1718                         need_wakeup = 1;
1719                 }
1720                 vnode_unlock(vp);
1721
1722                 if (need_wakeup)
1723                         wakeup(&uip->ui_flags);
1724
1725         }
1726         /*
1727          * the drop of the vnode ref will cleanup
1728          */
1729         vnode_put(vp);
1730 }
1731
1732
1733 /*
1734  * ubc_page_op
1735  *
1736  * Manipulate individual page state for a vnode with an associated ubc_info
1737  * with an associated memory object control.
1738  *
1739  * Parameters:  vp                      The vnode backing the page
1740  *              f_offset                A file offset interior to the page
1741  *              ops                     The operations to perform, as a bitmap
1742  *                                      (see below for more information)
1743  *              phys_entryp             The address of a ppnum_t; may be NULL
1744  *                                      to ignore
1745  *              flagsp                  A pointer to an int to contain flags;
1746  *                                      may be NULL to ignore
1747  *
1748  * Returns:     KERN_SUCCESS            Success
1749  *              KERN_INVALID_ARGUMENT   If the memory object control has no VM
1750  *                                      object associated
1751  *              KERN_INVALID_OBJECT     If UPL_POP_PHYSICAL and the object is
1752  *                                      not physically contiguous
1753  *              KERN_INVALID_OBJECT     If !UPL_POP_PHYSICAL and the object is
1754  *                                      physically contiguous
1755  *              KERN_FAILURE            If the page cannot be looked up
1756  *
1757  * Implicit Returns:
1758  *              *phys_entryp (modified) If phys_entryp is non-NULL and
1759  *                                      UPL_POP_PHYSICAL
1760  *              *flagsp (modified)      If flagsp is non-NULL and there was
1761  *                                      !UPL_POP_PHYSICAL and a KERN_SUCCESS
1762  *
1763  * Notes:       For object boundaries, it is considerably more efficient to
1764  *              ensure that f_offset is in fact on a page boundary, as this
1765  *              will avoid internal use of the hash table to identify the
1766  *              page, and would therefore skip a number of early optimizations.
1767  *              Since this is a page operation anyway, the caller should try
1768  *              to pass only a page aligned offset because of this.
1769  *
1770  *              *flagsp may be modified even if this function fails.  If it is
1771  *              modified, it will contain the condition of the page before the
1772  *              requested operation was attempted; these will only include the
1773  *              bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
1774  *              UPL_POP_SET, or UPL_POP_CLR bits.
1775  *
1776  *              The flags field may contain a specific operation, such as
1777  *              UPL_POP_PHYSICAL or UPL_POP_DUMP:
1778  *
1779  *              o       UPL_POP_PHYSICAL        Fail if not contiguous; if
1780  *                                              *phys_entryp and successful, set
1781  *                                              *phys_entryp
1782  *              o       UPL_POP_DUMP            Dump the specified page
1783  *
1784  *              Otherwise, it is treated as a bitmap of one or more page
1785  *              operations to perform on the final memory object; allowable
1786  *              bit values are:
1787  *
1788  *              o       UPL_POP_DIRTY           The page is dirty
1789  *              o       UPL_POP_PAGEOUT         The page is paged out
1790  *              o       UPL_POP_PRECIOUS        The page is precious
1791  *              o       UPL_POP_ABSENT          The page is absent
1792  *              o       UPL_POP_BUSY            The page is busy
1793  *
1794  *              If the page status is only being queried and not modified, then
1795  *              no other bits should be specified.  However, if it is being
1796  *              modified, exactly ONE of the following bits should be set:
1797  *
1798  *              o       UPL_POP_SET             Set the current bitmap bits
1799  *              o       UPL_POP_CLR             Clear the current bitmap bits
1800  *
1801  *              Thus to effect a combination of setting and clearing, it may be
1802  *              necessary to call this function twice.  If this is done, the
1803  *              set should be used before the clear, since clearing may trigger
1804  *              a wakeup on the destination page, and if the page is backed by
1805  *              an encrypted swap file, setting will trigger the decryption
1806  *              needed before the wakeup occurs.
1807  */
1808 kern_return_t
1809 ubc_page_op(
1810         struct vnode    *vp,
1811         off_t           f_offset,
1812         int             ops,
1813         ppnum_t *phys_entryp,
1814         int             *flagsp)
1815 {
1816         memory_object_control_t         control;
1817
1818         control = ubc_getobject(vp, UBC_FLAGS_NONE);
1819         if (control == MEMORY_OBJECT_CONTROL_NULL)
1820                 return KERN_INVALID_ARGUMENT;
1821
1822         return (memory_object_page_op(control,
1823                                       (memory_object_offset_t)f_offset,
1824                                       ops,
1825                                       phys_entryp,
1826                                       flagsp));
1827 }
1828
1829
1830 /*
1831  * ubc_range_op
1832  *
1833  * Manipulate page state for a range of memory for a vnode with an associated
1834  * ubc_info with an associated memory object control, when page level state is
1835  * not required to be returned from the call (i.e. there are no phys_entryp or
1836  * flagsp parameters to this call, and it takes a range which may contain
1837  * multiple pages, rather than an offset interior to a single page).
1838  *
1839  * Parameters:  vp                      The vnode backing the page
1840  *              f_offset_beg            A file offset interior to the start page
1841  *              f_offset_end            A file offset interior to the end page
1842  *              ops                     The operations to perform, as a bitmap
1843  *                                      (see below for more information)
1844  *              range                   The address of an int; may be NULL to
1845  *                                      ignore
1846  *
1847  * Returns:     KERN_SUCCESS            Success
1848  *              KERN_INVALID_ARGUMENT   If the memory object control has no VM
1849  *                                      object associated
1850  *              KERN_INVALID_OBJECT     If the object is physically contiguous
1851  *
1852  * Implicit Returns:
1853  *              *range (modified)       If range is non-NULL, its contents will
1854  *                                      be modified to contain the number of
1855  *                                      bytes successfully operated upon.
1856  *
1857  * Notes:       IMPORTANT: This function cannot be used on a range that
1858  *              consists of physically contiguous pages.
1859  *
1860  *              For object boundaries, it is considerably more efficient to
1861  *              ensure that f_offset_beg and f_offset_end are in fact on page
1862  *              boundaries, as this will avoid internal use of the hash table
1863  *              to identify the page, and would therefore skip a number of
1864  *              early optimizations.  Since this is an operation on a set of
1865  *              pages anyway, the caller should try to pass only page aligned
1866  *              offsets because of this.
1867  *
1868  *              *range will be modified only if this function succeeds.
1869  *
1870  *              The flags field MUST contain a specific operation; allowable
1871  *              values are:
1872  *
1873  *              o       UPL_ROP_ABSENT  Returns the extent of the range
1874  *                                      presented which is absent, starting
1875  *                                      with the start address presented
1876  *
1877  *              o       UPL_ROP_PRESENT Returns the extent of the range
1878  *                                      presented which is present (resident),
1879  *                                      starting with the start address
1880  *                                      presented
1881  *              o       UPL_ROP_DUMP    Dump the pages which are found in the
1882  *                                      target object for the target range.
1883  *
1884  *              IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT; if there are
1885  *              multiple regions in the range, only the first matching region
1886  *              is returned.
1887  */
1888 kern_return_t
1889 ubc_range_op(
1890         struct vnode    *vp,
1891         off_t           f_offset_beg,
1892         off_t           f_offset_end,
1893         int             ops,
1894         int             *range)
1895 {
1896         memory_object_control_t         control;
1897
1898         control = ubc_getobject(vp, UBC_FLAGS_NONE);
1899         if (control == MEMORY_OBJECT_CONTROL_NULL)
1900                 return KERN_INVALID_ARGUMENT;
1901
1902         return (memory_object_range_op(control,
1903                                       (memory_object_offset_t)f_offset_beg,
1904                                       (memory_object_offset_t)f_offset_end,
1905                                       ops,
1906                                       range));
1907 }
1908
1909
1910 /*
1911  * ubc_create_upl
1912  *
1913  * Given a vnode, cause the population of a portion of the vm_object; based on
1914  * the nature of the request, the pages returned may contain valid data, or
1915  * they may be uninitialized.
1916  *
1917  * Parameters:  vp                      The vnode from which to create the upl
1918  *              f_offset                The start offset into the backing store
1919  *                                      represented by the vnode
1920  *              bufsize                 The size of the upl to create
1921  *              uplp                    Pointer to the upl_t to receive the
1922  *                                      created upl; MUST NOT be NULL
1923  *              plp                     Pointer to receive the internal page
1924  *                                      list for the created upl; MAY be NULL
1925  *                                      to ignore
1926  *
1927  * Returns:     KERN_SUCCESS            The requested upl has been created
1928  *              KERN_INVALID_ARGUMENT   The bufsize argument is not an even
1929  *                                      multiple of the page size
1930  *              KERN_INVALID_ARGUMENT   There is no ubc_info associated with
1931  *                                      the vnode, or there is no memory object
1932  *                                      control associated with the ubc_info
1933  *      memory_object_upl_request:KERN_INVALID_VALUE
1934  *                                      The supplied upl_flags argument is
1935  *                                      invalid
1936  * Implicit Returns:
1937  *              *uplp (modified)
1938  *              *plp (modified)         If non-NULL, the value of *plp will be
1939  *                                      modified to point to the internal page
1940  *                                      list; this modification may occur even
1941  *                                      if this function is unsuccessful, in
1942  *                                      which case the contents may be invalid
1943  *
1944  * Note:        If successful, the returned *uplp MUST subsequently be freed
1945  *              via a call to ubc_upl_commit(), ubc_upl_commit_range(),
1946  *              ubc_upl_abort(), or ubc_upl_abort_range().
1947  */
1948 kern_return_t
1949 ubc_create_upl(
1950         struct vnode    *vp,
1951         off_t           f_offset,
1952         int             bufsize,
1953         upl_t           *uplp,
1954         upl_page_info_t **plp,
1955         int             uplflags)
1956 {
1957         memory_object_control_t         control;
1958         kern_return_t                   kr;
1959
1960         if (plp != NULL)
1961                 *plp = NULL;
1962         *uplp = NULL;
1963
1964         if (bufsize & 0xfff)
1965                 return KERN_INVALID_ARGUMENT;
1966
1967         if (bufsize > MAX_UPL_SIZE * PAGE_SIZE)
1968                 return KERN_INVALID_ARGUMENT;
1969
1970         if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) {
1971
1972                 if (uplflags & UPL_UBC_MSYNC) {
1973                         uplflags &= UPL_RET_ONLY_DIRTY;
1974
1975                         uplflags |= UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
1976                                     UPL_SET_INTERNAL | UPL_SET_LITE;
1977
1978                 } else if (uplflags & UPL_UBC_PAGEOUT) {
1979                         uplflags &= UPL_RET_ONLY_DIRTY;
1980
1981                         if (uplflags & UPL_RET_ONLY_DIRTY)
1982                                 uplflags |= UPL_NOBLOCK;
1983
1984                         uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
1985                                     UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE;
1986                 } else {
1987                         uplflags |= UPL_RET_ONLY_ABSENT | UPL_NOBLOCK |
1988                                     UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
1989                                     UPL_SET_INTERNAL | UPL_SET_LITE;
1990                 }
1991         } else {
1992                 uplflags &= ~UPL_FOR_PAGEOUT;
1993
1994                 if (uplflags & UPL_WILL_BE_DUMPED) {
1995                         uplflags &= ~UPL_WILL_BE_DUMPED;
1996                         uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
1997                 } else
1998                         uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
1999         }
2000         control = ubc_getobject(vp, UBC_FLAGS_NONE);
2001         if (control == MEMORY_OBJECT_CONTROL_NULL)
2002                 return KERN_INVALID_ARGUMENT;
2003
2004         kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, NULL, uplflags);
2005         if (kr == KERN_SUCCESS && plp != NULL)
2006                 *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
2007         return kr;
2008 }
2009
2010
2011 /*
2012  * ubc_upl_maxbufsize
2013  *
2014  * Return the maximum bufsize ubc_create_upl( ) will take.
2015  *
2016  * Parameters:  none
2017  *
2018  * Returns:     maximum size buffer (in bytes) ubc_create_upl( ) will take.
2019  */
2020 upl_size_t
2021 ubc_upl_maxbufsize(
2022         void)
2023 {
2024         return(MAX_UPL_SIZE * PAGE_SIZE);
2025 }
2026
2027 /*
2028  * ubc_upl_map
2029  *
2030  * Map the page list associated with the supplied upl into the kernel virtual
2031  * address space at the virtual address indicated by the dst_addr argument;
2032  * the entire upl is mapped
2033  *
2034  * Parameters:  upl                     The upl to map
2035  *              dst_addr                The address at which to map the upl
2036  *
2037  * Returns:     KERN_SUCCESS            The upl has been mapped
2038  *              KERN_INVALID_ARGUMENT   The upl is UPL_NULL
2039  *              KERN_FAILURE            The upl is already mapped
2040  *      vm_map_enter:KERN_INVALID_ARGUMENT
2041  *                                      A failure code from vm_map_enter() due
2042  *                                      to an invalid argument
2043  */
2044 kern_return_t
2045 ubc_upl_map(
2046         upl_t           upl,
2047         vm_offset_t     *dst_addr)
2048 {
2049         return (vm_upl_map(kernel_map, upl, dst_addr));
2050 }
2051
2052
2053 /*
2054  * ubc_upl_unmap
2055  *
2056  * Unmap the page list associated with the supplied upl from the kernel virtual
2057  * address space; the entire upl is unmapped.
2058  *
2059  * Parameters:  upl                     The upl to unmap
2060  *
2061  * Returns:     KERN_SUCCESS            The upl has been unmapped
2062  *              KERN_FAILURE            The upl is not currently mapped
2063  *              KERN_INVALID_ARGUMENT   If the upl is UPL_NULL
2064  */
2065 kern_return_t
2066 ubc_upl_unmap(
2067         upl_t   upl)
2068 {
2069         return(vm_upl_unmap(kernel_map, upl));
2070 }
2071
2072
2073 /*
2074  * ubc_upl_commit
2075  *
2076  * Commit the contents of the upl to the backing store
2077  *
2078  * Parameters:  upl                     The upl to commit
2079  *
2080  * Returns:     KERN_SUCCESS            The upl has been committed
2081  *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
2082  *              KERN_FAILURE            The supplied upl does not represent
2083  *                                      device memory, and the offset plus the
2084  *                                      size would exceed the actual size of
2085  *                                      the upl
2086  *
2087  * Notes:       In practice, the only return value for this function should be
2088  *              KERN_SUCCESS, unless there has been data structure corruption;
2089  *              since the upl is deallocated regardless of success or failure,
2090  *              there's really nothing to do about this other than panic.
2091  *
2092  *              IMPORTANT: Use of this function should not be mixed with use of
2093  *              ubc_upl_commit_range(), due to the unconditional deallocation
2094  *              by this function.
2095  */
2096 kern_return_t
2097 ubc_upl_commit(
2098         upl_t                   upl)
2099 {
2100         upl_page_info_t *pl;
2101         kern_return_t   kr;
2102
2103         pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2104         kr = upl_commit(upl, pl, MAX_UPL_SIZE);
2105         upl_deallocate(upl);
2106         return kr;
2107 }
2108
2109
2110 /*
2111  * ubc_upl_commit_range
2112  *
2113  * Commit the contents of the specified range of the upl to the backing store
2114  *
2115  * Parameters:  upl                     The upl to commit
2116  *              offset                  The offset into the upl
2117  *              size                    The size of the region to be committed,
2118  *                                      starting at the specified offset
2119  *              flags                   commit type (see below)
2120  *
2121  * Returns:     KERN_SUCCESS            The range has been committed
2122  *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
2123  *              KERN_FAILURE            The supplied upl does not represent
2124  *                                      device memory, and the offset plus the
2125  *                                      size would exceed the actual size of
2126  *                                      the upl
2127  *
2128  * Notes:       IMPORTANT: If the commit is successful, and the object is now
2129  *              empty, the upl will be deallocated.  Since the caller cannot
2130  *              check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
2131  *              should generally only be used when the offset is 0 and the size
2132  *              is equal to the upl size.
2133  *
2134  *              The flags argument is a bitmap of flags on the range of pages in
2135  *              the upl to be committed; allowable flags are:
2136  *
2137  *              o       UPL_COMMIT_FREE_ON_EMPTY        Free the upl when it is
2138  *                                                      both empty and has been
2139  *                                                      successfully committed
2140  *              o       UPL_COMMIT_CLEAR_DIRTY          Clear each pages dirty
2141  *                                                      bit; will prevent a
2142  *                                                      later pageout
2143  *              o       UPL_COMMIT_SET_DIRTY            Set each pages dirty
2144  *                                                      bit; will cause a later
2145  *                                                      pageout
2146  *              o       UPL_COMMIT_INACTIVATE           Clear each pages
2147  *                                                      reference bit; the page
2148  *                                                      will not be accessed
2149  *              o       UPL_COMMIT_ALLOW_ACCESS         Unbusy each page; pages
2150  *                                                      become busy when an
2151  *                                                      IOMemoryDescriptor is
2152  *                                                      mapped or redirected,
2153  *                                                      and we have to wait for
2154  *                                                      an IOKit driver
2155  *
2156  *              The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
2157  *              not be specified by the caller.
2158  *
2159  *              The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
2160  *              mutually exclusive, and should not be combined.
2161  */
2162 kern_return_t
2163 ubc_upl_commit_range(
2164         upl_t                   upl,
2165         upl_offset_t            offset,
2166         upl_size_t              size,
2167         int                             flags)
2168 {
2169         upl_page_info_t *pl;
2170         boolean_t               empty;
2171         kern_return_t   kr;
2172
2173         if (flags & UPL_COMMIT_FREE_ON_EMPTY)
2174                 flags |= UPL_COMMIT_NOTIFY_EMPTY;
2175
2176         if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
2177                 return KERN_INVALID_ARGUMENT;
2178         }
2179
2180         pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2181
2182         kr = upl_commit_range(upl, offset, size, flags,
2183                                                   pl, MAX_UPL_SIZE, &empty);
2184
2185         if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
2186                 upl_deallocate(upl);
2187
2188         return kr;
2189 }
2190
2191
2192 /*
2193  * ubc_upl_abort_range
2194  *
2195  * Abort the contents of the specified range of the specified upl
2196  *
2197  * Parameters:  upl                     The upl to abort
2198  *              offset                  The offset into the upl
2199  *              size                    The size of the region to be aborted,
2200  *                                      starting at the specified offset
2201  *              abort_flags             abort type (see below)
2202  *
2203  * Returns:     KERN_SUCCESS            The range has been aborted
2204  *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
2205  *              KERN_FAILURE            The supplied upl does not represent
2206  *                                      device memory, and the offset plus the
2207  *                                      size would exceed the actual size of
2208  *                                      the upl
2209  *
2210  * Notes:       IMPORTANT: If the abort is successful, and the object is now
2211  *              empty, the upl will be deallocated.  Since the caller cannot
2212  *              check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2213  *              should generally only be used when the offset is 0 and the size
2214  *              is equal to the upl size.
2215  *
2216  *              The abort_flags argument is a bitmap of flags on the range of
2217  *              pages in the upl to be aborted; allowable flags are:
2218  *
2219  *              o       UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2220  *                                              empty and has been successfully
2221  *                                              aborted
2222  *              o       UPL_ABORT_RESTART       The operation must be restarted
2223  *              o       UPL_ABORT_UNAVAILABLE   The pages are unavailable
2224  *              o       UPL_ABORT_ERROR         An I/O error occurred
2225  *              o       UPL_ABORT_DUMP_PAGES    Just free the pages
2226  *              o       UPL_ABORT_NOTIFY_EMPTY  RESERVED
2227  *              o       UPL_ABORT_ALLOW_ACCESS  RESERVED
2228  *
2229  *              The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2230  *              not be specified by the caller.  It is intended to fulfill the
2231  *              same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2232  *              ubc_upl_commit_range(), but is never referenced internally.
2233  *
2234  *              The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2235  *              referenced; do not use it.
2236  */
2237 kern_return_t
2238 ubc_upl_abort_range(
2239         upl_t                   upl,
2240         upl_offset_t            offset,
2241         upl_size_t              size,
2242         int                             abort_flags)
2243 {
2244         kern_return_t   kr;
2245         boolean_t               empty = FALSE;
2246
2247         if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
2248                 abort_flags |= UPL_ABORT_NOTIFY_EMPTY;
2249
2250         kr = upl_abort_range(upl, offset, size, abort_flags, &empty);
2251
2252         if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
2253                 upl_deallocate(upl);
2254
2255         return kr;
2256 }
2257
2258
2259 /*
2260  * ubc_upl_abort
2261  *
2262  * Abort the contents of the specified upl
2263  *
2264  * Parameters:  upl                     The upl to abort
2265  *              abort_type              abort type (see below)
2266  *
2267  * Returns:     KERN_SUCCESS            The range has been aborted
2268  *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
2269  *              KERN_FAILURE            The supplied upl does not represent
2270  *                                      device memory, and the offset plus the
2271  *                                      size would exceed the actual size of
2272  *                                      the upl
2273  *
 * Notes:       IMPORTANT: upl_deallocate() is always called on the upl after
 *              upl_abort(), regardless of the value returned by the abort and
 *              of whether UPL_ABORT_FREE_ON_EMPTY was specified.  Unlike
 *              ubc_upl_abort_range(), this function takes no offset or size:
 *              it applies to the upl as a whole.
2279  *
2280  *              The abort_type is a bitmap of flags on the range of
2281  *              pages in the upl to be aborted; allowable flags are:
2282  *
2283  *              o       UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2284  *                                              empty and has been successfully
2285  *                                              aborted
2286  *              o       UPL_ABORT_RESTART       The operation must be restarted
2287  *              o       UPL_ABORT_UNAVAILABLE   The pages are unavailable
2288  *              o       UPL_ABORT_ERROR         An I/O error occurred
2289  *              o       UPL_ABORT_DUMP_PAGES    Just free the pages
2290  *              o       UPL_ABORT_NOTIFY_EMPTY  RESERVED
2291  *              o       UPL_ABORT_ALLOW_ACCESS  RESERVED
2292  *
2293  *              The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2294  *              not be specified by the caller.  It is intended to fulfill the
2295  *              same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2296  *              ubc_upl_commit_range(), but is never referenced internally.
2297  *
2298  *              The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2299  *              referenced; do not use it.
2300  */
2301 kern_return_t
2302 ubc_upl_abort(
2303         upl_t                   upl,
2304         int                             abort_type)
2305 {
2306         kern_return_t   kr;
2307
2308         kr = upl_abort(upl, abort_type);
2309         upl_deallocate(upl);
2310         return kr;
2311 }
2312
2313
2314 /*
2315  * ubc_upl_pageinfo
2316  *
2317  *  Retrieve the internal page list for the specified upl
2318  *
2319  * Parameters:  upl                     The upl to obtain the page list from
2320  *
2321  * Returns:     !NULL                   The (upl_page_info_t *) for the page
2322  *                                      list internal to the upl
2323  *              NULL                    Error/no page list associated
2324  *
2325  * Notes:       IMPORTANT: The function is only valid on internal objects
2326  *              where the list request was made with the UPL_INTERNAL flag.
2327  *
2328  *              This function is a utility helper function, since some callers
2329  *              may not have direct access to the header defining the macro,
2330  *              due to abstraction layering constraints.
2331  */
2332 upl_page_info_t *
2333 ubc_upl_pageinfo(
2334         upl_t                   upl)
2335 {
2336         return (UPL_GET_INTERNAL_PAGE_LIST(upl));
2337 }
2338
2339
2340 int
2341 UBCINFOEXISTS(struct vnode * vp)
2342 {
2343         return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
2344 }
2345
2346
2347 /*
2348  * CODE SIGNING
2349  */
2350 #define CS_BLOB_PAGEABLE 0
2351 static volatile SInt32 cs_blob_size = 0;
2352 static volatile SInt32 cs_blob_count = 0;
2353 static SInt32 cs_blob_size_peak = 0;
2354 static UInt32 cs_blob_size_max = 0;
2355 static SInt32 cs_blob_count_peak = 0;
2356
2357 int cs_validation = 1;
2358
2359 SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_validation, 0, "Do validate code signatures");
2360 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_count, 0, "Current number of code signature blobs");
2361 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_size, 0, "Current size of all code signature blobs");
2362 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
2363 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
2364 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob");
2365
2366 kern_return_t
2367 ubc_cs_blob_allocate(
2368         vm_offset_t     *blob_addr_p,
2369         vm_size_t       *blob_size_p)
2370 {
2371         kern_return_t   kr;
2372
2373 #if CS_BLOB_PAGEABLE
2374         *blob_size_p = round_page(*blob_size_p);
2375         kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
2376 #else   /* CS_BLOB_PAGEABLE */
2377         *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
2378         if (*blob_addr_p == 0) {
2379                 kr = KERN_NO_SPACE;
2380         } else {
2381                 kr = KERN_SUCCESS;
2382         }
2383 #endif  /* CS_BLOB_PAGEABLE */
2384         return kr;
2385 }
2386
2387 void
2388 ubc_cs_blob_deallocate(
2389         vm_offset_t     blob_addr,
2390         vm_size_t       blob_size)
2391 {
2392 #if CS_BLOB_PAGEABLE
2393         kmem_free(kernel_map, blob_addr, blob_size);
2394 #else   /* CS_BLOB_PAGEABLE */
2395         kfree((void *) blob_addr, blob_size);
2396 #endif  /* CS_BLOB_PAGEABLE */
2397 }
2398
2399 int
2400 ubc_cs_blob_add(
2401         struct vnode    *vp,
2402         cpu_type_t      cputype,
2403         off_t           base_offset,
2404         vm_address_t    addr,
2405         vm_size_t       size)
2406 {
2407         kern_return_t           kr;
2408         struct ubc_info         *uip;
2409         struct cs_blob          *blob, *oblob;
2410         int                     error;
2411         ipc_port_t              blob_handle;
2412         memory_object_size_t    blob_size;
2413         const CS_CodeDirectory *cd;
2414         off_t                   blob_start_offset, blob_end_offset;
2415         SHA1_CTX                sha1ctxt;
2416
2417         blob_handle = IPC_PORT_NULL;
2418
2419         blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
2420         if (blob == NULL) {
2421                 return ENOMEM;
2422         }
2423
2424 #if CS_BLOB_PAGEABLE
2425         /* get a memory entry on the blob */
2426         blob_size = (memory_object_size_t) size;
2427         kr = mach_make_memory_entry_64(kernel_map,
2428                                        &blob_size,
2429                                        addr,
2430                                        VM_PROT_READ,
2431                                        &blob_handle,
2432                                        IPC_PORT_NULL);
2433         if (kr != KERN_SUCCESS) {
2434                 error = ENOMEM;
2435                 goto out;
2436         }
2437         if (memory_object_round_page(blob_size) !=
2438             (memory_object_size_t) round_page(size)) {
2439                 printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%lx !?\n",
2440                        blob_size, (size_t)size);
2441                 panic("XXX FBDP size mismatch 0x%llx 0x%lx\n", blob_size, (size_t)size);
2442                 error = EINVAL;
2443                 goto out;
2444         }
2445 #else
2446         blob_size = (memory_object_size_t) size;
2447         blob_handle = IPC_PORT_NULL;
2448 #endif
2449
2450         /* fill in the new blob */
2451         blob->csb_cpu_type = cputype;
2452         blob->csb_base_offset = base_offset;
2453         blob->csb_mem_size = size;
2454         blob->csb_mem_offset = 0;
2455         blob->csb_mem_handle = blob_handle;
2456         blob->csb_mem_kaddr = addr;
2457
2458         /*
2459          * Validate the blob's contents
2460          */
2461         cd = findCodeDirectory(
2462                 (const CS_SuperBlob *) addr,
2463                 (char *) addr,
2464                 (char *) addr + blob->csb_mem_size);
2465         if (cd == NULL) {
2466                 /* no code directory => useless blob ! */
2467                 blob->csb_flags = 0;
2468                 blob->csb_start_offset = 0;
2469                 blob->csb_end_offset = 0;
2470         } else {
2471                 const unsigned char *sha1_base;
2472                 int sha1_size;
2473
2474                 blob->csb_flags = ntohl(cd->flags) | CS_VALID;
2475                 blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
2476                 if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
2477                         const struct Scatter *scatter = (const struct Scatter*)
2478                                 ((const char*)cd + ntohl(cd->scatterOffset));
2479                         blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE;
2480                 } else {
2481                         blob->csb_start_offset = (blob->csb_end_offset -
2482                                                   (ntohl(cd->nCodeSlots) * PAGE_SIZE));
2483                 }
2484                 /* compute the blob's SHA1 hash */
2485                 sha1_base = (const unsigned char *) cd;
2486                 sha1_size = ntohl(cd->length);
2487                 SHA1Init(&sha1ctxt);
2488                 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
2489                 SHA1Final(blob->csb_sha1, &sha1ctxt);
2490         }
2491
2492         /*
2493          * Let policy module check whether the blob's signature is accepted.
2494          */
2495 #if CONFIG_MACF
2496         error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
2497         if (error)
2498                 goto out;
2499 #endif
2500
2501         /*
2502          * Validate the blob's coverage
2503          */
2504         blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
2505         blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;
2506
2507         if (blob_start_offset >= blob_end_offset ||
2508             blob_start_offset < 0 ||
2509             blob_end_offset <= 0) {
2510                 /* reject empty or backwards blob */
2511                 error = EINVAL;
2512                 goto out;
2513         }
2514
2515         vnode_lock(vp);
2516         if (! UBCINFOEXISTS(vp)) {
2517                 vnode_unlock(vp);
2518                 error = ENOENT;
2519                 goto out;
2520         }
2521         uip = vp->v_ubcinfo;
2522
2523         /* check if this new blob overlaps with an existing blob */
2524         for (oblob = uip->cs_blobs;
2525              oblob != NULL;
2526              oblob = oblob->csb_next) {
2527                  off_t oblob_start_offset, oblob_end_offset;
2528
2529                  oblob_start_offset = (oblob->csb_base_offset +
2530                                        oblob->csb_start_offset);
2531                  oblob_end_offset = (oblob->csb_base_offset +
2532                                      oblob->csb_end_offset);
2533                  if (blob_start_offset >= oblob_end_offset ||
2534                      blob_end_offset <= oblob_start_offset) {
2535                          /* no conflict with this existing blob */
2536                  } else {
2537                          /* conflict ! */
2538                          if (blob_start_offset == oblob_start_offset &&
2539                              blob_end_offset == oblob_end_offset &&
2540                              blob->csb_mem_size == oblob->csb_mem_size &&
2541                              blob->csb_flags == oblob->csb_flags &&
2542                              (blob->csb_cpu_type == CPU_TYPE_ANY ||
2543                               oblob->csb_cpu_type == CPU_TYPE_ANY ||
2544                               blob->csb_cpu_type == oblob->csb_cpu_type) &&
2545                              !bcmp(blob->csb_sha1,
2546                                    oblob->csb_sha1,
2547                                    SHA1_RESULTLEN)) {
2548                                  /*
2549                                   * We already have this blob:
2550                                   * we'll return success but
2551                                   * throw away the new blob.
2552                                   */
2553                                  if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
2554                                          /*
2555                                           * The old blob matches this one
2556                                           * but doesn't have any CPU type.
2557                                           * Update it with whatever the caller
2558                                           * provided this time.
2559                                           */
2560                                          oblob->csb_cpu_type = cputype;
2561                                  }
2562                                  vnode_unlock(vp);
2563                                  error = EAGAIN;
2564                                  goto out;
2565                          } else {
2566                                  /* different blob: reject the new one */
2567                                  vnode_unlock(vp);
2568                                  error = EALREADY;
2569                                  goto out;
2570                          }
2571                  }
2572
2573         }
2574
2575
2576         /* mark this vnode's VM object as having "signed pages" */
2577         kr = memory_object_signed(uip->ui_control, TRUE);
2578         if (kr != KERN_SUCCESS) {
2579                 vnode_unlock(vp);
2580                 error = ENOENT;
2581                 goto out;
2582         }
2583
2584         /*
2585          * Add this blob to the list of blobs for this vnode.
2586          * We always add at the front of the list and we never remove a
2587          * blob from the list, so ubc_cs_get_blobs() can return whatever
2588          * the top of the list was and that list will remain valid
2589          * while we validate a page, even after we release the vnode's lock.
2590          */
2591         blob->csb_next = uip->cs_blobs;
2592         uip->cs_blobs = blob;
2593
2594         OSAddAtomic(+1, &cs_blob_count);
2595         if (cs_blob_count > cs_blob_count_peak) {
2596                 cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
2597         }
2598         OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size);
2599         if ((SInt32) cs_blob_size > cs_blob_size_peak) {
2600                 cs_blob_size_peak = (SInt32) cs_blob_size; /* XXX atomic ? */
2601         }
2602         if ((UInt32) blob->csb_mem_size > cs_blob_size_max) {
2603                 cs_blob_size_max = (UInt32) blob->csb_mem_size;
2604         }
2605
2606         if (cs_debug > 1) {
2607                 proc_t p;
2608
2609                 p = current_proc();
2610                 printf("CODE SIGNING: proc %d(%s) "
2611                        "loaded %s signatures for file (%s) "
2612                        "range 0x%llx:0x%llx flags 0x%x\n",
2613                        p->p_pid, p->p_comm,
2614                        blob->csb_cpu_type == -1 ? "detached" : "embedded",
2615                        vnode_name(vp),
2616                        blob->csb_base_offset + blob->csb_start_offset,
2617                        blob->csb_base_offset + blob->csb_end_offset,
2618                        blob->csb_flags);
2619         }
2620
2621         vnode_unlock(vp);
2622
2623         error = 0;      /* success ! */
2624
2625 out:
2626         if (error) {
2627                 /* we failed; release what we allocated */
2628                 if (blob) {
2629                         kfree(blob, sizeof (*blob));
2630                         blob = NULL;
2631                 }
2632                 if (blob_handle != IPC_PORT_NULL) {
2633                         mach_memory_entry_port_release(blob_handle);
2634                         blob_handle = IPC_PORT_NULL;
2635                 }
2636         }
2637
2638         if (error == EAGAIN) {
2639                 /*
2640                  * See above:  error is EAGAIN if we were asked
2641                  * to add an existing blob again.  We cleaned the new
2642                  * blob and we want to return success.
2643                  */
2644                 error = 0;
2645                 /*
2646                  * Since we're not failing, consume the data we received.
2647                  */
2648                 ubc_cs_blob_deallocate(addr, size);
2649         }
2650
2651         return error;
2652 }
2653
2654
2655 struct cs_blob *
2656 ubc_cs_blob_get(
2657         struct vnode    *vp,
2658         cpu_type_t      cputype,
2659         off_t           offset)
2660 {
2661         struct ubc_info *uip;
2662         struct cs_blob  *blob;
2663         off_t offset_in_blob;
2664
2665         vnode_lock_spin(vp);
2666
2667         if (! UBCINFOEXISTS(vp)) {
2668                 blob = NULL;
2669                 goto out;
2670         }
2671
2672         uip = vp->v_ubcinfo;
2673         for (blob = uip->cs_blobs;
2674              blob != NULL;
2675              blob = blob->csb_next) {
2676                 if (cputype != -1 && blob->csb_cpu_type == cputype) {
2677                         break;
2678                 }
2679                 if (offset != -1) {
2680                         offset_in_blob = offset - blob->csb_base_offset;
2681                         if (offset_in_blob >= blob->csb_start_offset &&
2682                             offset_in_blob < blob->csb_end_offset) {
2683                                 /* our offset is covered by this blob */
2684                                 break;
2685                         }
2686                 }
2687         }
2688
2689 out:
2690         vnode_unlock(vp);
2691
2692         return blob;
2693 }
2694
2695 static void
2696 ubc_cs_free(
2697         struct ubc_info *uip)
2698 {
2699         struct cs_blob  *blob, *next_blob;
2700
2701         for (blob = uip->cs_blobs;
2702              blob != NULL;
2703              blob = next_blob) {
2704                 next_blob = blob->csb_next;
2705                 if (blob->csb_mem_kaddr != 0) {
2706                         ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
2707                                                blob->csb_mem_size);
2708                         blob->csb_mem_kaddr = 0;
2709                 }
2710                 if (blob->csb_mem_handle != IPC_PORT_NULL) {
2711                         mach_memory_entry_port_release(blob->csb_mem_handle);
2712                 }
2713                 blob->csb_mem_handle = IPC_PORT_NULL;
2714                 OSAddAtomic(-1, &cs_blob_count);
2715                 OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size);
2716                 kfree(blob, sizeof (*blob));
2717         }
2718 #if CHECK_CS_VALIDATION_BITMAP
2719         ubc_cs_validation_bitmap_deallocate( uip->ui_vnode );
2720 #endif
2721         uip->cs_blobs = NULL;
2722 }
2723
2724 struct cs_blob *
2725 ubc_get_cs_blobs(
2726         struct vnode    *vp)
2727 {
2728         struct ubc_info *uip;
2729         struct cs_blob  *blobs;
2730
2731         /*
2732          * No need to take the vnode lock here.  The caller must be holding
2733          * a reference on the vnode (via a VM mapping or open file descriptor),
2734          * so the vnode will not go away.  The ubc_info stays until the vnode
2735          * goes away.  And we only modify "blobs" by adding to the head of the
2736          * list.
2737          * The ubc_info could go away entirely if the vnode gets reclaimed as
2738          * part of a forced unmount.  In the case of a code-signature validation
2739          * during a page fault, the "paging_in_progress" reference on the VM
2740          * object guarantess that the vnode pager (and the ubc_info) won't go
2741          * away during the fault.
2742          * Other callers need to protect against vnode reclaim by holding the
2743          * vnode lock, for example.
2744          */
2745
2746         if (! UBCINFOEXISTS(vp)) {
2747                 blobs = NULL;
2748                 goto out;
2749         }
2750
2751         uip = vp->v_ubcinfo;
2752         blobs = uip->cs_blobs;
2753
2754 out:
2755         return blobs;
2756 }
2757
2758 unsigned long cs_validate_page_no_hash = 0;
2759 unsigned long cs_validate_page_bad_hash = 0;
2760 boolean_t
2761 cs_validate_page(
2762         void                    *_blobs,
2763         memory_object_offset_t  page_offset,
2764         const void              *data,
2765         boolean_t               *tainted)
2766 {
2767         SHA1_CTX                sha1ctxt;
2768         unsigned char           actual_hash[SHA1_RESULTLEN];
2769         unsigned char           expected_hash[SHA1_RESULTLEN];
2770         boolean_t               found_hash;
2771         struct cs_blob          *blobs, *blob;
2772         const CS_CodeDirectory  *cd;
2773         const CS_SuperBlob      *embedded;
2774         const unsigned char     *hash;
2775         boolean_t               validated;
2776         off_t                   offset; /* page offset in the file */
2777         size_t                  size;
2778         off_t                   codeLimit = 0;
2779         char                    *lower_bound, *upper_bound;
2780         vm_offset_t             kaddr, blob_addr;
2781         vm_size_t               ksize;
2782         kern_return_t           kr;
2783
2784         offset = page_offset;
2785
2786         /* retrieve the expected hash */
2787         found_hash = FALSE;
2788         blobs = (struct cs_blob *) _blobs;
2789
2790         for (blob = blobs;
2791              blob != NULL;
2792              blob = blob->csb_next) {
2793                 offset = page_offset - blob->csb_base_offset;
2794                 if (offset < blob->csb_start_offset ||
2795                     offset >= blob->csb_end_offset) {
2796                         /* our page is not covered by this blob */
2797                         continue;
2798                 }
2799
2800                 /* map the blob in the kernel address space */
2801                 kaddr = blob->csb_mem_kaddr;
2802                 if (kaddr == 0) {
2803                         ksize = (vm_size_t) (blob->csb_mem_size +
2804                                              blob->csb_mem_offset);
2805                         kr = vm_map(kernel_map,
2806                                     &kaddr,
2807                                     ksize,
2808                                     0,
2809                                     VM_FLAGS_ANYWHERE,
2810                                     blob->csb_mem_handle,
2811                                     0,
2812                                     TRUE,
2813                                     VM_PROT_READ,
2814                                     VM_PROT_READ,
2815                                     VM_INHERIT_NONE);
2816                         if (kr != KERN_SUCCESS) {
2817                                 /* XXX FBDP what to do !? */
2818                                 printf("cs_validate_page: failed to map blob, "
2819                                        "size=0x%lx kr=0x%x\n",
2820                                        (size_t)blob->csb_mem_size, kr);
2821                                 break;
2822                         }
2823                 }
2824                 blob_addr = kaddr + blob->csb_mem_offset;
2825
2826                 lower_bound = CAST_DOWN(char *, blob_addr);
2827                 upper_bound = lower_bound + blob->csb_mem_size;
2828
2829                 embedded = (const CS_SuperBlob *) blob_addr;
2830                 cd = findCodeDirectory(embedded, lower_bound, upper_bound);
2831                 if (cd != NULL) {
2832                         if (cd->pageSize != PAGE_SHIFT ||
2833                             cd->hashType != 0x1 ||
2834                             cd->hashSize != SHA1_RESULTLEN) {
2835                                 /* bogus blob ? */
2836                                 continue;
2837                         }
2838
2839                         offset = page_offset - blob->csb_base_offset;
2840                         if (offset < blob->csb_start_offset ||
2841                             offset >= blob->csb_end_offset) {
2842                                 /* our page is not covered by this blob */
2843                                 continue;
2844                         }
2845
2846                         codeLimit = ntohl(cd->codeLimit);
2847                         hash = hashes(cd, atop(offset),
2848                                       lower_bound, upper_bound);
2849                         if (hash != NULL) {
2850                                 bcopy(hash, expected_hash,
2851                                       sizeof (expected_hash));
2852                                 found_hash = TRUE;
2853                         }
2854
2855                         break;
2856                 }
2857         }
2858
2859         if (found_hash == FALSE) {
2860                 /*
2861                  * We can't verify this page because there is no signature
2862                  * for it (yet).  It's possible that this part of the object
2863                  * is not signed, or that signatures for that part have not
2864                  * been loaded yet.
2865                  * Report that the page has not been validated and let the
2866                  * caller decide if it wants to accept it or not.
2867                  */
2868                 cs_validate_page_no_hash++;
2869                 if (cs_debug > 1) {
2870                         printf("CODE SIGNING: cs_validate_page: "
2871                                "off 0x%llx: no hash to validate !?\n",
2872                                page_offset);
2873                 }
2874                 validated = FALSE;
2875                 *tainted = FALSE;
2876         } else {
2877
2878                 size = PAGE_SIZE;
2879                 const uint32_t *asha1, *esha1;
2880                 if ((off_t)(offset + size) > codeLimit) {
2881                         /* partial page at end of segment */
2882                         assert(offset < codeLimit);
2883                         size = (size_t) (codeLimit & PAGE_MASK);
2884                 }
2885                 /* compute the actual page's SHA1 hash */
2886                 SHA1Init(&sha1ctxt);
2887                 SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
2888                 SHA1Final(actual_hash, &sha1ctxt);
2889
2890                 asha1 = (const uint32_t *) actual_hash;
2891                 esha1 = (const uint32_t *) expected_hash;
2892
2893                 if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
2894                         if (cs_debug) {
2895                                 printf("CODE SIGNING: cs_validate_page: "
2896                                        "off 0x%llx size 0x%lx: "
2897                                        "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
2898                                        "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
2899                                        page_offset, size,
2900                                        asha1[0], asha1[1], asha1[2],
2901                                        asha1[3], asha1[4],
2902                                        esha1[0], esha1[1], esha1[2],
2903                                        esha1[3], esha1[4]);
2904                         }
2905                         cs_validate_page_bad_hash++;
2906                         *tainted = TRUE;
2907                 } else {
2908                         if (cs_debug > 1) {
2909                                 printf("CODE SIGNING: cs_validate_page: "
2910                                        "off 0x%llx size 0x%lx: SHA1 OK\n",
2911                                        page_offset, size);
2912                         }
2913                         *tainted = FALSE;
2914                 }
2915                 validated = TRUE;
2916         }
2917
2918         return validated;
2919 }
2920
2921 int
2922 ubc_cs_getcdhash(
2923         vnode_t         vp,
2924         off_t           offset,
2925         unsigned char   *cdhash)
2926 {
2927         struct cs_blob  *blobs, *blob;
2928         off_t           rel_offset;
2929         int             ret;
2930
2931         vnode_lock(vp);
2932
2933         blobs = ubc_get_cs_blobs(vp);
2934         for (blob = blobs;
2935              blob != NULL;
2936              blob = blob->csb_next) {
2937                 /* compute offset relative to this blob */
2938                 rel_offset = offset - blob->csb_base_offset;
2939                 if (rel_offset >= blob->csb_start_offset &&
2940                     rel_offset < blob->csb_end_offset) {
2941                         /* this blob does cover our "offset" ! */
2942                         break;
2943                 }
2944         }
2945
2946         if (blob == NULL) {
2947                 /* we didn't find a blob covering "offset" */
2948                 ret = EBADEXEC; /* XXX any better error ? */
2949         } else {
2950                 /* get the SHA1 hash of that blob */
2951                 bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
2952                 ret = 0;
2953         }
2954
2955         vnode_unlock(vp);
2956
2957         return ret;
2958 }
2959
2960 #if CHECK_CS_VALIDATION_BITMAP
2961 #define stob(s) ((atop_64((s)) + 07) >> 3)
2962 extern  boolean_t       root_fs_upgrade_try;
2963
2964 /*
2965  * Should we use the code-sign bitmap to avoid repeated code-sign validation?
2966  * Depends:
2967  * a) Is the target vnode on the root filesystem?
2968  * b) Has someone tried to mount the root filesystem read-write?
2969  * If answers are (a) yes AND (b) no, then we can use the bitmap.
2970  */
2971 #define USE_CODE_SIGN_BITMAP(vp)        ( (vp != NULL) && (vp->v_mount != NULL) && (vp->v_mount->mnt_flag & MNT_ROOTFS) && !root_fs_upgrade_try)
2972 kern_return_t
2973 ubc_cs_validation_bitmap_allocate(
2974         vnode_t         vp)
2975 {
2976         kern_return_t   kr = KERN_SUCCESS;
2977         struct ubc_info *uip;
2978         char            *target_bitmap;
2979         vm_object_size_t        bitmap_size;
2980
2981         if ( ! USE_CODE_SIGN_BITMAP(vp) || (! UBCINFOEXISTS(vp))) {
2982                 kr = KERN_INVALID_ARGUMENT;
2983         } else {
2984                 uip = vp->v_ubcinfo;
2985
2986                 if ( uip->cs_valid_bitmap == NULL ) {
2987                         bitmap_size = stob(uip->ui_size);
2988                         target_bitmap = (char*) kalloc( (vm_size_t)bitmap_size );
2989                         if (target_bitmap == 0) {
2990                                 kr = KERN_NO_SPACE;
2991                         } else {
2992                                 kr = KERN_SUCCESS;
2993                         }
2994                         if( kr == KERN_SUCCESS ) {
2995                                 memset( target_bitmap, 0, (size_t)bitmap_size);
2996                                 uip->cs_valid_bitmap = (void*)target_bitmap;
2997                                 uip->cs_valid_bitmap_size = bitmap_size;
2998                         }
2999                 }
3000         }
3001         return kr;
3002 }
3003
3004 kern_return_t
3005 ubc_cs_check_validation_bitmap (
3006         vnode_t                 vp,
3007         memory_object_offset_t          offset,
3008         int                     optype)
3009 {
3010         kern_return_t   kr = KERN_SUCCESS;
3011
3012         if ( ! USE_CODE_SIGN_BITMAP(vp) || ! UBCINFOEXISTS(vp)) {
3013                 kr = KERN_INVALID_ARGUMENT;
3014         } else {
3015                 struct ubc_info *uip = vp->v_ubcinfo;
3016                 char            *target_bitmap = uip->cs_valid_bitmap;
3017
3018                 if ( target_bitmap == NULL ) {
3019                        kr = KERN_INVALID_ARGUMENT;
3020                 } else {
3021                         uint64_t        bit, byte;
3022                         bit = atop_64( offset );
3023                         byte = bit >> 3;
3024
3025                         if ( byte > uip->cs_valid_bitmap_size ) {
3026                                kr = KERN_INVALID_ARGUMENT;
3027                         } else {
3028
3029                                 if (optype == CS_BITMAP_SET) {
3030                                         target_bitmap[byte] |= (1 << (bit & 07));
3031                                         kr = KERN_SUCCESS;
3032                                 } else if (optype == CS_BITMAP_CLEAR) {
3033                                         target_bitmap[byte] &= ~(1 << (bit & 07));
3034                                         kr = KERN_SUCCESS;
3035                                 } else if (optype == CS_BITMAP_CHECK) {
3036                                         if ( target_bitmap[byte] & (1 << (bit & 07))) {
3037                                                 kr = KERN_SUCCESS;
3038                                         } else {
3039                                                 kr = KERN_FAILURE;
3040                                         }
3041                                 }
3042                         }
3043                 }
3044         }
3045         return kr;
3046 }
3047
3048 void
3049 ubc_cs_validation_bitmap_deallocate(
3050         vnode_t         vp)
3051 {
3052         struct ubc_info *uip;
3053         void            *target_bitmap;
3054         vm_object_size_t        bitmap_size;
3055
3056         if ( UBCINFOEXISTS(vp)) {
3057                 uip = vp->v_ubcinfo;
3058
3059                 if ( (target_bitmap = uip->cs_valid_bitmap) != NULL ) {
3060                         bitmap_size = uip->cs_valid_bitmap_size;
3061                         kfree( target_bitmap, (vm_size_t) bitmap_size );
3062                         uip->cs_valid_bitmap = NULL;
3063                 }
3064         }
3065 }
3066 #else
3067 kern_return_t   ubc_cs_validation_bitmap_allocate(__unused vnode_t vp){
3068         return KERN_INVALID_ARGUMENT;
3069 }
3070
3071 kern_return_t ubc_cs_check_validation_bitmap(
3072         __unused struct vnode *vp,
3073         __unused memory_object_offset_t offset,
3074         __unused int optype){
3075
3076         return KERN_INVALID_ARGUMENT;
3077 }
3078
3079 void    ubc_cs_validation_bitmap_deallocate(__unused vnode_t vp){
3080         return;
3081 }
3082 #endif /* CHECK_CS_VALIDATION_BITMAP */