]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/ubc_subr.c
xnu-1699.26.8.tar.gz
[apple/xnu.git] / bsd / kern / ubc_subr.c
1 /*
2 * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: ubc_subr.c
30 * Author: Umesh Vaishampayan [umeshv@apple.com]
31 * 05-Aug-1999 umeshv Created.
32 *
33 * Functions related to Unified Buffer cache.
34 *
35 * Caller of UBC functions MUST have a valid reference on the vnode.
36 *
37 */
38
39 #include <sys/types.h>
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/lock.h>
43 #include <sys/mman.h>
44 #include <sys/mount_internal.h>
45 #include <sys/vnode_internal.h>
46 #include <sys/ubc_internal.h>
47 #include <sys/ucred.h>
48 #include <sys/proc_internal.h>
49 #include <sys/kauth.h>
50 #include <sys/buf.h>
51 #include <sys/user.h>
52 #include <sys/codesign.h>
53
54 #include <mach/mach_types.h>
55 #include <mach/memory_object_types.h>
56 #include <mach/memory_object_control.h>
57 #include <mach/vm_map.h>
58 #include <mach/mach_vm.h>
59 #include <mach/upl.h>
60
61 #include <kern/kern_types.h>
62 #include <kern/kalloc.h>
63 #include <kern/zalloc.h>
64 #include <kern/thread.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_protos.h> /* last */
67
68 #include <libkern/crypto/sha1.h>
69
70 #include <security/mac_framework.h>
71
72 /* XXX These should be in a BSD accessible Mach header, but aren't. */
73 extern kern_return_t memory_object_pages_resident(memory_object_control_t,
74 boolean_t *);
75 extern kern_return_t memory_object_signed(memory_object_control_t control,
76 boolean_t is_signed);
77 extern boolean_t memory_object_is_slid(memory_object_control_t control);
78
79 extern void Debugger(const char *message);
80
81
82 /* XXX no one uses this interface! */
83 kern_return_t ubc_page_op_with_control(
84 memory_object_control_t control,
85 off_t f_offset,
86 int ops,
87 ppnum_t *phys_entryp,
88 int *flagsp);
89
90
91 #if DIAGNOSTIC
92 #if defined(assert)
93 #undef assert
94 #endif
95 #define assert(cond) \
96 ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
97 #else
98 #include <kern/assert.h>
99 #endif /* DIAGNOSTIC */
100
101 static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
102 static int ubc_umcallback(vnode_t, void *);
103 static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
104 static void ubc_cs_free(struct ubc_info *uip);
105
106 struct zone *ubc_info_zone;
107
108
109 /*
110 * CODESIGNING
111 * Routines to navigate code signing data structures in the kernel...
112 */
113
/* When non-zero, the code signing routines below printf() diagnostics. */
extern int cs_debug;
115
116 static boolean_t
117 cs_valid_range(
118 const void *start,
119 const void *end,
120 const void *lower_bound,
121 const void *upper_bound)
122 {
123 if (upper_bound < lower_bound ||
124 end < start) {
125 return FALSE;
126 }
127
128 if (start < lower_bound ||
129 end > upper_bound) {
130 return FALSE;
131 }
132
133 return TRUE;
134 }
135
/*
 * Code Signing constants: blob magic numbers, followed by the SuperBlob
 * index slot types this file looks for.
 */
enum {
	/* blob magic numbers */
	CSMAGIC_REQUIREMENT		= 0xfade0c00,	/* single Requirement blob */
	CSMAGIC_REQUIREMENTS		= 0xfade0c01,	/* Requirements vector (internal requirements) */
	CSMAGIC_CODEDIRECTORY		= 0xfade0c02,	/* CodeDirectory blob */
	CSMAGIC_EMBEDDED_SIGNATURE	= 0xfade0cc0,	/* embedded form of signature data */
	CSMAGIC_EMBEDDED_SIGNATURE_OLD	= 0xfade0b02,	/* XXX */
	CSMAGIC_EMBEDDED_ENTITLEMENTS	= 0xfade7171,	/* embedded entitlements */
	CSMAGIC_DETACHED_SIGNATURE	= 0xfade0cc1,	/* multi-arch collection of embedded signatures */

	/* SuperBlob index slot types */
	CSSLOT_CODEDIRECTORY		= 0,		/* slot index for CodeDirectory */
	CSSLOT_ENTITLEMENTS		= 5
};
151
/* First CodeDirectory version that supports the scatter option. */
static const uint32_t supportsScatter = 0x20100;
153
/*
 * Layout of an embedded-signature SuperBlob: a fixed header followed by
 * `count' index entries, each giving the type of a contained blob and its
 * offset.  The blobs themselves follow in no particular order.
 */
typedef struct __BlobIndex {
	uint32_t	type;		/* type of entry */
	uint32_t	offset;		/* offset of entry */
} CS_BlobIndex;

typedef struct __SuperBlob {
	uint32_t	magic;		/* magic number */
	uint32_t	length;		/* total length of SuperBlob */
	uint32_t	count;		/* number of index entries following */
	CS_BlobIndex	index[];	/* (count) entries */
	/* the blobs located by the offsets in index[] come after this */
} CS_SuperBlob;
169
/* Header common to the blobs handled here, followed by the blob payload. */
typedef struct __GenericBlob {
	uint32_t	magic;		/* magic number */
	uint32_t	length;		/* total length of blob */
	char		data[];		/* payload */
} CS_GenericBlob;
175
/* One element of a CodeDirectory scatter vector. */
struct Scatter {
	uint32_t	count;		/* number of pages; zero for sentinel (only) */
	uint32_t	base;		/* first page number */
	uint64_t	targetOffset;	/* offset in target */
	uint64_t	spare;		/* reserved */
};
182
/*
 * C view of a CodeDirectory blob header.  The variable-size content that
 * follows is located through the offset fields.
 */
typedef struct __CodeDirectory {
	uint32_t	magic;		/* magic number (CSMAGIC_CODEDIRECTORY) */
	uint32_t	length;		/* total length of CodeDirectory blob */
	uint32_t	version;	/* compatibility version */
	uint32_t	flags;		/* setup and mode flags */
	uint32_t	hashOffset;	/* offset of hash slot element at index zero */
	uint32_t	identOffset;	/* offset of identifier string */
	uint32_t	nSpecialSlots;	/* number of special hash slots */
	uint32_t	nCodeSlots;	/* number of ordinary (code) hash slots */
	uint32_t	codeLimit;	/* limit to main image signature range */
	uint8_t		hashSize;	/* size of each hash in bytes */
	uint8_t		hashType;	/* type of hash (cdHashType* constants) */
	uint8_t		spare1;		/* unused (must be zero) */
	uint8_t		pageSize;	/* log2(page size in bytes); 0 => infinite */
	uint32_t	spare2;		/* unused (must be zero) */
	/* present from version 0x20100 on */
	uint32_t	scatterOffset;	/* offset of optional scatter vector */
	/* dynamic content, located by the offset fields above, follows */
} CS_CodeDirectory;
205
206
207 /*
208 * Locate the CodeDirectory from an embedded signature blob
209 */
210 static const
211 CS_CodeDirectory *findCodeDirectory(
212 const CS_SuperBlob *embedded,
213 char *lower_bound,
214 char *upper_bound)
215 {
216 const CS_CodeDirectory *cd = NULL;
217
218 if (embedded &&
219 cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
220 ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
221 const CS_BlobIndex *limit;
222 const CS_BlobIndex *p;
223
224 limit = &embedded->index[ntohl(embedded->count)];
225 if (!cs_valid_range(&embedded->index[0], limit,
226 lower_bound, upper_bound)) {
227 return NULL;
228 }
229 for (p = embedded->index; p < limit; ++p) {
230 if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
231 const unsigned char *base;
232
233 base = (const unsigned char *)embedded;
234 cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
235 break;
236 }
237 }
238 } else {
239 /*
240 * Detached signatures come as a bare CS_CodeDirectory,
241 * without a blob.
242 */
243 cd = (const CS_CodeDirectory *) embedded;
244 }
245
246 if (cd &&
247 cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
248 cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
249 lower_bound, upper_bound) &&
250 cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
251 lower_bound, upper_bound) &&
252 cs_valid_range(cd, (const char *) cd +
253 ntohl(cd->hashOffset) +
254 (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
255 lower_bound, upper_bound) &&
256
257 ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
258 return cd;
259 }
260
261 // not found or not a valid code directory
262 return NULL;
263 }
264
265
266 /*
267 * Locating a page hash
268 */
269 static const unsigned char *
270 hashes(
271 const CS_CodeDirectory *cd,
272 unsigned page,
273 char *lower_bound,
274 char *upper_bound)
275 {
276 const unsigned char *base, *top, *hash;
277 uint32_t nCodeSlots = ntohl(cd->nCodeSlots);
278
279 assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));
280
281 if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
282 /* Get first scatter struct */
283 const struct Scatter *scatter = (const struct Scatter*)
284 ((const char*)cd + ntohl(cd->scatterOffset));
285 uint32_t hashindex=0, scount, sbase=0;
286 /* iterate all scatter structs */
287 do {
288 if((const char*)scatter > (const char*)cd + ntohl(cd->length)) {
289 if(cs_debug) {
290 printf("CODE SIGNING: Scatter extends past Code Directory\n");
291 }
292 return NULL;
293 }
294
295 scount = ntohl(scatter->count);
296 uint32_t new_base = ntohl(scatter->base);
297
298 /* last scatter? */
299 if (scount == 0) {
300 return NULL;
301 }
302
303 if((hashindex > 0) && (new_base <= sbase)) {
304 if(cs_debug) {
305 printf("CODE SIGNING: unordered Scatter, prev base %d, cur base %d\n",
306 sbase, new_base);
307 }
308 return NULL; /* unordered scatter array */
309 }
310 sbase = new_base;
311
312 /* this scatter beyond page we're looking for? */
313 if (sbase > page) {
314 return NULL;
315 }
316
317 if (sbase+scount >= page) {
318 /* Found the scatter struct that is
319 * referencing our page */
320
321 /* base = address of first hash covered by scatter */
322 base = (const unsigned char *)cd + ntohl(cd->hashOffset) +
323 hashindex * SHA1_RESULTLEN;
324 /* top = address of first hash after this scatter */
325 top = base + scount * SHA1_RESULTLEN;
326 if (!cs_valid_range(base, top, lower_bound,
327 upper_bound) ||
328 hashindex > nCodeSlots) {
329 return NULL;
330 }
331
332 break;
333 }
334
335 /* this scatter struct is before the page we're looking
336 * for. Iterate. */
337 hashindex+=scount;
338 scatter++;
339 } while(1);
340
341 hash = base + (page - sbase) * SHA1_RESULTLEN;
342 } else {
343 base = (const unsigned char *)cd + ntohl(cd->hashOffset);
344 top = base + nCodeSlots * SHA1_RESULTLEN;
345 if (!cs_valid_range(base, top, lower_bound, upper_bound) ||
346 page > nCodeSlots) {
347 return NULL;
348 }
349 assert(page < nCodeSlots);
350
351 hash = base + page * SHA1_RESULTLEN;
352 }
353
354 if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
355 lower_bound, upper_bound)) {
356 hash = NULL;
357 }
358
359 return hash;
360 }
361 /*
362 * CODESIGNING
363 * End of routines to navigate code signing data structures in the kernel.
364 */
365
366 /*
367 * ENTITLEMENTS
368 * Routines to navigate entitlements in the kernel.
369 */
370
371 /* Retrieve the entitlements blob for a process.
372 * Returns:
373 * EINVAL no text vnode associated with the process
374 * EBADEXEC invalid code signing data
375 * ENOMEM you should reboot
376 * 0 no error occurred
377 *
378 * On success, out_start and out_length will point to the
379 * entitlements blob if found; or will be set to NULL/zero
380 * if there were no entitlements.
381 */
382 int
383 cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
384 {
385 SHA1_CTX context; /* XXX hash agility */
386 int error = 0;
387 struct cs_blob *blob_list_entry;
388 CS_SuperBlob *super_blob;
389 CS_BlobIndex *blob_index;
390 CS_GenericBlob *blob;
391 CS_CodeDirectory *code_dir;
392 unsigned char *computed_hash = NULL;
393 unsigned char *embedded_hash = NULL;
394 void *start = NULL;
395 size_t length = 0;
396 size_t hash_size = 0;
397 unsigned int i, count;
398
399 if (NULL == p->p_textvp) {
400 error = EINVAL;
401 goto out;
402 }
403 if (NULL == (blob_list_entry = ubc_cs_blob_get(p->p_textvp, -1,
404 p->p_textoff)))
405 goto out;
406 super_blob = (void *)blob_list_entry->csb_mem_kaddr;
407 if (CSMAGIC_EMBEDDED_SIGNATURE != ntohl(super_blob->magic)) {
408 error = EBADEXEC;
409 goto out;
410 }
411 count = ntohl(super_blob->count);
412 for (i = 0; i < count; ++i) {
413 blob_index = &super_blob->index[i];
414 blob = (void *)((char *)super_blob + ntohl(blob_index->offset));
415 switch (ntohl(blob_index->type)) {
416 case CSSLOT_CODEDIRECTORY:
417 if (CSMAGIC_CODEDIRECTORY != ntohl(blob->magic))
418 break;
419 code_dir = (void *)blob;
420 hash_size = code_dir->hashSize;
421 if (CSSLOT_ENTITLEMENTS <=
422 ntohl(code_dir->nSpecialSlots)) {
423 embedded_hash = (void *)((char *)code_dir +
424 ntohl(code_dir->hashOffset) -
425 (hash_size * CSSLOT_ENTITLEMENTS));
426 }
427 break;
428 case CSSLOT_ENTITLEMENTS:
429 if (CSMAGIC_EMBEDDED_ENTITLEMENTS != ntohl(blob->magic))
430 break;
431 start = (void *)blob;
432 length = ntohl(blob->length);
433 break;
434 default:
435 break;
436 }
437 }
438 if (NULL == start && NULL == embedded_hash) {
439 error = 0;
440 goto out;
441 } else if (NULL == start || NULL == embedded_hash) {
442 error = EBADEXEC;
443 goto out;
444 }
445 if (NULL == (computed_hash = kalloc(hash_size))) {
446 error = ENOMEM;
447 goto out;
448 }
449 SHA1Init(&context);
450 SHA1Update(&context, start, length);
451 SHA1Final(computed_hash, &context);
452 if (0 != memcmp(computed_hash, embedded_hash, hash_size)) {
453 error = EBADEXEC;
454 goto out;
455 }
456 error = 0;
457 out:
458 if (NULL != computed_hash)
459 kfree(computed_hash, hash_size);
460 if (0 == error) {
461 *out_start = start;
462 *out_length = length;
463 }
464 return error;
465 }
466
467 /*
468 * ENTITLEMENTS
469 * End of routines to navigate entitlements in the kernel.
470 */
471
472
473
474 /*
475 * ubc_init
476 *
477 * Initialization of the zone for Unified Buffer Cache.
478 *
479 * Parameters: (void)
480 *
481 * Returns: (void)
482 *
483 * Implicit returns:
484 * ubc_info_zone(global) initialized for subsequent allocations
485 */
486 __private_extern__ void
487 ubc_init(void)
488 {
489 int i;
490
491 i = (vm_size_t) sizeof (struct ubc_info);
492
493 ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
494
495 zone_change(ubc_info_zone, Z_NOENCRYPT, TRUE);
496 }
497
498
499 /*
500 * ubc_info_init
501 *
502 * Allocate and attach an empty ubc_info structure to a vnode
503 *
504 * Parameters: vp Pointer to the vnode
505 *
506 * Returns: 0 Success
507 * vnode_size:ENOMEM Not enough space
508 * vnode_size:??? Other error from vnode_getattr
509 *
510 */
int
ubc_info_init(struct vnode *vp)
{
	/* withfsize == 0: the size is obtained from the vnode */
	int result = ubc_info_init_internal(vp, 0, 0);

	return (result);
}
516
517
518 /*
519 * ubc_info_init_withsize
520 *
521 * Allocate and attach a sized ubc_info structure to a vnode
522 *
523 * Parameters: vp Pointer to the vnode
524 * filesize The size of the file
525 *
526 * Returns: 0 Success
527 * vnode_size:ENOMEM Not enough space
528 * vnode_size:??? Other error from vnode_getattr
529 */
530 int
531 ubc_info_init_withsize(struct vnode *vp, off_t filesize)
532 {
533 return(ubc_info_init_internal(vp, 1, filesize));
534 }
535
536
537 /*
538 * ubc_info_init_internal
539 *
540 * Allocate and attach a ubc_info structure to a vnode
541 *
542 * Parameters: vp Pointer to the vnode
543 * withfsize{0,1} Zero if the size should be obtained
544 * from the vnode; otherwise, use filesize
545 * filesize The size of the file, if withfsize == 1
546 *
547 * Returns: 0 Success
548 * vnode_size:ENOMEM Not enough space
549 * vnode_size:??? Other error from vnode_getattr
550 *
551 * Notes: We call a blocking zalloc(), and the zone was created as an
552 * expandable and collectable zone, so if no memory is available,
553 * it is possible for zalloc() to block indefinitely. zalloc()
554 * may also panic if the zone of zones is exhausted, since it's
555 * NOT expandable.
556 *
557 * We unconditionally call vnode_pager_setup(), even if this is
558 * a reuse of a ubc_info; in that case, we should probably assert
559 * that it does not already have a pager association, but do not.
560 *
561 * Since memory_object_create_named() can only fail from receiving
562 * an invalid pager argument, the explicit check and panic is
563 * merely precautionary.
564 */
565 static int
566 ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
567 {
568 register struct ubc_info *uip;
569 void * pager;
570 int error = 0;
571 kern_return_t kret;
572 memory_object_control_t control;
573
574 uip = vp->v_ubcinfo;
575
576 /*
577 * If there is not already a ubc_info attached to the vnode, we
578 * attach one; otherwise, we will reuse the one that's there.
579 */
580 if (uip == UBC_INFO_NULL) {
581
582 uip = (struct ubc_info *) zalloc(ubc_info_zone);
583 bzero((char *)uip, sizeof(struct ubc_info));
584
585 uip->ui_vnode = vp;
586 uip->ui_flags = UI_INITED;
587 uip->ui_ucred = NOCRED;
588 }
589 assert(uip->ui_flags != UI_NONE);
590 assert(uip->ui_vnode == vp);
591
592 /* now set this ubc_info in the vnode */
593 vp->v_ubcinfo = uip;
594
595 /*
596 * Allocate a pager object for this vnode
597 *
598 * XXX The value of the pager parameter is currently ignored.
599 * XXX Presumably, this API changed to avoid the race between
600 * XXX setting the pager and the UI_HASPAGER flag.
601 */
602 pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
603 assert(pager);
604
605 /*
606 * Explicitly set the pager into the ubc_info, after setting the
607 * UI_HASPAGER flag.
608 */
609 SET(uip->ui_flags, UI_HASPAGER);
610 uip->ui_pager = pager;
611
612 /*
613 * Note: We can not use VNOP_GETATTR() to get accurate
614 * value of ui_size because this may be an NFS vnode, and
615 * nfs_getattr() can call vinvalbuf(); if this happens,
616 * ubc_info is not set up to deal with that event.
617 * So use bogus size.
618 */
619
620 /*
621 * create a vnode - vm_object association
622 * memory_object_create_named() creates a "named" reference on the
623 * memory object we hold this reference as long as the vnode is
624 * "alive." Since memory_object_create_named() took its own reference
625 * on the vnode pager we passed it, we can drop the reference
626 * vnode_pager_setup() returned here.
627 */
628 kret = memory_object_create_named(pager,
629 (memory_object_size_t)uip->ui_size, &control);
630 vnode_pager_deallocate(pager);
631 if (kret != KERN_SUCCESS)
632 panic("ubc_info_init: memory_object_create_named returned %d", kret);
633
634 assert(control);
635 uip->ui_control = control; /* cache the value of the mo control */
636 SET(uip->ui_flags, UI_HASOBJREF); /* with a named reference */
637
638 if (withfsize == 0) {
639 /* initialize the size */
640 error = vnode_size(vp, &uip->ui_size, vfs_context_current());
641 if (error)
642 uip->ui_size = 0;
643 } else {
644 uip->ui_size = filesize;
645 }
646 vp->v_lflag |= VNAMED_UBC; /* vnode has a named ubc reference */
647
648 return (error);
649 }
650
651
652 /*
653 * ubc_info_free
654 *
655 * Free a ubc_info structure
656 *
657 * Parameters: uip A pointer to the ubc_info to free
658 *
659 * Returns: (void)
660 *
661 * Notes: If there is a credential that has subsequently been associated
662 * with the ubc_info via a call to ubc_setcred(), the reference
663 * to the credential is dropped.
664 *
665 * It's actually impossible for a ubc_info.ui_control to take the
666 * value MEMORY_OBJECT_CONTROL_NULL.
667 */
668 static void
669 ubc_info_free(struct ubc_info *uip)
670 {
671 if (IS_VALID_CRED(uip->ui_ucred)) {
672 kauth_cred_unref(&uip->ui_ucred);
673 }
674
675 if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
676 memory_object_control_deallocate(uip->ui_control);
677
678 cluster_release(uip);
679 ubc_cs_free(uip);
680
681 zfree(ubc_info_zone, uip);
682 return;
683 }
684
685
/*
 * ubc_info_deallocate
 *
 * Externally visible wrapper around ubc_info_free().
 *
 * Parameters:	uip			A pointer to the ubc_info to free
 *
 * Returns:	(void)
 */
void
ubc_info_deallocate(struct ubc_info *uip)
{
	ubc_info_free(uip);
	return;
}
691
692
693 /*
694 * ubc_setsize
695 *
696 * Tell the VM that the size of the file represented by the vnode has
697 * changed
698 *
699 * Parameters: vp The vp whose backing file size is
700 * being changed
701 * nsize The new size of the backing file
702 *
703 * Returns: 1 Success
704 * 0 Failure
705 *
706 * Notes: This function will indicate failure if the new size that's
707 * being attempted to be set is negative.
708 *
709 * This function will fail if there is no ubc_info currently
710 * associated with the vnode.
711 *
712 * This function will indicate success if the new size is the
713 * same or larger than the old size (in this case, the remainder
714 * of the file will require modification or use of an existing upl
715 * to access successfully).
716 *
717 * This function will fail if the new file size is smaller, and
718 * the memory region being invalidated was unable to actually be
719 * invalidated and/or the last page could not be flushed, if the
720 * new size is not aligned to a page boundary. This is usually
721 * indicative of an I/O error.
722 */
723 int
724 ubc_setsize(struct vnode *vp, off_t nsize)
725 {
726 off_t osize; /* ui_size before change */
727 off_t lastpg, olastpgend, lastoff;
728 struct ubc_info *uip;
729 memory_object_control_t control;
730 kern_return_t kret = KERN_SUCCESS;
731
732 if (nsize < (off_t)0)
733 return (0);
734
735 if (!UBCINFOEXISTS(vp))
736 return (0);
737
738 uip = vp->v_ubcinfo;
739 osize = uip->ui_size;
740 /*
741 * Update the size before flushing the VM
742 */
743 uip->ui_size = nsize;
744
745 if (nsize >= osize) { /* Nothing more to do */
746 if (nsize > osize) {
747 lock_vnode_and_post(vp, NOTE_EXTEND);
748 }
749
750 return (1); /* return success */
751 }
752
753 /*
754 * When the file shrinks, invalidate the pages beyond the
755 * new size. Also get rid of garbage beyond nsize on the
756 * last page. The ui_size already has the nsize, so any
757 * subsequent page-in will zero-fill the tail properly
758 */
759 lastpg = trunc_page_64(nsize);
760 olastpgend = round_page_64(osize);
761 control = uip->ui_control;
762 assert(control);
763 lastoff = (nsize & PAGE_MASK_64);
764
765 if (lastoff) {
766 upl_t upl;
767 upl_page_info_t *pl;
768
769
770 /*
771 * new EOF ends up in the middle of a page
772 * zero the tail of this page if its currently
773 * present in the cache
774 */
775 kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);
776
777 if (kret != KERN_SUCCESS)
778 panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);
779
780 if (upl_valid_page(pl, 0))
781 cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);
782
783 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
784
785 lastpg += PAGE_SIZE_64;
786 }
787 if (olastpgend > lastpg) {
788 int flags;
789
790 if (lastpg == 0)
791 flags = MEMORY_OBJECT_DATA_FLUSH_ALL;
792 else
793 flags = MEMORY_OBJECT_DATA_FLUSH;
794 /*
795 * invalidate the pages beyond the new EOF page
796 *
797 */
798 kret = memory_object_lock_request(control,
799 (memory_object_offset_t)lastpg,
800 (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
801 MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE);
802 if (kret != KERN_SUCCESS)
803 printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
804 }
805 return ((kret == KERN_SUCCESS) ? 1 : 0);
806 }
807
808
809 /*
810 * ubc_getsize
811 *
812 * Get the size of the file associated with the specified vnode
813 *
814 * Parameters: vp The vnode whose size is of interest
815 *
816 * Returns: 0 There is no ubc_info associated with
817 * this vnode, or the size is zero
818 * !0 The size of the file
819 *
820 * Notes: Using this routine, it is not possible for a caller to
821 * successfully distinguish between a vnode associated with a zero
822 * length file, and a vnode with no associated ubc_info. The
823 * caller therefore needs to not care, or needs to ensure that
824 * they have previously successfully called ubc_info_init() or
825 * ubc_info_init_withsize().
826 */
827 off_t
828 ubc_getsize(struct vnode *vp)
829 {
830 /* people depend on the side effect of this working this way
831 * as they call this for directory
832 */
833 if (!UBCINFOEXISTS(vp))
834 return ((off_t)0);
835 return (vp->v_ubcinfo->ui_size);
836 }
837
838
839 /*
840 * ubc_umount
841 *
842 * Call ubc_sync_range(vp, 0, EOF, UBC_PUSHALL) on all the vnodes for this
843 * mount point
844 *
845 * Parameters: mp The mount point
846 *
847 * Returns: 0 Success
848 *
849 * Notes: There is no failure indication for this function.
850 *
851 * This function is used in the unmount path; since it may block
852 * I/O indefinitely, it should not be used in the forced unmount
853 * path, since a device unavailability could also block that
854 * indefinitely.
855 *
856 * Because there is no device ejection interlock on USB, FireWire,
857 * or similar devices, it's possible that an ejection that begins
858 * subsequent to the vnode_iterate() completing, either on one of
859 * those devices, or a network mount for which the server quits
860 * responding, etc., may cause the caller to block indefinitely.
861 */
862 __private_extern__ int
863 ubc_umount(struct mount *mp)
864 {
865 vnode_iterate(mp, 0, ubc_umcallback, 0);
866 return(0);
867 }
868
869
870 /*
871 * ubc_umcallback
872 *
873 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
874 * and vnode_iterate() for details of implementation.
875 */
876 static int
877 ubc_umcallback(vnode_t vp, __unused void * args)
878 {
879
880 if (UBCINFOEXISTS(vp)) {
881
882 (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
883 }
884 return (VNODE_RETURNED);
885 }
886
887
888 /*
889 * ubc_getcred
890 *
891 * Get the credentials currently active for the ubc_info associated with the
892 * vnode.
893 *
894 * Parameters: vp The vnode whose ubc_info credentials
895 * are to be retrieved
896 *
897 * Returns: !NOCRED The credentials
898 * NOCRED If there is no ubc_info for the vnode,
899 * or if there is one, but it has not had
900 * any credentials associated with it via
901 * a call to ubc_setcred()
902 */
903 kauth_cred_t
904 ubc_getcred(struct vnode *vp)
905 {
906 if (UBCINFOEXISTS(vp))
907 return (vp->v_ubcinfo->ui_ucred);
908
909 return (NOCRED);
910 }
911
912
913 /*
914 * ubc_setthreadcred
915 *
916 * If they are not already set, set the credentials of the ubc_info structure
917 * associated with the vnode to those of the supplied thread; otherwise leave
918 * them alone.
919 *
920 * Parameters: vp The vnode whose ubc_info creds are to
921 * be set
922 * p The process whose credentials are to
923 * be used, if not running on an assumed
924 * credential
925 * thread The thread whose credentials are to
926 * be used
927 *
928 * Returns: 1 This vnode has no associated ubc_info
929 * 0 Success
930 *
931 * Notes: This function takes a proc parameter to account for bootstrap
932 * issues where a task or thread may call this routine, either
933 * before credentials have been initialized by bsd_init(), or if
934 * there is no BSD info associated with a mach thread yet. This
935 * is known to happen in both the initial swap and memory mapping
936 * calls.
937 *
938 * This function is generally used only in the following cases:
939 *
940 * o a memory mapped file via the mmap() system call
941 * o a memory mapped file via the deprecated map_fd() call
942 * o a swap store backing file
943 * o subsequent to a successful write via vn_write()
944 *
945 * The information is then used by the NFS client in order to
946 * cons up a wire message in either the page-in or page-out path.
947 *
948 * There are two potential problems with the use of this API:
949 *
950 * o Because the write path only set it on a successful
951 * write, there is a race window between setting the
952 * credential and its use to evict the pages to the
953 * remote file server
954 *
955 * o Because a page-in may occur prior to a write, the
956 * credential may not be set at this time, if the page-in
957 * is not the result of a mapping established via mmap()
958 * or map_fd().
959 *
960 * In both these cases, this will be triggered from the paging
961 * path, which will instead use the credential of the current
962 * process, which in this case is either the dynamic_pager or
963 * the kernel task, both of which utilize "root" credentials.
964 *
965 * This may potentially permit operations to occur which should
966 * be denied, or it may cause to be denied operations which
967 * should be permitted, depending on the configuration of the NFS
968 * server.
969 */
970 int
971 ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
972 {
973 struct ubc_info *uip;
974 kauth_cred_t credp;
975 struct uthread *uthread = get_bsdthread_info(thread);
976
977 if (!UBCINFOEXISTS(vp))
978 return (1);
979
980 vnode_lock(vp);
981
982 uip = vp->v_ubcinfo;
983 credp = uip->ui_ucred;
984
985 if (!IS_VALID_CRED(credp)) {
986 /* use per-thread cred, if assumed identity, else proc cred */
987 if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
988 uip->ui_ucred = kauth_cred_proc_ref(p);
989 } else {
990 uip->ui_ucred = uthread->uu_ucred;
991 kauth_cred_ref(uip->ui_ucred);
992 }
993 }
994 vnode_unlock(vp);
995
996 return (0);
997 }
998
999
1000 /*
1001 * ubc_setcred
1002 *
1003 * If they are not already set, set the credentials of the ubc_info structure
1004 * associated with the vnode to those of the process; otherwise leave them
1005 * alone.
1006 *
1007 * Parameters: vp The vnode whose ubc_info creds are to
1008 * be set
1009 * p The process whose credentials are to
1010 * be used
1011 *
1012 * Returns: 0 This vnode has no associated ubc_info
1013 * 1 Success
1014 *
1015 * Notes: The return values for this function are inverted from nearly
1016 * all other uses in the kernel.
1017 *
1018 * See also ubc_setthreadcred(), above.
1019 *
1020 * This function is considered deprecated, and generally should
1021 * not be used, as it is incompatible with per-thread credentials;
1022 * it exists for legacy KPI reasons.
1023 *
1024 * DEPRECATION: ubc_setcred() is being deprecated. Please use
1025 * ubc_setthreadcred() instead.
1026 */
1027 int
1028 ubc_setcred(struct vnode *vp, proc_t p)
1029 {
1030 struct ubc_info *uip;
1031 kauth_cred_t credp;
1032
1033 /* If there is no ubc_info, deny the operation */
1034 if ( !UBCINFOEXISTS(vp))
1035 return (0);
1036
1037 /*
1038 * Check to see if there is already a credential reference in the
1039 * ubc_info; if there is not, take one on the supplied credential.
1040 */
1041 vnode_lock(vp);
1042 uip = vp->v_ubcinfo;
1043 credp = uip->ui_ucred;
1044 if (!IS_VALID_CRED(credp)) {
1045 uip->ui_ucred = kauth_cred_proc_ref(p);
1046 }
1047 vnode_unlock(vp);
1048
1049 return (1);
1050 }
1051
1052 /*
1053 * ubc_getpager
1054 *
1055 * Get the pager associated with the ubc_info associated with the vnode.
1056 *
1057 * Parameters: vp The vnode to obtain the pager from
1058 *
1059 * Returns: !VNODE_PAGER_NULL The memory_object_t for the pager
1060 * VNODE_PAGER_NULL There is no ubc_info for this vnode
1061 *
1062 * Notes: For each vnode that has a ubc_info associated with it, that
1063 * ubc_info SHALL have a pager associated with it, so in the
1064 * normal case, it's impossible to return VNODE_PAGER_NULL for
1065 * a vnode with an associated ubc_info.
1066 */
1067 __private_extern__ memory_object_t
1068 ubc_getpager(struct vnode *vp)
1069 {
1070 if (UBCINFOEXISTS(vp))
1071 return (vp->v_ubcinfo->ui_pager);
1072
1073 return (0);
1074 }
1075
1076
1077 /*
1078 * ubc_getobject
1079 *
1080 * Get the memory object control associated with the ubc_info associated with
1081 * the vnode
1082 *
1083 * Parameters: vp The vnode to obtain the memory object
1084 * from
1085 * flags DEPRECATED
1086 *
1087 * Returns: !MEMORY_OBJECT_CONTROL_NULL
1088 * MEMORY_OBJECT_CONTROL_NULL
1089 *
1090 * Notes: Historically, if the flags were not "do not reactivate", this
1091 * function would look up the memory object using the pager if
1092 * it did not exist (this could be the case if the vnode had
1093 * been previously reactivated). The flags would also permit a
1094 * hold to be requested, which would have created an object
1095 * reference, if one had not already existed. This usage is
1096 * deprecated, as it would permit a race between finding and
1097 * taking the reference vs. a single reference being dropped in
1098 * another thread.
1099 */
1100 memory_object_control_t
1101 ubc_getobject(struct vnode *vp, __unused int flags)
1102 {
1103 if (UBCINFOEXISTS(vp))
1104 return((vp->v_ubcinfo->ui_control));
1105
1106 return (MEMORY_OBJECT_CONTROL_NULL);
1107 }
1108
1109 boolean_t
1110 ubc_strict_uncached_IO(struct vnode *vp)
1111 {
1112 boolean_t result = FALSE;
1113
1114 if (UBCINFOEXISTS(vp)) {
1115 result = memory_object_is_slid(vp->v_ubcinfo->ui_control);
1116 }
1117 return result;
1118 }
1119
1120 /*
1121 * ubc_blktooff
1122 *
1123 * Convert a given block number to a memory backing object (file) offset for a
1124 * given vnode
1125 *
1126 * Parameters: vp The vnode in which the block is located
1127 * blkno The block number to convert
1128 *
1129 * Returns: !-1 The offset into the backing object
1130 * -1 There is no ubc_info associated with
1131 * the vnode
1132 * -1 An error occurred in the underlying VFS
1133 * while translating the block to an
1134 * offset; the most likely cause is that
1135 * the caller specified a block past the
1136 * end of the file, but this could also be
1137 * any other error from VNOP_BLKTOOFF().
1138 *
1139 * Note: Representing the error in band loses some information, but does
1140 * not occlude a valid offset, since an off_t of -1 is normally
1141 * used to represent EOF. If we had a more reliable constant in
1142 * our header files for it (i.e. explicitly cast to an off_t), we
1143 * would use it here instead.
1144 */
1145 off_t
1146 ubc_blktooff(vnode_t vp, daddr64_t blkno)
1147 {
1148 off_t file_offset = -1;
1149 int error;
1150
1151 if (UBCINFOEXISTS(vp)) {
1152 error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
1153 if (error)
1154 file_offset = -1;
1155 }
1156
1157 return (file_offset);
1158 }
1159
1160
1161 /*
1162 * ubc_offtoblk
1163 *
1164 * Convert a given offset in a memory backing object into a block number for a
1165 * given vnode
1166 *
1167 * Parameters: vp The vnode in which the offset is
1168 * located
1169 * offset The offset into the backing object
1170 *
1171 * Returns: !-1 The returned block number
1172 * -1 There is no ubc_info associated with
1173 * the vnode
1174 * -1 An error occurred in the underlying VFS
1175 * while translating the offset to a
1176 * block; the most likely cause is that
1177 * the caller specified an offset past the
1178 * end of the file, but this could also be
1179 * any other error from VNOP_OFFTOBLK().
1180 *
1181 * Note: Representing the error in band loses some information, but does
1182 * not occlude a valid block number, since block numbers exceed
1183 * the valid range for offsets, due to their relative sizes. If
1184 * we had a more reliable constant than -1 in our header files
1185 * for it (i.e. explicitly cast to a daddr64_t), we would use it
1186 * here instead.
1187 */
1188 daddr64_t
1189 ubc_offtoblk(vnode_t vp, off_t offset)
1190 {
1191 daddr64_t blkno = -1;
1192 int error = 0;
1193
1194 if (UBCINFOEXISTS(vp)) {
1195 error = VNOP_OFFTOBLK(vp, offset, &blkno);
1196 if (error)
1197 blkno = -1;
1198 }
1199
1200 return (blkno);
1201 }
1202
1203
1204 /*
1205 * ubc_pages_resident
1206 *
1207 * Determine whether or not a given vnode has pages resident via the memory
1208 * object control associated with the ubc_info associated with the vnode
1209 *
1210 * Parameters: vp The vnode we want to know about
1211 *
1212 * Returns: 1 Yes
1213 * 0 No
1214 */
1215 int
1216 ubc_pages_resident(vnode_t vp)
1217 {
1218 kern_return_t kret;
1219 boolean_t has_pages_resident;
1220
1221 if (!UBCINFOEXISTS(vp))
1222 return (0);
1223
1224 /*
1225 * The following call may fail if an invalid ui_control is specified,
1226 * or if there is no VM object associated with the control object. In
1227 * either case, reacting to it as if there were no pages resident will
1228 * result in correct behavior.
1229 */
1230 kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);
1231
1232 if (kret != KERN_SUCCESS)
1233 return (0);
1234
1235 if (has_pages_resident == TRUE)
1236 return (1);
1237
1238 return (0);
1239 }
1240
1241
1242 /*
1243 * ubc_sync_range
1244 *
1245 * Clean and/or invalidate a range in the memory object that backs this vnode
1246 *
1247 * Parameters: vp The vnode whose associated ubc_info's
1248 * associated memory object is to have a
1249 * range invalidated within it
1250 * beg_off The start of the range, as an offset
1251 * end_off The end of the range, as an offset
1252 * flags See ubc_msync_internal()
1253 *
1254 * Returns: 1 Success
1255 * 0 Failure
1256 *
1257 * Notes: see ubc_msync_internal() for more detailed information.
1258 *
1259 * DEPRECATED: This interface is obsolete due to a failure to return error
1260 * information needed in order to correct failures. The currently
1261 * recommended interface is ubc_msync().
1262 */
1263 int
1264 ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
1265 {
1266 return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
1267 }
1268
1269
1270 /*
1271 * ubc_msync
1272 *
1273 * Clean and/or invalidate a range in the memory object that backs this vnode
1274 *
1275 * Parameters: vp The vnode whose associated ubc_info's
1276 * associated memory object is to have a
1277 * range invalidated within it
1278 * beg_off The start of the range, as an offset
1279 * end_off The end of the range, as an offset
1280 * resid_off The address of an off_t supplied by the
1281 * caller; may be set to NULL to ignore
1282 * flags See ubc_msync_internal()
1283 *
1284 * Returns: 0 Success
1285 * !0 Failure; an errno is returned
1286 *
1287 * Implicit Returns:
1288 * *resid_off, modified If non-NULL, the contents are ALWAYS
1289 * modified; they are initialized to the
1290 * beg_off, and in case of an I/O error,
1291 * the difference between beg_off and the
1292 * current value will reflect what was
1293 * able to be written before the error
1294 * occurred. If no error is returned, the
1295 * value of the resid_off is undefined; do
1296 * NOT use it in place of end_off if you
1297 * intend to increment from the end of the
1298 * last call and call iteratively.
1299 *
1300 * Notes: see ubc_msync_internal() for more detailed information.
1301 *
1302 */
1303 errno_t
1304 ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
1305 {
1306 int retval;
1307 int io_errno = 0;
1308
1309 if (resid_off)
1310 *resid_off = beg_off;
1311
1312 retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);
1313
1314 if (retval == 0 && io_errno == 0)
1315 return (EINVAL);
1316 return (io_errno);
1317 }
1318
1319
1320 /*
1321 * Clean and/or invalidate a range in the memory object that backs this vnode
1322 *
1323 * Parameters: vp The vnode whose associated ubc_info's
1324 * associated memory object is to have a
1325 * range invalidated within it
1326 * beg_off The start of the range, as an offset
1327 * end_off The end of the range, as an offset
1328 * resid_off The address of an off_t supplied by the
1329 * caller; may be set to NULL to ignore
1330 * flags MUST contain at least one of the flags
1331 * UBC_INVALIDATE, UBC_PUSHDIRTY, or
1332 * UBC_PUSHALL; if UBC_PUSHDIRTY is used,
1333 * UBC_SYNC may also be specified to cause
1334 * this function to block until the
1335 * operation is complete. The behavior
1336 * of UBC_SYNC is otherwise undefined.
1337 * io_errno The address of an int to contain the
1338 * errno from a failed I/O operation, if
1339 * one occurs; may be set to NULL to
1340 * ignore
1341 *
1342 * Returns: 1 Success
1343 * 0 Failure
1344 *
1345 * Implicit Returns:
1346 * *resid_off, modified The contents of this offset MAY be
1347 * modified; in case of an I/O error, the
1348 * difference between beg_off and the
1349 * current value will reflect what was
1350 * able to be written before the error
1351 * occurred.
1352 * *io_errno, modified The contents of this offset are set to
1353 * an errno, if an error occurs; if the
1354 * caller supplies an io_errno parameter,
1355 * they should be careful to initialize it
1356 * to 0 before calling this function to
1357 * enable them to distinguish an error
1358 * with a valid *resid_off from an invalid
1359 * one, and to avoid potentially falsely
1360 * reporting an error, depending on use.
1361 *
1362 * Notes: If there is no ubc_info associated with the vnode supplied,
1363 * this function immediately returns failure (0).
1364 *
1365 * If the value of end_off is less than or equal to beg_off, this
1366 * function immediately returns success; that is, end_off is NOT
1367 * inclusive.
1368 *
1369 * IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
1370 * UBC_PUSHALL MUST be specified; that is, it is NOT possible to
1371 * attempt to block on in-progress I/O by calling this function
1372 * with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
1373 * in order to block pending on the I/O already in progress.
1374 *
1375 * The start offset is truncated to the page boundary and the
1376 * size is adjusted to include the last page in the range; that
1377 * is, end_off on exactly a page boundary will not change if it
1378 * is rounded, and the range of bytes written will be from the
1379 * truncated beg_off to the rounded (end_off - 1).
1380 */
1381 static int
1382 ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
1383 {
1384 memory_object_size_t tsize;
1385 kern_return_t kret;
1386 int request_flags = 0;
1387 int flush_flags = MEMORY_OBJECT_RETURN_NONE;
1388
1389 if ( !UBCINFOEXISTS(vp))
1390 return (0);
1391 if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
1392 return (0);
1393 if (end_off <= beg_off)
1394 return (1);
1395
1396 if (flags & UBC_INVALIDATE)
1397 /*
1398 * discard the resident pages
1399 */
1400 request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);
1401
1402 if (flags & UBC_SYNC)
1403 /*
1404 * wait for all the I/O to complete before returning
1405 */
1406 request_flags |= MEMORY_OBJECT_IO_SYNC;
1407
1408 if (flags & UBC_PUSHDIRTY)
1409 /*
1410 * we only return the dirty pages in the range
1411 */
1412 flush_flags = MEMORY_OBJECT_RETURN_DIRTY;
1413
1414 if (flags & UBC_PUSHALL)
1415 /*
1416 * then return all the interesting pages in the range (both
1417 * dirty and precious) to the pager
1418 */
1419 flush_flags = MEMORY_OBJECT_RETURN_ALL;
1420
1421 beg_off = trunc_page_64(beg_off);
1422 end_off = round_page_64(end_off);
1423 tsize = (memory_object_size_t)end_off - beg_off;
1424
1425 /* flush and/or invalidate pages in the range requested */
1426 kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
1427 beg_off, tsize,
1428 (memory_object_offset_t *)resid_off,
1429 io_errno, flush_flags, request_flags,
1430 VM_PROT_NO_CHANGE);
1431
1432 return ((kret == KERN_SUCCESS) ? 1 : 0);
1433 }
1434
1435
1436 /*
1437 * ubc_map
1438 *
1439 * Explicitly map a vnode that has an associated ubc_info, and add a reference
1440 * to it for the ubc system, if there isn't one already, so it will not be
1441 * recycled while it's in use, and set flags on the ubc_info to indicate that
1442 * we have done this
1443 *
1444 * Parameters: vp The vnode to map
1445 * flags The mapping flags for the vnode; this
1446 * will be a combination of one or more of
1447 * PROT_READ, PROT_WRITE, and PROT_EXEC
1448 *
1449 * Returns: 0 Success
1450 * EPERM Permission was denied
1451 *
1452 * Notes: An I/O reference on the vnode must already be held on entry
1453 *
1454 * If there is no ubc_info associated with the vnode, this function
1455 * will return success.
1456 *
1457 * If a permission error occurs, this function will return
1458 * failure; all other failures will cause this function to return
1459 * success.
1460 *
1461 * IMPORTANT: This is an internal use function, and its symbols
1462 * are not exported, hence its error checking is not very robust.
1463 * It is primarily used by:
1464 *
1465 * o mmap(), when mapping a file
1466 * o The deprecated map_fd() interface, when mapping a file
1467 * o When mapping a shared file (a shared library in the
1468 * shared segment region)
1469 * o When loading a program image during the exec process
1470 *
1471 * ...all of these uses ignore the return code, and any fault that
1472 * results later because of a failure is handled in the fix-up path
1473 * of the fault handler. The interface exists primarily as a
1474 * performance hint.
1475 *
1476 * Given that third party implementation of the type of interfaces
1477 * that would use this function, such as alternative executable
1478 * formats, etc., are unsupported, this function is not exported
1479 * for general use.
1480 *
1481 * The extra reference is held until the VM system unmaps the
1482 * vnode from its own context to maintain a vnode reference in
1483 * cases like open()/mmap()/close(), which leave the backing
1484 * object referenced by a mapped memory region in a process
1485 * address space.
1486 */
1487 __private_extern__ int
1488 ubc_map(vnode_t vp, int flags)
1489 {
1490 struct ubc_info *uip;
1491 int error = 0;
1492 int need_ref = 0;
1493 int need_wakeup = 0;
1494
1495 if (UBCINFOEXISTS(vp)) {
1496
1497 vnode_lock(vp);
1498 uip = vp->v_ubcinfo;
1499
1500 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1501 SET(uip->ui_flags, UI_MAPWAITING);
1502 (void) msleep(&uip->ui_flags, &vp->v_lock,
1503 PRIBIO, "ubc_map", NULL);
1504 }
1505 SET(uip->ui_flags, UI_MAPBUSY);
1506 vnode_unlock(vp);
1507
1508 error = VNOP_MMAP(vp, flags, vfs_context_current());
1509
1510 if (error != EPERM)
1511 error = 0;
1512
1513 vnode_lock_spin(vp);
1514
1515 if (error == 0) {
1516 if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
1517 need_ref = 1;
1518 SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
1519 }
1520 CLR(uip->ui_flags, UI_MAPBUSY);
1521
1522 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1523 CLR(uip->ui_flags, UI_MAPWAITING);
1524 need_wakeup = 1;
1525 }
1526 vnode_unlock(vp);
1527
1528 if (need_wakeup)
1529 wakeup(&uip->ui_flags);
1530
1531 if (need_ref)
1532 vnode_ref(vp);
1533 }
1534 return (error);
1535 }
1536
1537
1538 /*
1539 * ubc_destroy_named
1540 *
1541 * Destroy the named memory object associated with the ubc_info control object
1542 * associated with the designated vnode, if there is a ubc_info associated
1543 * with the vnode, and a control object is associated with it
1544 *
1545 * Parameters: vp The designated vnode
1546 *
1547 * Returns: (void)
1548 *
1549 * Notes: This function is called on vnode termination for all vnodes,
1550 * and must therefore not assume that there is a ubc_info that is
1551 * associated with the vnode, nor that there is a control object
1552 * associated with the ubc_info.
1553 *
1554 * If all the conditions necessary are present, this function
1555 * calls memory_object_destory(), which will in turn end up
1556 * calling ubc_unmap() to release any vnode references that were
1557 * established via ubc_map().
1558 *
1559 * IMPORTANT: This is an internal use function that is used
1560 * exclusively by the internal use function vclean().
1561 */
1562 __private_extern__ void
1563 ubc_destroy_named(vnode_t vp)
1564 {
1565 memory_object_control_t control;
1566 struct ubc_info *uip;
1567 kern_return_t kret;
1568
1569 if (UBCINFOEXISTS(vp)) {
1570 uip = vp->v_ubcinfo;
1571
1572 /* Terminate the memory object */
1573 control = ubc_getobject(vp, UBC_HOLDOBJECT);
1574 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1575 kret = memory_object_destroy(control, 0);
1576 if (kret != KERN_SUCCESS)
1577 panic("ubc_destroy_named: memory_object_destroy failed");
1578 }
1579 }
1580 }
1581
1582
1583 /*
1584 * ubc_isinuse
1585 *
1586 * Determine whether or not a vnode is currently in use by ubc at a level in
1587 * excess of the requested busycount
1588 *
1589 * Parameters: vp The vnode to check
1590 * busycount The threshold busy count, used to bias
1591 * the count usually already held by the
1592 * caller to avoid races
1593 *
1594 * Returns: 1 The vnode is in use over the threshold
1595 * 0 The vnode is not in use over the
1596 * threshold
1597 *
1598 * Notes: Because the vnode is only held locked while actually asking
1599 * the use count, this function only represents a snapshot of the
1600 * current state of the vnode. If more accurate information is
1601 * required, an additional busycount should be held by the caller
1602 * and a non-zero busycount used.
1603 *
1604 * If there is no ubc_info associated with the vnode, this
1605 * function will report that the vnode is not in use by ubc.
1606 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	/*
	 * A vnode without a ubc_info is never "in use" by ubc.  Otherwise
	 * defer to the locked variant, passing 0 for 'locked' so that it
	 * takes the vnode lock itself.
	 */
	return (UBCINFOEXISTS(vp) ? ubc_isinuse_locked(vp, busycount, 0) : 0);
}
1614
1615
1616 /*
1617 * ubc_isinuse_locked
1618 *
1619 * Determine whether or not a vnode is currently in use by ubc at a level in
1620 * excess of the requested busycount
1621 *
1622 * Parameters: vp The vnode to check
1623 * busycount The threshold busy count, used to bias
1624 * the count usually already held by the
1625 * caller to avoid races
1626 * locked True if the vnode is already locked by
1627 * the caller
1628 *
1629 * Returns: 1 The vnode is in use over the threshold
1630 * 0 The vnode is not in use over the
1631 * threshold
1632 *
1633 * Notes: If the vnode is not locked on entry, it is locked while
1634 * actually asking the use count. If this is the case, this
1635 * function only represents a snapshot of the current state of
1636 * the vnode. If more accurate information is required, the
1637 * vnode lock should be held by the caller, otherwise an
1638 * additional busycount should be held by the caller and a
1639 * non-zero busycount used.
1640 *
1641 * If there is no ubc_info associated with the vnode, this
1642 * function will report that the vnode is not in use by ubc.
1643 */
1644 int
1645 ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
1646 {
1647 int retval = 0;
1648
1649
1650 if (!locked)
1651 vnode_lock_spin(vp);
1652
1653 if ((vp->v_usecount - vp->v_kusecount) > busycount)
1654 retval = 1;
1655
1656 if (!locked)
1657 vnode_unlock(vp);
1658 return (retval);
1659 }
1660
1661
1662 /*
1663 * ubc_unmap
1664 *
1665 * Reverse the effects of a ubc_map() call for a given vnode
1666 *
1667 * Parameters: vp vnode to unmap from ubc
1668 *
1669 * Returns: (void)
1670 *
1671 * Notes: This is an internal use function used by vnode_pager_unmap().
1672 * It will attempt to obtain a reference on the supplied vnode,
1673 * and if it can do so, and there is an associated ubc_info, and
1674 * the flags indicate that it was mapped via ubc_map(), then the
1675 * flag is cleared, the mapping removed, and the reference taken
1676 * by ubc_map() is released.
1677 *
1678 * IMPORTANT: This MUST only be called by the VM
1679 * to prevent race conditions.
1680 */
1681 __private_extern__ void
1682 ubc_unmap(struct vnode *vp)
1683 {
1684 struct ubc_info *uip;
1685 int need_rele = 0;
1686 int need_wakeup = 0;
1687
1688 if (vnode_getwithref(vp))
1689 return;
1690
1691 if (UBCINFOEXISTS(vp)) {
1692 vnode_lock(vp);
1693 uip = vp->v_ubcinfo;
1694
1695 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1696 SET(uip->ui_flags, UI_MAPWAITING);
1697 (void) msleep(&uip->ui_flags, &vp->v_lock,
1698 PRIBIO, "ubc_unmap", NULL);
1699 }
1700 SET(uip->ui_flags, UI_MAPBUSY);
1701
1702 if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
1703 CLR(uip->ui_flags, UI_ISMAPPED);
1704 need_rele = 1;
1705 }
1706 vnode_unlock(vp);
1707
1708 if (need_rele) {
1709 (void)VNOP_MNOMAP(vp, vfs_context_current());
1710 vnode_rele(vp);
1711 }
1712
1713 vnode_lock_spin(vp);
1714
1715 CLR(uip->ui_flags, UI_MAPBUSY);
1716 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1717 CLR(uip->ui_flags, UI_MAPWAITING);
1718 need_wakeup = 1;
1719 }
1720 vnode_unlock(vp);
1721
1722 if (need_wakeup)
1723 wakeup(&uip->ui_flags);
1724
1725 }
1726 /*
1727 * the drop of the vnode ref will cleanup
1728 */
1729 vnode_put(vp);
1730 }
1731
1732
1733 /*
1734 * ubc_page_op
1735 *
1736 * Manipulate individual page state for a vnode with an associated ubc_info
1737 * with an associated memory object control.
1738 *
1739 * Parameters: vp The vnode backing the page
1740 * f_offset A file offset interior to the page
1741 * ops The operations to perform, as a bitmap
1742 * (see below for more information)
1743 * phys_entryp The address of a ppnum_t; may be NULL
1744 * to ignore
1745 * flagsp A pointer to an int to contain flags;
1746 * may be NULL to ignore
1747 *
1748 * Returns: KERN_SUCCESS Success
1749 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1750 * object associated
1751 * KERN_INVALID_OBJECT If UPL_POP_PHYSICAL and the object is
1752 * not physically contiguous
1753 * KERN_INVALID_OBJECT If !UPL_POP_PHYSICAL and the object is
1754 * physically contiguous
1755 * KERN_FAILURE If the page cannot be looked up
1756 *
1757 * Implicit Returns:
1758 * *phys_entryp (modified) If phys_entryp is non-NULL and
1759 * UPL_POP_PHYSICAL
1760 * *flagsp (modified) If flagsp is non-NULL and there was
1761 * !UPL_POP_PHYSICAL and a KERN_SUCCESS
1762 *
1763 * Notes: For object boundaries, it is considerably more efficient to
1764 * ensure that f_offset is in fact on a page boundary, as this
1765 * will avoid internal use of the hash table to identify the
1766 * page, and would therefore skip a number of early optimizations.
1767 * Since this is a page operation anyway, the caller should try
1768 * to pass only a page aligned offset because of this.
1769 *
1770 * *flagsp may be modified even if this function fails. If it is
1771 * modified, it will contain the condition of the page before the
1772 * requested operation was attempted; these will only include the
1773 * bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
1774 * UPL_POP_SET, or UPL_POP_CLR bits.
1775 *
1776 * The flags field may contain a specific operation, such as
1777 * UPL_POP_PHYSICAL or UPL_POP_DUMP:
1778 *
1779 * o UPL_POP_PHYSICAL Fail if not contiguous; if
1780 * *phys_entryp and successful, set
1781 * *phys_entryp
1782 * o UPL_POP_DUMP Dump the specified page
1783 *
1784 * Otherwise, it is treated as a bitmap of one or more page
1785 * operations to perform on the final memory object; allowable
1786 * bit values are:
1787 *
1788 * o UPL_POP_DIRTY The page is dirty
1789 * o UPL_POP_PAGEOUT The page is paged out
1790 * o UPL_POP_PRECIOUS The page is precious
1791 * o UPL_POP_ABSENT The page is absent
1792 * o UPL_POP_BUSY The page is busy
1793 *
1794 * If the page status is only being queried and not modified, then
1795 * no other bits should be specified. However, if it is being
1796 * modified, exactly ONE of the following bits should be set:
1797 *
1798 * o UPL_POP_SET Set the current bitmap bits
1799 * o UPL_POP_CLR Clear the current bitmap bits
1800 *
1801 * Thus to effect a combination of setting and clearing, it may be
1802 * necessary to call this function twice. If this is done, the
1803 * set should be used before the clear, since clearing may trigger
1804 * a wakeup on the destination page, and if the page is backed by
1805 * an encrypted swap file, setting will trigger the decryption
1806 * needed before the wakeup occurs.
1807 */
1808 kern_return_t
1809 ubc_page_op(
1810 struct vnode *vp,
1811 off_t f_offset,
1812 int ops,
1813 ppnum_t *phys_entryp,
1814 int *flagsp)
1815 {
1816 memory_object_control_t control;
1817
1818 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1819 if (control == MEMORY_OBJECT_CONTROL_NULL)
1820 return KERN_INVALID_ARGUMENT;
1821
1822 return (memory_object_page_op(control,
1823 (memory_object_offset_t)f_offset,
1824 ops,
1825 phys_entryp,
1826 flagsp));
1827 }
1828
1829
1830 /*
1831 * ubc_range_op
1832 *
1833 * Manipulate page state for a range of memory for a vnode with an associated
1834 * ubc_info with an associated memory object control, when page level state is
1835 * not required to be returned from the call (i.e. there are no phys_entryp or
1836 * flagsp parameters to this call, and it takes a range which may contain
1837 * multiple pages, rather than an offset interior to a single page).
1838 *
1839 * Parameters: vp The vnode backing the page
1840 * f_offset_beg A file offset interior to the start page
1841 * f_offset_end A file offset interior to the end page
1842 * ops The operations to perform, as a bitmap
1843 * (see below for more information)
1844 * range The address of an int; may be NULL to
1845 * ignore
1846 *
1847 * Returns: KERN_SUCCESS Success
1848 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1849 * object associated
1850 * KERN_INVALID_OBJECT If the object is physically contiguous
1851 *
1852 * Implicit Returns:
1853 * *range (modified) If range is non-NULL, its contents will
1854 * be modified to contain the number of
1855 * bytes successfully operated upon.
1856 *
1857 * Notes: IMPORTANT: This function cannot be used on a range that
1858 * consists of physically contiguous pages.
1859 *
1860 * For object boundaries, it is considerably more efficient to
1861 * ensure that f_offset_beg and f_offset_end are in fact on page
1862 * boundaries, as this will avoid internal use of the hash table
1863 * to identify the page, and would therefore skip a number of
1864 * early optimizations. Since this is an operation on a set of
1865 * pages anyway, the caller should try to pass only page aligned
1866 * offsets because of this.
1867 *
1868 * *range will be modified only if this function succeeds.
1869 *
1870 * The flags field MUST contain a specific operation; allowable
1871 * values are:
1872 *
1873 * o UPL_ROP_ABSENT Returns the extent of the range
1874 * presented which is absent, starting
1875 * with the start address presented
1876 *
1877 * o UPL_ROP_PRESENT Returns the extent of the range
1878 * presented which is present (resident),
1879 * starting with the start address
1880 * presented
1881 * o UPL_ROP_DUMP Dump the pages which are found in the
1882 * target object for the target range.
1883 *
1884 * IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT; if there are
1885 * multiple regions in the range, only the first matching region
1886 * is returned.
1887 */
1888 kern_return_t
1889 ubc_range_op(
1890 struct vnode *vp,
1891 off_t f_offset_beg,
1892 off_t f_offset_end,
1893 int ops,
1894 int *range)
1895 {
1896 memory_object_control_t control;
1897
1898 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1899 if (control == MEMORY_OBJECT_CONTROL_NULL)
1900 return KERN_INVALID_ARGUMENT;
1901
1902 return (memory_object_range_op(control,
1903 (memory_object_offset_t)f_offset_beg,
1904 (memory_object_offset_t)f_offset_end,
1905 ops,
1906 range));
1907 }
1908
1909
1910 /*
1911 * ubc_create_upl
1912 *
1913 * Given a vnode, cause the population of a portion of the vm_object; based on
1914 * the nature of the request, the pages returned may contain valid data, or
1915 * they may be uninitialized.
1916 *
1917 * Parameters: vp The vnode from which to create the upl
1918 * f_offset The start offset into the backing store
1919 * represented by the vnode
1920 * bufsize The size of the upl to create
1921 * uplp Pointer to the upl_t to receive the
1922 * created upl; MUST NOT be NULL
1923 * plp Pointer to receive the internal page
1924 * list for the created upl; MAY be NULL
1925 * to ignore
1926 *
1927 * Returns: KERN_SUCCESS The requested upl has been created
1928 * KERN_INVALID_ARGUMENT The bufsize argument is not an even
1929 * multiple of the page size
1930 * KERN_INVALID_ARGUMENT There is no ubc_info associated with
1931 * the vnode, or there is no memory object
1932 * control associated with the ubc_info
1933 * memory_object_upl_request:KERN_INVALID_VALUE
1934 * The supplied upl_flags argument is
1935 * invalid
1936 * Implicit Returns:
1937 * *uplp (modified)
1938 * *plp (modified) If non-NULL, the value of *plp will be
1939 * modified to point to the internal page
1940 * list; this modification may occur even
1941 * if this function is unsuccessful, in
1942 * which case the contents may be invalid
1943 *
1944 * Note: If successful, the returned *uplp MUST subsequently be freed
1945 * via a call to ubc_upl_commit(), ubc_upl_commit_range(),
1946 * ubc_upl_abort(), or ubc_upl_abort_range().
1947 */
1948 kern_return_t
1949 ubc_create_upl(
1950 struct vnode *vp,
1951 off_t f_offset,
1952 int bufsize,
1953 upl_t *uplp,
1954 upl_page_info_t **plp,
1955 int uplflags)
1956 {
1957 memory_object_control_t control;
1958 kern_return_t kr;
1959
1960 if (plp != NULL)
1961 *plp = NULL;
1962 *uplp = NULL;
1963
1964 if (bufsize & 0xfff)
1965 return KERN_INVALID_ARGUMENT;
1966
1967 if (bufsize > MAX_UPL_SIZE * PAGE_SIZE)
1968 return KERN_INVALID_ARGUMENT;
1969
1970 if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) {
1971
1972 if (uplflags & UPL_UBC_MSYNC) {
1973 uplflags &= UPL_RET_ONLY_DIRTY;
1974
1975 uplflags |= UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
1976 UPL_SET_INTERNAL | UPL_SET_LITE;
1977
1978 } else if (uplflags & UPL_UBC_PAGEOUT) {
1979 uplflags &= UPL_RET_ONLY_DIRTY;
1980
1981 if (uplflags & UPL_RET_ONLY_DIRTY)
1982 uplflags |= UPL_NOBLOCK;
1983
1984 uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
1985 UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE;
1986 } else {
1987 uplflags |= UPL_RET_ONLY_ABSENT | UPL_NOBLOCK |
1988 UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
1989 UPL_SET_INTERNAL | UPL_SET_LITE;
1990 }
1991 } else {
1992 uplflags &= ~UPL_FOR_PAGEOUT;
1993
1994 if (uplflags & UPL_WILL_BE_DUMPED) {
1995 uplflags &= ~UPL_WILL_BE_DUMPED;
1996 uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
1997 } else
1998 uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
1999 }
2000 control = ubc_getobject(vp, UBC_FLAGS_NONE);
2001 if (control == MEMORY_OBJECT_CONTROL_NULL)
2002 return KERN_INVALID_ARGUMENT;
2003
2004 kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, NULL, uplflags);
2005 if (kr == KERN_SUCCESS && plp != NULL)
2006 *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
2007 return kr;
2008 }
2009
2010
2011 /*
2012 * ubc_upl_maxbufsize
2013 *
2014 * Return the maximum bufsize ubc_create_upl( ) will take.
2015 *
2016 * Parameters: none
2017 *
2018 * Returns: maximum size buffer (in bytes) ubc_create_upl( ) will take.
2019 */
2020 upl_size_t
2021 ubc_upl_maxbufsize(
2022 void)
2023 {
2024 return(MAX_UPL_SIZE * PAGE_SIZE);
2025 }
2026
2027 /*
2028 * ubc_upl_map
2029 *
2030 * Map the page list associated with the supplied upl into the kernel virtual
2031 * address space at the virtual address indicated by the dst_addr argument;
2032 * the entire upl is mapped
2033 *
2034 * Parameters: upl The upl to map
2035 * dst_addr The address at which to map the upl
2036 *
2037 * Returns: KERN_SUCCESS The upl has been mapped
2038 * KERN_INVALID_ARGUMENT The upl is UPL_NULL
2039 * KERN_FAILURE The upl is already mapped
2040 * vm_map_enter:KERN_INVALID_ARGUMENT
2041 * A failure code from vm_map_enter() due
2042 * to an invalid argument
2043 */
2044 kern_return_t
2045 ubc_upl_map(
2046 upl_t upl,
2047 vm_offset_t *dst_addr)
2048 {
2049 return (vm_upl_map(kernel_map, upl, dst_addr));
2050 }
2051
2052
2053 /*
2054 * ubc_upl_unmap
2055 *
2056 * Unmap the page list associated with the supplied upl from the kernel virtual
2057 * address space; the entire upl is unmapped.
2058 *
2059 * Parameters: upl The upl to unmap
2060 *
2061 * Returns: KERN_SUCCESS The upl has been unmapped
2062 * KERN_FAILURE The upl is not currently mapped
2063 * KERN_INVALID_ARGUMENT If the upl is UPL_NULL
2064 */
2065 kern_return_t
2066 ubc_upl_unmap(
2067 upl_t upl)
2068 {
2069 return(vm_upl_unmap(kernel_map, upl));
2070 }
2071
2072
2073 /*
2074 * ubc_upl_commit
2075 *
2076 * Commit the contents of the upl to the backing store
2077 *
2078 * Parameters: upl The upl to commit
2079 *
2080 * Returns: KERN_SUCCESS The upl has been committed
2081 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2082 * KERN_FAILURE The supplied upl does not represent
2083 * device memory, and the offset plus the
2084 * size would exceed the actual size of
2085 * the upl
2086 *
2087 * Notes: In practice, the only return value for this function should be
2088 * KERN_SUCCESS, unless there has been data structure corruption;
2089 * since the upl is deallocated regardless of success or failure,
2090 * there's really nothing to do about this other than panic.
2091 *
2092 * IMPORTANT: Use of this function should not be mixed with use of
2093 * ubc_upl_commit_range(), due to the unconditional deallocation
2094 * by this function.
2095 */
2096 kern_return_t
2097 ubc_upl_commit(
2098 upl_t upl)
2099 {
2100 upl_page_info_t *pl;
2101 kern_return_t kr;
2102
2103 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2104 kr = upl_commit(upl, pl, MAX_UPL_SIZE);
2105 upl_deallocate(upl);
2106 return kr;
2107 }
2108
2109
2110 /*
2111 * ubc_upl_commit
2112 *
2113 * Commit the contents of the specified range of the upl to the backing store
2114 *
2115 * Parameters: upl The upl to commit
2116 * offset The offset into the upl
2117 * size The size of the region to be committed,
2118 * starting at the specified offset
2119 * flags commit type (see below)
2120 *
2121 * Returns: KERN_SUCCESS The range has been committed
2122 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2123 * KERN_FAILURE The supplied upl does not represent
2124 * device memory, and the offset plus the
2125 * size would exceed the actual size of
2126 * the upl
2127 *
2128 * Notes: IMPORTANT: If the commit is successful, and the object is now
2129 * empty, the upl will be deallocated. Since the caller cannot
2130 * check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
2131 * should generally only be used when the offset is 0 and the size
2132 * is equal to the upl size.
2133 *
2134 * The flags argument is a bitmap of flags on the rage of pages in
2135 * the upl to be committed; allowable flags are:
2136 *
2137 * o UPL_COMMIT_FREE_ON_EMPTY Free the upl when it is
2138 * both empty and has been
2139 * successfully committed
2140 * o UPL_COMMIT_CLEAR_DIRTY Clear each pages dirty
2141 * bit; will prevent a
2142 * later pageout
2143 * o UPL_COMMIT_SET_DIRTY Set each pages dirty
2144 * bit; will cause a later
2145 * pageout
2146 * o UPL_COMMIT_INACTIVATE Clear each pages
2147 * reference bit; the page
2148 * will not be accessed
2149 * o UPL_COMMIT_ALLOW_ACCESS Unbusy each page; pages
2150 * become busy when an
2151 * IOMemoryDescriptor is
2152 * mapped or redirected,
2153 * and we have to wait for
2154 * an IOKit driver
2155 *
2156 * The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
2157 * not be specified by the caller.
2158 *
2159 * The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
2160 * mutually exclusive, and should not be combined.
2161 */
2162 kern_return_t
2163 ubc_upl_commit_range(
2164 upl_t upl,
2165 upl_offset_t offset,
2166 upl_size_t size,
2167 int flags)
2168 {
2169 upl_page_info_t *pl;
2170 boolean_t empty;
2171 kern_return_t kr;
2172
2173 if (flags & UPL_COMMIT_FREE_ON_EMPTY)
2174 flags |= UPL_COMMIT_NOTIFY_EMPTY;
2175
2176 if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
2177 return KERN_INVALID_ARGUMENT;
2178 }
2179
2180 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2181
2182 kr = upl_commit_range(upl, offset, size, flags,
2183 pl, MAX_UPL_SIZE, &empty);
2184
2185 if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
2186 upl_deallocate(upl);
2187
2188 return kr;
2189 }
2190
2191
2192 /*
2193 * ubc_upl_abort_range
2194 *
2195 * Abort the contents of the specified range of the specified upl
2196 *
2197 * Parameters: upl The upl to abort
2198 * offset The offset into the upl
2199 * size The size of the region to be aborted,
2200 * starting at the specified offset
2201 * abort_flags abort type (see below)
2202 *
2203 * Returns: KERN_SUCCESS The range has been aborted
2204 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2205 * KERN_FAILURE The supplied upl does not represent
2206 * device memory, and the offset plus the
2207 * size would exceed the actual size of
2208 * the upl
2209 *
2210 * Notes: IMPORTANT: If the abort is successful, and the object is now
2211 * empty, the upl will be deallocated. Since the caller cannot
2212 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2213 * should generally only be used when the offset is 0 and the size
2214 * is equal to the upl size.
2215 *
2216 * The abort_flags argument is a bitmap of flags on the range of
2217 * pages in the upl to be aborted; allowable flags are:
2218 *
2219 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2220 * empty and has been successfully
2221 * aborted
2222 * o UPL_ABORT_RESTART The operation must be restarted
2223 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
2224 * o UPL_ABORT_ERROR An I/O error occurred
2225 * o UPL_ABORT_DUMP_PAGES Just free the pages
2226 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
2227 * o UPL_ABORT_ALLOW_ACCESS RESERVED
2228 *
2229 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2230 * not be specified by the caller. It is intended to fulfill the
2231 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2232 * ubc_upl_commit_range(), but is never referenced internally.
2233 *
2234 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2235 * referenced; do not use it.
2236 */
2237 kern_return_t
2238 ubc_upl_abort_range(
2239 upl_t upl,
2240 upl_offset_t offset,
2241 upl_size_t size,
2242 int abort_flags)
2243 {
2244 kern_return_t kr;
2245 boolean_t empty = FALSE;
2246
2247 if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
2248 abort_flags |= UPL_ABORT_NOTIFY_EMPTY;
2249
2250 kr = upl_abort_range(upl, offset, size, abort_flags, &empty);
2251
2252 if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
2253 upl_deallocate(upl);
2254
2255 return kr;
2256 }
2257
2258
2259 /*
2260 * ubc_upl_abort
2261 *
2262 * Abort the contents of the specified upl
2263 *
2264 * Parameters: upl The upl to abort
2265 * abort_type abort type (see below)
2266 *
2267 * Returns: KERN_SUCCESS The range has been aborted
2268 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2269 * KERN_FAILURE The supplied upl does not represent
2270 * device memory, and the offset plus the
2271 * size would exceed the actual size of
2272 * the upl
2273 *
2274 * Notes: IMPORTANT: If the abort is successful, and the object is now
2275 * empty, the upl will be deallocated. Since the caller cannot
2276 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2277 * should generally only be used when the offset is 0 and the size
2278 * is equal to the upl size.
2279 *
2280 * The abort_type is a bitmap of flags on the range of
2281 * pages in the upl to be aborted; allowable flags are:
2282 *
2283 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2284 * empty and has been successfully
2285 * aborted
2286 * o UPL_ABORT_RESTART The operation must be restarted
2287 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
2288 * o UPL_ABORT_ERROR An I/O error occurred
2289 * o UPL_ABORT_DUMP_PAGES Just free the pages
2290 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
2291 * o UPL_ABORT_ALLOW_ACCESS RESERVED
2292 *
2293 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2294 * not be specified by the caller. It is intended to fulfill the
2295 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2296 * ubc_upl_commit_range(), but is never referenced internally.
2297 *
2298 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2299 * referenced; do not use it.
2300 */
2301 kern_return_t
2302 ubc_upl_abort(
2303 upl_t upl,
2304 int abort_type)
2305 {
2306 kern_return_t kr;
2307
2308 kr = upl_abort(upl, abort_type);
2309 upl_deallocate(upl);
2310 return kr;
2311 }
2312
2313
2314 /*
2315 * ubc_upl_pageinfo
2316 *
2317 * Retrieve the internal page list for the specified upl
2318 *
2319 * Parameters: upl The upl to obtain the page list from
2320 *
2321 * Returns: !NULL The (upl_page_info_t *) for the page
2322 * list internal to the upl
2323 * NULL Error/no page list associated
2324 *
2325 * Notes: IMPORTANT: The function is only valid on internal objects
2326 * where the list request was made with the UPL_INTERNAL flag.
2327 *
2328 * This function is a utility helper function, since some callers
2329 * may not have direct access to the header defining the macro,
2330 * due to abstraction layering constraints.
2331 */
2332 upl_page_info_t *
2333 ubc_upl_pageinfo(
2334 upl_t upl)
2335 {
2336 return (UPL_GET_INTERNAL_PAGE_LIST(upl));
2337 }
2338
2339
2340 int
2341 UBCINFOEXISTS(struct vnode * vp)
2342 {
2343 return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
2344 }
2345
2346
2347 /*
2348 * CODE SIGNING
2349 */
2350 #define CS_BLOB_PAGEABLE 0
2351 static volatile SInt32 cs_blob_size = 0;
2352 static volatile SInt32 cs_blob_count = 0;
2353 static SInt32 cs_blob_size_peak = 0;
2354 static UInt32 cs_blob_size_max = 0;
2355 static SInt32 cs_blob_count_peak = 0;
2356
2357 int cs_validation = 1;
2358
2359 SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_validation, 0, "Do validate code signatures");
2360 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_count, 0, "Current number of code signature blobs");
2361 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_size, 0, "Current size of all code signature blobs");
2362 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
2363 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
2364 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob");
2365
2366 kern_return_t
2367 ubc_cs_blob_allocate(
2368 vm_offset_t *blob_addr_p,
2369 vm_size_t *blob_size_p)
2370 {
2371 kern_return_t kr;
2372
2373 #if CS_BLOB_PAGEABLE
2374 *blob_size_p = round_page(*blob_size_p);
2375 kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
2376 #else /* CS_BLOB_PAGEABLE */
2377 *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
2378 if (*blob_addr_p == 0) {
2379 kr = KERN_NO_SPACE;
2380 } else {
2381 kr = KERN_SUCCESS;
2382 }
2383 #endif /* CS_BLOB_PAGEABLE */
2384 return kr;
2385 }
2386
2387 void
2388 ubc_cs_blob_deallocate(
2389 vm_offset_t blob_addr,
2390 vm_size_t blob_size)
2391 {
2392 #if CS_BLOB_PAGEABLE
2393 kmem_free(kernel_map, blob_addr, blob_size);
2394 #else /* CS_BLOB_PAGEABLE */
2395 kfree((void *) blob_addr, blob_size);
2396 #endif /* CS_BLOB_PAGEABLE */
2397 }
2398
2399 int
2400 ubc_cs_blob_add(
2401 struct vnode *vp,
2402 cpu_type_t cputype,
2403 off_t base_offset,
2404 vm_address_t addr,
2405 vm_size_t size)
2406 {
2407 kern_return_t kr;
2408 struct ubc_info *uip;
2409 struct cs_blob *blob, *oblob;
2410 int error;
2411 ipc_port_t blob_handle;
2412 memory_object_size_t blob_size;
2413 const CS_CodeDirectory *cd;
2414 off_t blob_start_offset, blob_end_offset;
2415 SHA1_CTX sha1ctxt;
2416
2417 blob_handle = IPC_PORT_NULL;
2418
2419 blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
2420 if (blob == NULL) {
2421 return ENOMEM;
2422 }
2423
2424 #if CS_BLOB_PAGEABLE
2425 /* get a memory entry on the blob */
2426 blob_size = (memory_object_size_t) size;
2427 kr = mach_make_memory_entry_64(kernel_map,
2428 &blob_size,
2429 addr,
2430 VM_PROT_READ,
2431 &blob_handle,
2432 IPC_PORT_NULL);
2433 if (kr != KERN_SUCCESS) {
2434 error = ENOMEM;
2435 goto out;
2436 }
2437 if (memory_object_round_page(blob_size) !=
2438 (memory_object_size_t) round_page(size)) {
2439 printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%lx !?\n",
2440 blob_size, (size_t)size);
2441 panic("XXX FBDP size mismatch 0x%llx 0x%lx\n", blob_size, (size_t)size);
2442 error = EINVAL;
2443 goto out;
2444 }
2445 #else
2446 blob_size = (memory_object_size_t) size;
2447 blob_handle = IPC_PORT_NULL;
2448 #endif
2449
2450 /* fill in the new blob */
2451 blob->csb_cpu_type = cputype;
2452 blob->csb_base_offset = base_offset;
2453 blob->csb_mem_size = size;
2454 blob->csb_mem_offset = 0;
2455 blob->csb_mem_handle = blob_handle;
2456 blob->csb_mem_kaddr = addr;
2457
2458 /*
2459 * Validate the blob's contents
2460 */
2461 cd = findCodeDirectory(
2462 (const CS_SuperBlob *) addr,
2463 (char *) addr,
2464 (char *) addr + blob->csb_mem_size);
2465 if (cd == NULL) {
2466 /* no code directory => useless blob ! */
2467 blob->csb_flags = 0;
2468 blob->csb_start_offset = 0;
2469 blob->csb_end_offset = 0;
2470 } else {
2471 const unsigned char *sha1_base;
2472 int sha1_size;
2473
2474 blob->csb_flags = ntohl(cd->flags) | CS_VALID;
2475 blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
2476 if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
2477 const struct Scatter *scatter = (const struct Scatter*)
2478 ((const char*)cd + ntohl(cd->scatterOffset));
2479 blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE;
2480 } else {
2481 blob->csb_start_offset = (blob->csb_end_offset -
2482 (ntohl(cd->nCodeSlots) * PAGE_SIZE));
2483 }
2484 /* compute the blob's SHA1 hash */
2485 sha1_base = (const unsigned char *) cd;
2486 sha1_size = ntohl(cd->length);
2487 SHA1Init(&sha1ctxt);
2488 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
2489 SHA1Final(blob->csb_sha1, &sha1ctxt);
2490 }
2491
2492 /*
2493 * Let policy module check whether the blob's signature is accepted.
2494 */
2495 #if CONFIG_MACF
2496 error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
2497 if (error)
2498 goto out;
2499 #endif
2500
2501 /*
2502 * Validate the blob's coverage
2503 */
2504 blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
2505 blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;
2506
2507 if (blob_start_offset >= blob_end_offset ||
2508 blob_start_offset < 0 ||
2509 blob_end_offset <= 0) {
2510 /* reject empty or backwards blob */
2511 error = EINVAL;
2512 goto out;
2513 }
2514
2515 vnode_lock(vp);
2516 if (! UBCINFOEXISTS(vp)) {
2517 vnode_unlock(vp);
2518 error = ENOENT;
2519 goto out;
2520 }
2521 uip = vp->v_ubcinfo;
2522
2523 /* check if this new blob overlaps with an existing blob */
2524 for (oblob = uip->cs_blobs;
2525 oblob != NULL;
2526 oblob = oblob->csb_next) {
2527 off_t oblob_start_offset, oblob_end_offset;
2528
2529 oblob_start_offset = (oblob->csb_base_offset +
2530 oblob->csb_start_offset);
2531 oblob_end_offset = (oblob->csb_base_offset +
2532 oblob->csb_end_offset);
2533 if (blob_start_offset >= oblob_end_offset ||
2534 blob_end_offset <= oblob_start_offset) {
2535 /* no conflict with this existing blob */
2536 } else {
2537 /* conflict ! */
2538 if (blob_start_offset == oblob_start_offset &&
2539 blob_end_offset == oblob_end_offset &&
2540 blob->csb_mem_size == oblob->csb_mem_size &&
2541 blob->csb_flags == oblob->csb_flags &&
2542 (blob->csb_cpu_type == CPU_TYPE_ANY ||
2543 oblob->csb_cpu_type == CPU_TYPE_ANY ||
2544 blob->csb_cpu_type == oblob->csb_cpu_type) &&
2545 !bcmp(blob->csb_sha1,
2546 oblob->csb_sha1,
2547 SHA1_RESULTLEN)) {
2548 /*
2549 * We already have this blob:
2550 * we'll return success but
2551 * throw away the new blob.
2552 */
2553 if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
2554 /*
2555 * The old blob matches this one
2556 * but doesn't have any CPU type.
2557 * Update it with whatever the caller
2558 * provided this time.
2559 */
2560 oblob->csb_cpu_type = cputype;
2561 }
2562 vnode_unlock(vp);
2563 error = EAGAIN;
2564 goto out;
2565 } else {
2566 /* different blob: reject the new one */
2567 vnode_unlock(vp);
2568 error = EALREADY;
2569 goto out;
2570 }
2571 }
2572
2573 }
2574
2575
2576 /* mark this vnode's VM object as having "signed pages" */
2577 kr = memory_object_signed(uip->ui_control, TRUE);
2578 if (kr != KERN_SUCCESS) {
2579 vnode_unlock(vp);
2580 error = ENOENT;
2581 goto out;
2582 }
2583
2584 /*
2585 * Add this blob to the list of blobs for this vnode.
2586 * We always add at the front of the list and we never remove a
2587 * blob from the list, so ubc_cs_get_blobs() can return whatever
2588 * the top of the list was and that list will remain valid
2589 * while we validate a page, even after we release the vnode's lock.
2590 */
2591 blob->csb_next = uip->cs_blobs;
2592 uip->cs_blobs = blob;
2593
2594 OSAddAtomic(+1, &cs_blob_count);
2595 if (cs_blob_count > cs_blob_count_peak) {
2596 cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
2597 }
2598 OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size);
2599 if ((SInt32) cs_blob_size > cs_blob_size_peak) {
2600 cs_blob_size_peak = (SInt32) cs_blob_size; /* XXX atomic ? */
2601 }
2602 if ((UInt32) blob->csb_mem_size > cs_blob_size_max) {
2603 cs_blob_size_max = (UInt32) blob->csb_mem_size;
2604 }
2605
2606 if (cs_debug > 1) {
2607 proc_t p;
2608
2609 p = current_proc();
2610 printf("CODE SIGNING: proc %d(%s) "
2611 "loaded %s signatures for file (%s) "
2612 "range 0x%llx:0x%llx flags 0x%x\n",
2613 p->p_pid, p->p_comm,
2614 blob->csb_cpu_type == -1 ? "detached" : "embedded",
2615 vnode_name(vp),
2616 blob->csb_base_offset + blob->csb_start_offset,
2617 blob->csb_base_offset + blob->csb_end_offset,
2618 blob->csb_flags);
2619 }
2620
2621 vnode_unlock(vp);
2622
2623 error = 0; /* success ! */
2624
2625 out:
2626 if (error) {
2627 /* we failed; release what we allocated */
2628 if (blob) {
2629 kfree(blob, sizeof (*blob));
2630 blob = NULL;
2631 }
2632 if (blob_handle != IPC_PORT_NULL) {
2633 mach_memory_entry_port_release(blob_handle);
2634 blob_handle = IPC_PORT_NULL;
2635 }
2636 }
2637
2638 if (error == EAGAIN) {
2639 /*
2640 * See above: error is EAGAIN if we were asked
2641 * to add an existing blob again. We cleaned the new
2642 * blob and we want to return success.
2643 */
2644 error = 0;
2645 /*
2646 * Since we're not failing, consume the data we received.
2647 */
2648 ubc_cs_blob_deallocate(addr, size);
2649 }
2650
2651 return error;
2652 }
2653
2654
2655 struct cs_blob *
2656 ubc_cs_blob_get(
2657 struct vnode *vp,
2658 cpu_type_t cputype,
2659 off_t offset)
2660 {
2661 struct ubc_info *uip;
2662 struct cs_blob *blob;
2663 off_t offset_in_blob;
2664
2665 vnode_lock_spin(vp);
2666
2667 if (! UBCINFOEXISTS(vp)) {
2668 blob = NULL;
2669 goto out;
2670 }
2671
2672 uip = vp->v_ubcinfo;
2673 for (blob = uip->cs_blobs;
2674 blob != NULL;
2675 blob = blob->csb_next) {
2676 if (cputype != -1 && blob->csb_cpu_type == cputype) {
2677 break;
2678 }
2679 if (offset != -1) {
2680 offset_in_blob = offset - blob->csb_base_offset;
2681 if (offset_in_blob >= blob->csb_start_offset &&
2682 offset_in_blob < blob->csb_end_offset) {
2683 /* our offset is covered by this blob */
2684 break;
2685 }
2686 }
2687 }
2688
2689 out:
2690 vnode_unlock(vp);
2691
2692 return blob;
2693 }
2694
2695 static void
2696 ubc_cs_free(
2697 struct ubc_info *uip)
2698 {
2699 struct cs_blob *blob, *next_blob;
2700
2701 for (blob = uip->cs_blobs;
2702 blob != NULL;
2703 blob = next_blob) {
2704 next_blob = blob->csb_next;
2705 if (blob->csb_mem_kaddr != 0) {
2706 ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
2707 blob->csb_mem_size);
2708 blob->csb_mem_kaddr = 0;
2709 }
2710 if (blob->csb_mem_handle != IPC_PORT_NULL) {
2711 mach_memory_entry_port_release(blob->csb_mem_handle);
2712 }
2713 blob->csb_mem_handle = IPC_PORT_NULL;
2714 OSAddAtomic(-1, &cs_blob_count);
2715 OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size);
2716 kfree(blob, sizeof (*blob));
2717 }
2718 #if CHECK_CS_VALIDATION_BITMAP
2719 ubc_cs_validation_bitmap_deallocate( uip->ui_vnode );
2720 #endif
2721 uip->cs_blobs = NULL;
2722 }
2723
2724 struct cs_blob *
2725 ubc_get_cs_blobs(
2726 struct vnode *vp)
2727 {
2728 struct ubc_info *uip;
2729 struct cs_blob *blobs;
2730
2731 /*
2732 * No need to take the vnode lock here. The caller must be holding
2733 * a reference on the vnode (via a VM mapping or open file descriptor),
2734 * so the vnode will not go away. The ubc_info stays until the vnode
2735 * goes away. And we only modify "blobs" by adding to the head of the
2736 * list.
2737 * The ubc_info could go away entirely if the vnode gets reclaimed as
2738 * part of a forced unmount. In the case of a code-signature validation
2739 * during a page fault, the "paging_in_progress" reference on the VM
2740 * object guarantess that the vnode pager (and the ubc_info) won't go
2741 * away during the fault.
2742 * Other callers need to protect against vnode reclaim by holding the
2743 * vnode lock, for example.
2744 */
2745
2746 if (! UBCINFOEXISTS(vp)) {
2747 blobs = NULL;
2748 goto out;
2749 }
2750
2751 uip = vp->v_ubcinfo;
2752 blobs = uip->cs_blobs;
2753
2754 out:
2755 return blobs;
2756 }
2757
2758 unsigned long cs_validate_page_no_hash = 0;
2759 unsigned long cs_validate_page_bad_hash = 0;
2760 boolean_t
2761 cs_validate_page(
2762 void *_blobs,
2763 memory_object_offset_t page_offset,
2764 const void *data,
2765 boolean_t *tainted)
2766 {
2767 SHA1_CTX sha1ctxt;
2768 unsigned char actual_hash[SHA1_RESULTLEN];
2769 unsigned char expected_hash[SHA1_RESULTLEN];
2770 boolean_t found_hash;
2771 struct cs_blob *blobs, *blob;
2772 const CS_CodeDirectory *cd;
2773 const CS_SuperBlob *embedded;
2774 const unsigned char *hash;
2775 boolean_t validated;
2776 off_t offset; /* page offset in the file */
2777 size_t size;
2778 off_t codeLimit = 0;
2779 char *lower_bound, *upper_bound;
2780 vm_offset_t kaddr, blob_addr;
2781 vm_size_t ksize;
2782 kern_return_t kr;
2783
2784 offset = page_offset;
2785
2786 /* retrieve the expected hash */
2787 found_hash = FALSE;
2788 blobs = (struct cs_blob *) _blobs;
2789
2790 for (blob = blobs;
2791 blob != NULL;
2792 blob = blob->csb_next) {
2793 offset = page_offset - blob->csb_base_offset;
2794 if (offset < blob->csb_start_offset ||
2795 offset >= blob->csb_end_offset) {
2796 /* our page is not covered by this blob */
2797 continue;
2798 }
2799
2800 /* map the blob in the kernel address space */
2801 kaddr = blob->csb_mem_kaddr;
2802 if (kaddr == 0) {
2803 ksize = (vm_size_t) (blob->csb_mem_size +
2804 blob->csb_mem_offset);
2805 kr = vm_map(kernel_map,
2806 &kaddr,
2807 ksize,
2808 0,
2809 VM_FLAGS_ANYWHERE,
2810 blob->csb_mem_handle,
2811 0,
2812 TRUE,
2813 VM_PROT_READ,
2814 VM_PROT_READ,
2815 VM_INHERIT_NONE);
2816 if (kr != KERN_SUCCESS) {
2817 /* XXX FBDP what to do !? */
2818 printf("cs_validate_page: failed to map blob, "
2819 "size=0x%lx kr=0x%x\n",
2820 (size_t)blob->csb_mem_size, kr);
2821 break;
2822 }
2823 }
2824 blob_addr = kaddr + blob->csb_mem_offset;
2825
2826 lower_bound = CAST_DOWN(char *, blob_addr);
2827 upper_bound = lower_bound + blob->csb_mem_size;
2828
2829 embedded = (const CS_SuperBlob *) blob_addr;
2830 cd = findCodeDirectory(embedded, lower_bound, upper_bound);
2831 if (cd != NULL) {
2832 if (cd->pageSize != PAGE_SHIFT ||
2833 cd->hashType != 0x1 ||
2834 cd->hashSize != SHA1_RESULTLEN) {
2835 /* bogus blob ? */
2836 continue;
2837 }
2838
2839 offset = page_offset - blob->csb_base_offset;
2840 if (offset < blob->csb_start_offset ||
2841 offset >= blob->csb_end_offset) {
2842 /* our page is not covered by this blob */
2843 continue;
2844 }
2845
2846 codeLimit = ntohl(cd->codeLimit);
2847 hash = hashes(cd, atop(offset),
2848 lower_bound, upper_bound);
2849 if (hash != NULL) {
2850 bcopy(hash, expected_hash,
2851 sizeof (expected_hash));
2852 found_hash = TRUE;
2853 }
2854
2855 break;
2856 }
2857 }
2858
2859 if (found_hash == FALSE) {
2860 /*
2861 * We can't verify this page because there is no signature
2862 * for it (yet). It's possible that this part of the object
2863 * is not signed, or that signatures for that part have not
2864 * been loaded yet.
2865 * Report that the page has not been validated and let the
2866 * caller decide if it wants to accept it or not.
2867 */
2868 cs_validate_page_no_hash++;
2869 if (cs_debug > 1) {
2870 printf("CODE SIGNING: cs_validate_page: "
2871 "off 0x%llx: no hash to validate !?\n",
2872 page_offset);
2873 }
2874 validated = FALSE;
2875 *tainted = FALSE;
2876 } else {
2877
2878 size = PAGE_SIZE;
2879 const uint32_t *asha1, *esha1;
2880 if ((off_t)(offset + size) > codeLimit) {
2881 /* partial page at end of segment */
2882 assert(offset < codeLimit);
2883 size = (size_t) (codeLimit & PAGE_MASK);
2884 }
2885 /* compute the actual page's SHA1 hash */
2886 SHA1Init(&sha1ctxt);
2887 SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
2888 SHA1Final(actual_hash, &sha1ctxt);
2889
2890 asha1 = (const uint32_t *) actual_hash;
2891 esha1 = (const uint32_t *) expected_hash;
2892
2893 if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
2894 if (cs_debug) {
2895 printf("CODE SIGNING: cs_validate_page: "
2896 "off 0x%llx size 0x%lx: "
2897 "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
2898 "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
2899 page_offset, size,
2900 asha1[0], asha1[1], asha1[2],
2901 asha1[3], asha1[4],
2902 esha1[0], esha1[1], esha1[2],
2903 esha1[3], esha1[4]);
2904 }
2905 cs_validate_page_bad_hash++;
2906 *tainted = TRUE;
2907 } else {
2908 if (cs_debug > 1) {
2909 printf("CODE SIGNING: cs_validate_page: "
2910 "off 0x%llx size 0x%lx: SHA1 OK\n",
2911 page_offset, size);
2912 }
2913 *tainted = FALSE;
2914 }
2915 validated = TRUE;
2916 }
2917
2918 return validated;
2919 }
2920
2921 int
2922 ubc_cs_getcdhash(
2923 vnode_t vp,
2924 off_t offset,
2925 unsigned char *cdhash)
2926 {
2927 struct cs_blob *blobs, *blob;
2928 off_t rel_offset;
2929 int ret;
2930
2931 vnode_lock(vp);
2932
2933 blobs = ubc_get_cs_blobs(vp);
2934 for (blob = blobs;
2935 blob != NULL;
2936 blob = blob->csb_next) {
2937 /* compute offset relative to this blob */
2938 rel_offset = offset - blob->csb_base_offset;
2939 if (rel_offset >= blob->csb_start_offset &&
2940 rel_offset < blob->csb_end_offset) {
2941 /* this blob does cover our "offset" ! */
2942 break;
2943 }
2944 }
2945
2946 if (blob == NULL) {
2947 /* we didn't find a blob covering "offset" */
2948 ret = EBADEXEC; /* XXX any better error ? */
2949 } else {
2950 /* get the SHA1 hash of that blob */
2951 bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
2952 ret = 0;
2953 }
2954
2955 vnode_unlock(vp);
2956
2957 return ret;
2958 }
2959
2960 #if CHECK_CS_VALIDATION_BITMAP
2961 #define stob(s) ((atop_64((s)) + 07) >> 3)
2962 extern boolean_t root_fs_upgrade_try;
2963
2964 /*
2965 * Should we use the code-sign bitmap to avoid repeated code-sign validation?
2966 * Depends:
2967 * a) Is the target vnode on the root filesystem?
2968 * b) Has someone tried to mount the root filesystem read-write?
2969 * If answers are (a) yes AND (b) no, then we can use the bitmap.
2970 */
2971 #define USE_CODE_SIGN_BITMAP(vp) ( (vp != NULL) && (vp->v_mount != NULL) && (vp->v_mount->mnt_flag & MNT_ROOTFS) && !root_fs_upgrade_try)
2972 kern_return_t
2973 ubc_cs_validation_bitmap_allocate(
2974 vnode_t vp)
2975 {
2976 kern_return_t kr = KERN_SUCCESS;
2977 struct ubc_info *uip;
2978 char *target_bitmap;
2979 vm_object_size_t bitmap_size;
2980
2981 if ( ! USE_CODE_SIGN_BITMAP(vp) || (! UBCINFOEXISTS(vp))) {
2982 kr = KERN_INVALID_ARGUMENT;
2983 } else {
2984 uip = vp->v_ubcinfo;
2985
2986 if ( uip->cs_valid_bitmap == NULL ) {
2987 bitmap_size = stob(uip->ui_size);
2988 target_bitmap = (char*) kalloc( (vm_size_t)bitmap_size );
2989 if (target_bitmap == 0) {
2990 kr = KERN_NO_SPACE;
2991 } else {
2992 kr = KERN_SUCCESS;
2993 }
2994 if( kr == KERN_SUCCESS ) {
2995 memset( target_bitmap, 0, (size_t)bitmap_size);
2996 uip->cs_valid_bitmap = (void*)target_bitmap;
2997 uip->cs_valid_bitmap_size = bitmap_size;
2998 }
2999 }
3000 }
3001 return kr;
3002 }
3003
3004 kern_return_t
3005 ubc_cs_check_validation_bitmap (
3006 vnode_t vp,
3007 memory_object_offset_t offset,
3008 int optype)
3009 {
3010 kern_return_t kr = KERN_SUCCESS;
3011
3012 if ( ! USE_CODE_SIGN_BITMAP(vp) || ! UBCINFOEXISTS(vp)) {
3013 kr = KERN_INVALID_ARGUMENT;
3014 } else {
3015 struct ubc_info *uip = vp->v_ubcinfo;
3016 char *target_bitmap = uip->cs_valid_bitmap;
3017
3018 if ( target_bitmap == NULL ) {
3019 kr = KERN_INVALID_ARGUMENT;
3020 } else {
3021 uint64_t bit, byte;
3022 bit = atop_64( offset );
3023 byte = bit >> 3;
3024
3025 if ( byte > uip->cs_valid_bitmap_size ) {
3026 kr = KERN_INVALID_ARGUMENT;
3027 } else {
3028
3029 if (optype == CS_BITMAP_SET) {
3030 target_bitmap[byte] |= (1 << (bit & 07));
3031 kr = KERN_SUCCESS;
3032 } else if (optype == CS_BITMAP_CLEAR) {
3033 target_bitmap[byte] &= ~(1 << (bit & 07));
3034 kr = KERN_SUCCESS;
3035 } else if (optype == CS_BITMAP_CHECK) {
3036 if ( target_bitmap[byte] & (1 << (bit & 07))) {
3037 kr = KERN_SUCCESS;
3038 } else {
3039 kr = KERN_FAILURE;
3040 }
3041 }
3042 }
3043 }
3044 }
3045 return kr;
3046 }
3047
3048 void
3049 ubc_cs_validation_bitmap_deallocate(
3050 vnode_t vp)
3051 {
3052 struct ubc_info *uip;
3053 void *target_bitmap;
3054 vm_object_size_t bitmap_size;
3055
3056 if ( UBCINFOEXISTS(vp)) {
3057 uip = vp->v_ubcinfo;
3058
3059 if ( (target_bitmap = uip->cs_valid_bitmap) != NULL ) {
3060 bitmap_size = uip->cs_valid_bitmap_size;
3061 kfree( target_bitmap, (vm_size_t) bitmap_size );
3062 uip->cs_valid_bitmap = NULL;
3063 }
3064 }
3065 }
3066 #else
3067 kern_return_t ubc_cs_validation_bitmap_allocate(__unused vnode_t vp){
3068 return KERN_INVALID_ARGUMENT;
3069 }
3070
3071 kern_return_t ubc_cs_check_validation_bitmap(
3072 __unused struct vnode *vp,
3073 __unused memory_object_offset_t offset,
3074 __unused int optype){
3075
3076 return KERN_INVALID_ARGUMENT;
3077 }
3078
3079 void ubc_cs_validation_bitmap_deallocate(__unused vnode_t vp){
3080 return;
3081 }
3082 #endif /* CHECK_CS_VALIDATION_BITMAP */