apple/xnu.git (xnu-2050.24.15): bsd/kern/ubc_subr.c
1 /*
2 * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: ubc_subr.c
30 * Author: Umesh Vaishampayan [umeshv@apple.com]
31 * 05-Aug-1999 umeshv Created.
32 *
33 * Functions related to Unified Buffer cache.
34 *
35 * Caller of UBC functions MUST have a valid reference on the vnode.
36 *
37 */
38
39 #include <sys/types.h>
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/lock.h>
43 #include <sys/mman.h>
44 #include <sys/mount_internal.h>
45 #include <sys/vnode_internal.h>
46 #include <sys/ubc_internal.h>
47 #include <sys/ucred.h>
48 #include <sys/proc_internal.h>
49 #include <sys/kauth.h>
50 #include <sys/buf.h>
51 #include <sys/user.h>
52 #include <sys/codesign.h>
53
54 #include <mach/mach_types.h>
55 #include <mach/memory_object_types.h>
56 #include <mach/memory_object_control.h>
57 #include <mach/vm_map.h>
58 #include <mach/mach_vm.h>
59 #include <mach/upl.h>
60
61 #include <kern/kern_types.h>
62 #include <kern/kalloc.h>
63 #include <kern/zalloc.h>
64 #include <kern/thread.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_protos.h> /* last */
67
68 #include <libkern/crypto/sha1.h>
69
70 #include <security/mac_framework.h>
71
72 /* XXX These should be in a BSD accessible Mach header, but aren't. */
73 extern kern_return_t memory_object_pages_resident(memory_object_control_t,
74 boolean_t *);
75 extern kern_return_t memory_object_signed(memory_object_control_t control,
76 boolean_t is_signed);
77 extern boolean_t memory_object_is_slid(memory_object_control_t control);
78
79 extern void Debugger(const char *message);
80
81
82 /* XXX no one uses this interface! */
83 kern_return_t ubc_page_op_with_control(
84 memory_object_control_t control,
85 off_t f_offset,
86 int ops,
87 ppnum_t *phys_entryp,
88 int *flagsp);
89
90
91 #if DIAGNOSTIC
92 #if defined(assert)
93 #undef assert
94 #endif
95 #define assert(cond) \
96 ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
97 #else
98 #include <kern/assert.h>
99 #endif /* DIAGNOSTIC */
100
101 static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
102 static int ubc_umcallback(vnode_t, void *);
103 static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
104 static void ubc_cs_free(struct ubc_info *uip);
105
106 struct zone *ubc_info_zone;
107
108
109 /*
110 * CODESIGNING
111 * Routines to navigate code signing data structures in the kernel...
112 */
113
114 extern int cs_debug;
115
116 static boolean_t
117 cs_valid_range(
118 const void *start,
119 const void *end,
120 const void *lower_bound,
121 const void *upper_bound)
122 {
123 if (upper_bound < lower_bound ||
124 end < start) {
125 return FALSE;
126 }
127
128 if (start < lower_bound ||
129 end > upper_bound) {
130 return FALSE;
131 }
132
133 return TRUE;
134 }
135
136 /*
137 * Magic numbers used by Code Signing
138 */
139 enum {
140 CSMAGIC_REQUIREMENT = 0xfade0c00, /* single Requirement blob */
141 CSMAGIC_REQUIREMENTS = 0xfade0c01, /* Requirements vector (internal requirements) */
142 CSMAGIC_CODEDIRECTORY = 0xfade0c02, /* CodeDirectory blob */
143 CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */
144 CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02, /* XXX */
145 CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xfade7171, /* embedded entitlements */
146 CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */
147
148 CSSLOT_CODEDIRECTORY = 0, /* slot index for CodeDirectory */
149 CSSLOT_ENTITLEMENTS = 5
150 };
151
152 static const uint32_t supportsScatter = 0x20100; // first version to support scatter option
153
154 /*
155 * Structure of an embedded-signature SuperBlob
156 */
157 typedef struct __BlobIndex {
158 uint32_t type; /* type of entry */
159 uint32_t offset; /* offset of entry */
160 } CS_BlobIndex;
161
162 typedef struct __SuperBlob {
163 uint32_t magic; /* magic number */
164 uint32_t length; /* total length of SuperBlob */
165 uint32_t count; /* number of index entries following */
166 CS_BlobIndex index[]; /* (count) entries */
167 /* followed by Blobs in no particular order as indicated by offsets in index */
168 } CS_SuperBlob;
169
170 typedef struct __GenericBlob {
171 uint32_t magic; /* magic number */
172 uint32_t length; /* total length of blob */
173 char data[];
174 } CS_GenericBlob;
175
176 struct Scatter {
177 uint32_t count; // number of pages; zero for sentinel (only)
178 uint32_t base; // first page number
179 uint64_t targetOffset; // offset in target
180 uint64_t spare; // reserved
181 };
182
183 /*
184 * C form of a CodeDirectory.
185 */
186 typedef struct __CodeDirectory {
187 uint32_t magic; /* magic number (CSMAGIC_CODEDIRECTORY) */
188 uint32_t length; /* total length of CodeDirectory blob */
189 uint32_t version; /* compatibility version */
190 uint32_t flags; /* setup and mode flags */
191 uint32_t hashOffset; /* offset of hash slot element at index zero */
192 uint32_t identOffset; /* offset of identifier string */
193 uint32_t nSpecialSlots; /* number of special hash slots */
194 uint32_t nCodeSlots; /* number of ordinary (code) hash slots */
195 uint32_t codeLimit; /* limit to main image signature range */
196 uint8_t hashSize; /* size of each hash in bytes */
197 uint8_t hashType; /* type of hash (cdHashType* constants) */
198 uint8_t spare1; /* unused (must be zero) */
199 uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */
200 uint32_t spare2; /* unused (must be zero) */
201 /* Version 0x20100 */
202 uint32_t scatterOffset; /* offset of optional scatter vector */
203 /* followed by dynamic content as located by offset fields above */
204 } CS_CodeDirectory;
205
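/*
 * Illustrative sketch, for orientation only: all multi-byte CodeDirectory
 * fields are stored big-endian and must be converted with ntohl() before
 * use.  Assuming a bounds-checked CodeDirectory pointer "cd", and with
 * "N" and "S" as hypothetical ordinary and special slot indices, the
 * corresponding hashes live at roughly:
 *
 *	const uint8_t *hash_base =
 *	    (const uint8_t *)cd + ntohl(cd->hashOffset);
 *	const uint8_t *code_hash    = hash_base + N * cd->hashSize;
 *	const uint8_t *special_hash = hash_base - S * cd->hashSize;
 *
 * Special slots grow downward from hashOffset, which is why
 * cs_entitlements_blob_get() below subtracts
 * (hash_size * CSSLOT_ENTITLEMENTS) from the hash offset.
 */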
206
207 /*
208 * Locate the CodeDirectory from an embedded signature blob
209 */
210 static const
211 CS_CodeDirectory *findCodeDirectory(
212 const CS_SuperBlob *embedded,
213 char *lower_bound,
214 char *upper_bound)
215 {
216 const CS_CodeDirectory *cd = NULL;
217
218 if (embedded &&
219 cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
220 ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
221 const CS_BlobIndex *limit;
222 const CS_BlobIndex *p;
223
224 limit = &embedded->index[ntohl(embedded->count)];
225 if (!cs_valid_range(&embedded->index[0], limit,
226 lower_bound, upper_bound)) {
227 return NULL;
228 }
229 for (p = embedded->index; p < limit; ++p) {
230 if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
231 const unsigned char *base;
232
233 base = (const unsigned char *)embedded;
234 cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
235 break;
236 }
237 }
238 } else {
239 /*
240 * Detached signatures come as a bare CS_CodeDirectory,
241 * without a blob.
242 */
243 cd = (const CS_CodeDirectory *) embedded;
244 }
245
246 if (cd &&
247 cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
248 cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
249 lower_bound, upper_bound) &&
250 cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
251 lower_bound, upper_bound) &&
252 cs_valid_range(cd, (const char *) cd +
253 ntohl(cd->hashOffset) +
254 (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
255 lower_bound, upper_bound) &&
256
257 ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
258 return cd;
259 }
260
261 // not found or not a valid code directory
262 return NULL;
263 }
264
265
266 /*
267 * Locating a page hash
268 */
269 static const unsigned char *
270 hashes(
271 const CS_CodeDirectory *cd,
272 unsigned page,
273 char *lower_bound,
274 char *upper_bound)
275 {
276 const unsigned char *base, *top, *hash;
277 uint32_t nCodeSlots = ntohl(cd->nCodeSlots);
278
279 assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));
280
281 if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
282 /* Get first scatter struct */
283 const struct Scatter *scatter = (const struct Scatter*)
284 ((const char*)cd + ntohl(cd->scatterOffset));
285 uint32_t hashindex=0, scount, sbase=0;
286 /* iterate all scatter structs */
287 do {
288 if((const char*)scatter > (const char*)cd + ntohl(cd->length)) {
289 if(cs_debug) {
290 printf("CODE SIGNING: Scatter extends past Code Directory\n");
291 }
292 return NULL;
293 }
294
295 scount = ntohl(scatter->count);
296 uint32_t new_base = ntohl(scatter->base);
297
298 /* last scatter? */
299 if (scount == 0) {
300 return NULL;
301 }
302
303 if((hashindex > 0) && (new_base <= sbase)) {
304 if(cs_debug) {
305 printf("CODE SIGNING: unordered Scatter, prev base %d, cur base %d\n",
306 sbase, new_base);
307 }
308 return NULL; /* unordered scatter array */
309 }
310 sbase = new_base;
311
312 /* this scatter beyond page we're looking for? */
313 if (sbase > page) {
314 return NULL;
315 }
316
317 if (sbase+scount >= page) {
318 /* Found the scatter struct that is
319 * referencing our page */
320
321 /* base = address of first hash covered by scatter */
322 base = (const unsigned char *)cd + ntohl(cd->hashOffset) +
323 hashindex * SHA1_RESULTLEN;
324 /* top = address of first hash after this scatter */
325 top = base + scount * SHA1_RESULTLEN;
326 if (!cs_valid_range(base, top, lower_bound,
327 upper_bound) ||
328 hashindex > nCodeSlots) {
329 return NULL;
330 }
331
332 break;
333 }
334
335 /* this scatter struct is before the page we're looking
336 * for. Iterate. */
337 hashindex+=scount;
338 scatter++;
339 } while(1);
340
341 hash = base + (page - sbase) * SHA1_RESULTLEN;
342 } else {
343 base = (const unsigned char *)cd + ntohl(cd->hashOffset);
344 top = base + nCodeSlots * SHA1_RESULTLEN;
345 if (!cs_valid_range(base, top, lower_bound, upper_bound) ||
346 page > nCodeSlots) {
347 return NULL;
348 }
349 assert(page < nCodeSlots);
350
351 hash = base + page * SHA1_RESULTLEN;
352 }
353
354 if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
355 lower_bound, upper_bound)) {
356 hash = NULL;
357 }
358
359 return hash;
360 }
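
/*
 * Illustrative sketch, for orientation only: a consumer of hashes() is
 * expected to compare the returned slot against a freshly computed SHA-1
 * of the page contents.  Assuming hypothetical locals "data", "size",
 * "page" and "valid", that comparison looks roughly like:
 *
 *	const unsigned char *expected;
 *	unsigned char actual[SHA1_RESULTLEN];
 *	SHA1_CTX ctx;
 *
 *	expected = hashes(cd, page, lower_bound, upper_bound);
 *	if (expected != NULL) {
 *		SHA1Init(&ctx);
 *		SHA1Update(&ctx, data, size);
 *		SHA1Final(actual, &ctx);
 *		if (memcmp(expected, actual, SHA1_RESULTLEN) != 0)
 *			valid = FALSE;
 *	}
 */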
361 /*
362 * CODESIGNING
363 * End of routines to navigate code signing data structures in the kernel.
364 */
365
366 /*
367 * ENTITLEMENTS
368 * Routines to navigate entitlements in the kernel.
369 */
370
371 /* Retrieve the entitlements blob for a process.
372 * Returns:
373 * EINVAL no text vnode associated with the process
374 * EBADEXEC invalid code signing data
375 * ENOMEM you should reboot
376 * 0 no error occurred
377 *
378 * On success, out_start and out_length will point to the
379 * entitlements blob if found; or will be set to NULL/zero
380 * if there were no entitlements.
381 */
382 int
383 cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
384 {
385 SHA1_CTX context; /* XXX hash agility */
386 int error = 0;
387 struct cs_blob *blob_list_entry;
388 CS_SuperBlob *super_blob;
389 CS_BlobIndex *blob_index;
390 CS_GenericBlob *blob;
391 CS_CodeDirectory *code_dir;
392 unsigned char *computed_hash = NULL;
393 unsigned char *embedded_hash = NULL;
394 void *start = NULL;
395 size_t length = 0;
396 size_t hash_size = 0;
397 unsigned int i, count;
398
399 if (NULL == p->p_textvp) {
400 error = EINVAL;
401 goto out;
402 }
403 if (NULL == (blob_list_entry = ubc_cs_blob_get(p->p_textvp, -1,
404 p->p_textoff)))
405 goto out;
406 super_blob = (void *)blob_list_entry->csb_mem_kaddr;
407 if (CSMAGIC_EMBEDDED_SIGNATURE != ntohl(super_blob->magic)) {
408 error = EBADEXEC;
409 goto out;
410 }
411 count = ntohl(super_blob->count);
412 for (i = 0; i < count; ++i) {
413 blob_index = &super_blob->index[i];
414 blob = (void *)((char *)super_blob + ntohl(blob_index->offset));
415 switch (ntohl(blob_index->type)) {
416 case CSSLOT_CODEDIRECTORY:
417 if (CSMAGIC_CODEDIRECTORY != ntohl(blob->magic))
418 break;
419 code_dir = (void *)blob;
420 hash_size = code_dir->hashSize;
421 if (CSSLOT_ENTITLEMENTS <=
422 ntohl(code_dir->nSpecialSlots)) {
423 embedded_hash = (void *)((char *)code_dir +
424 ntohl(code_dir->hashOffset) -
425 (hash_size * CSSLOT_ENTITLEMENTS));
426 }
427 break;
428 case CSSLOT_ENTITLEMENTS:
429 if (CSMAGIC_EMBEDDED_ENTITLEMENTS != ntohl(blob->magic))
430 break;
431 start = (void *)blob;
432 length = ntohl(blob->length);
433 break;
434 default:
435 break;
436 }
437 }
438 if (NULL == start && NULL == embedded_hash) {
439 error = 0;
440 goto out;
441 } else if (NULL == start || NULL == embedded_hash) {
442 error = EBADEXEC;
443 goto out;
444 }
445 if (NULL == (computed_hash = kalloc(hash_size))) {
446 error = ENOMEM;
447 goto out;
448 }
449 SHA1Init(&context);
450 SHA1Update(&context, start, length);
451 SHA1Final(computed_hash, &context);
452 if (0 != memcmp(computed_hash, embedded_hash, hash_size)) {
453 error = EBADEXEC;
454 goto out;
455 }
456 error = 0;
457 out:
458 if (NULL != computed_hash)
459 kfree(computed_hash, hash_size);
460 if (0 == error) {
461 *out_start = start;
462 *out_length = length;
463 }
464 return error;
465 }
466
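/*
 * Illustrative sketch, for orientation only: a hypothetical in-kernel
 * caller of cs_entitlements_blob_get() must remember that success with
 * no entitlements leaves the out parameters NULL/zero, and that the blob
 * remains owned by the vnode's cs_blob (do not free it):
 *
 *	void *ents = NULL;
 *	size_t ents_len = 0;
 *
 *	if (cs_entitlements_blob_get(p, &ents, &ents_len) == 0 &&
 *	    ents != NULL) {
 *		consume_entitlements(ents, ents_len);
 *	}
 *
 * consume_entitlements() is a hypothetical consumer, shown only to make
 * the calling convention concrete.
 */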
467 /*
468 * ENTITLEMENTS
469 * End of routines to navigate entitlements in the kernel.
470 */
471
472
473
474 /*
475 * ubc_init
476 *
477 * Initialization of the zone for Unified Buffer Cache.
478 *
479 * Parameters: (void)
480 *
481 * Returns: (void)
482 *
483 * Implicit returns:
484 * ubc_info_zone(global) initialized for subsequent allocations
485 */
486 __private_extern__ void
487 ubc_init(void)
488 {
489 int i;
490
491 i = (vm_size_t) sizeof (struct ubc_info);
492
493 ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
494
495 zone_change(ubc_info_zone, Z_NOENCRYPT, TRUE);
496 }
497
498
499 /*
500 * ubc_info_init
501 *
502 * Allocate and attach an empty ubc_info structure to a vnode
503 *
504 * Parameters: vp Pointer to the vnode
505 *
506 * Returns: 0 Success
507 * vnode_size:ENOMEM Not enough space
508 * vnode_size:??? Other error from vnode_getattr
509 *
510 */
511 int
512 ubc_info_init(struct vnode *vp)
513 {
514 return(ubc_info_init_internal(vp, 0, 0));
515 }
516
517
518 /*
519 * ubc_info_init_withsize
520 *
521 * Allocate and attach a sized ubc_info structure to a vnode
522 *
523 * Parameters: vp Pointer to the vnode
524 * filesize The size of the file
525 *
526 * Returns: 0 Success
527 * vnode_size:ENOMEM Not enough space
528 * vnode_size:??? Other error from vnode_getattr
529 */
530 int
531 ubc_info_init_withsize(struct vnode *vp, off_t filesize)
532 {
533 return(ubc_info_init_internal(vp, 1, filesize));
534 }
535
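/*
 * Illustrative sketch, for orientation only: a filesystem that already
 * knows the file size when it sets up a vnode would typically prefer
 * ubc_info_init_withsize(), avoiding the vnode_size()/vnode_getattr()
 * round trip taken by plain ubc_info_init().  "ip" is a hypothetical
 * in-memory inode:
 *
 *	error = ubc_info_init_withsize(vp, (off_t)ip->i_size);
 *	if (error)
 *		goto bad;
 */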
536
537 /*
538 * ubc_info_init_internal
539 *
540 * Allocate and attach a ubc_info structure to a vnode
541 *
542 * Parameters: vp Pointer to the vnode
543 * withfsize{0,1} Zero if the size should be obtained
544 * from the vnode; otherwise, use filesize
545 * filesize The size of the file, if withfsize == 1
546 *
547 * Returns: 0 Success
548 * vnode_size:ENOMEM Not enough space
549 * vnode_size:??? Other error from vnode_getattr
550 *
551 * Notes: We call a blocking zalloc(), and the zone was created as an
552 * expandable and collectable zone, so if no memory is available,
553 * it is possible for zalloc() to block indefinitely. zalloc()
554 * may also panic if the zone of zones is exhausted, since it's
555 * NOT expandable.
556 *
557 * We unconditionally call vnode_pager_setup(), even if this is
558 * a reuse of a ubc_info; in that case, we should probably assert
559 * that it does not already have a pager association, but do not.
560 *
561 * Since memory_object_create_named() can only fail from receiving
562 * an invalid pager argument, the explicit check and panic is
563 * merely precautionary.
564 */
565 static int
566 ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
567 {
568 register struct ubc_info *uip;
569 void * pager;
570 int error = 0;
571 kern_return_t kret;
572 memory_object_control_t control;
573
574 uip = vp->v_ubcinfo;
575
576 /*
577 * If there is not already a ubc_info attached to the vnode, we
578 * attach one; otherwise, we will reuse the one that's there.
579 */
580 if (uip == UBC_INFO_NULL) {
581
582 uip = (struct ubc_info *) zalloc(ubc_info_zone);
583 bzero((char *)uip, sizeof(struct ubc_info));
584
585 uip->ui_vnode = vp;
586 uip->ui_flags = UI_INITED;
587 uip->ui_ucred = NOCRED;
588 }
589 assert(uip->ui_flags != UI_NONE);
590 assert(uip->ui_vnode == vp);
591
592 /* now set this ubc_info in the vnode */
593 vp->v_ubcinfo = uip;
594
595 /*
596 * Allocate a pager object for this vnode
597 *
598 * XXX The value of the pager parameter is currently ignored.
599 * XXX Presumably, this API changed to avoid the race between
600 * XXX setting the pager and the UI_HASPAGER flag.
601 */
602 pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
603 assert(pager);
604
605 /*
606 * Explicitly set the pager into the ubc_info, after setting the
607 * UI_HASPAGER flag.
608 */
609 SET(uip->ui_flags, UI_HASPAGER);
610 uip->ui_pager = pager;
611
612 /*
613	 * Note:  We cannot use VNOP_GETATTR() to get an accurate
614	 * value of ui_size because this may be an NFS vnode, and
615 * nfs_getattr() can call vinvalbuf(); if this happens,
616 * ubc_info is not set up to deal with that event.
617 * So use bogus size.
618 */
619
620 /*
621 * create a vnode - vm_object association
622	 * memory_object_create_named() creates a "named" reference on the
623	 * memory object; we hold this reference as long as the vnode is
624 * "alive." Since memory_object_create_named() took its own reference
625 * on the vnode pager we passed it, we can drop the reference
626 * vnode_pager_setup() returned here.
627 */
628 kret = memory_object_create_named(pager,
629 (memory_object_size_t)uip->ui_size, &control);
630 vnode_pager_deallocate(pager);
631 if (kret != KERN_SUCCESS)
632 panic("ubc_info_init: memory_object_create_named returned %d", kret);
633
634 assert(control);
635 uip->ui_control = control; /* cache the value of the mo control */
636 SET(uip->ui_flags, UI_HASOBJREF); /* with a named reference */
637
638 if (withfsize == 0) {
639 /* initialize the size */
640 error = vnode_size(vp, &uip->ui_size, vfs_context_current());
641 if (error)
642 uip->ui_size = 0;
643 } else {
644 uip->ui_size = filesize;
645 }
646 vp->v_lflag |= VNAMED_UBC; /* vnode has a named ubc reference */
647
648 return (error);
649 }
650
651
652 /*
653 * ubc_info_free
654 *
655 * Free a ubc_info structure
656 *
657 * Parameters: uip A pointer to the ubc_info to free
658 *
659 * Returns: (void)
660 *
661 * Notes: If there is a credential that has subsequently been associated
662 * with the ubc_info via a call to ubc_setcred(), the reference
663 * to the credential is dropped.
664 *
665 * It's actually impossible for a ubc_info.ui_control to take the
666 * value MEMORY_OBJECT_CONTROL_NULL.
667 */
668 static void
669 ubc_info_free(struct ubc_info *uip)
670 {
671 if (IS_VALID_CRED(uip->ui_ucred)) {
672 kauth_cred_unref(&uip->ui_ucred);
673 }
674
675 if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
676 memory_object_control_deallocate(uip->ui_control);
677
678 cluster_release(uip);
679 ubc_cs_free(uip);
680
681 zfree(ubc_info_zone, uip);
682 return;
683 }
684
685
686 void
687 ubc_info_deallocate(struct ubc_info *uip)
688 {
689 ubc_info_free(uip);
690 }
691
692
693 /*
694 * ubc_setsize
695 *
696	 * Tell the VM that the size of the file represented by the vnode has
697 * changed
698 *
699 * Parameters: vp The vp whose backing file size is
700 * being changed
701 * nsize The new size of the backing file
702 *
703 * Returns: 1 Success
704 * 0 Failure
705 *
706	 * Notes:	This function will indicate failure if the new size being
707	 *		set is negative.
708 *
709 * This function will fail if there is no ubc_info currently
710 * associated with the vnode.
711 *
712	 *		This function will indicate success if the new size is the
713 * same or larger than the old size (in this case, the remainder
714 * of the file will require modification or use of an existing upl
715 * to access successfully).
716 *
717	 *		This function will fail if the new file size is smaller and
718	 *		the memory region being invalidated could not actually be
719	 *		invalidated, or if the new size is not page-aligned and the
720	 *		last page could not be flushed.  This is usually indicative
721	 *		of an I/O error.
722 */
723 int
724 ubc_setsize(struct vnode *vp, off_t nsize)
725 {
726 off_t osize; /* ui_size before change */
727 off_t lastpg, olastpgend, lastoff;
728 struct ubc_info *uip;
729 memory_object_control_t control;
730 kern_return_t kret = KERN_SUCCESS;
731
732 if (nsize < (off_t)0)
733 return (0);
734
735 if (!UBCINFOEXISTS(vp))
736 return (0);
737
738 uip = vp->v_ubcinfo;
739 osize = uip->ui_size;
740 /*
741 * Update the size before flushing the VM
742 */
743 uip->ui_size = nsize;
744
745 if (nsize >= osize) { /* Nothing more to do */
746 if (nsize > osize) {
747 lock_vnode_and_post(vp, NOTE_EXTEND);
748 }
749
750 return (1); /* return success */
751 }
752
753 /*
754 * When the file shrinks, invalidate the pages beyond the
755 * new size. Also get rid of garbage beyond nsize on the
756 * last page. The ui_size already has the nsize, so any
757 * subsequent page-in will zero-fill the tail properly
758 */
759 lastpg = trunc_page_64(nsize);
760 olastpgend = round_page_64(osize);
761 control = uip->ui_control;
762 assert(control);
763 lastoff = (nsize & PAGE_MASK_64);
764
765 if (lastoff) {
766 upl_t upl;
767 upl_page_info_t *pl;
768
769
770 /*
771 * new EOF ends up in the middle of a page
772	 * zero the tail of this page if it's currently
773 * present in the cache
774 */
775 kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);
776
777 if (kret != KERN_SUCCESS)
778 panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);
779
780 if (upl_valid_page(pl, 0))
781 cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);
782
783 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
784
785 lastpg += PAGE_SIZE_64;
786 }
787 if (olastpgend > lastpg) {
788 int flags;
789
790 if (lastpg == 0)
791 flags = MEMORY_OBJECT_DATA_FLUSH_ALL;
792 else
793 flags = MEMORY_OBJECT_DATA_FLUSH;
794 /*
795 * invalidate the pages beyond the new EOF page
796 *
797 */
798 kret = memory_object_lock_request(control,
799 (memory_object_offset_t)lastpg,
800 (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
801 MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE);
802 if (kret != KERN_SUCCESS)
803 printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
804 }
805 return ((kret == KERN_SUCCESS) ? 1 : 0);
806 }
807
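/*
 * Illustrative sketch, for orientation only: a filesystem truncate path
 * pairs its own size update with a call to ubc_setsize(); one hedged way
 * to map the 1/0 return convention back to an errno, with
 * my_fs_set_disk_size() as a hypothetical helper:
 *
 *	error = my_fs_set_disk_size(vp, new_size);
 *	if (error == 0 && ubc_setsize(vp, new_size) == 0)
 *		error = EIO;
 *
 * The EIO choice reflects the note above: a failed shrink usually means
 * pages beyond the new EOF could not be invalidated or flushed.
 */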
808
809 /*
810 * ubc_getsize
811 *
812	 * Get the size of the file associated with the specified vnode
813 *
814 * Parameters: vp The vnode whose size is of interest
815 *
816 * Returns: 0 There is no ubc_info associated with
817 * this vnode, or the size is zero
818 * !0 The size of the file
819 *
820 * Notes: Using this routine, it is not possible for a caller to
821	 *		successfully distinguish between a vnode associated with a
822	 *		zero-length file and a vnode with no associated ubc_info.  The
823 * caller therefore needs to not care, or needs to ensure that
824 * they have previously successfully called ubc_info_init() or
825 * ubc_info_init_withsize().
826 */
827 off_t
828 ubc_getsize(struct vnode *vp)
829 {
830 /* people depend on the side effect of this working this way
831	 * as they call this for directories
832 */
833 if (!UBCINFOEXISTS(vp))
834 return ((off_t)0);
835 return (vp->v_ubcinfo->ui_size);
836 }
837
838
839 /*
840 * ubc_umount
841 *
842	 * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this
843 * mount point
844 *
845 * Parameters: mp The mount point
846 *
847 * Returns: 0 Success
848 *
849 * Notes: There is no failure indication for this function.
850 *
851 * This function is used in the unmount path; since it may block
852 * I/O indefinitely, it should not be used in the forced unmount
853 * path, since a device unavailability could also block that
854 * indefinitely.
855 *
856 * Because there is no device ejection interlock on USB, FireWire,
857	 *		or similar devices, an ejection of such a device that begins
858	 *		after vnode_iterate() has started, or a network mount whose
859	 *		server stops responding, may cause the caller to block
860	 *		indefinitely.
861 */
862 __private_extern__ int
863 ubc_umount(struct mount *mp)
864 {
865 vnode_iterate(mp, 0, ubc_umcallback, 0);
866 return(0);
867 }
868
869
870 /*
871 * ubc_umcallback
872 *
873 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
874 * and vnode_iterate() for details of implementation.
875 */
876 static int
877 ubc_umcallback(vnode_t vp, __unused void * args)
878 {
879
880 if (UBCINFOEXISTS(vp)) {
881
882 (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
883 }
884 return (VNODE_RETURNED);
885 }
886
887
888 /*
889 * ubc_getcred
890 *
891 * Get the credentials currently active for the ubc_info associated with the
892 * vnode.
893 *
894 * Parameters: vp The vnode whose ubc_info credentials
895 * are to be retrieved
896 *
897 * Returns: !NOCRED The credentials
898 * NOCRED If there is no ubc_info for the vnode,
899 * or if there is one, but it has not had
900 * any credentials associated with it via
901 * a call to ubc_setcred()
902 */
903 kauth_cred_t
904 ubc_getcred(struct vnode *vp)
905 {
906 if (UBCINFOEXISTS(vp))
907 return (vp->v_ubcinfo->ui_ucred);
908
909 return (NOCRED);
910 }
911
912
913 /*
914 * ubc_setthreadcred
915 *
916 * If they are not already set, set the credentials of the ubc_info structure
917 * associated with the vnode to those of the supplied thread; otherwise leave
918 * them alone.
919 *
920 * Parameters: vp The vnode whose ubc_info creds are to
921 * be set
922 * p The process whose credentials are to
923 * be used, if not running on an assumed
924 * credential
925 * thread The thread whose credentials are to
926 * be used
927 *
928 * Returns: 1 This vnode has no associated ubc_info
929 * 0 Success
930 *
931 * Notes: This function takes a proc parameter to account for bootstrap
932 * issues where a task or thread may call this routine, either
933 * before credentials have been initialized by bsd_init(), or if
934	 *		there is no BSD info associated with a Mach thread yet.  This
935 * is known to happen in both the initial swap and memory mapping
936 * calls.
937 *
938 * This function is generally used only in the following cases:
939 *
940 * o a memory mapped file via the mmap() system call
941 * o a memory mapped file via the deprecated map_fd() call
942 * o a swap store backing file
943 * o subsequent to a successful write via vn_write()
944 *
945 * The information is then used by the NFS client in order to
946 * cons up a wire message in either the page-in or page-out path.
947 *
948 * There are two potential problems with the use of this API:
949 *
950	 *		o	Because the write path only sets it on a successful
951 * write, there is a race window between setting the
952 * credential and its use to evict the pages to the
953 * remote file server
954 *
955 * o Because a page-in may occur prior to a write, the
956 * credential may not be set at this time, if the page-in
957 * is not the result of a mapping established via mmap()
958 * or map_fd().
959 *
960 * In both these cases, this will be triggered from the paging
961 * path, which will instead use the credential of the current
962 * process, which in this case is either the dynamic_pager or
963 * the kernel task, both of which utilize "root" credentials.
964 *
965 * This may potentially permit operations to occur which should
966	 *		be denied, or it may deny operations which should be
967	 *		permitted, depending on the configuration of the NFS
968 * server.
969 */
970 int
971 ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
972 {
973 struct ubc_info *uip;
974 kauth_cred_t credp;
975 struct uthread *uthread = get_bsdthread_info(thread);
976
977 if (!UBCINFOEXISTS(vp))
978 return (1);
979
980 vnode_lock(vp);
981
982 uip = vp->v_ubcinfo;
983 credp = uip->ui_ucred;
984
985 if (!IS_VALID_CRED(credp)) {
986 /* use per-thread cred, if assumed identity, else proc cred */
987 if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
988 uip->ui_ucred = kauth_cred_proc_ref(p);
989 } else {
990 uip->ui_ucred = uthread->uu_ucred;
991 kauth_cred_ref(uip->ui_ucred);
992 }
993 }
994 vnode_unlock(vp);
995
996 return (0);
997 }
998
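/*
 * Illustrative sketch, for orientation only: the successful-write case
 * mentioned above would, roughly, stash a credential after the write so
 * that a later page-out over NFS has one to use.  "ctx" is a hypothetical
 * vfs_context_t for the write:
 *
 *	if (error == 0)
 *		(void) ubc_setthreadcred(vp, vfs_context_proc(ctx),
 *		    current_thread());
 */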
999
1000 /*
1001 * ubc_setcred
1002 *
1003 * If they are not already set, set the credentials of the ubc_info structure
1004 * associated with the vnode to those of the process; otherwise leave them
1005 * alone.
1006 *
1007 * Parameters: vp The vnode whose ubc_info creds are to
1008 * be set
1009 * p The process whose credentials are to
1010 * be used
1011 *
1012 * Returns: 0 This vnode has no associated ubc_info
1013 * 1 Success
1014 *
1015 * Notes: The return values for this function are inverted from nearly
1016 * all other uses in the kernel.
1017 *
1018 * See also ubc_setthreadcred(), above.
1019 *
1020 * This function is considered deprecated, and generally should
1021 * not be used, as it is incompatible with per-thread credentials;
1022 * it exists for legacy KPI reasons.
1023 *
1024 * DEPRECATION: ubc_setcred() is being deprecated. Please use
1025 * ubc_setthreadcred() instead.
1026 */
1027 int
1028 ubc_setcred(struct vnode *vp, proc_t p)
1029 {
1030 struct ubc_info *uip;
1031 kauth_cred_t credp;
1032
1033 /* If there is no ubc_info, deny the operation */
1034 if ( !UBCINFOEXISTS(vp))
1035 return (0);
1036
1037 /*
1038 * Check to see if there is already a credential reference in the
1039 * ubc_info; if there is not, take one on the supplied credential.
1040 */
1041 vnode_lock(vp);
1042 uip = vp->v_ubcinfo;
1043 credp = uip->ui_ucred;
1044 if (!IS_VALID_CRED(credp)) {
1045 uip->ui_ucred = kauth_cred_proc_ref(p);
1046 }
1047 vnode_unlock(vp);
1048
1049 return (1);
1050 }
1051
1052 /*
1053 * ubc_getpager
1054 *
1055 * Get the pager associated with the ubc_info associated with the vnode.
1056 *
1057 * Parameters: vp The vnode to obtain the pager from
1058 *
1059 * Returns: !VNODE_PAGER_NULL The memory_object_t for the pager
1060 * VNODE_PAGER_NULL There is no ubc_info for this vnode
1061 *
1062 * Notes: For each vnode that has a ubc_info associated with it, that
1063 * ubc_info SHALL have a pager associated with it, so in the
1064 * normal case, it's impossible to return VNODE_PAGER_NULL for
1065 * a vnode with an associated ubc_info.
1066 */
1067 __private_extern__ memory_object_t
1068 ubc_getpager(struct vnode *vp)
1069 {
1070 if (UBCINFOEXISTS(vp))
1071 return (vp->v_ubcinfo->ui_pager);
1072
1073 return (0);
1074 }
1075
1076
1077 /*
1078 * ubc_getobject
1079 *
1080 * Get the memory object control associated with the ubc_info associated with
1081 * the vnode
1082 *
1083 * Parameters: vp The vnode to obtain the memory object
1084 * from
1085 * flags DEPRECATED
1086 *
1087 * Returns: !MEMORY_OBJECT_CONTROL_NULL
1088 * MEMORY_OBJECT_CONTROL_NULL
1089 *
1090 * Notes: Historically, if the flags were not "do not reactivate", this
1091 * function would look up the memory object using the pager if
1092 * it did not exist (this could be the case if the vnode had
1093 * been previously reactivated). The flags would also permit a
1094 * hold to be requested, which would have created an object
1095 * reference, if one had not already existed. This usage is
1096 * deprecated, as it would permit a race between finding and
1097 * taking the reference vs. a single reference being dropped in
1098 * another thread.
1099 */
1100 memory_object_control_t
1101 ubc_getobject(struct vnode *vp, __unused int flags)
1102 {
1103 if (UBCINFOEXISTS(vp))
1104 return((vp->v_ubcinfo->ui_control));
1105
1106 return (MEMORY_OBJECT_CONTROL_NULL);
1107 }
1108
1109 boolean_t
1110 ubc_strict_uncached_IO(struct vnode *vp)
1111 {
1112 boolean_t result = FALSE;
1113
1114 if (UBCINFOEXISTS(vp)) {
1115 result = memory_object_is_slid(vp->v_ubcinfo->ui_control);
1116 }
1117 return result;
1118 }
1119
1120 /*
1121 * ubc_blktooff
1122 *
1123 * Convert a given block number to a memory backing object (file) offset for a
1124 * given vnode
1125 *
1126 * Parameters: vp The vnode in which the block is located
1127 * blkno The block number to convert
1128 *
1129 * Returns: !-1 The offset into the backing object
1130 * -1 There is no ubc_info associated with
1131 * the vnode
1132 * -1 An error occurred in the underlying VFS
1133 * while translating the block to an
1134 * offset; the most likely cause is that
1135 * the caller specified a block past the
1136 * end of the file, but this could also be
1137 * any other error from VNOP_BLKTOOFF().
1138 *
1139 * Note: Representing the error in band loses some information, but does
1140 * not occlude a valid offset, since an off_t of -1 is normally
1141 * used to represent EOF. If we had a more reliable constant in
1142 * our header files for it (i.e. explicitly cast to an off_t), we
1143 * would use it here instead.
1144 */
1145 off_t
1146 ubc_blktooff(vnode_t vp, daddr64_t blkno)
1147 {
1148 off_t file_offset = -1;
1149 int error;
1150
1151 if (UBCINFOEXISTS(vp)) {
1152 error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
1153 if (error)
1154 file_offset = -1;
1155 }
1156
1157 return (file_offset);
1158 }
1159
1160
1161 /*
1162 * ubc_offtoblk
1163 *
1164 * Convert a given offset in a memory backing object into a block number for a
1165 * given vnode
1166 *
1167 * Parameters: vp The vnode in which the offset is
1168 * located
1169 * offset The offset into the backing object
1170 *
1171 * Returns: !-1 The returned block number
1172 * -1 There is no ubc_info associated with
1173 * the vnode
1174 * -1 An error occurred in the underlying VFS
1175	 *				while translating the offset to a
1176	 *				block; the most likely cause is that
1177	 *				the caller specified an offset past the
1178	 *				end of the file, but this could also be
1179 * any other error from VNOP_OFFTOBLK().
1180 *
1181 * Note: Representing the error in band loses some information, but does
1182 * not occlude a valid block number, since block numbers exceed
1183 * the valid range for offsets, due to their relative sizes. If
1184 * we had a more reliable constant than -1 in our header files
1185	 *	for it (i.e. explicitly cast to a daddr64_t), we would use it
1186 * here instead.
1187 */
1188 daddr64_t
1189 ubc_offtoblk(vnode_t vp, off_t offset)
1190 {
1191 daddr64_t blkno = -1;
1192 int error = 0;
1193
1194 if (UBCINFOEXISTS(vp)) {
1195 error = VNOP_OFFTOBLK(vp, offset, &blkno);
1196 if (error)
1197 blkno = -1;
1198 }
1199
1200 return (blkno);
1201 }
1202
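/*
 * Illustrative sketch, for orientation only: because both ubc_blktooff()
 * and ubc_offtoblk() report errors in band as -1, callers have to treat
 * that value as "no ubc_info or VFS error" rather than as a real block
 * or offset.  ERANGE is just an example errno choice:
 *
 *	daddr64_t blk = ubc_offtoblk(vp, file_offset);
 *	if (blk == (daddr64_t)-1)
 *		return (ERANGE);
 *	off_t back_offset = ubc_blktooff(vp, blk);
 */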
1203
1204 /*
1205 * ubc_pages_resident
1206 *
1207 * Determine whether or not a given vnode has pages resident via the memory
1208 * object control associated with the ubc_info associated with the vnode
1209 *
1210 * Parameters: vp The vnode we want to know about
1211 *
1212 * Returns: 1 Yes
1213 * 0 No
1214 */
1215 int
1216 ubc_pages_resident(vnode_t vp)
1217 {
1218 kern_return_t kret;
1219 boolean_t has_pages_resident;
1220
1221 if (!UBCINFOEXISTS(vp))
1222 return (0);
1223
1224 /*
1225 * The following call may fail if an invalid ui_control is specified,
1226 * or if there is no VM object associated with the control object. In
1227 * either case, reacting to it as if there were no pages resident will
1228 * result in correct behavior.
1229 */
1230 kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);
1231
1232 if (kret != KERN_SUCCESS)
1233 return (0);
1234
1235 if (has_pages_resident == TRUE)
1236 return (1);
1237
1238 return (0);
1239 }
1240
1241
1242 /*
1243 * ubc_sync_range
1244 *
1245 * Clean and/or invalidate a range in the memory object that backs this vnode
1246 *
1247 * Parameters: vp The vnode whose associated ubc_info's
1248 * associated memory object is to have a
1249 * range invalidated within it
1250 * beg_off The start of the range, as an offset
1251 * end_off The end of the range, as an offset
1252 * flags See ubc_msync_internal()
1253 *
1254 * Returns: 1 Success
1255 * 0 Failure
1256 *
1257 * Notes: see ubc_msync_internal() for more detailed information.
1258 *
1259 * DEPRECATED: This interface is obsolete due to a failure to return error
1260 * information needed in order to correct failures. The currently
1261 * recommended interface is ubc_msync().
1262 */
1263 int
1264 ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
1265 {
1266 return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
1267 }
1268
1269
1270 /*
1271 * ubc_msync
1272 *
1273 * Clean and/or invalidate a range in the memory object that backs this vnode
1274 *
1275 * Parameters: vp The vnode whose associated ubc_info's
1276 * associated memory object is to have a
1277 * range invalidated within it
1278 * beg_off The start of the range, as an offset
1279 * end_off The end of the range, as an offset
1280 * resid_off The address of an off_t supplied by the
1281 * caller; may be set to NULL to ignore
1282 * flags See ubc_msync_internal()
1283 *
1284 * Returns: 0 Success
1285 * !0 Failure; an errno is returned
1286 *
1287 * Implicit Returns:
1288 * *resid_off, modified If non-NULL, the contents are ALWAYS
1289 * modified; they are initialized to the
1290 * beg_off, and in case of an I/O error,
1291 * the difference between beg_off and the
1292 * current value will reflect what was
1293 * able to be written before the error
1294 * occurred. If no error is returned, the
1295 * value of the resid_off is undefined; do
1296 * NOT use it in place of end_off if you
1297 * intend to increment from the end of the
1298 * last call and call iteratively.
1299 *
1300 * Notes: see ubc_msync_internal() for more detailed information.
1301 *
1302 */
1303 errno_t
1304 ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
1305 {
1306 int retval;
1307 int io_errno = 0;
1308
1309 if (resid_off)
1310 *resid_off = beg_off;
1311
1312 retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);
1313
1314 if (retval == 0 && io_errno == 0)
1315 return (EINVAL);
1316 return (io_errno);
1317 }
1318
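/*
 * Illustrative sketch, for orientation only: pushing the dirty pages of
 * a byte range and waiting for the I/O, following the flag rules spelled
 * out for ubc_msync_internal() below.  "foff" and "len" are hypothetical:
 *
 *	off_t resid = 0;
 *	errno_t err;
 *
 *	err = ubc_msync(vp, foff, foff + len, &resid,
 *	    UBC_PUSHDIRTY | UBC_SYNC);
 *	if (err)
 *		printf("ubc_msync failed near offset %lld (error %d)\n",
 *		    (long long)resid, err);
 */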
1319
1320 /*
1321 * Clean and/or invalidate a range in the memory object that backs this vnode
1322 *
1323 * Parameters: vp The vnode whose associated ubc_info's
1324 * associated memory object is to have a
1325 * range invalidated within it
1326 * beg_off The start of the range, as an offset
1327 * end_off The end of the range, as an offset
1328 * resid_off The address of an off_t supplied by the
1329 * caller; may be set to NULL to ignore
1330 * flags MUST contain at least one of the flags
1331 * UBC_INVALIDATE, UBC_PUSHDIRTY, or
1332 * UBC_PUSHALL; if UBC_PUSHDIRTY is used,
1333 * UBC_SYNC may also be specified to cause
1334 * this function to block until the
1335 * operation is complete. The behavior
1336 * of UBC_SYNC is otherwise undefined.
1337 * io_errno The address of an int to contain the
1338 * errno from a failed I/O operation, if
1339 * one occurs; may be set to NULL to
1340 * ignore
1341 *
1342 * Returns: 1 Success
1343 * 0 Failure
1344 *
1345 * Implicit Returns:
1346 * *resid_off, modified The contents of this offset MAY be
1347 * modified; in case of an I/O error, the
1348 * difference between beg_off and the
1349 * current value will reflect what was
1350 * able to be written before the error
1351 * occurred.
1352 * *io_errno, modified The contents of this offset are set to
1353 * an errno, if an error occurs; if the
1354 * caller supplies an io_errno parameter,
1355 * they should be careful to initialize it
1356 * to 0 before calling this function to
1357 * enable them to distinguish an error
1358 * with a valid *resid_off from an invalid
1359 * one, and to avoid potentially falsely
1360 * reporting an error, depending on use.
1361 *
1362 * Notes: If there is no ubc_info associated with the vnode supplied,
1363 * this function immediately returns success.
1364 *
1365 * If the value of end_off is less than or equal to beg_off, this
1366 * function immediately returns success; that is, end_off is NOT
1367 * inclusive.
1368 *
1369 * IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
1370 * UBC_PUSHALL MUST be specified; that is, it is NOT possible to
1371 * attempt to block on in-progress I/O by calling this function
1372	 *		with UBC_PUSHDIRTY, and then later calling it with just UBC_SYNC
1373 * in order to block pending on the I/O already in progress.
1374 *
1375 * The start offset is truncated to the page boundary and the
1376 * size is adjusted to include the last page in the range; that
1377 * is, end_off on exactly a page boundary will not change if it
1378 * is rounded, and the range of bytes written will be from the
1379	 *		truncated beg_off to the rounded (end_off - 1).
1380 */
1381 static int
1382 ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
1383 {
1384 memory_object_size_t tsize;
1385 kern_return_t kret;
1386 int request_flags = 0;
1387 int flush_flags = MEMORY_OBJECT_RETURN_NONE;
1388
1389 if ( !UBCINFOEXISTS(vp))
1390 return (0);
1391 if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
1392 return (0);
1393 if (end_off <= beg_off)
1394 return (1);
1395
1396 if (flags & UBC_INVALIDATE)
1397 /*
1398 * discard the resident pages
1399 */
1400 request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);
1401
1402 if (flags & UBC_SYNC)
1403 /*
1404 * wait for all the I/O to complete before returning
1405 */
1406 request_flags |= MEMORY_OBJECT_IO_SYNC;
1407
1408 if (flags & UBC_PUSHDIRTY)
1409 /*
1410 * we only return the dirty pages in the range
1411 */
1412 flush_flags = MEMORY_OBJECT_RETURN_DIRTY;
1413
1414 if (flags & UBC_PUSHALL)
1415 /*
1416 * then return all the interesting pages in the range (both
1417 * dirty and precious) to the pager
1418 */
1419 flush_flags = MEMORY_OBJECT_RETURN_ALL;
1420
1421 beg_off = trunc_page_64(beg_off);
1422 end_off = round_page_64(end_off);
1423 tsize = (memory_object_size_t)end_off - beg_off;
1424
1425 /* flush and/or invalidate pages in the range requested */
1426 kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
1427 beg_off, tsize,
1428 (memory_object_offset_t *)resid_off,
1429 io_errno, flush_flags, request_flags,
1430 VM_PROT_NO_CHANGE);
1431
1432 return ((kret == KERN_SUCCESS) ? 1 : 0);
1433 }
1434
1435
1436 /*
1437	 * ubc_map
1438	 *
1439	 * Explicitly map a vnode that has an associated ubc_info, and add a reference
1440 * to it for the ubc system, if there isn't one already, so it will not be
1441 * recycled while it's in use, and set flags on the ubc_info to indicate that
1442 * we have done this
1443 *
1444 * Parameters: vp The vnode to map
1445 * flags The mapping flags for the vnode; this
1446 * will be a combination of one or more of
1447 * PROT_READ, PROT_WRITE, and PROT_EXEC
1448 *
1449 * Returns: 0 Success
1450 * EPERM Permission was denied
1451 *
1452 * Notes: An I/O reference on the vnode must already be held on entry
1453 *
1454 * If there is no ubc_info associated with the vnode, this function
1455 * will return success.
1456 *
1457 * If a permission error occurs, this function will return
1458 * failure; all other failures will cause this function to return
1459 * success.
1460 *
1461 * IMPORTANT: This is an internal use function, and its symbols
1462 * are not exported, hence its error checking is not very robust.
1463 * It is primarily used by:
1464 *
1465 * o mmap(), when mapping a file
1466 * o The deprecated map_fd() interface, when mapping a file
1467 * o When mapping a shared file (a shared library in the
1468 * shared segment region)
1469 * o When loading a program image during the exec process
1470 *
1471 * ...all of these uses ignore the return code, and any fault that
1472 * results later because of a failure is handled in the fix-up path
1473 * of the fault handler. The interface exists primarily as a
1474 * performance hint.
1475 *
1476	 *		Given that third-party implementations of the types of interfaces
1477 * that would use this function, such as alternative executable
1478 * formats, etc., are unsupported, this function is not exported
1479 * for general use.
1480 *
1481 * The extra reference is held until the VM system unmaps the
1482 * vnode from its own context to maintain a vnode reference in
1483 * cases like open()/mmap()/close(), which leave the backing
1484 * object referenced by a mapped memory region in a process
1485 * address space.
1486 */
1487 __private_extern__ int
1488 ubc_map(vnode_t vp, int flags)
1489 {
1490 struct ubc_info *uip;
1491 int error = 0;
1492 int need_ref = 0;
1493 int need_wakeup = 0;
1494
1495 if (UBCINFOEXISTS(vp)) {
1496
1497 vnode_lock(vp);
1498 uip = vp->v_ubcinfo;
1499
1500 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1501 SET(uip->ui_flags, UI_MAPWAITING);
1502 (void) msleep(&uip->ui_flags, &vp->v_lock,
1503 PRIBIO, "ubc_map", NULL);
1504 }
1505 SET(uip->ui_flags, UI_MAPBUSY);
1506 vnode_unlock(vp);
1507
1508 error = VNOP_MMAP(vp, flags, vfs_context_current());
1509
1510 if (error != EPERM)
1511 error = 0;
1512
1513 vnode_lock_spin(vp);
1514
1515 if (error == 0) {
1516 if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
1517 need_ref = 1;
1518 SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
1519 }
1520 CLR(uip->ui_flags, UI_MAPBUSY);
1521
1522 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1523 CLR(uip->ui_flags, UI_MAPWAITING);
1524 need_wakeup = 1;
1525 }
1526 vnode_unlock(vp);
1527
1528 if (need_wakeup)
1529 wakeup(&uip->ui_flags);
1530
1531 if (need_ref)
1532 vnode_ref(vp);
1533 }
1534 return (error);
1535 }
1536
1537
1538 /*
1539 * ubc_destroy_named
1540 *
1541 * Destroy the named memory object associated with the ubc_info control object
1542 * associated with the designated vnode, if there is a ubc_info associated
1543 * with the vnode, and a control object is associated with it
1544 *
1545 * Parameters: vp The designated vnode
1546 *
1547 * Returns: (void)
1548 *
1549 * Notes: This function is called on vnode termination for all vnodes,
1550 * and must therefore not assume that there is a ubc_info that is
1551 * associated with the vnode, nor that there is a control object
1552 * associated with the ubc_info.
1553 *
1554 * If all the conditions necessary are present, this function
1555	 *		calls memory_object_destroy(), which will in turn end up
1556 * calling ubc_unmap() to release any vnode references that were
1557 * established via ubc_map().
1558 *
1559 * IMPORTANT: This is an internal use function that is used
1560 * exclusively by the internal use function vclean().
1561 */
1562 __private_extern__ void
1563 ubc_destroy_named(vnode_t vp)
1564 {
1565 memory_object_control_t control;
1566 struct ubc_info *uip;
1567 kern_return_t kret;
1568
1569 if (UBCINFOEXISTS(vp)) {
1570 uip = vp->v_ubcinfo;
1571
1572 /* Terminate the memory object */
1573 control = ubc_getobject(vp, UBC_HOLDOBJECT);
1574 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1575 kret = memory_object_destroy(control, 0);
1576 if (kret != KERN_SUCCESS)
1577 panic("ubc_destroy_named: memory_object_destroy failed");
1578 }
1579 }
1580 }
1581
1582
1583 /*
1584 * ubc_isinuse
1585 *
1586 * Determine whether or not a vnode is currently in use by ubc at a level in
1587 * excess of the requested busycount
1588 *
1589 * Parameters: vp The vnode to check
1590 * busycount The threshold busy count, used to bias
1591 * the count usually already held by the
1592 * caller to avoid races
1593 *
1594 * Returns: 1 The vnode is in use over the threshold
1595 * 0 The vnode is not in use over the
1596 * threshold
1597 *
1598 * Notes: Because the vnode is only held locked while actually asking
1599 * the use count, this function only represents a snapshot of the
1600 * current state of the vnode. If more accurate information is
1601 * required, an additional busycount should be held by the caller
1602 * and a non-zero busycount used.
1603 *
1604 * If there is no ubc_info associated with the vnode, this
1605 * function will report that the vnode is not in use by ubc.
1606 */
1607 int
1608 ubc_isinuse(struct vnode *vp, int busycount)
1609 {
1610 if ( !UBCINFOEXISTS(vp))
1611 return (0);
1612 return(ubc_isinuse_locked(vp, busycount, 0));
1613 }
1614
1615
1616 /*
1617 * ubc_isinuse_locked
1618 *
1619 * Determine whether or not a vnode is currently in use by ubc at a level in
1620 * excess of the requested busycount
1621 *
1622 * Parameters: vp The vnode to check
1623 * busycount The threshold busy count, used to bias
1624 * the count usually already held by the
1625 * caller to avoid races
1626 * locked True if the vnode is already locked by
1627 * the caller
1628 *
1629 * Returns: 1 The vnode is in use over the threshold
1630 * 0 The vnode is not in use over the
1631 * threshold
1632 *
1633 * Notes: If the vnode is not locked on entry, it is locked while
1634 * actually asking the use count. If this is the case, this
1635 * function only represents a snapshot of the current state of
1636 * the vnode. If more accurate information is required, the
1637 * vnode lock should be held by the caller, otherwise an
1638 * additional busycount should be held by the caller and a
1639 * non-zero busycount used.
1640 *
1641 * If there is no ubc_info associated with the vnode, this
1642 * function will report that the vnode is not in use by ubc.
1643 */
1644 int
1645 ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
1646 {
1647 int retval = 0;
1648
1649
1650 if (!locked)
1651 vnode_lock_spin(vp);
1652
1653 if ((vp->v_usecount - vp->v_kusecount) > busycount)
1654 retval = 1;
1655
1656 if (!locked)
1657 vnode_unlock(vp);
1658 return (retval);
1659 }
1660
1661
1662 /*
1663 * ubc_unmap
1664 *
1665 * Reverse the effects of a ubc_map() call for a given vnode
1666 *
1667 * Parameters: vp vnode to unmap from ubc
1668 *
1669 * Returns: (void)
1670 *
1671 * Notes: This is an internal use function used by vnode_pager_unmap().
1672 * It will attempt to obtain a reference on the supplied vnode,
1673 * and if it can do so, and there is an associated ubc_info, and
1674 * the flags indicate that it was mapped via ubc_map(), then the
1675 * flag is cleared, the mapping removed, and the reference taken
1676 * by ubc_map() is released.
1677 *
1678 * IMPORTANT: This MUST only be called by the VM
1679 * to prevent race conditions.
1680 */
1681 __private_extern__ void
1682 ubc_unmap(struct vnode *vp)
1683 {
1684 struct ubc_info *uip;
1685 int need_rele = 0;
1686 int need_wakeup = 0;
1687
1688 if (vnode_getwithref(vp))
1689 return;
1690
1691 if (UBCINFOEXISTS(vp)) {
1692 vnode_lock(vp);
1693 uip = vp->v_ubcinfo;
1694
1695 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1696 SET(uip->ui_flags, UI_MAPWAITING);
1697 (void) msleep(&uip->ui_flags, &vp->v_lock,
1698 PRIBIO, "ubc_unmap", NULL);
1699 }
1700 SET(uip->ui_flags, UI_MAPBUSY);
1701
1702 if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
1703 CLR(uip->ui_flags, UI_ISMAPPED);
1704 need_rele = 1;
1705 }
1706 vnode_unlock(vp);
1707
1708 if (need_rele) {
1709 (void)VNOP_MNOMAP(vp, vfs_context_current());
1710 vnode_rele(vp);
1711 }
1712
1713 vnode_lock_spin(vp);
1714
1715 CLR(uip->ui_flags, UI_MAPBUSY);
1716 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1717 CLR(uip->ui_flags, UI_MAPWAITING);
1718 need_wakeup = 1;
1719 }
1720 vnode_unlock(vp);
1721
1722 if (need_wakeup)
1723 wakeup(&uip->ui_flags);
1724
1725 }
1726 /*
1727 * the drop of the vnode ref will cleanup
1728 */
1729 vnode_put(vp);
1730 }
1731
1732
1733 /*
1734 * ubc_page_op
1735 *
1736 * Manipulate individual page state for a vnode with an associated ubc_info
1737 * with an associated memory object control.
1738 *
1739 * Parameters: vp The vnode backing the page
1740 * f_offset A file offset interior to the page
1741 * ops The operations to perform, as a bitmap
1742 * (see below for more information)
1743 * phys_entryp The address of a ppnum_t; may be NULL
1744 * to ignore
1745 * flagsp A pointer to an int to contain flags;
1746 * may be NULL to ignore
1747 *
1748 * Returns: KERN_SUCCESS Success
1749 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1750 * object associated
1751 * KERN_INVALID_OBJECT If UPL_POP_PHYSICAL and the object is
1752 * not physically contiguous
1753 * KERN_INVALID_OBJECT If !UPL_POP_PHYSICAL and the object is
1754 * physically contiguous
1755 * KERN_FAILURE If the page cannot be looked up
1756 *
1757 * Implicit Returns:
1758 * *phys_entryp (modified) If phys_entryp is non-NULL and
1759 * UPL_POP_PHYSICAL
1760 * *flagsp (modified) If flagsp is non-NULL and there was
1761 * !UPL_POP_PHYSICAL and a KERN_SUCCESS
1762 *
1763 * Notes: For object boundaries, it is considerably more efficient to
1764 * ensure that f_offset is in fact on a page boundary, as this
1765 * will avoid internal use of the hash table to identify the
1766 * page, and would therefore skip a number of early optimizations.
1767 * Since this is a page operation anyway, the caller should try
1768 * to pass only a page aligned offset because of this.
1769 *
1770 * *flagsp may be modified even if this function fails. If it is
1771 * modified, it will contain the condition of the page before the
1772 * requested operation was attempted; these will only include the
1773	 *		bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
1774 * UPL_POP_SET, or UPL_POP_CLR bits.
1775 *
1776 * The flags field may contain a specific operation, such as
1777 * UPL_POP_PHYSICAL or UPL_POP_DUMP:
1778 *
1779 * o UPL_POP_PHYSICAL Fail if not contiguous; if
1780 * *phys_entryp and successful, set
1781 * *phys_entryp
1782 * o UPL_POP_DUMP Dump the specified page
1783 *
1784 * Otherwise, it is treated as a bitmap of one or more page
1785 * operations to perform on the final memory object; allowable
1786 * bit values are:
1787 *
1788 * o UPL_POP_DIRTY The page is dirty
1789 * o UPL_POP_PAGEOUT The page is paged out
1790 * o UPL_POP_PRECIOUS The page is precious
1791 * o UPL_POP_ABSENT The page is absent
1792 * o UPL_POP_BUSY The page is busy
1793 *
1794 * If the page status is only being queried and not modified, then
1795	 *		no other bits should be specified.  However, if it is being
1796 * modified, exactly ONE of the following bits should be set:
1797 *
1798 * o UPL_POP_SET Set the current bitmap bits
1799 * o UPL_POP_CLR Clear the current bitmap bits
1800 *
1801	 *		Thus to effect a combination of setting and clearing, it may be
1802 * necessary to call this function twice. If this is done, the
1803 * set should be used before the clear, since clearing may trigger
1804 * a wakeup on the destination page, and if the page is backed by
1805 * an encrypted swap file, setting will trigger the decryption
1806 * needed before the wakeup occurs.
1807 */
1808 kern_return_t
1809 ubc_page_op(
1810 struct vnode *vp,
1811 off_t f_offset,
1812 int ops,
1813 ppnum_t *phys_entryp,
1814 int *flagsp)
1815 {
1816 memory_object_control_t control;
1817
1818 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1819 if (control == MEMORY_OBJECT_CONTROL_NULL)
1820 return KERN_INVALID_ARGUMENT;
1821
1822 return (memory_object_page_op(control,
1823 (memory_object_offset_t)f_offset,
1824 ops,
1825 phys_entryp,
1826 flagsp));
1827 }
1828
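/*
 * Illustrative sketch, for orientation only: a pure query of the page
 * containing f_offset passes no operation bits at all and just inspects
 * the returned flags.  "page_is_dirty" is a hypothetical local:
 *
 *	int pg_flags = 0;
 *	kern_return_t kr;
 *
 *	kr = ubc_page_op(vp, f_offset, 0, NULL, &pg_flags);
 *	if (kr == KERN_SUCCESS && (pg_flags & UPL_POP_DIRTY))
 *		page_is_dirty = TRUE;
 */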
1829
1830 /*
1831 * ubc_range_op
1832 *
1833 * Manipulate page state for a range of memory for a vnode with an associated
1834 * ubc_info with an associated memory object control, when page level state is
1835 * not required to be returned from the call (i.e. there are no phys_entryp or
1836 * flagsp parameters to this call, and it takes a range which may contain
1837 * multiple pages, rather than an offset interior to a single page).
1838 *
1839 * Parameters: vp The vnode backing the page
1840 * f_offset_beg A file offset interior to the start page
1841 * f_offset_end A file offset interior to the end page
1842 * ops The operations to perform, as a bitmap
1843 * (see below for more information)
1844 * range The address of an int; may be NULL to
1845 * ignore
1846 *
1847 * Returns: KERN_SUCCESS Success
1848 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1849 * object associated
1850 * KERN_INVALID_OBJECT If the object is physically contiguous
1851 *
1852 * Implicit Returns:
1853 * *range (modified) If range is non-NULL, its contents will
1854 * be modified to contain the number of
1855 * bytes successfully operated upon.
1856 *
1857 * Notes: IMPORTANT: This function cannot be used on a range that
1858 * consists of physically contiguous pages.
1859 *
1860 * For object boundaries, it is considerably more efficient to
1861 * ensure that f_offset_beg and f_offset_end are in fact on page
1862 * boundaries; unaligned offsets force an internal hash table
1863 * lookup to identify the pages and skip a number of early
1864 * optimizations. Since this is an operation on a set of
1865 * pages anyway, the caller should pass only page-aligned
1866 * offsets.
1867 *
1868 * *range will be modified only if this function succeeds.
1869 *
1870 * The flags field MUST contain a specific operation; allowable
1871 * values are:
1872 *
1873 * o UPL_ROP_ABSENT Returns the extent of the range
1874 * presented which is absent, starting
1875 * with the start address presented
1876 *
1877 * o UPL_ROP_PRESENT Returns the extent of the range
1878 * presented which is present (resident),
1879 * starting with the start address
1880 * presented
1881 * o UPL_ROP_DUMP Dump the pages which are found in the
1882 * target object for the target range.
1883 *
1884 * IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT: if there are
1885 * multiple regions in the range, only the first matching region
1886 * is returned.
1887 */
1888 kern_return_t
1889 ubc_range_op(
1890 struct vnode *vp,
1891 off_t f_offset_beg,
1892 off_t f_offset_end,
1893 int ops,
1894 int *range)
1895 {
1896 memory_object_control_t control;
1897
1898 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1899 if (control == MEMORY_OBJECT_CONTROL_NULL)
1900 return KERN_INVALID_ARGUMENT;
1901
1902 return (memory_object_range_op(control,
1903 (memory_object_offset_t)f_offset_beg,
1904 (memory_object_offset_t)f_offset_end,
1905 ops,
1906 range));
1907 }
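/*
 * Illustrative usage sketch (not part of this file): using ubc_range_op()
 * with UPL_ROP_PRESENT to measure how much of a range is resident, starting
 * at the beginning of the range.  "vp", "start" and "end" are assumed to be
 * supplied by a caller holding a valid reference on the vnode.
 *
 *	int resident_bytes = 0;
 *	kern_return_t kr;
 *
 *	kr = ubc_range_op(vp, start, end, UPL_ROP_PRESENT, &resident_bytes);
 *	if (kr == KERN_SUCCESS) {
 *		// resident_bytes describes only the first present region,
 *		// per the IMPORTANT note above
 *	}
 */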
1908
1909
1910 /*
1911 * ubc_create_upl
1912 *
1913 * Given a vnode, cause the population of a portion of the vm_object; based on
1914 * the nature of the request, the pages returned may contain valid data, or
1915 * they may be uninitialized.
1916 *
1917 * Parameters: vp The vnode from which to create the upl
1918 * f_offset The start offset into the backing store
1919 * represented by the vnode
1920 * bufsize The size of the upl to create
1921 * uplp Pointer to the upl_t to receive the
1922 * created upl; MUST NOT be NULL
1923 * plp Pointer to receive the internal page
1924 * list for the created upl; MAY be NULL
1925 * to ignore
1926 *
1927 * Returns: KERN_SUCCESS The requested upl has been created
1928 * KERN_INVALID_ARGUMENT The bufsize argument is not an even
1929 * multiple of the page size
1930 * KERN_INVALID_ARGUMENT There is no ubc_info associated with
1931 * the vnode, or there is no memory object
1932 * control associated with the ubc_info
1933 * memory_object_upl_request:KERN_INVALID_VALUE
1934 * The supplied upl_flags argument is
1935 * invalid
1936 * Implicit Returns:
1937 * *uplp (modified)
1938 * *plp (modified) If non-NULL, the value of *plp will be
1939 * modified to point to the internal page
1940 * list; this modification may occur even
1941 * if this function is unsuccessful, in
1942 * which case the contents may be invalid
1943 *
1944 * Note: If successful, the returned *uplp MUST subsequently be freed
1945 * via a call to ubc_upl_commit(), ubc_upl_commit_range(),
1946 * ubc_upl_abort(), or ubc_upl_abort_range().
1947 */
1948 kern_return_t
1949 ubc_create_upl(
1950 struct vnode *vp,
1951 off_t f_offset,
1952 int bufsize,
1953 upl_t *uplp,
1954 upl_page_info_t **plp,
1955 int uplflags)
1956 {
1957 memory_object_control_t control;
1958 kern_return_t kr;
1959
1960 if (plp != NULL)
1961 *plp = NULL;
1962 *uplp = NULL;
1963
1964 if (bufsize & 0xfff)
1965 return KERN_INVALID_ARGUMENT;
1966
1967 if (bufsize > MAX_UPL_SIZE * PAGE_SIZE)
1968 return KERN_INVALID_ARGUMENT;
1969
1970 if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) {
1971
1972 if (uplflags & UPL_UBC_MSYNC) {
1973 uplflags &= UPL_RET_ONLY_DIRTY;
1974
1975 uplflags |= UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
1976 UPL_SET_INTERNAL | UPL_SET_LITE;
1977
1978 } else if (uplflags & UPL_UBC_PAGEOUT) {
1979 uplflags &= UPL_RET_ONLY_DIRTY;
1980
1981 if (uplflags & UPL_RET_ONLY_DIRTY)
1982 uplflags |= UPL_NOBLOCK;
1983
1984 uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
1985 UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE;
1986 } else {
1987 uplflags |= UPL_RET_ONLY_ABSENT |
1988 UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
1989 UPL_SET_INTERNAL | UPL_SET_LITE;
1990
1991 /*
1992 * if the requested size == PAGE_SIZE, we don't want to set
1993 * the UPL_NOBLOCK since we may be trying to recover from a
1994 * previous partial pagein I/O that occurred because we were low
1995 * on memory and bailed early in order to honor the UPL_NOBLOCK...
1996 * since we're only asking for a single page, we can block w/o fear
1997 * of tying up pages while waiting for more to become available
1998 */
1999 if (bufsize > PAGE_SIZE)
2000 uplflags |= UPL_NOBLOCK;
2001 }
2002 } else {
2003 uplflags &= ~UPL_FOR_PAGEOUT;
2004
2005 if (uplflags & UPL_WILL_BE_DUMPED) {
2006 uplflags &= ~UPL_WILL_BE_DUMPED;
2007 uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
2008 } else
2009 uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
2010 }
2011 control = ubc_getobject(vp, UBC_FLAGS_NONE);
2012 if (control == MEMORY_OBJECT_CONTROL_NULL)
2013 return KERN_INVALID_ARGUMENT;
2014
2015 kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, NULL, uplflags);
2016 if (kr == KERN_SUCCESS && plp != NULL)
2017 *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
2018 return kr;
2019 }
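/*
 * Illustrative usage sketch (not part of this file): a typical pagein-style
 * use of ubc_create_upl().  "vp" and a page-aligned "f_offset" are assumed
 * to be supplied by the caller; the resulting upl must later be released
 * with ubc_upl_commit_range() or ubc_upl_abort_range(), per the note above.
 *
 *	upl_t upl = NULL;
 *	upl_page_info_t *pl = NULL;
 *	kern_return_t kr;
 *
 *	kr = ubc_create_upl(vp, f_offset, PAGE_SIZE, &upl, &pl, UPL_UBC_PAGEIN);
 *	if (kr != KERN_SUCCESS)
 *		return (kr);
 *	// ... issue I/O for the absent pages described by "pl" ...
 */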
2020
2021
2022 /*
2023 * ubc_upl_maxbufsize
2024 *
2025 * Return the maximum bufsize ubc_create_upl( ) will take.
2026 *
2027 * Parameters: none
2028 *
2029 * Returns: maximum size buffer (in bytes) ubc_create_upl( ) will take.
2030 */
2031 upl_size_t
2032 ubc_upl_maxbufsize(
2033 void)
2034 {
2035 return(MAX_UPL_SIZE * PAGE_SIZE);
2036 }
2037
2038 /*
2039 * ubc_upl_map
2040 *
2041 * Map the page list associated with the supplied upl into the kernel virtual
2042 * address space at the virtual address indicated by the dst_addr argument;
2043 * the entire upl is mapped
2044 *
2045 * Parameters: upl The upl to map
2046 * dst_addr The address at which to map the upl
2047 *
2048 * Returns: KERN_SUCCESS The upl has been mapped
2049 * KERN_INVALID_ARGUMENT The upl is UPL_NULL
2050 * KERN_FAILURE The upl is already mapped
2051 * vm_map_enter:KERN_INVALID_ARGUMENT
2052 * A failure code from vm_map_enter() due
2053 * to an invalid argument
2054 */
2055 kern_return_t
2056 ubc_upl_map(
2057 upl_t upl,
2058 vm_offset_t *dst_addr)
2059 {
2060 return (vm_upl_map(kernel_map, upl, dst_addr));
2061 }
2062
2063
2064 /*
2065 * ubc_upl_unmap
2066 *
2067 * Unmap the page list associated with the supplied upl from the kernel virtual
2068 * address space; the entire upl is unmapped.
2069 *
2070 * Parameters: upl The upl to unmap
2071 *
2072 * Returns: KERN_SUCCESS The upl has been unmapped
2073 * KERN_FAILURE The upl is not currently mapped
2074 * KERN_INVALID_ARGUMENT If the upl is UPL_NULL
2075 */
2076 kern_return_t
2077 ubc_upl_unmap(
2078 upl_t upl)
2079 {
2080 return(vm_upl_unmap(kernel_map, upl));
2081 }
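/*
 * Illustrative usage sketch (not part of this file): mapping a upl into the
 * kernel map so its pages can be addressed directly, then unmapping it.
 * "upl" is assumed to have been obtained from ubc_create_upl().
 *
 *	vm_offset_t kaddr = 0;
 *
 *	if (ubc_upl_map(upl, &kaddr) == KERN_SUCCESS) {
 *		// ... read or write the pages through "kaddr" ...
 *		(void) ubc_upl_unmap(upl);
 *	}
 */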
2082
2083
2084 /*
2085 * ubc_upl_commit
2086 *
2087 * Commit the contents of the upl to the backing store
2088 *
2089 * Parameters: upl The upl to commit
2090 *
2091 * Returns: KERN_SUCCESS The upl has been committed
2092 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2093 * KERN_FAILURE The supplied upl does not represent
2094 * device memory, and the offset plus the
2095 * size would exceed the actual size of
2096 * the upl
2097 *
2098 * Notes: In practice, the only return value for this function should be
2099 * KERN_SUCCESS, unless there has been data structure corruption;
2100 * since the upl is deallocated regardless of success or failure,
2101 * there's really nothing to do about this other than panic.
2102 *
2103 * IMPORTANT: Use of this function should not be mixed with use of
2104 * ubc_upl_commit_range(), due to the unconditional deallocation
2105 * by this function.
2106 */
2107 kern_return_t
2108 ubc_upl_commit(
2109 upl_t upl)
2110 {
2111 upl_page_info_t *pl;
2112 kern_return_t kr;
2113
2114 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2115 kr = upl_commit(upl, pl, MAX_UPL_SIZE);
2116 upl_deallocate(upl);
2117 return kr;
2118 }
2119
2120
2121 /*
2122 * ubc_upl_commit_range
2123 *
2124 * Commit the contents of the specified range of the upl to the backing store
2125 *
2126 * Parameters: upl The upl to commit
2127 * offset The offset into the upl
2128 * size The size of the region to be committed,
2129 * starting at the specified offset
2130 * flags commit type (see below)
2131 *
2132 * Returns: KERN_SUCCESS The range has been committed
2133 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2134 * KERN_FAILURE The supplied upl does not represent
2135 * device memory, and the offset plus the
2136 * size would exceed the actual size of
2137 * the upl
2138 *
2139 * Notes: IMPORTANT: If the commit is successful, and the object is now
2140 * empty, the upl will be deallocated. Since the caller cannot
2141 * check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
2142 * should generally only be used when the offset is 0 and the size
2143 * is equal to the upl size.
2144 *
2145 * The flags argument is a bitmap of flags on the range of pages in
2146 * the upl to be committed; allowable flags are:
2147 *
2148 * o UPL_COMMIT_FREE_ON_EMPTY Free the upl when it is
2149 * both empty and has been
2150 * successfully committed
2151 * o UPL_COMMIT_CLEAR_DIRTY Clear each page's dirty
2152 * bit; will prevent a
2153 * later pageout
2154 * o UPL_COMMIT_SET_DIRTY Set each page's dirty
2155 * bit; will cause a later
2156 * pageout
2157 * o UPL_COMMIT_INACTIVATE Clear each page's
2158 * reference bit; the page
2159 * will not be accessed
2160 * o UPL_COMMIT_ALLOW_ACCESS Unbusy each page; pages
2161 * become busy when an
2162 * IOMemoryDescriptor is
2163 * mapped or redirected,
2164 * and we have to wait for
2165 * an IOKit driver
2166 *
2167 * The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
2168 * not be specified by the caller.
2169 *
2170 * The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
2171 * mutually exclusive, and should not be combined.
2172 */
2173 kern_return_t
2174 ubc_upl_commit_range(
2175 upl_t upl,
2176 upl_offset_t offset,
2177 upl_size_t size,
2178 int flags)
2179 {
2180 upl_page_info_t *pl;
2181 boolean_t empty;
2182 kern_return_t kr;
2183
2184 if (flags & UPL_COMMIT_FREE_ON_EMPTY)
2185 flags |= UPL_COMMIT_NOTIFY_EMPTY;
2186
2187 if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
2188 return KERN_INVALID_ARGUMENT;
2189 }
2190
2191 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
2192
2193 kr = upl_commit_range(upl, offset, size, flags,
2194 pl, MAX_UPL_SIZE, &empty);
2195
2196 if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
2197 upl_deallocate(upl);
2198
2199 return kr;
2200 }
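/*
 * Illustrative usage sketch (not part of this file): committing an entire
 * upl with UPL_COMMIT_FREE_ON_EMPTY so the upl is also deallocated, per the
 * note above about using offset 0 and the full upl size.  "upl_size" is
 * assumed to be the size the upl was created with.
 *
 *	kern_return_t kr;
 *
 *	kr = ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_FREE_ON_EMPTY);
 *	// after a successful full-range commit, "upl" has been deallocated
 *	// and must not be referenced again
 */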
2201
2202
2203 /*
2204 * ubc_upl_abort_range
2205 *
2206 * Abort the contents of the specified range of the specified upl
2207 *
2208 * Parameters: upl The upl to abort
2209 * offset The offset into the upl
2210 * size The size of the region to be aborted,
2211 * starting at the specified offset
2212 * abort_flags abort type (see below)
2213 *
2214 * Returns: KERN_SUCCESS The range has been aborted
2215 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2216 * KERN_FAILURE The supplied upl does not represent
2217 * device memory, and the offset plus the
2218 * size would exceed the actual size of
2219 * the upl
2220 *
2221 * Notes: IMPORTANT: If the abort is successful, and the object is now
2222 * empty, the upl will be deallocated. Since the caller cannot
2223 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2224 * should generally only be used when the offset is 0 and the size
2225 * is equal to the upl size.
2226 *
2227 * The abort_flags argument is a bitmap of flags on the range of
2228 * pages in the upl to be aborted; allowable flags are:
2229 *
2230 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2231 * empty and has been successfully
2232 * aborted
2233 * o UPL_ABORT_RESTART The operation must be restarted
2234 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
2235 * o UPL_ABORT_ERROR An I/O error occurred
2236 * o UPL_ABORT_DUMP_PAGES Just free the pages
2237 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
2238 * o UPL_ABORT_ALLOW_ACCESS RESERVED
2239 *
2240 * UPL_ABORT_NOTIFY_EMPTY is an internal-use flag and should
2241 * not be specified by the caller. It is intended to fulfill the
2242 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2243 * ubc_upl_commit_range(), but is never referenced internally.
2244 *
2245 * UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2246 * referenced; do not use it.
2247 */
2248 kern_return_t
2249 ubc_upl_abort_range(
2250 upl_t upl,
2251 upl_offset_t offset,
2252 upl_size_t size,
2253 int abort_flags)
2254 {
2255 kern_return_t kr;
2256 boolean_t empty = FALSE;
2257
2258 if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
2259 abort_flags |= UPL_ABORT_NOTIFY_EMPTY;
2260
2261 kr = upl_abort_range(upl, offset, size, abort_flags, &empty);
2262
2263 if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
2264 upl_deallocate(upl);
2265
2266 return kr;
2267 }
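/*
 * Illustrative usage sketch (not part of this file): aborting an entire upl
 * after an I/O error and freeing it once it is empty.  "upl_size" is
 * assumed to be the size the upl was created with.
 *
 *	(void) ubc_upl_abort_range(upl, 0, upl_size,
 *	    UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
 */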
2268
2269
2270 /*
2271 * ubc_upl_abort
2272 *
2273 * Abort the contents of the specified upl
2274 *
2275 * Parameters: upl The upl to abort
2276 * abort_type abort type (see below)
2277 *
2278 * Returns: KERN_SUCCESS The upl has been aborted
2279 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2280 * KERN_FAILURE The supplied upl does not represent
2281 * device memory, and the offset plus the
2282 * size would exceed the actual size of
2283 * the upl
2284 *
2285 * Notes: IMPORTANT: If the abort is successful, and the object is now
2286 * empty, the upl will be deallocated. Since the caller cannot
2287 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2288 * should generally only be used when the offset is 0 and the size
2289 * is equal to the upl size.
2290 *
2291 * The abort_type is a bitmap of flags on the range of
2292 * pages in the upl to be aborted; allowable flags are:
2293 *
2294 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2295 * empty and has been successfully
2296 * aborted
2297 * o UPL_ABORT_RESTART The operation must be restarted
2298 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
2299 * o UPL_ABORT_ERROR An I/O error occurred
2300 * o UPL_ABORT_DUMP_PAGES Just free the pages
2301 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
2302 * o UPL_ABORT_ALLOW_ACCESS RESERVED
2303 *
2304 * UPL_ABORT_NOTIFY_EMPTY is an internal-use flag and should
2305 * not be specified by the caller. It is intended to fulfill the
2306 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2307 * ubc_upl_commit_range(), but is never referenced internally.
2308 *
2309 * UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2310 * referenced; do not use it.
2311 */
2312 kern_return_t
2313 ubc_upl_abort(
2314 upl_t upl,
2315 int abort_type)
2316 {
2317 kern_return_t kr;
2318
2319 kr = upl_abort(upl, abort_type);
2320 upl_deallocate(upl);
2321 return kr;
2322 }
2323
2324
2325 /*
2326 * ubc_upl_pageinfo
2327 *
2328 * Retrieve the internal page list for the specified upl
2329 *
2330 * Parameters: upl The upl to obtain the page list from
2331 *
2332 * Returns: !NULL The (upl_page_info_t *) for the page
2333 * list internal to the upl
2334 * NULL Error/no page list associated
2335 *
2336 * Notes: IMPORTANT: The function is only valid on internal objects
2337 * where the list request was made with the UPL_INTERNAL flag.
2338 *
2339 * This function is a utility helper function, since some callers
2340 * may not have direct access to the header defining the macro,
2341 * due to abstraction layering constraints.
2342 */
2343 upl_page_info_t *
2344 ubc_upl_pageinfo(
2345 upl_t upl)
2346 {
2347 return (UPL_GET_INTERNAL_PAGE_LIST(upl));
2348 }
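/*
 * Illustrative usage sketch (not part of this file): walking the internal
 * page list of an internal upl via ubc_upl_pageinfo().  upl_page_present()
 * is one of the standard UPL page-info accessors; "upl_size" is assumed to
 * be the upl's size in bytes.
 *
 *	upl_page_info_t *pl = ubc_upl_pageinfo(upl);
 *	int i;
 *
 *	if (pl != NULL) {
 *		for (i = 0; i < (int)(upl_size / PAGE_SIZE); i++) {
 *			if (!upl_page_present(pl, i)) {
 *				// page "i" still needs to be filled
 *			}
 *		}
 *	}
 */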
2349
2350
2351 int
2352 UBCINFOEXISTS(struct vnode * vp)
2353 {
2354 return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
2355 }
2356
2357
2358 void
2359 ubc_upl_range_needed(
2360 upl_t upl,
2361 int index,
2362 int count)
2363 {
2364 upl_range_needed(upl, index, count);
2365 }
2366
2367
2368 /*
2369 * CODE SIGNING
2370 */
2371 #define CS_BLOB_PAGEABLE 0
2372 static volatile SInt32 cs_blob_size = 0;
2373 static volatile SInt32 cs_blob_count = 0;
2374 static SInt32 cs_blob_size_peak = 0;
2375 static UInt32 cs_blob_size_max = 0;
2376 static SInt32 cs_blob_count_peak = 0;
2377
2378 int cs_validation = 1;
2379
2380 #ifndef SECURE_KERNEL
2381 SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_validation, 0, "Do validate code signatures");
2382 #endif
2383 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_count, 0, "Current number of code signature blobs");
2384 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_size, 0, "Current size of all code signature blobs");
2385 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
2386 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
2387 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob");
2388
2389 kern_return_t
2390 ubc_cs_blob_allocate(
2391 vm_offset_t *blob_addr_p,
2392 vm_size_t *blob_size_p)
2393 {
2394 kern_return_t kr;
2395
2396 #if CS_BLOB_PAGEABLE
2397 *blob_size_p = round_page(*blob_size_p);
2398 kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
2399 #else /* CS_BLOB_PAGEABLE */
2400 *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
2401 if (*blob_addr_p == 0) {
2402 kr = KERN_NO_SPACE;
2403 } else {
2404 kr = KERN_SUCCESS;
2405 }
2406 #endif /* CS_BLOB_PAGEABLE */
2407 return kr;
2408 }
2409
2410 void
2411 ubc_cs_blob_deallocate(
2412 vm_offset_t blob_addr,
2413 vm_size_t blob_size)
2414 {
2415 #if CS_BLOB_PAGEABLE
2416 kmem_free(kernel_map, blob_addr, blob_size);
2417 #else /* CS_BLOB_PAGEABLE */
2418 kfree((void *) blob_addr, blob_size);
2419 #endif /* CS_BLOB_PAGEABLE */
2420 }
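/*
 * Illustrative usage sketch (not part of this file): the usual pairing of
 * ubc_cs_blob_allocate() with ubc_cs_blob_add() (defined below), as a
 * loader might do it.  "vp", "cputype", "base_offset" and "sig_size" are
 * assumed to be supplied by the caller.  On a zero return,
 * ubc_cs_blob_add() owns the buffer; on failure the caller frees it.
 *
 *	vm_offset_t addr = 0;
 *	vm_size_t size = sig_size;
 *	int error;
 *
 *	if (ubc_cs_blob_allocate(&addr, &size) != KERN_SUCCESS)
 *		return (ENOMEM);
 *	// ... copy the code signature into (void *)addr ...
 *	error = ubc_cs_blob_add(vp, cputype, base_offset, addr, size);
 *	if (error)
 *		ubc_cs_blob_deallocate(addr, size);
 */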
2421
2422 int
2423 ubc_cs_blob_add(
2424 struct vnode *vp,
2425 cpu_type_t cputype,
2426 off_t base_offset,
2427 vm_address_t addr,
2428 vm_size_t size)
2429 {
2430 kern_return_t kr;
2431 struct ubc_info *uip;
2432 struct cs_blob *blob, *oblob;
2433 int error;
2434 ipc_port_t blob_handle;
2435 memory_object_size_t blob_size;
2436 const CS_CodeDirectory *cd;
2437 off_t blob_start_offset, blob_end_offset;
2438 SHA1_CTX sha1ctxt;
2439
2440 blob_handle = IPC_PORT_NULL;
2441
2442 blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
2443 if (blob == NULL) {
2444 return ENOMEM;
2445 }
2446
2447 #if CS_BLOB_PAGEABLE
2448 /* get a memory entry on the blob */
2449 blob_size = (memory_object_size_t) size;
2450 kr = mach_make_memory_entry_64(kernel_map,
2451 &blob_size,
2452 addr,
2453 VM_PROT_READ,
2454 &blob_handle,
2455 IPC_PORT_NULL);
2456 if (kr != KERN_SUCCESS) {
2457 error = ENOMEM;
2458 goto out;
2459 }
2460 if (memory_object_round_page(blob_size) !=
2461 (memory_object_size_t) round_page(size)) {
2462 printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%lx !?\n",
2463 blob_size, (size_t)size);
2464 panic("XXX FBDP size mismatch 0x%llx 0x%lx\n", blob_size, (size_t)size);
2465 error = EINVAL;
2466 goto out;
2467 }
2468 #else
2469 blob_size = (memory_object_size_t) size;
2470 blob_handle = IPC_PORT_NULL;
2471 #endif
2472
2473 /* fill in the new blob */
2474 blob->csb_cpu_type = cputype;
2475 blob->csb_base_offset = base_offset;
2476 blob->csb_mem_size = size;
2477 blob->csb_mem_offset = 0;
2478 blob->csb_mem_handle = blob_handle;
2479 blob->csb_mem_kaddr = addr;
2480
2481 /*
2482 * Validate the blob's contents
2483 */
2484 cd = findCodeDirectory(
2485 (const CS_SuperBlob *) addr,
2486 (char *) addr,
2487 (char *) addr + blob->csb_mem_size);
2488 if (cd == NULL) {
2489 /* no code directory => useless blob ! */
2490 blob->csb_flags = 0;
2491 blob->csb_start_offset = 0;
2492 blob->csb_end_offset = 0;
2493 } else {
2494 const unsigned char *sha1_base;
2495 int sha1_size;
2496
2497 blob->csb_flags = ntohl(cd->flags) | CS_VALID;
2498 blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
2499 if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
2500 const struct Scatter *scatter = (const struct Scatter*)
2501 ((const char*)cd + ntohl(cd->scatterOffset));
2502 blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE;
2503 } else {
2504 blob->csb_start_offset = (blob->csb_end_offset -
2505 (ntohl(cd->nCodeSlots) * PAGE_SIZE));
2506 }
2507 /* compute the blob's SHA1 hash */
2508 sha1_base = (const unsigned char *) cd;
2509 sha1_size = ntohl(cd->length);
2510 SHA1Init(&sha1ctxt);
2511 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
2512 SHA1Final(blob->csb_sha1, &sha1ctxt);
2513 }
2514
2515 /*
2516 * Let policy module check whether the blob's signature is accepted.
2517 */
2518 #if CONFIG_MACF
2519 error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
2520 if (error)
2521 goto out;
2522 #endif
2523
2524 /*
2525 * Validate the blob's coverage
2526 */
2527 blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
2528 blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;
2529
2530 if (blob_start_offset >= blob_end_offset ||
2531 blob_start_offset < 0 ||
2532 blob_end_offset <= 0) {
2533 /* reject empty or backwards blob */
2534 error = EINVAL;
2535 goto out;
2536 }
2537
2538 vnode_lock(vp);
2539 if (! UBCINFOEXISTS(vp)) {
2540 vnode_unlock(vp);
2541 error = ENOENT;
2542 goto out;
2543 }
2544 uip = vp->v_ubcinfo;
2545
2546 /* check if this new blob overlaps with an existing blob */
2547 for (oblob = uip->cs_blobs;
2548 oblob != NULL;
2549 oblob = oblob->csb_next) {
2550 off_t oblob_start_offset, oblob_end_offset;
2551
2552 oblob_start_offset = (oblob->csb_base_offset +
2553 oblob->csb_start_offset);
2554 oblob_end_offset = (oblob->csb_base_offset +
2555 oblob->csb_end_offset);
2556 if (blob_start_offset >= oblob_end_offset ||
2557 blob_end_offset <= oblob_start_offset) {
2558 /* no conflict with this existing blob */
2559 } else {
2560 /* conflict ! */
2561 if (blob_start_offset == oblob_start_offset &&
2562 blob_end_offset == oblob_end_offset &&
2563 blob->csb_mem_size == oblob->csb_mem_size &&
2564 blob->csb_flags == oblob->csb_flags &&
2565 (blob->csb_cpu_type == CPU_TYPE_ANY ||
2566 oblob->csb_cpu_type == CPU_TYPE_ANY ||
2567 blob->csb_cpu_type == oblob->csb_cpu_type) &&
2568 !bcmp(blob->csb_sha1,
2569 oblob->csb_sha1,
2570 SHA1_RESULTLEN)) {
2571 /*
2572 * We already have this blob:
2573 * we'll return success but
2574 * throw away the new blob.
2575 */
2576 if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
2577 /*
2578 * The old blob matches this one
2579 * but doesn't have any CPU type.
2580 * Update it with whatever the caller
2581 * provided this time.
2582 */
2583 oblob->csb_cpu_type = cputype;
2584 }
2585 vnode_unlock(vp);
2586 error = EAGAIN;
2587 goto out;
2588 } else {
2589 /* different blob: reject the new one */
2590 vnode_unlock(vp);
2591 error = EALREADY;
2592 goto out;
2593 }
2594 }
2595
2596 }
2597
2598
2599 /* mark this vnode's VM object as having "signed pages" */
2600 kr = memory_object_signed(uip->ui_control, TRUE);
2601 if (kr != KERN_SUCCESS) {
2602 vnode_unlock(vp);
2603 error = ENOENT;
2604 goto out;
2605 }
2606
2607 /*
2608 * Add this blob to the list of blobs for this vnode.
2609 * We always add at the front of the list and we never remove a
2610 * blob from the list, so ubc_cs_get_blobs() can return whatever
2611 * the top of the list was and that list will remain valid
2612 * while we validate a page, even after we release the vnode's lock.
2613 */
2614 blob->csb_next = uip->cs_blobs;
2615 uip->cs_blobs = blob;
2616
2617 OSAddAtomic(+1, &cs_blob_count);
2618 if (cs_blob_count > cs_blob_count_peak) {
2619 cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
2620 }
2621 OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size);
2622 if ((SInt32) cs_blob_size > cs_blob_size_peak) {
2623 cs_blob_size_peak = (SInt32) cs_blob_size; /* XXX atomic ? */
2624 }
2625 if ((UInt32) blob->csb_mem_size > cs_blob_size_max) {
2626 cs_blob_size_max = (UInt32) blob->csb_mem_size;
2627 }
2628
2629 if (cs_debug > 1) {
2630 proc_t p;
2631
2632 p = current_proc();
2633 printf("CODE SIGNING: proc %d(%s) "
2634 "loaded %s signatures for file (%s) "
2635 "range 0x%llx:0x%llx flags 0x%x\n",
2636 p->p_pid, p->p_comm,
2637 blob->csb_cpu_type == -1 ? "detached" : "embedded",
2638 vnode_name(vp),
2639 blob->csb_base_offset + blob->csb_start_offset,
2640 blob->csb_base_offset + blob->csb_end_offset,
2641 blob->csb_flags);
2642 }
2643
2644 vnode_unlock(vp);
2645
2646 error = 0; /* success ! */
2647
2648 out:
2649 if (error) {
2650 /* we failed; release what we allocated */
2651 if (blob) {
2652 kfree(blob, sizeof (*blob));
2653 blob = NULL;
2654 }
2655 if (blob_handle != IPC_PORT_NULL) {
2656 mach_memory_entry_port_release(blob_handle);
2657 blob_handle = IPC_PORT_NULL;
2658 }
2659 }
2660
2661 if (error == EAGAIN) {
2662 /*
2663 * See above: error is EAGAIN if we were asked
2664 * to add an existing blob again. We cleaned the new
2665 * blob and we want to return success.
2666 */
2667 error = 0;
2668 /*
2669 * Since we're not failing, consume the data we received.
2670 */
2671 ubc_cs_blob_deallocate(addr, size);
2672 }
2673
2674 return error;
2675 }
2676
2677
2678 struct cs_blob *
2679 ubc_cs_blob_get(
2680 struct vnode *vp,
2681 cpu_type_t cputype,
2682 off_t offset)
2683 {
2684 struct ubc_info *uip;
2685 struct cs_blob *blob;
2686 off_t offset_in_blob;
2687
2688 vnode_lock_spin(vp);
2689
2690 if (! UBCINFOEXISTS(vp)) {
2691 blob = NULL;
2692 goto out;
2693 }
2694
2695 uip = vp->v_ubcinfo;
2696 for (blob = uip->cs_blobs;
2697 blob != NULL;
2698 blob = blob->csb_next) {
2699 if (cputype != -1 && blob->csb_cpu_type == cputype) {
2700 break;
2701 }
2702 if (offset != -1) {
2703 offset_in_blob = offset - blob->csb_base_offset;
2704 if (offset_in_blob >= blob->csb_start_offset &&
2705 offset_in_blob < blob->csb_end_offset) {
2706 /* our offset is covered by this blob */
2707 break;
2708 }
2709 }
2710 }
2711
2712 out:
2713 vnode_unlock(vp);
2714
2715 return blob;
2716 }
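/*
 * Illustrative usage sketch (not part of this file): looking up the
 * signature blob, if any, that covers a given file offset, e.g. to inspect
 * its csb_flags.  "vp" and "file_offset" are assumed to be supplied by a
 * caller holding a valid reference on the vnode.
 *
 *	struct cs_blob *blob;
 *
 *	blob = ubc_cs_blob_get(vp, -1, file_offset);
 *	if (blob != NULL && (blob->csb_flags & CS_VALID)) {
 *		// "file_offset" falls within a valid signature blob
 *	}
 */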
2717
2718 static void
2719 ubc_cs_free(
2720 struct ubc_info *uip)
2721 {
2722 struct cs_blob *blob, *next_blob;
2723
2724 for (blob = uip->cs_blobs;
2725 blob != NULL;
2726 blob = next_blob) {
2727 next_blob = blob->csb_next;
2728 if (blob->csb_mem_kaddr != 0) {
2729 ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
2730 blob->csb_mem_size);
2731 blob->csb_mem_kaddr = 0;
2732 }
2733 if (blob->csb_mem_handle != IPC_PORT_NULL) {
2734 mach_memory_entry_port_release(blob->csb_mem_handle);
2735 }
2736 blob->csb_mem_handle = IPC_PORT_NULL;
2737 OSAddAtomic(-1, &cs_blob_count);
2738 OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size);
2739 kfree(blob, sizeof (*blob));
2740 }
2741 #if CHECK_CS_VALIDATION_BITMAP
2742 ubc_cs_validation_bitmap_deallocate( uip->ui_vnode );
2743 #endif
2744 uip->cs_blobs = NULL;
2745 }
2746
2747 struct cs_blob *
2748 ubc_get_cs_blobs(
2749 struct vnode *vp)
2750 {
2751 struct ubc_info *uip;
2752 struct cs_blob *blobs;
2753
2754 /*
2755 * No need to take the vnode lock here. The caller must be holding
2756 * a reference on the vnode (via a VM mapping or open file descriptor),
2757 * so the vnode will not go away. The ubc_info stays until the vnode
2758 * goes away. And we only modify "blobs" by adding to the head of the
2759 * list.
2760 * The ubc_info could go away entirely if the vnode gets reclaimed as
2761 * part of a forced unmount. In the case of a code-signature validation
2762 * during a page fault, the "paging_in_progress" reference on the VM
2763 * object guarantees that the vnode pager (and the ubc_info) won't go
2764 * away during the fault.
2765 * Other callers need to protect against vnode reclaim by holding the
2766 * vnode lock, for example.
2767 */
2768
2769 if (! UBCINFOEXISTS(vp)) {
2770 blobs = NULL;
2771 goto out;
2772 }
2773
2774 uip = vp->v_ubcinfo;
2775 blobs = uip->cs_blobs;
2776
2777 out:
2778 return blobs;
2779 }
2780
2781 unsigned long cs_validate_page_no_hash = 0;
2782 unsigned long cs_validate_page_bad_hash = 0;
2783 boolean_t
2784 cs_validate_page(
2785 void *_blobs,
2786 memory_object_t pager,
2787 memory_object_offset_t page_offset,
2788 const void *data,
2789 boolean_t *tainted)
2790 {
2791 SHA1_CTX sha1ctxt;
2792 unsigned char actual_hash[SHA1_RESULTLEN];
2793 unsigned char expected_hash[SHA1_RESULTLEN];
2794 boolean_t found_hash;
2795 struct cs_blob *blobs, *blob;
2796 const CS_CodeDirectory *cd;
2797 const CS_SuperBlob *embedded;
2798 const unsigned char *hash;
2799 boolean_t validated;
2800 off_t offset; /* page offset in the file */
2801 size_t size;
2802 off_t codeLimit = 0;
2803 char *lower_bound, *upper_bound;
2804 vm_offset_t kaddr, blob_addr;
2805 vm_size_t ksize;
2806 kern_return_t kr;
2807
2808 offset = page_offset;
2809
2810 /* retrieve the expected hash */
2811 found_hash = FALSE;
2812 blobs = (struct cs_blob *) _blobs;
2813
2814 for (blob = blobs;
2815 blob != NULL;
2816 blob = blob->csb_next) {
2817 offset = page_offset - blob->csb_base_offset;
2818 if (offset < blob->csb_start_offset ||
2819 offset >= blob->csb_end_offset) {
2820 /* our page is not covered by this blob */
2821 continue;
2822 }
2823
2824 /* map the blob in the kernel address space */
2825 kaddr = blob->csb_mem_kaddr;
2826 if (kaddr == 0) {
2827 ksize = (vm_size_t) (blob->csb_mem_size +
2828 blob->csb_mem_offset);
2829 kr = vm_map(kernel_map,
2830 &kaddr,
2831 ksize,
2832 0,
2833 VM_FLAGS_ANYWHERE,
2834 blob->csb_mem_handle,
2835 0,
2836 TRUE,
2837 VM_PROT_READ,
2838 VM_PROT_READ,
2839 VM_INHERIT_NONE);
2840 if (kr != KERN_SUCCESS) {
2841 /* XXX FBDP what to do !? */
2842 printf("cs_validate_page: failed to map blob, "
2843 "size=0x%lx kr=0x%x\n",
2844 (size_t)blob->csb_mem_size, kr);
2845 break;
2846 }
2847 }
2848 blob_addr = kaddr + blob->csb_mem_offset;
2849
2850 lower_bound = CAST_DOWN(char *, blob_addr);
2851 upper_bound = lower_bound + blob->csb_mem_size;
2852
2853 embedded = (const CS_SuperBlob *) blob_addr;
2854 cd = findCodeDirectory(embedded, lower_bound, upper_bound);
2855 if (cd != NULL) {
2856 if (cd->pageSize != PAGE_SHIFT ||
2857 cd->hashType != 0x1 ||
2858 cd->hashSize != SHA1_RESULTLEN) {
2859 /* bogus blob ? */
2860 continue;
2861 }
2862
2863 offset = page_offset - blob->csb_base_offset;
2864 if (offset < blob->csb_start_offset ||
2865 offset >= blob->csb_end_offset) {
2866 /* our page is not covered by this blob */
2867 continue;
2868 }
2869
2870 codeLimit = ntohl(cd->codeLimit);
2871 hash = hashes(cd, atop(offset),
2872 lower_bound, upper_bound);
2873 if (hash != NULL) {
2874 bcopy(hash, expected_hash,
2875 sizeof (expected_hash));
2876 found_hash = TRUE;
2877 }
2878
2879 break;
2880 }
2881 }
2882
2883 if (found_hash == FALSE) {
2884 /*
2885 * We can't verify this page because there is no signature
2886 * for it (yet). It's possible that this part of the object
2887 * is not signed, or that signatures for that part have not
2888 * been loaded yet.
2889 * Report that the page has not been validated and let the
2890 * caller decide if it wants to accept it or not.
2891 */
2892 cs_validate_page_no_hash++;
2893 if (cs_debug > 1) {
2894 printf("CODE SIGNING: cs_validate_page: "
2895 "mobj %p off 0x%llx: no hash to validate !?\n",
2896 pager, page_offset);
2897 }
2898 validated = FALSE;
2899 *tainted = FALSE;
2900 } else {
2901
2902 size = PAGE_SIZE;
2903 const uint32_t *asha1, *esha1;
2904 if ((off_t)(offset + size) > codeLimit) {
2905 /* partial page at end of segment */
2906 assert(offset < codeLimit);
2907 size = (size_t) (codeLimit & PAGE_MASK);
2908 }
2909 /* compute the actual page's SHA1 hash */
2910 SHA1Init(&sha1ctxt);
2911 SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
2912 SHA1Final(actual_hash, &sha1ctxt);
2913
2914 asha1 = (const uint32_t *) actual_hash;
2915 esha1 = (const uint32_t *) expected_hash;
2916
2917 if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
2918 if (cs_debug) {
2919 printf("CODE SIGNING: cs_validate_page: "
2920 "mobj %p off 0x%llx size 0x%lx: "
2921 "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
2922 "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
2923 pager, page_offset, size,
2924 asha1[0], asha1[1], asha1[2],
2925 asha1[3], asha1[4],
2926 esha1[0], esha1[1], esha1[2],
2927 esha1[3], esha1[4]);
2928 }
2929 cs_validate_page_bad_hash++;
2930 *tainted = TRUE;
2931 } else {
2932 if (cs_debug > 1) {
2933 printf("CODE SIGNING: cs_validate_page: "
2934 "mobj %p off 0x%llx size 0x%lx: "
2935 "SHA1 OK\n",
2936 pager, page_offset, size);
2937 }
2938 *tainted = FALSE;
2939 }
2940 validated = TRUE;
2941 }
2942
2943 return validated;
2944 }
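/*
 * Illustrative sketch (not part of this file): how a consumer such as the
 * vnode pager might drive cs_validate_page() - fetch the blob list for the
 * vnode, then validate one page's contents against it.  "vp", "pager",
 * "page_offset" and "kaddr" (a kernel mapping of the page data) are assumed
 * to be supplied by the fault path.
 *
 *	struct cs_blob *blobs;
 *	boolean_t tainted = FALSE;
 *	boolean_t validated;
 *
 *	blobs = ubc_get_cs_blobs(vp);
 *	validated = cs_validate_page(blobs, pager, page_offset,
 *	    (const void *)kaddr, &tainted);
 *	if (!validated || tainted) {
 *		// treat the page as failing code-signing validation
 *	}
 */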
2945
2946 int
2947 ubc_cs_getcdhash(
2948 vnode_t vp,
2949 off_t offset,
2950 unsigned char *cdhash)
2951 {
2952 struct cs_blob *blobs, *blob;
2953 off_t rel_offset;
2954 int ret;
2955
2956 vnode_lock(vp);
2957
2958 blobs = ubc_get_cs_blobs(vp);
2959 for (blob = blobs;
2960 blob != NULL;
2961 blob = blob->csb_next) {
2962 /* compute offset relative to this blob */
2963 rel_offset = offset - blob->csb_base_offset;
2964 if (rel_offset >= blob->csb_start_offset &&
2965 rel_offset < blob->csb_end_offset) {
2966 /* this blob does cover our "offset" ! */
2967 break;
2968 }
2969 }
2970
2971 if (blob == NULL) {
2972 /* we didn't find a blob covering "offset" */
2973 ret = EBADEXEC; /* XXX any better error ? */
2974 } else {
2975 /* get the SHA1 hash of that blob */
2976 bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
2977 ret = 0;
2978 }
2979
2980 vnode_unlock(vp);
2981
2982 return ret;
2983 }
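/*
 * Illustrative usage sketch (not part of this file): retrieving the code
 * directory hash for the blob covering an executable's load offset.
 * SHA1_RESULTLEN matches the size of csb_sha1; "vp" and "base_offset" are
 * assumed to be supplied by the caller.
 *
 *	unsigned char cdhash[SHA1_RESULTLEN];
 *
 *	if (ubc_cs_getcdhash(vp, base_offset, cdhash) == 0) {
 *		// cdhash now holds the SHA-1 of the covering code directory
 *	}
 */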
2984
2985 #if CHECK_CS_VALIDATION_BITMAP
2986 #define stob(s) ((atop_64((s)) + 07) >> 3)
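/*
 * Worked example (illustrative): for a 1 MB file with 4 KB pages,
 * atop_64(size) is 256 pages, so stob(size) = (256 + 7) >> 3 = 32 bytes of
 * bitmap, i.e. one validation bit per page.
 */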
2987 extern boolean_t root_fs_upgrade_try;
2988
2989 /*
2990 * Should we use the code-sign bitmap to avoid repeated code-sign validation?
2991 * Depends:
2992 * a) Is the target vnode on the root filesystem?
2993 * b) Has someone tried to mount the root filesystem read-write?
2994 * If answers are (a) yes AND (b) no, then we can use the bitmap.
2995 */
2996 #define USE_CODE_SIGN_BITMAP(vp) ( (vp != NULL) && (vp->v_mount != NULL) && (vp->v_mount->mnt_flag & MNT_ROOTFS) && !root_fs_upgrade_try)
2997 kern_return_t
2998 ubc_cs_validation_bitmap_allocate(
2999 vnode_t vp)
3000 {
3001 kern_return_t kr = KERN_SUCCESS;
3002 struct ubc_info *uip;
3003 char *target_bitmap;
3004 vm_object_size_t bitmap_size;
3005
3006 if ( ! USE_CODE_SIGN_BITMAP(vp) || (! UBCINFOEXISTS(vp))) {
3007 kr = KERN_INVALID_ARGUMENT;
3008 } else {
3009 uip = vp->v_ubcinfo;
3010
3011 if ( uip->cs_valid_bitmap == NULL ) {
3012 bitmap_size = stob(uip->ui_size);
3013 target_bitmap = (char*) kalloc( (vm_size_t)bitmap_size );
3014 if (target_bitmap == 0) {
3015 kr = KERN_NO_SPACE;
3016 } else {
3017 kr = KERN_SUCCESS;
3018 }
3019 if( kr == KERN_SUCCESS ) {
3020 memset( target_bitmap, 0, (size_t)bitmap_size);
3021 uip->cs_valid_bitmap = (void*)target_bitmap;
3022 uip->cs_valid_bitmap_size = bitmap_size;
3023 }
3024 }
3025 }
3026 return kr;
3027 }
3028
3029 kern_return_t
3030 ubc_cs_check_validation_bitmap (
3031 vnode_t vp,
3032 memory_object_offset_t offset,
3033 int optype)
3034 {
3035 kern_return_t kr = KERN_SUCCESS;
3036
3037 if ( ! USE_CODE_SIGN_BITMAP(vp) || ! UBCINFOEXISTS(vp)) {
3038 kr = KERN_INVALID_ARGUMENT;
3039 } else {
3040 struct ubc_info *uip = vp->v_ubcinfo;
3041 char *target_bitmap = uip->cs_valid_bitmap;
3042
3043 if ( target_bitmap == NULL ) {
3044 kr = KERN_INVALID_ARGUMENT;
3045 } else {
3046 uint64_t bit, byte;
3047 bit = atop_64( offset );
3048 byte = bit >> 3;
3049
3050 if ( byte > uip->cs_valid_bitmap_size ) {
3051 kr = KERN_INVALID_ARGUMENT;
3052 } else {
3053
3054 if (optype == CS_BITMAP_SET) {
3055 target_bitmap[byte] |= (1 << (bit & 07));
3056 kr = KERN_SUCCESS;
3057 } else if (optype == CS_BITMAP_CLEAR) {
3058 target_bitmap[byte] &= ~(1 << (bit & 07));
3059 kr = KERN_SUCCESS;
3060 } else if (optype == CS_BITMAP_CHECK) {
3061 if ( target_bitmap[byte] & (1 << (bit & 07))) {
3062 kr = KERN_SUCCESS;
3063 } else {
3064 kr = KERN_FAILURE;
3065 }
3066 }
3067 }
3068 }
3069 }
3070 return kr;
3071 }
3072
3073 void
3074 ubc_cs_validation_bitmap_deallocate(
3075 vnode_t vp)
3076 {
3077 struct ubc_info *uip;
3078 void *target_bitmap;
3079 vm_object_size_t bitmap_size;
3080
3081 if ( UBCINFOEXISTS(vp)) {
3082 uip = vp->v_ubcinfo;
3083
3084 if ( (target_bitmap = uip->cs_valid_bitmap) != NULL ) {
3085 bitmap_size = uip->cs_valid_bitmap_size;
3086 kfree( target_bitmap, (vm_size_t) bitmap_size );
3087 uip->cs_valid_bitmap = NULL;
3088 }
3089 }
3090 }
3091 #else
3092 kern_return_t ubc_cs_validation_bitmap_allocate(__unused vnode_t vp){
3093 return KERN_INVALID_ARGUMENT;
3094 }
3095
3096 kern_return_t ubc_cs_check_validation_bitmap(
3097 __unused struct vnode *vp,
3098 __unused memory_object_offset_t offset,
3099 __unused int optype){
3100
3101 return KERN_INVALID_ARGUMENT;
3102 }
3103
3104 void ubc_cs_validation_bitmap_deallocate(__unused vnode_t vp){
3105 return;
3106 }
3107 #endif /* CHECK_CS_VALIDATION_BITMAP */