]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/ubc_subr.c
xnu-1228.5.18.tar.gz
[apple/xnu.git] / bsd / kern / ubc_subr.c
1 /*
2 * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * File: ubc_subr.c
30 * Author: Umesh Vaishampayan [umeshv@apple.com]
31 * 05-Aug-1999 umeshv Created.
32 *
33 * Functions related to Unified Buffer cache.
34 *
35 * Caller of UBC functions MUST have a valid reference on the vnode.
36 *
37 */
38
39 #include <sys/types.h>
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/lock.h>
43 #include <sys/mman.h>
44 #include <sys/mount_internal.h>
45 #include <sys/vnode_internal.h>
46 #include <sys/ubc_internal.h>
47 #include <sys/ucred.h>
48 #include <sys/proc_internal.h>
49 #include <sys/kauth.h>
50 #include <sys/buf.h>
51 #include <sys/user.h>
52 #include <sys/codesign.h>
53
54 #include <mach/mach_types.h>
55 #include <mach/memory_object_types.h>
56 #include <mach/memory_object_control.h>
57 #include <mach/vm_map.h>
58 #include <mach/upl.h>
59
60 #include <kern/kern_types.h>
61 #include <kern/kalloc.h>
62 #include <kern/zalloc.h>
63 #include <kern/thread.h>
64 #include <vm/vm_kern.h>
65 #include <vm/vm_protos.h> /* last */
66
67 #include <libkern/crypto/sha1.h>
68
69 /* XXX These should be in a BSD accessible Mach header, but aren't. */
70 extern kern_return_t memory_object_pages_resident(memory_object_control_t,
71 boolean_t *);
72 extern kern_return_t memory_object_signed(memory_object_control_t control,
73 boolean_t is_signed);
74 extern void Debugger(const char *message);
75
76
77 /* XXX no one uses this interface! */
78 kern_return_t ubc_page_op_with_control(
79 memory_object_control_t control,
80 off_t f_offset,
81 int ops,
82 ppnum_t *phys_entryp,
83 int *flagsp);
84
85
86 #if DIAGNOSTIC
87 #if defined(assert)
88 #undef assert()
89 #endif
90 #define assert(cond) \
91 ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
92 #else
93 #include <kern/assert.h>
94 #endif /* DIAGNOSTIC */
95
96 static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
97 static int ubc_umcallback(vnode_t, void *);
98 static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
99 static void ubc_cs_free(struct ubc_info *uip);
100
101 struct zone *ubc_info_zone;
102
103
104 /*
105 * CODESIGNING
106 * Routines to navigate code signing data structures in the kernel...
107 */
108 static boolean_t
109 cs_valid_range(
110 const void *start,
111 const void *end,
112 const void *lower_bound,
113 const void *upper_bound)
114 {
115 if (upper_bound < lower_bound ||
116 end < start) {
117 return FALSE;
118 }
119
120 if (start < lower_bound ||
121 end > upper_bound) {
122 return FALSE;
123 }
124
125 return TRUE;
126 }
127
128 /*
129 * Magic numbers used by Code Signing
130 */
enum {
	/* blob magic numbers */
	CSMAGIC_REQUIREMENT		= 0xfade0c00,	/* single Requirement blob */
	CSMAGIC_REQUIREMENTS		= 0xfade0c01,	/* Requirements vector (internal requirements) */
	CSMAGIC_CODEDIRECTORY		= 0xfade0c02,	/* CodeDirectory blob */
	CSMAGIC_EMBEDDED_SIGNATURE	= 0xfade0cc0,	/* embedded form of signature data */
	CSMAGIC_EMBEDDED_SIGNATURE_OLD	= 0xfade0b02,	/* XXX */
	CSMAGIC_DETACHED_SIGNATURE	= 0xfade0cc1,	/* multi-arch collection of embedded signatures */

	/* SuperBlob index slot types */
	CSSLOT_CODEDIRECTORY		= 0,		/* slot index for CodeDirectory */
};
141
142
143 /*
144 * Structure of an embedded-signature SuperBlob
145 */
/* One index entry of a SuperBlob: what kind of blob, and where it lives. */
typedef struct __BlobIndex {
	uint32_t	type;		/* type of entry */
	uint32_t	offset;		/* offset of entry */
} CS_BlobIndex;
150
151 typedef struct __SuperBlob {
152 uint32_t magic; /* magic number */
153 uint32_t length; /* total length of SuperBlob */
154 uint32_t count; /* number of index entries following */
155 CS_BlobIndex index[]; /* (count) entries */
156 /* followed by Blobs in no particular order as indicated by offsets in index */
157 } CS_SuperBlob;
158
159
160 /*
161 * C form of a CodeDirectory.
162 */
/*
 * Fixed-size header of a CodeDirectory blob.  The variable-length content
 * (identifier string, hash slots) follows it and is located through the
 * offset fields below.
 */
typedef struct __CodeDirectory {
	uint32_t	magic;		/* magic number (CSMAGIC_CODEDIRECTORY) */
	uint32_t	length;		/* total length of CodeDirectory blob */
	uint32_t	version;	/* compatibility version */
	uint32_t	flags;		/* setup and mode flags */
	uint32_t	hashOffset;	/* offset of hash slot element at index zero */
	uint32_t	identOffset;	/* offset of identifier string */
	uint32_t	nSpecialSlots;	/* number of special hash slots */
	uint32_t	nCodeSlots;	/* number of ordinary (code) hash slots */
	uint32_t	codeLimit;	/* limit to main image signature range */
	uint8_t		hashSize;	/* size of each hash in bytes */
	uint8_t		hashType;	/* type of hash (cdHashType* constants) */
	uint8_t		spare1;		/* unused (must be zero) */
	uint8_t		pageSize;	/* log2(page size in bytes); 0 => infinite */
	uint32_t	spare2;		/* unused (must be zero) */
} CS_CodeDirectory;
180
181
182 /*
183 * Locate the CodeDirectory from an embedded signature blob
184 */
185 static const
186 CS_CodeDirectory *findCodeDirectory(
187 const CS_SuperBlob *embedded,
188 char *lower_bound,
189 char *upper_bound)
190 {
191 const CS_CodeDirectory *cd = NULL;
192
193 if (embedded &&
194 cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
195 ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
196 const CS_BlobIndex *limit;
197 const CS_BlobIndex *p;
198
199 limit = &embedded->index[ntohl(embedded->count)];
200 if (!cs_valid_range(&embedded->index[0], limit,
201 lower_bound, upper_bound)) {
202 return NULL;
203 }
204 for (p = embedded->index; p < limit; ++p) {
205 if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
206 const unsigned char *base;
207
208 base = (const unsigned char *)embedded;
209 cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
210 break;
211 }
212 }
213 } else {
214 /*
215 * Detached signatures come as a bare CS_CodeDirectory,
216 * without a blob.
217 */
218 cd = (const CS_CodeDirectory *) embedded;
219 }
220
221 if (cd &&
222 cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
223 cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
224 lower_bound, upper_bound) &&
225 cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
226 lower_bound, upper_bound) &&
227 cs_valid_range(cd, (const char *) cd +
228 ntohl(cd->hashOffset) +
229 (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
230 lower_bound, upper_bound) &&
231
232 ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
233 return cd;
234 }
235
236 // not found or not a valid code directory
237 return NULL;
238 }
239
240
241 /*
242 * Locating a page hash
243 */
244 static const unsigned char *
245 hashes(
246 const CS_CodeDirectory *cd,
247 unsigned page,
248 char *lower_bound,
249 char *upper_bound)
250 {
251 const unsigned char *base, *top, *hash;
252 uint32_t nCodeSlots;
253
254 assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));
255
256 base = (const unsigned char *)cd + ntohl(cd->hashOffset);
257 nCodeSlots = ntohl(cd->nCodeSlots);
258 top = base + nCodeSlots * SHA1_RESULTLEN;
259 if (!cs_valid_range(base, top,
260 lower_bound, upper_bound) ||
261 page > nCodeSlots) {
262 return NULL;
263 }
264 assert(page < nCodeSlots);
265
266 hash = base + page * SHA1_RESULTLEN;
267 if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
268 lower_bound, upper_bound)) {
269 hash = NULL;
270 }
271
272 return hash;
273 }
274 /*
275 * CODESIGNING
276 * End of routines to navigate code signing data structures in the kernel.
277 */
278
279
280 /*
281 * ubc_init
282 *
283 * Initialization of the zone for Unified Buffer Cache.
284 *
285 * Parameters: (void)
286 *
287 * Returns: (void)
288 *
289 * Implicit returns:
290 * ubc_info_zone(global) initialized for subsequent allocations
291 */
292 __private_extern__ void
293 ubc_init(void)
294 {
295 int i;
296
297 i = (vm_size_t) sizeof (struct ubc_info);
298
299 ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
300 }
301
302
303 /*
304 * ubc_info_init
305 *
306 * Allocate and attach an empty ubc_info structure to a vnode
307 *
308 * Parameters: vp Pointer to the vnode
309 *
310 * Returns: 0 Success
311 * vnode_size:ENOMEM Not enough space
312 * vnode_size:??? Other error from vnode_getattr
313 *
314 */
int
ubc_info_init(struct vnode *vp)
{
	/* withfsize == 0: the file size is obtained from the vnode */
	int	status = ubc_info_init_internal(vp, 0, 0);

	return (status);
}
320
321
322 /*
323 * ubc_info_init_withsize
324 *
325 * Allocate and attach a sized ubc_info structure to a vnode
326 *
327 * Parameters: vp Pointer to the vnode
328 * filesize The size of the file
329 *
330 * Returns: 0 Success
331 * vnode_size:ENOMEM Not enough space
332 * vnode_size:??? Other error from vnode_getattr
333 */
334 int
335 ubc_info_init_withsize(struct vnode *vp, off_t filesize)
336 {
337 return(ubc_info_init_internal(vp, 1, filesize));
338 }
339
340
341 /*
342 * ubc_info_init_internal
343 *
344 * Allocate and attach a ubc_info structure to a vnode
345 *
346 * Parameters: vp Pointer to the vnode
347 * withfsize{0,1} Zero if the size should be obtained
348 * from the vnode; otherwise, use filesize
349 * filesize The size of the file, if withfsize == 1
350 *
351 * Returns: 0 Success
352 * vnode_size:ENOMEM Not enough space
353 * vnode_size:??? Other error from vnode_getattr
354 *
355 * Notes: We call a blocking zalloc(), and the zone was created as an
356 * expandable and collectable zone, so if no memory is available,
357 * it is possible for zalloc() to block indefinitely. zalloc()
358 * may also panic if the zone of zones is exhausted, since it's
359 * NOT expandable.
360 *
361 * We unconditionally call vnode_pager_setup(), even if this is
362 * a reuse of a ubc_info; in that case, we should probably assert
363 * that it does not already have a pager association, but do not.
364 *
365 * Since memory_object_create_named() can only fail from receiving
366 * an invalid pager argument, the explicit check and panic is
367 * merely precautionary.
368 */
369 static int
370 ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
371 {
372 register struct ubc_info *uip;
373 void * pager;
374 int error = 0;
375 kern_return_t kret;
376 memory_object_control_t control;
377
378 uip = vp->v_ubcinfo;
379
380 /*
381 * If there is not already a ubc_info attached to the vnode, we
382 * attach one; otherwise, we will reuse the one that's there.
383 */
384 if (uip == UBC_INFO_NULL) {
385
386 uip = (struct ubc_info *) zalloc(ubc_info_zone);
387 bzero((char *)uip, sizeof(struct ubc_info));
388
389 uip->ui_vnode = vp;
390 uip->ui_flags = UI_INITED;
391 uip->ui_ucred = NOCRED;
392 }
393 assert(uip->ui_flags != UI_NONE);
394 assert(uip->ui_vnode == vp);
395
396 /* now set this ubc_info in the vnode */
397 vp->v_ubcinfo = uip;
398
399 /*
400 * Allocate a pager object for this vnode
401 *
402 * XXX The value of the pager parameter is currently ignored.
403 * XXX Presumably, this API changed to avoid the race between
404 * XXX setting the pager and the UI_HASPAGER flag.
405 */
406 pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
407 assert(pager);
408
409 /*
410 * Explicitly set the pager into the ubc_info, after setting the
411 * UI_HASPAGER flag.
412 */
413 SET(uip->ui_flags, UI_HASPAGER);
414 uip->ui_pager = pager;
415
416 /*
417 * Note: We can not use VNOP_GETATTR() to get accurate
418 * value of ui_size because this may be an NFS vnode, and
419 * nfs_getattr() can call vinvalbuf(); if this happens,
420 * ubc_info is not set up to deal with that event.
421 * So use bogus size.
422 */
423
424 /*
425 * create a vnode - vm_object association
426 * memory_object_create_named() creates a "named" reference on the
427 * memory object we hold this reference as long as the vnode is
428 * "alive." Since memory_object_create_named() took its own reference
429 * on the vnode pager we passed it, we can drop the reference
430 * vnode_pager_setup() returned here.
431 */
432 kret = memory_object_create_named(pager,
433 (memory_object_size_t)uip->ui_size, &control);
434 vnode_pager_deallocate(pager);
435 if (kret != KERN_SUCCESS)
436 panic("ubc_info_init: memory_object_create_named returned %d", kret);
437
438 assert(control);
439 uip->ui_control = control; /* cache the value of the mo control */
440 SET(uip->ui_flags, UI_HASOBJREF); /* with a named reference */
441
442 if (withfsize == 0) {
443 /* initialize the size */
444 error = vnode_size(vp, &uip->ui_size, vfs_context_current());
445 if (error)
446 uip->ui_size = 0;
447 } else {
448 uip->ui_size = filesize;
449 }
450 vp->v_lflag |= VNAMED_UBC; /* vnode has a named ubc reference */
451
452 return (error);
453 }
454
455
456 /*
457 * ubc_info_free
458 *
459 * Free a ubc_info structure
460 *
461 * Parameters: uip A pointer to the ubc_info to free
462 *
463 * Returns: (void)
464 *
465 * Notes: If there is a credential that has subsequently been associated
466 * with the ubc_info via a call to ubc_setcred(), the reference
467 * to the credential is dropped.
468 *
469 * It's actually impossible for a ubc_info.ui_control to take the
470 * value MEMORY_OBJECT_CONTROL_NULL.
471 */
472 static void
473 ubc_info_free(struct ubc_info *uip)
474 {
475 if (IS_VALID_CRED(uip->ui_ucred)) {
476 kauth_cred_unref(&uip->ui_ucred);
477 }
478
479 if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
480 memory_object_control_deallocate(uip->ui_control);
481
482 cluster_release(uip);
483 ubc_cs_free(uip);
484
485 zfree(ubc_info_zone, uip);
486 return;
487 }
488
489
void ubc_info_deallocate(struct ubc_info *uip)
{
	/* exported entry point; all of the work is in ubc_info_free() */
	ubc_info_free(uip);
	return;
}
495
496
497 /*
498 * ubc_setsize
499 *
500 * Tell the VM that the size of the file represented by the vnode has
501 * changed
502 *
503 * Parameters: vp The vp whose backing file size is
504 * being changed
505 * nsize The new size of the backing file
506 *
507 * Returns: 1 Success
508 * 0 Failure
509 *
510 * Notes: This function will indicate failure if the new size that's
511 * being attempted to be set is negative.
512 *
513 * This function will fail if there is no ubc_info currently
514 * associated with the vnode.
515 *
516 * This function will indicate success if the new size is the
517 * same or larger than the old size (in this case, the remainder
518 * of the file will require modification or use of an existing upl
519 * to access successfully).
520 *
521 * This function will fail if the new file size is smaller, and
522 * the memory region being invalidated was unable to actually be
523 * invalidated and/or the last page could not be flushed, if the
524 * new size is not aligned to a page boundary. This is usually
525 * indicative of an I/O error.
526 */
527 int
528 ubc_setsize(struct vnode *vp, off_t nsize)
529 {
530 off_t osize; /* ui_size before change */
531 off_t lastpg, olastpgend, lastoff;
532 struct ubc_info *uip;
533 memory_object_control_t control;
534 kern_return_t kret = KERN_SUCCESS;
535
536 if (nsize < (off_t)0)
537 return (0);
538
539 if (!UBCINFOEXISTS(vp))
540 return (0);
541
542 uip = vp->v_ubcinfo;
543 osize = uip->ui_size;
544 /*
545 * Update the size before flushing the VM
546 */
547 uip->ui_size = nsize;
548
549 if (nsize >= osize) /* Nothing more to do */
550 return (1); /* return success */
551
552 /*
553 * When the file shrinks, invalidate the pages beyond the
554 * new size. Also get rid of garbage beyond nsize on the
555 * last page. The ui_size already has the nsize, so any
556 * subsequent page-in will zero-fill the tail properly
557 */
558 lastpg = trunc_page_64(nsize);
559 olastpgend = round_page_64(osize);
560 control = uip->ui_control;
561 assert(control);
562 lastoff = (nsize & PAGE_MASK_64);
563
564 if (lastoff) {
565 upl_t upl;
566 upl_page_info_t *pl;
567
568
569 /*
570 * new EOF ends up in the middle of a page
571 * zero the tail of this page if its currently
572 * present in the cache
573 */
574 kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);
575
576 if (kret != KERN_SUCCESS)
577 panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);
578
579 if (upl_valid_page(pl, 0))
580 cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);
581
582 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
583
584 lastpg += PAGE_SIZE_64;
585 }
586 if (olastpgend > lastpg) {
587 /*
588 * invalidate the pages beyond the new EOF page
589 *
590 */
591 kret = memory_object_lock_request(control,
592 (memory_object_offset_t)lastpg,
593 (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
594 MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH,
595 VM_PROT_NO_CHANGE);
596 if (kret != KERN_SUCCESS)
597 printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
598 }
599 return ((kret == KERN_SUCCESS) ? 1 : 0);
600 }
601
602
603 /*
604 * ubc_getsize
605 *
606 * Get the size of the file associated with the specified vnode
607 *
608 * Parameters: vp The vnode whose size is of interest
609 *
610 * Returns: 0 There is no ubc_info associated with
611 * this vnode, or the size is zero
612 * !0 The size of the file
613 *
614 * Notes: Using this routine, it is not possible for a caller to
615 * successfully distinguish between a vnode associated with a zero
616 * length file, and a vnode with no associated ubc_info. The
617 * caller therefore needs to not care, or needs to ensure that
618 * they have previously successfully called ubc_info_init() or
619 * ubc_info_init_withsize().
620 */
621 off_t
622 ubc_getsize(struct vnode *vp)
623 {
624 /* people depend on the side effect of this working this way
625 * as they call this for directory
626 */
627 if (!UBCINFOEXISTS(vp))
628 return ((off_t)0);
629 return (vp->v_ubcinfo->ui_size);
630 }
631
632
633 /*
634 * ubc_umount
635 *
636 * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this
637 * mount point
638 *
639 * Parameters: mp The mount point
640 *
641 * Returns: 0 Success
642 *
643 * Notes: There is no failure indication for this function.
644 *
645 * This function is used in the unmount path; since it may block
646 * I/O indefinitely, it should not be used in the forced unmount
647 * path, since a device unavailability could also block that
648 * indefinitely.
649 *
650 * Because there is no device ejection interlock on USB, FireWire,
651 * or similar devices, it's possible that an ejection that begins
652 * subsequent to the vnode_iterate() completing, either on one of
653 * those devices, or a network mount for which the server quits
654 * responding, etc., may cause the caller to block indefinitely.
655 */
656 __private_extern__ int
657 ubc_umount(struct mount *mp)
658 {
659 vnode_iterate(mp, 0, ubc_umcallback, 0);
660 return(0);
661 }
662
663
664 /*
665 * ubc_umcallback
666 *
667 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
668 * and vnode_iterate() for details of implementation.
669 */
670 static int
671 ubc_umcallback(vnode_t vp, __unused void * args)
672 {
673
674 if (UBCINFOEXISTS(vp)) {
675
676 (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
677 }
678 return (VNODE_RETURNED);
679 }
680
681
682 /*
683 * ubc_getcred
684 *
685 * Get the credentials currently active for the ubc_info associated with the
686 * vnode.
687 *
688 * Parameters: vp The vnode whose ubc_info credentials
689 * are to be retrieved
690 *
691 * Returns: !NOCRED The credentials
692 * NOCRED If there is no ubc_info for the vnode,
693 * or if there is one, but it has not had
694 * any credentials associated with it via
695 * a call to ubc_setcred()
696 */
697 kauth_cred_t
698 ubc_getcred(struct vnode *vp)
699 {
700 if (UBCINFOEXISTS(vp))
701 return (vp->v_ubcinfo->ui_ucred);
702
703 return (NOCRED);
704 }
705
706
707 /*
708 * ubc_setthreadcred
709 *
710 * If they are not already set, set the credentials of the ubc_info structure
711 * associated with the vnode to those of the supplied thread; otherwise leave
712 * them alone.
713 *
714 * Parameters: vp The vnode whose ubc_info creds are to
715 * be set
716 * p The process whose credentials are to
717 * be used, if not running on an assumed
718 * credential
719 * thread The thread whose credentials are to
720 * be used
721 *
722 * Returns: 1 This vnode has no associated ubc_info
723 * 0 Success
724 *
725 * Notes: This function takes a proc parameter to account for bootstrap
726 * issues where a task or thread may call this routine, either
727 * before credentials have been initialized by bsd_init(), or if
728 * there is no BSD info associated with a mach thread yet. This
729 * is known to happen in both the initial swap and memory mapping
730 * calls.
731 *
732 * This function is generally used only in the following cases:
733 *
734 * o a memory mapped file via the mmap() system call
735 * o a memory mapped file via the deprecated map_fd() call
736 * o a swap store backing file
737 * o subsequent to a successful write via vn_write()
738 *
739 * The information is then used by the NFS client in order to
740 * cons up a wire message in either the page-in or page-out path.
741 *
742 * There are two potential problems with the use of this API:
743 *
744 * o Because the write path only set it on a successful
745 * write, there is a race window between setting the
746 * credential and its use to evict the pages to the
747 * remote file server
748 *
749 * o Because a page-in may occur prior to a write, the
750 * credential may not be set at this time, if the page-in
751 * is not the result of a mapping established via mmap()
752 * or map_fd().
753 *
754 * In both these cases, this will be triggered from the paging
755 * path, which will instead use the credential of the current
756 * process, which in this case is either the dynamic_pager or
757 * the kernel task, both of which utilize "root" credentials.
758 *
759 * This may potentially permit operations to occur which should
760 * be denied, or it may cause to be denied operations which
761 * should be permitted, depending on the configuration of the NFS
762 * server.
763 */
764 int
765 ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
766 {
767 struct ubc_info *uip;
768 kauth_cred_t credp;
769 struct uthread *uthread = get_bsdthread_info(thread);
770
771 if (!UBCINFOEXISTS(vp))
772 return (1);
773
774 vnode_lock(vp);
775
776 uip = vp->v_ubcinfo;
777 credp = uip->ui_ucred;
778
779 if (!IS_VALID_CRED(credp)) {
780 /* use per-thread cred, if assumed identity, else proc cred */
781 if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
782 uip->ui_ucred = kauth_cred_proc_ref(p);
783 } else {
784 uip->ui_ucred = uthread->uu_ucred;
785 kauth_cred_ref(uip->ui_ucred);
786 }
787 }
788 vnode_unlock(vp);
789
790 return (0);
791 }
792
793
794 /*
795 * ubc_setcred
796 *
797 * If they are not already set, set the credentials of the ubc_info structure
798 * associated with the vnode to those of the process; otherwise leave them
799 * alone.
800 *
801 * Parameters: vp The vnode whose ubc_info creds are to
802 * be set
803 * p The process whose credentials are to
804 * be used
805 *
806 * Returns: 0 This vnode has no associated ubc_info
807 * 1 Success
808 *
809 * Notes: The return values for this function are inverted from nearly
810 * all other uses in the kernel.
811 *
812 * See also ubc_setthreadcred(), above.
813 *
814 * This function is considered deprecated, and generally should
815 * not be used, as it is incompatible with per-thread credentials;
816 * it exists for legacy KPI reasons.
817 *
818 * DEPRECATION: ubc_setcred() is being deprecated. Please use
819 * ubc_setthreadcred() instead.
820 */
821 int
822 ubc_setcred(struct vnode *vp, proc_t p)
823 {
824 struct ubc_info *uip;
825 kauth_cred_t credp;
826
827 /* If there is no ubc_info, deny the operation */
828 if ( !UBCINFOEXISTS(vp))
829 return (0);
830
831 /*
832 * Check to see if there is already a credential reference in the
833 * ubc_info; if there is not, take one on the supplied credential.
834 */
835 vnode_lock(vp);
836 uip = vp->v_ubcinfo;
837 credp = uip->ui_ucred;
838 if (!IS_VALID_CRED(credp)) {
839 uip->ui_ucred = kauth_cred_proc_ref(p);
840 }
841 vnode_unlock(vp);
842
843 return (1);
844 }
845
846
847 /*
848 * ubc_getpager
849 *
850 * Get the pager associated with the ubc_info associated with the vnode.
851 *
852 * Parameters: vp The vnode to obtain the pager from
853 *
854 * Returns: !VNODE_PAGER_NULL The memory_object_t for the pager
855 * VNODE_PAGER_NULL There is no ubc_info for this vnode
856 *
857 * Notes: For each vnode that has a ubc_info associated with it, that
858 * ubc_info SHALL have a pager associated with it, so in the
859 * normal case, it's impossible to return VNODE_PAGER_NULL for
860 * a vnode with an associated ubc_info.
861 */
862 __private_extern__ memory_object_t
863 ubc_getpager(struct vnode *vp)
864 {
865 if (UBCINFOEXISTS(vp))
866 return (vp->v_ubcinfo->ui_pager);
867
868 return (0);
869 }
870
871
872 /*
873 * ubc_getobject
874 *
875 * Get the memory object control associated with the ubc_info associated with
876 * the vnode
877 *
878 * Parameters: vp The vnode to obtain the memory object
879 * from
880 * flags DEPRECATED
881 *
882 * Returns: !MEMORY_OBJECT_CONTROL_NULL
883 * MEMORY_OBJECT_CONTROL_NULL
884 *
885 * Notes: Historically, if the flags were not "do not reactivate", this
886 * function would look up the memory object using the pager if
887 * it did not exist (this could be the case if the vnode had
888 * been previously reactivated). The flags would also permit a
889 * hold to be requested, which would have created an object
890 * reference, if one had not already existed. This usage is
891 * deprecated, as it would permit a race between finding and
892 * taking the reference vs. a single reference being dropped in
893 * another thread.
894 */
895 memory_object_control_t
896 ubc_getobject(struct vnode *vp, __unused int flags)
897 {
898 if (UBCINFOEXISTS(vp))
899 return((vp->v_ubcinfo->ui_control));
900
901 return (MEMORY_OBJECT_CONTROL_NULL);
902 }
903
904
905 /*
906 * ubc_blktooff
907 *
908 * Convert a given block number to a memory backing object (file) offset for a
909 * given vnode
910 *
911 * Parameters: vp The vnode in which the block is located
912 * blkno The block number to convert
913 *
914 * Returns: !-1 The offset into the backing object
915 * -1 There is no ubc_info associated with
916 * the vnode
917 * -1 An error occurred in the underlying VFS
918 * while translating the block to an
919 * offset; the most likely cause is that
920 * the caller specified a block past the
921 * end of the file, but this could also be
922 * any other error from VNOP_BLKTOOFF().
923 *
924 * Note: Representing the error in band loses some information, but does
925 * not occlude a valid offset, since an off_t of -1 is normally
926 * used to represent EOF. If we had a more reliable constant in
927 * our header files for it (i.e. explicitly cast to an off_t), we
928 * would use it here instead.
929 */
930 off_t
931 ubc_blktooff(vnode_t vp, daddr64_t blkno)
932 {
933 off_t file_offset = -1;
934 int error;
935
936 if (UBCINFOEXISTS(vp)) {
937 error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
938 if (error)
939 file_offset = -1;
940 }
941
942 return (file_offset);
943 }
944
945
946 /*
947 * ubc_offtoblk
948 *
949 * Convert a given offset in a memory backing object into a block number for a
950 * given vnode
951 *
952 * Parameters: vp The vnode in which the offset is
953 * located
954 * offset The offset into the backing object
955 *
956 * Returns: !-1 The returned block number
957 * -1 There is no ubc_info associated with
958 * the vnode
959 * -1 An error occurred in the underlying VFS
960 * while translating the block to an
961 * offset; the most likely cause is that
962 * the caller specified a block past the
963 * end of the file, but this could also be
964 * any other error from VNOP_OFFTOBLK().
965 *
966 * Note: Representing the error in band loses some information, but does
967 * not occlude a valid block number, since block numbers exceed
968 * the valid range for offsets, due to their relative sizes. If
969 * we had a more reliable constant than -1 in our header files
970 * for it (i.e. explicitly cast to an daddr64_t), we would use it
971 * here instead.
972 */
973 daddr64_t
974 ubc_offtoblk(vnode_t vp, off_t offset)
975 {
976 daddr64_t blkno = -1;
977 int error = 0;
978
979 if (UBCINFOEXISTS(vp)) {
980 error = VNOP_OFFTOBLK(vp, offset, &blkno);
981 if (error)
982 blkno = -1;
983 }
984
985 return (blkno);
986 }
987
988
989 /*
990 * ubc_pages_resident
991 *
992 * Determine whether or not a given vnode has pages resident via the memory
993 * object control associated with the ubc_info associated with the vnode
994 *
995 * Parameters: vp The vnode we want to know about
996 *
997 * Returns: 1 Yes
998 * 0 No
999 */
1000 int
1001 ubc_pages_resident(vnode_t vp)
1002 {
1003 kern_return_t kret;
1004 boolean_t has_pages_resident;
1005
1006 if (!UBCINFOEXISTS(vp))
1007 return (0);
1008
1009 /*
1010 * The following call may fail if an invalid ui_control is specified,
1011 * or if there is no VM object associated with the control object. In
1012 * either case, reacting to it as if there were no pages resident will
1013 * result in correct behavior.
1014 */
1015 kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);
1016
1017 if (kret != KERN_SUCCESS)
1018 return (0);
1019
1020 if (has_pages_resident == TRUE)
1021 return (1);
1022
1023 return (0);
1024 }
1025
1026
1027 /*
1028 * ubc_sync_range
1029 *
1030 * Clean and/or invalidate a range in the memory object that backs this vnode
1031 *
1032 * Parameters: vp The vnode whose associated ubc_info's
1033 * associated memory object is to have a
1034 * range invalidated within it
1035 * beg_off The start of the range, as an offset
1036 * end_off The end of the range, as an offset
1037 * flags See ubc_msync_internal()
1038 *
1039 * Returns: 1 Success
1040 * 0 Failure
1041 *
1042 * Notes: see ubc_msync_internal() for more detailed information.
1043 *
1044 * DEPRECATED: This interface is obsolete due to a failure to return error
1045 * information needed in order to correct failures. The currently
1046 * recommended interface is ubc_msync().
1047 */
1048 int
1049 ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
1050 {
1051 return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
1052 }
1053
1054
1055 /*
1056 * ubc_msync
1057 *
1058 * Clean and/or invalidate a range in the memory object that backs this vnode
1059 *
1060 * Parameters: vp The vnode whose associated ubc_info's
1061 * associated memory object is to have a
1062 * range invalidated within it
1063 * beg_off The start of the range, as an offset
1064 * end_off The end of the range, as an offset
1065 * resid_off The address of an off_t supplied by the
1066 * caller; may be set to NULL to ignore
1067 * flags See ubc_msync_internal()
1068 *
1069 * Returns: 0 Success
1070 * !0 Failure; an errno is returned
1071 *
1072 * Implicit Returns:
1073 * *resid_off, modified If non-NULL, the contents are ALWAYS
1074 * modified; they are initialized to the
1075 * beg_off, and in case of an I/O error,
1076 * the difference between beg_off and the
1077 * current value will reflect what was
1078 * able to be written before the error
1079 * occurred. If no error is returned, the
1080 * value of the resid_off is undefined; do
1081 * NOT use it in place of end_off if you
1082 * intend to increment from the end of the
1083 * last call and call iteratively.
1084 *
1085 * Notes: see ubc_msync_internal() for more detailed information.
1086 *
1087 */
1088 errno_t
1089 ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
1090 {
1091 int retval;
1092 int io_errno = 0;
1093
1094 if (resid_off)
1095 *resid_off = beg_off;
1096
1097 retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);
1098
1099 if (retval == 0 && io_errno == 0)
1100 return (EINVAL);
1101 return (io_errno);
1102 }
1103
1104
1105 /*
1106 * Clean and/or invalidate a range in the memory object that backs this vnode
1107 *
1108 * Parameters: vp The vnode whose associated ubc_info's
1109 * associated memory object is to have a
1110 * range invalidated within it
1111 * beg_off The start of the range, as an offset
1112 * end_off The end of the range, as an offset
1113 * resid_off The address of an off_t supplied by the
1114 * caller; may be set to NULL to ignore
1115 * flags MUST contain at least one of the flags
1116 * UBC_INVALIDATE, UBC_PUSHDIRTY, or
1117 * UBC_PUSHALL; if UBC_PUSHDIRTY is used,
1118 * UBC_SYNC may also be specified to cause
1119 * this function to block until the
1120 * operation is complete. The behavior
1121 * of UBC_SYNC is otherwise undefined.
1122 * io_errno The address of an int to contain the
1123 * errno from a failed I/O operation, if
1124 * one occurs; may be set to NULL to
1125 * ignore
1126 *
1127 * Returns: 1 Success
1128 * 0 Failure
1129 *
1130 * Implicit Returns:
1131 * *resid_off, modified The contents of this offset MAY be
1132 * modified; in case of an I/O error, the
1133 * difference between beg_off and the
1134 * current value will reflect what was
1135 * able to be written before the error
1136 * occurred.
1137 * *io_errno, modified The contents of this offset are set to
1138 * an errno, if an error occurs; if the
1139 * caller supplies an io_errno parameter,
1140 * they should be careful to initialize it
1141 * to 0 before calling this function to
1142 * enable them to distinguish an error
1143 * with a valid *resid_off from an invalid
1144 * one, and to avoid potentially falsely
1145 * reporting an error, depending on use.
1146 *
1147 * Notes: If there is no ubc_info associated with the vnode supplied,
1148 * this function immediately returns failure (0).
1149 *
1150 * If the value of end_off is less than or equal to beg_off, this
1151 * function immediately returns success; that is, end_off is NOT
1152 * inclusive.
1153 *
1154 * IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
1155 * UBC_PUSHALL MUST be specified; that is, it is NOT possible to
1156 * attempt to block on in-progress I/O by calling this function
1157 * with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
1158 * in order to block pending on the I/O already in progress.
1159 *
1160 * The start offset is truncated to the page boundary and the
1161 * size is adjusted to include the last page in the range; that
1162 * is, end_off on exactly a page boundary will not change if it
1163 * is rounded, and the range of bytes written will be from the
1164 * truncate beg_off to the rounded (end_off - 1).
1165 */
1166 static int
1167 ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
1168 {
1169 memory_object_size_t tsize;
1170 kern_return_t kret;
1171 int request_flags = 0;
1172 int flush_flags = MEMORY_OBJECT_RETURN_NONE;
1173
1174 if ( !UBCINFOEXISTS(vp))
1175 return (0);
1176 if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
1177 return (0);
1178 if (end_off <= beg_off)
1179 return (1);
1180
1181 if (flags & UBC_INVALIDATE)
1182 /*
1183 * discard the resident pages
1184 */
1185 request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);
1186
1187 if (flags & UBC_SYNC)
1188 /*
1189 * wait for all the I/O to complete before returning
1190 */
1191 request_flags |= MEMORY_OBJECT_IO_SYNC;
1192
1193 if (flags & UBC_PUSHDIRTY)
1194 /*
1195 * we only return the dirty pages in the range
1196 */
1197 flush_flags = MEMORY_OBJECT_RETURN_DIRTY;
1198
1199 if (flags & UBC_PUSHALL)
1200 /*
1201 * then return all the interesting pages in the range (both
1202 * dirty and precious) to the pager
1203 */
1204 flush_flags = MEMORY_OBJECT_RETURN_ALL;
1205
1206 beg_off = trunc_page_64(beg_off);
1207 end_off = round_page_64(end_off);
1208 tsize = (memory_object_size_t)end_off - beg_off;
1209
1210 /* flush and/or invalidate pages in the range requested */
1211 kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
1212 beg_off, tsize,
1213 (memory_object_offset_t *)resid_off,
1214 io_errno, flush_flags, request_flags,
1215 VM_PROT_NO_CHANGE);
1216
1217 return ((kret == KERN_SUCCESS) ? 1 : 0);
1218 }
1219
1220
1221 /*
1222 * ubc_map
1223 *
1224 * Explicitly map a vnode that has an associated ubc_info, and add a reference
1225 * to it for the ubc system, if there isn't one already, so it will not be
1226 * recycled while it's in use, and set flags on the ubc_info to indicate that
1227 * we have done this
1228 *
1229 * Parameters: vp The vnode to map
1230 * flags The mapping flags for the vnode; this
1231 * will be a combination of one or more of
1232 * PROT_READ, PROT_WRITE, and PROT_EXEC
1233 *
1234 * Returns: 0 Success
1235 * EPERM Permission was denied
1236 *
1237 * Notes: An I/O reference on the vnode must already be held on entry
1238 *
1239 * If there is no ubc_info associated with the vnode, this function
1240 * will return success.
1241 *
1242 * If a permission error occurs, this function will return
1243 * failure; all other failures will cause this function to return
1244 * success.
1245 *
1246 * IMPORTANT: This is an internal use function, and its symbols
1247 * are not exported, hence its error checking is not very robust.
1248 * It is primarily used by:
1249 *
1250 * o mmap(), when mapping a file
1251 * o The deprecated map_fd() interface, when mapping a file
1252 * o When mapping a shared file (a shared library in the
1253 * shared segment region)
1254 * o When loading a program image during the exec process
1255 *
1256 * ...all of these uses ignore the return code, and any fault that
1257 * results later because of a failure is handled in the fix-up path
1258 * of the fault handler. The interface exists primarily as a
1259 * performance hint.
1260 *
1261 * Given that third party implementation of the type of interfaces
1262 * that would use this function, such as alternative executable
1263 * formats, etc., are unsupported, this function is not exported
1264 * for general use.
1265 *
1266 * The extra reference is held until the VM system unmaps the
1267 * vnode from its own context to maintain a vnode reference in
1268 * cases like open()/mmap()/close(), which leave the backing
1269 * object referenced by a mapped memory region in a process
1270 * address space.
1271 */
1272 __private_extern__ int
1273 ubc_map(vnode_t vp, int flags)
1274 {
1275 struct ubc_info *uip;
1276 int error = 0;
1277 int need_ref = 0;
1278 int need_wakeup = 0;
1279
1280 if (UBCINFOEXISTS(vp)) {
1281
1282 vnode_lock(vp);
1283 uip = vp->v_ubcinfo;
1284
1285 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1286 SET(uip->ui_flags, UI_MAPWAITING);
1287 (void) msleep(&uip->ui_flags, &vp->v_lock,
1288 PRIBIO, "ubc_map", NULL);
1289 }
1290 SET(uip->ui_flags, UI_MAPBUSY);
1291 vnode_unlock(vp);
1292
1293 error = VNOP_MMAP(vp, flags, vfs_context_current());
1294
1295 if (error != EPERM)
1296 error = 0;
1297
1298 vnode_lock_spin(vp);
1299
1300 if (error == 0) {
1301 if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
1302 need_ref = 1;
1303 SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
1304 }
1305 CLR(uip->ui_flags, UI_MAPBUSY);
1306
1307 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1308 CLR(uip->ui_flags, UI_MAPWAITING);
1309 need_wakeup = 1;
1310 }
1311 vnode_unlock(vp);
1312
1313 if (need_wakeup)
1314 wakeup(&uip->ui_flags);
1315
1316 if (need_ref)
1317 vnode_ref(vp);
1318 }
1319 return (error);
1320 }
1321
1322
1323 /*
1324 * ubc_destroy_named
1325 *
1326 * Destroy the named memory object associated with the ubc_info control object
1327 * associated with the designated vnode, if there is a ubc_info associated
1328 * with the vnode, and a control object is associated with it
1329 *
1330 * Parameters: vp The designated vnode
1331 *
1332 * Returns: (void)
1333 *
1334 * Notes: This function is called on vnode termination for all vnodes,
1335 * and must therefore not assume that there is a ubc_info that is
1336 * associated with the vnode, nor that there is a control object
1337 * associated with the ubc_info.
1338 *
1339 * If all the conditions necessary are present, this function
1340 * calls memory_object_destroy(), which will in turn end up
1341 * calling ubc_unmap() to release any vnode references that were
1342 * established via ubc_map().
1343 *
1344 * IMPORTANT: This is an internal use function that is used
1345 * exclusively by the internal use function vclean().
1346 */
1347 __private_extern__ void
1348 ubc_destroy_named(vnode_t vp)
1349 {
1350 memory_object_control_t control;
1351 struct ubc_info *uip;
1352 kern_return_t kret;
1353
1354 if (UBCINFOEXISTS(vp)) {
1355 uip = vp->v_ubcinfo;
1356
1357 /* Terminate the memory object */
1358 control = ubc_getobject(vp, UBC_HOLDOBJECT);
1359 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1360 kret = memory_object_destroy(control, 0);
1361 if (kret != KERN_SUCCESS)
1362 panic("ubc_destroy_named: memory_object_destroy failed");
1363 }
1364 }
1365 }
1366
1367
1368 /*
1369 * ubc_isinuse
1370 *
1371 * Determine whether or not a vnode is currently in use by ubc at a level in
1372 * excess of the requested busycount
1373 *
1374 * Parameters: vp The vnode to check
1375 * busycount The threshold busy count, used to bias
1376 * the count usually already held by the
1377 * caller to avoid races
1378 *
1379 * Returns: 1 The vnode is in use over the threshold
1380 * 0 The vnode is not in use over the
1381 * threshold
1382 *
1383 * Notes: Because the vnode is only held locked while actually asking
1384 * the use count, this function only represents a snapshot of the
1385 * current state of the vnode. If more accurate information is
1386 * required, an additional busycount should be held by the caller
1387 * and a non-zero busycount used.
1388 *
1389 * If there is no ubc_info associated with the vnode, this
1390 * function will report that the vnode is not in use by ubc.
1391 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	/* 0: this caller does not hold the vnode lock */
	const int	caller_holds_lock = 0;

	if (UBCINFOEXISTS(vp))
		return (ubc_isinuse_locked(vp, busycount, caller_holds_lock));

	/* no ubc_info: reported as not in use */
	return (0);
}
1399
1400
1401 /*
1402 * ubc_isinuse_locked
1403 *
1404 * Determine whether or not a vnode is currently in use by ubc at a level in
1405 * excess of the requested busycount
1406 *
1407 * Parameters: vp The vnode to check
1408 * busycount The threshold busy count, used to bias
1409 * the count usually already held by the
1410 * caller to avoid races
1411 * locked True if the vnode is already locked by
1412 * the caller
1413 *
1414 * Returns: 1 The vnode is in use over the threshold
1415 * 0 The vnode is not in use over the
1416 * threshold
1417 *
1418 * Notes: If the vnode is not locked on entry, it is locked while
1419 * actually asking the use count. If this is the case, this
1420 * function only represents a snapshot of the current state of
1421 * the vnode. If more accurate information is required, the
1422 * vnode lock should be held by the caller, otherwise an
1423 * additional busycount should be held by the caller and a
1424 * non-zero busycount used.
1425 *
1426 * If there is no ubc_info associated with the vnode, this
1427 * function will report that the vnode is not in use by ubc.
1428 */
1429 int
1430 ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
1431 {
1432 int retval = 0;
1433
1434
1435 if (!locked)
1436 vnode_lock(vp);
1437
1438 if ((vp->v_usecount - vp->v_kusecount) > busycount)
1439 retval = 1;
1440
1441 if (!locked)
1442 vnode_unlock(vp);
1443 return (retval);
1444 }
1445
1446
1447 /*
1448 * ubc_unmap
1449 *
1450 * Reverse the effects of a ubc_map() call for a given vnode
1451 *
1452 * Parameters: vp vnode to unmap from ubc
1453 *
1454 * Returns: (void)
1455 *
1456 * Notes: This is an internal use function used by vnode_pager_unmap().
1457 * It will attempt to obtain a reference on the supplied vnode,
1458 * and if it can do so, and there is an associated ubc_info, and
1459 * the flags indicate that it was mapped via ubc_map(), then the
1460 * flag is cleared, the mapping removed, and the reference taken
1461 * by ubc_map() is released.
1462 *
1463 * IMPORTANT: This MUST only be called by the VM
1464 * to prevent race conditions.
1465 */
1466 __private_extern__ void
1467 ubc_unmap(struct vnode *vp)
1468 {
1469 struct ubc_info *uip;
1470 int need_rele = 0;
1471 int need_wakeup = 0;
1472
1473 if (vnode_getwithref(vp))
1474 return;
1475
1476 if (UBCINFOEXISTS(vp)) {
1477 vnode_lock(vp);
1478 uip = vp->v_ubcinfo;
1479
1480 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1481 SET(uip->ui_flags, UI_MAPWAITING);
1482 (void) msleep(&uip->ui_flags, &vp->v_lock,
1483 PRIBIO, "ubc_unmap", NULL);
1484 }
1485 SET(uip->ui_flags, UI_MAPBUSY);
1486
1487 if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
1488 CLR(uip->ui_flags, UI_ISMAPPED);
1489 need_rele = 1;
1490 }
1491 vnode_unlock(vp);
1492
1493 if (need_rele) {
1494 (void) VNOP_MNOMAP(vp, vfs_context_current());
1495 vnode_rele(vp);
1496 }
1497
1498 vnode_lock_spin(vp);
1499
1500 CLR(uip->ui_flags, UI_MAPBUSY);
1501 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1502 CLR(uip->ui_flags, UI_MAPWAITING);
1503 need_wakeup = 1;
1504 }
1505 vnode_unlock(vp);
1506
1507 if (need_wakeup)
1508 wakeup(&uip->ui_flags);
1509
1510 }
1511 /*
1512 * the drop of the vnode ref will cleanup
1513 */
1514 vnode_put(vp);
1515 }
1516
1517
1518 /*
1519 * ubc_page_op
1520 *
1521 * Manipulate individual page state for a vnode with an associated ubc_info
1522 * with an associated memory object control.
1523 *
1524 * Parameters: vp The vnode backing the page
1525 * f_offset A file offset interior to the page
1526 * ops The operations to perform, as a bitmap
1527 * (see below for more information)
1528 * phys_entryp The address of a ppnum_t; may be NULL
1529 * to ignore
1530 * flagsp A pointer to an int to contain flags;
1531 * may be NULL to ignore
1532 *
1533 * Returns: KERN_SUCCESS Success
1534 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1535 * object associated
1536 * KERN_INVALID_OBJECT If UPL_POP_PHYSICAL and the object is
1537 * not physically contiguous
1538 * KERN_INVALID_OBJECT If !UPL_POP_PHYSICAL and the object is
1539 * physically contiguous
1540 * KERN_FAILURE If the page cannot be looked up
1541 *
1542 * Implicit Returns:
1543 * *phys_entryp (modified) If phys_entryp is non-NULL and
1544 * UPL_POP_PHYSICAL
1545 * *flagsp (modified) If flagsp is non-NULL and there was
1546 * !UPL_POP_PHYSICAL and a KERN_SUCCESS
1547 *
1548 * Notes: For object boundaries, it is considerably more efficient to
1549 * ensure that f_offset is in fact on a page boundary, as this
1550 * will avoid internal use of the hash table to identify the
1551 * page, and would therefore skip a number of early optimizations.
1552 * Since this is a page operation anyway, the caller should try
1553 * to pass only a page aligned offset because of this.
1554 *
1555 * *flagsp may be modified even if this function fails. If it is
1556 * modified, it will contain the condition of the page before the
1557 * requested operation was attempted; these will only include the
1558 * bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
1559 * UPL_POP_SET, or UPL_POP_CLR bits.
1560 *
1561 * The flags field may contain a specific operation, such as
1562 * UPL_POP_PHYSICAL or UPL_POP_DUMP:
1563 *
1564 * o UPL_POP_PHYSICAL Fail if not contiguous; if
1565 * *phys_entryp and successful, set
1566 * *phys_entryp
1567 * o UPL_POP_DUMP Dump the specified page
1568 *
1569 * Otherwise, it is treated as a bitmap of one or more page
1570 * operations to perform on the final memory object; allowable
1571 * bit values are:
1572 *
1573 * o UPL_POP_DIRTY The page is dirty
1574 * o UPL_POP_PAGEOUT The page is paged out
1575 * o UPL_POP_PRECIOUS The page is precious
1576 * o UPL_POP_ABSENT The page is absent
1577 * o UPL_POP_BUSY The page is busy
1578 *
1579 * If the page status is only being queried and not modified, then
1580 * no other bits should be specified. However, if it is being
1581 * modified, exactly ONE of the following bits should be set:
1582 *
1583 * o UPL_POP_SET Set the current bitmap bits
1584 * o UPL_POP_CLR Clear the current bitmap bits
1585 *
1586 * Thus to effect a combination of setting and clearing, it may be
1587 * necessary to call this function twice. If this is done, the
1588 * set should be used before the clear, since clearing may trigger
1589 * a wakeup on the destination page, and if the page is backed by
1590 * an encrypted swap file, setting will trigger the decryption
1591 * needed before the wakeup occurs.
1592 */
1593 kern_return_t
1594 ubc_page_op(
1595 struct vnode *vp,
1596 off_t f_offset,
1597 int ops,
1598 ppnum_t *phys_entryp,
1599 int *flagsp)
1600 {
1601 memory_object_control_t control;
1602
1603 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1604 if (control == MEMORY_OBJECT_CONTROL_NULL)
1605 return KERN_INVALID_ARGUMENT;
1606
1607 return (memory_object_page_op(control,
1608 (memory_object_offset_t)f_offset,
1609 ops,
1610 phys_entryp,
1611 flagsp));
1612 }
1613
1614
1615 /*
1616 * ubc_range_op
1617 *
1618 * Manipulate page state for a range of memory for a vnode with an associated
1619 * ubc_info with an associated memory object control, when page level state is
1620 * not required to be returned from the call (i.e. there are no phys_entryp or
1621 * flagsp parameters to this call, and it takes a range which may contain
1622 * multiple pages, rather than an offset interior to a single page).
1623 *
1624 * Parameters: vp The vnode backing the page
1625 * f_offset_beg A file offset interior to the start page
1626 * f_offset_end A file offset interior to the end page
1627 * ops The operations to perform, as a bitmap
1628 * (see below for more information)
1629 * range The address of an int; may be NULL to
1630 * ignore
1631 *
1632 * Returns: KERN_SUCCESS Success
1633 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1634 * object associated
1635 * KERN_INVALID_OBJECT If the object is physically contiguous
1636 *
1637 * Implicit Returns:
1638 * *range (modified) If range is non-NULL, its contents will
1639 * be modified to contain the number of
1640 * bytes successfully operated upon.
1641 *
1642 * Notes: IMPORTANT: This function cannot be used on a range that
1643 * consists of physically contiguous pages.
1644 *
1645 * For object boundaries, it is considerably more efficient to
1646 * ensure that f_offset_beg and f_offset_end are in fact on page
1647 * boundaries, as this will avoid internal use of the hash table
1648 * to identify the page, and would therefore skip a number of
1649 * early optimizations. Since this is an operation on a set of
1650 * pages anyway, the caller should try to pass only page aligned
1651 * offsets because of this.
1652 *
1653 * *range will be modified only if this function succeeds.
1654 *
1655 * The flags field MUST contain a specific operation; allowable
1656 * values are:
1657 *
1658 * o UPL_ROP_ABSENT Returns the extent of the range
1659 * presented which is absent, starting
1660 * with the start address presented
1661 *
1662 * o UPL_ROP_PRESENT Returns the extent of the range
1663 * presented which is present (resident),
1664 * starting with the start address
1665 * presented
1666 * o UPL_ROP_DUMP Dump the pages which are found in the
1667 * target object for the target range.
1668 *
1669 * IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT; if there are
1670 * multiple regions in the range, only the first matching region
1671 * is returned.
1672 */
1673 kern_return_t
1674 ubc_range_op(
1675 struct vnode *vp,
1676 off_t f_offset_beg,
1677 off_t f_offset_end,
1678 int ops,
1679 int *range)
1680 {
1681 memory_object_control_t control;
1682
1683 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1684 if (control == MEMORY_OBJECT_CONTROL_NULL)
1685 return KERN_INVALID_ARGUMENT;
1686
1687 return (memory_object_range_op(control,
1688 (memory_object_offset_t)f_offset_beg,
1689 (memory_object_offset_t)f_offset_end,
1690 ops,
1691 range));
1692 }
1693
1694
1695 /*
1696 * ubc_create_upl
1697 *
1698 * Given a vnode, cause the population of a portion of the vm_object; based on
1699 * the nature of the request, the pages returned may contain valid data, or
1700 * they may be uninitialized.
1701 *
1702 * Parameters: vp The vnode from which to create the upl
1703 * f_offset The start offset into the backing store
1704 * represented by the vnode
1705 * bufsize The size of the upl to create
1706 * uplp Pointer to the upl_t to receive the
1707 * created upl; MUST NOT be NULL
1708 * plp Pointer to receive the internal page
1709 * list for the created upl; MAY be NULL
1710 * to ignore
1711 *
1712 * Returns: KERN_SUCCESS The requested upl has been created
1713 * KERN_INVALID_ARGUMENT The bufsize argument is not an even
1714 * multiple of the page size
1715 * KERN_INVALID_ARGUMENT There is no ubc_info associated with
1716 * the vnode, or there is no memory object
1717 * control associated with the ubc_info
1718 * memory_object_upl_request:KERN_INVALID_VALUE
1719 * The supplied upl_flags argument is
1720 * invalid
1721 * Implicit Returns:
1722 * *uplp (modified)
1723 * *plp (modified) If non-NULL, the value of *plp will be
1724 * modified to point to the internal page
1725 * list; this modification may occur even
1726 * if this function is unsuccessful, in
1727 * which case the contents may be invalid
1728 *
1729 * Note: If successful, the returned *uplp MUST subsequently be freed
1730 * via a call to ubc_upl_commit(), ubc_upl_commit_range(),
1731 * ubc_upl_abort(), or ubc_upl_abort_range().
1732 */
1733 kern_return_t
1734 ubc_create_upl(
1735 struct vnode *vp,
1736 off_t f_offset,
1737 long bufsize,
1738 upl_t *uplp,
1739 upl_page_info_t **plp,
1740 int uplflags)
1741 {
1742 memory_object_control_t control;
1743 mach_msg_type_number_t count;
1744 int ubcflags;
1745 kern_return_t kr;
1746
1747 if (bufsize & 0xfff)
1748 return KERN_INVALID_ARGUMENT;
1749
1750 if (uplflags & UPL_FOR_PAGEOUT) {
1751 uplflags &= ~UPL_FOR_PAGEOUT;
1752 ubcflags = UBC_FOR_PAGEOUT;
1753 } else
1754 ubcflags = UBC_FLAGS_NONE;
1755
1756 control = ubc_getobject(vp, ubcflags);
1757 if (control == MEMORY_OBJECT_CONTROL_NULL)
1758 return KERN_INVALID_ARGUMENT;
1759
1760 if (uplflags & UPL_WILL_BE_DUMPED) {
1761 uplflags &= ~UPL_WILL_BE_DUMPED;
1762 uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
1763 } else
1764 uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
1765 count = 0;
1766
1767 kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, &count, uplflags);
1768 if (plp != NULL)
1769 *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
1770 return kr;
1771 }
1772
1773
1774 /*
1775 * ubc_upl_maxbufsize
1776 *
1777 * Return the maximum bufsize ubc_create_upl( ) will take.
1778 *
1779 * Parameters: none
1780 *
1781 * Returns: maximum size buffer (in bytes) ubc_create_upl( ) will take.
1782 */
1783 upl_size_t
1784 ubc_upl_maxbufsize(
1785 void)
1786 {
1787 return(MAX_UPL_SIZE * PAGE_SIZE);
1788 }
1789
1790 /*
1791 * ubc_upl_map
1792 *
1793 * Map the page list associated with the supplied upl into the kernel virtual
1794 * address space at the virtual address indicated by the dst_addr argument;
1795 * the entire upl is mapped
1796 *
1797 * Parameters: upl The upl to map
1798 * dst_addr The address at which to map the upl
1799 *
1800 * Returns: KERN_SUCCESS The upl has been mapped
1801 * KERN_INVALID_ARGUMENT The upl is UPL_NULL
1802 * KERN_FAILURE The upl is already mapped
1803 * vm_map_enter:KERN_INVALID_ARGUMENT
1804 * A failure code from vm_map_enter() due
1805 * to an invalid argument
1806 */
1807 kern_return_t
1808 ubc_upl_map(
1809 upl_t upl,
1810 vm_offset_t *dst_addr)
1811 {
1812 return (vm_upl_map(kernel_map, upl, dst_addr));
1813 }
1814
1815
1816 /*
1817 * ubc_upl_unmap
1818 *
1819 * Unmap the page list associated with the supplied upl from the kernel virtual
1820 * address space; the entire upl is unmapped.
1821 *
1822 * Parameters: upl The upl to unmap
1823 *
1824 * Returns: KERN_SUCCESS The upl has been unmapped
1825 * KERN_FAILURE The upl is not currently mapped
1826 * KERN_INVALID_ARGUMENT If the upl is UPL_NULL
1827 */
1828 kern_return_t
1829 ubc_upl_unmap(
1830 upl_t upl)
1831 {
1832 return(vm_upl_unmap(kernel_map, upl));
1833 }
1834
1835
1836 /*
1837 * ubc_upl_commit
1838 *
1839 * Commit the contents of the upl to the backing store
1840 *
1841 * Parameters: upl The upl to commit
1842 *
1843 * Returns: KERN_SUCCESS The upl has been committed
1844 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
1845 * KERN_FAILURE The supplied upl does not represent
1846 * device memory, and the offset plus the
1847 * size would exceed the actual size of
1848 * the upl
1849 *
1850 * Notes: In practice, the only return value for this function should be
1851 * KERN_SUCCESS, unless there has been data structure corruption;
1852 * since the upl is deallocated regardless of success or failure,
1853 * there's really nothing to do about this other than panic.
1854 *
1855 * IMPORTANT: Use of this function should not be mixed with use of
1856 * ubc_upl_commit_range(), due to the unconditional deallocation
1857 * by this function.
1858 */
1859 kern_return_t
1860 ubc_upl_commit(
1861 upl_t upl)
1862 {
1863 upl_page_info_t *pl;
1864 kern_return_t kr;
1865
1866 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1867 kr = upl_commit(upl, pl, MAX_UPL_SIZE);
1868 upl_deallocate(upl);
1869 return kr;
1870 }
1871
1872
1873 /*
1874 * ubc_upl_commit_range
1875 *
1876 * Commit the contents of the specified range of the upl to the backing store
1877 *
1878 * Parameters: upl The upl to commit
1879 * offset The offset into the upl
1880 * size The size of the region to be committed,
1881 * starting at the specified offset
1882 * flags commit type (see below)
1883 *
1884 * Returns: KERN_SUCCESS The range has been committed
1885 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
1886 * KERN_FAILURE The supplied upl does not represent
1887 * device memory, and the offset plus the
1888 * size would exceed the actual size of
1889 * the upl
1890 *
1891 * Notes: IMPORTANT: If the commit is successful, and the object is now
1892 * empty, the upl will be deallocated. Since the caller cannot
1893 * check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
1894 * should generally only be used when the offset is 0 and the size
1895 * is equal to the upl size.
1896 *
1897 * The flags argument is a bitmap of flags on the range of pages in
1898 * the upl to be committed; allowable flags are:
1899 *
1900 * o UPL_COMMIT_FREE_ON_EMPTY Free the upl when it is
1901 * both empty and has been
1902 * successfully committed
1903 * o UPL_COMMIT_CLEAR_DIRTY Clear each pages dirty
1904 * bit; will prevent a
1905 * later pageout
1906 * o UPL_COMMIT_SET_DIRTY Set each pages dirty
1907 * bit; will cause a later
1908 * pageout
1909 * o UPL_COMMIT_INACTIVATE Clear each pages
1910 * reference bit; the page
1911 * will not be accessed
1912 * o UPL_COMMIT_ALLOW_ACCESS Unbusy each page; pages
1913 * become busy when an
1914 * IOMemoryDescriptor is
1915 * mapped or redirected,
1916 * and we have to wait for
1917 * an IOKit driver
1918 *
1919 * The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
1920 * not be specified by the caller.
1921 *
1922 * The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
1923 * mutually exclusive, and should not be combined.
1924 */
1925 kern_return_t
1926 ubc_upl_commit_range(
1927 upl_t upl,
1928 vm_offset_t offset,
1929 vm_size_t size,
1930 int flags)
1931 {
1932 upl_page_info_t *pl;
1933 boolean_t empty;
1934 kern_return_t kr;
1935
1936 if (flags & UPL_COMMIT_FREE_ON_EMPTY)
1937 flags |= UPL_COMMIT_NOTIFY_EMPTY;
1938
1939 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1940
1941 kr = upl_commit_range(upl, offset, size, flags,
1942 pl, MAX_UPL_SIZE, &empty);
1943
1944 if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
1945 upl_deallocate(upl);
1946
1947 return kr;
1948 }
1949
1950
1951 /*
1952 * ubc_upl_abort_range
1953 *
1954 * Abort the contents of the specified range of the specified upl
1955 *
1956 * Parameters: upl The upl to abort
1957 * offset The offset into the upl
1958 * size The size of the region to be aborted,
1959 * starting at the specified offset
1960 * abort_flags abort type (see below)
1961 *
1962 * Returns: KERN_SUCCESS The range has been aborted
1963 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
1964 * KERN_FAILURE The supplied upl does not represent
1965 * device memory, and the offset plus the
1966 * size would exceed the actual size of
1967 * the upl
1968 *
1969 * Notes: IMPORTANT: If the abort is successful, and the object is now
1970 * empty, the upl will be deallocated. Since the caller cannot
1971 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
1972 * should generally only be used when the offset is 0 and the size
1973 * is equal to the upl size.
1974 *
1975 * The abort_flags argument is a bitmap of flags on the range of
1976 * pages in the upl to be aborted; allowable flags are:
1977 *
1978 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
1979 * empty and has been successfully
1980 * aborted
1981 * o UPL_ABORT_RESTART The operation must be restarted
1982 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
1983 * o UPL_ABORT_ERROR An I/O error occurred
1984 * o UPL_ABORT_DUMP_PAGES Just free the pages
1985 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
1986 * o UPL_ABORT_ALLOW_ACCESS RESERVED
1987 *
1988 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
1989 * not be specified by the caller. It is intended to fulfill the
1990 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
1991 * ubc_upl_commit_range(), but is never referenced internally.
1992 *
1993 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
1994 * referenced; do not use it.
1995 */
1996 kern_return_t
1997 ubc_upl_abort_range(
1998 upl_t upl,
1999 vm_offset_t offset,
2000 vm_size_t size,
2001 int abort_flags)
2002 {
2003 kern_return_t kr;
2004 boolean_t empty = FALSE;
2005
2006 if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
2007 abort_flags |= UPL_ABORT_NOTIFY_EMPTY;
2008
2009 kr = upl_abort_range(upl, offset, size, abort_flags, &empty);
2010
2011 if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
2012 upl_deallocate(upl);
2013
2014 return kr;
2015 }
2016
2017
2018 /*
2019 * ubc_upl_abort
2020 *
2021 * Abort the contents of the specified upl
2022 *
2023 * Parameters: upl The upl to abort
2024 * abort_type abort type (see below)
2025 *
2026 * Returns: KERN_SUCCESS The range has been aborted
2027 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2028 * KERN_FAILURE The supplied upl does not represent
2029 * device memory, and the offset plus the
2030 * size would exceed the actual size of
2031 * the upl
2032 *
2033 * Notes: IMPORTANT: If the abort is successful, and the object is now
2034 * empty, the upl will be deallocated. Since the caller cannot
2035 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2036 * should generally only be used when the offset is 0 and the size
2037 * is equal to the upl size.
2038 *
2039 * The abort_type is a bitmap of flags on the range of
2040 * pages in the upl to be aborted; allowable flags are:
2041 *
2042 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2043 * empty and has been successfully
2044 * aborted
2045 * o UPL_ABORT_RESTART The operation must be restarted
2046 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
2047 * o UPL_ABORT_ERROR An I/O error occurred
2048 * o UPL_ABORT_DUMP_PAGES Just free the pages
2049 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
2050 * o UPL_ABORT_ALLOW_ACCESS RESERVED
2051 *
2052 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2053 * not be specified by the caller. It is intended to fulfill the
2054 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2055 * ubc_upl_commit_range(), but is never referenced internally.
2056 *
2057 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2058 * referenced; do not use it.
2059 */
2060 kern_return_t
2061 ubc_upl_abort(
2062 upl_t upl,
2063 int abort_type)
2064 {
2065 kern_return_t kr;
2066
2067 kr = upl_abort(upl, abort_type);
2068 upl_deallocate(upl);
2069 return kr;
2070 }
2071
2072
2073 /*
2074 * ubc_upl_pageinfo
2075 *
2076 * Retrieve the internal page list for the specified upl
2077 *
2078 * Parameters: upl The upl to obtain the page list from
2079 *
2080 * Returns: !NULL The (upl_page_info_t *) for the page
2081 * list internal to the upl
2082 * NULL Error/no page list associated
2083 *
2084 * Notes: IMPORTANT: The function is only valid on internal objects
2085 * where the list request was made with the UPL_INTERNAL flag.
2086 *
2087 * This function is a utility helper function, since some callers
2088 * may not have direct access to the header defining the macro,
2089 * due to abstraction layering constraints.
2090 */
2091 upl_page_info_t *
2092 ubc_upl_pageinfo(
2093 upl_t upl)
2094 {
2095 return (UPL_GET_INTERNAL_PAGE_LIST(upl));
2096 }
2097
2098
2099 int
2100 UBCINFOEXISTS(struct vnode * vp)
2101 {
2102 return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
2103 }
2104
2105
2106 /*
2107 * CODE SIGNING
2108 */
2109 #define CS_BLOB_KEEP_IN_KERNEL 1
2110 static volatile SInt32 cs_blob_size = 0;
2111 static volatile SInt32 cs_blob_count = 0;
2112 static SInt32 cs_blob_size_peak = 0;
2113 static UInt32 cs_blob_size_max = 0;
2114 static SInt32 cs_blob_count_peak = 0;
2115 extern int cs_debug;
2116
2117 int cs_validation = 1;
2118
2119 SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW, &cs_validation, 0, "Do validate code signatures");
2120 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD, &cs_blob_count, 0, "Current number of code signature blobs");
2121 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD, &cs_blob_size, 0, "Current size of all code signature blobs");
2122 SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
2123 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
2124 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");
2125
2126 int
2127 ubc_cs_blob_add(
2128 struct vnode *vp,
2129 cpu_type_t cputype,
2130 off_t base_offset,
2131 vm_address_t addr,
2132 vm_size_t size)
2133 {
2134 kern_return_t kr;
2135 struct ubc_info *uip;
2136 struct cs_blob *blob, *oblob;
2137 int error;
2138 ipc_port_t blob_handle;
2139 memory_object_size_t blob_size;
2140 const CS_CodeDirectory *cd;
2141 off_t blob_start_offset, blob_end_offset;
2142 SHA1_CTX sha1ctxt;
2143
2144 blob_handle = IPC_PORT_NULL;
2145
2146 blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
2147 if (blob == NULL) {
2148 return ENOMEM;
2149 }
2150
2151 /* get a memory entry on the blob */
2152 blob_size = (memory_object_size_t) size;
2153 kr = mach_make_memory_entry_64(kernel_map,
2154 &blob_size,
2155 addr,
2156 VM_PROT_READ,
2157 &blob_handle,
2158 IPC_PORT_NULL);
2159 if (kr != KERN_SUCCESS) {
2160 error = ENOMEM;
2161 goto out;
2162 }
2163 if (memory_object_round_page(blob_size) !=
2164 (memory_object_size_t) round_page(size)) {
2165 printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%x !?\n",
2166 blob_size, size);
2167 panic("XXX FBDP size mismatch 0x%llx 0x%x\n", blob_size, size);
2168 error = EINVAL;
2169 goto out;
2170 }
2171
2172
2173 /* fill in the new blob */
2174 blob->csb_cpu_type = cputype;
2175 blob->csb_base_offset = base_offset;
2176 blob->csb_mem_size = size;
2177 blob->csb_mem_offset = 0;
2178 blob->csb_mem_handle = blob_handle;
2179 blob->csb_mem_kaddr = addr;
2180
2181
2182 /*
2183 * Validate the blob's contents
2184 */
2185 cd = findCodeDirectory(
2186 (const CS_SuperBlob *) addr,
2187 (char *) addr,
2188 (char *) addr + blob->csb_mem_size);
2189 if (cd == NULL) {
2190 /* no code directory => useless blob ! */
2191 blob->csb_flags = 0;
2192 blob->csb_start_offset = 0;
2193 blob->csb_end_offset = 0;
2194 } else {
2195 unsigned char *sha1_base;
2196 int sha1_size;
2197
2198 blob->csb_flags = ntohl(cd->flags) | CS_VALID;
2199 blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
2200 blob->csb_start_offset = (blob->csb_end_offset -
2201 (ntohl(cd->nCodeSlots) * PAGE_SIZE));
2202 /* compute the blob's SHA1 hash */
2203 sha1_base = (const unsigned char *) cd;
2204 sha1_size = ntohl(cd->length);
2205 SHA1Init(&sha1ctxt);
2206 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
2207 SHA1Final(blob->csb_sha1, &sha1ctxt);
2208 }
2209
2210
2211 /*
2212 * Validate the blob's coverage
2213 */
2214 blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
2215 blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;
2216
2217 if (blob_start_offset >= blob_end_offset ||
2218 blob_start_offset < 0 ||
2219 blob_end_offset <= 0) {
2220 /* reject empty or backwards blob */
2221 error = EINVAL;
2222 goto out;
2223 }
2224
2225 vnode_lock(vp);
2226 if (! UBCINFOEXISTS(vp)) {
2227 vnode_unlock(vp);
2228 error = ENOENT;
2229 goto out;
2230 }
2231 uip = vp->v_ubcinfo;
2232
2233 /* check if this new blob overlaps with an existing blob */
2234 for (oblob = uip->cs_blobs;
2235 oblob != NULL;
2236 oblob = oblob->csb_next) {
2237 off_t oblob_start_offset, oblob_end_offset;
2238
2239 oblob_start_offset = (oblob->csb_base_offset +
2240 oblob->csb_start_offset);
2241 oblob_end_offset = (oblob->csb_base_offset +
2242 oblob->csb_end_offset);
2243 if (blob_start_offset >= oblob_end_offset ||
2244 blob_end_offset <= oblob_start_offset) {
2245 /* no conflict with this existing blob */
2246 } else {
2247 /* conflict ! */
2248 if (blob_start_offset == oblob_start_offset &&
2249 blob_end_offset == oblob_end_offset &&
2250 blob->csb_mem_size == oblob->csb_mem_size &&
2251 blob->csb_flags == oblob->csb_flags &&
2252 (blob->csb_cpu_type == CPU_TYPE_ANY ||
2253 oblob->csb_cpu_type == CPU_TYPE_ANY ||
2254 blob->csb_cpu_type == oblob->csb_cpu_type) &&
2255 !bcmp(blob->csb_sha1,
2256 oblob->csb_sha1,
2257 SHA1_RESULTLEN)) {
2258 /*
2259 * We already have this blob:
2260 * we'll return success but
2261 * throw away the new blob.
2262 */
2263 if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
2264 /*
2265 * The old blob matches this one
2266 * but doesn't have any CPU type.
2267 * Update it with whatever the caller
2268 * provided this time.
2269 */
2270 oblob->csb_cpu_type = cputype;
2271 }
2272 vnode_unlock(vp);
2273 error = EAGAIN;
2274 goto out;
2275 } else {
2276 /* different blob: reject the new one */
2277 vnode_unlock(vp);
2278 error = EALREADY;
2279 goto out;
2280 }
2281 }
2282
2283 }
2284
2285
2286 /* mark this vnode's VM object as having "signed pages" */
2287 kr = memory_object_signed(uip->ui_control, TRUE);
2288 if (kr != KERN_SUCCESS) {
2289 vnode_unlock(vp);
2290 error = ENOENT;
2291 goto out;
2292 }
2293
2294 /*
2295 * Add this blob to the list of blobs for this vnode.
2296 * We always add at the front of the list and we never remove a
2297 * blob from the list, so ubc_cs_get_blobs() can return whatever
2298 * the top of the list was and that list will remain valid
2299 * while we validate a page, even after we release the vnode's lock.
2300 */
2301 blob->csb_next = uip->cs_blobs;
2302 uip->cs_blobs = blob;
2303
2304 OSAddAtomic(+1, &cs_blob_count);
2305 if (cs_blob_count > cs_blob_count_peak) {
2306 cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
2307 }
2308 OSAddAtomic(+blob->csb_mem_size, &cs_blob_size);
2309 if (cs_blob_size > cs_blob_size_peak) {
2310 cs_blob_size_peak = cs_blob_size; /* XXX atomic ? */
2311 }
2312 if (blob->csb_mem_size > cs_blob_size_max) {
2313 cs_blob_size_max = blob->csb_mem_size;
2314 }
2315
2316 if (cs_debug) {
2317 proc_t p;
2318
2319 p = current_proc();
2320 printf("CODE SIGNING: proc %d(%s) "
2321 "loaded %s signatures for file (%s) "
2322 "range 0x%llx:0x%llx flags 0x%x\n",
2323 p->p_pid, p->p_comm,
2324 blob->csb_cpu_type == -1 ? "detached" : "embedded",
2325 vnode_name(vp),
2326 blob->csb_base_offset + blob->csb_start_offset,
2327 blob->csb_base_offset + blob->csb_end_offset,
2328 blob->csb_flags);
2329 }
2330
2331 #if !CS_BLOB_KEEP_IN_KERNEL
2332 blob->csb_mem_kaddr = 0;
2333 #endif /* CS_BLOB_KEEP_IN_KERNEL */
2334
2335 vnode_unlock(vp);
2336
2337 error = 0; /* success ! */
2338
2339 out:
2340 if (error) {
2341 /* we failed; release what we allocated */
2342 if (blob) {
2343 kfree(blob, sizeof (*blob));
2344 blob = NULL;
2345 }
2346 if (blob_handle != IPC_PORT_NULL) {
2347 mach_memory_entry_port_release(blob_handle);
2348 blob_handle = IPC_PORT_NULL;
2349 }
2350 } else {
2351 #if !CS_BLOB_KEEP_IN_KERNEL
2352 kmem_free(kernel_map, addr, size);
2353 #endif /* CS_BLOB_KEEP_IN_KERNEL */
2354 }
2355
2356 if (error == EAGAIN) {
2357 /*
2358 * See above: error is EAGAIN if we were asked
2359 * to add an existing blob again. We cleaned the new
2360 * blob and we want to return success.
2361 */
2362 error = 0;
2363 /*
2364 * Since we're not failing, consume the data we received.
2365 */
2366 kmem_free(kernel_map, addr, size);
2367 }
2368
2369 return error;
2370 }
2371
2372
2373 struct cs_blob *
2374 ubc_cs_blob_get(
2375 struct vnode *vp,
2376 cpu_type_t cputype,
2377 off_t offset)
2378 {
2379 struct ubc_info *uip;
2380 struct cs_blob *blob;
2381 off_t offset_in_blob;
2382
2383 vnode_lock_spin(vp);
2384
2385 if (! UBCINFOEXISTS(vp)) {
2386 blob = NULL;
2387 goto out;
2388 }
2389
2390 uip = vp->v_ubcinfo;
2391 for (blob = uip->cs_blobs;
2392 blob != NULL;
2393 blob = blob->csb_next) {
2394 if (cputype != -1 && blob->csb_cpu_type == cputype) {
2395 break;
2396 }
2397 if (offset != -1) {
2398 offset_in_blob = offset - blob->csb_base_offset;
2399 if (offset_in_blob >= blob->csb_start_offset &&
2400 offset_in_blob < blob->csb_end_offset) {
2401 /* our offset is covered by this blob */
2402 break;
2403 }
2404 }
2405 }
2406
2407 out:
2408 vnode_unlock(vp);
2409
2410 return blob;
2411 }
2412
2413 static void
2414 ubc_cs_free(
2415 struct ubc_info *uip)
2416 {
2417 struct cs_blob *blob, *next_blob;
2418
2419 for (blob = uip->cs_blobs;
2420 blob != NULL;
2421 blob = next_blob) {
2422 next_blob = blob->csb_next;
2423 if (blob->csb_mem_kaddr != 0) {
2424 kmem_free(kernel_map,
2425 blob->csb_mem_kaddr,
2426 blob->csb_mem_size);
2427 blob->csb_mem_kaddr = 0;
2428 }
2429 mach_memory_entry_port_release(blob->csb_mem_handle);
2430 blob->csb_mem_handle = IPC_PORT_NULL;
2431 OSAddAtomic(-1, &cs_blob_count);
2432 OSAddAtomic(-blob->csb_mem_size, &cs_blob_size);
2433 kfree(blob, sizeof (*blob));
2434 }
2435 uip->cs_blobs = NULL;
2436 }
2437
2438 struct cs_blob *
2439 ubc_get_cs_blobs(
2440 struct vnode *vp)
2441 {
2442 struct ubc_info *uip;
2443 struct cs_blob *blobs;
2444
2445 vnode_lock_spin(vp);
2446
2447 if (! UBCINFOEXISTS(vp)) {
2448 blobs = NULL;
2449 goto out;
2450 }
2451
2452 uip = vp->v_ubcinfo;
2453 blobs = uip->cs_blobs;
2454
2455 out:
2456 vnode_unlock(vp);
2457
2458 return blobs;
2459 }
2460
2461 unsigned long cs_validate_page_no_hash = 0;
2462 unsigned long cs_validate_page_bad_hash = 0;
2463 boolean_t
2464 cs_validate_page(
2465 void *_blobs,
2466 memory_object_offset_t page_offset,
2467 const void *data,
2468 boolean_t *tainted)
2469 {
2470 SHA1_CTX sha1ctxt;
2471 unsigned char actual_hash[SHA1_RESULTLEN];
2472 unsigned char expected_hash[SHA1_RESULTLEN];
2473 boolean_t found_hash;
2474 struct cs_blob *blobs, *blob;
2475 const CS_CodeDirectory *cd;
2476 const CS_SuperBlob *embedded;
2477 off_t start_offset, end_offset;
2478 const unsigned char *hash;
2479 boolean_t validated;
2480 off_t offset; /* page offset in the file */
2481 size_t size;
2482 off_t codeLimit = 0;
2483 char *lower_bound, *upper_bound;
2484 vm_offset_t kaddr, blob_addr;
2485 vm_size_t ksize;
2486 kern_return_t kr;
2487
2488 offset = page_offset;
2489
2490 /* retrieve the expected hash */
2491 found_hash = FALSE;
2492 blobs = (struct cs_blob *) _blobs;
2493
2494 for (blob = blobs;
2495 blob != NULL;
2496 blob = blob->csb_next) {
2497 offset = page_offset - blob->csb_base_offset;
2498 if (offset < blob->csb_start_offset ||
2499 offset >= blob->csb_end_offset) {
2500 /* our page is not covered by this blob */
2501 continue;
2502 }
2503
2504 /* map the blob in the kernel address space */
2505 kaddr = blob->csb_mem_kaddr;
2506 if (kaddr == 0) {
2507 ksize = (vm_size_t) (blob->csb_mem_size +
2508 blob->csb_mem_offset);
2509 kr = vm_map(kernel_map,
2510 &kaddr,
2511 ksize,
2512 0,
2513 VM_FLAGS_ANYWHERE,
2514 blob->csb_mem_handle,
2515 0,
2516 TRUE,
2517 VM_PROT_READ,
2518 VM_PROT_READ,
2519 VM_INHERIT_NONE);
2520 if (kr != KERN_SUCCESS) {
2521 /* XXX FBDP what to do !? */
2522 printf("cs_validate_page: failed to map blob, "
2523 "size=0x%x kr=0x%x\n",
2524 blob->csb_mem_size, kr);
2525 break;
2526 }
2527 }
2528 blob_addr = kaddr + blob->csb_mem_offset;
2529
2530 lower_bound = CAST_DOWN(char *, blob_addr);
2531 upper_bound = lower_bound + blob->csb_mem_size;
2532
2533 embedded = (const CS_SuperBlob *) blob_addr;
2534 cd = findCodeDirectory(embedded, lower_bound, upper_bound);
2535 if (cd != NULL) {
2536 if (cd->pageSize != PAGE_SHIFT ||
2537 cd->hashType != 0x1 ||
2538 cd->hashSize != SHA1_RESULTLEN) {
2539 /* bogus blob ? */
2540 #if !CS_BLOB_KEEP_IN_KERNEL
2541 kmem_free(kernel_map, kaddr, ksize);
2542 #endif /* CS_BLOB_KEEP_IN_KERNEL */
2543 continue;
2544 }
2545
2546 end_offset = round_page(ntohl(cd->codeLimit));
2547 start_offset = end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE);
2548 offset = page_offset - blob->csb_base_offset;
2549 if (offset < start_offset ||
2550 offset >= end_offset) {
2551 /* our page is not covered by this blob */
2552 #if !CS_BLOB_KEEP_IN_KERNEL
2553 kmem_free(kernel_map, kaddr, ksize);
2554 #endif /* CS_BLOB_KEEP_IN_KERNEL */
2555 continue;
2556 }
2557
2558 codeLimit = ntohl(cd->codeLimit);
2559 hash = hashes(cd, atop(offset),
2560 lower_bound, upper_bound);
2561 if (hash != NULL) {
2562 bcopy(hash, expected_hash,
2563 sizeof (expected_hash));
2564 found_hash = TRUE;
2565 }
2566
2567 #if !CS_BLOB_KEEP_IN_KERNEL
2568 /* we no longer need that blob in the kernel map */
2569 kmem_free(kernel_map, kaddr, ksize);
2570 #endif /* CS_BLOB_KEEP_IN_KERNEL */
2571
2572 break;
2573 }
2574 }
2575
2576 if (found_hash == FALSE) {
2577 /*
2578 * We can't verify this page because there is no signature
2579 * for it (yet). It's possible that this part of the object
2580 * is not signed, or that signatures for that part have not
2581 * been loaded yet.
2582 * Report that the page has not been validated and let the
2583 * caller decide if it wants to accept it or not.
2584 */
2585 cs_validate_page_no_hash++;
2586 if (cs_debug > 1) {
2587 printf("CODE SIGNING: cs_validate_page: "
2588 "off 0x%llx: no hash to validate !?\n",
2589 page_offset);
2590 }
2591 validated = FALSE;
2592 *tainted = FALSE;
2593 } else {
2594 const uint32_t *asha1, *esha1;
2595
2596 size = PAGE_SIZE;
2597 if (offset + size > codeLimit) {
2598 /* partial page at end of segment */
2599 assert(offset < codeLimit);
2600 size = codeLimit & PAGE_MASK;
2601 }
2602 /* compute the actual page's SHA1 hash */
2603 SHA1Init(&sha1ctxt);
2604 SHA1Update(&sha1ctxt, data, size);
2605 SHA1Final(actual_hash, &sha1ctxt);
2606
2607 asha1 = (const uint32_t *) actual_hash;
2608 esha1 = (const uint32_t *) expected_hash;
2609
2610 if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
2611 if (cs_debug) {
2612 printf("CODE SIGNING: cs_validate_page: "
2613 "off 0x%llx size 0x%lx: "
2614 "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
2615 "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
2616 page_offset, size,
2617 asha1[0], asha1[1], asha1[2],
2618 asha1[3], asha1[4],
2619 esha1[0], esha1[1], esha1[2],
2620 esha1[3], esha1[4]);
2621 }
2622 cs_validate_page_bad_hash++;
2623 *tainted = TRUE;
2624 } else {
2625 if (cs_debug > 1) {
2626 printf("CODE SIGNING: cs_validate_page: "
2627 "off 0x%llx size 0x%lx: SHA1 OK\n",
2628 page_offset, size);
2629 }
2630 *tainted = FALSE;
2631 }
2632 validated = TRUE;
2633 }
2634
2635 return validated;
2636 }
2637
2638 int
2639 ubc_cs_getcdhash(
2640 vnode_t vp,
2641 off_t offset,
2642 unsigned char *cdhash)
2643 {
2644 struct cs_blob *blobs, *blob;
2645 off_t rel_offset;
2646
2647 blobs = ubc_get_cs_blobs(vp);
2648 for (blob = blobs;
2649 blob != NULL;
2650 blob = blob->csb_next) {
2651 /* compute offset relative to this blob */
2652 rel_offset = offset - blob->csb_base_offset;
2653 if (rel_offset >= blob->csb_start_offset &&
2654 rel_offset < blob->csb_end_offset) {
2655 /* this blob does cover our "offset" ! */
2656 break;
2657 }
2658 }
2659
2660 if (blob == NULL) {
2661 /* we didn't find a blob covering "offset" */
2662 return EBADEXEC; /* XXX any better error ? */
2663 }
2664
2665 /* get the SHA1 hash of that blob */
2666 bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
2667
2668 return 0;
2669 }