]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/kern/ubc_subr.c
xnu-1228.3.13.tar.gz
[apple/xnu.git] / bsd / kern / ubc_subr.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * File: ubc_subr.c
30 * Author: Umesh Vaishampayan [umeshv@apple.com]
31 * 05-Aug-1999 umeshv Created.
32 *
33 * Functions related to Unified Buffer cache.
34 *
35 * Caller of UBC functions MUST have a valid reference on the vnode.
36 *
37 */
38
39#include <sys/types.h>
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/lock.h>
43#include <sys/mman.h>
44#include <sys/mount_internal.h>
45#include <sys/vnode_internal.h>
46#include <sys/ubc_internal.h>
47#include <sys/ucred.h>
48#include <sys/proc_internal.h>
49#include <sys/kauth.h>
50#include <sys/buf.h>
51#include <sys/user.h>
52#include <sys/codesign.h>
53
54#include <mach/mach_types.h>
55#include <mach/memory_object_types.h>
56#include <mach/memory_object_control.h>
57#include <mach/vm_map.h>
58#include <mach/upl.h>
59
60#include <kern/kern_types.h>
61#include <kern/kalloc.h>
62#include <kern/zalloc.h>
63#include <kern/thread.h>
64#include <vm/vm_kern.h>
65#include <vm/vm_protos.h> /* last */
66
67#include <libkern/crypto/sha1.h>
68
69/* XXX These should be in a BSD accessible Mach header, but aren't. */
70extern kern_return_t memory_object_pages_resident(memory_object_control_t,
71 boolean_t *);
72extern kern_return_t memory_object_signed(memory_object_control_t control,
73 boolean_t is_signed);
74extern void Debugger(const char *message);
75
76
77/* XXX no one uses this interface! */
78kern_return_t ubc_page_op_with_control(
79 memory_object_control_t control,
80 off_t f_offset,
81 int ops,
82 ppnum_t *phys_entryp,
83 int *flagsp);
84
85
86#if DIAGNOSTIC
87#if defined(assert)
88#undef assert()
89#endif
90#define assert(cond) \
91 ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
92#else
93#include <kern/assert.h>
94#endif /* DIAGNOSTIC */
95
96static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
97static int ubc_umcallback(vnode_t, void *);
98static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
99static void ubc_cs_free(struct ubc_info *uip);
100
101struct zone *ubc_info_zone;
102
103
104/*
105 * CODESIGNING
106 * Routines to navigate code signing data structures in the kernel...
107 */
108static boolean_t
109cs_valid_range(
110 const void *start,
111 const void *end,
112 const void *lower_bound,
113 const void *upper_bound)
114{
115 if (upper_bound < lower_bound ||
116 end < start) {
117 return FALSE;
118 }
119
120 if (start < lower_bound ||
121 end > upper_bound) {
122 return FALSE;
123 }
124
125 return TRUE;
126}
127
128/*
129 * Magic numbers used by Code Signing
130 */
enum {
	/* blob magic numbers */
	CSMAGIC_REQUIREMENT		= 0xfade0c00,	/* a single Requirement blob */
	CSMAGIC_REQUIREMENTS		= 0xfade0c01,	/* vector of (internal) Requirements */
	CSMAGIC_CODEDIRECTORY		= 0xfade0c02,	/* a CodeDirectory blob */
	CSMAGIC_EMBEDDED_SIGNATURE	= 0xfade0cc0,	/* signature data, embedded form */
	CSMAGIC_EMBEDDED_SIGNATURE_OLD	= 0xfade0b02,	/* XXX */
	CSMAGIC_DETACHED_SIGNATURE	= 0xfade0cc1,	/* multi-arch set of embedded signatures */

	/* SuperBlob index slot types */
	CSSLOT_CODEDIRECTORY		= 0,		/* index slot holding the CodeDirectory */
};
141
142
143/*
144 * Structure of an embedded-signature SuperBlob
145 */
/* One index entry of a SuperBlob: what the blob is and where it lives. */
typedef struct __BlobIndex {
	uint32_t	type;		/* entry type */
	uint32_t	offset;		/* entry offset */
} CS_BlobIndex;
150
151typedef struct __SuperBlob {
152 uint32_t magic; /* magic number */
153 uint32_t length; /* total length of SuperBlob */
154 uint32_t count; /* number of index entries following */
155 CS_BlobIndex index[]; /* (count) entries */
156 /* followed by Blobs in no particular order as indicated by offsets in index */
157} CS_SuperBlob;
158
159
160/*
161 * C form of a CodeDirectory.
162 */
/*
 * Fixed-size header of a CodeDirectory blob.  Dynamic content follows it
 * and is located through the offset fields.
 */
typedef struct __CodeDirectory {
	uint32_t	magic;		/* CSMAGIC_CODEDIRECTORY */
	uint32_t	length;		/* total length of this blob */
	uint32_t	version;	/* compatibility version */
	uint32_t	flags;		/* setup and mode flags */
	uint32_t	hashOffset;	/* offset of hash slot element zero */
	uint32_t	identOffset;	/* offset of the identifier string */
	uint32_t	nSpecialSlots;	/* count of special hash slots */
	uint32_t	nCodeSlots;	/* count of ordinary (code) hash slots */
	uint32_t	codeLimit;	/* limit to main image signature range */
	uint8_t		hashSize;	/* bytes per hash */
	uint8_t		hashType;	/* hash type (cdHashType* constants) */
	uint8_t		spare1;		/* unused (must be zero) */
	uint8_t		pageSize;	/* log2(page size in bytes); 0 => infinite */
	uint32_t	spare2;		/* unused (must be zero) */
} CS_CodeDirectory;
180
181
182/*
183 * Locate the CodeDirectory from an embedded signature blob
184 */
185static const
186CS_CodeDirectory *findCodeDirectory(
187 const CS_SuperBlob *embedded,
188 char *lower_bound,
189 char *upper_bound)
190{
191 const CS_CodeDirectory *cd = NULL;
192
193 if (embedded &&
194 cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
195 ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
196 const CS_BlobIndex *limit;
197 const CS_BlobIndex *p;
198
199 limit = &embedded->index[ntohl(embedded->count)];
200 if (!cs_valid_range(&embedded->index[0], limit,
201 lower_bound, upper_bound)) {
202 return NULL;
203 }
204 for (p = embedded->index; p < limit; ++p) {
205 if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
206 const unsigned char *base;
207
208 base = (const unsigned char *)embedded;
209 cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
210 break;
211 }
212 }
213 } else {
214 /*
215 * Detached signatures come as a bare CS_CodeDirectory,
216 * without a blob.
217 */
218 cd = (const CS_CodeDirectory *) embedded;
219 }
220
221 if (cd &&
222 cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
223 cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
224 lower_bound, upper_bound) &&
225 ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
226 return cd;
227 }
228
229 // not found or not a valid code directory
230 return NULL;
231}
232
233
234/*
235 * Locating a page hash
236 */
237static const unsigned char *
238hashes(
239 const CS_CodeDirectory *cd,
240 unsigned page,
241 char *lower_bound,
242 char *upper_bound)
243{
244 const unsigned char *base, *top, *hash;
245 uint32_t nCodeSlots;
246
247 assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));
248
249 base = (const unsigned char *)cd + ntohl(cd->hashOffset);
250 nCodeSlots = ntohl(cd->nCodeSlots);
251 top = base + nCodeSlots * SHA1_RESULTLEN;
252 if (!cs_valid_range(base, top,
253 lower_bound, upper_bound) ||
254 page > nCodeSlots) {
255 return NULL;
256 }
257 assert(page < nCodeSlots);
258
259 hash = base + page * SHA1_RESULTLEN;
260 if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
261 lower_bound, upper_bound)) {
262 hash = NULL;
263 }
264
265 return hash;
266}
267/*
268 * CODESIGNING
269 * End of routines to navigate code signing data structures in the kernel.
270 */
271
272
273/*
274 * ubc_init
275 *
276 * Initialization of the zone for Unified Buffer Cache.
277 *
278 * Parameters: (void)
279 *
280 * Returns: (void)
281 *
282 * Implicit returns:
283 * ubc_info_zone(global) initialized for subsequent allocations
284 */
285__private_extern__ void
286ubc_init(void)
287{
288 int i;
289
290 i = (vm_size_t) sizeof (struct ubc_info);
291
292 ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
293}
294
295
296/*
297 * ubc_info_init
298 *
299 * Allocate and attach an empty ubc_info structure to a vnode
300 *
301 * Parameters: vp Pointer to the vnode
302 *
303 * Returns: 0 Success
304 * vnode_size:ENOMEM Not enough space
305 * vnode_size:??? Other error from vnode_getattr
306 *
307 */
int
ubc_info_init(struct vnode *vp)
{
	/* withfsize == 0: the size is taken from the vnode */
	return ubc_info_init_internal(vp, 0, 0);
}
313
314
315/*
316 * ubc_info_init_withsize
317 *
318 * Allocate and attach a sized ubc_info structure to a vnode
319 *
320 * Parameters: vp Pointer to the vnode
321 * filesize The size of the file
322 *
323 * Returns: 0 Success
324 * vnode_size:ENOMEM Not enough space
325 * vnode_size:??? Other error from vnode_getattr
326 */
327int
328ubc_info_init_withsize(struct vnode *vp, off_t filesize)
329{
330 return(ubc_info_init_internal(vp, 1, filesize));
331}
332
333
334/*
335 * ubc_info_init_internal
336 *
337 * Allocate and attach a ubc_info structure to a vnode
338 *
339 * Parameters: vp Pointer to the vnode
340 * withfsize{0,1} Zero if the size should be obtained
341 * from the vnode; otherwise, use filesize
342 * filesize The size of the file, if withfsize == 1
343 *
344 * Returns: 0 Success
345 * vnode_size:ENOMEM Not enough space
346 * vnode_size:??? Other error from vnode_getattr
347 *
348 * Notes: We call a blocking zalloc(), and the zone was created as an
349 * expandable and collectable zone, so if no memory is available,
350 * it is possible for zalloc() to block indefinitely. zalloc()
351 * may also panic if the zone of zones is exhausted, since it's
352 * NOT expandable.
353 *
354 * We unconditionally call vnode_pager_setup(), even if this is
355 * a reuse of a ubc_info; in that case, we should probably assert
356 * that it does not already have a pager association, but do not.
357 *
358 * Since memory_object_create_named() can only fail from receiving
359 * an invalid pager argument, the explicit check and panic is
360 * merely precautionary.
361 */
362static int
363ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
364{
365 register struct ubc_info *uip;
366 void * pager;
367 int error = 0;
368 kern_return_t kret;
369 memory_object_control_t control;
370
371 uip = vp->v_ubcinfo;
372
373 /*
374 * If there is not already a ubc_info attached to the vnode, we
375 * attach one; otherwise, we will reuse the one that's there.
376 */
377 if (uip == UBC_INFO_NULL) {
378
379 uip = (struct ubc_info *) zalloc(ubc_info_zone);
380 bzero((char *)uip, sizeof(struct ubc_info));
381
382 uip->ui_vnode = vp;
383 uip->ui_flags = UI_INITED;
384 uip->ui_ucred = NOCRED;
385 }
386 assert(uip->ui_flags != UI_NONE);
387 assert(uip->ui_vnode == vp);
388
389 /* now set this ubc_info in the vnode */
390 vp->v_ubcinfo = uip;
391
392 /*
393 * Allocate a pager object for this vnode
394 *
395 * XXX The value of the pager parameter is currently ignored.
396 * XXX Presumably, this API changed to avoid the race between
397 * XXX setting the pager and the UI_HASPAGER flag.
398 */
399 pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
400 assert(pager);
401
402 /*
403 * Explicitly set the pager into the ubc_info, after setting the
404 * UI_HASPAGER flag.
405 */
406 SET(uip->ui_flags, UI_HASPAGER);
407 uip->ui_pager = pager;
408
409 /*
410 * Note: We can not use VNOP_GETATTR() to get accurate
411 * value of ui_size because this may be an NFS vnode, and
412 * nfs_getattr() can call vinvalbuf(); if this happens,
413 * ubc_info is not set up to deal with that event.
414 * So use bogus size.
415 */
416
417 /*
418 * create a vnode - vm_object association
419 * memory_object_create_named() creates a "named" reference on the
420 * memory object we hold this reference as long as the vnode is
421 * "alive." Since memory_object_create_named() took its own reference
422 * on the vnode pager we passed it, we can drop the reference
423 * vnode_pager_setup() returned here.
424 */
425 kret = memory_object_create_named(pager,
426 (memory_object_size_t)uip->ui_size, &control);
427 vnode_pager_deallocate(pager);
428 if (kret != KERN_SUCCESS)
429 panic("ubc_info_init: memory_object_create_named returned %d", kret);
430
431 assert(control);
432 uip->ui_control = control; /* cache the value of the mo control */
433 SET(uip->ui_flags, UI_HASOBJREF); /* with a named reference */
434
435 if (withfsize == 0) {
436 /* initialize the size */
437 error = vnode_size(vp, &uip->ui_size, vfs_context_current());
438 if (error)
439 uip->ui_size = 0;
440 } else {
441 uip->ui_size = filesize;
442 }
443 vp->v_lflag |= VNAMED_UBC; /* vnode has a named ubc reference */
444
445 return (error);
446}
447
448
449/*
450 * ubc_info_free
451 *
452 * Free a ubc_info structure
453 *
454 * Parameters: uip A pointer to the ubc_info to free
455 *
456 * Returns: (void)
457 *
458 * Notes: If there is a credential that has subsequently been associated
459 * with the ubc_info via a call to ubc_setcred(), the reference
460 * to the credential is dropped.
461 *
462 * It's actually impossible for a ubc_info.ui_control to take the
463 * value MEMORY_OBJECT_CONTROL_NULL.
464 */
465static void
466ubc_info_free(struct ubc_info *uip)
467{
468 if (IS_VALID_CRED(uip->ui_ucred)) {
469 kauth_cred_unref(&uip->ui_ucred);
470 }
471
472 if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
473 memory_object_control_deallocate(uip->ui_control);
474
475 cluster_release(uip);
476 ubc_cs_free(uip);
477
478 zfree(ubc_info_zone, uip);
479 return;
480}
481
482
/*
 * ubc_info_deallocate
 *
 * Externally visible wrapper around ubc_info_free().
 */
void
ubc_info_deallocate(struct ubc_info *uip)
{
	ubc_info_free(uip);
}
488
489
490/*
491 * ubc_setsize
492 *
493 * Tell the VM that the size of the file represented by the vnode has
494 * changed
495 *
496 * Parameters: vp The vp whose backing file size is
497 * being changed
498 * nsize The new size of the backing file
499 *
500 * Returns: 1 Success
501 * 0 Failure
502 *
503 * Notes: This function will indicate failure if the new size that's
504 * being attempted to be set is negative.
505 *
506 * This function will fail if there is no ubc_info currently
507 * associated with the vnode.
508 *
509 * This function will indicate success if the new size is the
510 * same or larger than the old size (in this case, the remainder
511 * of the file will require modification or use of an existing upl
512 * to access successfully).
513 *
514 * This function will fail if the new file size is smaller, and
515 * the memory region being invalidated was unable to actually be
516 * invalidated and/or the last page could not be flushed, if the
517 * new size is not aligned to a page boundary. This is usually
518 * indicative of an I/O error.
519 */
520int
521ubc_setsize(struct vnode *vp, off_t nsize)
522{
523 off_t osize; /* ui_size before change */
524 off_t lastpg, olastpgend, lastoff;
525 struct ubc_info *uip;
526 memory_object_control_t control;
527 kern_return_t kret = KERN_SUCCESS;
528
529 if (nsize < (off_t)0)
530 return (0);
531
532 if (!UBCINFOEXISTS(vp))
533 return (0);
534
535 uip = vp->v_ubcinfo;
536 osize = uip->ui_size;
537 /*
538 * Update the size before flushing the VM
539 */
540 uip->ui_size = nsize;
541
542 if (nsize >= osize) /* Nothing more to do */
543 return (1); /* return success */
544
545 /*
546 * When the file shrinks, invalidate the pages beyond the
547 * new size. Also get rid of garbage beyond nsize on the
548 * last page. The ui_size already has the nsize, so any
549 * subsequent page-in will zero-fill the tail properly
550 */
551 lastpg = trunc_page_64(nsize);
552 olastpgend = round_page_64(osize);
553 control = uip->ui_control;
554 assert(control);
555 lastoff = (nsize & PAGE_MASK_64);
556
557 if (lastoff) {
558 upl_t upl;
559 upl_page_info_t *pl;
560
561
562 /*
563 * new EOF ends up in the middle of a page
564 * zero the tail of this page if its currently
565 * present in the cache
566 */
567 kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);
568
569 if (kret != KERN_SUCCESS)
570 panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);
571
572 if (upl_valid_page(pl, 0))
573 cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);
574
575 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
576
577 lastpg += PAGE_SIZE_64;
578 }
579 if (olastpgend > lastpg) {
580 /*
581 * invalidate the pages beyond the new EOF page
582 *
583 */
584 kret = memory_object_lock_request(control,
585 (memory_object_offset_t)lastpg,
586 (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
587 MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH,
588 VM_PROT_NO_CHANGE);
589 if (kret != KERN_SUCCESS)
590 printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
591 }
592 return ((kret == KERN_SUCCESS) ? 1 : 0);
593}
594
595
596/*
597 * ubc_getsize
598 *
599 * Get the size of the file associated with the specified vnode
600 *
601 * Parameters: vp The vnode whose size is of interest
602 *
603 * Returns: 0 There is no ubc_info associated with
604 * this vnode, or the size is zero
605 * !0 The size of the file
606 *
607 * Notes: Using this routine, it is not possible for a caller to
608 * successfully distinguish between a vnode associated with a zero
609 * length file, and a vnode with no associated ubc_info. The
610 * caller therefore needs to not care, or needs to ensure that
611 * they have previously successfully called ubc_info_init() or
612 * ubc_info_init_withsize().
613 */
614off_t
615ubc_getsize(struct vnode *vp)
616{
617 /* people depend on the side effect of this working this way
618 * as they call this for directory
619 */
620 if (!UBCINFOEXISTS(vp))
621 return ((off_t)0);
622 return (vp->v_ubcinfo->ui_size);
623}
624
625
626/*
627 * ubc_umount
628 *
629 * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this
630 * mount point
631 *
632 * Parameters: mp The mount point
633 *
634 * Returns: 0 Success
635 *
636 * Notes: There is no failure indication for this function.
637 *
638 * This function is used in the unmount path; since it may block
639 * I/O indefinitely, it should not be used in the forced unmount
640 * path, since a device unavailability could also block that
641 * indefinitely.
642 *
643 * Because there is no device ejection interlock on USB, FireWire,
644 * or similar devices, it's possible that an ejection that begins
645 * subsequent to the vnode_iterate() completing, either on one of
646 * those devices, or a network mount for which the server quits
647 * responding, etc., may cause the caller to block indefinitely.
648 */
649__private_extern__ int
650ubc_umount(struct mount *mp)
651{
652 vnode_iterate(mp, 0, ubc_umcallback, 0);
653 return(0);
654}
655
656
657/*
658 * ubc_umcallback
659 *
660 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
661 * and vnode_iterate() for details of implementation.
662 */
663static int
664ubc_umcallback(vnode_t vp, __unused void * args)
665{
666
667 if (UBCINFOEXISTS(vp)) {
668
669 (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
670 }
671 return (VNODE_RETURNED);
672}
673
674
675/*
676 * ubc_getcred
677 *
678 * Get the credentials currently active for the ubc_info associated with the
679 * vnode.
680 *
681 * Parameters: vp The vnode whose ubc_info credentials
682 * are to be retrieved
683 *
684 * Returns: !NOCRED The credentials
685 * NOCRED If there is no ubc_info for the vnode,
686 * or if there is one, but it has not had
687 * any credentials associated with it via
688 * a call to ubc_setcred()
689 */
690kauth_cred_t
691ubc_getcred(struct vnode *vp)
692{
693 if (UBCINFOEXISTS(vp))
694 return (vp->v_ubcinfo->ui_ucred);
695
696 return (NOCRED);
697}
698
699
700/*
701 * ubc_setthreadcred
702 *
703 * If they are not already set, set the credentials of the ubc_info structure
704 * associated with the vnode to those of the supplied thread; otherwise leave
705 * them alone.
706 *
707 * Parameters: vp The vnode whose ubc_info creds are to
708 * be set
709 * p The process whose credentials are to
710 * be used, if not running on an assumed
711 * credential
712 * thread The thread whose credentials are to
713 * be used
714 *
715 * Returns: 1 This vnode has no associated ubc_info
716 * 0 Success
717 *
718 * Notes: This function takes a proc parameter to account for bootstrap
719 * issues where a task or thread may call this routine, either
720 * before credentials have been initialized by bsd_init(), or if
721 * there is no BSD info associated with a mach thread yet. This
722 * is known to happen in both the initial swap and memory mapping
723 * calls.
724 *
725 * This function is generally used only in the following cases:
726 *
727 * o a memory mapped file via the mmap() system call
728 * o a memory mapped file via the deprecated map_fd() call
729 * o a swap store backing file
730 * o subsequent to a successful write via vn_write()
731 *
732 * The information is then used by the NFS client in order to
733 * cons up a wire message in either the page-in or page-out path.
734 *
735 * There are two potential problems with the use of this API:
736 *
737 * o Because the write path only set it on a successful
738 * write, there is a race window between setting the
739 * credential and its use to evict the pages to the
740 * remote file server
741 *
742 * o Because a page-in may occur prior to a write, the
743 * credential may not be set at this time, if the page-in
744 * is not the result of a mapping established via mmap()
745 * or map_fd().
746 *
747 * In both these cases, this will be triggered from the paging
748 * path, which will instead use the credential of the current
749 * process, which in this case is either the dynamic_pager or
750 * the kernel task, both of which utilize "root" credentials.
751 *
752 * This may potentially permit operations to occur which should
753 * be denied, or it may cause to be denied operations which
754 * should be permitted, depending on the configuration of the NFS
755 * server.
756 */
757int
758ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
759{
760 struct ubc_info *uip;
761 kauth_cred_t credp;
762 struct uthread *uthread = get_bsdthread_info(thread);
763
764 if (!UBCINFOEXISTS(vp))
765 return (1);
766
767 vnode_lock(vp);
768
769 uip = vp->v_ubcinfo;
770 credp = uip->ui_ucred;
771
772 if (!IS_VALID_CRED(credp)) {
773 /* use per-thread cred, if assumed identity, else proc cred */
774 if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
775 uip->ui_ucred = kauth_cred_proc_ref(p);
776 } else {
777 uip->ui_ucred = uthread->uu_ucred;
778 kauth_cred_ref(uip->ui_ucred);
779 }
780 }
781 vnode_unlock(vp);
782
783 return (0);
784}
785
786
787/*
788 * ubc_setcred
789 *
790 * If they are not already set, set the credentials of the ubc_info structure
791 * associated with the vnode to those of the process; otherwise leave them
792 * alone.
793 *
794 * Parameters: vp The vnode whose ubc_info creds are to
795 * be set
796 * p The process whose credentials are to
797 * be used
798 *
799 * Returns: 0 This vnode has no associated ubc_info
800 * 1 Success
801 *
802 * Notes: The return values for this function are inverted from nearly
803 * all other uses in the kernel.
804 *
805 * See also ubc_setthreadcred(), above.
806 *
807 * This function is considered deprecated, and generally should
808 * not be used, as it is incompatible with per-thread credentials;
809 * it exists for legacy KPI reasons.
810 *
811 * DEPRECATION: ubc_setcred() is being deprecated. Please use
812 * ubc_setthreadcred() instead.
813 */
814int
815ubc_setcred(struct vnode *vp, proc_t p)
816{
817 struct ubc_info *uip;
818 kauth_cred_t credp;
819
820 /* If there is no ubc_info, deny the operation */
821 if ( !UBCINFOEXISTS(vp))
822 return (0);
823
824 /*
825 * Check to see if there is already a credential reference in the
826 * ubc_info; if there is not, take one on the supplied credential.
827 */
828 vnode_lock(vp);
829 uip = vp->v_ubcinfo;
830 credp = uip->ui_ucred;
831 if (!IS_VALID_CRED(credp)) {
832 uip->ui_ucred = kauth_cred_proc_ref(p);
833 }
834 vnode_unlock(vp);
835
836 return (1);
837}
838
839
840/*
841 * ubc_getpager
842 *
843 * Get the pager associated with the ubc_info associated with the vnode.
844 *
845 * Parameters: vp The vnode to obtain the pager from
846 *
847 * Returns: !VNODE_PAGER_NULL The memory_object_t for the pager
848 * VNODE_PAGER_NULL There is no ubc_info for this vnode
849 *
850 * Notes: For each vnode that has a ubc_info associated with it, that
851 * ubc_info SHALL have a pager associated with it, so in the
852 * normal case, it's impossible to return VNODE_PAGER_NULL for
853 * a vnode with an associated ubc_info.
854 */
855__private_extern__ memory_object_t
856ubc_getpager(struct vnode *vp)
857{
858 if (UBCINFOEXISTS(vp))
859 return (vp->v_ubcinfo->ui_pager);
860
861 return (0);
862}
863
864
865/*
866 * ubc_getobject
867 *
868 * Get the memory object control associated with the ubc_info associated with
869 * the vnode
870 *
871 * Parameters: vp The vnode to obtain the memory object
872 * from
873 * flags DEPRECATED
874 *
875 * Returns: !MEMORY_OBJECT_CONTROL_NULL
876 * MEMORY_OBJECT_CONTROL_NULL
877 *
878 * Notes: Historically, if the flags were not "do not reactivate", this
879 * function would look up the memory object using the pager if
880 * it did not exist (this could be the case if the vnode had
881 * been previously reactivated). The flags would also permit a
882 * hold to be requested, which would have created an object
883 * reference, if one had not already existed. This usage is
884 * deprecated, as it would permit a race between finding and
885 * taking the reference vs. a single reference being dropped in
886 * another thread.
887 */
888memory_object_control_t
889ubc_getobject(struct vnode *vp, __unused int flags)
890{
891 if (UBCINFOEXISTS(vp))
892 return((vp->v_ubcinfo->ui_control));
893
894 return (MEMORY_OBJECT_CONTROL_NULL);
895}
896
897
898/*
899 * ubc_blktooff
900 *
901 * Convert a given block number to a memory backing object (file) offset for a
902 * given vnode
903 *
904 * Parameters: vp The vnode in which the block is located
905 * blkno The block number to convert
906 *
907 * Returns: !-1 The offset into the backing object
908 * -1 There is no ubc_info associated with
909 * the vnode
910 * -1 An error occurred in the underlying VFS
911 * while translating the block to an
912 * offset; the most likely cause is that
913 * the caller specified a block past the
914 * end of the file, but this could also be
915 * any other error from VNOP_BLKTOOFF().
916 *
917 * Note: Representing the error in band loses some information, but does
918 * not occlude a valid offset, since an off_t of -1 is normally
919 * used to represent EOF. If we had a more reliable constant in
920 * our header files for it (i.e. explicitly cast to an off_t), we
921 * would use it here instead.
922 */
923off_t
924ubc_blktooff(vnode_t vp, daddr64_t blkno)
925{
926 off_t file_offset = -1;
927 int error;
928
929 if (UBCINFOEXISTS(vp)) {
930 error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
931 if (error)
932 file_offset = -1;
933 }
934
935 return (file_offset);
936}
937
938
939/*
940 * ubc_offtoblk
941 *
942 * Convert a given offset in a memory backing object into a block number for a
943 * given vnode
944 *
945 * Parameters: vp The vnode in which the offset is
946 * located
947 * offset The offset into the backing object
948 *
949 * Returns: !-1 The returned block number
950 * -1 There is no ubc_info associated with
951 * the vnode
952 * -1 An error occurred in the underlying VFS
953 * while translating the offset to a
954 * block; the most likely cause is that
955 * the caller specified an offset past the
956 * end of the file, but this could also be
957 * any other error from VNOP_OFFTOBLK().
958 *
959 * Note: Representing the error in band loses some information, but does
960 * not occlude a valid block number, since block numbers exceed
961 * the valid range for offsets, due to their relative sizes. If
962 * we had a more reliable constant than -1 in our header files
963 * for it (i.e. explicitly cast to a daddr64_t), we would use it
964 * here instead.
965 */
966daddr64_t
967ubc_offtoblk(vnode_t vp, off_t offset)
968{
969 daddr64_t blkno = -1;
970 int error = 0;
971
972 if (UBCINFOEXISTS(vp)) {
973 error = VNOP_OFFTOBLK(vp, offset, &blkno);
974 if (error)
975 blkno = -1;
976 }
977
978 return (blkno);
979}
980
981
982/*
983 * ubc_pages_resident
984 *
985 * Determine whether or not a given vnode has pages resident via the memory
986 * object control associated with the ubc_info associated with the vnode
987 *
988 * Parameters: vp The vnode we want to know about
989 *
990 * Returns: 1 Yes
991 * 0 No
992 */
993int
994ubc_pages_resident(vnode_t vp)
995{
996 kern_return_t kret;
997 boolean_t has_pages_resident;
998
999 if (!UBCINFOEXISTS(vp))
1000 return (0);
1001
1002 /*
1003 * The following call may fail if an invalid ui_control is specified,
1004 * or if there is no VM object associated with the control object. In
1005 * either case, reacting to it as if there were no pages resident will
1006 * result in correct behavior.
1007 */
1008 kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);
1009
1010 if (kret != KERN_SUCCESS)
1011 return (0);
1012
1013 if (has_pages_resident == TRUE)
1014 return (1);
1015
1016 return (0);
1017}
1018
1019
1020/*
1021 * ubc_sync_range
1022 *
1023 * Clean and/or invalidate a range in the memory object that backs this vnode
1024 *
1025 * Parameters: vp The vnode whose associated ubc_info's
1026 * associated memory object is to have a
1027 * range invalidated within it
1028 * beg_off The start of the range, as an offset
1029 * end_off The end of the range, as an offset
1030 * flags See ubc_msync_internal()
1031 *
1032 * Returns: 1 Success
1033 * 0 Failure
1034 *
1035 * Notes: see ubc_msync_internal() for more detailed information.
1036 *
1037 * DEPRECATED: This interface is obsolete due to a failure to return error
1038 * information needed in order to correct failures. The currently
1039 * recommended interface is ubc_msync().
1040 */
1041int
1042ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
1043{
1044 return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
1045}
1046
1047
1048/*
1049 * ubc_msync
1050 *
1051 * Clean and/or invalidate a range in the memory object that backs this vnode
1052 *
1053 * Parameters: vp The vnode whose associated ubc_info's
1054 * associated memory object is to have a
1055 * range invalidated within it
1056 * beg_off The start of the range, as an offset
1057 * end_off The end of the range, as an offset
1058 * resid_off The address of an off_t supplied by the
1059 * caller; may be set to NULL to ignore
1060 * flags See ubc_msync_internal()
1061 *
1062 * Returns: 0 Success
1063 * !0 Failure; an errno is returned
1064 *
1065 * Implicit Returns:
1066 * *resid_off, modified If non-NULL, the contents are ALWAYS
1067 * modified; they are initialized to the
1068 * beg_off, and in case of an I/O error,
1069 * the difference between beg_off and the
1070 * current value will reflect what was
1071 * able to be written before the error
1072 * occurred. If no error is returned, the
1073 * value of the resid_off is undefined; do
1074 * NOT use it in place of end_off if you
1075 * intend to increment from the end of the
1076 * last call and call iteratively.
1077 *
1078 * Notes: see ubc_msync_internal() for more detailed information.
1079 *
1080 */
1081errno_t
1082ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
1083{
1084 int retval;
1085 int io_errno = 0;
1086
1087 if (resid_off)
1088 *resid_off = beg_off;
1089
1090 retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);
1091
1092 if (retval == 0 && io_errno == 0)
1093 return (EINVAL);
1094 return (io_errno);
1095}
1096
1097
1098/*
1099 * Clean and/or invalidate a range in the memory object that backs this vnode
1100 *
1101 * Parameters: vp The vnode whose associated ubc_info's
1102 * associated memory object is to have a
1103 * range invalidated within it
1104 * beg_off The start of the range, as an offset
1105 * end_off The end of the range, as an offset
1106 * resid_off The address of an off_t supplied by the
1107 * caller; may be set to NULL to ignore
1108 * flags MUST contain at least one of the flags
1109 * UBC_INVALIDATE, UBC_PUSHDIRTY, or
1110 * UBC_PUSHALL; if UBC_PUSHDIRTY is used,
1111 * UBC_SYNC may also be specified to cause
1112 * this function to block until the
1113 * operation is complete. The behavior
1114 * of UBC_SYNC is otherwise undefined.
1115 * io_errno The address of an int to contain the
1116 * errno from a failed I/O operation, if
1117 * one occurs; may be set to NULL to
1118 * ignore
1119 *
1120 * Returns: 1 Success
1121 * 0 Failure
1122 *
1123 * Implicit Returns:
1124 * *resid_off, modified The contents of this offset MAY be
1125 * modified; in case of an I/O error, the
1126 * difference between beg_off and the
1127 * current value will reflect what was
1128 * able to be written before the error
1129 * occurred.
1130 * *io_errno, modified The contents of this offset are set to
1131 * an errno, if an error occurs; if the
1132 * caller supplies an io_errno parameter,
1133 * they should be careful to initialize it
1134 * to 0 before calling this function to
1135 * enable them to distinguish an error
1136 * with a valid *resid_off from an invalid
1137 * one, and to avoid potentially falsely
1138 * reporting an error, depending on use.
1139 *
1140 * Notes: If there is no ubc_info associated with the vnode supplied,
1141 * this function immediately returns success.
1142 *
1143 * If the value of end_off is less than or equal to beg_off, this
1144 * function immediately returns success; that is, end_off is NOT
1145 * inclusive.
1146 *
1147 * IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
1148 * UBC_PUSHALL MUST be specified; that is, it is NOT possible to
1149 * attempt to block on in-progress I/O by calling this function
1150 * with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
1151 * in order to block pending on the I/O already in progress.
1152 *
1153 * The start offset is truncated to the page boundary and the
1154 * size is adjusted to include the last page in the range; that
1155 * is, end_off on exactly a page boundary will not change if it
1156 * is rounded, and the range of bytes written will be from the
1157 * truncate beg_off to the rounded (end_off - 1).
1158 */
1159static int
1160ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
1161{
1162 memory_object_size_t tsize;
1163 kern_return_t kret;
1164 int request_flags = 0;
1165 int flush_flags = MEMORY_OBJECT_RETURN_NONE;
1166
1167 if ( !UBCINFOEXISTS(vp))
1168 return (0);
1169 if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
1170 return (0);
1171 if (end_off <= beg_off)
1172 return (1);
1173
1174 if (flags & UBC_INVALIDATE)
1175 /*
1176 * discard the resident pages
1177 */
1178 request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);
1179
1180 if (flags & UBC_SYNC)
1181 /*
1182 * wait for all the I/O to complete before returning
1183 */
1184 request_flags |= MEMORY_OBJECT_IO_SYNC;
1185
1186 if (flags & UBC_PUSHDIRTY)
1187 /*
1188 * we only return the dirty pages in the range
1189 */
1190 flush_flags = MEMORY_OBJECT_RETURN_DIRTY;
1191
1192 if (flags & UBC_PUSHALL)
1193 /*
1194 * then return all the interesting pages in the range (both
1195 * dirty and precious) to the pager
1196 */
1197 flush_flags = MEMORY_OBJECT_RETURN_ALL;
1198
1199 beg_off = trunc_page_64(beg_off);
1200 end_off = round_page_64(end_off);
1201 tsize = (memory_object_size_t)end_off - beg_off;
1202
1203 /* flush and/or invalidate pages in the range requested */
1204 kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
1205 beg_off, tsize,
1206 (memory_object_offset_t *)resid_off,
1207 io_errno, flush_flags, request_flags,
1208 VM_PROT_NO_CHANGE);
1209
1210 return ((kret == KERN_SUCCESS) ? 1 : 0);
1211}
1212
1213
1214/*
1215 * ubc_msync_internal
1216 *
1217 * Explicitly map a vnode that has an associate ubc_info, and add a reference
1218 * to it for the ubc system, if there isn't one already, so it will not be
1219 * recycled while it's in use, and set flags on the ubc_info to indicate that
1220 * we have done this
1221 *
1222 * Parameters: vp The vnode to map
1223 * flags The mapping flags for the vnode; this
1224 * will be a combination of one or more of
1225 * PROT_READ, PROT_WRITE, and PROT_EXEC
1226 *
1227 * Returns: 0 Success
1228 * EPERM Permission was denied
1229 *
1230 * Notes: An I/O reference on the vnode must already be held on entry
1231 *
1232 * If there is no ubc_info associated with the vnode, this function
1233 * will return success.
1234 *
1235 * If a permission error occurs, this function will return
1236 * failure; all other failures will cause this function to return
1237 * success.
1238 *
1239 * IMPORTANT: This is an internal use function, and its symbols
1240 * are not exported, hence its error checking is not very robust.
1241 * It is primarily used by:
1242 *
1243 * o mmap(), when mapping a file
1244 * o The deprecated map_fd() interface, when mapping a file
1245 * o When mapping a shared file (a shared library in the
1246 * shared segment region)
1247 * o When loading a program image during the exec process
1248 *
1249 * ...all of these uses ignore the return code, and any fault that
1250 * results later because of a failure is handled in the fix-up path
1251 * of the fault handler. The interface exists primarily as a
1252 * performance hint.
1253 *
1254 * Given that third party implementation of the type of interfaces
1255 * that would use this function, such as alternative executable
1256 * formats, etc., are unsupported, this function is not exported
1257 * for general use.
1258 *
1259 * The extra reference is held until the VM system unmaps the
1260 * vnode from its own context to maintain a vnode reference in
1261 * cases like open()/mmap()/close(), which leave the backing
1262 * object referenced by a mapped memory region in a process
1263 * address space.
1264 */
1265__private_extern__ int
1266ubc_map(vnode_t vp, int flags)
1267{
1268 struct ubc_info *uip;
1269 int error = 0;
1270 int need_ref = 0;
1271 int need_wakeup = 0;
1272
1273 if (UBCINFOEXISTS(vp)) {
1274
1275 vnode_lock(vp);
1276 uip = vp->v_ubcinfo;
1277
1278 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1279 SET(uip->ui_flags, UI_MAPWAITING);
1280 (void) msleep(&uip->ui_flags, &vp->v_lock,
1281 PRIBIO, "ubc_map", NULL);
1282 }
1283 SET(uip->ui_flags, UI_MAPBUSY);
1284 vnode_unlock(vp);
1285
1286 error = VNOP_MMAP(vp, flags, vfs_context_current());
1287
1288 if (error != EPERM)
1289 error = 0;
1290
1291 vnode_lock_spin(vp);
1292
1293 if (error == 0) {
1294 if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
1295 need_ref = 1;
1296 SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
1297 }
1298 CLR(uip->ui_flags, UI_MAPBUSY);
1299
1300 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1301 CLR(uip->ui_flags, UI_MAPWAITING);
1302 need_wakeup = 1;
1303 }
1304 vnode_unlock(vp);
1305
1306 if (need_wakeup)
1307 wakeup(&uip->ui_flags);
1308
1309 if (need_ref)
1310 vnode_ref(vp);
1311 }
1312 return (error);
1313}
1314
1315
1316/*
1317 * ubc_destroy_named
1318 *
1319 * Destroy the named memory object associated with the ubc_info control object
1320 * associated with the designated vnode, if there is a ubc_info associated
1321 * with the vnode, and a control object is associated with it
1322 *
1323 * Parameters: vp The designated vnode
1324 *
1325 * Returns: (void)
1326 *
1327 * Notes: This function is called on vnode termination for all vnodes,
1328 * and must therefore not assume that there is a ubc_info that is
1329 * associated with the vnode, nor that there is a control object
1330 * associated with the ubc_info.
1331 *
1332 * If all the conditions necessary are present, this function
1333 * calls memory_object_destory(), which will in turn end up
1334 * calling ubc_unmap() to release any vnode references that were
1335 * established via ubc_map().
1336 *
1337 * IMPORTANT: This is an internal use function that is used
1338 * exclusively by the internal use function vclean().
1339 */
1340__private_extern__ void
1341ubc_destroy_named(vnode_t vp)
1342{
1343 memory_object_control_t control;
1344 struct ubc_info *uip;
1345 kern_return_t kret;
1346
1347 if (UBCINFOEXISTS(vp)) {
1348 uip = vp->v_ubcinfo;
1349
1350 /* Terminate the memory object */
1351 control = ubc_getobject(vp, UBC_HOLDOBJECT);
1352 if (control != MEMORY_OBJECT_CONTROL_NULL) {
1353 kret = memory_object_destroy(control, 0);
1354 if (kret != KERN_SUCCESS)
1355 panic("ubc_destroy_named: memory_object_destroy failed");
1356 }
1357 }
1358}
1359
1360
/*
 * ubc_isinuse
 *
 * Report whether a vnode is currently in use by ubc beyond the supplied
 * busycount threshold.
 *
 * Parameters:	vp			The vnode to check
 *		busycount		Threshold busy count; used to bias
 *					for the count usually already held by
 *					the caller, to avoid races
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	The check is delegated to ubc_isinuse_locked() with its
 *		"locked" argument set to 0, so the vnode is locked only while
 *		the use count is examined and the answer is merely a snapshot.
 *		If more accurate information is required, the caller should
 *		hold an additional busycount and pass a non-zero busycount.
 *
 *		A vnode with no ubc_info is reported as not in use by ubc.
 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	return (UBCINFOEXISTS(vp) ? ubc_isinuse_locked(vp, busycount, 0) : 0);
}
1392
1393
1394/*
1395 * ubc_isinuse_locked
1396 *
1397 * Determine whether or not a vnode is currently in use by ubc at a level in
1398 * excess of the requested busycount
1399 *
1400 * Parameters: vp The vnode to check
1401 * busycount The threshold busy count, used to bias
1402 * the count usually already held by the
1403 * caller to avoid races
1404 * locked True if the vnode is already locked by
1405 * the caller
1406 *
1407 * Returns: 1 The vnode is in use over the threshold
1408 * 0 The vnode is not in use over the
1409 * threshold
1410 *
1411 * Notes: If the vnode is not locked on entry, it is locked while
1412 * actually asking the use count. If this is the case, this
1413 * function only represents a snapshot of the current state of
1414 * the vnode. If more accurate information is required, the
1415 * vnode lock should be held by the caller, otherwise an
1416 * additional busycount should be held by the caller and a
1417 * non-zero busycount used.
1418 *
1419 * If there is no ubc_info associated with the vnode, this
1420 * function will report that the vnode is not in use by ubc.
1421 */
1422int
1423ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
1424{
1425 int retval = 0;
1426
1427
1428 if (!locked)
1429 vnode_lock(vp);
1430
1431 if ((vp->v_usecount - vp->v_kusecount) > busycount)
1432 retval = 1;
1433
1434 if (!locked)
1435 vnode_unlock(vp);
1436 return (retval);
1437}
1438
1439
1440/*
1441 * ubc_unmap
1442 *
1443 * Reverse the effects of a ubc_map() call for a given vnode
1444 *
1445 * Parameters: vp vnode to unmap from ubc
1446 *
1447 * Returns: (void)
1448 *
1449 * Notes: This is an internal use function used by vnode_pager_unmap().
1450 * It will attempt to obtain a reference on the supplied vnode,
1451 * and if it can do so, and there is an associated ubc_info, and
1452 * the flags indicate that it was mapped via ubc_map(), then the
1453 * flag is cleared, the mapping removed, and the reference taken
1454 * by ubc_map() is released.
1455 *
1456 * IMPORTANT: This MUST only be called by the VM
1457 * to prevent race conditions.
1458 */
1459__private_extern__ void
1460ubc_unmap(struct vnode *vp)
1461{
1462 struct ubc_info *uip;
1463 int need_rele = 0;
1464 int need_wakeup = 0;
1465#if NAMEDRSRCFORK
1466 int named_fork = 0;
1467#endif
1468
1469 if (vnode_getwithref(vp))
1470 return;
1471
1472 if (UBCINFOEXISTS(vp)) {
1473 vnode_lock(vp);
1474 uip = vp->v_ubcinfo;
1475
1476 while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
1477 SET(uip->ui_flags, UI_MAPWAITING);
1478 (void) msleep(&uip->ui_flags, &vp->v_lock,
1479 PRIBIO, "ubc_unmap", NULL);
1480 }
1481 SET(uip->ui_flags, UI_MAPBUSY);
1482
1483#if NAMEDRSRCFORK
1484 if ((vp->v_flag & VISNAMEDSTREAM) &&
1485 (vp->v_parent != NULLVP) &&
1486 !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS)) {
1487 named_fork = 1;
1488 }
1489#endif
1490
1491 if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
1492 CLR(uip->ui_flags, UI_ISMAPPED);
1493 need_rele = 1;
1494 }
1495 vnode_unlock(vp);
1496
1497 if (need_rele) {
1498 (void)VNOP_MNOMAP(vp, vfs_context_current());
1499
1500#if NAMEDRSRCFORK
1501 if (named_fork) {
1502 vnode_relenamedstream(vp->v_parent, vp, vfs_context_current());
1503 }
1504#endif
1505
1506 vnode_rele(vp);
1507 }
1508
1509 vnode_lock_spin(vp);
1510
1511 CLR(uip->ui_flags, UI_MAPBUSY);
1512 if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
1513 CLR(uip->ui_flags, UI_MAPWAITING);
1514 need_wakeup = 1;
1515 }
1516 vnode_unlock(vp);
1517
1518 if (need_wakeup)
1519 wakeup(&uip->ui_flags);
1520
1521 }
1522 /*
1523 * the drop of the vnode ref will cleanup
1524 */
1525 vnode_put(vp);
1526}
1527
1528
1529/*
1530 * ubc_page_op
1531 *
1532 * Manipulate individual page state for a vnode with an associated ubc_info
1533 * with an associated memory object control.
1534 *
1535 * Parameters: vp The vnode backing the page
1536 * f_offset A file offset interior to the page
1537 * ops The operations to perform, as a bitmap
1538 * (see below for more information)
1539 * phys_entryp The address of a ppnum_t; may be NULL
1540 * to ignore
1541 * flagsp A pointer to an int to contain flags;
1542 * may be NULL to ignore
1543 *
1544 * Returns: KERN_SUCCESS Success
1545 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1546 * object associated
1547 * KERN_INVALID_OBJECT If UPL_POP_PHYSICAL and the object is
1548 * not physically contiguous
1549 * KERN_INVALID_OBJECT If !UPL_POP_PHYSICAL and the object is
1550 * physically contiguous
1551 * KERN_FAILURE If the page cannot be looked up
1552 *
1553 * Implicit Returns:
1554 * *phys_entryp (modified) If phys_entryp is non-NULL and
1555 * UPL_POP_PHYSICAL
1556 * *flagsp (modified) If flagsp is non-NULL and there was
1557 * !UPL_POP_PHYSICAL and a KERN_SUCCESS
1558 *
1559 * Notes: For object boundaries, it is considerably more efficient to
1560 * ensure that f_offset is in fact on a page boundary, as this
1561 * will avoid internal use of the hash table to identify the
1562 * page, and would therefore skip a number of early optimizations.
1563 * Since this is a page operation anyway, the caller should try
1564 * to pass only a page aligned offset because of this.
1565 *
1566 * *flagsp may be modified even if this function fails. If it is
1567 * modified, it will contain the condition of the page before the
1568 * requested operation was attempted; these will only include the
1569 * bitmap flags, and not the PL_POP_PHYSICAL, UPL_POP_DUMP,
1570 * UPL_POP_SET, or UPL_POP_CLR bits.
1571 *
1572 * The flags field may contain a specific operation, such as
1573 * UPL_POP_PHYSICAL or UPL_POP_DUMP:
1574 *
1575 * o UPL_POP_PHYSICAL Fail if not contiguous; if
1576 * *phys_entryp and successful, set
1577 * *phys_entryp
1578 * o UPL_POP_DUMP Dump the specified page
1579 *
1580 * Otherwise, it is treated as a bitmap of one or more page
1581 * operations to perform on the final memory object; allowable
1582 * bit values are:
1583 *
1584 * o UPL_POP_DIRTY The page is dirty
1585 * o UPL_POP_PAGEOUT The page is paged out
1586 * o UPL_POP_PRECIOUS The page is precious
1587 * o UPL_POP_ABSENT The page is absent
1588 * o UPL_POP_BUSY The page is busy
1589 *
1590 * If the page status is only being queried and not modified, then
1591 * not other bits should be specified. However, if it is being
1592 * modified, exactly ONE of the following bits should be set:
1593 *
1594 * o UPL_POP_SET Set the current bitmap bits
1595 * o UPL_POP_CLR Clear the current bitmap bits
1596 *
1597 * Thus to effect a combination of setting an clearing, it may be
1598 * necessary to call this function twice. If this is done, the
1599 * set should be used before the clear, since clearing may trigger
1600 * a wakeup on the destination page, and if the page is backed by
1601 * an encrypted swap file, setting will trigger the decryption
1602 * needed before the wakeup occurs.
1603 */
1604kern_return_t
1605ubc_page_op(
1606 struct vnode *vp,
1607 off_t f_offset,
1608 int ops,
1609 ppnum_t *phys_entryp,
1610 int *flagsp)
1611{
1612 memory_object_control_t control;
1613
1614 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1615 if (control == MEMORY_OBJECT_CONTROL_NULL)
1616 return KERN_INVALID_ARGUMENT;
1617
1618 return (memory_object_page_op(control,
1619 (memory_object_offset_t)f_offset,
1620 ops,
1621 phys_entryp,
1622 flagsp));
1623}
1624
1625
1626/*
1627 * ubc_range_op
1628 *
1629 * Manipulate page state for a range of memory for a vnode with an associated
1630 * ubc_info with an associated memory object control, when page level state is
1631 * not required to be returned from the call (i.e. there are no phys_entryp or
1632 * flagsp parameters to this call, and it takes a range which may contain
1633 * multiple pages, rather than an offset interior to a single page).
1634 *
1635 * Parameters: vp The vnode backing the page
1636 * f_offset_beg A file offset interior to the start page
1637 * f_offset_end A file offset interior to the end page
1638 * ops The operations to perform, as a bitmap
1639 * (see below for more information)
1640 * range The address of an int; may be NULL to
1641 * ignore
1642 *
1643 * Returns: KERN_SUCCESS Success
1644 * KERN_INVALID_ARGUMENT If the memory object control has no VM
1645 * object associated
1646 * KERN_INVALID_OBJECT If the object is physically contiguous
1647 *
1648 * Implicit Returns:
1649 * *range (modified) If range is non-NULL, its contents will
1650 * be modified to contain the number of
1651 * bytes successfully operated upon.
1652 *
1653 * Notes: IMPORTANT: This function cannot be used on a range that
1654 * consists of physically contiguous pages.
1655 *
1656 * For object boundaries, it is considerably more efficient to
1657 * ensure that f_offset_beg and f_offset_end are in fact on page
1658 * boundaries, as this will avoid internal use of the hash table
1659 * to identify the page, and would therefore skip a number of
1660 * early optimizations. Since this is an operation on a set of
1661 * pages anyway, the caller should try to pass only a page aligned
1662 * offsets because of this.
1663 *
1664 * *range will be modified only if this function succeeds.
1665 *
1666 * The flags field MUST contain a specific operation; allowable
1667 * values are:
1668 *
1669 * o UPL_ROP_ABSENT Returns the extent of the range
1670 * presented which is absent, starting
1671 * with the start address presented
1672 *
1673 * o UPL_ROP_PRESENT Returns the extent of the range
1674 * presented which is present (resident),
1675 * starting with the start address
1676 * presented
1677 * o UPL_ROP_DUMP Dump the pages which are found in the
1678 * target object for the target range.
1679 *
1680 * IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT; if there are
1681 * multiple regions in the range, only the first matching region
1682 * is returned.
1683 */
1684kern_return_t
1685ubc_range_op(
1686 struct vnode *vp,
1687 off_t f_offset_beg,
1688 off_t f_offset_end,
1689 int ops,
1690 int *range)
1691{
1692 memory_object_control_t control;
1693
1694 control = ubc_getobject(vp, UBC_FLAGS_NONE);
1695 if (control == MEMORY_OBJECT_CONTROL_NULL)
1696 return KERN_INVALID_ARGUMENT;
1697
1698 return (memory_object_range_op(control,
1699 (memory_object_offset_t)f_offset_beg,
1700 (memory_object_offset_t)f_offset_end,
1701 ops,
1702 range));
1703}
1704
1705
1706/*
1707 * ubc_create_upl
1708 *
1709 * Given a vnode, cause the population of a portion of the vm_object; based on
1710 * the nature of the request, the pages returned may contain valid data, or
1711 * they may be uninitialized.
1712 *
1713 * Parameters: vp The vnode from which to create the upl
1714 * f_offset The start offset into the backing store
1715 * represented by the vnode
1716 * bufsize The size of the upl to create
1717 * uplp Pointer to the upl_t to receive the
1718 * created upl; MUST NOT be NULL
1719 * plp Pointer to receive the internal page
1720 * list for the created upl; MAY be NULL
1721 * to ignore
1722 *
1723 * Returns: KERN_SUCCESS The requested upl has been created
1724 * KERN_INVALID_ARGUMENT The bufsize argument is not an even
1725 * multiple of the page size
1726 * KERN_INVALID_ARGUMENT There is no ubc_info associated with
1727 * the vnode, or there is no memory object
1728 * control associated with the ubc_info
1729 * memory_object_upl_request:KERN_INVALID_VALUE
1730 * The supplied upl_flags argument is
1731 * invalid
1732 * Implicit Returns:
1733 * *uplp (modified)
1734 * *plp (modified) If non-NULL, the value of *plp will be
1735 * modified to point to the internal page
1736 * list; this modification may occur even
1737 * if this function is unsuccessful, in
1738 * which case the contents may be invalid
1739 *
1740 * Note: If successful, the returned *uplp MUST subsequently be freed
1741 * via a call to ubc_upl_commit(), ubc_upl_commit_range(),
1742 * ubc_upl_abort(), or ubc_upl_abort_range().
1743 */
1744kern_return_t
1745ubc_create_upl(
1746 struct vnode *vp,
1747 off_t f_offset,
1748 long bufsize,
1749 upl_t *uplp,
1750 upl_page_info_t **plp,
1751 int uplflags)
1752{
1753 memory_object_control_t control;
1754 mach_msg_type_number_t count;
1755 int ubcflags;
1756 kern_return_t kr;
1757
1758 if (bufsize & 0xfff)
1759 return KERN_INVALID_ARGUMENT;
1760
1761 if (uplflags & UPL_FOR_PAGEOUT) {
1762 uplflags &= ~UPL_FOR_PAGEOUT;
1763 ubcflags = UBC_FOR_PAGEOUT;
1764 } else
1765 ubcflags = UBC_FLAGS_NONE;
1766
1767 control = ubc_getobject(vp, ubcflags);
1768 if (control == MEMORY_OBJECT_CONTROL_NULL)
1769 return KERN_INVALID_ARGUMENT;
1770
1771 if (uplflags & UPL_WILL_BE_DUMPED) {
1772 uplflags &= ~UPL_WILL_BE_DUMPED;
1773 uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
1774 } else
1775 uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
1776 count = 0;
1777
1778 kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, &count, uplflags);
1779 if (plp != NULL)
1780 *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
1781 return kr;
1782}
1783
1784
1785/*
1786 * ubc_upl_maxbufsize
1787 *
1788 * Return the maximum bufsize ubc_create_upl( ) will take.
1789 *
1790 * Parameters: none
1791 *
1792 * Returns: maximum size buffer (in bytes) ubc_create_upl( ) will take.
1793 */
1794upl_size_t
1795ubc_upl_maxbufsize(
1796 void)
1797{
1798 return(MAX_UPL_TRANSFER * PAGE_SIZE);
1799}
1800
1801/*
1802 * ubc_upl_map
1803 *
1804 * Map the page list assocated with the supplied upl into the kernel virtual
1805 * address space at the virtual address indicated by the dst_addr argument;
1806 * the entire upl is mapped
1807 *
1808 * Parameters: upl The upl to map
1809 * dst_addr The address at which to map the upl
1810 *
1811 * Returns: KERN_SUCCESS The upl has been mapped
1812 * KERN_INVALID_ARGUMENT The upl is UPL_NULL
1813 * KERN_FAILURE The upl is already mapped
1814 * vm_map_enter:KERN_INVALID_ARGUMENT
1815 * A failure code from vm_map_enter() due
1816 * to an invalid argument
1817 */
1818kern_return_t
1819ubc_upl_map(
1820 upl_t upl,
1821 vm_offset_t *dst_addr)
1822{
1823 return (vm_upl_map(kernel_map, upl, dst_addr));
1824}
1825
1826
1827/*
1828 * ubc_upl_unmap
1829 *
1830 * Unmap the page list assocated with the supplied upl from the kernel virtual
1831 * address space; the entire upl is unmapped.
1832 *
1833 * Parameters: upl The upl to unmap
1834 *
1835 * Returns: KERN_SUCCESS The upl has been unmapped
1836 * KERN_FAILURE The upl is not currently mapped
1837 * KERN_INVALID_ARGUMENT If the upl is UPL_NULL
1838 */
1839kern_return_t
1840ubc_upl_unmap(
1841 upl_t upl)
1842{
1843 return(vm_upl_unmap(kernel_map, upl));
1844}
1845
1846
1847/*
1848 * ubc_upl_commit
1849 *
1850 * Commit the contents of the upl to the backing store
1851 *
1852 * Parameters: upl The upl to commit
1853 *
1854 * Returns: KERN_SUCCESS The upl has been committed
1855 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
1856 * KERN_FAILURE The supplied upl does not represent
1857 * device memory, and the offset plus the
1858 * size would exceed the actual size of
1859 * the upl
1860 *
1861 * Notes: In practice, the only return value for this function should be
1862 * KERN_SUCCESS, unless there has been data structure corruption;
1863 * since the upl is deallocated regardless of success or failure,
1864 * there's really nothing to do about this other than panic.
1865 *
1866 * IMPORTANT: Use of this function should not be mixed with use of
1867 * ubc_upl_commit_range(), due to the unconditional deallocation
1868 * by this function.
1869 */
1870kern_return_t
1871ubc_upl_commit(
1872 upl_t upl)
1873{
1874 upl_page_info_t *pl;
1875 kern_return_t kr;
1876
1877 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1878 kr = upl_commit(upl, pl, MAX_UPL_TRANSFER);
1879 upl_deallocate(upl);
1880 return kr;
1881}
1882
1883
1884/*
1885 * ubc_upl_commit
1886 *
1887 * Commit the contents of the specified range of the upl to the backing store
1888 *
1889 * Parameters: upl The upl to commit
1890 * offset The offset into the upl
1891 * size The size of the region to be committed,
1892 * starting at the specified offset
1893 * flags commit type (see below)
1894 *
1895 * Returns: KERN_SUCCESS The range has been committed
1896 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
1897 * KERN_FAILURE The supplied upl does not represent
1898 * device memory, and the offset plus the
1899 * size would exceed the actual size of
1900 * the upl
1901 *
1902 * Notes: IMPORTANT: If the commit is successful, and the object is now
1903 * empty, the upl will be deallocated. Since the caller cannot
1904 * check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
1905 * should generally only be used when the offset is 0 and the size
1906 * is equal to the upl size.
1907 *
1908 * The flags argument is a bitmap of flags on the rage of pages in
1909 * the upl to be committed; allowable flags are:
1910 *
1911 * o UPL_COMMIT_FREE_ON_EMPTY Free the upl when it is
1912 * both empty and has been
1913 * successfully committed
1914 * o UPL_COMMIT_CLEAR_DIRTY Clear each pages dirty
1915 * bit; will prevent a
1916 * later pageout
1917 * o UPL_COMMIT_SET_DIRTY Set each pages dirty
1918 * bit; will cause a later
1919 * pageout
1920 * o UPL_COMMIT_INACTIVATE Clear each pages
1921 * reference bit; the page
1922 * will not be accessed
1923 * o UPL_COMMIT_ALLOW_ACCESS Unbusy each page; pages
1924 * become busy when an
1925 * IOMemoryDescriptor is
1926 * mapped or redirected,
1927 * and we have to wait for
1928 * an IOKit driver
1929 *
1930 * The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
1931 * not be specified by the caller.
1932 *
1933 * The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
1934 * mutually exclusive, and should not be combined.
1935 */
1936kern_return_t
1937ubc_upl_commit_range(
1938 upl_t upl,
1939 vm_offset_t offset,
1940 vm_size_t size,
1941 int flags)
1942{
1943 upl_page_info_t *pl;
1944 boolean_t empty;
1945 kern_return_t kr;
1946
1947 if (flags & UPL_COMMIT_FREE_ON_EMPTY)
1948 flags |= UPL_COMMIT_NOTIFY_EMPTY;
1949
1950 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1951
1952 kr = upl_commit_range(upl, offset, size, flags,
1953 pl, MAX_UPL_TRANSFER, &empty);
1954
1955 if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
1956 upl_deallocate(upl);
1957
1958 return kr;
1959}
1960
1961
1962/*
1963 * ubc_upl_abort_range
1964 *
1965 * Abort the contents of the specified range of the specified upl
1966 *
1967 * Parameters: upl The upl to abort
1968 * offset The offset into the upl
1969 * size The size of the region to be aborted,
1970 * starting at the specified offset
1971 * abort_flags abort type (see below)
1972 *
1973 * Returns: KERN_SUCCESS The range has been aborted
1974 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
1975 * KERN_FAILURE The supplied upl does not represent
1976 * device memory, and the offset plus the
1977 * size would exceed the actual size of
1978 * the upl
1979 *
1980 * Notes: IMPORTANT: If the abort is successful, and the object is now
1981 * empty, the upl will be deallocated. Since the caller cannot
1982 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
1983 * should generally only be used when the offset is 0 and the size
1984 * is equal to the upl size.
1985 *
1986 * The abort_flags argument is a bitmap of flags on the range of
1987 * pages in the upl to be aborted; allowable flags are:
1988 *
1989 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
1990 * empty and has been successfully
1991 * aborted
1992 * o UPL_ABORT_RESTART The operation must be restarted
1993 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
1994 * o UPL_ABORT_ERROR An I/O error occurred
1995 * o UPL_ABORT_DUMP_PAGES Just free the pages
1996 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
1997 * o UPL_ABORT_ALLOW_ACCESS RESERVED
1998 *
1999 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2000 * not be specified by the caller. It is intended to fulfill the
2001 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2002 * ubc_upl_commit_range(), but is never referenced internally.
2003 *
2004 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2005 * referenced; do not use it.
2006 */
2007kern_return_t
2008ubc_upl_abort_range(
2009 upl_t upl,
2010 vm_offset_t offset,
2011 vm_size_t size,
2012 int abort_flags)
2013{
2014 kern_return_t kr;
2015 boolean_t empty = FALSE;
2016
2017 if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
2018 abort_flags |= UPL_ABORT_NOTIFY_EMPTY;
2019
2020 kr = upl_abort_range(upl, offset, size, abort_flags, &empty);
2021
2022 if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
2023 upl_deallocate(upl);
2024
2025 return kr;
2026}
2027
2028
2029/*
2030 * ubc_upl_abort
2031 *
2032 * Abort the contents of the specified upl
2033 *
2034 * Parameters: upl The upl to abort
2035 * abort_type abort type (see below)
2036 *
2037 * Returns: KERN_SUCCESS The range has been aborted
2038 * KERN_INVALID_ARGUMENT The supplied upl was UPL_NULL
2039 * KERN_FAILURE The supplied upl does not represent
2040 * device memory, and the offset plus the
2041 * size would exceed the actual size of
2042 * the upl
2043 *
2044 * Notes: IMPORTANT: If the abort is successful, and the object is now
2045 * empty, the upl will be deallocated. Since the caller cannot
2046 * check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
2047 * should generally only be used when the offset is 0 and the size
2048 * is equal to the upl size.
2049 *
2050 * The abort_type is a bitmap of flags on the range of
2051 * pages in the upl to be aborted; allowable flags are:
2052 *
2053 * o UPL_ABORT_FREE_ON_EMPTY Free the upl when it is both
2054 * empty and has been successfully
2055 * aborted
2056 * o UPL_ABORT_RESTART The operation must be restarted
2057 * o UPL_ABORT_UNAVAILABLE The pages are unavailable
2058 * o UPL_ABORT_ERROR An I/O error occurred
2059 * o UPL_ABORT_DUMP_PAGES Just free the pages
2060 * o UPL_ABORT_NOTIFY_EMPTY RESERVED
2061 * o UPL_ABORT_ALLOW_ACCESS RESERVED
2062 *
2063 * The UPL_ABORT_NOTIFY_EMPTY is an internal use flag and should
2064 * not be specified by the caller. It is intended to fulfill the
2065 * same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
2066 * ubc_upl_commit_range(), but is never referenced internally.
2067 *
2068 * The UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
2069 * referenced; do not use it.
2070 */
2071kern_return_t
2072ubc_upl_abort(
2073 upl_t upl,
2074 int abort_type)
2075{
2076 kern_return_t kr;
2077
2078 kr = upl_abort(upl, abort_type);
2079 upl_deallocate(upl);
2080 return kr;
2081}
2082
2083
2084/*
2085 * ubc_upl_pageinfo
2086 *
2087 * Retrieve the internal page list for the specified upl
2088 *
2089 * Parameters: upl The upl to obtain the page list from
2090 *
2091 * Returns: !NULL The (upl_page_info_t *) for the page
2092 * list internal to the upl
2093 * NULL Error/no page list associated
2094 *
2095 * Notes: IMPORTANT: The function is only valid on internal objects
2096 * where the list request was made with the UPL_INTERNAL flag.
2097 *
2098 * This function is a utility helper function, since some callers
2099 * may not have direct access to the header defining the macro,
2100 * due to abstraction layering constraints.
2101 */
2102upl_page_info_t *
2103ubc_upl_pageinfo(
2104 upl_t upl)
2105{
2106 return (UPL_GET_INTERNAL_PAGE_LIST(upl));
2107}
2108
2109
2110int
2111UBCINFOEXISTS(struct vnode * vp)
2112{
2113 return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
2114}
2115
2116
2117/*
2118 * CODE SIGNING
2119 */
2120#define CS_BLOB_KEEP_IN_KERNEL 1
2121static volatile SInt32 cs_blob_size = 0;
2122static volatile SInt32 cs_blob_count = 0;
2123static SInt32 cs_blob_size_peak = 0;
2124static UInt32 cs_blob_size_max = 0;
2125static SInt32 cs_blob_count_peak = 0;
2126extern int cs_debug;
2127
2128int cs_validation = 1;
2129
2130SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW, &cs_validation, 0, "Do validate code signatures");
2131SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD, &cs_blob_count, 0, "Current number of code signature blobs");
2132SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD, &cs_blob_size, 0, "Current size of all code signature blobs");
2133SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
2134SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
2135SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");
2136
2137int
2138ubc_cs_blob_add(
2139 struct vnode *vp,
2140 cpu_type_t cputype,
2141 off_t base_offset,
2142 vm_address_t addr,
2143 vm_size_t size)
2144{
2145 kern_return_t kr;
2146 struct ubc_info *uip;
2147 struct cs_blob *blob, *oblob;
2148 int error;
2149 ipc_port_t blob_handle;
2150 memory_object_size_t blob_size;
2151 const CS_CodeDirectory *cd;
2152 off_t blob_start_offset, blob_end_offset;
2153 SHA1_CTX sha1ctxt;
2154
2155 blob_handle = IPC_PORT_NULL;
2156
2157 blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
2158 if (blob == NULL) {
2159 return ENOMEM;
2160 }
2161
2162 /* get a memory entry on the blob */
2163 blob_size = (memory_object_size_t) size;
2164 kr = mach_make_memory_entry_64(kernel_map,
2165 &blob_size,
2166 addr,
2167 VM_PROT_READ,
2168 &blob_handle,
2169 IPC_PORT_NULL);
2170 if (kr != KERN_SUCCESS) {
2171 error = ENOMEM;
2172 goto out;
2173 }
2174 if (memory_object_round_page(blob_size) !=
2175 (memory_object_size_t) round_page(size)) {
2176 printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%x !?\n",
2177 blob_size, size);
2178 panic("XXX FBDP size mismatch 0x%llx 0x%x\n", blob_size, size);
2179 error = EINVAL;
2180 goto out;
2181 }
2182
2183
2184 /* fill in the new blob */
2185 blob->csb_cpu_type = cputype;
2186 blob->csb_base_offset = base_offset;
2187 blob->csb_mem_size = size;
2188 blob->csb_mem_offset = 0;
2189 blob->csb_mem_handle = blob_handle;
2190 blob->csb_mem_kaddr = addr;
2191
2192
2193 /*
2194 * Validate the blob's contents
2195 */
2196 cd = findCodeDirectory(
2197 (const CS_SuperBlob *) addr,
2198 (char *) addr,
2199 (char *) addr + blob->csb_mem_size);
2200 if (cd == NULL) {
2201 /* no code directory => useless blob ! */
2202 blob->csb_flags = 0;
2203 blob->csb_start_offset = 0;
2204 blob->csb_end_offset = 0;
2205 } else {
2206 unsigned char *sha1_base;
2207 int sha1_size;
2208
2209 blob->csb_flags = ntohl(cd->flags) | CS_VALID;
2210 blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
2211 blob->csb_start_offset = (blob->csb_end_offset -
2212 (ntohl(cd->nCodeSlots) * PAGE_SIZE));
2213 /* compute the blob's SHA1 hash */
2214 sha1_base = (const unsigned char *) cd;
2215 sha1_size = ntohl(cd->length);
2216 SHA1Init(&sha1ctxt);
2217 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
2218 SHA1Final(blob->csb_sha1, &sha1ctxt);
2219 }
2220
2221
2222 /*
2223 * Validate the blob's coverage
2224 */
2225 blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
2226 blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;
2227
2228 if (blob_start_offset >= blob_end_offset) {
2229 /* reject empty or backwards blob */
2230 error = EINVAL;
2231 goto out;
2232 }
2233
2234 vnode_lock(vp);
2235 if (! UBCINFOEXISTS(vp)) {
2236 vnode_unlock(vp);
2237 error = ENOENT;
2238 goto out;
2239 }
2240 uip = vp->v_ubcinfo;
2241
2242 /* check if this new blob overlaps with an existing blob */
2243 for (oblob = uip->cs_blobs;
2244 oblob != NULL;
2245 oblob = oblob->csb_next) {
2246 off_t oblob_start_offset, oblob_end_offset;
2247
2248 oblob_start_offset = (oblob->csb_base_offset +
2249 oblob->csb_start_offset);
2250 oblob_end_offset = (oblob->csb_base_offset +
2251 oblob->csb_end_offset);
2252 if (blob_start_offset >= oblob_end_offset ||
2253 blob_end_offset <= oblob_start_offset) {
2254 /* no conflict with this existing blob */
2255 } else {
2256 /* conflict ! */
2257 if (blob_start_offset == oblob_start_offset &&
2258 blob_end_offset == oblob_end_offset &&
2259 blob->csb_mem_size == oblob->csb_mem_size &&
2260 blob->csb_flags == oblob->csb_flags &&
2261 (blob->csb_cpu_type == CPU_TYPE_ANY ||
2262 oblob->csb_cpu_type == CPU_TYPE_ANY ||
2263 blob->csb_cpu_type == oblob->csb_cpu_type) &&
2264 !bcmp(blob->csb_sha1,
2265 oblob->csb_sha1,
2266 SHA1_RESULTLEN)) {
2267 /*
2268 * We already have this blob:
2269 * we'll return success but
2270 * throw away the new blob.
2271 */
2272 if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
2273 /*
2274 * The old blob matches this one
2275 * but doesn't have any CPU type.
2276 * Update it with whatever the caller
2277 * provided this time.
2278 */
2279 oblob->csb_cpu_type = cputype;
2280 }
2281 vnode_unlock(vp);
2282 error = EAGAIN;
2283 goto out;
2284 } else {
2285 /* different blob: reject the new one */
2286 vnode_unlock(vp);
2287 error = EALREADY;
2288 goto out;
2289 }
2290 }
2291
2292 }
2293
2294
2295 /* mark this vnode's VM object as having "signed pages" */
2296 kr = memory_object_signed(uip->ui_control, TRUE);
2297 if (kr != KERN_SUCCESS) {
2298 vnode_unlock(vp);
2299 error = ENOENT;
2300 goto out;
2301 }
2302
2303 /*
2304 * Add this blob to the list of blobs for this vnode.
2305 * We always add at the front of the list and we never remove a
2306 * blob from the list, so ubc_cs_get_blobs() can return whatever
2307 * the top of the list was and that list will remain valid
2308 * while we validate a page, even after we release the vnode's lock.
2309 */
2310 blob->csb_next = uip->cs_blobs;
2311 uip->cs_blobs = blob;
2312
2313 OSAddAtomic(+1, &cs_blob_count);
2314 if (cs_blob_count > cs_blob_count_peak) {
2315 cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
2316 }
2317 OSAddAtomic(+blob->csb_mem_size, &cs_blob_size);
2318 if (cs_blob_size > cs_blob_size_peak) {
2319 cs_blob_size_peak = cs_blob_size; /* XXX atomic ? */
2320 }
2321 if (blob->csb_mem_size > cs_blob_size_max) {
2322 cs_blob_size_max = blob->csb_mem_size;
2323 }
2324
2325 if (cs_debug) {
2326 proc_t p;
2327
2328 p = current_proc();
2329 printf("CODE SIGNING: proc %d(%s) "
2330 "loaded %s signatures for file (%s) "
2331 "range 0x%llx:0x%llx flags 0x%x\n",
2332 p->p_pid, p->p_comm,
2333 blob->csb_cpu_type == -1 ? "detached" : "embedded",
2334 vnode_name(vp),
2335 blob->csb_base_offset + blob->csb_start_offset,
2336 blob->csb_base_offset + blob->csb_end_offset,
2337 blob->csb_flags);
2338 }
2339
2340#if !CS_BLOB_KEEP_IN_KERNEL
2341 blob->csb_mem_kaddr = 0;
2342#endif /* CS_BLOB_KEEP_IN_KERNEL */
2343
2344 vnode_unlock(vp);
2345
2346 error = 0; /* success ! */
2347
2348out:
2349 if (error) {
2350 /* we failed; release what we allocated */
2351 if (blob) {
2352 kfree(blob, sizeof (*blob));
2353 blob = NULL;
2354 }
2355 if (blob_handle != IPC_PORT_NULL) {
2356 mach_memory_entry_port_release(blob_handle);
2357 blob_handle = IPC_PORT_NULL;
2358 }
2359 } else {
2360#if !CS_BLOB_KEEP_IN_KERNEL
2361 kmem_free(kernel_map, addr, size);
2362#endif /* CS_BLOB_KEEP_IN_KERNEL */
2363 }
2364
2365 if (error == EAGAIN) {
2366 /*
2367 * See above: error is EAGAIN if we were asked
2368 * to add an existing blob again. We cleaned the new
2369 * blob and we want to return success.
2370 */
2371 error = 0;
2372 /*
2373 * Since we're not failing, consume the data we received.
2374 */
2375 kmem_free(kernel_map, addr, size);
2376 }
2377
2378 return error;
2379}
2380
2381
2382struct cs_blob *
2383ubc_cs_blob_get(
2384 struct vnode *vp,
2385 cpu_type_t cputype,
2386 off_t offset)
2387{
2388 struct ubc_info *uip;
2389 struct cs_blob *blob;
2390 off_t offset_in_blob;
2391
2392 vnode_lock_spin(vp);
2393
2394 if (! UBCINFOEXISTS(vp)) {
2395 blob = NULL;
2396 goto out;
2397 }
2398
2399 uip = vp->v_ubcinfo;
2400 for (blob = uip->cs_blobs;
2401 blob != NULL;
2402 blob = blob->csb_next) {
2403 if (cputype != -1 && blob->csb_cpu_type == cputype) {
2404 break;
2405 }
2406 if (offset != -1) {
2407 offset_in_blob = offset - blob->csb_base_offset;
2408 if (offset_in_blob >= blob->csb_start_offset &&
2409 offset_in_blob < blob->csb_end_offset) {
2410 /* our offset is covered by this blob */
2411 break;
2412 }
2413 }
2414 }
2415
2416out:
2417 vnode_unlock(vp);
2418
2419 return blob;
2420}
2421
2422static void
2423ubc_cs_free(
2424 struct ubc_info *uip)
2425{
2426 struct cs_blob *blob, *next_blob;
2427
2428 for (blob = uip->cs_blobs;
2429 blob != NULL;
2430 blob = next_blob) {
2431 next_blob = blob->csb_next;
2432 if (blob->csb_mem_kaddr != 0) {
2433 kmem_free(kernel_map,
2434 blob->csb_mem_kaddr,
2435 blob->csb_mem_size);
2436 blob->csb_mem_kaddr = 0;
2437 }
2438 mach_memory_entry_port_release(blob->csb_mem_handle);
2439 blob->csb_mem_handle = IPC_PORT_NULL;
2440 OSAddAtomic(-1, &cs_blob_count);
2441 OSAddAtomic(-blob->csb_mem_size, &cs_blob_size);
2442 kfree(blob, sizeof (*blob));
2443 }
2444 uip->cs_blobs = NULL;
2445}
2446
2447struct cs_blob *
2448ubc_get_cs_blobs(
2449 struct vnode *vp)
2450{
2451 struct ubc_info *uip;
2452 struct cs_blob *blobs;
2453
2454 vnode_lock_spin(vp);
2455
2456 if (! UBCINFOEXISTS(vp)) {
2457 blobs = NULL;
2458 goto out;
2459 }
2460
2461 uip = vp->v_ubcinfo;
2462 blobs = uip->cs_blobs;
2463
2464out:
2465 vnode_unlock(vp);
2466
2467 return blobs;
2468}
2469
2470unsigned long cs_validate_page_no_hash = 0;
2471unsigned long cs_validate_page_bad_hash = 0;
2472boolean_t
2473cs_validate_page(
2474 void *_blobs,
2475 memory_object_offset_t page_offset,
2476 const void *data,
2477 boolean_t *tainted)
2478{
2479 SHA1_CTX sha1ctxt;
2480 unsigned char actual_hash[SHA1_RESULTLEN];
2481 unsigned char expected_hash[SHA1_RESULTLEN];
2482 boolean_t found_hash;
2483 struct cs_blob *blobs, *blob;
2484 const CS_CodeDirectory *cd;
2485 const CS_SuperBlob *embedded;
2486 off_t start_offset, end_offset;
2487 const unsigned char *hash;
2488 boolean_t validated;
2489 off_t offset; /* page offset in the file */
2490 size_t size;
2491 off_t codeLimit = 0;
2492 char *lower_bound, *upper_bound;
2493 vm_offset_t kaddr, blob_addr;
2494 vm_size_t ksize;
2495 kern_return_t kr;
2496
2497 offset = page_offset;
2498
2499 /* retrieve the expected hash */
2500 found_hash = FALSE;
2501 blobs = (struct cs_blob *) _blobs;
2502
2503 for (blob = blobs;
2504 blob != NULL;
2505 blob = blob->csb_next) {
2506 offset = page_offset - blob->csb_base_offset;
2507 if (offset < blob->csb_start_offset ||
2508 offset >= blob->csb_end_offset) {
2509 /* our page is not covered by this blob */
2510 continue;
2511 }
2512
2513 /* map the blob in the kernel address space */
2514 kaddr = blob->csb_mem_kaddr;
2515 if (kaddr == 0) {
2516 ksize = (vm_size_t) (blob->csb_mem_size +
2517 blob->csb_mem_offset);
2518 kr = vm_map(kernel_map,
2519 &kaddr,
2520 ksize,
2521 0,
2522 VM_FLAGS_ANYWHERE,
2523 blob->csb_mem_handle,
2524 0,
2525 TRUE,
2526 VM_PROT_READ,
2527 VM_PROT_READ,
2528 VM_INHERIT_NONE);
2529 if (kr != KERN_SUCCESS) {
2530 /* XXX FBDP what to do !? */
2531 printf("cs_validate_page: failed to map blob, "
2532 "size=0x%x kr=0x%x\n",
2533 blob->csb_mem_size, kr);
2534 break;
2535 }
2536 }
2537 blob_addr = kaddr + blob->csb_mem_offset;
2538
2539 lower_bound = CAST_DOWN(char *, blob_addr);
2540 upper_bound = lower_bound + blob->csb_mem_size;
2541
2542 embedded = (const CS_SuperBlob *) blob_addr;
2543 cd = findCodeDirectory(embedded, lower_bound, upper_bound);
2544 if (cd != NULL) {
2545 if (cd->pageSize != PAGE_SHIFT ||
2546 cd->hashType != 0x1 ||
2547 cd->hashSize != SHA1_RESULTLEN) {
2548 /* bogus blob ? */
2549#if !CS_BLOB_KEEP_IN_KERNEL
2550 kmem_free(kernel_map, kaddr, ksize);
2551#endif /* CS_BLOB_KEEP_IN_KERNEL */
2552 continue;
2553 }
2554
2555 end_offset = round_page(ntohl(cd->codeLimit));
2556 start_offset = end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE);
2557 offset = page_offset - blob->csb_base_offset;
2558 if (offset < start_offset ||
2559 offset >= end_offset) {
2560 /* our page is not covered by this blob */
2561#if !CS_BLOB_KEEP_IN_KERNEL
2562 kmem_free(kernel_map, kaddr, ksize);
2563#endif /* CS_BLOB_KEEP_IN_KERNEL */
2564 continue;
2565 }
2566
2567 codeLimit = ntohl(cd->codeLimit);
2568 hash = hashes(cd, atop(offset),
2569 lower_bound, upper_bound);
2570 bcopy(hash, expected_hash, sizeof (expected_hash));
2571 found_hash = TRUE;
2572
2573#if !CS_BLOB_KEEP_IN_KERNEL
2574 /* we no longer need that blob in the kernel map */
2575 kmem_free(kernel_map, kaddr, ksize);
2576#endif /* CS_BLOB_KEEP_IN_KERNEL */
2577
2578 break;
2579 }
2580 }
2581
2582 if (found_hash == FALSE) {
2583 /*
2584 * We can't verify this page because there is no signature
2585 * for it (yet). It's possible that this part of the object
2586 * is not signed, or that signatures for that part have not
2587 * been loaded yet.
2588 * Report that the page has not been validated and let the
2589 * caller decide if it wants to accept it or not.
2590 */
2591 cs_validate_page_no_hash++;
2592 if (cs_debug > 1) {
2593 printf("CODE SIGNING: cs_validate_page: "
2594 "off 0x%llx: no hash to validate !?\n",
2595 page_offset);
2596 }
2597 validated = FALSE;
2598 *tainted = FALSE;
2599 } else {
2600 const uint32_t *asha1, *esha1;
2601
2602 size = PAGE_SIZE;
2603 if (offset + size > codeLimit) {
2604 /* partial page at end of segment */
2605 assert(offset < codeLimit);
2606 size = codeLimit & PAGE_MASK;
2607 }
2608 /* compute the actual page's SHA1 hash */
2609 SHA1Init(&sha1ctxt);
2610 SHA1Update(&sha1ctxt, data, size);
2611 SHA1Final(actual_hash, &sha1ctxt);
2612
2613 asha1 = (const uint32_t *) actual_hash;
2614 esha1 = (const uint32_t *) expected_hash;
2615
2616 if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
2617 if (cs_debug) {
2618 printf("CODE SIGNING: cs_validate_page: "
2619 "off 0x%llx size 0x%lx: "
2620 "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
2621 "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
2622 page_offset, size,
2623 asha1[0], asha1[1], asha1[2],
2624 asha1[3], asha1[4],
2625 esha1[0], esha1[1], esha1[2],
2626 esha1[3], esha1[4]);
2627 }
2628 cs_validate_page_bad_hash++;
2629 *tainted = TRUE;
2630 } else {
2631 if (cs_debug > 1) {
2632 printf("CODE SIGNING: cs_validate_page: "
2633 "off 0x%llx size 0x%lx: SHA1 OK\n",
2634 page_offset, size);
2635 }
2636 *tainted = FALSE;
2637 }
2638 validated = TRUE;
2639 }
2640
2641 return validated;
2642}
2643
2644int
2645ubc_cs_getcdhash(
2646 vnode_t vp,
2647 off_t offset,
2648 unsigned char *cdhash)
2649{
2650 struct cs_blob *blobs, *blob;
2651 off_t rel_offset;
2652
2653 blobs = ubc_get_cs_blobs(vp);
2654 for (blob = blobs;
2655 blob != NULL;
2656 blob = blob->csb_next) {
2657 /* compute offset relative to this blob */
2658 rel_offset = offset - blob->csb_base_offset;
2659 if (rel_offset >= blob->csb_start_offset &&
2660 rel_offset < blob->csb_end_offset) {
2661 /* this blob does cover our "offset" ! */
2662 break;
2663 }
2664 }
2665
2666 if (blob == NULL) {
2667 /* we didn't find a blob covering "offset" */
2668 return EBADEXEC; /* XXX any better error ? */
2669 }
2670
2671 /* get the SHA1 hash of that blob */
2672 bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
2673
2674 return 0;
2675}