/*
 * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	File:	ubc_subr.c
 *	Author:	Umesh Vaishampayan [umeshv@apple.com]
 *		05-Aug-1999	umeshv	Created.
 *
 *	Functions related to Unified Buffer cache.
 *
 *	Caller of UBC functions MUST have a valid reference on the vnode.
 *
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <sys/ubc_internal.h>
#include <sys/ucred.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <sys/codesign.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_control.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <mach/upl.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h> /* last */

#include <libkern/crypto/sha1.h>

#include <security/mac_framework.h>

/* XXX These should be in a BSD accessible Mach header, but aren't. */
extern kern_return_t memory_object_pages_resident(memory_object_control_t,
						  boolean_t *);
extern kern_return_t memory_object_signed(memory_object_control_t control,
					  boolean_t is_signed);
extern void Debugger(const char *message);


/* XXX no one uses this interface! */
kern_return_t ubc_page_op_with_control(
	memory_object_control_t	control,
	off_t			f_offset,
	int			ops,
	ppnum_t			*phys_entryp,
	int			*flagsp);

#if DIAGNOSTIC
#if defined(assert)
#undef assert
#endif
#define assert(cond)    \
    ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond)))
#else
#include <kern/assert.h>
#endif /* DIAGNOSTIC */

static int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize);
static int ubc_umcallback(vnode_t, void *);
static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *);
static void ubc_cs_free(struct ubc_info *uip);

struct zone	*ubc_info_zone;


/*
 * CODESIGNING
 * Routines to navigate code signing data structures in the kernel...
 */

extern int cs_debug;

static boolean_t
cs_valid_range(
	const void *start,
	const void *end,
	const void *lower_bound,
	const void *upper_bound)
{
	if (upper_bound < lower_bound ||
	    end < start) {
		return FALSE;
	}

	if (start < lower_bound ||
	    end > upper_bound) {
		return FALSE;
	}

	return TRUE;
}
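
/*
 * Illustrative sketch (not part of the original code): cs_valid_range()
 * is the guard used before dereferencing anything inside a mapped
 * signature blob.  A typical call first validates a fixed-size header,
 * then the variable-length region it describes, e.g. (using the
 * CS_SuperBlob type defined below):
 *
 *	const CS_SuperBlob *sb = (const CS_SuperBlob *)addr;
 *	if (!cs_valid_range(sb, sb + 1, lower_bound, upper_bound))
 *		return NULL;	// header does not fit within the blob
 */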

/*
 * Magic numbers used by Code Signing
 */
enum {
	CSMAGIC_REQUIREMENT = 0xfade0c00,		/* single Requirement blob */
	CSMAGIC_REQUIREMENTS = 0xfade0c01,		/* Requirements vector (internal requirements) */
	CSMAGIC_CODEDIRECTORY = 0xfade0c02,		/* CodeDirectory blob */
	CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0,	/* embedded form of signature data */
	CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02,	/* XXX */
	CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1,	/* multi-arch collection of embedded signatures */

	CSSLOT_CODEDIRECTORY = 0,			/* slot index for CodeDirectory */
};

static const uint32_t supportsScatter = 0x20100;	// first version to support scatter option

/*
 * Structure of an embedded-signature SuperBlob
 */
typedef struct __BlobIndex {
	uint32_t type;			/* type of entry */
	uint32_t offset;		/* offset of entry */
} CS_BlobIndex;

typedef struct __SuperBlob {
	uint32_t magic;			/* magic number */
	uint32_t length;		/* total length of SuperBlob */
	uint32_t count;			/* number of index entries following */
	CS_BlobIndex index[];		/* (count) entries */
	/* followed by Blobs in no particular order as indicated by offsets in index */
} CS_SuperBlob;

struct Scatter {
	uint32_t count;			// number of pages; zero for sentinel (only)
	uint32_t base;			// first page number
	uint64_t targetOffset;		// offset in target
	uint64_t spare;			// reserved
};

/*
 * C form of a CodeDirectory.
 */
typedef struct __CodeDirectory {
	uint32_t magic;			/* magic number (CSMAGIC_CODEDIRECTORY) */
	uint32_t length;		/* total length of CodeDirectory blob */
	uint32_t version;		/* compatibility version */
	uint32_t flags;			/* setup and mode flags */
	uint32_t hashOffset;		/* offset of hash slot element at index zero */
	uint32_t identOffset;		/* offset of identifier string */
	uint32_t nSpecialSlots;		/* number of special hash slots */
	uint32_t nCodeSlots;		/* number of ordinary (code) hash slots */
	uint32_t codeLimit;		/* limit to main image signature range */
	uint8_t hashSize;		/* size of each hash in bytes */
	uint8_t hashType;		/* type of hash (cdHashType* constants) */
	uint8_t spare1;			/* unused (must be zero) */
	uint8_t pageSize;		/* log2(page size in bytes); 0 => infinite */
	uint32_t spare2;		/* unused (must be zero) */
	/* Version 0x20100 */
	uint32_t scatterOffset;		/* offset of optional scatter vector */
	/* followed by dynamic content as located by offset fields above */
} CS_CodeDirectory;
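
/*
 * Layout note with an illustrative sketch (not part of the original
 * code): every multi-byte field in these structures is stored
 * big-endian, which is why all field reads below go through ntohl().
 * Variable-length items are located by offsets measured from the start
 * of their own blob; for example, a CodeDirectory's identifier string
 * would be found as:
 *
 *	const char *ident = (const char *)cd + ntohl(cd->identOffset);
 *
 * (assuming cd has already been bounds-checked via cs_valid_range()).
 */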

/*
 * Locate the CodeDirectory from an embedded signature blob
 */
static const
CS_CodeDirectory *findCodeDirectory(
	const CS_SuperBlob *embedded,
	char *lower_bound,
	char *upper_bound)
{
	const CS_CodeDirectory *cd = NULL;

	if (embedded &&
	    cs_valid_range(embedded, embedded + 1, lower_bound, upper_bound) &&
	    ntohl(embedded->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
		const CS_BlobIndex *limit;
		const CS_BlobIndex *p;

		limit = &embedded->index[ntohl(embedded->count)];
		if (!cs_valid_range(&embedded->index[0], limit,
				    lower_bound, upper_bound)) {
			return NULL;
		}
		for (p = embedded->index; p < limit; ++p) {
			if (ntohl(p->type) == CSSLOT_CODEDIRECTORY) {
				const unsigned char *base;

				base = (const unsigned char *)embedded;
				cd = (const CS_CodeDirectory *)(base + ntohl(p->offset));
				break;
			}
		}
	} else {
		/*
		 * Detached signatures come as a bare CS_CodeDirectory,
		 * without a blob.
		 */
		cd = (const CS_CodeDirectory *) embedded;
	}

	if (cd &&
	    cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
			   lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd + ntohl(cd->hashOffset),
			   lower_bound, upper_bound) &&
	    cs_valid_range(cd, (const char *) cd +
			   ntohl(cd->hashOffset) +
			   (ntohl(cd->nCodeSlots) * SHA1_RESULTLEN),
			   lower_bound, upper_bound) &&
	    ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY) {
		return cd;
	}

	// not found or not a valid code directory
	return NULL;
}

/*
 * Locating a page hash
 */
static const unsigned char *
hashes(
	const CS_CodeDirectory *cd,
	unsigned page,
	char *lower_bound,
	char *upper_bound)
{
	const unsigned char *base, *top, *hash;
	uint32_t nCodeSlots = ntohl(cd->nCodeSlots);

	assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound));

	if ((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
		/* Get first scatter struct */
		const struct Scatter *scatter = (const struct Scatter*)
			((const char*)cd + ntohl(cd->scatterOffset));
		uint32_t hashindex = 0, scount, sbase = 0;
		/* iterate all scatter structs */
		do {
			if ((const char*)scatter > (const char*)cd + ntohl(cd->length)) {
				if (cs_debug) {
					printf("CODE SIGNING: Scatter extends past Code Directory\n");
				}
				return NULL;
			}

			scount = ntohl(scatter->count);
			uint32_t new_base = ntohl(scatter->base);

			/* last scatter? */
			if (scount == 0) {
				return NULL;
			}

			if ((hashindex > 0) && (new_base <= sbase)) {
				if (cs_debug) {
					printf("CODE SIGNING: unordered Scatter, prev base %d, cur base %d\n",
					       sbase, new_base);
				}
				return NULL;	/* unordered scatter array */
			}
			sbase = new_base;

			/* this scatter beyond page we're looking for? */
			if (sbase > page) {
				return NULL;
			}

			if (sbase + scount >= page) {
				/* Found the scatter struct that is
				 * referencing our page */

				/* base = address of first hash covered by scatter */
				base = (const unsigned char *)cd + ntohl(cd->hashOffset) +
					hashindex * SHA1_RESULTLEN;
				/* top = address of first hash after this scatter */
				top = base + scount * SHA1_RESULTLEN;
				if (!cs_valid_range(base, top, lower_bound,
						    upper_bound) ||
				    hashindex > nCodeSlots) {
					return NULL;
				}

				break;
			}

			/* this scatter struct is before the page we're looking
			 * for. Iterate. */
			hashindex += scount;
			scatter++;
		} while (1);

		hash = base + (page - sbase) * SHA1_RESULTLEN;
	} else {
		base = (const unsigned char *)cd + ntohl(cd->hashOffset);
		top = base + nCodeSlots * SHA1_RESULTLEN;
		if (!cs_valid_range(base, top, lower_bound, upper_bound) ||
		    page > nCodeSlots) {
			return NULL;
		}
		assert(page < nCodeSlots);

		hash = base + page * SHA1_RESULTLEN;
	}

	if (!cs_valid_range(hash, hash + SHA1_RESULTLEN,
			    lower_bound, upper_bound)) {
		hash = NULL;
	}

	return hash;
}
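
/*
 * Usage sketch (illustrative only, not part of the original code): a
 * verifier would fetch the expected hash for a page with hashes() and
 * compare it against a freshly computed SHA-1 of the page contents,
 * along these lines:
 *
 *	SHA1_CTX ctx;
 *	unsigned char actual[SHA1_RESULTLEN];
 *	const unsigned char *expected;
 *
 *	expected = hashes(cd, page, lower_bound, upper_bound);
 *	if (expected != NULL) {
 *		SHA1Init(&ctx);
 *		SHA1Update(&ctx, data, size);
 *		SHA1Final(actual, &ctx);
 *		valid = (bcmp(expected, actual, SHA1_RESULTLEN) == 0);
 *	}
 */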
/*
 * CODESIGNING
 * End of routines to navigate code signing data structures in the kernel.
 */

/*
 * ubc_init
 *
 * Initialization of the zone for Unified Buffer Cache.
 *
 * Parameters:	(void)
 *
 * Returns:	(void)
 *
 * Implicit returns:
 *		ubc_info_zone(global)	initialized for subsequent allocations
 */
__private_extern__ void
ubc_init(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct ubc_info);

	ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone");
}

/*
 * ubc_info_init
 *
 * Allocate and attach an empty ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *
 * Returns:	0			Success
 *		vnode_size:ENOMEM	Not enough space
 *		vnode_size:???		Other error from vnode_getattr
 *
 */
int
ubc_info_init(struct vnode *vp)
{
	return(ubc_info_init_internal(vp, 0, 0));
}

/*
 * ubc_info_init_withsize
 *
 * Allocate and attach a sized ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *		filesize		The size of the file
 *
 * Returns:	0			Success
 *		vnode_size:ENOMEM	Not enough space
 *		vnode_size:???		Other error from vnode_getattr
 */
int
ubc_info_init_withsize(struct vnode *vp, off_t filesize)
{
	return(ubc_info_init_internal(vp, 1, filesize));
}

/*
 * ubc_info_init_internal
 *
 * Allocate and attach a ubc_info structure to a vnode
 *
 * Parameters:	vp			Pointer to the vnode
 *		withfsize{0,1}		Zero if the size should be obtained
 *					from the vnode; otherwise, use filesize
 *		filesize		The size of the file, if withfsize == 1
 *
 * Returns:	0			Success
 *		vnode_size:ENOMEM	Not enough space
 *		vnode_size:???		Other error from vnode_getattr
 *
 * Notes:	We call a blocking zalloc(), and the zone was created as an
 *		expandable and collectable zone, so if no memory is available,
 *		it is possible for zalloc() to block indefinitely.  zalloc()
 *		may also panic if the zone of zones is exhausted, since it's
 *		NOT expandable.
 *
 *		We unconditionally call vnode_pager_setup(), even if this is
 *		a reuse of a ubc_info; in that case, we should probably assert
 *		that it does not already have a pager association, but do not.
 *
 *		Since memory_object_create_named() can only fail from receiving
 *		an invalid pager argument, the explicit check and panic is
 *		merely precautionary.
 */
static int
ubc_info_init_internal(vnode_t vp, int withfsize, off_t filesize)
{
	register struct ubc_info	*uip;
	void *pager;
	int error = 0;
	kern_return_t kret;
	memory_object_control_t control;

	uip = vp->v_ubcinfo;

	/*
	 * If there is not already a ubc_info attached to the vnode, we
	 * attach one; otherwise, we will reuse the one that's there.
	 */
	if (uip == UBC_INFO_NULL) {

		uip = (struct ubc_info *) zalloc(ubc_info_zone);
		bzero((char *)uip, sizeof(struct ubc_info));

		uip->ui_vnode = vp;
		uip->ui_flags = UI_INITED;
		uip->ui_ucred = NOCRED;
	}
	assert(uip->ui_flags != UI_NONE);
	assert(uip->ui_vnode == vp);

	/* now set this ubc_info in the vnode */
	vp->v_ubcinfo = uip;

	/*
	 * Allocate a pager object for this vnode
	 *
	 * XXX The value of the pager parameter is currently ignored.
	 * XXX Presumably, this API changed to avoid the race between
	 * XXX setting the pager and the UI_HASPAGER flag.
	 */
	pager = (void *)vnode_pager_setup(vp, uip->ui_pager);
	assert(pager);

	/*
	 * Explicitly set the pager into the ubc_info, after setting the
	 * UI_HASPAGER flag.
	 */
	SET(uip->ui_flags, UI_HASPAGER);
	uip->ui_pager = pager;

	/*
	 * Note: We can not use VNOP_GETATTR() to get accurate
	 * value of ui_size because this may be an NFS vnode, and
	 * nfs_getattr() can call vinvalbuf(); if this happens,
	 * ubc_info is not set up to deal with that event.
	 * So use bogus size.
	 */

	/*
	 * create a vnode - vm_object association
	 * memory_object_create_named() creates a "named" reference on the
	 * memory object we hold this reference as long as the vnode is
	 * "alive."  Since memory_object_create_named() took its own reference
	 * on the vnode pager we passed it, we can drop the reference
	 * vnode_pager_setup() returned here.
	 */
	kret = memory_object_create_named(pager,
		(memory_object_size_t)uip->ui_size, &control);
	vnode_pager_deallocate(pager);
	if (kret != KERN_SUCCESS)
		panic("ubc_info_init: memory_object_create_named returned %d", kret);

	assert(control);
	uip->ui_control = control;		/* cache the value of the mo control */
	SET(uip->ui_flags, UI_HASOBJREF);	/* with a named reference */

	if (withfsize == 0) {
		/* initialize the size */
		error = vnode_size(vp, &uip->ui_size, vfs_context_current());
		if (error)
			uip->ui_size = 0;
	} else {
		uip->ui_size = filesize;
	}
	vp->v_lflag |= VNAMED_UBC;	/* vnode has a named ubc reference */

	return (error);
}

/*
 * ubc_info_free
 *
 * Free a ubc_info structure
 *
 * Parameters:	uip			A pointer to the ubc_info to free
 *
 * Returns:	(void)
 *
 * Notes:	If there is a credential that has subsequently been associated
 *		with the ubc_info via a call to ubc_setcred(), the reference
 *		to the credential is dropped.
 *
 *		It's actually impossible for a ubc_info.ui_control to take the
 *		value MEMORY_OBJECT_CONTROL_NULL.
 */
static void
ubc_info_free(struct ubc_info *uip)
{
	if (IS_VALID_CRED(uip->ui_ucred)) {
		kauth_cred_unref(&uip->ui_ucred);
	}

	if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL)
		memory_object_control_deallocate(uip->ui_control);

	cluster_release(uip);
	ubc_cs_free(uip);

	zfree(ubc_info_zone, uip);
	return;
}

void
ubc_info_deallocate(struct ubc_info *uip)
{
	ubc_info_free(uip);
}

/*
 * ubc_setsize
 *
 * Tell the VM that the size of the file represented by the vnode has
 * changed
 *
 * Parameters:	vp			The vp whose backing file size is
 *					being changed
 *		nsize			The new size of the backing file
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Notes:	This function will indicate failure if the new size that's
 *		being attempted to be set is negative.
 *
 *		This function will fail if there is no ubc_info currently
 *		associated with the vnode.
 *
 *		This function will indicate success if the new size is the
 *		same or larger than the old size (in this case, the remainder
 *		of the file will require modification or use of an existing upl
 *		to access successfully).
 *
 *		This function will fail if the new file size is smaller, and
 *		the memory region being invalidated was unable to actually be
 *		invalidated and/or the last page could not be flushed, if the
 *		new size is not aligned to a page boundary.  This is usually
 *		indicative of an I/O error.
 */
int
ubc_setsize(struct vnode *vp, off_t nsize)
{
	off_t osize;	/* ui_size before change */
	off_t lastpg, olastpgend, lastoff;
	struct ubc_info *uip;
	memory_object_control_t control;
	kern_return_t kret = KERN_SUCCESS;

	if (nsize < (off_t)0)
		return (0);

	if (!UBCINFOEXISTS(vp))
		return (0);

	uip = vp->v_ubcinfo;
	osize = uip->ui_size;
	/*
	 * Update the size before flushing the VM
	 */
	uip->ui_size = nsize;

	if (nsize >= osize) {	/* Nothing more to do */
		lock_vnode_and_post(vp, NOTE_EXTEND);
		return (1);	/* return success */
	}

	/*
	 * When the file shrinks, invalidate the pages beyond the
	 * new size. Also get rid of garbage beyond nsize on the
	 * last page. The ui_size already has the nsize, so any
	 * subsequent page-in will zero-fill the tail properly
	 */
	lastpg = trunc_page_64(nsize);
	olastpgend = round_page_64(osize);
	control = uip->ui_control;
	assert(control);
	lastoff = (nsize & PAGE_MASK_64);

	if (lastoff) {
		upl_t		upl;
		upl_page_info_t	*pl;

		/*
		 * new EOF ends up in the middle of a page
		 * zero the tail of this page if it's currently
		 * present in the cache
		 */
		kret = ubc_create_upl(vp, lastpg, PAGE_SIZE, &upl, &pl, UPL_SET_LITE);

		if (kret != KERN_SUCCESS)
			panic("ubc_setsize: ubc_create_upl (error = %d)\n", kret);

		if (upl_valid_page(pl, 0))
			cluster_zero(upl, (uint32_t)lastoff, PAGE_SIZE - (uint32_t)lastoff, NULL);

		ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);

		lastpg += PAGE_SIZE_64;
	}
	if (olastpgend > lastpg) {
		int	flags;

		if (lastpg == 0)
			flags = MEMORY_OBJECT_DATA_FLUSH_ALL;
		else
			flags = MEMORY_OBJECT_DATA_FLUSH;
		/*
		 * invalidate the pages beyond the new EOF page
		 *
		 */
		kret = memory_object_lock_request(control,
						  (memory_object_offset_t)lastpg,
						  (memory_object_size_t)(olastpgend - lastpg), NULL, NULL,
						  MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE);
		if (kret != KERN_SUCCESS)
			printf("ubc_setsize: invalidate failed (error = %d)\n", kret);
	}
	return ((kret == KERN_SUCCESS) ? 1 : 0);
}
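
/*
 * Usage sketch (illustrative only, not part of the original code): a
 * filesystem truncate path typically updates its own notion of the file
 * size first and then informs the VM, remembering that this routine
 * returns 1 on success and 0 on failure:
 *
 *	if (ubc_setsize(vp, new_size) == 0)
 *		error = EIO;	// tail-page zero/invalidate failed
 */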

/*
 * ubc_getsize
 *
 * Get the size of the file associated with the specified vnode
 *
 * Parameters:	vp			The vnode whose size is of interest
 *
 * Returns:	0			There is no ubc_info associated with
 *					this vnode, or the size is zero
 *		!0			The size of the file
 *
 * Notes:	Using this routine, it is not possible for a caller to
 *		successfully distinguish between a vnode associated with a
 *		zero length file, and a vnode with no associated ubc_info.
 *		The caller therefore needs to not care, or needs to ensure
 *		that they have previously successfully called ubc_info_init()
 *		or ubc_info_init_withsize().
 */
off_t
ubc_getsize(struct vnode *vp)
{
	/* people depend on the side effect of this working this way
	 * as they call this for directory
	 */
	if (!UBCINFOEXISTS(vp))
		return ((off_t)0);
	return (vp->v_ubcinfo->ui_size);
}

/*
 * ubc_umount
 *
 * Call ubc_msync(vp, 0, EOF, NULL, UBC_PUSHALL) on all the vnodes for this
 * mount point
 *
 * Parameters:	mp			The mount point
 *
 * Returns:	0			Success
 *
 * Notes:	There is no failure indication for this function.
 *
 *		This function is used in the unmount path; since it may block
 *		I/O indefinitely, it should not be used in the forced unmount
 *		path, since a device unavailability could also block that
 *		indefinitely.
 *
 *		Because there is no device ejection interlock on USB, FireWire,
 *		or similar devices, it's possible that an ejection that begins
 *		subsequent to the vnode_iterate() completing, either on one of
 *		those devices, or a network mount for which the server quits
 *		responding, etc., may cause the caller to block indefinitely.
 */
__private_extern__ int
ubc_umount(struct mount *mp)
{
	vnode_iterate(mp, 0, ubc_umcallback, 0);
	return(0);
}

/*
 * ubc_umcallback
 *
 * Used by ubc_umount() as an internal implementation detail; see ubc_umount()
 * and vnode_iterate() for details of implementation.
 */
static int
ubc_umcallback(vnode_t vp, __unused void * args)
{

	if (UBCINFOEXISTS(vp)) {

		(void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL);
	}
	return (VNODE_RETURNED);
}

/*
 * ubc_getcred
 *
 * Get the credentials currently active for the ubc_info associated with the
 * vnode.
 *
 * Parameters:	vp			The vnode whose ubc_info credentials
 *					are to be retrieved
 *
 * Returns:	!NOCRED			The credentials
 *		NOCRED			If there is no ubc_info for the vnode,
 *					or if there is one, but it has not had
 *					any credentials associated with it via
 *					a call to ubc_setcred()
 */
kauth_cred_t
ubc_getcred(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_ucred);

	return (NOCRED);
}

/*
 * ubc_setthreadcred
 *
 * If they are not already set, set the credentials of the ubc_info structure
 * associated with the vnode to those of the supplied thread; otherwise leave
 * them alone.
 *
 * Parameters:	vp			The vnode whose ubc_info creds are to
 *					be set
 *		p			The process whose credentials are to
 *					be used, if not running on an assumed
 *					credential
 *		thread			The thread whose credentials are to
 *					be used
 *
 * Returns:	1			This vnode has no associated ubc_info
 *		0			Success
 *
 * Notes:	This function takes a proc parameter to account for bootstrap
 *		issues where a task or thread may call this routine, either
 *		before credentials have been initialized by bsd_init(), or if
 *		there is no BSD info associated with a mach thread yet.  This
 *		is known to happen in both the initial swap and memory mapping
 *		calls.
 *
 *		This function is generally used only in the following cases:
 *
 *		o	a memory mapped file via the mmap() system call
 *		o	a memory mapped file via the deprecated map_fd() call
 *		o	a swap store backing file
 *		o	subsequent to a successful write via vn_write()
 *
 *		The information is then used by the NFS client in order to
 *		cons up a wire message in either the page-in or page-out path.
 *
 *		There are two potential problems with the use of this API:
 *
 *		o	Because the write path only sets it on a successful
 *			write, there is a race window between setting the
 *			credential and its use to evict the pages to the
 *			remote file server
 *
 *		o	Because a page-in may occur prior to a write, the
 *			credential may not be set at this time, if the page-in
 *			is not the result of a mapping established via mmap()
 *			or map_fd().
 *
 *		In both these cases, this will be triggered from the paging
 *		path, which will instead use the credential of the current
 *		process, which in this case is either the dynamic_pager or
 *		the kernel task, both of which utilize "root" credentials.
 *
 *		This may potentially permit operations to occur which should
 *		be denied, or it may cause to be denied operations which
 *		should be permitted, depending on the configuration of the NFS
 *		server.
 */
int
ubc_setthreadcred(struct vnode *vp, proc_t p, thread_t thread)
{
	struct ubc_info *uip;
	kauth_cred_t credp;
	struct uthread *uthread = get_bsdthread_info(thread);

	if (!UBCINFOEXISTS(vp))
		return (1);

	vnode_lock(vp);

	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;

	if (!IS_VALID_CRED(credp)) {
		/* use per-thread cred, if assumed identity, else proc cred */
		if (uthread == NULL || (uthread->uu_flag & UT_SETUID) == 0) {
			uip->ui_ucred = kauth_cred_proc_ref(p);
		} else {
			uip->ui_ucred = uthread->uu_ucred;
			kauth_cred_ref(uip->ui_ucred);
		}
	}
	vnode_unlock(vp);

	return (0);
}

/*
 * ubc_setcred
 *
 * If they are not already set, set the credentials of the ubc_info structure
 * associated with the vnode to those of the process; otherwise leave them
 * alone.
 *
 * Parameters:	vp			The vnode whose ubc_info creds are to
 *					be set
 *		p			The process whose credentials are to
 *					be used
 *
 * Returns:	0			This vnode has no associated ubc_info
 *		1			Success
 *
 * Notes:	The return values for this function are inverted from nearly
 *		all other uses in the kernel.
 *
 *		See also ubc_setthreadcred(), above.
 *
 *		This function is considered deprecated, and generally should
 *		not be used, as it is incompatible with per-thread credentials;
 *		it exists for legacy KPI reasons.
 *
 * DEPRECATION:	ubc_setcred() is being deprecated. Please use
 *		ubc_setthreadcred() instead.
 */
int
ubc_setcred(struct vnode *vp, proc_t p)
{
	struct ubc_info *uip;
	kauth_cred_t credp;

	/* If there is no ubc_info, deny the operation */
	if ( !UBCINFOEXISTS(vp))
		return (0);

	/*
	 * Check to see if there is already a credential reference in the
	 * ubc_info; if there is not, take one on the supplied credential.
	 */
	vnode_lock(vp);
	uip = vp->v_ubcinfo;
	credp = uip->ui_ucred;
	if (!IS_VALID_CRED(credp)) {
		uip->ui_ucred = kauth_cred_proc_ref(p);
	}
	vnode_unlock(vp);

	return (1);
}

/*
 * ubc_getpager
 *
 * Get the pager associated with the ubc_info associated with the vnode.
 *
 * Parameters:	vp			The vnode to obtain the pager from
 *
 * Returns:	!VNODE_PAGER_NULL	The memory_object_t for the pager
 *		VNODE_PAGER_NULL	There is no ubc_info for this vnode
 *
 * Notes:	For each vnode that has a ubc_info associated with it, that
 *		ubc_info SHALL have a pager associated with it, so in the
 *		normal case, it's impossible to return VNODE_PAGER_NULL for
 *		a vnode with an associated ubc_info.
 */
__private_extern__ memory_object_t
ubc_getpager(struct vnode *vp)
{
	if (UBCINFOEXISTS(vp))
		return (vp->v_ubcinfo->ui_pager);

	return (0);
}

/*
 * ubc_getobject
 *
 * Get the memory object control associated with the ubc_info associated with
 * the vnode
 *
 * Parameters:	vp			The vnode to obtain the memory object
 *					from
 *		flags			DEPRECATED
 *
 * Returns:	!MEMORY_OBJECT_CONTROL_NULL
 *		MEMORY_OBJECT_CONTROL_NULL
 *
 * Notes:	Historically, if the flags were not "do not reactivate", this
 *		function would look up the memory object using the pager if
 *		it did not exist (this could be the case if the vnode had
 *		been previously reactivated).  The flags would also permit a
 *		hold to be requested, which would have created an object
 *		reference, if one had not already existed.  This usage is
 *		deprecated, as it would permit a race between finding and
 *		taking the reference vs. a single reference being dropped in
 *		another thread.
 */
memory_object_control_t
ubc_getobject(struct vnode *vp, __unused int flags)
{
	if (UBCINFOEXISTS(vp))
		return((vp->v_ubcinfo->ui_control));

	return (MEMORY_OBJECT_CONTROL_NULL);
}

/*
 * ubc_blktooff
 *
 * Convert a given block number to a memory backing object (file) offset for a
 * given vnode
 *
 * Parameters:	vp			The vnode in which the block is located
 *		blkno			The block number to convert
 *
 * Returns:	!-1			The offset into the backing object
 *		-1			There is no ubc_info associated with
 *					the vnode
 *		-1			An error occurred in the underlying VFS
 *					while translating the block to an
 *					offset; the most likely cause is that
 *					the caller specified a block past the
 *					end of the file, but this could also be
 *					any other error from VNOP_BLKTOOFF().
 *
 * Note:	Representing the error in band loses some information, but does
 *		not occlude a valid offset, since an off_t of -1 is normally
 *		used to represent EOF.  If we had a more reliable constant in
 *		our header files for it (i.e. explicitly cast to an off_t), we
 *		would use it here instead.
 */
off_t
ubc_blktooff(vnode_t vp, daddr64_t blkno)
{
	off_t file_offset = -1;
	int error;

	if (UBCINFOEXISTS(vp)) {
		error = VNOP_BLKTOOFF(vp, blkno, &file_offset);
		if (error)
			file_offset = -1;
	}

	return (file_offset);
}

/*
 * ubc_offtoblk
 *
 * Convert a given offset in a memory backing object into a block number for a
 * given vnode
 *
 * Parameters:	vp			The vnode in which the offset is
 *					located
 *		offset			The offset into the backing object
 *
 * Returns:	!-1			The returned block number
 *		-1			There is no ubc_info associated with
 *					the vnode
 *		-1			An error occurred in the underlying VFS
 *					while translating the offset to a
 *					block; the most likely cause is that
 *					the caller specified an offset past the
 *					end of the file, but this could also be
 *					any other error from VNOP_OFFTOBLK().
 *
 * Note:	Representing the error in band loses some information, but does
 *		not occlude a valid block number, since block numbers exceed
 *		the valid range for offsets, due to their relative sizes.  If
 *		we had a more reliable constant than -1 in our header files
 *		for it (i.e. explicitly cast to a daddr64_t), we would use it
 *		here instead.
 */
daddr64_t
ubc_offtoblk(vnode_t vp, off_t offset)
{
	daddr64_t blkno = -1;
	int error = 0;

	if (UBCINFOEXISTS(vp)) {
		error = VNOP_OFFTOBLK(vp, offset, &blkno);
		if (error)
			blkno = -1;
	}

	return (blkno);
}
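
/*
 * Usage sketch (illustrative only, not part of the original code): the
 * two conversions above are filesystem-supplied inverses, and both
 * report errors in band as -1:
 *
 *	off_t off = ubc_blktooff(vp, blkno);
 *	daddr64_t blk = ubc_offtoblk(vp, off);
 *	// for a valid blkno, blk == blkno unless the VFS returned an error
 */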

/*
 * ubc_pages_resident
 *
 * Determine whether or not a given vnode has pages resident via the memory
 * object control associated with the ubc_info associated with the vnode
 *
 * Parameters:	vp			The vnode we want to know about
 *
 * Returns:	1			Yes
 *		0			No
 */
int
ubc_pages_resident(vnode_t vp)
{
	kern_return_t kret;
	boolean_t has_pages_resident;

	if (!UBCINFOEXISTS(vp))
		return (0);

	/*
	 * The following call may fail if an invalid ui_control is specified,
	 * or if there is no VM object associated with the control object.  In
	 * either case, reacting to it as if there were no pages resident will
	 * result in correct behavior.
	 */
	kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident);

	if (kret != KERN_SUCCESS)
		return (0);

	if (has_pages_resident == TRUE)
		return (1);

	return (0);
}

/*
 * ubc_sync_range
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		flags			See ubc_msync_internal()
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Notes:	see ubc_msync_internal() for more detailed information.
 *
 * DEPRECATED:	This interface is obsolete due to a failure to return error
 *		information needed in order to correct failures.  The currently
 *		recommended interface is ubc_msync().
 */
int
ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags)
{
	return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL));
}

/*
 * ubc_msync
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		resid_off		The address of an off_t supplied by the
 *					caller; may be set to NULL to ignore
 *		flags			See ubc_msync_internal()
 *
 * Returns:	0			Success
 *		!0			Failure; an errno is returned
 *
 * Implicit Returns:
 *		*resid_off, modified	If non-NULL, the contents are ALWAYS
 *					modified; they are initialized to the
 *					beg_off, and in case of an I/O error,
 *					the difference between beg_off and the
 *					current value will reflect what was
 *					able to be written before the error
 *					occurred.  If no error is returned, the
 *					value of the resid_off is undefined; do
 *					NOT use it in place of end_off if you
 *					intend to increment from the end of the
 *					last call and call iteratively.
 *
 * Notes:	see ubc_msync_internal() for more detailed information.
 *
 */
errno_t
ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags)
{
	int retval;
	int io_errno = 0;

	if (resid_off)
		*resid_off = beg_off;

	retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno);

	if (retval == 0 && io_errno == 0)
		return (EINVAL);
	return (io_errno);
}
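
/*
 * Usage sketch (illustrative only, not part of the original code):
 * synchronously pushing every dirty page of a file, and separating the
 * "request rejected" case from a real I/O error per the contract above:
 *
 *	off_t resid = 0;
 *	errno_t err = ubc_msync(vp, (off_t)0, ubc_getsize(vp), &resid,
 *				UBC_PUSHDIRTY | UBC_SYNC);
 *	if (err)	// EINVAL if rejected, else the errno from the I/O
 *		printf("ubc_msync failed near offset %lld: %d\n", resid, err);
 */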

/*
 * ubc_msync_internal
 *
 * Clean and/or invalidate a range in the memory object that backs this vnode
 *
 * Parameters:	vp			The vnode whose associated ubc_info's
 *					associated memory object is to have a
 *					range invalidated within it
 *		beg_off			The start of the range, as an offset
 *		end_off			The end of the range, as an offset
 *		resid_off		The address of an off_t supplied by the
 *					caller; may be set to NULL to ignore
 *		flags			MUST contain at least one of the flags
 *					UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *					UBC_PUSHALL; if UBC_PUSHDIRTY is used,
 *					UBC_SYNC may also be specified to cause
 *					this function to block until the
 *					operation is complete.  The behavior
 *					of UBC_SYNC is otherwise undefined.
 *		io_errno		The address of an int to contain the
 *					errno from a failed I/O operation, if
 *					one occurs; may be set to NULL to
 *					ignore
 *
 * Returns:	1			Success
 *		0			Failure
 *
 * Implicit Returns:
 *		*resid_off, modified	The contents of this offset MAY be
 *					modified; in case of an I/O error, the
 *					difference between beg_off and the
 *					current value will reflect what was
 *					able to be written before the error
 *					occurred.
 *		*io_errno, modified	The contents of this offset are set to
 *					an errno, if an error occurs; if the
 *					caller supplies an io_errno parameter,
 *					they should be careful to initialize it
 *					to 0 before calling this function to
 *					enable them to distinguish an error
 *					with a valid *resid_off from an invalid
 *					one, and to avoid potentially falsely
 *					reporting an error, depending on use.
 *
 * Notes:	If there is no ubc_info associated with the vnode supplied,
 *		this function immediately returns success.
 *
 *		If the value of end_off is less than or equal to beg_off, this
 *		function immediately returns success; that is, end_off is NOT
 *		inclusive.
 *
 *		IMPORTANT: one of the flags UBC_INVALIDATE, UBC_PUSHDIRTY, or
 *		UBC_PUSHALL MUST be specified; that is, it is NOT possible to
 *		attempt to block on in-progress I/O by calling this function
 *		with UBC_PUSHDIRTY, and then later call it with just UBC_SYNC
 *		in order to block pending on the I/O already in progress.
 *
 *		The start offset is truncated to the page boundary and the
 *		size is adjusted to include the last page in the range; that
 *		is, end_off on exactly a page boundary will not change if it
 *		is rounded, and the range of bytes written will be from the
 *		truncated beg_off to the rounded (end_off - 1).
 */
static int
ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno)
{
	memory_object_size_t tsize;
	kern_return_t kret;
	int request_flags = 0;
	int flush_flags = MEMORY_OBJECT_RETURN_NONE;

	if ( !UBCINFOEXISTS(vp))
		return (0);
	if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0)
		return (0);
	if (end_off <= beg_off)
		return (1);

	if (flags & UBC_INVALIDATE)
		/*
		 * discard the resident pages
		 */
		request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE);

	if (flags & UBC_SYNC)
		/*
		 * wait for all the I/O to complete before returning
		 */
		request_flags |= MEMORY_OBJECT_IO_SYNC;

	if (flags & UBC_PUSHDIRTY)
		/*
		 * we only return the dirty pages in the range
		 */
		flush_flags = MEMORY_OBJECT_RETURN_DIRTY;

	if (flags & UBC_PUSHALL)
		/*
		 * then return all the interesting pages in the range (both
		 * dirty and precious) to the pager
		 */
		flush_flags = MEMORY_OBJECT_RETURN_ALL;

	beg_off = trunc_page_64(beg_off);
	end_off = round_page_64(end_off);
	tsize = (memory_object_size_t)end_off - beg_off;

	/* flush and/or invalidate pages in the range requested */
	kret = memory_object_lock_request(vp->v_ubcinfo->ui_control,
					  beg_off, tsize,
					  (memory_object_offset_t *)resid_off,
					  io_errno, flush_flags, request_flags,
					  VM_PROT_NO_CHANGE);

	return ((kret == KERN_SUCCESS) ? 1 : 0);
}

/*
 * ubc_map
 *
 * Explicitly map a vnode that has an associated ubc_info, and add a reference
 * to it for the ubc system, if there isn't one already, so it will not be
 * recycled while it's in use, and set flags on the ubc_info to indicate that
 * we have done this
 *
 * Parameters:	vp			The vnode to map
 *		flags			The mapping flags for the vnode; this
 *					will be a combination of one or more of
 *					PROT_READ, PROT_WRITE, and PROT_EXEC
 *
 * Returns:	0			Success
 *		EPERM			Permission was denied
 *
 * Notes:	An I/O reference on the vnode must already be held on entry
 *
 *		If there is no ubc_info associated with the vnode, this function
 *		will return success.
 *
 *		If a permission error occurs, this function will return
 *		failure; all other failures will cause this function to return
 *		success.
 *
 *		IMPORTANT: This is an internal use function, and its symbols
 *		are not exported, hence its error checking is not very robust.
 *		It is primarily used by:
 *
 *		o	mmap(), when mapping a file
 *		o	The deprecated map_fd() interface, when mapping a file
 *		o	When mapping a shared file (a shared library in the
 *			shared segment region)
 *		o	When loading a program image during the exec process
 *
 *		...all of these uses ignore the return code, and any fault that
 *		results later because of a failure is handled in the fix-up path
 *		of the fault handler.  The interface exists primarily as a
 *		performance hint.
 *
 *		Given that third party implementation of the type of interfaces
 *		that would use this function, such as alternative executable
 *		formats, etc., are unsupported, this function is not exported
 *		for general use.
 *
 *		The extra reference is held until the VM system unmaps the
 *		vnode from its own context to maintain a vnode reference in
 *		cases like open()/mmap()/close(), which leave the backing
 *		object referenced by a mapped memory region in a process
 *		address space.
 */
__private_extern__ int
ubc_map(vnode_t vp, int flags)
{
	struct ubc_info *uip;
	int error = 0;
	int need_ref = 0;
	int need_wakeup = 0;

	if (UBCINFOEXISTS(vp)) {

		vnode_lock(vp);
		uip = vp->v_ubcinfo;

		while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
			SET(uip->ui_flags, UI_MAPWAITING);
			(void) msleep(&uip->ui_flags, &vp->v_lock,
				      PRIBIO, "ubc_map", NULL);
		}
		SET(uip->ui_flags, UI_MAPBUSY);
		vnode_unlock(vp);

		error = VNOP_MMAP(vp, flags, vfs_context_current());

		if (error != EPERM)
			error = 0;

		vnode_lock_spin(vp);

		if (error == 0) {
			if ( !ISSET(uip->ui_flags, UI_ISMAPPED))
				need_ref = 1;
			SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
		}
		CLR(uip->ui_flags, UI_MAPBUSY);

		if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
			CLR(uip->ui_flags, UI_MAPWAITING);
			need_wakeup = 1;
		}
		vnode_unlock(vp);

		if (need_wakeup)
			wakeup(&uip->ui_flags);

		if (need_ref)
			vnode_ref(vp);
	}
	return (error);
}

/*
 * ubc_destroy_named
 *
 * Destroy the named memory object associated with the ubc_info control object
 * associated with the designated vnode, if there is a ubc_info associated
 * with the vnode, and a control object is associated with it
 *
 * Parameters:	vp			The designated vnode
 *
 * Returns:	(void)
 *
 * Notes:	This function is called on vnode termination for all vnodes,
 *		and must therefore not assume that there is a ubc_info that is
 *		associated with the vnode, nor that there is a control object
 *		associated with the ubc_info.
 *
 *		If all the conditions necessary are present, this function
 *		calls memory_object_destroy(), which will in turn end up
 *		calling ubc_unmap() to release any vnode references that were
 *		established via ubc_map().
 *
 *		IMPORTANT: This is an internal use function that is used
 *		exclusively by the internal use function vclean().
 */
__private_extern__ void
ubc_destroy_named(vnode_t vp)
{
	memory_object_control_t control;
	struct ubc_info *uip;
	kern_return_t kret;

	if (UBCINFOEXISTS(vp)) {
		uip = vp->v_ubcinfo;

		/* Terminate the memory object */
		control = ubc_getobject(vp, UBC_HOLDOBJECT);
		if (control != MEMORY_OBJECT_CONTROL_NULL) {
			kret = memory_object_destroy(control, 0);
			if (kret != KERN_SUCCESS)
				panic("ubc_destroy_named: memory_object_destroy failed");
		}
	}
}

/*
 * ubc_isinuse
 *
 * Determine whether or not a vnode is currently in use by ubc at a level in
 * excess of the requested busycount
 *
 * Parameters:	vp			The vnode to check
 *		busycount		The threshold busy count, used to bias
 *					the count usually already held by the
 *					caller to avoid races
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	Because the vnode is only held locked while actually asking
 *		the use count, this function only represents a snapshot of the
 *		current state of the vnode.  If more accurate information is
 *		required, an additional busycount should be held by the caller
 *		and a non-zero busycount used.
 *
 *		If there is no ubc_info associated with the vnode, this
 *		function will report that the vnode is not in use by ubc.
 */
int
ubc_isinuse(struct vnode *vp, int busycount)
{
	if ( !UBCINFOEXISTS(vp))
		return (0);
	return(ubc_isinuse_locked(vp, busycount, 0));
}

/*
 * ubc_isinuse_locked
 *
 * Determine whether or not a vnode is currently in use by ubc at a level in
 * excess of the requested busycount
 *
 * Parameters:	vp			The vnode to check
 *		busycount		The threshold busy count, used to bias
 *					the count usually already held by the
 *					caller to avoid races
 *		locked			True if the vnode is already locked by
 *					the caller
 *
 * Returns:	1			The vnode is in use over the threshold
 *		0			The vnode is not in use over the
 *					threshold
 *
 * Notes:	If the vnode is not locked on entry, it is locked while
 *		actually asking the use count.  If this is the case, this
 *		function only represents a snapshot of the current state of
 *		the vnode.  If more accurate information is required, the
 *		vnode lock should be held by the caller, otherwise an
 *		additional busycount should be held by the caller and a
 *		non-zero busycount used.
 *
 *		If there is no ubc_info associated with the vnode, this
 *		function will report that the vnode is not in use by ubc.
 */
int
ubc_isinuse_locked(struct vnode *vp, int busycount, int locked)
{
	int retval = 0;


	if (!locked)
		vnode_lock_spin(vp);

	if ((vp->v_usecount - vp->v_kusecount) > busycount)
		retval = 1;

	if (!locked)
		vnode_unlock(vp);
	return (retval);
}

/*
 * ubc_unmap
 *
 * Reverse the effects of a ubc_map() call for a given vnode
 *
 * Parameters:	vp			vnode to unmap from ubc
 *
 * Returns:	(void)
 *
 * Notes:	This is an internal use function used by vnode_pager_unmap().
 *		It will attempt to obtain a reference on the supplied vnode,
 *		and if it can do so, and there is an associated ubc_info, and
 *		the flags indicate that it was mapped via ubc_map(), then the
 *		flag is cleared, the mapping removed, and the reference taken
 *		by ubc_map() is released.
 *
 *		IMPORTANT: This MUST only be called by the VM
 *		to prevent race conditions.
 */
__private_extern__ void
ubc_unmap(struct vnode *vp)
{
	struct ubc_info *uip;
	int need_rele = 0;
	int need_wakeup = 0;

	if (vnode_getwithref(vp))
		return;

	if (UBCINFOEXISTS(vp)) {
		vnode_lock(vp);
		uip = vp->v_ubcinfo;

		while (ISSET(uip->ui_flags, UI_MAPBUSY)) {
			SET(uip->ui_flags, UI_MAPWAITING);
			(void) msleep(&uip->ui_flags, &vp->v_lock,
				      PRIBIO, "ubc_unmap", NULL);
		}
		SET(uip->ui_flags, UI_MAPBUSY);

		if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
			CLR(uip->ui_flags, UI_ISMAPPED);
			need_rele = 1;
		}
		vnode_unlock(vp);

		if (need_rele) {
			(void)VNOP_MNOMAP(vp, vfs_context_current());
			vnode_rele(vp);
		}

		vnode_lock_spin(vp);

		CLR(uip->ui_flags, UI_MAPBUSY);
		if (ISSET(uip->ui_flags, UI_MAPWAITING)) {
			CLR(uip->ui_flags, UI_MAPWAITING);
			need_wakeup = 1;
		}
		vnode_unlock(vp);

		if (need_wakeup)
			wakeup(&uip->ui_flags);

	}
	/*
	 * the drop of the vnode ref will cleanup
	 */
	vnode_put(vp);
}

/*
 * ubc_page_op
 *
 * Manipulate individual page state for a vnode with an associated ubc_info
 * with an associated memory object control.
 *
 * Parameters:	vp			The vnode backing the page
 *		f_offset		A file offset interior to the page
 *		ops			The operations to perform, as a bitmap
 *					(see below for more information)
 *		phys_entryp		The address of a ppnum_t; may be NULL
 *					to ignore
 *		flagsp			A pointer to an int to contain flags;
 *					may be NULL to ignore
 *
 * Returns:	KERN_SUCCESS		Success
 *		KERN_INVALID_ARGUMENT	If the memory object control has no VM
 *					object associated
 *		KERN_INVALID_OBJECT	If UPL_POP_PHYSICAL and the object is
 *					not physically contiguous
 *		KERN_INVALID_OBJECT	If !UPL_POP_PHYSICAL and the object is
 *					physically contiguous
 *		KERN_FAILURE		If the page cannot be looked up
 *
 * Implicit Returns:
 *		*phys_entryp (modified)	If phys_entryp is non-NULL and
 *					UPL_POP_PHYSICAL
 *		*flagsp (modified)	If flagsp is non-NULL and there was
 *					!UPL_POP_PHYSICAL and a KERN_SUCCESS
 *
 * Notes:	For object boundaries, it is considerably more efficient to
 *		ensure that f_offset is in fact on a page boundary, as this
 *		will avoid internal use of the hash table to identify the
 *		page, and would therefore skip a number of early optimizations.
 *		Since this is a page operation anyway, the caller should try
 *		to pass only a page aligned offset because of this.
 *
 *		*flagsp may be modified even if this function fails.  If it is
 *		modified, it will contain the condition of the page before the
 *		requested operation was attempted; these will only include the
 *		bitmap flags, and not the UPL_POP_PHYSICAL, UPL_POP_DUMP,
 *		UPL_POP_SET, or UPL_POP_CLR bits.
 *
 *		The flags field may contain a specific operation, such as
 *		UPL_POP_PHYSICAL or UPL_POP_DUMP:
 *
 *		o	UPL_POP_PHYSICAL	Fail if not contiguous; if
 *						*phys_entryp and successful, set
 *						*phys_entryp
 *		o	UPL_POP_DUMP		Dump the specified page
 *
 *		Otherwise, it is treated as a bitmap of one or more page
 *		operations to perform on the final memory object; allowable
 *		bit values are:
 *
 *		o	UPL_POP_DIRTY		The page is dirty
 *		o	UPL_POP_PAGEOUT		The page is paged out
 *		o	UPL_POP_PRECIOUS	The page is precious
 *		o	UPL_POP_ABSENT		The page is absent
 *		o	UPL_POP_BUSY		The page is busy
 *
 *		If the page status is only being queried and not modified, then
 *		no other bits should be specified.  However, if it is being
 *		modified, exactly ONE of the following bits should be set:
 *
 *		o	UPL_POP_SET		Set the current bitmap bits
 *		o	UPL_POP_CLR		Clear the current bitmap bits
 *
 *		Thus to effect a combination of setting and clearing, it may be
 *		necessary to call this function twice.  If this is done, the
 *		set should be used before the clear, since clearing may trigger
 *		a wakeup on the destination page, and if the page is backed by
 *		an encrypted swap file, setting will trigger the decryption
 *		needed before the wakeup occurs.
 */
kern_return_t
ubc_page_op(
	struct vnode	*vp,
	off_t		f_offset,
	int		ops,
	ppnum_t		*phys_entryp,
	int		*flagsp)
{
	memory_object_control_t control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	return (memory_object_page_op(control,
				      (memory_object_offset_t)f_offset,
				      ops,
				      phys_entryp,
				      flagsp));
}
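
/*
 * Usage sketch (illustrative only, not part of the original code; the
 * exact query convention is an assumption based on the notes above):
 * querying, without modifying, the state of the page containing
 * f_offset:
 *
 *	int flags = 0;
 *	if (ubc_page_op(vp, f_offset, 0, NULL, &flags) == KERN_SUCCESS &&
 *	    (flags & UPL_POP_DIRTY)) {
 *		// the page is resident and dirty
 *	}
 */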
2d21ac55
A
1696
1697
/*
 * ubc_range_op
 *
 * Manipulate page state for a range of memory for a vnode with an associated
 * ubc_info with an associated memory object control, when page level state is
 * not required to be returned from the call (i.e. there are no phys_entryp or
 * flagsp parameters to this call, and it takes a range which may contain
 * multiple pages, rather than an offset interior to a single page).
 *
 * Parameters:  vp                      The vnode backing the page
 *              f_offset_beg            A file offset interior to the start page
 *              f_offset_end            A file offset interior to the end page
 *              ops                     The operations to perform, as a bitmap
 *                                      (see below for more information)
 *              range                   The address of an int; may be NULL to
 *                                      ignore
 *
 * Returns:     KERN_SUCCESS            Success
 *              KERN_INVALID_ARGUMENT   If the memory object control has no VM
 *                                      object associated
 *              KERN_INVALID_OBJECT     If the object is physically contiguous
 *
 * Implicit Returns:
 *              *range (modified)       If range is non-NULL, its contents will
 *                                      be modified to contain the number of
 *                                      bytes successfully operated upon.
 *
 * Notes:       IMPORTANT: This function cannot be used on a range that
 *              consists of physically contiguous pages.
 *
 *              For object boundaries, it is considerably more efficient to
 *              ensure that f_offset_beg and f_offset_end are in fact on page
 *              boundaries, as this will avoid internal use of the hash table
 *              to identify the page, and would therefore skip a number of
 *              early optimizations.  Since this is an operation on a set of
 *              pages anyway, the caller should try to pass only page aligned
 *              offsets because of this.
 *
 *              *range will be modified only if this function succeeds.
 *
 *              The flags field MUST contain a specific operation; allowable
 *              values are:
 *
 *              o UPL_ROP_ABSENT        Returns the extent of the range
 *                                      presented which is absent, starting
 *                                      with the start address presented
 *
 *              o UPL_ROP_PRESENT       Returns the extent of the range
 *                                      presented which is present (resident),
 *                                      starting with the start address
 *                                      presented
 *              o UPL_ROP_DUMP          Dump the pages which are found in the
 *                                      target object for the target range.
 *
 *              IMPORTANT: For UPL_ROP_ABSENT and UPL_ROP_PRESENT, if there are
 *              multiple regions in the range, only the first matching region
 *              is returned.
 */
kern_return_t
ubc_range_op(
        struct vnode    *vp,
        off_t           f_offset_beg,
        off_t           f_offset_end,
        int             ops,
        int             *range)
{
        memory_object_control_t         control;

        control = ubc_getobject(vp, UBC_FLAGS_NONE);
        if (control == MEMORY_OBJECT_CONTROL_NULL)
                return KERN_INVALID_ARGUMENT;

        return (memory_object_range_op(control,
                                      (memory_object_offset_t)f_offset_beg,
                                      (memory_object_offset_t)f_offset_end,
                                      ops,
                                      range));
}


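/*
 * Illustrative sketch (not part of the original file): using UPL_ROP_PRESENT
 * to find how much of a file range is resident, starting at the beginning of
 * the range.  All names besides ubc_range_op() are hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_resident_extent(struct vnode *vp, off_t beg, off_t end)
{
        int resident_bytes = 0;

        if (ubc_range_op(vp, beg, end, UPL_ROP_PRESENT,
            &resident_bytes) == KERN_SUCCESS)
                printf("first resident extent: %d bytes from 0x%llx\n",
                    resident_bytes, beg);
}
#endif
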
/*
 * ubc_create_upl
 *
 * Given a vnode, cause the population of a portion of the vm_object; based on
 * the nature of the request, the pages returned may contain valid data, or
 * they may be uninitialized.
 *
 * Parameters:  vp                      The vnode from which to create the upl
 *              f_offset                The start offset into the backing store
 *                                      represented by the vnode
 *              bufsize                 The size of the upl to create
 *              uplp                    Pointer to the upl_t to receive the
 *                                      created upl; MUST NOT be NULL
 *              plp                     Pointer to receive the internal page
 *                                      list for the created upl; MAY be NULL
 *                                      to ignore
 *
 * Returns:     KERN_SUCCESS            The requested upl has been created
 *              KERN_INVALID_ARGUMENT   The bufsize argument is not an even
 *                                      multiple of the page size
 *              KERN_INVALID_ARGUMENT   There is no ubc_info associated with
 *                                      the vnode, or there is no memory object
 *                                      control associated with the ubc_info
 *      memory_object_upl_request:KERN_INVALID_VALUE
 *                                      The supplied upl_flags argument is
 *                                      invalid
 * Implicit Returns:
 *              *uplp (modified)
 *              *plp (modified)         If non-NULL, the value of *plp will be
 *                                      modified to point to the internal page
 *                                      list; this modification may occur even
 *                                      if this function is unsuccessful, in
 *                                      which case the contents may be invalid
 *
 * Note:        If successful, the returned *uplp MUST subsequently be freed
 *              via a call to ubc_upl_commit(), ubc_upl_commit_range(),
 *              ubc_upl_abort(), or ubc_upl_abort_range().
 */
kern_return_t
ubc_create_upl(
        struct vnode    *vp,
        off_t           f_offset,
        int             bufsize,
        upl_t           *uplp,
        upl_page_info_t **plp,
        int             uplflags)
{
        memory_object_control_t         control;
        kern_return_t                   kr;

        if (plp != NULL)
                *plp = NULL;
        *uplp = NULL;

        if (bufsize & 0xfff)
                return KERN_INVALID_ARGUMENT;

        if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) {

                if (uplflags & UPL_UBC_MSYNC) {
                        uplflags &= UPL_RET_ONLY_DIRTY;

                        uplflags |= UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
                                    UPL_SET_INTERNAL | UPL_SET_LITE;

                } else if (uplflags & UPL_UBC_PAGEOUT) {
                        uplflags &= UPL_RET_ONLY_DIRTY;

                        if (uplflags & UPL_RET_ONLY_DIRTY)
                                uplflags |= UPL_NOBLOCK;

                        uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
                                    UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE;
                } else {
                        uplflags |= UPL_RET_ONLY_ABSENT | UPL_NOBLOCK |
                                    UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
                                    UPL_SET_INTERNAL | UPL_SET_LITE;
                }
        } else {
                uplflags &= ~UPL_FOR_PAGEOUT;

                if (uplflags & UPL_WILL_BE_DUMPED) {
                        uplflags &= ~UPL_WILL_BE_DUMPED;
                        uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL);
                } else
                        uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL);
        }
        control = ubc_getobject(vp, UBC_FLAGS_NONE);
        if (control == MEMORY_OBJECT_CONTROL_NULL)
                return KERN_INVALID_ARGUMENT;

        kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, NULL, uplflags);
        if (kr == KERN_SUCCESS && plp != NULL)
                *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp);
        return kr;
}


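/*
 * Illustrative sketch (not part of the original file): the typical lifecycle
 * of a upl created with ubc_create_upl() for a pagein-style request, released
 * with ubc_upl_abort_range() on failure.  Helper names are hypothetical.
 */
#if 0   /* example only, not compiled */
static kern_return_t
example_create_and_release(struct vnode *vp, off_t f_offset, int bufsize)
{
        upl_t           upl;
        upl_page_info_t *pl;
        kern_return_t   kr;

        /* bufsize must be an even multiple of the page size */
        kr = ubc_create_upl(vp, f_offset, bufsize, &upl, &pl, UPL_UBC_PAGEIN);
        if (kr != KERN_SUCCESS)
                return kr;

        /* ... perform I/O into the pages described by "pl" ... */

        /* the upl MUST be released; here we abort the whole range */
        return ubc_upl_abort_range(upl, 0, bufsize,
            UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
}
#endif
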
/*
 * ubc_upl_maxbufsize
 *
 * Return the maximum bufsize ubc_create_upl() will take.
 *
 * Parameters:  none
 *
 * Returns:     maximum size buffer (in bytes) ubc_create_upl() will take.
 */
upl_size_t
ubc_upl_maxbufsize(
        void)
{
        return(MAX_UPL_SIZE * PAGE_SIZE);
}

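/*
 * Illustrative sketch (not part of the original file): clamping a requested
 * transfer size to the largest upl ubc_create_upl() accepts.  The helper
 * name is hypothetical.
 */
#if 0   /* example only, not compiled */
static int
example_clamped_bufsize(int requested)
{
        upl_size_t max = ubc_upl_maxbufsize();

        return (requested > (int)max) ? (int)max : requested;
}
#endif
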
/*
 * ubc_upl_map
 *
 * Map the page list associated with the supplied upl into the kernel virtual
 * address space at the virtual address indicated by the dst_addr argument;
 * the entire upl is mapped
 *
 * Parameters:  upl                     The upl to map
 *              dst_addr                The address at which to map the upl
 *
 * Returns:     KERN_SUCCESS            The upl has been mapped
 *              KERN_INVALID_ARGUMENT   The upl is UPL_NULL
 *              KERN_FAILURE            The upl is already mapped
 *              vm_map_enter:KERN_INVALID_ARGUMENT
 *                                      A failure code from vm_map_enter() due
 *                                      to an invalid argument
 */
kern_return_t
ubc_upl_map(
        upl_t           upl,
        vm_offset_t     *dst_addr)
{
        return (vm_upl_map(kernel_map, upl, dst_addr));
}


/*
 * ubc_upl_unmap
 *
 * Unmap the page list associated with the supplied upl from the kernel virtual
 * address space; the entire upl is unmapped.
 *
 * Parameters:  upl                     The upl to unmap
 *
 * Returns:     KERN_SUCCESS            The upl has been unmapped
 *              KERN_FAILURE            The upl is not currently mapped
 *              KERN_INVALID_ARGUMENT   If the upl is UPL_NULL
 */
kern_return_t
ubc_upl_unmap(
        upl_t   upl)
{
        return(vm_upl_unmap(kernel_map, upl));
}


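/*
 * Illustrative sketch (not part of the original file): mapping a upl into the
 * kernel map to zero its pages, then unmapping it.  The upl is assumed to
 * have been obtained from ubc_create_upl(); the helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static kern_return_t
example_zero_upl(upl_t upl, upl_size_t size)
{
        vm_offset_t     dst_addr;
        kern_return_t   kr;

        kr = ubc_upl_map(upl, &dst_addr);       /* maps the entire upl */
        if (kr != KERN_SUCCESS)
                return kr;

        bzero((void *)dst_addr, size);

        return ubc_upl_unmap(upl);              /* must balance the map */
}
#endif
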
/*
 * ubc_upl_commit
 *
 * Commit the contents of the upl to the backing store
 *
 * Parameters:  upl                     The upl to commit
 *
 * Returns:     KERN_SUCCESS            The upl has been committed
 *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
 *              KERN_FAILURE            The supplied upl does not represent
 *                                      device memory, and the offset plus the
 *                                      size would exceed the actual size of
 *                                      the upl
 *
 * Notes:       In practice, the only return value for this function should be
 *              KERN_SUCCESS, unless there has been data structure corruption;
 *              since the upl is deallocated regardless of success or failure,
 *              there's really nothing to do about this other than panic.
 *
 *              IMPORTANT: Use of this function should not be mixed with use of
 *              ubc_upl_commit_range(), due to the unconditional deallocation
 *              by this function.
 */
kern_return_t
ubc_upl_commit(
        upl_t           upl)
{
        upl_page_info_t *pl;
        kern_return_t   kr;

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
        kr = upl_commit(upl, pl, MAX_UPL_SIZE);
        upl_deallocate(upl);
        return kr;
}


/*
 * ubc_upl_commit_range
 *
 * Commit the contents of the specified range of the upl to the backing store
 *
 * Parameters:  upl                     The upl to commit
 *              offset                  The offset into the upl
 *              size                    The size of the region to be committed,
 *                                      starting at the specified offset
 *              flags                   commit type (see below)
 *
 * Returns:     KERN_SUCCESS            The range has been committed
 *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
 *              KERN_FAILURE            The supplied upl does not represent
 *                                      device memory, and the offset plus the
 *                                      size would exceed the actual size of
 *                                      the upl
 *
 * Notes:       IMPORTANT: If the commit is successful, and the object is now
 *              empty, the upl will be deallocated.  Since the caller cannot
 *              check that this is the case, the UPL_COMMIT_FREE_ON_EMPTY flag
 *              should generally only be used when the offset is 0 and the size
 *              is equal to the upl size.
 *
 *              The flags argument is a bitmap of flags on the range of pages
 *              in the upl to be committed; allowable flags are:
 *
 *              o UPL_COMMIT_FREE_ON_EMPTY      Free the upl when it is
 *                                              both empty and has been
 *                                              successfully committed
 *              o UPL_COMMIT_CLEAR_DIRTY        Clear each page's dirty
 *                                              bit; will prevent a
 *                                              later pageout
 *              o UPL_COMMIT_SET_DIRTY          Set each page's dirty
 *                                              bit; will cause a later
 *                                              pageout
 *              o UPL_COMMIT_INACTIVATE         Clear each page's
 *                                              reference bit; the page
 *                                              will not be accessed
 *              o UPL_COMMIT_ALLOW_ACCESS       Unbusy each page; pages
 *                                              become busy when an
 *                                              IOMemoryDescriptor is
 *                                              mapped or redirected,
 *                                              and we have to wait for
 *                                              an IOKit driver
 *
 *              The flag UPL_COMMIT_NOTIFY_EMPTY is used internally, and should
 *              not be specified by the caller.
 *
 *              The UPL_COMMIT_CLEAR_DIRTY and UPL_COMMIT_SET_DIRTY flags are
 *              mutually exclusive, and should not be combined.
 */
kern_return_t
ubc_upl_commit_range(
        upl_t           upl,
        upl_offset_t    offset,
        upl_size_t      size,
        int             flags)
{
        upl_page_info_t *pl;
        boolean_t       empty;
        kern_return_t   kr;

        if (flags & UPL_COMMIT_FREE_ON_EMPTY)
                flags |= UPL_COMMIT_NOTIFY_EMPTY;

        if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
                return KERN_INVALID_ARGUMENT;
        }

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

        kr = upl_commit_range(upl, offset, size, flags,
                              pl, MAX_UPL_SIZE, &empty);

        if((flags & UPL_COMMIT_FREE_ON_EMPTY) && empty)
                upl_deallocate(upl);

        return kr;
}


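/*
 * Illustrative sketch (not part of the original file): committing an entire
 * upl after successful I/O, clearing the dirty bits so the pages are not
 * paged out again.  Per the note above, UPL_COMMIT_FREE_ON_EMPTY is used
 * with offset 0 and the full upl size; the helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static kern_return_t
example_commit_whole_upl(upl_t upl, upl_size_t size)
{
        /* on success-and-empty the upl is deallocated for us */
        return ubc_upl_commit_range(upl, 0, size,
            UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY);
}
#endif
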
/*
 * ubc_upl_abort_range
 *
 * Abort the contents of the specified range of the specified upl
 *
 * Parameters:  upl                     The upl to abort
 *              offset                  The offset into the upl
 *              size                    The size of the region to be aborted,
 *                                      starting at the specified offset
 *              abort_flags             abort type (see below)
 *
 * Returns:     KERN_SUCCESS            The range has been aborted
 *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
 *              KERN_FAILURE            The supplied upl does not represent
 *                                      device memory, and the offset plus the
 *                                      size would exceed the actual size of
 *                                      the upl
 *
 * Notes:       IMPORTANT: If the abort is successful, and the object is now
 *              empty, the upl will be deallocated.  Since the caller cannot
 *              check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
 *              should generally only be used when the offset is 0 and the size
 *              is equal to the upl size.
 *
 *              The abort_flags argument is a bitmap of flags on the range of
 *              pages in the upl to be aborted; allowable flags are:
 *
 *              o UPL_ABORT_FREE_ON_EMPTY       Free the upl when it is both
 *                                              empty and has been successfully
 *                                              aborted
 *              o UPL_ABORT_RESTART             The operation must be restarted
 *              o UPL_ABORT_UNAVAILABLE         The pages are unavailable
 *              o UPL_ABORT_ERROR               An I/O error occurred
 *              o UPL_ABORT_DUMP_PAGES          Just free the pages
 *              o UPL_ABORT_NOTIFY_EMPTY        RESERVED
 *              o UPL_ABORT_ALLOW_ACCESS        RESERVED
 *
 *              UPL_ABORT_NOTIFY_EMPTY is an internal-use flag and should
 *              not be specified by the caller.  It is intended to fulfill the
 *              same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
 *              ubc_upl_commit_range(), but is never referenced internally.
 *
 *              UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
 *              referenced; do not use it.
 */
kern_return_t
ubc_upl_abort_range(
        upl_t           upl,
        upl_offset_t    offset,
        upl_size_t      size,
        int             abort_flags)
{
        kern_return_t   kr;
        boolean_t       empty = FALSE;

        if (abort_flags & UPL_ABORT_FREE_ON_EMPTY)
                abort_flags |= UPL_ABORT_NOTIFY_EMPTY;

        kr = upl_abort_range(upl, offset, size, abort_flags, &empty);

        if((abort_flags & UPL_ABORT_FREE_ON_EMPTY) && empty)
                upl_deallocate(upl);

        return kr;
}


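/*
 * Illustrative sketch (not part of the original file): aborting a upl after
 * a failed pagein, marking the pages with an I/O error and freeing the upl
 * once it is empty.  The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_abort_on_io_error(upl_t upl, upl_size_t size)
{
        (void) ubc_upl_abort_range(upl, 0, size,
            UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
}
#endif
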
/*
 * ubc_upl_abort
 *
 * Abort the contents of the specified upl
 *
 * Parameters:  upl                     The upl to abort
 *              abort_type              abort type (see below)
 *
 * Returns:     KERN_SUCCESS            The upl has been aborted
 *              KERN_INVALID_ARGUMENT   The supplied upl was UPL_NULL
 *              KERN_FAILURE            The supplied upl does not represent
 *                                      device memory, and the offset plus the
 *                                      size would exceed the actual size of
 *                                      the upl
 *
 * Notes:       IMPORTANT: If the abort is successful, and the object is now
 *              empty, the upl will be deallocated.  Since the caller cannot
 *              check that this is the case, the UPL_ABORT_FREE_ON_EMPTY flag
 *              should generally only be used when the offset is 0 and the size
 *              is equal to the upl size.
 *
 *              The abort_type is a bitmap of flags on the range of
 *              pages in the upl to be aborted; allowable flags are:
 *
 *              o UPL_ABORT_FREE_ON_EMPTY       Free the upl when it is both
 *                                              empty and has been successfully
 *                                              aborted
 *              o UPL_ABORT_RESTART             The operation must be restarted
 *              o UPL_ABORT_UNAVAILABLE         The pages are unavailable
 *              o UPL_ABORT_ERROR               An I/O error occurred
 *              o UPL_ABORT_DUMP_PAGES          Just free the pages
 *              o UPL_ABORT_NOTIFY_EMPTY        RESERVED
 *              o UPL_ABORT_ALLOW_ACCESS        RESERVED
 *
 *              UPL_ABORT_NOTIFY_EMPTY is an internal-use flag and should
 *              not be specified by the caller.  It is intended to fulfill the
 *              same role as UPL_COMMIT_NOTIFY_EMPTY does in the function
 *              ubc_upl_commit_range(), but is never referenced internally.
 *
 *              UPL_ABORT_ALLOW_ACCESS is defined, but neither set nor
 *              referenced; do not use it.
 */
kern_return_t
ubc_upl_abort(
        upl_t           upl,
        int             abort_type)
{
        kern_return_t   kr;

        kr = upl_abort(upl, abort_type);
        upl_deallocate(upl);
        return kr;
}


/*
 * ubc_upl_pageinfo
 *
 * Retrieve the internal page list for the specified upl
 *
 * Parameters:  upl                     The upl to obtain the page list from
 *
 * Returns:     !NULL                   The (upl_page_info_t *) for the page
 *                                      list internal to the upl
 *              NULL                    Error/no page list associated
 *
 * Notes:       IMPORTANT: The function is only valid on internal objects
 *              where the list request was made with the UPL_INTERNAL flag.
 *
 *              This function is a utility helper function, since some callers
 *              may not have direct access to the header defining the macro,
 *              due to abstraction layering constraints.
 */
upl_page_info_t *
ubc_upl_pageinfo(
        upl_t           upl)
{
        return (UPL_GET_INTERNAL_PAGE_LIST(upl));
}


int
UBCINFOEXISTS(struct vnode * vp)
{
        return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL));
}


/*
 * CODE SIGNING
 */
#define CS_BLOB_PAGEABLE 0
static volatile SInt32 cs_blob_size = 0;
static volatile SInt32 cs_blob_count = 0;
static SInt32 cs_blob_size_peak = 0;
static UInt32 cs_blob_size_max = 0;
static SInt32 cs_blob_count_peak = 0;

int cs_validation = 1;

SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW, &cs_validation, 0, "Do validate code signatures");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD, &cs_blob_count, 0, "Current number of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD, &cs_blob_size, 0, "Current size of all code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");

kern_return_t
ubc_cs_blob_allocate(
        vm_offset_t     *blob_addr_p,
        vm_size_t       *blob_size_p)
{
        kern_return_t   kr;

#if CS_BLOB_PAGEABLE
        *blob_size_p = round_page(*blob_size_p);
        kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
#else   /* CS_BLOB_PAGEABLE */
        *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
        if (*blob_addr_p == 0) {
                kr = KERN_NO_SPACE;
        } else {
                kr = KERN_SUCCESS;
        }
#endif  /* CS_BLOB_PAGEABLE */
        return kr;
}

void
ubc_cs_blob_deallocate(
        vm_offset_t     blob_addr,
        vm_size_t       blob_size)
{
#if CS_BLOB_PAGEABLE
        kmem_free(kernel_map, blob_addr, blob_size);
#else   /* CS_BLOB_PAGEABLE */
        kfree((void *) blob_addr, blob_size);
#endif  /* CS_BLOB_PAGEABLE */
}

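/*
 * Illustrative sketch (not part of the original file): the allocate/copyin/
 * add/deallocate flow a loader might use to register a detached signature
 * blob with a vnode.  When ubc_cs_blob_add() returns 0 it has taken
 * ownership of the memory, so the caller frees it only on failure.  Names
 * other than the ubc_cs_blob_* functions are hypothetical.
 */
#if 0   /* example only, not compiled */
static int
example_load_signature(struct vnode *vp, user_addr_t ublob, vm_size_t size,
    off_t base_offset)
{
        vm_offset_t     kaddr;
        int             error;

        if (ubc_cs_blob_allocate(&kaddr, &size) != KERN_SUCCESS)
                return ENOMEM;

        error = copyin(ublob, (void *)kaddr, size);
        if (error == 0)
                error = ubc_cs_blob_add(vp, CPU_TYPE_ANY, base_offset,
                    kaddr, size);

        if (error)      /* on success, the blob now belongs to the vnode */
                ubc_cs_blob_deallocate(kaddr, size);
        return error;
}
#endif
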
int
ubc_cs_blob_add(
        struct vnode    *vp,
        cpu_type_t      cputype,
        off_t           base_offset,
        vm_address_t    addr,
        vm_size_t       size)
{
        kern_return_t           kr;
        struct ubc_info         *uip;
        struct cs_blob          *blob, *oblob;
        int                     error;
        ipc_port_t              blob_handle;
        memory_object_size_t    blob_size;
        const CS_CodeDirectory *cd;
        off_t                   blob_start_offset, blob_end_offset;
        SHA1_CTX                sha1ctxt;

        blob_handle = IPC_PORT_NULL;

        blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob));
        if (blob == NULL) {
                return ENOMEM;
        }

#if CS_BLOB_PAGEABLE
        /* get a memory entry on the blob */
        blob_size = (memory_object_size_t) size;
        kr = mach_make_memory_entry_64(kernel_map,
                                       &blob_size,
                                       addr,
                                       VM_PROT_READ,
                                       &blob_handle,
                                       IPC_PORT_NULL);
        if (kr != KERN_SUCCESS) {
                error = ENOMEM;
                goto out;
        }
        if (memory_object_round_page(blob_size) !=
            (memory_object_size_t) round_page(size)) {
                printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%lx !?\n",
                       blob_size, (size_t)size);
                panic("XXX FBDP size mismatch 0x%llx 0x%lx\n", blob_size, (size_t)size);
                error = EINVAL;
                goto out;
        }
#else
        blob_size = (memory_object_size_t) size;
        blob_handle = IPC_PORT_NULL;
#endif

        /* fill in the new blob */
        blob->csb_cpu_type = cputype;
        blob->csb_base_offset = base_offset;
        blob->csb_mem_size = size;
        blob->csb_mem_offset = 0;
        blob->csb_mem_handle = blob_handle;
        blob->csb_mem_kaddr = addr;

        /*
         * Validate the blob's contents
         */
        cd = findCodeDirectory(
                (const CS_SuperBlob *) addr,
                (char *) addr,
                (char *) addr + blob->csb_mem_size);
        if (cd == NULL) {
                /* no code directory => useless blob ! */
                blob->csb_flags = 0;
                blob->csb_start_offset = 0;
                blob->csb_end_offset = 0;
        } else {
                const unsigned char *sha1_base;
                int sha1_size;

                blob->csb_flags = ntohl(cd->flags) | CS_VALID;
                blob->csb_end_offset = round_page(ntohl(cd->codeLimit));
                if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) {
                        const struct Scatter *scatter = (const struct Scatter*)
                                ((const char*)cd + ntohl(cd->scatterOffset));
                        blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE;
                } else {
                        blob->csb_start_offset = (blob->csb_end_offset -
                                                  (ntohl(cd->nCodeSlots) * PAGE_SIZE));
                }
                /* compute the blob's SHA1 hash */
                sha1_base = (const unsigned char *) cd;
                sha1_size = ntohl(cd->length);
                SHA1Init(&sha1ctxt);
                SHA1Update(&sha1ctxt, sha1_base, sha1_size);
                SHA1Final(blob->csb_sha1, &sha1ctxt);
        }

        /*
         * Let policy module check whether the blob's signature is accepted.
         */
#if CONFIG_MACF
        error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
        if (error)
                goto out;
#endif

        /*
         * Validate the blob's coverage
         */
        blob_start_offset = blob->csb_base_offset + blob->csb_start_offset;
        blob_end_offset = blob->csb_base_offset + blob->csb_end_offset;

        if (blob_start_offset >= blob_end_offset ||
            blob_start_offset < 0 ||
            blob_end_offset <= 0) {
                /* reject empty or backwards blob */
                error = EINVAL;
                goto out;
        }

        vnode_lock(vp);
        if (! UBCINFOEXISTS(vp)) {
                vnode_unlock(vp);
                error = ENOENT;
                goto out;
        }
        uip = vp->v_ubcinfo;

        /* check if this new blob overlaps with an existing blob */
        for (oblob = uip->cs_blobs;
             oblob != NULL;
             oblob = oblob->csb_next) {
                off_t oblob_start_offset, oblob_end_offset;

                oblob_start_offset = (oblob->csb_base_offset +
                                      oblob->csb_start_offset);
                oblob_end_offset = (oblob->csb_base_offset +
                                    oblob->csb_end_offset);
                if (blob_start_offset >= oblob_end_offset ||
                    blob_end_offset <= oblob_start_offset) {
                        /* no conflict with this existing blob */
                } else {
                        /* conflict ! */
                        if (blob_start_offset == oblob_start_offset &&
                            blob_end_offset == oblob_end_offset &&
                            blob->csb_mem_size == oblob->csb_mem_size &&
                            blob->csb_flags == oblob->csb_flags &&
                            (blob->csb_cpu_type == CPU_TYPE_ANY ||
                             oblob->csb_cpu_type == CPU_TYPE_ANY ||
                             blob->csb_cpu_type == oblob->csb_cpu_type) &&
                            !bcmp(blob->csb_sha1,
                                  oblob->csb_sha1,
                                  SHA1_RESULTLEN)) {
                                /*
                                 * We already have this blob:
                                 * we'll return success but
                                 * throw away the new blob.
                                 */
                                if (oblob->csb_cpu_type == CPU_TYPE_ANY) {
                                        /*
                                         * The old blob matches this one
                                         * but doesn't have any CPU type.
                                         * Update it with whatever the caller
                                         * provided this time.
                                         */
                                        oblob->csb_cpu_type = cputype;
                                }
                                vnode_unlock(vp);
                                error = EAGAIN;
                                goto out;
                        } else {
                                /* different blob: reject the new one */
                                vnode_unlock(vp);
                                error = EALREADY;
                                goto out;
                        }
                }

        }

        /* mark this vnode's VM object as having "signed pages" */
        kr = memory_object_signed(uip->ui_control, TRUE);
        if (kr != KERN_SUCCESS) {
                vnode_unlock(vp);
                error = ENOENT;
                goto out;
        }

        /*
         * Add this blob to the list of blobs for this vnode.
         * We always add at the front of the list and we never remove a
         * blob from the list, so ubc_cs_get_blobs() can return whatever
         * the top of the list was and that list will remain valid
         * while we validate a page, even after we release the vnode's lock.
         */
        blob->csb_next = uip->cs_blobs;
        uip->cs_blobs = blob;

        OSAddAtomic(+1, &cs_blob_count);
        if (cs_blob_count > cs_blob_count_peak) {
                cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */
        }
        OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size);
        if ((SInt32) cs_blob_size > cs_blob_size_peak) {
                cs_blob_size_peak = (SInt32) cs_blob_size; /* XXX atomic ? */
        }
        if ((UInt32) blob->csb_mem_size > cs_blob_size_max) {
                cs_blob_size_max = (UInt32) blob->csb_mem_size;
        }

        if (cs_debug) {
                proc_t p;

                p = current_proc();
                printf("CODE SIGNING: proc %d(%s) "
                       "loaded %s signatures for file (%s) "
                       "range 0x%llx:0x%llx flags 0x%x\n",
                       p->p_pid, p->p_comm,
                       blob->csb_cpu_type == -1 ? "detached" : "embedded",
                       vnode_name(vp),
                       blob->csb_base_offset + blob->csb_start_offset,
                       blob->csb_base_offset + blob->csb_end_offset,
                       blob->csb_flags);
        }

        vnode_unlock(vp);

        error = 0;      /* success ! */

out:
        if (error) {
                /* we failed; release what we allocated */
                if (blob) {
                        kfree(blob, sizeof (*blob));
                        blob = NULL;
                }
                if (blob_handle != IPC_PORT_NULL) {
                        mach_memory_entry_port_release(blob_handle);
                        blob_handle = IPC_PORT_NULL;
                }
        }

        if (error == EAGAIN) {
                /*
                 * See above:  error is EAGAIN if we were asked
                 * to add an existing blob again.  We cleaned the new
                 * blob and we want to return success.
                 */
                error = 0;
                /*
                 * Since we're not failing, consume the data we received.
                 */
                ubc_cs_blob_deallocate(addr, size);
        }

        return error;
}


struct cs_blob *
ubc_cs_blob_get(
        struct vnode    *vp,
        cpu_type_t      cputype,
        off_t           offset)
{
        struct ubc_info *uip;
        struct cs_blob  *blob;
        off_t offset_in_blob;

        vnode_lock_spin(vp);

        if (! UBCINFOEXISTS(vp)) {
                blob = NULL;
                goto out;
        }

        uip = vp->v_ubcinfo;
        for (blob = uip->cs_blobs;
             blob != NULL;
             blob = blob->csb_next) {
                if (cputype != -1 && blob->csb_cpu_type == cputype) {
                        break;
                }
                if (offset != -1) {
                        offset_in_blob = offset - blob->csb_base_offset;
                        if (offset_in_blob >= blob->csb_start_offset &&
                            offset_in_blob < blob->csb_end_offset) {
                                /* our offset is covered by this blob */
                                break;
                        }
                }
        }

out:
        vnode_unlock(vp);

        return blob;
}

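/*
 * Illustrative sketch (not part of the original file): checking whether a
 * vnode carries a signature blob for a given CPU type, e.g. for one slice
 * of a fat binary.  The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static boolean_t
example_has_signature_for(struct vnode *vp, cpu_type_t cputype)
{
        /* pass offset -1 so only the CPU type is matched */
        return ubc_cs_blob_get(vp, cputype, (off_t)-1) != NULL;
}
#endif
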
static void
ubc_cs_free(
        struct ubc_info *uip)
{
        struct cs_blob  *blob, *next_blob;

        for (blob = uip->cs_blobs;
             blob != NULL;
             blob = next_blob) {
                next_blob = blob->csb_next;
                if (blob->csb_mem_kaddr != 0) {
                        ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
                                               blob->csb_mem_size);
                        blob->csb_mem_kaddr = 0;
                }
                if (blob->csb_mem_handle != IPC_PORT_NULL) {
                        mach_memory_entry_port_release(blob->csb_mem_handle);
                }
                blob->csb_mem_handle = IPC_PORT_NULL;
                OSAddAtomic(-1, &cs_blob_count);
                OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size);
                kfree(blob, sizeof (*blob));
        }
        uip->cs_blobs = NULL;
}

struct cs_blob *
ubc_get_cs_blobs(
        struct vnode    *vp)
{
        struct ubc_info *uip;
        struct cs_blob  *blobs;

        /*
         * No need to take the vnode lock here.  The caller must be holding
         * a reference on the vnode (via a VM mapping or open file descriptor),
         * so the vnode will not go away.  The ubc_info stays until the vnode
         * goes away.  And we only modify "blobs" by adding to the head of the
         * list.
         * The ubc_info could go away entirely if the vnode gets reclaimed as
         * part of a forced unmount.  In the case of a code-signature validation
         * during a page fault, the "paging_in_progress" reference on the VM
         * object guarantees that the vnode pager (and the ubc_info) won't go
         * away during the fault.
         * Other callers need to protect against vnode reclaim by holding the
         * vnode lock, for example.
         */

        if (! UBCINFOEXISTS(vp)) {
                blobs = NULL;
                goto out;
        }

        uip = vp->v_ubcinfo;
        blobs = uip->cs_blobs;

out:
        return blobs;
}

unsigned long cs_validate_page_no_hash = 0;
unsigned long cs_validate_page_bad_hash = 0;
boolean_t
cs_validate_page(
        void                    *_blobs,
        memory_object_offset_t  page_offset,
        const void              *data,
        boolean_t               *tainted)
{
        SHA1_CTX                sha1ctxt;
        unsigned char           actual_hash[SHA1_RESULTLEN];
        unsigned char           expected_hash[SHA1_RESULTLEN];
        boolean_t               found_hash;
        struct cs_blob          *blobs, *blob;
        const CS_CodeDirectory  *cd;
        const CS_SuperBlob      *embedded;
        const unsigned char     *hash;
        boolean_t               validated;
        off_t                   offset; /* page offset in the file */
        size_t                  size;
        off_t                   codeLimit = 0;
        char                    *lower_bound, *upper_bound;
        vm_offset_t             kaddr, blob_addr;
        vm_size_t               ksize;
        kern_return_t           kr;

        offset = page_offset;

        /* retrieve the expected hash */
        found_hash = FALSE;
        blobs = (struct cs_blob *) _blobs;

        for (blob = blobs;
             blob != NULL;
             blob = blob->csb_next) {
                offset = page_offset - blob->csb_base_offset;
                if (offset < blob->csb_start_offset ||
                    offset >= blob->csb_end_offset) {
                        /* our page is not covered by this blob */
                        continue;
                }

                /* map the blob in the kernel address space */
                kaddr = blob->csb_mem_kaddr;
                if (kaddr == 0) {
                        ksize = (vm_size_t) (blob->csb_mem_size +
                                             blob->csb_mem_offset);
                        kr = vm_map(kernel_map,
                                    &kaddr,
                                    ksize,
                                    0,
                                    VM_FLAGS_ANYWHERE,
                                    blob->csb_mem_handle,
                                    0,
                                    TRUE,
                                    VM_PROT_READ,
                                    VM_PROT_READ,
                                    VM_INHERIT_NONE);
                        if (kr != KERN_SUCCESS) {
                                /* XXX FBDP what to do !? */
                                printf("cs_validate_page: failed to map blob, "
                                       "size=0x%lx kr=0x%x\n",
                                       (size_t)blob->csb_mem_size, kr);
                                break;
                        }
                }
                blob_addr = kaddr + blob->csb_mem_offset;

                lower_bound = CAST_DOWN(char *, blob_addr);
                upper_bound = lower_bound + blob->csb_mem_size;

                embedded = (const CS_SuperBlob *) blob_addr;
                cd = findCodeDirectory(embedded, lower_bound, upper_bound);
                if (cd != NULL) {
                        if (cd->pageSize != PAGE_SHIFT ||
                            cd->hashType != 0x1 ||
                            cd->hashSize != SHA1_RESULTLEN) {
                                /* bogus blob ? */
                                continue;
                        }

                        offset = page_offset - blob->csb_base_offset;
                        if (offset < blob->csb_start_offset ||
                            offset >= blob->csb_end_offset) {
                                /* our page is not covered by this blob */
                                continue;
                        }

                        codeLimit = ntohl(cd->codeLimit);
                        hash = hashes(cd, atop(offset),
                                      lower_bound, upper_bound);
                        if (hash != NULL) {
                                bcopy(hash, expected_hash,
                                      sizeof (expected_hash));
                                found_hash = TRUE;
                        }

                        break;
                }
        }

        if (found_hash == FALSE) {
                /*
                 * We can't verify this page because there is no signature
                 * for it (yet).  It's possible that this part of the object
                 * is not signed, or that signatures for that part have not
                 * been loaded yet.
                 * Report that the page has not been validated and let the
                 * caller decide if it wants to accept it or not.
                 */
                cs_validate_page_no_hash++;
                if (cs_debug > 1) {
                        printf("CODE SIGNING: cs_validate_page: "
                               "off 0x%llx: no hash to validate !?\n",
                               page_offset);
                }
                validated = FALSE;
                *tainted = FALSE;
        } else {

                size = PAGE_SIZE;
                const uint32_t *asha1, *esha1;
                if ((off_t)(offset + size) > codeLimit) {
                        /* partial page at end of segment */
                        assert(offset < codeLimit);
                        size = (size_t) (codeLimit & PAGE_MASK);
                }
                /* compute the actual page's SHA1 hash */
                SHA1Init(&sha1ctxt);
                SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
                SHA1Final(actual_hash, &sha1ctxt);

                asha1 = (const uint32_t *) actual_hash;
                esha1 = (const uint32_t *) expected_hash;

                if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) {
                        if (cs_debug) {
                                printf("CODE SIGNING: cs_validate_page: "
                                       "off 0x%llx size 0x%lx: "
                                       "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != "
                                       "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n",
                                       page_offset, size,
                                       asha1[0], asha1[1], asha1[2],
                                       asha1[3], asha1[4],
                                       esha1[0], esha1[1], esha1[2],
                                       esha1[3], esha1[4]);
                        }
                        cs_validate_page_bad_hash++;
                        *tainted = TRUE;
                } else {
                        if (cs_debug > 1) {
                                printf("CODE SIGNING: cs_validate_page: "
                                       "off 0x%llx size 0x%lx: SHA1 OK\n",
                                       page_offset, size);
                        }
                        *tainted = FALSE;
                }
                validated = TRUE;
        }

        return validated;
}

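/*
 * Illustrative sketch (not part of the original file): how a pager-side
 * caller holding a mapped copy of a page might validate it, in the spirit
 * of the page-fault use of this interface.  The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static boolean_t
example_validate(struct vnode *vp, memory_object_offset_t off,
    const void *kaddr)
{
        boolean_t tainted = FALSE;
        boolean_t validated;

        /* the blob list stays valid while we hold a reference on the vnode */
        validated = cs_validate_page(ubc_get_cs_blobs(vp), off, kaddr,
            &tainted);
        return validated && !tainted;
}
#endif
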
int
ubc_cs_getcdhash(
        vnode_t         vp,
        off_t           offset,
        unsigned char   *cdhash)
{
        struct cs_blob  *blobs, *blob;
        off_t           rel_offset;
        int             ret;

        vnode_lock(vp);

        blobs = ubc_get_cs_blobs(vp);
        for (blob = blobs;
             blob != NULL;
             blob = blob->csb_next) {
                /* compute offset relative to this blob */
                rel_offset = offset - blob->csb_base_offset;
                if (rel_offset >= blob->csb_start_offset &&
                    rel_offset < blob->csb_end_offset) {
                        /* this blob does cover our "offset" ! */
                        break;
                }
        }

        if (blob == NULL) {
                /* we didn't find a blob covering "offset" */
                ret = EBADEXEC; /* XXX any better error ? */
        } else {
                /* get the SHA1 hash of that blob */
                bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1));
                ret = 0;
        }

        vnode_unlock(vp);

        return ret;
}
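
/*
 * Illustrative sketch (not part of the original file): fetching and printing
 * the code directory hash covering a given file offset, as a kernel client
 * of ubc_cs_getcdhash() might do.  The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_print_cdhash(vnode_t vp, off_t offset)
{
        unsigned char cdhash[SHA1_RESULTLEN];
        int i;

        if (ubc_cs_getcdhash(vp, offset, cdhash) == 0) {
                printf("cdhash:");
                for (i = 0; i < SHA1_RESULTLEN; i++)
                        printf(" %02x", cdhash[i]);
                printf("\n");
        }
}
#endif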