2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1998 Apple Computer, Inc. All rights reserved.
30 * File: bsd/kern/kern_symfile.c
35 #include <mach/vm_param.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/signalvar.h>
40 #include <sys/resourcevar.h>
41 #include <sys/namei.h>
42 #include <sys/vnode_internal.h>
43 #include <sys/proc_internal.h>
44 #include <sys/kauth.h>
45 #include <sys/timeb.h>
46 #include <sys/times.h>
48 #include <sys/file_internal.h>
50 #include <sys/kernel.h>
54 #include <sys/content_protection.h>
56 #include <mach-o/loader.h>
57 #include <mach-o/nlist.h>
59 #include <kern/kalloc.h>
60 #include <vm/vm_kern.h>
61 #include <pexpert/pexpert.h>
62 #include <IOKit/IOPolledInterface.h>
64 /* This function is called from kern_sysctl in the current process context;
65 * it is exported with the System6.0.exports, but this appears to be a legacy
66 * export, as there are no internal consumers.
/* Prototype for get_kernel_symfile(); both parameters are marked __unused. */
69 get_kernel_symfile(__unused proc_t p
, __unused
char const **symfile
);
/* Definition of get_kernel_symfile(): same parameter list, neither argument is consumed. */
71 get_kernel_symfile(__unused proc_t p
, __unused
char const **symfile
)
/*
 * State shared by the direct-I/O helpers in this file.
 * Members referenced by those helpers: ctx, vp, device, blksize,
 * filelength, pinned and cf.
 */
76 struct kern_direct_file_io_ref_t
88 static int file_ioctl(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
)
90 dev_t device
= *(dev_t
*) p1
;
92 return ((*bdevsw
[major(device
)].d_ioctl
)
93 (device
, theIoctl
, result
, S_IFBLK
, p2
));
96 static int device_ioctl(void * p1
, __unused
void * p2
, u_long theIoctl
, caddr_t result
)
98 return (VNOP_IOCTL(p1
, theIoctl
, result
, 0, p2
));
/*
 * kern_ioctl_file_extents: apply theIoctl to the extents of ref->vp that
 * back the byte range [offset, end).
 *
 * Three requests are handled: DKIOCUNMAP, _DKIOCCSPINEXTENT and
 * _DKIOCCSUNPINEXTENT. For the two pin requests a one-off notification ioctl
 * is sent before the range is walked. The range is then visited chunk by
 * chunk (regular files are translated with VNOP_BLOCKMAP) and one ioctl is
 * issued per chunk through do_ioctl.
 */
102 kern_ioctl_file_extents(struct kern_direct_file_io_ref_t
* ref
, u_long theIoctl
, off_t offset
, off_t end
)
/* Entry point used for every ioctl below; bound to file_ioctl or device_ioctl. */
105 int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
);
/* Start from zeroed request structures. */
114 bzero(&extent
, sizeof(dk_extent_t
));
115 bzero(&unmap
, sizeof(dk_unmap_t
));
116 bzero(&pin
, sizeof(pin
));
/* Choose the ioctl entry point from the vnode type. */
117 if (ref
->vp
->v_type
== VREG
)
121 do_ioctl
= &file_ioctl
;
128 do_ioctl
= &device_ioctl
;
/* One-off notification before the extent walk; its result is deliberately discarded. */
131 if (_DKIOCCSPINEXTENT
== theIoctl
) {
132 /* Tell CS the image size, so it knows whether to place the subsequent pins SSD/HDD */
133 pin
.cp_extent
.length
= end
;
134 pin
.cp_flags
= _DKIOCCSHIBERNATEIMGSIZE
;
135 (void) do_ioctl(p1
, p2
, _DKIOCCSPINEXTENT
, (caddr_t
)&pin
);
136 } else if (_DKIOCCSUNPINEXTENT
== theIoctl
) {
137 /* Tell CS hibernation is done, so it can stop blocking overlapping writes */
138 pin
.cp_flags
= _DKIOCCSPINDISCARDBLACKLIST
;
139 (void) do_ioctl(p1
, p2
, _DKIOCCSUNPINEXTENT
, (caddr_t
)&pin
);
/* Walk [offset, end); the stride is the chunk size set inside the loop body. */
142 for (; offset
< end
; offset
+= filechunk
)
144 if (ref
->vp
->v_type
== VREG
)
/* Request at most 1 GiB per mapping call, clipped to what is left of the range. */
147 filechunk
= 1*1024*1024*1024;
148 if (filechunk
> (size_t)(end
- offset
))
149 filechunk
= (size_t)(end
- offset
);
/* filechunk is passed both as the requested size and, by address, as an output argument. */
150 error
= VNOP_BLOCKMAP(ref
->vp
, offset
, filechunk
, &blkno
,
151 &filechunk
, NULL
, VNODE_WRITE
| VNODE_BLOCKMAP_NO_TRACK
, NULL
);
/* A block number of -1 is skipped: no ioctl is issued for that chunk. */
153 if (-1LL == blkno
) continue;
/* Convert the block number into a byte offset. */
154 fileblk
= blkno
* ref
->blksize
;
156 else if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
/* Block/character device: the whole ref->filelength is one chunk. */
159 filechunk
= ref
->filelength
;
/* DKIOCUNMAP: describe the chunk as a single extent in the unmap request. */
162 if (DKIOCUNMAP
== theIoctl
)
164 extent
.offset
= fileblk
;
165 extent
.length
= filechunk
;
166 unmap
.extents
= &extent
;
167 unmap
.extentsCount
= 1;
168 error
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&unmap
);
169 // printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
/* Pin this chunk for hibernation; any error other than ENOTTY is logged. */
171 else if (_DKIOCCSPINEXTENT
== theIoctl
)
173 pin
.cp_extent
.offset
= fileblk
;
174 pin
.cp_extent
.length
= filechunk
;
175 pin
.cp_flags
= _DKIOCCSPINFORHIBERNATION
;
176 error
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&pin
);
177 if (error
&& (ENOTTY
!= error
))
179 printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", error
, pin
.cp_extent
.offset
, pin
.cp_extent
.length
);
/* Unpin this chunk, using the same request layout and logging rule as the pin case. */
182 else if (_DKIOCCSUNPINEXTENT
== theIoctl
)
184 pin
.cp_extent
.offset
= fileblk
;
185 pin
.cp_extent
.length
= filechunk
;
186 pin
.cp_flags
= _DKIOCCSPINFORHIBERNATION
;
187 error
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&pin
);
188 if (error
&& (ENOTTY
!= error
))
190 printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n", error
, pin
.cp_extent
.offset
, pin
.cp_extent
.length
);
/* External helper; kern_open_file_for_direct_io() stores its result in mpFree. */
200 extern uint32_t freespace_mb(vnode_t vp
);
/*
 * kern_open_file_for_direct_io: open (optionally creating) the file or device
 * called `name` and gather what is needed to perform direct I/O on it.
 *
 * Steps, in order:
 *  - allocate and zero a kern_direct_file_io_ref_t and open the vnode with
 *    vn_open_auth() under the kernel VFS context;
 *  - optionally write an initial buffer (write_file_addr / write_file_len);
 *  - read the vnode attributes and pick the ioctl path by vnode type;
 *  - pin the file's extents, lock its physical extents and report every
 *    physical extent through `callback`;
 *  - query block size, partition base and the device's I/O size limits;
 *  - store results through the optional *_result pointers.
 *
 * The closing statements release what was acquired (unlock physical extents,
 * unpin, vnode_close, kfree of ref).
 */
202 struct kern_direct_file_io_ref_t
*
203 kern_open_file_for_direct_io(const char * name
,
204 boolean_t create_file
,
205 kern_get_file_extents_callback_t callback
,
209 off_t write_file_offset
,
210 void * write_file_addr
,
211 size_t write_file_len
,
212 dev_t
* partition_device_result
,
213 dev_t
* image_device_result
,
214 uint64_t * partitionbase_result
,
215 uint64_t * maxiocount_result
,
218 struct kern_direct_file_io_ref_t
* ref
;
221 struct vnode_attr va
;
232 off_t maxiocount
, count
, segcount
;
233 boolean_t locked
= FALSE
;
/* Entry point used for every ioctl below; rebound as the target changes. */
239 int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
);
/* Allocate the reference object; it is zeroed before any field is used. */
245 ref
= (struct kern_direct_file_io_ref_t
*) kalloc(sizeof(struct kern_direct_file_io_ref_t
));
252 bzero(ref
, sizeof(*ref
));
/* All vnode operations in this function run under the kernel VFS context. */
254 ref
->ctx
= vfs_context_kernel();
/* Always open for writing; O_CREAT is added only when create_file is set. */
256 fmode
= (create_file
) ? (O_CREAT
| FWRITE
) : FWRITE
;
/* Owner read/write only. */
257 cmode
= S_IRUSR
| S_IWUSR
;
259 NDINIT(&nd
, LOOKUP
, OP_OPEN
, ndflags
, UIO_SYSSPACE
, CAST_USER_ADDR_T(name
), ref
->ctx
);
/* Attributes passed to the open: mode plus the data-protection flags and class. */
261 VATTR_SET(&va
, va_mode
, cmode
);
262 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
263 VATTR_SET(&va
, va_dataprotect_class
, PROTECTION_CLASS_D
);
264 if ((error
= vn_open_auth(&nd
, &fmode
, &va
))) {
265 kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode
, cmode
, error
);
/* Regular files get VSWAP set in v_flag while holding the vnode spin lock. */
270 if (ref
->vp
->v_type
== VREG
)
272 vnode_lock_spin(ref
->vp
);
273 SET(ref
->vp
->v_flag
, VSWAP
);
274 vnode_unlock(ref
->vp
);
/* Optional initial write; skipped when either the buffer or the length is zero. */
277 if (write_file_addr
&& write_file_len
)
279 if ((error
= kern_write_file(ref
, write_file_offset
, write_file_addr
, write_file_len
, IO_SKIP_ENCRYPTION
))) {
280 kprintf("kern_write_file() failed with error: %d\n", error
);
/* Request the attributes consulted below. */
286 VATTR_WANTED(&va
, va_rdev
);
287 VATTR_WANTED(&va
, va_fsid
);
288 VATTR_WANTED(&va
, va_devid
);
289 VATTR_WANTED(&va
, va_data_size
);
290 VATTR_WANTED(&va
, va_data_alloc
);
291 VATTR_WANTED(&va
, va_nlink
);
293 if (vnode_getattr(ref
->vp
, &va
, ref
->ctx
)) goto out
;
295 mpFree
= freespace_mb(ref
->vp
);
297 kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n",
298 name
, va
.va_data_size
, va
.va_data_alloc
, mpFree
, fs_free_size
);
300 if (ref
->vp
->v_type
== VREG
)
302 /* Don't dump files with links. */
303 if (va
.va_nlink
!= 1) goto out
;
/* Prefer va_devid when it is supported, otherwise fall back to va_fsid. */
305 device
= (VATTR_IS_SUPPORTED(&va
, va_devid
)) ? va
.va_devid
: va
.va_fsid
;
306 ref
->filelength
= va
.va_data_size
;
310 do_ioctl
= &file_ioctl
;
/*
 * Space already allocated to this file is counted as available when
 * checking that set_file_size fits while leaving fs_free_size free.
 */
316 mpFree
+= va
.va_data_alloc
;
317 if ((mpFree
< set_file_size
) || ((mpFree
- set_file_size
) < fs_free_size
))
/* Resize without zero-filling and without an authorization check (IO_NOZEROFILL | IO_NOAUTH). */
323 error
= vnode_setsize(ref
->vp
, set_file_size
, IO_NOZEROFILL
| IO_NOAUTH
, ref
->ctx
);
325 ref
->filelength
= set_file_size
;
328 else if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
335 do_ioctl
= &device_ioctl
;
339 /* Don't dump to non-regular files. */
343 ref
->device
= device
;
/* ref->cf is true only when the CoreStorage query succeeds and reports hot files enabled. */
346 dk_corestorage_info_t cs_info
;
347 memset(&cs_info
, 0, sizeof(dk_corestorage_info_t
));
348 error
= do_ioctl(p1
, p2
, DKIOCCORESTORAGE
, (caddr_t
)&cs_info
);
349 ref
->cf
= (error
== 0) && (cs_info
.flags
& DK_CORESTORAGE_ENABLE_HOTFILES
);
353 error
= do_ioctl(p1
, p2
, DKIOCGETBLOCKSIZE
, (caddr_t
) &ref
->blksize
);
/* For non-regular vnodes the length is the device block count times the block size. */
357 if (ref
->vp
->v_type
!= VREG
)
359 error
= do_ioctl(p1
, p2
, DKIOCGETBLOCKCOUNT
, (caddr_t
) &fileblk
);
361 ref
->filelength
= fileblk
* ref
->blksize
;
364 // pin logical extents
/* ENOTTY is tolerated here; ref->pinned records whether the pin actually succeeded. */
366 error
= kern_ioctl_file_extents(ref
, _DKIOCCSPINEXTENT
, 0, ref
->filelength
);
367 if (error
&& (ENOTTY
!= error
)) goto out
;
368 ref
->pinned
= (error
== 0);
370 // generate the block list
372 error
= do_ioctl(p1
, p2
, DKIOCLOCKPHYSICALEXTENTS
, NULL
);
/* Walk the whole file; the stride is the chunk size set inside the loop body. */
377 for (; f_offset
< ref
->filelength
; f_offset
+= filechunk
)
379 if (ref
->vp
->v_type
== VREG
)
/* Request at most 1 GiB per mapping call; filechunk is also passed by address as an output argument. */
381 filechunk
= 1*1024*1024*1024;
384 error
= VNOP_BLOCKMAP(ref
->vp
, f_offset
, filechunk
, &blkno
,
385 &filechunk
, NULL
, VNODE_WRITE
| VNODE_BLOCKMAP_NO_TRACK
, NULL
);
/* A block number of -1 is skipped; otherwise convert it to a byte offset. */
387 if (-1LL == blkno
) continue;
388 fileblk
= blkno
* ref
->blksize
;
390 else if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
/* Devices form a single chunk: the full length on the first pass, zero afterwards. */
393 filechunk
= f_offset
? 0 : ref
->filelength
;
/* Resolve the chunk into physical extents, one DKIOCGETPHYSICALEXTENT request at a time. */
397 while (physoffset
< filechunk
)
399 dk_physical_extent_t getphysreq
;
400 bzero(&getphysreq
, sizeof(getphysreq
));
402 getphysreq
.offset
= fileblk
+ physoffset
;
403 getphysreq
.length
= (filechunk
- physoffset
);
404 error
= do_ioctl(p1
, p2
, DKIOCGETPHYSICALEXTENT
, (caddr_t
) &getphysreq
);
/* target holds the device reported for an extent; a differing device takes the branch below. */
408 target
= getphysreq
.dev
;
410 else if (target
!= getphysreq
.dev
)
/* Report the extent in 4096-byte pieces, highest offset first. */
417 for (rev
= 4096; rev
<= getphysreq
.length
; rev
+= 4096)
419 callback(callback_ref
, getphysreq
.offset
+ getphysreq
.length
- rev
, 4096);
/* Report the extent as a single range. */
422 callback(callback_ref
, getphysreq
.offset
, getphysreq
.length
);
/* Advance by the length the ioctl left in getphysreq. */
424 physoffset
+= getphysreq
.length
;
/* A (0, 0) report follows the extent reports -- presumably an end-of-list marker; confirm with the callback's contract. */
427 callback(callback_ref
, 0ULL, 0ULL);
/* For regular files the remaining ioctls address the target device through file_ioctl. */
429 if (ref
->vp
->v_type
== VREG
) p1
= &target
;
434 do_ioctl
= &file_ioctl
;
437 // get partition base
439 if (partitionbase_result
)
441 error
= do_ioctl(p1
, p2
, DKIOCGETBASE
, (caddr_t
) partitionbase_result
);
446 // get block size & constraints
448 error
= do_ioctl(p1
, p2
, DKIOCGETBLOCKSIZE
, (caddr_t
) &blksize
);
/* Start from a 1 GiB ceiling; every limit queried below is compared against it. */
452 maxiocount
= 1*1024*1024*1024;
454 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBLOCKCOUNTREAD
, (caddr_t
) &count
);
/* A reported limit of zero is ignored by each of these comparisons. */
458 if (count
&& (count
< maxiocount
))
461 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBLOCKCOUNTWRITE
, (caddr_t
) &count
);
465 if (count
&& (count
< maxiocount
))
468 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBYTECOUNTREAD
, (caddr_t
) &count
);
471 if (count
&& (count
< maxiocount
))
474 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBYTECOUNTWRITE
, (caddr_t
) &count
);
477 if (count
&& (count
< maxiocount
))
/* Segment limits are fetched as a byte count and a segment count, for reads and then writes. */
480 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTBYTECOUNTREAD
, (caddr_t
) &count
);
482 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTCOUNTREAD
, (caddr_t
) &segcount
);
484 count
= segcount
= 0;
486 if (count
&& (count
< maxiocount
))
489 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTBYTECOUNTWRITE
, (caddr_t
) &count
);
491 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTCOUNTWRITE
, (caddr_t
) &segcount
);
493 count
= segcount
= 0;
495 if (count
&& (count
< maxiocount
))
498 kprintf("max io 0x%qx bytes\n", maxiocount
);
/* The result pointers are optional; each is written only when non-NULL. */
499 if (maxiocount_result
)
500 *maxiocount_result
= maxiocount
;
/* Query solid-state status; kIOPolledFileSSD is OR-ed into flags (presumably when isssd is set -- confirm). */
502 error
= do_ioctl(p1
, p2
, DKIOCISSOLIDSTATE
, (caddr_t
)&isssd
);
504 flags
|= kIOPolledFileSSD
;
506 if (partition_device_result
)
507 *partition_device_result
= device
;
508 if (image_device_result
)
509 *image_device_result
= target
;
/* Block/character device vnodes are closed here. */
513 if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
515 vnode_close(ref
->vp
, FWRITE
, ref
->ctx
);
521 printf("kern_open_file_for_direct_io(%p, %d)\n", ref
, error
);
/* Cleanup: release the physical-extent lock; the result is ignored. */
527 (void) do_ioctl(p1
, p2
, DKIOCUNLOCKPHYSICALEXTENTS
, NULL
);
/* Unpin over the whole file only when it was pinned and CoreStorage hot files are on; otherwise the range is empty. */
534 (void) kern_ioctl_file_extents(ref
, _DKIOCCSUNPINEXTENT
, 0, (ref
->pinned
&& ref
->cf
) ? ref
->filelength
: 0);
535 vnode_close(ref
->vp
, FWRITE
, ref
->ctx
);
/* Free with the same size that was passed to kalloc(). */
539 kfree(ref
, sizeof(struct kern_direct_file_io_ref_t
));
/*
 * kern_write_file: write to the vnode held by ref using vn_rdwr(UIO_WRITE).
 *
 * The transfer is made from system space (UIO_SYSSPACE). The caller's ioflag
 * is combined with IO_SYNC, IO_NODELOCKED and IO_UNIT. The credential and
 * process are taken from ref->ctx, and no residual count is requested
 * (the aresid argument is a null pointer). Returns vn_rdwr()'s result.
 */
547 kern_write_file(struct kern_direct_file_io_ref_t
* ref
, off_t offset
, void * addr
, size_t len
, int ioflag
)
549 return (vn_rdwr(UIO_WRITE
, ref
->vp
,
551 UIO_SYSSPACE
, ioflag
|IO_SYNC
|IO_NODELOCKED
|IO_UNIT
,
552 vfs_context_ucred(ref
->ctx
), (int *) 0,
553 vfs_context_proc(ref
->ctx
)));
/*
 * kern_read_file: read from the vnode held by ref using vn_rdwr(UIO_READ).
 *
 * It passes the same arguments as kern_write_file() apart from the transfer
 * direction: system-space buffer, ioflag combined with IO_SYNC, IO_NODELOCKED
 * and IO_UNIT, credential and process from ref->ctx, and no residual count.
 * Returns vn_rdwr()'s result.
 */
557 kern_read_file(struct kern_direct_file_io_ref_t
* ref
, off_t offset
, void * addr
, size_t len
, int ioflag
)
559 return (vn_rdwr(UIO_READ
, ref
->vp
,
561 UIO_SYSSPACE
, ioflag
|IO_SYNC
|IO_NODELOCKED
|IO_UNIT
,
562 vfs_context_ucred(ref
->ctx
), (int *) 0,
563 vfs_context_proc(ref
->ctx
)));
/* kern_file_mount: return the v_mount field of the vnode held by ref. */
568 kern_file_mount(struct kern_direct_file_io_ref_t
* ref
)
570 return (ref
->vp
->v_mount
);
/*
 * kern_close_file_for_direct_io: tear down a reference obtained from
 * kern_open_file_for_direct_io().
 *
 * Steps, in order: release the physical-extent lock, unpin and/or unmap the
 * file's extents, optionally write a final buffer (addr / write_length at
 * write_offset), close the vnode and free ref.
 *
 * discard_offset / discard_end bound the range handed to DKIOCUNMAP; when
 * ref->cf is set the range extends to ref->filelength instead of discard_end.
 */
574 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t
* ref
,
575 off_t write_offset
, void * addr
, size_t write_length
,
576 off_t discard_offset
, off_t discard_end
)
579 printf("kern_close_file_for_direct_io(%p)\n", ref
);
/* Entry point used for the ioctl below; chosen from the vnode type. */
585 int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
);
589 if (ref
->vp
->v_type
== VREG
)
593 do_ioctl
= &file_ioctl
;
600 do_ioctl
= &device_ioctl
;
/* The result of the unlock is deliberately ignored. */
602 (void) do_ioctl(p1
, p2
, DKIOCUNLOCKPHYSICALEXTENTS
, NULL
);
604 //XXX If unmapping extents then don't also need to unpin; except ...
605 //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and
606 //XXX unmap is subset, so save extra walk over file extents (and the risk
607 //XXX that CF drain starts) vs leaving partial units pinned to SSD
608 //XXX (until whatever was sharing also unmaps). Err on cleaning up fully.
/* Unmap needs a non-empty discard range and either no pin or CoreStorage hot files. */
609 boolean_t will_unmap
= (!ref
->pinned
|| ref
->cf
) && (discard_end
> discard_offset
);
/* Unpin only when the file was pinned and CoreStorage hot files are enabled. */
610 boolean_t will_unpin
= (ref
->pinned
&& ref
->cf
/* && !will_unmap */);
/*
 * With an end of 0 the extent walk in kern_ioctl_file_extents() is empty,
 * so only its up-front notification ioctl is sent.
 */
612 (void) kern_ioctl_file_extents(ref
, _DKIOCCSUNPINEXTENT
, 0, (will_unpin
) ? ref
->filelength
: 0);
616 (void) kern_ioctl_file_extents(ref
, DKIOCUNMAP
, discard_offset
, (ref
->cf
) ? ref
->filelength
: discard_end
);
/* Optional final write; skipped when either the buffer or the length is zero, and its result is ignored. */
619 if (addr
&& write_length
)
621 (void) kern_write_file(ref
, write_offset
, addr
, write_length
, IO_SKIP_ENCRYPTION
);
624 error
= vnode_close(ref
->vp
, FWRITE
, ref
->ctx
);
627 kprintf("vnode_close(%d)\n", error
);
/* Free with the same size that the open path passed to kalloc(). */
633 kfree(ref
, sizeof(struct kern_direct_file_io_ref_t
));