/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1998 Apple Computer, Inc. All rights reserved.
 *
 * File: bsd/kern/kern_symfile.c
 */
35 #include <mach/vm_param.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/signalvar.h>
40 #include <sys/resourcevar.h>
41 #include <sys/namei.h>
42 #include <sys/vnode_internal.h>
43 #include <sys/proc_internal.h>
44 #include <sys/kauth.h>
45 #include <sys/timeb.h>
46 #include <sys/times.h>
48 #include <sys/file_internal.h>
50 #include <sys/kernel.h>
54 #include <sys/content_protection.h>
56 #include <mach-o/loader.h>
57 #include <mach-o/nlist.h>
59 #include <kern/kalloc.h>
60 #include <vm/vm_kern.h>
61 #include <pexpert/pexpert.h>
62 #include <IOKit/IOPolledInterface.h>
/* This function is called from kern_sysctl in the current process context;
 * it is exported with the System6.0.exports, but this appears to be a legacy
 * export, as there are no internal consumers.
 */
// get_kernel_symfile: a prototype immediately followed by the matching
// definition header. Both parameters (p and symfile) carry __unused.
// NOTE(review): the return type and the function body are not visible here,
// so nothing is stated about the value returned -- confirm against the full file.
69 get_kernel_symfile(__unused proc_t p
, __unused
char const **symfile
);
71 get_kernel_symfile(__unused proc_t p
, __unused
char const **symfile
)
// State carried between the open, read/write and close routines in this file.
// The member list is not visible here; the code in this file accesses the
// members vp, ctx, device, blksize, filelength, cf and pinned through it.
76 struct kern_direct_file_io_ref_t
88 static int file_ioctl(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
)
90 dev_t device
= *(dev_t
*) p1
;
92 return ((*bdevsw
[major(device
)].d_ioctl
)
93 (device
, theIoctl
, result
, S_IFBLK
, p2
));
96 static int device_ioctl(void * p1
, __unused
void * p2
, u_long theIoctl
, caddr_t result
)
98 return (VNOP_IOCTL(p1
, theIoctl
, result
, 0, p2
));
// kern_ioctl_file_extents: issues theIoctl (DKIOCUNMAP, _DKIOCCSPINEXTENT or
// _DKIOCCSUNPINEXTENT) against the storage behind ref->vp for the byte range
// described by offset and end.
// NOTE(review): the return type, the local declarations (extent, unmap, pin,
// filechunk, fileblk, blkno, error, p1, p2) and the loop control are on lines
// that are not visible here; confirm them against the full source.
102 kern_ioctl_file_extents(struct kern_direct_file_io_ref_t
* ref
, u_long theIoctl
, off_t offset
, off_t end
)
// Dispatcher chosen below: file_ioctl or device_ioctl.
105 int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
);
// Start from zeroed request structures.
114 bzero(&extent
, sizeof(dk_extent_t
));
115 bzero(&unmap
, sizeof(dk_unmap_t
));
116 bzero(&pin
, sizeof(pin
));
// Regular files use file_ioctl; the other assignment selects device_ioctl.
117 if (ref
->vp
->v_type
== VREG
)
121 do_ioctl
= &file_ioctl
;
128 do_ioctl
= &device_ioctl
;
// One-off preamble ioctl for pin / unpin requests; its result is ignored.
131 if (_DKIOCCSPINEXTENT
== theIoctl
) {
132 /* Tell CS the image size, so it knows whether to place the subsequent pins SSD/HDD */
133 pin
.cp_extent
.length
= end
;
134 pin
.cp_flags
= _DKIOCCSHIBERNATEIMGSIZE
;
135 (void) do_ioctl(p1
, p2
, _DKIOCCSPINEXTENT
, (caddr_t
)&pin
);
136 } else if (_DKIOCCSUNPINEXTENT
== theIoctl
) {
137 /* Tell CS hibernation is done, so it can stop blocking overlapping writes */
138 pin
.cp_flags
= _DKIOCCSPINDISCARDBLACKLIST
;
139 (void) do_ioctl(p1
, p2
, _DKIOCCSUNPINEXTENT
, (caddr_t
)&pin
);
// Regular file: ask for at most 1 GiB, capped to the bytes remaining up to
// end, and let VNOP_BLOCKMAP translate the file offset into a block number;
// VNOP_BLOCKMAP also writes the mapped run length back into filechunk.
144 if (ref
->vp
->v_type
== VREG
)
147 filechunk
= 1*1024*1024*1024;
148 if (filechunk
> (size_t)(end
- offset
))
149 filechunk
= (size_t)(end
- offset
);
150 error
= VNOP_BLOCKMAP(ref
->vp
, offset
, filechunk
, &blkno
,
151 &filechunk
, NULL
, VNODE_WRITE
, NULL
);
// Convert the block number into a byte offset on the device.
153 fileblk
= blkno
* ref
->blksize
;
// Block or character device: the chunk is the whole recorded length.
155 else if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
158 filechunk
= ref
->filelength
;
// DKIOCUNMAP: describe the chunk as a single extent and unmap it.
161 if (DKIOCUNMAP
== theIoctl
)
163 extent
.offset
= fileblk
;
164 extent
.length
= filechunk
;
165 unmap
.extents
= &extent
;
166 unmap
.extentsCount
= 1;
167 error
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&unmap
);
168 // printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length);
// _DKIOCCSPINEXTENT: pin the chunk; any error other than ENOTTY is logged.
170 else if (_DKIOCCSPINEXTENT
== theIoctl
)
172 pin
.cp_extent
.offset
= fileblk
;
173 pin
.cp_extent
.length
= filechunk
;
174 pin
.cp_flags
= _DKIOCCSPINFORHIBERNATION
;
175 error
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&pin
);
176 if (error
&& (ENOTTY
!= error
))
178 printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", error
, pin
.cp_extent
.offset
, pin
.cp_extent
.length
);
// _DKIOCCSUNPINEXTENT: same request layout as pinning, with the same logging rule.
181 else if (_DKIOCCSUNPINEXTENT
== theIoctl
)
183 pin
.cp_extent
.offset
= fileblk
;
184 pin
.cp_extent
.length
= filechunk
;
185 pin
.cp_flags
= _DKIOCCSPINFORHIBERNATION
;
186 error
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&pin
);
187 if (error
&& (ENOTTY
!= error
))
189 printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n", error
, pin
.cp_extent
.offset
, pin
.cp_extent
.length
);
200 extern uint32_t freespace_mb(vnode_t vp
);
// kern_open_file_for_direct_io: opens (and, when create_file is set, creates)
// `name` for writing and returns a struct kern_direct_file_io_ref_t *.
// Visible work, in order: allocate and zero the ref, open the vnode, optionally
// write an initial buffer, check free space and resize a regular file, query
// the backing device, pin and lock the extents, report every physical extent
// through `callback`, then collect I/O limits into the *_result out-parameters.
// NOTE(review): several parameters and locals used below (callback_ref,
// set_file_size, fs_free_size, flags, p1, p2, device, target, ...) are declared
// on lines that are not visible here; confirm them against the full source.
202 struct kern_direct_file_io_ref_t
*
203 kern_open_file_for_direct_io(const char * name
,
204 boolean_t create_file
,
205 kern_get_file_extents_callback_t callback
,
209 off_t write_file_offset
,
210 void * write_file_addr
,
211 size_t write_file_len
,
212 dev_t
* partition_device_result
,
213 dev_t
* image_device_result
,
214 uint64_t * partitionbase_result
,
215 uint64_t * maxiocount_result
,
218 struct kern_direct_file_io_ref_t
* ref
;
221 struct vnode_attr va
;
232 off_t maxiocount
, count
, segcount
;
233 boolean_t locked
= FALSE
;
// Dispatcher chosen below: file_ioctl for regular files, device_ioctl for devices.
239 int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
);
// Allocate the ref, clear it, and give it its own context derived from the kernel context.
245 ref
= (struct kern_direct_file_io_ref_t
*) kalloc(sizeof(struct kern_direct_file_io_ref_t
));
252 bzero(ref
, sizeof(*ref
));
254 ref
->ctx
= vfs_context_create(vfs_context_kernel());
// Open for write, adding O_CREAT when asked to create; mode is owner read/write.
256 fmode
= (create_file
) ? (O_CREAT
| FWRITE
) : FWRITE
;
257 cmode
= S_IRUSR
| S_IWUSR
;
259 NDINIT(&nd
, LOOKUP
, OP_OPEN
, ndflags
, UIO_SYSSPACE
, CAST_USER_ADDR_T(name
), ref
->ctx
);
// Attributes passed to the open: mode, raw-encrypted flag, protection class D.
261 VATTR_SET(&va
, va_mode
, cmode
);
262 VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
);
263 VATTR_SET(&va
, va_dataprotect_class
, PROTECTION_CLASS_D
);
264 if ((error
= vn_open_auth(&nd
, &fmode
, &va
))) goto out
;
// Regular files get VSWAP set in v_flag while the vnode spin lock is held.
267 if (ref
->vp
->v_type
== VREG
)
269 vnode_lock_spin(ref
->vp
);
270 SET(ref
->vp
->v_flag
, VSWAP
);
271 vnode_unlock(ref
->vp
);
// Optional initial write, performed only when both a buffer and a length are given.
274 if (write_file_addr
&& write_file_len
)
276 if ((error
= kern_write_file(ref
, write_file_offset
, write_file_addr
, write_file_len
, 0))) goto out
;
// Attributes needed for the checks that follow.
280 VATTR_WANTED(&va
, va_rdev
);
281 VATTR_WANTED(&va
, va_fsid
);
282 VATTR_WANTED(&va
, va_data_size
);
283 VATTR_WANTED(&va
, va_data_alloc
);
284 VATTR_WANTED(&va
, va_nlink
);
286 if (vnode_getattr(ref
->vp
, &va
, ref
->ctx
)) goto out
;
288 mpFree
= freespace_mb(ref
->vp
);
290 kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n",
291 name
, va
.va_data_size
, va
.va_data_alloc
, mpFree
, fs_free_size
);
// Regular file path.
293 if (ref
->vp
->v_type
== VREG
)
295 /* Don't dump files with links. */
296 if (va
.va_nlink
!= 1) goto out
;
299 ref
->filelength
= va
.va_data_size
;
303 do_ioctl
= &file_ioctl
;
// Space check: free space plus what the file already has allocated must cover
// set_file_size and still leave fs_free_size.
309 mpFree
+= va
.va_data_alloc
;
310 if ((mpFree
< set_file_size
) || ((mpFree
- set_file_size
) < fs_free_size
))
// Resize the file without zero-filling and without authorization checks.
316 error
= vnode_setsize(ref
->vp
, set_file_size
, IO_NOZEROFILL
| IO_NOAUTH
, ref
->ctx
);
318 ref
->filelength
= set_file_size
;
// Block or character device path.
321 else if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
328 do_ioctl
= &device_ioctl
;
332 /* Don't dump to non-regular files. */
336 ref
->device
= device
;
// Ask the device about CoreStorage; cf is set only when the ioctl succeeded
// and the hot-files flag is present.
339 dk_corestorage_info_t cs_info
;
340 memset(&cs_info
, 0, sizeof(dk_corestorage_info_t
));
341 error
= do_ioctl(p1
, p2
, DKIOCCORESTORAGE
, (caddr_t
)&cs_info
);
342 ref
->cf
= (error
== 0) && (cs_info
.flags
& DK_CORESTORAGE_ENABLE_HOTFILES
);
// Device block size, stored in the ref.
346 error
= do_ioctl(p1
, p2
, DKIOCGETBLOCKSIZE
, (caddr_t
) &ref
->blksize
);
// For non-regular files the length is block count times block size.
350 if (ref
->vp
->v_type
!= VREG
)
352 error
= do_ioctl(p1
, p2
, DKIOCGETBLOCKCOUNT
, (caddr_t
) &fileblk
);
354 ref
->filelength
= fileblk
* ref
->blksize
;
357 // pin logical extents
// ENOTTY from the pin request is tolerated; pinned records whether pinning succeeded.
359 error
= kern_ioctl_file_extents(ref
, _DKIOCCSPINEXTENT
, 0, ref
->filelength
);
360 if (error
&& (ENOTTY
!= error
)) goto out
;
361 ref
->pinned
= (error
== 0);
363 // generate the block list
365 error
= do_ioctl(p1
, p2
, DKIOCLOCKPHYSICALEXTENTS
, NULL
);
// Walk the file from f_offset up to filelength, one mapped chunk at a time.
370 while (f_offset
< ref
->filelength
)
372 if (ref
->vp
->v_type
== VREG
)
374 filechunk
= 1*1024*1024*1024;
377 error
= VNOP_BLOCKMAP(ref
->vp
, f_offset
, filechunk
, &blkno
,
378 &filechunk
, NULL
, VNODE_WRITE
, NULL
);
381 fileblk
= blkno
* ref
->blksize
;
383 else if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
// A device is covered by one chunk: the whole length on the first pass, 0 afterwards.
386 filechunk
= f_offset
? 0 : ref
->filelength
;
// Resolve the chunk into physical extents, advancing by each extent's length.
390 while (physoffset
< filechunk
)
392 dk_physical_extent_t getphysreq
;
393 bzero(&getphysreq
, sizeof(getphysreq
));
395 getphysreq
.offset
= fileblk
+ physoffset
;
396 getphysreq
.length
= (filechunk
- physoffset
);
397 error
= do_ioctl(p1
, p2
, DKIOCGETPHYSICALEXTENT
, (caddr_t
) &getphysreq
);
// target holds the device reported for the extents; a later extent on a
// different device takes the else-if branch (its body is not visible here).
401 target
= getphysreq
.dev
;
403 else if (target
!= getphysreq
.dev
)
// This loop reports the extent in 4096-byte pieces starting from its end and
// moving backwards. NOTE(review): the condition selecting it over the single
// callback below is not visible here.
410 for (rev
= 4096; rev
<= getphysreq
.length
; rev
+= 4096)
412 callback(callback_ref
, getphysreq
.offset
+ getphysreq
.length
- rev
, 4096);
415 callback(callback_ref
, getphysreq
.offset
, getphysreq
.length
);
417 physoffset
+= getphysreq
.length
;
419 f_offset
+= filechunk
;
// Final callback with zero offset and zero length -- presumably an end-of-list
// marker; verify against the callback's consumer.
421 callback(callback_ref
, 0ULL, 0ULL);
// From here on, ioctls for a regular file are addressed to the target device.
423 if (ref
->vp
->v_type
== VREG
) p1
= &target
;
428 do_ioctl
= &file_ioctl
;
431 // get partition base
433 if (partitionbase_result
)
435 error
= do_ioctl(p1
, p2
, DKIOCGETBASE
, (caddr_t
) partitionbase_result
);
440 // get block size & constraints
442 error
= do_ioctl(p1
, p2
, DKIOCGETBLOCKSIZE
, (caddr_t
) &blksize
);
// maxiocount starts at 1 GiB; each query below is followed by a test for a
// non-zero count smaller than the current maximum (the statements those tests
// guard are not visible here).
446 maxiocount
= 1*1024*1024*1024;
448 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBLOCKCOUNTREAD
, (caddr_t
) &count
);
452 if (count
&& (count
< maxiocount
))
455 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBLOCKCOUNTWRITE
, (caddr_t
) &count
);
459 if (count
&& (count
< maxiocount
))
462 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBYTECOUNTREAD
, (caddr_t
) &count
);
465 if (count
&& (count
< maxiocount
))
468 error
= do_ioctl(p1
, p2
, DKIOCGETMAXBYTECOUNTWRITE
, (caddr_t
) &count
);
471 if (count
&& (count
< maxiocount
))
474 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTBYTECOUNTREAD
, (caddr_t
) &count
);
476 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTCOUNTREAD
, (caddr_t
) &segcount
);
478 count
= segcount
= 0;
480 if (count
&& (count
< maxiocount
))
483 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTBYTECOUNTWRITE
, (caddr_t
) &count
);
485 error
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTCOUNTWRITE
, (caddr_t
) &segcount
);
487 count
= segcount
= 0;
489 if (count
&& (count
< maxiocount
))
492 kprintf("max io 0x%qx bytes\n", maxiocount
);
// Out-parameters are optional: each is written only when its pointer is non-NULL.
493 if (maxiocount_result
)
494 *maxiocount_result
= maxiocount
;
// Solid-state query; kIOPolledFileSSD is OR-ed into flags (its guard is not visible here).
496 error
= do_ioctl(p1
, p2
, DKIOCISSOLIDSTATE
, (caddr_t
)&isssd
);
498 flags
|= kIOPolledFileSSD
;
500 if (partition_device_result
)
501 *partition_device_result
= device
;
502 if (image_device_result
)
503 *image_device_result
= target
;
// For block/character devices the vnode is closed and the context released here.
507 if ((ref
->vp
->v_type
== VBLK
) || (ref
->vp
->v_type
== VCHR
))
509 vnode_close(ref
->vp
, FWRITE
, ref
->ctx
);
511 vfs_context_rele(ref
->ctx
);
// Failure cleanup (the conditions guarding these statements are not visible
// here): log the error, unlock the physical extents, unpin, close the vnode,
// release the context and free the ref.
516 printf("kern_open_file_for_direct_io(%d)\n", error
);
521 (void) do_ioctl(p1
, p2
, DKIOCUNLOCKPHYSICALEXTENTS
, NULL
);
// The unpin range is the whole file only when it was pinned and cf is set; otherwise end is 0.
528 (void) kern_ioctl_file_extents(ref
, _DKIOCCSUNPINEXTENT
, 0, (ref
->pinned
&& ref
->cf
) ? ref
->filelength
: 0);
529 vnode_close(ref
->vp
, FWRITE
, ref
->ctx
);
532 vfs_context_rele(ref
->ctx
);
533 kfree(ref
, sizeof(struct kern_direct_file_io_ref_t
));
// kern_write_file: writes through vn_rdwr(UIO_WRITE, ...) to ref->vp using
// UIO_SYSSPACE. The caller's ioflag is OR-ed with IO_SYNC, IO_NODELOCKED and
// IO_UNIT; the credential and process are taken from ref->ctx, and a null
// pointer is passed for the int * argument. Returns vn_rdwr's result.
// NOTE(review): the return type and the argument line carrying the buffer,
// length and offset are not visible here.
541 kern_write_file(struct kern_direct_file_io_ref_t
* ref
, off_t offset
, void * addr
, size_t len
, int ioflag
)
543 return (vn_rdwr(UIO_WRITE
, ref
->vp
,
545 UIO_SYSSPACE
, ioflag
|IO_SYNC
|IO_NODELOCKED
|IO_UNIT
,
546 vfs_context_ucred(ref
->ctx
), (int *) 0,
547 vfs_context_proc(ref
->ctx
)));
// kern_read_file: reads through vn_rdwr(UIO_READ, ...) from ref->vp using
// UIO_SYSSPACE. The caller's ioflag is OR-ed with IO_SYNC, IO_NODELOCKED and
// IO_UNIT; the credential and process are taken from ref->ctx, and a null
// pointer is passed for the int * argument. Returns vn_rdwr's result.
// NOTE(review): the return type and the argument line carrying the buffer,
// length and offset are not visible here.
551 kern_read_file(struct kern_direct_file_io_ref_t
* ref
, off_t offset
, void * addr
, size_t len
, int ioflag
)
553 return (vn_rdwr(UIO_READ
, ref
->vp
,
555 UIO_SYSSPACE
, ioflag
|IO_SYNC
|IO_NODELOCKED
|IO_UNIT
,
556 vfs_context_ucred(ref
->ctx
), (int *) 0,
557 vfs_context_proc(ref
->ctx
)));
// kern_file_mount: returns the v_mount member of the vnode held in ref->vp.
// NOTE(review): the return type is declared on a line that is not visible here.
562 kern_file_mount(struct kern_direct_file_io_ref_t
* ref
)
564 return (ref
->vp
->v_mount
);
// kern_close_file_for_direct_io: tears down a ref produced by the open routine.
// Visible work, in order: unlock the physical extents, unpin and/or unmap file
// extents, optionally write a final buffer, close the vnode, release the
// context and free the ref.
// NOTE(review): the return type, the local declarations and the conditions
// guarding several statements are on lines that are not visible here.
568 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t
* ref
,
569 off_t write_offset
, void * addr
, size_t write_length
,
570 off_t discard_offset
, off_t discard_end
)
573 kprintf("kern_close_file_for_direct_io\n");
// Dispatcher: file_ioctl for regular files; the other assignment selects device_ioctl.
579 int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
);
583 if (ref
->vp
->v_type
== VREG
)
587 do_ioctl
= &file_ioctl
;
594 do_ioctl
= &device_ioctl
;
// Unlock the physical extents; the result is ignored.
596 (void) do_ioctl(p1
, p2
, DKIOCUNLOCKPHYSICALEXTENTS
, NULL
);
598 //XXX If unmapping extents then don't also need to unpin; except ...
599 //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and
600 //XXX unmap is subset, so save extra walk over file extents (and the risk
601 //XXX that CF drain starts) vs leaving partial units pinned to SSD
602 //XXX (until whatever was sharing also unmaps). Err on cleaning up fully.
// will_unmap: a non-empty discard range, and either nothing was pinned or cf is set.
603 boolean_t will_unmap
= (!ref
->pinned
|| ref
->cf
) && (discard_end
> discard_offset
);
// will_unpin: the file was pinned and cf is set.
604 boolean_t will_unpin
= (ref
->pinned
&& ref
->cf
/* && !will_unmap */);
// The unpin range covers the whole file when will_unpin is true, otherwise end is 0.
606 (void) kern_ioctl_file_extents(ref
, _DKIOCCSUNPINEXTENT
, 0, (will_unpin
) ? ref
->filelength
: 0);
// Unmap from discard_offset to the end of the file when cf is set, otherwise to discard_end.
610 (void) kern_ioctl_file_extents(ref
, DKIOCUNMAP
, discard_offset
, (ref
->cf
) ? ref
->filelength
: discard_end
);
// Optional final write, performed only when both a buffer and a length are given; its result is ignored.
613 if (addr
&& write_length
)
615 (void) kern_write_file(ref
, write_offset
, addr
, write_length
, 0);
618 error
= vnode_close(ref
->vp
, FWRITE
, ref
->ctx
);
621 kprintf("vnode_close(%d)\n", error
);
625 vfs_context_rele(ref
->ctx
);
628 kfree(ref
, sizeof(struct kern_direct_file_io_ref_t
));