2  * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  28 /* Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
  30  *      File:   bsd/kern/kern_symfile.c 
  35 #include <mach/vm_param.h> 
  37 #include <sys/param.h> 
  38 #include <sys/systm.h> 
  39 #include <sys/signalvar.h> 
  40 #include <sys/resourcevar.h> 
  41 #include <sys/namei.h> 
  42 #include <sys/vnode_internal.h> 
  43 #include <sys/proc_internal.h> 
  44 #include <sys/kauth.h> 
  45 #include <sys/timeb.h> 
  46 #include <sys/times.h> 
  48 #include <sys/file_internal.h> 
  50 #include <sys/kernel.h> 
  54 #include <sys/content_protection.h> 
  56 #include <mach-o/loader.h> 
  57 #include <mach-o/nlist.h> 
  59 #include <kern/kalloc.h> 
  60 #include <vm/vm_kern.h> 
  61 #include <pexpert/pexpert.h> 
  62 #include <IOKit/IOPolledInterface.h> 
  /* This function is called from kern_sysctl in the current process context.
   * It is exported through System6.0.exports, but that appears to be a legacy
   * export, as there are no internal consumers.
   */
  /*
   * get_kernel_symfile: forward declaration immediately followed by the
   * head of the definition.  Both parameters (the process `p` and the
   * `symfile` pointer) are marked __unused.
   * NOTE(review): the return type and the function body are not present in
   * this listing -- confirm them against the full source.
   */
  69 get_kernel_symfile(__unused proc_t p
, __unused 
char const **symfile
); 
  71 get_kernel_symfile(__unused proc_t p
, __unused 
char const **symfile
) 
  /*
   * State shared by the direct file I/O routines in this file.
   * The member list is not present in this listing; code elsewhere in the
   * file reads or writes the members ctx, vp, device, blksize, filelength,
   * cf and pinned.
   */
  76 struct kern_direct_file_io_ref_t
 
  88 static int file_ioctl(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
) 
  90     dev_t device 
= *(dev_t
*) p1
; 
  92     return ((*bdevsw
[major(device
)].d_ioctl
) 
  93                     (device
, theIoctl
, result
, S_IFBLK
, p2
)); 
  96 static int device_ioctl(void * p1
, __unused 
void * p2
, u_long theIoctl
, caddr_t result
) 
  98     return (VNOP_IOCTL(p1
, theIoctl
, result
, 0, p2
)); 
 /*
  * kern_ioctl_file_extents: apply `theIoctl` to the storage behind the byte
  * range [offset, end) of ref->vp, one chunk per loop pass.
  *
  * The request codes handled are DKIOCUNMAP, _DKIOCCSPINEXTENT and
  * _DKIOCCSUNPINEXTENT.  Requests are sent through file_ioctl when ref->vp
  * is a regular file (VREG); device_ioctl is the other handler assigned
  * below.
  *
  * NOTE(review): this listing omits several original lines (the return
  * type, local declarations, braces, the p1/p2 setup, error checks and the
  * return statement), so the exact branch structure and return value are
  * not visible here -- confirm against the full source.
  */
 102 kern_ioctl_file_extents(struct kern_direct_file_io_ref_t 
* ref
, u_long theIoctl
, off_t offset
, off_t end
) 
 105     int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
); 
     /* Start from zeroed request structures. */
 114     bzero(&extent
, sizeof(dk_extent_t
)); 
 115     bzero(&unmap
, sizeof(dk_unmap_t
)); 
 116     bzero(&pin
, sizeof(pin
)); 
     /* Choose the ioctl path from the vnode type. */
 117     if (ref
->vp
->v_type 
== VREG
) 
 121          do_ioctl 
= &file_ioctl
; 
 128         do_ioctl 
= &device_ioctl
; 
     /*
      * Pin and unpin requests are preceded by one extra ioctl that carries
      * only flags (plus, for pin, `end` as the extent length); these calls'
      * results are discarded.
      */
 131     if (_DKIOCCSPINEXTENT 
== theIoctl
) { 
 132             /* Tell CS the image size, so it knows whether to place the subsequent pins SSD/HDD */ 
 133             pin
.cp_extent
.length 
= end
; 
 134             pin
.cp_flags 
= _DKIOCCSHIBERNATEIMGSIZE
; 
 135             (void) do_ioctl(p1
, p2
, _DKIOCCSPINEXTENT
, (caddr_t
)&pin
); 
 136     } else if (_DKIOCCSUNPINEXTENT 
== theIoctl
) { 
 137             /* Tell CS hibernation is done, so it can stop blocking overlapping writes */ 
 138             pin
.cp_flags 
= _DKIOCCSPINDISCARDBLACKLIST
; 
 139             (void) do_ioctl(p1
, p2
, _DKIOCCSUNPINEXTENT
, (caddr_t
)&pin
); 
     /*
      * Walk the range.  Each pass advances offset by filechunk, the size of
      * the chunk just handled; nothing runs when offset >= end on entry.
      */
 142     for (; offset 
< end
; offset 
+= filechunk
) 
 144         if (ref
->vp
->v_type 
== VREG
) 
             /* Ask for at most 1 GiB, and never more than what remains. */
 147             filechunk 
= 1*1024*1024*1024; 
 148             if (filechunk 
> (size_t)(end 
- offset
)) 
 149             filechunk 
= (size_t)(end 
- offset
); 
             /*
              * Map the file offset to a device block.  filechunk is also
              * passed by address, so VNOP_BLOCKMAP may rewrite it
              * (presumably with the length of the contiguous run -- confirm).
              */
 150             error 
= VNOP_BLOCKMAP(ref
->vp
, offset
, filechunk
, &blkno
, 
 151                                                                   &filechunk
, NULL
, VNODE_WRITE 
| VNODE_BLOCKMAP_NO_TRACK
, NULL
); 
             /* A block number of -1 means there is nothing to act on: skip this chunk. */
 153             if (-1LL == blkno
) continue; 
             /* Convert the block number to a byte offset on the device. */
 154             fileblk 
= blkno 
* ref
->blksize
; 
 156         else if ((ref
->vp
->v_type 
== VBLK
) || (ref
->vp
->v_type 
== VCHR
)) 
             /* Device node: a single chunk of ref->filelength bytes. */
 159             filechunk 
= ref
->filelength
; 
         /* DKIOCUNMAP: describe the chunk as a one-element extent list. */
 162         if (DKIOCUNMAP 
== theIoctl
) 
 164             extent
.offset 
= fileblk
; 
 165             extent
.length 
= filechunk
; 
 166             unmap
.extents 
= &extent
; 
 167             unmap
.extentsCount 
= 1; 
 168             error 
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&unmap
); 
 169 //          printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length); 
         /* Pin the chunk; any error other than ENOTTY is logged. */
 171         else if (_DKIOCCSPINEXTENT 
== theIoctl
) 
 173             pin
.cp_extent
.offset 
= fileblk
; 
 174             pin
.cp_extent
.length 
= filechunk
; 
 175             pin
.cp_flags 
= _DKIOCCSPINFORHIBERNATION
; 
 176             error 
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&pin
); 
 177             if (error 
&& (ENOTTY 
!= error
)) 
 179                 printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n", error
, pin
.cp_extent
.offset
, pin
.cp_extent
.length
); 
         /* Unpin the chunk, with the same flags and the same logging rule as pin. */
 182         else if (_DKIOCCSUNPINEXTENT 
== theIoctl
) 
 184             pin
.cp_extent
.offset 
= fileblk
; 
 185             pin
.cp_extent
.length 
= filechunk
; 
 186             pin
.cp_flags 
= _DKIOCCSPINFORHIBERNATION
; 
 187             error 
= do_ioctl(p1
, p2
, theIoctl
, (caddr_t
)&pin
); 
 188             if (error 
&& (ENOTTY 
!= error
)) 
 190                 printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n", error
, pin
.cp_extent
.offset
, pin
.cp_extent
.length
); 
 /*
  * Defined outside this file.  kern_open_file_for_direct_io stores its
  * result in mpFree and compares that against the requested file size.
  * NOTE(review): the unit of the returned value is not visible here.
  */
 200 extern uint32_t freespace_mb(vnode_t vp
); 
 /*
  * kern_open_file_for_direct_io: open `name` for writing and describe the
  * storage behind it to the caller.
  *
  * Visible steps, in order:
  *   - allocate and zero a kern_direct_file_io_ref_t bound to the kernel
  *     VFS context;
  *   - open the path with vn_open_auth (adding O_CREAT when create_file is
  *     set), optionally write write_file_len bytes from write_file_addr at
  *     write_file_offset, and fetch the vnode attributes;
  *   - choose file_ioctl (regular file) or device_ioctl (block/character
  *     device) for the ioctls that follow;
  *   - query CoreStorage info and block size, pin the file's logical
  *     extents, lock its physical extents, and report every physical extent
  *     through `callback`, ending with a (0, 0) call;
  *   - query the partition base, I/O size limits and solid-state flag, and
  *     store results through the non-NULL *_result pointers.
  *
  * NOTE(review): this listing omits many original lines (some parameters,
  * most local declarations, braces, error checks, labels and the return
  * statement).  Names such as callback_ref, set_file_size and fs_free_size
  * are used below but their declarations are not shown -- confirm details
  * against the full source.
  */
 202 struct kern_direct_file_io_ref_t 
* 
 203 kern_open_file_for_direct_io(const char * name
,  
 204                              boolean_t create_file
, 
 205                              kern_get_file_extents_callback_t callback
,  
 209                              off_t write_file_offset
, 
 210                              void * write_file_addr
, 
 211                              size_t write_file_len
, 
 212                              dev_t 
* partition_device_result
, 
 213                              dev_t 
* image_device_result
, 
 214                              uint64_t * partitionbase_result
, 
 215                              uint64_t * maxiocount_result
, 
 218     struct kern_direct_file_io_ref_t 
* ref
; 
 221     struct vnode_attr va
; 
 232     off_t             maxiocount
, count
, segcount
; 
 233     boolean_t         locked 
= FALSE
; 
 239     int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
); 
     /* Allocate and zero the reference; it carries the kernel VFS context. */
 245     ref 
= (struct kern_direct_file_io_ref_t 
*) kalloc(sizeof(struct kern_direct_file_io_ref_t
)); 
 252     bzero(ref
, sizeof(*ref
)); 
 254     ref
->ctx 
= vfs_context_kernel(); 
     /*
      * Open for writing; O_CREAT is added only when create_file is set.
      * The creation mode is owner read/write (S_IRUSR | S_IWUSR).
      */
 256     fmode  
= (create_file
) ? (O_CREAT 
| FWRITE
) : FWRITE
; 
 257     cmode 
=  S_IRUSR 
| S_IWUSR
; 
 259     NDINIT(&nd
, LOOKUP
, OP_OPEN
, ndflags
, UIO_SYSSPACE
, CAST_USER_ADDR_T(name
), ref
->ctx
); 
     /* Attributes handed to the open: mode plus data-protection flags and class. */
 261     VATTR_SET(&va
, va_mode
, cmode
); 
 262     VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
); 
 263     VATTR_SET(&va
, va_dataprotect_class
, PROTECTION_CLASS_D
); 
 264     if ((error 
= vn_open_auth(&nd
, &fmode
, &va
))) { 
 265         kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode
, cmode
, error
); 
     /* For a regular file, set VSWAP in the vnode flags under the vnode spin lock. */
 270     if (ref
->vp
->v_type 
== VREG
) 
 272         vnode_lock_spin(ref
->vp
); 
 273         SET(ref
->vp
->v_flag
, VSWAP
); 
 274         vnode_unlock(ref
->vp
); 
     /* Optional initial write, issued with IO_SKIP_ENCRYPTION. */
 277     if (write_file_addr 
&& write_file_len
) 
 279         if ((error 
= kern_write_file(ref
, write_file_offset
, write_file_addr
, write_file_len
, IO_SKIP_ENCRYPTION
))) { 
 280                 kprintf("kern_write_file() failed with error: %d\n", error
); 
     /* Request the attributes consulted below, then fetch them. */
 286     VATTR_WANTED(&va
, va_rdev
); 
 287     VATTR_WANTED(&va
, va_fsid
); 
 288     VATTR_WANTED(&va
, va_devid
); 
 289     VATTR_WANTED(&va
, va_data_size
); 
 290     VATTR_WANTED(&va
, va_data_alloc
); 
 291     VATTR_WANTED(&va
, va_nlink
); 
 293     if (vnode_getattr(ref
->vp
, &va
, ref
->ctx
)) goto out
; 
 295     mpFree 
= freespace_mb(ref
->vp
); 
 297     kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n",  
 298                 name
, va
.va_data_size
, va
.va_data_alloc
, mpFree
, fs_free_size
); 
     /* Regular file: ioctls go to the underlying device through file_ioctl. */
 300     if (ref
->vp
->v_type 
== VREG
) 
 302         /* Don't dump files with links. */ 
 303         if (va
.va_nlink 
!= 1) goto out
; 
         /* Prefer va_devid when the filesystem supports it, else fall back to va_fsid. */
 305         device 
= (VATTR_IS_SUPPORTED(&va
, va_devid
)) ? va
.va_devid 
: va
.va_fsid
; 
 306         ref
->filelength 
= va
.va_data_size
; 
 310         do_ioctl 
= &file_ioctl
; 
                 /*
                  * Space check before resizing: the file's current allocation
                  * is counted as available, and the test fails when the new
                  * size does not fit or would leave less than fs_free_size.
                  * NOTE(review): the enclosing condition and the failure
                  * action are not shown in this listing.
                  */
 316                 mpFree 
+= va
.va_data_alloc
; 
 317                 if ((mpFree 
< set_file_size
) || ((mpFree 
- set_file_size
) < fs_free_size
)) 
             /* Resize the file to set_file_size and record that as the length. */
 323             error 
= vnode_setsize(ref
->vp
, set_file_size
, IO_NOZEROFILL 
| IO_NOAUTH
, ref
->ctx
); 
 325             ref
->filelength 
= set_file_size
; 
     /* Block or character device: ioctls go to the vnode through device_ioctl. */
 328     else if ((ref
->vp
->v_type 
== VBLK
) || (ref
->vp
->v_type 
== VCHR
)) 
 335         do_ioctl 
= &device_ioctl
; 
 339         /* Don't dump to non-regular files. */ 
 343     ref
->device 
= device
; 
     /*
      * Query CoreStorage info; ref->cf is set only when the ioctl succeeds
      * and the DK_CORESTORAGE_ENABLE_HOTFILES flag is reported.
      */
 346     dk_corestorage_info_t cs_info
; 
 347     memset(&cs_info
, 0, sizeof(dk_corestorage_info_t
)); 
 348     error 
= do_ioctl(p1
, p2
, DKIOCCORESTORAGE
, (caddr_t
)&cs_info
); 
 349     ref
->cf 
= (error 
== 0) && (cs_info
.flags 
& DK_CORESTORAGE_ENABLE_HOTFILES
); 
     /* Device block size, stored in the reference. */
 353     error 
= do_ioctl(p1
, p2
, DKIOCGETBLOCKSIZE
, (caddr_t
) &ref
->blksize
); 
     /* For a device node the length is block count times block size. */
 357     if (ref
->vp
->v_type 
!= VREG
) 
 359         error 
= do_ioctl(p1
, p2
, DKIOCGETBLOCKCOUNT
, (caddr_t
) &fileblk
); 
 361         ref
->filelength 
= fileblk 
* ref
->blksize
;     
 364     // pin logical extents 
     /* ENOTTY from the pin request is tolerated; ref->pinned records whether pinning succeeded. */
 366     error 
= kern_ioctl_file_extents(ref
, _DKIOCCSPINEXTENT
, 0, ref
->filelength
); 
 367     if (error 
&& (ENOTTY 
!= error
)) goto out
; 
 368     ref
->pinned 
= (error 
== 0); 
 370     // generate the block list 
 372     error 
= do_ioctl(p1
, p2
, DKIOCLOCKPHYSICALEXTENTS
, NULL
); 
     /* Walk the file in chunks, advancing by the size of the chunk just handled. */
 377     for (; f_offset 
< ref
->filelength
; f_offset 
+= filechunk
) 
 379         if (ref
->vp
->v_type 
== VREG
) 
             /* Request at most 1 GiB; VNOP_BLOCKMAP may rewrite filechunk through &filechunk. */
 381             filechunk 
= 1*1024*1024*1024; 
 384             error 
= VNOP_BLOCKMAP(ref
->vp
, f_offset
, filechunk
, &blkno
, 
 385                                                                   &filechunk
, NULL
, VNODE_WRITE 
| VNODE_BLOCKMAP_NO_TRACK
, NULL
); 
             /* A block number of -1 means there is nothing to report: skip this chunk. */
 387             if (-1LL == blkno
) continue; 
 388             fileblk 
= blkno 
* ref
->blksize
; 
 390         else if ((ref
->vp
->v_type 
== VBLK
) || (ref
->vp
->v_type 
== VCHR
)) 
             /* Device node: whole length on the first pass (f_offset == 0), zero afterwards. */
 393             filechunk 
= f_offset 
? 0 : ref
->filelength
; 
         /* Resolve the chunk into physical extents, one DKIOCGETPHYSICALEXTENT request at a time. */
 397         while (physoffset 
< filechunk
) 
 399             dk_physical_extent_t getphysreq
; 
 400             bzero(&getphysreq
, sizeof(getphysreq
)); 
 402             getphysreq
.offset 
= fileblk 
+ physoffset
; 
 403             getphysreq
.length 
= (filechunk 
- physoffset
); 
 404             error 
= do_ioctl(p1
, p2
, DKIOCGETPHYSICALEXTENT
, (caddr_t
) &getphysreq
); 
             /*
              * `target` takes the device reported for an extent and is then
              * compared with the device of later extents.
              * NOTE(review): the condition guarding the assignment and the
              * action taken on a mismatch are not shown in this listing.
              */
 408                 target 
= getphysreq
.dev
; 
 410             else if (target 
!= getphysreq
.dev
) 
             /*
              * NOTE(review): two reporting forms follow -- 4096-byte pieces
              * walked from the end of the extent towards its start, and a
              * single call covering the whole extent.  They are presumably
              * alternatives selected by a conditional that is not shown in
              * this listing.
              */
 417             for (rev 
= 4096; rev 
<= getphysreq
.length
; rev 
+= 4096) 
 419                 callback(callback_ref
, getphysreq
.offset 
+ getphysreq
.length 
- rev
, 4096); 
 422             callback(callback_ref
, getphysreq
.offset
, getphysreq
.length
); 
             /* Continue after the extent just returned. */
 424             physoffset 
+= getphysreq
.length
; 
     /* A final (0, 0) call to the callback follows the extent list. */
 427     callback(callback_ref
, 0ULL, 0ULL); 
     /* For a regular file, later ioctls are addressed to `target` through file_ioctl. */
 429     if (ref
->vp
->v_type 
== VREG
) p1 
= &target
; 
 434         do_ioctl 
= &file_ioctl
; 
 437     // get partition base 
 439     if (partitionbase_result
)  
 441         error 
= do_ioctl(p1
, p2
, DKIOCGETBASE
, (caddr_t
) partitionbase_result
); 
 446     // get block size & constraints 
 448     error 
= do_ioctl(p1
, p2
, DKIOCGETBLOCKSIZE
, (caddr_t
) &blksize
); 
     /*
      * maxiocount starts at 1 GiB.  Each limit queried below is tested with
      * `count && (count < maxiocount)`.
      * NOTE(review): the statements run under those tests (presumably
      * lowering maxiocount) and any scaling of count are not shown in this
      * listing.
      */
 452     maxiocount 
= 1*1024*1024*1024; 
 454     error 
= do_ioctl(p1
, p2
, DKIOCGETMAXBLOCKCOUNTREAD
, (caddr_t
) &count
); 
 458     if (count 
&& (count 
< maxiocount
)) 
 461     error 
= do_ioctl(p1
, p2
, DKIOCGETMAXBLOCKCOUNTWRITE
, (caddr_t
) &count
); 
 465     if (count 
&& (count 
< maxiocount
)) 
 468     error 
= do_ioctl(p1
, p2
, DKIOCGETMAXBYTECOUNTREAD
, (caddr_t
) &count
); 
 471     if (count 
&& (count 
< maxiocount
)) 
 474     error 
= do_ioctl(p1
, p2
, DKIOCGETMAXBYTECOUNTWRITE
, (caddr_t
) &count
); 
 477     if (count 
&& (count 
< maxiocount
)) 
 480     error 
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTBYTECOUNTREAD
, (caddr_t
) &count
); 
 482         error 
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTCOUNTREAD
, (caddr_t
) &segcount
); 
 484         count 
= segcount 
= 0; 
 486     if (count 
&& (count 
< maxiocount
)) 
 489     error 
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTBYTECOUNTWRITE
, (caddr_t
) &count
); 
 491         error 
= do_ioctl(p1
, p2
, DKIOCGETMAXSEGMENTCOUNTWRITE
, (caddr_t
) &segcount
); 
 493         count 
= segcount 
= 0; 
 495     if (count 
&& (count 
< maxiocount
)) 
 498     kprintf("max io 0x%qx bytes\n", maxiocount
); 
     /* Each *_result pointer is optional; results are stored only through non-NULL ones. */
 499     if (maxiocount_result
) 
 500         *maxiocount_result 
= maxiocount
; 
     /* The solid-state query feeds the kIOPolledFileSSD bit in `flags`. */
 502     error 
= do_ioctl(p1
, p2
, DKIOCISSOLIDSTATE
, (caddr_t
)&isssd
); 
 504         flags 
|= kIOPolledFileSSD
; 
 506     if (partition_device_result
) 
 507         *partition_device_result 
= device
; 
 508     if (image_device_result
) 
 509         *image_device_result 
= target
; 
     /* A block or character device vnode is closed at this point. */
 513     if ((ref
->vp
->v_type 
== VBLK
) || (ref
->vp
->v_type 
== VCHR
)) 
 515         vnode_close(ref
->vp
, FWRITE
, ref
->ctx
); 
 521     printf("kern_open_file_for_direct_io(%p, %d)\n", ref
, error
); 
     /*
      * The remaining calls undo earlier steps: unlock the physical extents,
      * unpin (over the whole file only when ref->pinned and ref->cf are both
      * set, otherwise with an empty range), close the vnode and free the
      * reference.
      * NOTE(review): presumably these run only on the error path; the
      * guarding conditions are not shown in this listing.
      */
 527         (void) do_ioctl(p1
, p2
, DKIOCUNLOCKPHYSICALEXTENTS
, NULL
); 
 534             (void) kern_ioctl_file_extents(ref
, _DKIOCCSUNPINEXTENT
, 0, (ref
->pinned 
&& ref
->cf
) ? ref
->filelength 
: 0); 
 535             vnode_close(ref
->vp
, FWRITE
, ref
->ctx
); 
 539         kfree(ref
, sizeof(struct kern_direct_file_io_ref_t
)); 
 /*
  * kern_write_file: write to ref->vp through vn_rdwr(UIO_WRITE).
  *
  * The caller's ioflag is OR-ed with IO_SYNC | IO_NODELOCKED | IO_UNIT, the
  * buffer is addressed as UIO_SYSSPACE, and the credential and process are
  * taken from ref->ctx.  A null (int *) is passed for the argument that
  * follows the credential (presumably the residual-count output -- confirm).
  * Returns vn_rdwr's result.
  *
  * NOTE(review): the return type and the argument line that carries
  * addr/len/offset are missing from this listing.
  */
 547 kern_write_file(struct kern_direct_file_io_ref_t 
* ref
, off_t offset
, void * addr
, size_t len
, int ioflag
) 
 549     return (vn_rdwr(UIO_WRITE
, ref
->vp
, 
 551                         UIO_SYSSPACE
, ioflag
|IO_SYNC
|IO_NODELOCKED
|IO_UNIT
,  
 552                         vfs_context_ucred(ref
->ctx
), (int *) 0, 
 553                         vfs_context_proc(ref
->ctx
))); 
 /*
  * kern_read_file: read from ref->vp through vn_rdwr(UIO_READ).
  *
  * The caller's ioflag is OR-ed with IO_SYNC | IO_NODELOCKED | IO_UNIT, the
  * buffer is addressed as UIO_SYSSPACE, and the credential and process are
  * taken from ref->ctx.  A null (int *) is passed for the argument that
  * follows the credential (presumably the residual-count output -- confirm).
  * Returns vn_rdwr's result.
  *
  * NOTE(review): the return type and the argument line that carries
  * addr/len/offset are missing from this listing.
  */
 557 kern_read_file(struct kern_direct_file_io_ref_t 
* ref
, off_t offset
, void * addr
, size_t len
, int ioflag
) 
 559     return (vn_rdwr(UIO_READ
, ref
->vp
, 
 561                         UIO_SYSSPACE
, ioflag
|IO_SYNC
|IO_NODELOCKED
|IO_UNIT
,  
 562                         vfs_context_ucred(ref
->ctx
), (int *) 0, 
 563                         vfs_context_proc(ref
->ctx
))); 
 /*
  * kern_file_mount: return the v_mount member of the vnode held in ref.
  * ref and ref->vp are dereferenced without a NULL check.
  * NOTE(review): the return type is not present in this listing.
  */
 568 kern_file_mount(struct kern_direct_file_io_ref_t 
* ref
) 
 570     return (ref
->vp
->v_mount
); 
 /*
  * kern_close_file_for_direct_io: release a reference obtained from
  * kern_open_file_for_direct_io.
  *
  * Visible steps, in order: unlock the physical extents, unpin, unmap
  * (discard) a byte range, optionally write write_length bytes from addr at
  * write_offset, close the vnode, and free ref.
  *
  * NOTE(review): this listing omits the return type, local declarations,
  * braces, the p1/p2 setup and the conditions guarding several steps --
  * confirm against the full source.
  */
 574 kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t 
* ref
, 
 575                               off_t write_offset
, void * addr
, size_t write_length
, 
 576                               off_t discard_offset
, off_t discard_end
) 
 579     printf("kern_close_file_for_direct_io(%p)\n", ref
); 
 585         int (*do_ioctl
)(void * p1
, void * p2
, u_long theIoctl
, caddr_t result
); 
         /* Choose the ioctl path from the vnode type. */
 589         if (ref
->vp
->v_type 
== VREG
) 
 593             do_ioctl 
= &file_ioctl
; 
 600             do_ioctl 
= &device_ioctl
; 
         /* Unlock the physical extents; the result is ignored. */
 602         (void) do_ioctl(p1
, p2
, DKIOCUNLOCKPHYSICALEXTENTS
, NULL
); 
 604                 //XXX If unmapping extents then don't also need to unpin; except ... 
 605                 //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and 
 606                 //XXX unmap is subset, so save extra walk over file extents (and the risk 
 607                 //XXX that CF drain starts) vs leaving partial units pinned to SSD 
 608                 //XXX (until whatever was sharing also unmaps).  Err on cleaning up fully. 
 609                 boolean_t will_unmap 
= (!ref
->pinned 
|| ref
->cf
) && (discard_end 
> discard_offset
); 
 610                 boolean_t will_unpin 
= (ref
->pinned 
&& ref
->cf 
/* && !will_unmap */); 
                 /*
                  * With will_unpin false the end offset is 0, so the range
                  * is empty: kern_ioctl_file_extents then only issues its
                  * flags-only unpin ioctl and skips the per-extent loop.
                  */
 612                 (void) kern_ioctl_file_extents(ref
, _DKIOCCSUNPINEXTENT
, 0, (will_unpin
) ? ref
->filelength 
: 0); 
             /*
              * Discard from discard_offset up to the file length when
              * ref->cf is set, otherwise up to discard_end.
              * NOTE(review): presumably guarded by will_unmap; the guard is
              * not shown in this listing.
              */
 616             (void) kern_ioctl_file_extents(ref
, DKIOCUNMAP
, discard_offset
, (ref
->cf
) ? ref
->filelength 
: discard_end
); 
         /* Optional final write, issued with IO_SKIP_ENCRYPTION; its result is ignored. */
 619         if (addr 
&& write_length
) 
 621             (void) kern_write_file(ref
, write_offset
, addr
, write_length
, IO_SKIP_ENCRYPTION
); 
 624         error 
= vnode_close(ref
->vp
, FWRITE
, ref
->ctx
); 
 627         kprintf("vnode_close(%d)\n", error
); 
     /* Free the reference itself. */
 633     kfree(ref
, sizeof(struct kern_direct_file_io_ref_t
));