]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_disk_conditioner.c
xnu-4570.31.3.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_disk_conditioner.c
1 /*
2 * Copyright (c) 2016 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/fsctl.h>
30 #include <stdbool.h>
31 #include <sys/time.h>
32 #include <sys/buf.h>
33 #include <sys/mount_internal.h>
34 #include <sys/vnode_internal.h>
35 #include <sys/buf_internal.h>
36
37 #include <kern/kalloc.h>
38
39 #include <sys/kauth.h>
40 #include <IOKit/IOBSD.h>
41
42 #include <vfs/vfs_disk_conditioner.h>
43
// entitlement a task must hold (checked together with superuser credentials) to change conditioner settings
#define DISK_CONDITIONER_SET_ENTITLEMENT "com.apple.private.dmc.set"

// number of total blocks for a mount: (f_blocks * f_bsize) expressed in units of mnt_devblocksize.
// The argument is fully parenthesized so any pointer-valued expression (e.g. &mount) expands correctly.
// Callers must make sure mnt_devblocksize is non-zero before using this macro.
#define BLK_MAX(mp) ((((mp)->mnt_vfsstat.f_blocks) * ((mp)->mnt_vfsstat.f_bsize)) / ((mp)->mnt_devblocksize))

// approx. time to spin up an idle HDD
#define DISK_SPINUP_SEC (8)

// idle period until assumed disk spin down
#define DISK_IDLE_SEC (10 * 60)
54
55 struct _disk_conditioner_info_t {
56 boolean_t enabled; // if other fields have any effect
57 uint64_t access_time_usec; // maximum latency before an I/O transfer begins
58 uint64_t read_throughput_mbps; // throughput of an I/O read
59 uint64_t write_throughput_mbps; // throughput of an I/O write
60 boolean_t is_ssd; // behave like an SSD (for both conditioning and affecting behavior in other parts of VFS)
61 daddr64_t last_blkno; // approx. last transfered block for simulating seek times
62 struct timeval last_io_timestamp; // the last time an I/O completed
63 };
64
65 void disk_conditioner_delay(buf_t, int, int, uint64_t);
66 void disk_conditioner_unmount(mount_t mp);
67
68 extern void throttle_info_mount_reset_period(mount_t, int isssd);
69
/*
 * Map a linear scale onto the curve y = (x - 1)^3 + 1.
 *
 * For inputs between 0 and 1 the result is also between 0 and 1 but rises
 * quickly near 0, so small block differences still receive a noticeable share
 * of the access time (a kind of minimum latency). The cubic stands in for a
 * logarithmic curve because no math library is available here.
 */
static double
weighted_scale_factor(double scale)
{
	const double shifted = scale - 1;

	return (shifted * shifted * shifted) + 1;
}
79
80 void
81 disk_conditioner_delay(buf_t bp, int extents, int total_size, uint64_t already_elapsed_usec)
82 {
83 mount_t mp;
84 uint64_t delay_usec;
85 daddr64_t blkdiff;
86 daddr64_t last_blkno;
87 double access_time_scale;
88 struct _disk_conditioner_info_t *info = NULL;
89 struct timeval elapsed;
90 struct timeval start;
91
92 mp = buf_vnode(bp)->v_mount;
93 if (!mp) {
94 return;
95 }
96
97 info = mp->mnt_disk_conditioner_info;
98 if (!info || !info->enabled) {
99 return;
100 }
101
102 if (!info->is_ssd) {
103 // calculate approximate seek time based on difference in block number
104 last_blkno = info->last_blkno;
105 blkdiff = bp->b_blkno > last_blkno ? bp->b_blkno - last_blkno : last_blkno - bp->b_blkno;
106 info->last_blkno = bp->b_blkno + bp->b_bcount;
107 } else {
108 blkdiff = BLK_MAX(mp);
109 }
110
111 // scale access time by (distance in blocks from previous I/O / maximum blocks)
112 access_time_scale = weighted_scale_factor((double)blkdiff / BLK_MAX(mp));
113 // most cases should pass in extents==1 for optimal delay calculation, otherwise just multiply delay by extents
114 delay_usec = (uint64_t)(((uint64_t)extents * info->access_time_usec) * access_time_scale);
115
116 if (info->read_throughput_mbps && (bp->b_flags & B_READ)) {
117 delay_usec += (uint64_t)(total_size / ((double)(info->read_throughput_mbps * 1024 * 1024 / 8) / USEC_PER_SEC));
118 } else if (info->write_throughput_mbps && !(bp->b_flags & B_READ)) {
119 delay_usec += (uint64_t)(total_size / ((double)(info->write_throughput_mbps * 1024 * 1024 / 8) / USEC_PER_SEC));
120 }
121
122 // try simulating disk spinup based on time since last I/O
123 if (!info->is_ssd) {
124 microuptime(&elapsed);
125 timevalsub(&elapsed, &info->last_io_timestamp);
126 // avoid this delay right after boot (assuming last_io_timestamp is 0 and disk is already spinning)
127 if (elapsed.tv_sec > DISK_IDLE_SEC && info->last_io_timestamp.tv_sec != 0) {
128 delay_usec += DISK_SPINUP_SEC * USEC_PER_SEC;
129 }
130 }
131
132 if (delay_usec <= already_elapsed_usec) {
133 microuptime(&info->last_io_timestamp);
134 return;
135 }
136
137 delay_usec -= already_elapsed_usec;
138
139 while (delay_usec) {
140 microuptime(&start);
141 delay(delay_usec);
142 microuptime(&elapsed);
143 timevalsub(&elapsed, &start);
144 if (elapsed.tv_sec * USEC_PER_SEC < delay_usec) {
145 delay_usec -= elapsed.tv_sec * USEC_PER_SEC;
146 } else {
147 break;
148 }
149 if ((uint64_t)elapsed.tv_usec < delay_usec) {
150 delay_usec -= elapsed.tv_usec;
151 } else {
152 break;
153 }
154 }
155
156 microuptime(&info->last_io_timestamp);
157 }
158
159 int
160 disk_conditioner_get_info(mount_t mp, disk_conditioner_info *uinfo)
161 {
162 struct _disk_conditioner_info_t *info;
163
164 if (!mp) {
165 return EINVAL;
166 }
167
168 info = mp->mnt_disk_conditioner_info;
169
170 if (!info) {
171 return 0;
172 }
173
174 uinfo->enabled = info->enabled;
175 uinfo->access_time_usec = info->access_time_usec;
176 uinfo->read_throughput_mbps = info->read_throughput_mbps;
177 uinfo->write_throughput_mbps = info->write_throughput_mbps;
178 uinfo->is_ssd = info->is_ssd;
179
180 return 0;
181 }
182
183 int
184 disk_conditioner_set_info(mount_t mp, disk_conditioner_info *uinfo)
185 {
186 struct _disk_conditioner_info_t *info;
187
188 if (!kauth_cred_issuser(kauth_cred_get()) || !IOTaskHasEntitlement(current_task(), DISK_CONDITIONER_SET_ENTITLEMENT)) {
189 return EPERM;
190 }
191
192 if (!mp) {
193 return EINVAL;
194 }
195
196 info = mp->mnt_disk_conditioner_info;
197 if (!info) {
198 info = mp->mnt_disk_conditioner_info = kalloc(sizeof(struct _disk_conditioner_info_t));
199 bzero(info, sizeof(struct _disk_conditioner_info_t));
200 }
201
202 info->enabled = uinfo->enabled;
203 info->access_time_usec = uinfo->access_time_usec;
204 info->read_throughput_mbps = uinfo->read_throughput_mbps;
205 info->write_throughput_mbps = uinfo->write_throughput_mbps;
206 info->is_ssd = uinfo->is_ssd;
207 microuptime(&info->last_io_timestamp);
208
209 // make sure throttling picks up the new periods
210 throttle_info_mount_reset_period(mp, info->is_ssd);
211
212 return 0;
213 }
214
215 void
216 disk_conditioner_unmount(mount_t mp)
217 {
218 if (!mp->mnt_disk_conditioner_info) {
219 return;
220 }
221 kfree(mp->mnt_disk_conditioner_info, sizeof(struct _disk_conditioner_info_t));
222 mp->mnt_disk_conditioner_info = NULL;
223 }
224
225 boolean_t
226 disk_conditioner_mount_is_ssd(mount_t mp)
227 {
228 struct _disk_conditioner_info_t *info = mp->mnt_disk_conditioner_info;
229
230 if (!info || !info->enabled) {
231 return (mp->mnt_kern_flag & MNTK_SSD);
232 }
233
234 return info->is_ssd;
235 }