]>
Commit | Line | Data |
---|---|---|
5ba3f43e | 1 | /* |
cb323159 | 2 | * Copyright (c) 2016-2018 Apple Computer, Inc. All rights reserved. |
5ba3f43e A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/fsctl.h> | |
30 | #include <stdbool.h> | |
31 | #include <sys/time.h> | |
32 | #include <sys/buf.h> | |
33 | #include <sys/mount_internal.h> | |
34 | #include <sys/vnode_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | ||
37 | #include <kern/kalloc.h> | |
38 | ||
39 | #include <sys/kauth.h> | |
40 | #include <IOKit/IOBSD.h> | |
41 | ||
42 | #include <vfs/vfs_disk_conditioner.h> | |
43 | ||
/* Entitlement string checked by disk_conditioner_set_info() before any change is applied. */
#define DISK_CONDITIONER_SET_ENTITLEMENT "com.apple.private.dmc.set"

/*
 * Number of total device blocks for a mount: the file system size in bytes
 * (f_blocks * f_bsize) divided by the device block size.
 *
 * The parameter is fully parenthesized so that any pointer-valued expression
 * (e.g. `&m` or `vp->v_mount`) expands correctly.  It is still evaluated
 * three times, so it must not have side effects, and the caller is
 * responsible for making sure mnt_devblocksize is non-zero.
 */
#define BLK_MAX(mp) ((((mp)->mnt_vfsstat.f_blocks) * ((mp)->mnt_vfsstat.f_bsize)) / ((mp)->mnt_devblocksize))

// approx. time to spin up an idle HDD
#define DISK_SPINUP_SEC (8)

// idle period until assumed disk spin down
#define DISK_IDLE_SEC (10 * 60)
54 | ||
/*
 * Copy of the mount_t I/O limit fields taken when conditioner state is first
 * created for a mount, so they can be written back to the mount later.
 */
struct saved_mount_fields {
	/* Max. byte count for read */
	uint32_t mnt_maxreadcnt;
	/* Max. byte count for write */
	uint32_t mnt_maxwritecnt;
	/* Max. segment count for read */
	uint32_t mnt_segreadcnt;
	/* Max. segment count for write */
	uint32_t mnt_segwritecnt;
	/* the maximum number of commands a device can accept */
	uint32_t mnt_ioqueue_depth;
	/* scale the various throttles/limits imposed on the amount of I/O in flight */
	uint32_t mnt_ioscale;
};
63 | ||
5ba3f43e | 64 | struct _disk_conditioner_info_t { |
d9a64523 A |
65 | disk_conditioner_info dcinfo; // all the original data from fsctl |
66 | struct saved_mount_fields mnt_fields; // fields to restore in mount_t when conditioner is disabled | |
67 | ||
5ba3f43e A |
68 | daddr64_t last_blkno; // approx. last transfered block for simulating seek times |
69 | struct timeval last_io_timestamp; // the last time an I/O completed | |
70 | }; | |
71 | ||
72 | void disk_conditioner_delay(buf_t, int, int, uint64_t); | |
73 | void disk_conditioner_unmount(mount_t mp); | |
74 | ||
75 | extern void throttle_info_mount_reset_period(mount_t, int isssd); | |
76 | ||
/*
 * Map a linear ratio onto the curve y = (x - 1)^3 + 1.
 *
 * For inputs between 0 and 1 the result also lies between 0 and 1 but rises
 * quickly near 0, so small block distances are weighted higher and act as a
 * kind of minimum latency.  A cubic is used as a stand-in for a logarithmic
 * curve because no math library is available here.
 */
static double
weighted_scale_factor(double scale)
{
	const double shifted = scale - 1;
	const double cubed = shifted * shifted * shifted;

	return cubed + 1;
}
86 | ||
87 | void | |
88 | disk_conditioner_delay(buf_t bp, int extents, int total_size, uint64_t already_elapsed_usec) | |
89 | { | |
90 | mount_t mp; | |
91 | uint64_t delay_usec; | |
92 | daddr64_t blkdiff; | |
93 | daddr64_t last_blkno; | |
94 | double access_time_scale; | |
d9a64523 A |
95 | struct _disk_conditioner_info_t *internal_info = NULL; |
96 | disk_conditioner_info *info = NULL; | |
5ba3f43e A |
97 | struct timeval elapsed; |
98 | struct timeval start; | |
d9a64523 | 99 | vnode_t vp; |
5ba3f43e | 100 | |
d9a64523 A |
101 | vp = buf_vnode(bp); |
102 | if (!vp) { | |
103 | return; | |
104 | } | |
105 | ||
106 | mp = vp->v_mount; | |
5ba3f43e A |
107 | if (!mp) { |
108 | return; | |
109 | } | |
110 | ||
d9a64523 A |
111 | internal_info = mp->mnt_disk_conditioner_info; |
112 | if (!internal_info || !internal_info->dcinfo.enabled) { | |
5ba3f43e A |
113 | return; |
114 | } | |
d9a64523 | 115 | info = &(internal_info->dcinfo); |
5ba3f43e A |
116 | |
117 | if (!info->is_ssd) { | |
118 | // calculate approximate seek time based on difference in block number | |
d9a64523 | 119 | last_blkno = internal_info->last_blkno; |
5ba3f43e | 120 | blkdiff = bp->b_blkno > last_blkno ? bp->b_blkno - last_blkno : last_blkno - bp->b_blkno; |
d9a64523 | 121 | internal_info->last_blkno = bp->b_blkno + bp->b_bcount; |
5ba3f43e A |
122 | } else { |
123 | blkdiff = BLK_MAX(mp); | |
124 | } | |
125 | ||
126 | // scale access time by (distance in blocks from previous I/O / maximum blocks) | |
127 | access_time_scale = weighted_scale_factor((double)blkdiff / BLK_MAX(mp)); | |
128 | // most cases should pass in extents==1 for optimal delay calculation, otherwise just multiply delay by extents | |
129 | delay_usec = (uint64_t)(((uint64_t)extents * info->access_time_usec) * access_time_scale); | |
130 | ||
131 | if (info->read_throughput_mbps && (bp->b_flags & B_READ)) { | |
132 | delay_usec += (uint64_t)(total_size / ((double)(info->read_throughput_mbps * 1024 * 1024 / 8) / USEC_PER_SEC)); | |
133 | } else if (info->write_throughput_mbps && !(bp->b_flags & B_READ)) { | |
134 | delay_usec += (uint64_t)(total_size / ((double)(info->write_throughput_mbps * 1024 * 1024 / 8) / USEC_PER_SEC)); | |
135 | } | |
136 | ||
137 | // try simulating disk spinup based on time since last I/O | |
138 | if (!info->is_ssd) { | |
139 | microuptime(&elapsed); | |
d9a64523 | 140 | timevalsub(&elapsed, &internal_info->last_io_timestamp); |
5ba3f43e | 141 | // avoid this delay right after boot (assuming last_io_timestamp is 0 and disk is already spinning) |
d9a64523 | 142 | if (elapsed.tv_sec > DISK_IDLE_SEC && internal_info->last_io_timestamp.tv_sec != 0) { |
5ba3f43e A |
143 | delay_usec += DISK_SPINUP_SEC * USEC_PER_SEC; |
144 | } | |
145 | } | |
146 | ||
147 | if (delay_usec <= already_elapsed_usec) { | |
d9a64523 | 148 | microuptime(&internal_info->last_io_timestamp); |
5ba3f43e A |
149 | return; |
150 | } | |
151 | ||
152 | delay_usec -= already_elapsed_usec; | |
153 | ||
154 | while (delay_usec) { | |
155 | microuptime(&start); | |
156 | delay(delay_usec); | |
157 | microuptime(&elapsed); | |
158 | timevalsub(&elapsed, &start); | |
159 | if (elapsed.tv_sec * USEC_PER_SEC < delay_usec) { | |
160 | delay_usec -= elapsed.tv_sec * USEC_PER_SEC; | |
161 | } else { | |
162 | break; | |
163 | } | |
164 | if ((uint64_t)elapsed.tv_usec < delay_usec) { | |
165 | delay_usec -= elapsed.tv_usec; | |
166 | } else { | |
167 | break; | |
168 | } | |
169 | } | |
170 | ||
d9a64523 | 171 | microuptime(&internal_info->last_io_timestamp); |
5ba3f43e A |
172 | } |
173 | ||
174 | int | |
175 | disk_conditioner_get_info(mount_t mp, disk_conditioner_info *uinfo) | |
176 | { | |
177 | struct _disk_conditioner_info_t *info; | |
178 | ||
179 | if (!mp) { | |
180 | return EINVAL; | |
181 | } | |
182 | ||
183 | info = mp->mnt_disk_conditioner_info; | |
184 | ||
d9a64523 A |
185 | if (info) { |
186 | memcpy(uinfo, &(info->dcinfo), sizeof(disk_conditioner_info)); | |
5ba3f43e A |
187 | } |
188 | ||
5ba3f43e A |
189 | return 0; |
190 | } | |
191 | ||
d9a64523 | 192 | static inline void |
0a7de745 A |
193 | disk_conditioner_restore_mount_fields(mount_t mp, struct saved_mount_fields *mnt_fields) |
194 | { | |
d9a64523 A |
195 | mp->mnt_maxreadcnt = mnt_fields->mnt_maxreadcnt; |
196 | mp->mnt_maxwritecnt = mnt_fields->mnt_maxwritecnt; | |
197 | mp->mnt_segreadcnt = mnt_fields->mnt_segreadcnt; | |
198 | mp->mnt_segwritecnt = mnt_fields->mnt_segwritecnt; | |
199 | mp->mnt_ioqueue_depth = mnt_fields->mnt_ioqueue_depth; | |
200 | mp->mnt_ioscale = mnt_fields->mnt_ioscale; | |
201 | } | |
202 | ||
5ba3f43e A |
203 | int |
204 | disk_conditioner_set_info(mount_t mp, disk_conditioner_info *uinfo) | |
205 | { | |
d9a64523 A |
206 | struct _disk_conditioner_info_t *internal_info; |
207 | disk_conditioner_info *info; | |
208 | struct saved_mount_fields *mnt_fields; | |
5ba3f43e A |
209 | |
210 | if (!kauth_cred_issuser(kauth_cred_get()) || !IOTaskHasEntitlement(current_task(), DISK_CONDITIONER_SET_ENTITLEMENT)) { | |
211 | return EPERM; | |
212 | } | |
213 | ||
214 | if (!mp) { | |
215 | return EINVAL; | |
216 | } | |
217 | ||
d9a64523 A |
218 | mount_lock(mp); |
219 | ||
220 | internal_info = mp->mnt_disk_conditioner_info; | |
221 | if (!internal_info) { | |
cb323159 | 222 | internal_info = kalloc(sizeof(struct _disk_conditioner_info_t)); |
d9a64523 | 223 | bzero(internal_info, sizeof(struct _disk_conditioner_info_t)); |
cb323159 | 224 | mp->mnt_disk_conditioner_info = internal_info; |
d9a64523 A |
225 | mnt_fields = &(internal_info->mnt_fields); |
226 | ||
227 | /* save mount_t fields for restoration later */ | |
228 | mnt_fields->mnt_maxreadcnt = mp->mnt_maxreadcnt; | |
229 | mnt_fields->mnt_maxwritecnt = mp->mnt_maxwritecnt; | |
230 | mnt_fields->mnt_segreadcnt = mp->mnt_segreadcnt; | |
231 | mnt_fields->mnt_segwritecnt = mp->mnt_segwritecnt; | |
232 | mnt_fields->mnt_ioqueue_depth = mp->mnt_ioqueue_depth; | |
233 | mnt_fields->mnt_ioscale = mp->mnt_ioscale; | |
234 | } | |
235 | ||
236 | info = &(internal_info->dcinfo); | |
237 | mnt_fields = &(internal_info->mnt_fields); | |
238 | ||
239 | if (!uinfo->enabled && info->enabled) { | |
240 | /* disk conditioner is being disabled when already enabled */ | |
241 | disk_conditioner_restore_mount_fields(mp, mnt_fields); | |
242 | } | |
243 | ||
244 | memcpy(info, uinfo, sizeof(disk_conditioner_info)); | |
245 | ||
246 | /* scale back based on hardware advertised limits */ | |
247 | if (uinfo->ioqueue_depth == 0 || uinfo->ioqueue_depth > mnt_fields->mnt_ioqueue_depth) { | |
248 | info->ioqueue_depth = mnt_fields->mnt_ioqueue_depth; | |
249 | } | |
250 | if (uinfo->maxreadcnt == 0 || uinfo->maxreadcnt > mnt_fields->mnt_maxreadcnt) { | |
251 | info->maxreadcnt = mnt_fields->mnt_maxreadcnt; | |
252 | } | |
253 | if (uinfo->maxwritecnt == 0 || uinfo->maxwritecnt > mnt_fields->mnt_maxwritecnt) { | |
254 | info->maxwritecnt = mnt_fields->mnt_maxwritecnt; | |
255 | } | |
256 | if (uinfo->segreadcnt == 0 || uinfo->segreadcnt > mnt_fields->mnt_segreadcnt) { | |
257 | info->segreadcnt = mnt_fields->mnt_segreadcnt; | |
5ba3f43e | 258 | } |
d9a64523 A |
259 | if (uinfo->segwritecnt == 0 || uinfo->segwritecnt > mnt_fields->mnt_segwritecnt) { |
260 | info->segwritecnt = mnt_fields->mnt_segwritecnt; | |
261 | } | |
262 | ||
263 | if (uinfo->enabled) { | |
264 | mp->mnt_maxreadcnt = info->maxreadcnt; | |
265 | mp->mnt_maxwritecnt = info->maxwritecnt; | |
266 | mp->mnt_segreadcnt = info->segreadcnt; | |
267 | mp->mnt_segwritecnt = info->segwritecnt; | |
268 | mp->mnt_ioqueue_depth = info->ioqueue_depth; | |
269 | mp->mnt_ioscale = MNT_IOSCALE(info->ioqueue_depth); | |
270 | } | |
271 | ||
272 | mount_unlock(mp); | |
5ba3f43e | 273 | |
d9a64523 | 274 | microuptime(&internal_info->last_io_timestamp); |
5ba3f43e A |
275 | |
276 | // make sure throttling picks up the new periods | |
277 | throttle_info_mount_reset_period(mp, info->is_ssd); | |
278 | ||
279 | return 0; | |
280 | } | |
281 | ||
282 | void | |
283 | disk_conditioner_unmount(mount_t mp) | |
284 | { | |
d9a64523 A |
285 | struct _disk_conditioner_info_t *internal_info = mp->mnt_disk_conditioner_info; |
286 | ||
287 | if (!internal_info) { | |
5ba3f43e A |
288 | return; |
289 | } | |
d9a64523 A |
290 | |
291 | if (internal_info->dcinfo.enabled) { | |
292 | disk_conditioner_restore_mount_fields(mp, &(internal_info->mnt_fields)); | |
293 | } | |
5ba3f43e | 294 | mp->mnt_disk_conditioner_info = NULL; |
d9a64523 | 295 | kfree(internal_info, sizeof(struct _disk_conditioner_info_t)); |
5ba3f43e A |
296 | } |
297 | ||
298 | boolean_t | |
299 | disk_conditioner_mount_is_ssd(mount_t mp) | |
300 | { | |
d9a64523 | 301 | struct _disk_conditioner_info_t *internal_info = mp->mnt_disk_conditioner_info; |
5ba3f43e | 302 | |
d9a64523 | 303 | if (!internal_info || !internal_info->dcinfo.enabled) { |
cb323159 A |
304 | if (mp->mnt_kern_flag & MNTK_SSD) { |
305 | return TRUE; | |
306 | } | |
307 | return FALSE; | |
5ba3f43e A |
308 | } |
309 | ||
d9a64523 | 310 | return internal_info->dcinfo.is_ssd; |
5ba3f43e | 311 | } |