]>
Commit | Line | Data |
---|---|---|
5ba3f43e A |
1 | /* |
2 | * Copyright (c) 2016 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/fsctl.h> | |
30 | #include <stdbool.h> | |
31 | #include <sys/time.h> | |
32 | #include <sys/buf.h> | |
33 | #include <sys/mount_internal.h> | |
34 | #include <sys/vnode_internal.h> | |
35 | #include <sys/buf_internal.h> | |
36 | ||
37 | #include <kern/kalloc.h> | |
38 | ||
39 | #include <sys/kauth.h> | |
40 | #include <IOKit/IOBSD.h> | |
41 | ||
42 | #include <vfs/vfs_disk_conditioner.h> | |
43 | ||
// entitlement checked by disk_conditioner_set_info() before settings may be changed
#define DISK_CONDITIONER_SET_ENTITLEMENT "com.apple.private.dmc.set"

// number of total blocks for a mount: (f_blocks * f_bsize) / mnt_devblocksize.
// The argument is parenthesized so that any pointer expression can be passed;
// it is expanded three times, so it must not have side effects.
// The caller is responsible for making sure mnt_devblocksize is non-zero.
#define BLK_MAX(mp) ((((mp)->mnt_vfsstat.f_blocks) * ((mp)->mnt_vfsstat.f_bsize)) / ((mp)->mnt_devblocksize))

// approx. time to spin up an idle HDD
#define DISK_SPINUP_SEC (8)

// idle period until assumed disk spin down
#define DISK_IDLE_SEC (10 * 60)
54 | ||
55 | struct _disk_conditioner_info_t { | |
56 | boolean_t enabled; // if other fields have any effect | |
57 | uint64_t access_time_usec; // maximum latency before an I/O transfer begins | |
58 | uint64_t read_throughput_mbps; // throughput of an I/O read | |
59 | uint64_t write_throughput_mbps; // throughput of an I/O write | |
60 | boolean_t is_ssd; // behave like an SSD (for both conditioning and affecting behavior in other parts of VFS) | |
61 | daddr64_t last_blkno; // approx. last transfered block for simulating seek times | |
62 | struct timeval last_io_timestamp; // the last time an I/O completed | |
63 | }; | |
64 | ||
65 | void disk_conditioner_delay(buf_t, int, int, uint64_t); | |
66 | void disk_conditioner_unmount(mount_t mp); | |
67 | ||
68 | extern void throttle_info_mount_reset_period(mount_t, int isssd); | |
69 | ||
/*
 * Reshape a linear scale so that it climbs quickly away from 0: small inputs
 * are weighted higher, which acts as a kind of minimum latency.
 *
 * scale - the linear factor (the caller passes block distance / total blocks)
 *
 * Returns (scale - 1)^3 + 1, so 0 maps to 0 and 1 maps to 1.
 */
static double
weighted_scale_factor(double scale)
{
	// a logarithmic curve would be preferable, but there is no math library
	// available here, so a cubic is used as the approximation
	const double shifted = scale - 1;
	const double cubed = shifted * shifted * shifted;

	return cubed + 1;
}
79 | ||
80 | void | |
81 | disk_conditioner_delay(buf_t bp, int extents, int total_size, uint64_t already_elapsed_usec) | |
82 | { | |
83 | mount_t mp; | |
84 | uint64_t delay_usec; | |
85 | daddr64_t blkdiff; | |
86 | daddr64_t last_blkno; | |
87 | double access_time_scale; | |
88 | struct _disk_conditioner_info_t *info = NULL; | |
89 | struct timeval elapsed; | |
90 | struct timeval start; | |
91 | ||
92 | mp = buf_vnode(bp)->v_mount; | |
93 | if (!mp) { | |
94 | return; | |
95 | } | |
96 | ||
97 | info = mp->mnt_disk_conditioner_info; | |
98 | if (!info || !info->enabled) { | |
99 | return; | |
100 | } | |
101 | ||
102 | if (!info->is_ssd) { | |
103 | // calculate approximate seek time based on difference in block number | |
104 | last_blkno = info->last_blkno; | |
105 | blkdiff = bp->b_blkno > last_blkno ? bp->b_blkno - last_blkno : last_blkno - bp->b_blkno; | |
106 | info->last_blkno = bp->b_blkno + bp->b_bcount; | |
107 | } else { | |
108 | blkdiff = BLK_MAX(mp); | |
109 | } | |
110 | ||
111 | // scale access time by (distance in blocks from previous I/O / maximum blocks) | |
112 | access_time_scale = weighted_scale_factor((double)blkdiff / BLK_MAX(mp)); | |
113 | // most cases should pass in extents==1 for optimal delay calculation, otherwise just multiply delay by extents | |
114 | delay_usec = (uint64_t)(((uint64_t)extents * info->access_time_usec) * access_time_scale); | |
115 | ||
116 | if (info->read_throughput_mbps && (bp->b_flags & B_READ)) { | |
117 | delay_usec += (uint64_t)(total_size / ((double)(info->read_throughput_mbps * 1024 * 1024 / 8) / USEC_PER_SEC)); | |
118 | } else if (info->write_throughput_mbps && !(bp->b_flags & B_READ)) { | |
119 | delay_usec += (uint64_t)(total_size / ((double)(info->write_throughput_mbps * 1024 * 1024 / 8) / USEC_PER_SEC)); | |
120 | } | |
121 | ||
122 | // try simulating disk spinup based on time since last I/O | |
123 | if (!info->is_ssd) { | |
124 | microuptime(&elapsed); | |
125 | timevalsub(&elapsed, &info->last_io_timestamp); | |
126 | // avoid this delay right after boot (assuming last_io_timestamp is 0 and disk is already spinning) | |
127 | if (elapsed.tv_sec > DISK_IDLE_SEC && info->last_io_timestamp.tv_sec != 0) { | |
128 | delay_usec += DISK_SPINUP_SEC * USEC_PER_SEC; | |
129 | } | |
130 | } | |
131 | ||
132 | if (delay_usec <= already_elapsed_usec) { | |
133 | microuptime(&info->last_io_timestamp); | |
134 | return; | |
135 | } | |
136 | ||
137 | delay_usec -= already_elapsed_usec; | |
138 | ||
139 | while (delay_usec) { | |
140 | microuptime(&start); | |
141 | delay(delay_usec); | |
142 | microuptime(&elapsed); | |
143 | timevalsub(&elapsed, &start); | |
144 | if (elapsed.tv_sec * USEC_PER_SEC < delay_usec) { | |
145 | delay_usec -= elapsed.tv_sec * USEC_PER_SEC; | |
146 | } else { | |
147 | break; | |
148 | } | |
149 | if ((uint64_t)elapsed.tv_usec < delay_usec) { | |
150 | delay_usec -= elapsed.tv_usec; | |
151 | } else { | |
152 | break; | |
153 | } | |
154 | } | |
155 | ||
156 | microuptime(&info->last_io_timestamp); | |
157 | } | |
158 | ||
159 | int | |
160 | disk_conditioner_get_info(mount_t mp, disk_conditioner_info *uinfo) | |
161 | { | |
162 | struct _disk_conditioner_info_t *info; | |
163 | ||
164 | if (!mp) { | |
165 | return EINVAL; | |
166 | } | |
167 | ||
168 | info = mp->mnt_disk_conditioner_info; | |
169 | ||
170 | if (!info) { | |
171 | return 0; | |
172 | } | |
173 | ||
174 | uinfo->enabled = info->enabled; | |
175 | uinfo->access_time_usec = info->access_time_usec; | |
176 | uinfo->read_throughput_mbps = info->read_throughput_mbps; | |
177 | uinfo->write_throughput_mbps = info->write_throughput_mbps; | |
178 | uinfo->is_ssd = info->is_ssd; | |
179 | ||
180 | return 0; | |
181 | } | |
182 | ||
183 | int | |
184 | disk_conditioner_set_info(mount_t mp, disk_conditioner_info *uinfo) | |
185 | { | |
186 | struct _disk_conditioner_info_t *info; | |
187 | ||
188 | if (!kauth_cred_issuser(kauth_cred_get()) || !IOTaskHasEntitlement(current_task(), DISK_CONDITIONER_SET_ENTITLEMENT)) { | |
189 | return EPERM; | |
190 | } | |
191 | ||
192 | if (!mp) { | |
193 | return EINVAL; | |
194 | } | |
195 | ||
196 | info = mp->mnt_disk_conditioner_info; | |
197 | if (!info) { | |
198 | info = mp->mnt_disk_conditioner_info = kalloc(sizeof(struct _disk_conditioner_info_t)); | |
199 | bzero(info, sizeof(struct _disk_conditioner_info_t)); | |
200 | } | |
201 | ||
202 | info->enabled = uinfo->enabled; | |
203 | info->access_time_usec = uinfo->access_time_usec; | |
204 | info->read_throughput_mbps = uinfo->read_throughput_mbps; | |
205 | info->write_throughput_mbps = uinfo->write_throughput_mbps; | |
206 | info->is_ssd = uinfo->is_ssd; | |
207 | microuptime(&info->last_io_timestamp); | |
208 | ||
209 | // make sure throttling picks up the new periods | |
210 | throttle_info_mount_reset_period(mp, info->is_ssd); | |
211 | ||
212 | return 0; | |
213 | } | |
214 | ||
215 | void | |
216 | disk_conditioner_unmount(mount_t mp) | |
217 | { | |
218 | if (!mp->mnt_disk_conditioner_info) { | |
219 | return; | |
220 | } | |
221 | kfree(mp->mnt_disk_conditioner_info, sizeof(struct _disk_conditioner_info_t)); | |
222 | mp->mnt_disk_conditioner_info = NULL; | |
223 | } | |
224 | ||
225 | boolean_t | |
226 | disk_conditioner_mount_is_ssd(mount_t mp) | |
227 | { | |
228 | struct _disk_conditioner_info_t *info = mp->mnt_disk_conditioner_info; | |
229 | ||
230 | if (!info || !info->enabled) { | |
231 | return (mp->mnt_kern_flag & MNTK_SSD); | |
232 | } | |
233 | ||
234 | return info->is_ssd; | |
235 | } |