[apple/xnu.git] / osfmk / i386 / pmap_common.c (xnu-4570.71.2)
/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/pmap.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>


/*
 * Each entry in the pv_head_table is locked by a bit in the
 * pv_lock_table.  The lock bits are accessed by the physical
 * address of the page they lock.
 */

char		*pv_lock_table;		/* pointer to array of bits */
char		*pv_hash_lock_table;

pv_rooted_entry_t	pv_head_table;	/* array of entries, one per
					 * page */
uint32_t	pv_hashed_free_count = 0;
uint32_t	pv_hashed_kern_free_count = 0;

pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
uint32_t pmap_pagetable_corruption_incidents;
uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
uint64_t pmap_pagetable_corruption_interval_abstime;
thread_call_t pmap_pagetable_corruption_log_call;
static thread_call_data_t pmap_pagetable_corruption_log_call_data;
boolean_t pmap_pagetable_corruption_timeout = FALSE;

volatile uint32_t mappingrecurse = 0;

uint32_t pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark, pv_hashed_alloc_chunk, pv_hashed_kern_alloc_chunk;

thread_t mapping_replenish_thread;
event_t mapping_replenish_event, pmap_user_pv_throttle_event;

uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;

int pmap_asserts_enabled = (DEBUG);
int pmap_asserts_traced = 0;

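/*
 * Return the VM_WIMG_* cacheability of a physical page: VM_WIMG_IO if its
 * PTE template carries the non-cacheable bit, VM_WIMG_COPYBACK otherwise.
 */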
unsigned int pmap_cache_attributes(ppnum_t pn) {
	if (pmap_get_cache_attributes(pn, FALSE) & INTEL_PTE_NCACHE)
		return (VM_WIMG_IO);
	else
		return (VM_WIMG_COPYBACK);
}

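/*
 * Override the recorded cacheability attributes of a managed physical page
 * from a VM_MEM_* request, and flush the page's caches if it transitions
 * from cacheable to non-cacheable.
 */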
void pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr) {
	unsigned int current, template = 0;
	int pai;

	if (cacheattr & VM_MEM_NOT_CACHEABLE) {
		if (!(cacheattr & VM_MEM_GUARDED))
			template |= PHYS_PTA;
		template |= PHYS_NCACHE;
	}

	pmap_intr_assert();

	assert((pn != vm_page_fictitious_addr) && (pn != vm_page_guard_addr));

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		return;
	}

	/* override cache attributes for this phys page
	 * Does not walk through existing mappings to adjust,
	 * assumes page is disconnected
	 */

	LOCK_PVH(pai);

	pmap_update_cache_attributes_locked(pn, template);

	current = pmap_phys_attributes[pai] & PHYS_CACHEABILITY_MASK;
	pmap_phys_attributes[pai] &= ~PHYS_CACHEABILITY_MASK;
	pmap_phys_attributes[pai] |= template;

	UNLOCK_PVH(pai);

	if ((template & PHYS_NCACHE) && !(current & PHYS_NCACHE)) {
		pmap_sync_page_attributes_phys(pn);
	}
}

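/*
 * Translate the recorded physical attributes of a page into PTE (or EPT)
 * cacheability bits.  Unmanaged pages are reported as non-cacheable; before
 * the managed page range is known (early boot), no bits are returned.
 */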
unsigned	pmap_get_cache_attributes(ppnum_t pn, boolean_t is_ept) {
	if (last_managed_page == 0)
		return 0;

	if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
		return PTE_NCACHE(is_ept);

	/*
	 * The cache attributes are read locklessly for efficiency.
	 */
	unsigned int attr = pmap_phys_attributes[ppn_to_pai(pn)];
	unsigned int template = 0;

	/*
	 * The PTA bit is currently unsupported for EPT PTEs.
	 */
	if ((attr & PHYS_PTA) && !is_ept)
		template |= INTEL_PTE_PTA;

	/*
	 * If the page isn't marked as NCACHE, the default for EPT entries
	 * is WB.
	 */
	if (attr & PHYS_NCACHE)
		template |= PTE_NCACHE(is_ept);
	else if (is_ept)
		template |= INTEL_EPT_WB;

	return template;
}

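/*
 * Return TRUE if any page in [first, last] is managed by the pmap layer.
 * Pages the booter appended to the end of the kernel image are skipped.
 */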
boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
	ppnum_t     pn, kdata_start, kdata_end;
	boolean_t   result;
	boot_args * args;

	args        = (boot_args *) PE_state.bootArgs;

	// Allow pages that the booter added to the end of the kernel.
	// We may miss reporting some pages in this range that were freed
	// with ml_static_free()
	kdata_start = atop_32(args->kaddr);
	kdata_end   = atop_32(args->kaddr + args->ksize);

	assert(last_managed_page);
	assert(first <= last);

	for (result = FALSE, pn = first;
		!result
		&& (pn <= last)
		&& (pn <= last_managed_page);
		pn++)
	{
		if ((pn >= kdata_start) && (pn < kdata_end)) continue;
		result = (0 != (pmap_phys_attributes[pn] & PHYS_MANAGED));
	}

	return (result);
}

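/*
 * Query whether a managed page carries the PHYS_NOENCRYPT attribute;
 * unmanaged pages always report FALSE.
 */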
boolean_t
pmap_is_noencrypt(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai))
		return (FALSE);

	if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT)
		return (TRUE);

	return (FALSE);
}

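/*
 * Set PHYS_NOENCRYPT on a managed page, under the PV lock.  Unmanaged
 * pages are ignored.
 */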
void
pmap_set_noencrypt(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);

		pmap_phys_attributes[pai] |= PHYS_NOENCRYPT;

		UNLOCK_PVH(pai);
	}
}

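/*
 * Clear PHYS_NOENCRYPT on a managed page.  The flag is inspected locklessly
 * first; since the VM layer serializes changes, the PV lock is only taken
 * when the bit actually needs to be cleared.
 */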
void
pmap_clear_noencrypt(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		/*
		 * synchronization at VM layer prevents PHYS_NOENCRYPT
		 * from changing state, so we don't need the lock to inspect
		 */
		if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) {
			LOCK_PVH(pai);

			pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;

			UNLOCK_PVH(pai);
		}
	}
}

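/*
 * Hook for pagetable garbage-collection throttling; currently a no-op.
 */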
void
compute_pmap_gc_throttle(void *arg __unused)
{

}

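/*
 * Take the PV lock for a managed physical page; unmanaged pages fall back
 * to the global phys_backup_lock.
 */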
void
pmap_lock_phys_page(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
	} else
		simple_lock(&phys_backup_lock);
}

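/*
 * Release the lock taken by pmap_lock_phys_page().
 */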
void
pmap_unlock_phys_page(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	} else
		simple_unlock(&phys_backup_lock);
}


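/*
 * Emit the recorded pagetable corruption incidents (up to
 * PMAP_PAGETABLE_CORRUPTION_MAX_LOG of them) through the supplied
 * printf-style logging function.
 */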
__private_extern__ void
pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
	if (pmap_pagetable_corruption_incidents > 0) {
		int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
		(*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
		for (i = 0; i < e; i++) {
			(*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident, pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
		}
	}
}

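/*
 * Lazily set up the thread call used to log pagetable corruption reports,
 * and convert the minimum reporting interval to absolute time.
 */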
static inline void
pmap_pagetable_corruption_log_setup(void) {
	if (pmap_pagetable_corruption_log_call == NULL) {
		nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
		thread_call_setup(&pmap_pagetable_corruption_log_call_data,
		    (thread_call_func_t) pmap_pagetable_corruption_msg_log,
		    (thread_call_param_t) &printf);
		pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
	}
}

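/*
 * Prime the user and kernel PV hashed-entry free lists at boot, and size
 * the low-water marks and allocation chunks according to DRAM size.
 */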
void
mapping_free_prime(void)
{
	unsigned          i;
	pv_hashed_entry_t pvh_e;
	pv_hashed_entry_t pvh_eh;
	pv_hashed_entry_t pvh_et;
	int               pv_cnt;

	/* Scale based on DRAM size */
	pv_hashed_low_water_mark = MAX(PV_HASHED_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 2000);
	pv_hashed_low_water_mark = MIN(pv_hashed_low_water_mark, 16000);
	/* Alterable via sysctl */
	pv_hashed_kern_low_water_mark = MAX(PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 1000);
	pv_hashed_kern_low_water_mark = MIN(pv_hashed_kern_low_water_mark, 16000);
	pv_hashed_kern_alloc_chunk = PV_HASHED_KERN_ALLOC_CHUNK_INITIAL;
	pv_hashed_alloc_chunk = PV_HASHED_ALLOC_CHUNK_INITIAL;

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK_INITIAL); i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK_INITIAL; i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
}

void mapping_replenish(void);

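/*
 * Set up pagetable-corruption logging and start the dedicated
 * mapping_replenish thread.
 */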
void mapping_adjust(void) {
	kern_return_t mres;

	pmap_pagetable_corruption_log_setup();

	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
	if (mres != KERN_SUCCESS) {
		panic("pmap: mapping_replenish_thread creation failed");
	}
	thread_deallocate(mapping_replenish_thread);
}

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_kernel_reserve_replenish_stat;
unsigned pmap_user_reserve_replenish_stat;
unsigned pmap_kern_reserve_alloc_stat;

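/*
 * Body of the dedicated PV-entry replenishment thread.  Refills the kernel
 * and user PV hashed-entry free lists whenever they fall below their
 * low-water marks, wakes any throttled allocators, then blocks until the
 * next mapping_replenish_event wakeup.
 */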
__attribute__((noreturn))
void
mapping_replenish(void)
{
	pv_hashed_entry_t pvh_e;
	pv_hashed_entry_t pvh_eh;
	pv_hashed_entry_t pvh_et;
	int               pv_cnt;
	unsigned          i;

	/* We qualify for VM privileges...*/
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {

		while (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
			pv_cnt = 0;
			pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

			for (i = 0; i < pv_hashed_kern_alloc_chunk; i++) {
				pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
				pvh_e->qlink.next = (queue_entry_t)pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pv_cnt++;
			}
			pmap_kernel_reserve_replenish_stat += pv_cnt;
			PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
		}

		pv_cnt = 0;
		pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

		if (pv_hashed_free_count < pv_hashed_low_water_mark) {
			for (i = 0; i < pv_hashed_alloc_chunk; i++) {
				pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

				pvh_e->qlink.next = (queue_entry_t)pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pv_cnt++;
			}
			pmap_user_reserve_replenish_stat += pv_cnt;
			PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
		}
		/* Wake threads throttled while the kernel reserve was being replenished.
		 */
		if (pmap_pv_throttled_waiters) {
			pmap_pv_throttled_waiters = 0;
			thread_wakeup(&pmap_user_pv_throttle_event);
		}
		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
		if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark)
			continue;
		/* Block sans continuation to avoid yielding kernel stack */
		assert_wait(&mapping_replenish_event, THREAD_UNINT);
		mappingrecurse = 0;
		thread_block(THREAD_CONTINUE_NULL);
		pmap_mapping_thread_wakeups++;
	}
}

/*
 * Set specified attribute bits.
 */

void
phys_attribute_set(
	ppnum_t		pn,
	int		bits)
{
	int		pai;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/* Not a managed page. */
		return;
	}

	LOCK_PVH(pai);
	pmap_phys_attributes[pai] |= bits;
	UNLOCK_PVH(pai);
}

/*
 * Set the modify bit on the specified physical page.
 */

void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 * Clear the modify bits on the specified physical page.
 */

void
pmap_clear_modify(ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL);
}

/*
 * pmap_is_modified:
 *
 * Return whether or not the specified physical page is modified
 * by any physical maps.
 */

boolean_t
pmap_is_modified(ppnum_t pn)
{
	if (phys_attribute_test(pn, PHYS_MODIFIED))
		return TRUE;
	return FALSE;
}


/*
 * pmap_clear_reference:
 *
 * Clear the reference bit on the specified physical page.
 */

void
pmap_clear_reference(ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_REFERENCED, 0, NULL);
}

void
pmap_set_reference(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 * pmap_is_referenced:
 *
 * Return whether or not the specified physical page is referenced
 * by any physical maps.
 */

boolean_t
pmap_is_referenced(ppnum_t pn)
{
	if (phys_attribute_test(pn, PHYS_REFERENCED))
		return TRUE;
	return FALSE;
}


/*
 * pmap_get_refmod(phys)
 * returns the referenced and modified bits of the specified
 * physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pn)
{
	int		refmod;
	unsigned int	retval = 0;

	refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);

	if (refmod & PHYS_MODIFIED)
		retval |= VM_MEM_MODIFIED;
	if (refmod & PHYS_REFERENCED)
		retval |= VM_MEM_REFERENCED;

	return (retval);
}


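/*
 * pmap_clear_refmod_options(phys, mask, options, arg)
 * translates the generic VM_MEM_MODIFIED/VM_MEM_REFERENCED mask into the
 * x86 physical attribute bits and clears them, passing the options through
 * to phys_attribute_clear().
 */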
void
pmap_clear_refmod_options(ppnum_t pn, unsigned int mask, unsigned int options, void *arg)
{
	unsigned int x86Mask;

	x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
		  | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));

	phys_attribute_clear(pn, x86Mask, options, arg);
}

/*
 * pmap_clear_refmod(phys, mask)
 * clears the referenced and modified bits as specified by the mask
 * of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pn, unsigned int mask)
{
	unsigned int x86Mask;

	x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0)
		  | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));

	phys_attribute_clear(pn, x86Mask, 0, NULL);
}

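/*
 * pmap_disconnect(phys)
 * disconnects all mappings for the page and returns its reference/modify
 * state; equivalent to pmap_disconnect_options() with no options.
 */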
unsigned int
pmap_disconnect(ppnum_t pa)
{
	return (pmap_disconnect_options(pa, 0, NULL));
}

/*
 * Routine:
 *	pmap_disconnect_options
 *
 * Function:
 *	Disconnect all mappings for this page and return reference and change status
 *	in generic format.
 *
 */
unsigned int
pmap_disconnect_options(ppnum_t pa, unsigned int options, void *arg)
{
	unsigned refmod, vmrefmod = 0;

	pmap_page_protect_options(pa, 0, options, arg);	/* disconnect the page */

	pmap_assert(pa != vm_page_fictitious_addr);
	if ((pa == vm_page_guard_addr) || !IS_MANAGED_PAGE(pa) || (options & PMAP_OPTIONS_NOREFMOD))
		return 0;
	refmod = pmap_phys_attributes[pa] & (PHYS_MODIFIED | PHYS_REFERENCED);

	if (refmod & PHYS_MODIFIED)
		vmrefmod |= VM_MEM_MODIFIED;
	if (refmod & PHYS_REFERENCED)
		vmrefmod |= VM_MEM_REFERENCED;

	return vmrefmod;
}