git.saurik.com Git - apple/xnu.git/blob

1 /*

3 *

4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@

5 *

6 * This file contains Original Code and/or Modifications of Original Code

7 * as defined in and that are subject to the Apple Public Source License

8 * Version 2.0 (the 'License'). You may not use this file except in

9 * compliance with the License. The rights granted to you under the License

10 * may not be used to create, or enable the creation or redistribution of,

11 * unlawful or unlicensed copies of an Apple operating system, or to

12 * circumvent, violate, or enable the circumvention or violation of, any

13 * terms of an Apple operating system software license agreement.

14 *

15 * Please obtain a copy of the License at

16 * http://www.opensource.apple.com/apsl/ and read it before using this file.

17 *

18 * The Original Code and all software distributed under the License are

19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER

20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,

21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,

22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.

23 * Please see the License for the specific language governing rights and

24 * limitations under the License.

25 *

26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@

27 */

28 /*

29 * @OSF_COPYRIGHT@

30 */

31 /*

32 * Mach Operating System

35 *

36 * Permission to use, copy, modify and distribute this software and its

37 * documentation is hereby granted, provided that both the copyright

38 * notice and this permission notice appear in all copies of the

39 * software, derivative works or modified versions, and any portions

40 * thereof, and that both notices appear in supporting documentation.

41 *

42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"

43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR

44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.

45 *

46 * Carnegie Mellon requests users of this software to return to

47 *

48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU

49 * School of Computer Science

50 * Carnegie Mellon University

51 * Pittsburgh PA 15213-3890

52 *

53 * any improvements or extensions that they make and grant Carnegie Mellon

54 * the rights to redistribute these changes.

55 */

56 /*

57 */

58 /*

59 * File: vm_fault.c

60 * Author: Avadis Tevanian, Jr., Michael Wayne Young

61 *

62 * Page fault handling module.

63 */

65 #include <mach_cluster_stats.h>

66 #include <mach_pagemap.h>

67 #include <libkern/OSAtomic.h>

69 #include <mach/mach_types.h>

70 #include <mach/kern_return.h>

71 #include <mach/message.h> /* for error codes */

72 #include <mach/vm_param.h>

73 #include <mach/vm_behavior.h>

74 #include <mach/memory_object.h>

75 /* For memory_object_data_{request,unlock} */

76 #include <mach/sdt.h>

78 #include <kern/kern_types.h>

79 #include <kern/host_statistics.h>

80 #include <kern/counter.h>

81 #include <kern/task.h>

82 #include <kern/thread.h>

83 #include <kern/sched_prim.h>

84 #include <kern/host.h>

85 #include <kern/mach_param.h>

86 #include <kern/macro_help.h>

87 #include <kern/zalloc.h>

88 #include <kern/misc_protos.h>

89 #include <kern/policy_internal.h>

91 #include <vm/vm_compressor.h>

92 #include <vm/vm_compressor_pager.h>

93 #include <vm/vm_fault.h>

94 #include <vm/vm_map.h>

95 #include <vm/vm_object.h>

96 #include <vm/vm_page.h>

97 #include <vm/vm_kern.h>

98 #include <vm/pmap.h>

99 #include <vm/vm_pageout.h>

100 #include <vm/vm_protos.h>

101 #include <vm/vm_external.h>

102 #include <vm/memory_object.h>

103 #include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */

104 #include <vm/vm_shared_region.h>

105

106 #include <sys/codesign.h>

107 #include <sys/reason.h>

108 #include <sys/signalvar.h>

109

110 #include <san/kasan.h>

111

112 #define VM_FAULT_CLASSIFY 0

113

114 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */

115

116 int vm_protect_privileged_from_untrusted = 1;

117

118 unsigned int vm_object_pagein_throttle = 16;

119

120 /*

121 * We apply a hard throttle to the demand zero rate of tasks that we believe are running out of control which

122 * kicks in when swap space runs out. 64-bit programs have massive address spaces and can leak enormous amounts

123 * of memory if they're buggy and can run the system completely out of swap space. If this happens, we

124 * impose a hard throttle on them to prevent them from taking the last bit of memory left. This helps

125 * keep the UI active so that the user has a chance to kill the offending task before the system

126 * completely hangs.

127 *

128 * The hard throttle is only applied when the system is nearly completely out of swap space and is only applied

129 * to tasks that appear to be bloated. When swap runs out, any task using more than vm_hard_throttle_threshold

130 * will be throttled. The throttling is done by giving the thread that's trying to demand zero a page a

131 * delay of HARD_THROTTLE_DELAY microseconds before being allowed to try the page fault again.

132 */

133

 extern void throttle_lowpri_io(int);

135

 extern struct vnode *vnode_pager_lookup_vnode(memory_object_t);

137

138 uint64_t vm_hard_throttle_threshold;

139

140 #if DEBUG || DEVELOPMENT

141 static bool vmtc_panic_instead = false;

142 #endif /* DEBUG || DEVELOPMENT */

143

144 OS_ALWAYS_INLINE

145 boolean_t

146 NEED_TO_HARD_THROTTLE_THIS_TASK(void)

147 {

         return vm_wants_task_throttled(current_task()) ||

149 ((vm_page_free_count < vm_page_throttle_limit ||

150 HARD_THROTTLE_LIMIT_REACHED()) &&

                proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED);

152 }

153

154 #define HARD_THROTTLE_DELAY 10000 /* 10000 us == 10 ms */

155 #define SOFT_THROTTLE_DELAY 200 /* 200 us == .2 ms */

156

157 #define VM_PAGE_CREATION_THROTTLE_PERIOD_SECS 6

158 #define VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC 20000

159

160

/*
 * Account for one decompression: bump the vm_statistics_decompressions
 * counter and the current thread's own decompressions field.
 */
#define VM_STAT_DECOMPRESSIONS()                        \
MACRO_BEGIN                                             \
	counter_inc(&vm_statistics_decompressions);     \
	current_thread()->decompressions++;             \
MACRO_END

166

167 boolean_t current_thread_aborted(void);

168

169 /* Forward declarations of internal routines. */

170 static kern_return_t vm_fault_wire_fast(

171 vm_map_t map,

172 vm_map_offset_t va,

173 vm_prot_t prot,

174 vm_tag_t wire_tag,

175 vm_map_entry_t entry,

176 pmap_t pmap,

177 vm_map_offset_t pmap_addr,

178 ppnum_t *physpage_p);

179

180 static kern_return_t vm_fault_internal(

181 vm_map_t map,

182 vm_map_offset_t vaddr,

183 vm_prot_t caller_prot,

184 boolean_t change_wiring,

185 vm_tag_t wire_tag,

186 int interruptible,

187 pmap_t pmap,

188 vm_map_offset_t pmap_addr,

189 ppnum_t *physpage_p);

190

191 static void vm_fault_copy_cleanup(

192 vm_page_t page,

193 vm_page_t top_page);

194

195 static void vm_fault_copy_dst_cleanup(

196 vm_page_t page);

197

198 #if VM_FAULT_CLASSIFY

 extern void vm_fault_classify(vm_object_t       object,

200 vm_object_offset_t offset,

201 vm_prot_t fault_type);

202

 extern void vm_fault_classify_init(void);

204 #endif

205

206 unsigned long vm_pmap_enter_blocked = 0;

207 unsigned long vm_pmap_enter_retried = 0;

208

209 unsigned long vm_cs_validates = 0;

210 unsigned long vm_cs_revalidates = 0;

211 unsigned long vm_cs_query_modified = 0;

212 unsigned long vm_cs_validated_dirtied = 0;

213 unsigned long vm_cs_bitmap_validated = 0;

214

 void vm_pre_fault(vm_map_offset_t, vm_prot_t);

216

217 extern char *kdp_compressor_decompressed_page;

218 extern addr64_t kdp_compressor_decompressed_page_paddr;

219 extern ppnum_t kdp_compressor_decompressed_page_ppnum;

220

221 struct vmrtfr {

222 int vmrtfr_maxi;

223 int vmrtfr_curi;

224 int64_t vmrtf_total;

225 vm_rtfault_record_t *vm_rtf_records;

226 } vmrtfrs;

227 #define VMRTF_DEFAULT_BUFSIZE (4096)

228 #define VMRTF_NUM_RECORDS_DEFAULT (VMRTF_DEFAULT_BUFSIZE / sizeof(vm_rtfault_record_t))

 TUNABLE(int, vmrtf_num_records, "vm_rtfault_records", VMRTF_NUM_RECORDS_DEFAULT);

230

 static void vm_rtfrecord_lock(void);

 static void vm_rtfrecord_unlock(void);

 static void vm_record_rtfault(thread_t, uint64_t, vm_map_offset_t, int);

234

235 extern lck_grp_t vm_page_lck_grp_bucket;

236 extern lck_attr_t vm_page_lck_attr;

 LCK_SPIN_DECLARE_ATTR(vm_rtfr_slock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);

238

239 /*

240 * Routine: vm_fault_init

241 * Purpose:

242 * Initialize our private data structures.

243 */

244 __startup_func

245 void

246 vm_fault_init(void)

247 {

248 int i, vm_compressor_temp;

249 boolean_t need_default_val = TRUE;

250 /*

251 * Choose a value for the hard throttle threshold based on the amount of ram. The threshold is

252 * computed as a percentage of available memory, and the percentage used is scaled inversely with

253 * the amount of memory. The percentage runs between 10% and 35%. We use 35% for small memory systems

254 * and reduce the value down to 10% for very large memory configurations. This helps give us a

255 * definition of a memory hog that makes more sense relative to the amount of ram in the machine.

256 * The formula here simply uses the number of gigabytes of ram to adjust the percentage.

257 */

258

         vm_hard_throttle_threshold = sane_size * (35 - MIN((int)(sane_size / (1024 * 1024 * 1024)), 25)) / 100;

260

261 /*

262 * Configure compressed pager behavior. A boot arg takes precedence over a device tree entry.

263 */

264

         if (PE_parse_boot_argn("vm_compressor", &vm_compressor_temp, sizeof(vm_compressor_temp))) {

                 for (i = 0; i < VM_PAGER_MAX_MODES; i++) {

                         if (((vm_compressor_temp & (1 << i)) == vm_compressor_temp)) {

268 need_default_val = FALSE;

269 vm_compressor_mode = vm_compressor_temp;

270 break;

271 }

272 }

273 if (need_default_val) {

                         printf("Ignoring \"vm_compressor\" boot arg %d\n", vm_compressor_temp);

275 }

276 }

277 if (need_default_val) {

278 /* If no boot arg or incorrect boot arg, try device tree. */

                 PE_get_default("kern.vm_compressor", &vm_compressor_mode, sizeof(vm_compressor_mode));

280 }

         printf("\"vm_compressor_mode\" is %d\n", vm_compressor_mode);

282

283 PE_parse_boot_argn("vm_protect_privileged_from_untrusted",

284 &vm_protect_privileged_from_untrusted,

285 sizeof(vm_protect_privileged_from_untrusted));

286

287 #if DEBUG || DEVELOPMENT

         (void)PE_parse_boot_argn("text_corruption_panic", &vmtc_panic_instead, sizeof(vmtc_panic_instead));

289 #endif /* DEBUG || DEVELOPMENT */

290 }

291

292 __startup_func

293 static void

294 vm_rtfault_record_init(void)

295 {

296 size_t size;

297

         vmrtf_num_records = MAX(vmrtf_num_records, 1);

         size = vmrtf_num_records * sizeof(vm_rtfault_record_t);

         vmrtfrs.vm_rtf_records = zalloc_permanent(size,

301 ZALIGN(vm_rtfault_record_t));

         vmrtfrs.vmrtfr_maxi = vmrtf_num_records - 1;

303 }

 STARTUP(ZALLOC, STARTUP_RANK_MIDDLE, vm_rtfault_record_init);

305

306 /*

307 * Routine: vm_fault_cleanup

308 * Purpose:

309 * Clean up the result of vm_fault_page.

310 * Results:

311 * The paging reference for "object" is released.

312 * "object" is unlocked.

313 * If "top_page" is not null, "top_page" is

314 * freed and the paging reference for the object

315 * containing it is released.

316 *

317 * In/out conditions:

318 * "object" must be locked.

319 */

320 void

321 vm_fault_cleanup(

322 vm_object_t object,

323 vm_page_t top_page)

324 {

325 vm_object_paging_end(object);

326 vm_object_unlock(object);

327

328 if (top_page != VM_PAGE_NULL) {

329 object = VM_PAGE_OBJECT(top_page);

330

331 vm_object_lock(object);

332 VM_PAGE_FREE(top_page);

333 vm_object_paging_end(object);

334 vm_object_unlock(object);

335 }

336 }

337

338 #define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)

339

340

341 boolean_t vm_page_deactivate_behind = TRUE;

342 /*

343 * default sizes given VM_BEHAVIOR_DEFAULT reference behavior

344 */

345 #define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128

346 #define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16 /* don't make this too big... */

347 /* we use it to size an array on the stack */

348

349 int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW;

350

351 #define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024)

352

353 /*

354 * vm_page_is_sequential

355 *

356 * Determine if sequential access is in progress

357 * in accordance with the behavior specified.

358 * Update state to indicate current access pattern.

359 *

360 * object must have at least the shared lock held

361 */

362 static

363 void

364 vm_fault_is_sequential(

365 vm_object_t object,

366 vm_object_offset_t offset,

367 vm_behavior_t behavior)

368 {

369 vm_object_offset_t last_alloc;

370 int sequential;

371 int orig_sequential;

372

373 last_alloc = object->last_alloc;

374 sequential = object->sequential;

375 orig_sequential = sequential;

376

377 offset = vm_object_trunc_page(offset);

         if (offset == last_alloc && behavior != VM_BEHAVIOR_RANDOM) {

379 /* re-faulting in the same page: no change in behavior */

380 return;

381 }

382

383 switch (behavior) {

384 case VM_BEHAVIOR_RANDOM:

385 /*

386 * reset indicator of sequential behavior

387 */

388 sequential = 0;

389 break;

390

391 case VM_BEHAVIOR_SEQUENTIAL:

                 if (offset && last_alloc == offset - PAGE_SIZE_64) {

393 /*

394 * advance indicator of sequential behavior

395 */

396 if (sequential < MAX_SEQUENTIAL_RUN) {

397 sequential += PAGE_SIZE;

398 }

399 } else {

400 /*

401 * reset indicator of sequential behavior

402 */

403 sequential = 0;

404 }

405 break;

406

407 case VM_BEHAVIOR_RSEQNTL:

                 if (last_alloc && last_alloc == offset + PAGE_SIZE_64) {

409 /*

410 * advance indicator of sequential behavior

411 */

412 if (sequential > -MAX_SEQUENTIAL_RUN) {

413 sequential -= PAGE_SIZE;

414 }

415 } else {

416 /*

417 * reset indicator of sequential behavior

418 */

419 sequential = 0;

420 }

421 break;

422

423 case VM_BEHAVIOR_DEFAULT:

424 default:

                 if (offset && last_alloc == (offset - PAGE_SIZE_64)) {

426 /*

427 * advance indicator of sequential behavior

428 */

                         if (sequential < 0) {

430 sequential = 0;

431 }

432 if (sequential < MAX_SEQUENTIAL_RUN) {

433 sequential += PAGE_SIZE;

434 }

                 } else if (last_alloc && last_alloc == (offset + PAGE_SIZE_64)) {

436 /*

437 * advance indicator of sequential behavior

438 */

                         if (sequential > 0) {

440 sequential = 0;

441 }

442 if (sequential > -MAX_SEQUENTIAL_RUN) {

443 sequential -= PAGE_SIZE;

444 }

445 } else {

446 /*

447 * reset indicator of sequential behavior

448 */

449 sequential = 0;

450 }

451 break;

452 }

453 if (sequential != orig_sequential) {

                 if (!OSCompareAndSwap(orig_sequential, sequential, (UInt32 *)&object->sequential)) {

455 /*

456 * if someone else has already updated object->sequential

457 * don't bother trying to update it or object->last_alloc

458 */

459 return;

460 }

461 }

462 /*

463 * I'd like to do this with a OSCompareAndSwap64, but that

464 * doesn't exist for PPC... however, it shouldn't matter

465 * that much... last_alloc is maintained so that we can determine

466 * if a sequential access pattern is taking place... if only

467 * one thread is banging on this object, no problem with the unprotected

468 * update... if 2 or more threads are banging away, we run the risk of

469 * someone seeing a mangled update... however, in the face of multiple

470 * accesses, no sequential access pattern can develop anyway, so we

471 * haven't lost any real info.

472 */

473 object->last_alloc = offset;

474 }

475

476

477 int vm_page_deactivate_behind_count = 0;

478

479 /*

480 * vm_page_deactivate_behind

481 *

482 * Determine if sequential access is in progress

483 * in accordance with the behavior specified. If

484 * so, compute a potential page to deactivate and

485 * deactivate it.

486 *

487 * object must be locked.

488 *

489 * return TRUE if we actually deactivate a page

490 */

491 static

492 boolean_t

493 vm_fault_deactivate_behind(

494 vm_object_t object,

495 vm_object_offset_t offset,

496 vm_behavior_t behavior)

497 {

498 int n;

499 int pages_in_run = 0;

500 int max_pages_in_run = 0;

501 int sequential_run;

502 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;

503 vm_object_offset_t run_offset = 0;

504 vm_object_offset_t pg_offset = 0;

505 vm_page_t m;

506 vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER];

507

508 pages_in_run = 0;

509 #if TRACEFAULTPAGE

         dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */

511 #endif

         if (object == kernel_object || vm_page_deactivate_behind == FALSE || (vm_object_trunc_page(offset) != offset)) {

513 /*

514 * Do not deactivate pages from the kernel object: they

515 * are not intended to become pageable.

516 * or we've disabled the deactivate behind mechanism

517 * or we are dealing with an offset that is not aligned to

518 * the system's PAGE_SIZE because in that case we will

519 * handle the deactivation on the aligned offset and, thus,

520 * the full PAGE_SIZE page once. This helps us avoid the redundant

521 * deactivates and the extra faults.

522 */

523 return FALSE;

524 }

         if ((sequential_run = object->sequential)) {

                 if (sequential_run < 0) {

527 sequential_behavior = VM_BEHAVIOR_RSEQNTL;

528 sequential_run = 0 - sequential_run;

529 } else {

530 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;

531 }

532 }

533 switch (behavior) {

534 case VM_BEHAVIOR_RANDOM:

535 break;

536 case VM_BEHAVIOR_SEQUENTIAL:

                 if (sequential_run >= (int)PAGE_SIZE) {

538 run_offset = 0 - PAGE_SIZE_64;

539 max_pages_in_run = 1;

540 }

541 break;

542 case VM_BEHAVIOR_RSEQNTL:

                 if (sequential_run >= (int)PAGE_SIZE) {

544 run_offset = PAGE_SIZE_64;

545 max_pages_in_run = 1;

546 }

547 break;

548 case VM_BEHAVIOR_DEFAULT:

549 default:

550 { vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

551

552 /*

553 * determine if the run of sequential accesss has been

554 * long enough on an object with default access behavior

555 * to consider it for deactivation

556 */

                 if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) {

558 /*

559 * the comparisons between offset and behind are done

560 * in this kind of odd fashion in order to prevent wrap around

561 * at the end points

562 */

563 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) {

564 if (offset >= behind) {

565 run_offset = 0 - behind;

566 pg_offset = PAGE_SIZE_64;

567 max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;

568 }

569 } else {

570 if (offset < -behind) {

571 run_offset = behind;

572 pg_offset = 0 - PAGE_SIZE_64;

573 max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;

574 }

575 }

576 }

577 break;}

578 }

         for (n = 0; n < max_pages_in_run; n++) {

                 m = vm_page_lookup(object, offset + run_offset + (n * pg_offset));

581

                 if (m && !m->vmp_laundry && !m->vmp_busy && !m->vmp_no_cache && (m->vmp_q_state != VM_PAGE_ON_THROTTLED_Q) && !m->vmp_fictitious && !m->vmp_absent) {

583 page_run[pages_in_run++] = m;

584

585 /*

586 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...

587 *

588 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being

589 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the

590 * new reference happens. If no futher references happen on the page after that remote TLB flushes

591 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue

592 * by pageout_scan, which is just fine since the last reference would have happened quite far

593 * in the past (TLB caches don't hang around for very long), and of course could just as easily

594 * have happened before we did the deactivate_behind.

595 */

                         pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);

597 }

598 }

599 if (pages_in_run) {

600 vm_page_lockspin_queues();

601

                 for (n = 0; n < pages_in_run; n++) {

603 m = page_run[n];

604

605 vm_page_deactivate_internal(m, FALSE);

606

607 vm_page_deactivate_behind_count++;

608 #if TRACEFAULTPAGE

                         dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */

610 #endif

611 }

612 vm_page_unlock_queues();

613

614 return TRUE;

615 }

616 return FALSE;

617 }

618

619

620 #if (DEVELOPMENT || DEBUG)

621 uint32_t vm_page_creation_throttled_hard = 0;

622 uint32_t vm_page_creation_throttled_soft = 0;

623 uint64_t vm_page_creation_throttle_avoided = 0;

624 #endif /* DEVELOPMENT || DEBUG */

625

626 static int

627 vm_page_throttled(boolean_t page_kept)

628 {

629 clock_sec_t elapsed_sec;

630 clock_sec_t tv_sec;

631 clock_usec_t tv_usec;

632

633 thread_t thread = current_thread();

634

         if (thread->options & TH_OPT_VMPRIV) {

636 return 0;

637 }

638

639 if (thread->t_page_creation_throttled) {

640 thread->t_page_creation_throttled = 0;

641

642 if (page_kept == FALSE) {

643 goto no_throttle;

644 }

645 }

646 if (NEED_TO_HARD_THROTTLE_THIS_TASK()) {

647 #if (DEVELOPMENT || DEBUG)

648 thread->t_page_creation_throttled_hard++;

                 OSAddAtomic(1, &vm_page_creation_throttled_hard);

650 #endif /* DEVELOPMENT || DEBUG */

651 return HARD_THROTTLE_DELAY;

652 }

653

         if ((vm_page_free_count < vm_page_throttle_limit || (VM_CONFIG_COMPRESSOR_IS_PRESENT && SWAPPER_NEEDS_TO_UNTHROTTLE())) &&

655 thread->t_page_creation_count > (VM_PAGE_CREATION_THROTTLE_PERIOD_SECS * VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC)) {

                 if (vm_page_free_wanted == 0 && vm_page_free_wanted_privileged == 0) {

657 #if (DEVELOPMENT || DEBUG)

                         OSAddAtomic64(1, &vm_page_creation_throttle_avoided);

659 #endif

660 goto no_throttle;

661 }

662 clock_get_system_microtime(&tv_sec, &tv_usec);

663

664 elapsed_sec = tv_sec - thread->t_page_creation_time;

665

666 if (elapsed_sec <= VM_PAGE_CREATION_THROTTLE_PERIOD_SECS ||

                     (thread->t_page_creation_count / elapsed_sec) >= VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC) {

                         if (elapsed_sec >= (3 * VM_PAGE_CREATION_THROTTLE_PERIOD_SECS)) {

669 /*

670 * we'll reset our stats to give a well behaved app

671 * that was unlucky enough to accumulate a bunch of pages

672 * over a long period of time a chance to get out of

673 * the throttled state... we reset the counter and timestamp

674 * so that if it stays under the rate limit for the next second

675 * it will be back in our good graces... if it exceeds it, it

676 * will remain in the throttled state

677 */

678 thread->t_page_creation_time = tv_sec;

                                 thread->t_page_creation_count = VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC * (VM_PAGE_CREATION_THROTTLE_PERIOD_SECS - 1);

680 }

                         VM_PAGEOUT_DEBUG(vm_page_throttle_count, 1);

682

683 thread->t_page_creation_throttled = 1;

684

                         if (VM_CONFIG_COMPRESSOR_IS_PRESENT && HARD_THROTTLE_LIMIT_REACHED()) {

686 #if (DEVELOPMENT || DEBUG)

687 thread->t_page_creation_throttled_hard++;

                                 OSAddAtomic(1, &vm_page_creation_throttled_hard);

689 #endif /* DEVELOPMENT || DEBUG */

690 return HARD_THROTTLE_DELAY;

691 } else {

692 #if (DEVELOPMENT || DEBUG)

693 thread->t_page_creation_throttled_soft++;

                                 OSAddAtomic(1, &vm_page_creation_throttled_soft);

695 #endif /* DEVELOPMENT || DEBUG */

696 return SOFT_THROTTLE_DELAY;

697 }

698 }

699 thread->t_page_creation_time = tv_sec;

700 thread->t_page_creation_count = 0;

701 }

702 no_throttle:

703 thread->t_page_creation_count++;

704

705 return 0;

706 }

707

708

709 /*

710 * check for various conditions that would

711 * prevent us from creating a ZF page...

712 * cleanup is based on being called from vm_fault_page

713 *

714 * object must be locked

715 * object == m->vmp_object

716 */

717 static vm_fault_return_t

 vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, wait_interrupt_t interruptible_state, boolean_t page_throttle)

719 {

720 int throttle_delay;

721

722 if (object->shadow_severed ||

723 VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) {

724 /*

725 * Either:

726 * 1. the shadow chain was severed,

727 * 2. the purgeable object is volatile or empty and is marked

728 * to fault on access while volatile.

729 * Just have to return an error at this point

730 */

731 if (m != VM_PAGE_NULL) {

732 VM_PAGE_FREE(m);

733 }

734 vm_fault_cleanup(object, first_m);

735

736 thread_interrupt_level(interruptible_state);

737

738 return VM_FAULT_MEMORY_ERROR;

739 }

740 if (page_throttle == TRUE) {

                 if ((throttle_delay = vm_page_throttled(FALSE))) {

742 /*

743 * we're throttling zero-fills...

744 * treat this as if we couldn't grab a page

745 */

746 if (m != VM_PAGE_NULL) {

747 VM_PAGE_FREE(m);

748 }

749 vm_fault_cleanup(object, first_m);

750

                         VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);

752

753 delay(throttle_delay);

754

755 if (current_thread_aborted()) {

756 thread_interrupt_level(interruptible_state);

757 return VM_FAULT_INTERRUPTED;

758 }

759 thread_interrupt_level(interruptible_state);

760

761 return VM_FAULT_MEMORY_SHORTAGE;

762 }

763 }

764 return VM_FAULT_SUCCESS;

765 }

766

767 /*

768 * Clear the code signing bits on the given page_t

769 */

770 static void

771 vm_fault_cs_clear(vm_page_t m)

772 {

773 m->vmp_cs_validated = VMP_CS_ALL_FALSE;

774 m->vmp_cs_tainted = VMP_CS_ALL_FALSE;

775 m->vmp_cs_nx = VMP_CS_ALL_FALSE;

776 }

777

778 /*

779 * Enqueues the given page on the throttled queue.

780 * The caller must hold the vm_page_queue_lock and it will be held on return.

781 */

782 static void

783 vm_fault_enqueue_throttled_locked(vm_page_t m)

784 {

785 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

         assert(!VM_PAGE_WIRED(m));

787

788 /*

789 * can't be on the pageout queue since we don't

790 * have a pager to try and clean to

791 */

792 vm_page_queues_remove(m, TRUE);

793 vm_page_check_pageable_safe(m);

         vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);

795 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;

796 vm_page_throttled_count++;

797 }

798

799 /*

800 * do the work to zero fill a page and

801 * inject it into the correct paging queue

802 *

803 * m->vmp_object must be locked

804 * page queue lock must NOT be held

805 */

806 static int

807 vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)

808 {

809 int my_fault = DBG_ZERO_FILL_FAULT;

810 vm_object_t object;

811

812 object = VM_PAGE_OBJECT(m);

813

814 /*

815 * This is is a zero-fill page fault...

816 *

817 * Checking the page lock is a waste of

818 * time; this page was absent, so

819 * it can't be page locked by a pager.

820 *

821 * we also consider it undefined

822 * with respect to instruction

823 * execution. i.e. it is the responsibility

824 * of higher layers to call for an instruction

825 * sync after changing the contents and before

826 * sending a program into this area. We

827 * choose this approach for performance

828 */

829 vm_fault_cs_clear(m);

830 m->vmp_pmapped = TRUE;

831

832 if (no_zero_fill == TRUE) {

833 my_fault = DBG_NZF_PAGE_FAULT;

834

                 if (m->vmp_absent && m->vmp_busy) {

836 return my_fault;

837 }

838 } else {

839 vm_page_zero_fill(m);

840

841 counter_inc(&vm_statistics_zero_fill_count);

                 DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);

843 }

844 assert(!m->vmp_laundry);

845 assert(object != kernel_object);

846 //assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);

847 if (!VM_DYNAMIC_PAGING_ENABLED() &&

848 (object->purgable == VM_PURGABLE_DENY ||

849 object->purgable == VM_PURGABLE_NONVOLATILE ||

850 object->purgable == VM_PURGABLE_VOLATILE)) {

851 vm_page_lockspin_queues();

852 if (!VM_DYNAMIC_PAGING_ENABLED()) {

853 vm_fault_enqueue_throttled_locked(m);

854 }

855 vm_page_unlock_queues();

856 }

857 return my_fault;

858 }

859

860

861 /*

862 * Routine: vm_fault_page

863 * Purpose:

864 * Find the resident page for the virtual memory

865 * specified by the given virtual memory object

866 * and offset.

867 * Additional arguments:

868 * The required permissions for the page are given

869 * in "fault_type". Desired permissions are included

870 * in "protection".

871 * fault_info is passed along to determine pagein cluster

872 * limits... it contains the expected reference pattern,

873 * cluster size if available, etc...

874 *

875 * If the desired page is known to be resident (for

876 * example, because it was previously wired down), asserting

877 * the "unwiring" parameter will speed the search.

878 *

879 * If the operation can be interrupted (by thread_abort

880 * or thread_terminate), then the "interruptible"

881 * parameter should be asserted.

882 *

883 * Results:

884 * The page containing the proper data is returned

885 * in "result_page".

886 *

887 * In/out conditions:

888 * The source object must be locked and referenced,

889 * and must donate one paging reference. The reference

890 * is not affected. The paging reference and lock are

891 * consumed.

892 *

893 * If the call succeeds, the object in which "result_page"

894 * resides is left locked and holding a paging reference.

895 * If this is not the original object, a busy page in the

896 * original object is returned in "top_page", to prevent other

897 * callers from pursuing this same data, along with a paging

898 * reference for the original object. The "top_page" should

899 * be destroyed when this guarantee is no longer required.

900 * The "result_page" is also left busy. It is not removed

901 * from the pageout queues.

902 * Special Case:

903 * A return value of VM_FAULT_SUCCESS_NO_PAGE means that the

904 * fault succeeded but there's no VM page (i.e. the VM object

905 * does not actually hold VM pages, but device memory or

906 * large pages). The object is still locked and we still hold a

907 * paging_in_progress reference.

908 */

909 unsigned int vm_fault_page_blocked_access = 0;

910 unsigned int vm_fault_page_forced_retry = 0;

911

912 vm_fault_return_t

913 vm_fault_page(

914 /* Arguments: */

915 vm_object_t first_object, /* Object to begin search */

916 vm_object_offset_t first_offset, /* Offset into object */

917 vm_prot_t fault_type, /* What access is requested */

918 boolean_t must_be_resident,/* Must page be resident? */

919 boolean_t caller_lookup, /* caller looked up page */

920 /* Modifies in place: */

921 vm_prot_t *protection, /* Protection for mapping */

922 vm_page_t *result_page, /* Page found, if successful */

923 /* Returns: */

924 vm_page_t *top_page, /* Page in top object, if

925 * not result_page. */

926 int *type_of_fault, /* if non-null, fill in with type of fault

927 * COW, zero-fill, etc... returned in trace point */

928 /* More arguments: */

929 kern_return_t *error_code, /* code if page is in error */

930 boolean_t no_zero_fill, /* don't zero fill absent pages */

931 boolean_t data_supply, /* treat as data_supply if

932 * it is a write fault and a full

933 * page is provided */

934 vm_object_fault_info_t fault_info)

935 {

936 vm_page_t m;

937 vm_object_t object;

938 vm_object_offset_t offset;

939 vm_page_t first_m;

940 vm_object_t next_object;

941 vm_object_t copy_object;

942 boolean_t look_for_page;

943 boolean_t force_fault_retry = FALSE;

944 vm_prot_t access_required = fault_type;

945 vm_prot_t wants_copy_flag;

946 kern_return_t wait_result;

947 wait_interrupt_t interruptible_state;

948 boolean_t data_already_requested = FALSE;

949 vm_behavior_t orig_behavior;

950 vm_size_t orig_cluster_size;

951 vm_fault_return_t error;

952 int my_fault;

953 uint32_t try_failed_count;

954 int interruptible; /* how may fault be interrupted? */

955 int external_state = VM_EXTERNAL_STATE_UNKNOWN;

956 memory_object_t pager;

957 vm_fault_return_t retval;

958 int grab_options;

959

960 /*

961 * MUST_ASK_PAGER() evaluates to TRUE if the page specified by object/offset is

962 * marked as paged out in the compressor pager or the pager doesn't exist.

963 * Note also that if the pager for an internal object

964 * has not been created, the pager is not invoked regardless of the value

965 * of MUST_ASK_PAGER().

966 *

967 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset

968 * is marked as paged out in the compressor pager.

969 * PAGED_OUT() is used to determine if a page has already been pushed

970 * into a copy object in order to avoid a redundant page out operation.

971 */

972 #define MUST_ASK_PAGER(o, f, s) \

973 ((s = VM_COMPRESSOR_PAGER_STATE_GET((o), (f))) != VM_EXTERNAL_STATE_ABSENT)

974

975 #define PAGED_OUT(o, f) \

976 (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) == VM_EXTERNAL_STATE_EXISTS)

977

978 /*

979 * Recovery actions

980 */

981 #define RELEASE_PAGE(m) \

982 MACRO_BEGIN \

983 PAGE_WAKEUP_DONE(m); \

984 if ( !VM_PAGE_PAGEABLE(m)) { \

985 vm_page_lockspin_queues(); \

986 if ( !VM_PAGE_PAGEABLE(m)) { \

987 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) \

988 vm_page_deactivate(m); \

989 else \

990 vm_page_activate(m); \

991 } \

992 vm_page_unlock_queues(); \

993 } \

994 MACRO_END

995

996 #if TRACEFAULTPAGE

         dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */

998 #endif

999

1000 interruptible = fault_info->interruptible;

1001 interruptible_state = thread_interrupt_level(interruptible);

1002

1003 /*

1004 * INVARIANTS (through entire routine):

1005 *

1006 * 1) At all times, we must either have the object

1007 * lock or a busy page in some object to prevent

1008 * some other thread from trying to bring in

1009 * the same page.

1010 *

1011 * Note that we cannot hold any locks during the

1012 * pager access or when waiting for memory, so

1013 * we use a busy page then.

1014 *

1015 * 2) To prevent another thread from racing us down the

1016 * shadow chain and entering a new page in the top

1017 * object before we do, we must keep a busy page in

1018 * the top object while following the shadow chain.

1019 *

1020 * 3) We must increment paging_in_progress on any object

1021 * for which we have a busy page before dropping

1022 * the object lock

1023 *

1024 * 4) We leave busy pages on the pageout queues.

1025 * If the pageout daemon comes across a busy page,

1026 * it will remove the page from the pageout queues.

1027 */

1028

1029 object = first_object;

1030 offset = first_offset;

1031 first_m = VM_PAGE_NULL;

1032 access_required = fault_type;

1033

1034 /*

1035 * default type of fault

1036 */

1037 my_fault = DBG_CACHE_HIT_FAULT;

1038

1039 while (TRUE) {

1040 #if TRACEFAULTPAGE

                 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */

1042 #endif

1043

1044 grab_options = 0;

1045 #if CONFIG_SECLUDED_MEMORY

1046 if (object->can_grab_secluded) {

1047 grab_options |= VM_PAGE_GRAB_SECLUDED;

1048 }

1049 #endif /* CONFIG_SECLUDED_MEMORY */

1050

1051 if (!object->alive) {

1052 /*

1053 * object is no longer valid

1054 * clean up and return error

1055 */

1056 vm_fault_cleanup(object, first_m);

1057 thread_interrupt_level(interruptible_state);

1058

1059 return VM_FAULT_MEMORY_ERROR;

1060 }

1061

                 if (!object->pager_created && object->phys_contiguous) {

1063 /*

1064 * A physically-contiguous object without a pager:

1065 * must be a "large page" object. We do not deal

1066 * with VM pages for this object.

1067 */

1068 caller_lookup = FALSE;

1069 m = VM_PAGE_NULL;

1070 goto phys_contig_object;

1071 }

1072

1073 if (object->blocked_access) {

1074 /*

1075 * Access to this VM object has been blocked.

1076 * Replace our "paging_in_progress" reference with

1077 * an "activity_in_progress" reference and wait for

1078 * access to be unblocked.

1079 */

1080 caller_lookup = FALSE; /* no longer valid after sleep */

1081 vm_object_activity_begin(object);

1082 vm_object_paging_end(object);

1083 while (object->blocked_access) {

1084 vm_object_sleep(object,

1085 VM_OBJECT_EVENT_UNBLOCKED,

1086 THREAD_UNINT);

1087 }

1088 vm_fault_page_blocked_access++;

1089 vm_object_paging_begin(object);

1090 vm_object_activity_end(object);

1091 }

1092

1093 /*

1094 * See whether the page at 'offset' is resident

1095 */

1096 if (caller_lookup == TRUE) {

1097 /*

1098 * The caller has already looked up the page

1099 * and gave us the result in "result_page".

1100 * We can use this for the first lookup but

1101 * it loses its validity as soon as we unlock

1102 * the object.

1103 */

1104 m = *result_page;

1105 caller_lookup = FALSE; /* no longer valid after that */

1106 } else {

                         m = vm_page_lookup(object, vm_object_trunc_page(offset));

1108 }

1109 #if TRACEFAULTPAGE

                 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */

1111 #endif

1112 if (m != VM_PAGE_NULL) {

1113 if (m->vmp_busy) {

1114 /*

1115 * The page is being brought in,

1116 * wait for it and then retry.

1117 */

1118 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */

1120 #endif

                                 wait_result = PAGE_SLEEP(object, m, interruptible);

1122

1123 if (wait_result != THREAD_AWAKENED) {

1124 vm_fault_cleanup(object, first_m);

1125 thread_interrupt_level(interruptible_state);

1126

1127 if (wait_result == THREAD_RESTART) {

1128 return VM_FAULT_RETRY;

1129 } else {

1130 return VM_FAULT_INTERRUPTED;

1131 }

1132 }

1133 continue;

1134 }

1135 if (m->vmp_laundry) {

1136 m->vmp_free_when_done = FALSE;

1137

1138 if (!m->vmp_cleaning) {

1139 vm_pageout_steal_laundry(m, FALSE);

1140 }

1141 }

                         if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {

1143 /*

1144 * Guard page: off limits !

1145 */

1146 if (fault_type == VM_PROT_NONE) {

1147 /*

1148 * The fault is not requesting any

1149 * access to the guard page, so it must

1150 * be just to wire or unwire it.

1151 * Let's pretend it succeeded...

1152 */

1153 m->vmp_busy = TRUE;

1154 *result_page = m;

1155 assert(first_m == VM_PAGE_NULL);

1156 *top_page = first_m;

1157 if (type_of_fault) {

1158 *type_of_fault = DBG_GUARD_FAULT;

1159 }

1160 thread_interrupt_level(interruptible_state);

1161 return VM_FAULT_SUCCESS;

1162 } else {

1163 /*

1164 * The fault requests access to the

1165 * guard page: let's deny that !

1166 */

1167 vm_fault_cleanup(object, first_m);

1168 thread_interrupt_level(interruptible_state);

1169 return VM_FAULT_MEMORY_ERROR;

1170 }

1171 }

1172

1173 if (m->vmp_error) {

1174 /*

1175 * The page is in error, give up now.

1176 */

1177 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);      /* (TEST/DEBUG) */

1179 #endif

1180 if (error_code) {

1181 *error_code = KERN_MEMORY_ERROR;

1182 }

1183 VM_PAGE_FREE(m);

1184

1185 vm_fault_cleanup(object, first_m);

1186 thread_interrupt_level(interruptible_state);

1187

1188 return VM_FAULT_MEMORY_ERROR;

1189 }

1190 if (m->vmp_restart) {

1191 /*

1192 * The pager wants us to restart

1193 * at the top of the chain,

1194 * typically because it has moved the

1195 * page to another pager, then do so.

1196 */

1197 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */

1199 #endif

1200 VM_PAGE_FREE(m);

1201

1202 vm_fault_cleanup(object, first_m);

1203 thread_interrupt_level(interruptible_state);

1204

1205 return VM_FAULT_RETRY;

1206 }

1207 if (m->vmp_absent) {

1208 /*

1209 * The page isn't busy, but is absent,

1210 * therefore it's deemed "unavailable".

1211 *

1212 * Remove the non-existent page (unless it's

1213 * in the top object) and move on down to the

1214 * next object (if there is one).

1215 */

1216 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);  /* (TEST/DEBUG) */

1218 #endif

1219 next_object = object->shadow;

1220

1221 if (next_object == VM_OBJECT_NULL) {

1222 /*

1223 * Absent page at bottom of shadow

1224 * chain; zero fill the page we left

1225 * busy in the first object, and free

1226 * the absent page.

1227 */

1228 assert(!must_be_resident);

1229

1230 /*

1231 * check for any conditions that prevent

1232 * us from creating a new zero-fill page

1233 * vm_fault_check will do all of the

1234 * fault cleanup in the case of an error condition

1235 * including resetting the thread_interrupt_level

1236 */

                                         error = vm_fault_check(object, m, first_m, interruptible_state, (type_of_fault == NULL) ? TRUE : FALSE);

1238

1239 if (error != VM_FAULT_SUCCESS) {

1240 return error;

1241 }

1242

1243 if (object != first_object) {

1244 /*

1245 * free the absent page we just found

1246 */

1247 VM_PAGE_FREE(m);

1248

1249 /*

1250 * drop reference and lock on current object

1251 */

1252 vm_object_paging_end(object);

1253 vm_object_unlock(object);

1254

1255 /*

1256 * grab the original page we

1257 * 'soldered' in place and

1258 * retake lock on 'first_object'

1259 */

1260 m = first_m;

1261 first_m = VM_PAGE_NULL;

1262

1263 object = first_object;

1264 offset = first_offset;

1265

1266 vm_object_lock(object);

1267 } else {

1268 /*

1269 * we're going to use the absent page we just found

1270 * so convert it to a 'busy' page

1271 */

1272 m->vmp_absent = FALSE;

1273 m->vmp_busy = TRUE;

1274 }

                                         if (fault_info->mark_zf_absent && no_zero_fill == TRUE) {

1276 m->vmp_absent = TRUE;

1277 }

1278 /*

1279 * zero-fill the page and put it on

1280 * the correct paging queue

1281 */

                                         my_fault = vm_fault_zero_page(m, no_zero_fill);

1283

1284 break;

1285 } else {

1286 if (must_be_resident) {

1287 vm_object_paging_end(object);

                                         } else if (object != first_object) {

1289 vm_object_paging_end(object);

1290 VM_PAGE_FREE(m);

1291 } else {

1292 first_m = m;

1293 m->vmp_absent = FALSE;

1294 m->vmp_busy = TRUE;

1295

1296 vm_page_lockspin_queues();

1297 vm_page_queues_remove(m, FALSE);

1298 vm_page_unlock_queues();

1299 }

1300

1301 offset += object->vo_shadow_offset;

1302 fault_info->lo_offset += object->vo_shadow_offset;

1303 fault_info->hi_offset += object->vo_shadow_offset;

1304 access_required = VM_PROT_READ;

1305

1306 vm_object_lock(next_object);

1307 vm_object_unlock(object);

1308 object = next_object;

1309 vm_object_paging_begin(object);

1310

1311 /*

1312 * reset to default type of fault

1313 */

1314 my_fault = DBG_CACHE_HIT_FAULT;

1315

1316 continue;

1317 }

1318 }

1319 if ((m->vmp_cleaning)

                             && ((object != first_object) || (object->copy != VM_OBJECT_NULL))

1321 && (fault_type & VM_PROT_WRITE)) {

1322 /*

1323 * This is a copy-on-write fault that will

1324 * cause us to revoke access to this page, but

1325 * this page is in the process of being cleaned

1326 * in a clustered pageout. We must wait until

1327 * the cleaning operation completes before

1328 * revoking access to the original page,

1329 * otherwise we might attempt to remove a

1330 * wired mapping.

1331 */

1332 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);  /* (TEST/DEBUG) */

1334 #endif

1335 /*

1336 * take an extra ref so that object won't die

1337 */

1338 vm_object_reference_locked(object);

1339

1340 vm_fault_cleanup(object, first_m);

1341

1342 vm_object_lock(object);

                                 assert(object->ref_count > 0);

1344

                                 m = vm_page_lookup(object, vm_object_trunc_page(offset));

1346

                                 if (m != VM_PAGE_NULL && m->vmp_cleaning) {

1348 PAGE_ASSERT_WAIT(m, interruptible);

1349

1350 vm_object_unlock(object);

1351 wait_result = thread_block(THREAD_CONTINUE_NULL);

1352 vm_object_deallocate(object);

1353

1354 goto backoff;

1355 } else {

1356 vm_object_unlock(object);

1357

1358 vm_object_deallocate(object);

1359 thread_interrupt_level(interruptible_state);

1360

1361 return VM_FAULT_RETRY;

1362 }

1363 }

                         if (type_of_fault == NULL && (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) &&

                             !(fault_info != NULL && fault_info->stealth)) {

1366 /*

1367 * If we were passed a non-NULL pointer for

1368 * "type_of_fault", then we came from

1369 * vm_fault... we'll let it deal with

1370 * this condition, since it

1371 * needs to see m->vmp_speculative to correctly

1372 * account the pageins, otherwise...

1373 * take it off the speculative queue, we'll

1374 * let the caller of vm_fault_page deal

1375 * with getting it onto the correct queue

1376 *

1377 * If the caller specified in fault_info that

1378 * it wants a "stealth" fault, we also leave

1379 * the page in the speculative queue.

1380 */

1381 vm_page_lockspin_queues();

                                 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {

1383 vm_page_queues_remove(m, FALSE);

1384 }

1385 vm_page_unlock_queues();

1386 }

                         assert(object == VM_PAGE_OBJECT(m));

1388

1389 if (object->code_signed) {

1390 /*

1391 * CODE SIGNING:

1392 * We just paged in a page from a signed

1393 * memory object but we don't need to

1394 * validate it now. We'll validate it if and

1395 * when it gets mapped into a user address

1396 * space for the first time or when the page

1397 * gets copied to another object as a result

1398 * of a copy-on-write.

1399 */

1400 }

1401

1402 /*

1403 * We mark the page busy and leave it on

1404 * the pageout queues. If the pageout

1405 * daemon comes across it, then it will

1406 * remove the page from the queue, but not the object

1407 */

1408 #if TRACEFAULTPAGE

                         dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */

1410 #endif

1411 assert(!m->vmp_busy);

1412 assert(!m->vmp_absent);

1413

1414 m->vmp_busy = TRUE;

1415 break;

1416 }

1417

1418

1419 /*

1420 * we get here when there is no page present in the object at

1421 * the offset we're interested in... we'll allocate a page

1422 * at this point if the pager associated with

1423 * this object can provide the data or we're the top object...

1424 * object is locked; m == NULL

1425 */

1426

1427 if (must_be_resident) {

1428 if (fault_type == VM_PROT_NONE &&

1429 object == kernel_object) {

1430 /*

1431 * We've been called from vm_fault_unwire()

1432 * while removing a map entry that was allocated

1433 * with KMA_KOBJECT and KMA_VAONLY. This page

1434 * is not present and there's nothing more to

1435 * do here (nothing to unwire).

1436 */

1437 vm_fault_cleanup(object, first_m);

1438 thread_interrupt_level(interruptible_state);

1439

1440 return VM_FAULT_MEMORY_ERROR;

1441 }

1442

1443 goto dont_look_for_page;

1444 }

1445

1446 /* Don't expect to fault pages into the kernel object. */

1447 assert(object != kernel_object);

1448

1449 data_supply = FALSE;

1450

                 look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset, external_state) == TRUE) && !data_supply);

1452

1453 #if TRACEFAULTPAGE

                 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);      /* (TEST/DEBUG) */

1455 #endif

                 if (!look_for_page && object == first_object && !object->phys_contiguous) {

1457 /*

1458 * Allocate a new page for this object/offset pair as a placeholder

1459 */

1460 m = vm_page_grab_options(grab_options);

1461 #if TRACEFAULTPAGE

                         dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */

1463 #endif

1464 if (m == VM_PAGE_NULL) {

1465 vm_fault_cleanup(object, first_m);

1466 thread_interrupt_level(interruptible_state);

1467

1468 return VM_FAULT_MEMORY_SHORTAGE;

1469 }

1470

                         if (fault_info && fault_info->batch_pmap_op == TRUE) {

1472 vm_page_insert_internal(m, object,

1473 vm_object_trunc_page(offset),

                                     VM_KERN_MEMORY_NONE, FALSE, TRUE, TRUE, FALSE, NULL);

1475 } else {

                                 vm_page_insert(m, object, vm_object_trunc_page(offset));

1477 }

1478 }

1479 if (look_for_page) {

1480 kern_return_t rc;

1481 int my_fault_type;

1482

1483 /*

1484 * If the memory manager is not ready, we

1485 * cannot make requests.

1486 */

1487 if (!object->pager_ready) {

1488 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);       /* (TEST/DEBUG) */

1490 #endif

1491 if (m != VM_PAGE_NULL) {

1492 VM_PAGE_FREE(m);

1493 }

1494

1495 /*

1496 * take an extra ref so object won't die

1497 */

1498 vm_object_reference_locked(object);

1499 vm_fault_cleanup(object, first_m);

1500

1501 vm_object_lock(object);

                                 assert(object->ref_count > 0);

1503

1504 if (!object->pager_ready) {

                                         wait_result = vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, interruptible);

1506

1507 vm_object_unlock(object);

1508 if (wait_result == THREAD_WAITING) {

1509 wait_result = thread_block(THREAD_CONTINUE_NULL);

1510 }

1511 vm_object_deallocate(object);

1512

1513 goto backoff;

1514 } else {

1515 vm_object_unlock(object);

1516 vm_object_deallocate(object);

1517 thread_interrupt_level(interruptible_state);

1518

1519 return VM_FAULT_RETRY;

1520 }

1521 }

                         if (!object->internal && !object->phys_contiguous && object->paging_in_progress > vm_object_pagein_throttle) {

1523 /*

1524 * If there are too many outstanding page

1525 * requests pending on this external object, we

1526 * wait for them to be resolved now.

1527 */

1528 #if TRACEFAULTPAGE

                                 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);       /* (TEST/DEBUG) */

1530 #endif

1531 if (m != VM_PAGE_NULL) {

1532 VM_PAGE_FREE(m);

1533 }

1534 /*

1535 * take an extra ref so object won't die

1536 */

1537 vm_object_reference_locked(object);

1538

1539 vm_fault_cleanup(object, first_m);

1540

1541 vm_object_lock(object);

                                 assert(object->ref_count > 0);

1543

                                 if (object->paging_in_progress >= vm_object_pagein_throttle) {

                                         vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS, interruptible);

1546

1547 vm_object_unlock(object);

1548 wait_result = thread_block(THREAD_CONTINUE_NULL);

1549 vm_object_deallocate(object);

1550

1551 goto backoff;

1552 } else {

1553 vm_object_unlock(object);

1554 vm_object_deallocate(object);

1555 thread_interrupt_level(interruptible_state);

1556

1557 return VM_FAULT_RETRY;

1558 }

1559 }

1560 if (object->internal) {

1561 int compressed_count_delta;

1562

1563 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

1564

1565 if (m == VM_PAGE_NULL) {

1566 /*

1567 * Allocate a new page for this object/offset pair as a placeholder

1568 */

1569 m = vm_page_grab_options(grab_options);

1570 #if TRACEFAULTPAGE

                                         dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);  /* (TEST/DEBUG) */

1572 #endif

1573 if (m == VM_PAGE_NULL) {

1574 vm_fault_cleanup(object, first_m);

1575 thread_interrupt_level(interruptible_state);

1576

1577 return VM_FAULT_MEMORY_SHORTAGE;

1578 }

1579

1580 m->vmp_absent = TRUE;

                                         if (fault_info && fault_info->batch_pmap_op == TRUE) {

                                                 vm_page_insert_internal(m, object, vm_object_trunc_page(offset), VM_KERN_MEMORY_NONE, FALSE, TRUE, TRUE, FALSE, NULL);

1583 } else {

                                                 vm_page_insert(m, object, vm_object_trunc_page(offset));

1585 }

1586 }

1587 assert(m->vmp_busy);

1588

1589 m->vmp_absent = TRUE;

1590 pager = object->pager;

1591

                                 assert(object->paging_in_progress > 0);

1593 vm_object_unlock(object);

1594

1595 rc = vm_compressor_pager_get(

1596 pager,

1597 offset + object->paging_offset,

1598 VM_PAGE_GET_PHYS_PAGE(m),

1599 &my_fault_type,

1600 0,

1601 &compressed_count_delta);

1602

1603 if (type_of_fault == NULL) {

1604 int throttle_delay;

1605

1606 /*

1607 * we weren't called from vm_fault, so we

1608 * need to apply page creation throttling

1609 * do it before we re-acquire any locks

1610 */

1611 if (my_fault_type == DBG_COMPRESSOR_FAULT) {

                                                 if ((throttle_delay = vm_page_throttled(TRUE))) {

                                                         VM_DEBUG_EVENT(vmf_compressordelay, VMF_COMPRESSORDELAY, DBG_FUNC_NONE, throttle_delay, 0, 1, 0);

1614 delay(throttle_delay);

1615 }

1616 }

1617 }

1618 vm_object_lock(object);

                                 assert(object->paging_in_progress > 0);

1620

1621 vm_compressor_pager_count(

1622 pager,

1623 compressed_count_delta,

1624 FALSE, /* shared_lock */

1625 object);

1626

1627 switch (rc) {

1628 case KERN_SUCCESS:

1629 m->vmp_absent = FALSE;

1630 m->vmp_dirty = TRUE;

1631 if ((object->wimg_bits &

1632 VM_WIMG_MASK) !=

1633 VM_WIMG_USE_DEFAULT) {

1634 /*

1635 * If the page is not cacheable,

1636 * we can't let its contents

1637 * linger in the data cache

1638 * after the decompression.

1639 */

1640 pmap_sync_page_attributes_phys(

1641 VM_PAGE_GET_PHYS_PAGE(m));

1642 } else {

1643 m->vmp_written_by_kernel = TRUE;

1644 }

1645

1646 /*

1647 * If the object is purgeable, its

1648 * owner's purgeable ledgers have been

1649 * updated in vm_page_insert() but the

1650 * page was also accounted for in a

1651 * "compressed purgeable" ledger, so

1652 * update that now.

1653 */

1654 if (((object->purgable !=

1655 VM_PURGABLE_DENY) ||

1656 object->vo_ledger_tag) &&

1657 (object->vo_owner !=

1658 NULL)) {

1659 /*

1660 * One less compressed

1661 * purgeable/tagged page.

1662 */

1663 vm_object_owner_compressed_update(

1664 object,

1665 -1);

1666 }

1667

1668 break;

1669 case KERN_MEMORY_FAILURE:

1670 m->vmp_unusual = TRUE;

1671 m->vmp_error = TRUE;

1672 m->vmp_absent = FALSE;

1673 break;

1674 case KERN_MEMORY_ERROR:

1675 assert(m->vmp_absent);

1676 break;

1677 default:

1678 panic("vm_fault_page(): unexpected "

1679 "error %d from "

1680 "vm_compressor_pager_get()\n",

1681 rc);

1682 }

1683 PAGE_WAKEUP_DONE(m);

1684

1685 rc = KERN_SUCCESS;

1686 goto data_requested;

1687 }

1688 my_fault_type = DBG_PAGEIN_FAULT;

1689

1690 if (m != VM_PAGE_NULL) {

1691 VM_PAGE_FREE(m);

1692 m = VM_PAGE_NULL;

1693 }

1694

1695 #if TRACEFAULTPAGE

                         dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);  /* (TEST/DEBUG) */

1697 #endif

1698

1699 /*

1700 * It's possible someone called vm_object_destroy while we weren't

1701 * holding the object lock. If that has happened, then bail out

1702 * here.

1703 */

1704

1705 pager = object->pager;

1706

1707 if (pager == MEMORY_OBJECT_NULL) {

1708 vm_fault_cleanup(object, first_m);

1709 thread_interrupt_level(interruptible_state);

1710 return VM_FAULT_MEMORY_ERROR;

1711 }

1712

1713 /*

1714 * We have an absent page in place for the faulting offset,

1715 * so we can release the object lock.

1716 */

1717

1718 if (object->object_is_shared_cache) {

1719 set_thread_rwlock_boost();

1720 }

1721

1722 vm_object_unlock(object);

1723

1724 /*

1725 * If this object uses a copy_call strategy,

1726 * and we are interested in a copy of this object

1727 * (having gotten here only by following a

1728 * shadow chain), then tell the memory manager

1729 * via a flag added to the desired_access

1730 * parameter, so that it can detect a race

1731 * between our walking down the shadow chain

1732 * and its pushing pages up into a copy of

1733 * the object that it manages.

1734 */

                         if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL && object != first_object) {

1736 wants_copy_flag = VM_PROT_WANTS_COPY;

1737 } else {

1738 wants_copy_flag = VM_PROT_NONE;

1739 }

1740

                         if (object->copy == first_object) {

1742 /*

1743 * if we issue the memory_object_data_request in

1744 * this state, we are subject to a deadlock with

1745 * the underlying filesystem if it is trying to

1746 * shrink the file resulting in a push of pages

1747 * into the copy object... that push will stall

1748 * on the placeholder page, and if the pushing thread

1749 * is holding a lock that is required on the pagein

1750 * path (such as a truncate lock), we'll deadlock...

1751 * to avoid this potential deadlock, we throw away

1752 * our placeholder page before calling memory_object_data_request

1753 * and force this thread to retry the vm_fault_page after

1754 * we have issued the I/O. the second time through this path

1755 * we will find the page already in the cache (presumably still

1756 * busy waiting for the I/O to complete) and then complete

1757 * the fault w/o having to go through memory_object_data_request again

1758 */

1759 assert(first_m != VM_PAGE_NULL);

                                 assert(VM_PAGE_OBJECT(first_m) == first_object);

1761

1762 vm_object_lock(first_object);

1763 VM_PAGE_FREE(first_m);

1764 vm_object_paging_end(first_object);

1765 vm_object_unlock(first_object);

1766

1767 first_m = VM_PAGE_NULL;

1768 force_fault_retry = TRUE;

1769

1770 vm_fault_page_forced_retry++;

1771 }

1772

1773 if (data_already_requested == TRUE) {

1774 orig_behavior = fault_info->behavior;

1775 orig_cluster_size = fault_info->cluster_size;

1776

1777 fault_info->behavior = VM_BEHAVIOR_RANDOM;

1778 fault_info->cluster_size = PAGE_SIZE;

1779 }

1780 /*

1781 * Call the memory manager to retrieve the data.

1782 */

1783 rc = memory_object_data_request(

1784 pager,

                                 vm_object_trunc_page(offset) + object->paging_offset,

1786 PAGE_SIZE,

1787 access_required | wants_copy_flag,

1788 (memory_object_fault_info_t)fault_info);

1789

1790 if (data_already_requested == TRUE) {

1791 fault_info->behavior = orig_behavior;

1792 fault_info->cluster_size = orig_cluster_size;

1793 } else {

1794 data_already_requested = TRUE;

1795 }

1796

                         DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL);

1798 #if TRACEFAULTPAGE

                         dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */

1800 #endif

1801 vm_object_lock(object);

1802

1803 if (object->object_is_shared_cache) {

1804 clear_thread_rwlock_boost();

1805 }

1806

1807 data_requested:

1808 if (rc != KERN_SUCCESS) {

1809 vm_fault_cleanup(object, first_m);

1810 thread_interrupt_level(interruptible_state);

1811

1812 return (rc == MACH_SEND_INTERRUPTED) ?

1813 VM_FAULT_INTERRUPTED :

1814 VM_FAULT_MEMORY_ERROR;

1815 } else {

1816 clock_sec_t tv_sec;

1817 clock_usec_t tv_usec;

1818

1819 if (my_fault_type == DBG_PAGEIN_FAULT) {

1820 clock_get_system_microtime(&tv_sec, &tv_usec);

1821 current_thread()->t_page_creation_time = tv_sec;

                                         current_thread()->t_page_creation_count = 0;

1823 }

1824 }

                         if ((interruptible != THREAD_UNINT) && (current_thread()->sched_flags & TH_SFLAG_ABORT)) {

1826 vm_fault_cleanup(object, first_m);

1827 thread_interrupt_level(interruptible_state);

1828

1829 return VM_FAULT_INTERRUPTED;

1830 }

1831 if (force_fault_retry == TRUE) {

1832 vm_fault_cleanup(object, first_m);

1833 thread_interrupt_level(interruptible_state);

1834

1835 return VM_FAULT_RETRY;

1836 }

                         if (m == VM_PAGE_NULL && object->phys_contiguous) {

1838 /*

1839 * No page here means that the object we

1840 * initially looked up was "physically

1841 * contiguous" (i.e. device memory). However,

1842 * with Virtual VRAM, the object might not

1843 * be backed by that device memory anymore,

1844 * so we're done here only if the object is

1845 * still "phys_contiguous".

1846 * Otherwise, if the object is no longer

1847 * "phys_contiguous", we need to retry the

1848 * page fault against the object's new backing

1849 * store (different memory object).

1850 */

1851 phys_contig_object:

1852 goto done;

1853 }

1854 /*

1855 * potentially a pagein fault

1856 * if we make it through the state checks

1857 * above, then we'll count it as such

1858 */

1859 my_fault = my_fault_type;

1860

1861 /*

1862 * Retry with same object/offset, since new data may

1863 * be in a different page (i.e., m is meaningless at

1864 * this point).

1865 */

1866 continue;

1867 }

1868 dont_look_for_page:

1869 /*

1870 * We get here if the object has no pager, or an existence map

1871 * exists and indicates the page isn't present on the pager

1872 * or we're unwiring a page. If a pager exists, but there

1873 * is no existence map, then the m->vmp_absent case above handles

1874 * the ZF case when the pager can't provide the page

1875 */

1876 #if TRACEFAULTPAGE

                 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */

1878 #endif

1879 if (object == first_object) {

1880 first_m = m;

1881 } else {

1882 assert(m == VM_PAGE_NULL);

1883 }

1884

1885 next_object = object->shadow;

1886

1887 if (next_object == VM_OBJECT_NULL) {

1888 /*

1889 * we've hit the bottom of the shadow chain,

1890 * fill the page in the top object with zeros.

1891 */

1892 assert(!must_be_resident);

1893

1894 if (object != first_object) {

1895 vm_object_paging_end(object);

1896 vm_object_unlock(object);

1897

1898 object = first_object;

1899 offset = first_offset;

1900 vm_object_lock(object);

1901 }

1902 m = first_m;

                         assert(VM_PAGE_OBJECT(m) == object);

1904 first_m = VM_PAGE_NULL;

1905

1906 /*

1907 * check for any conditions that prevent

1908 * us from creating a new zero-fill page

1909 * vm_fault_check will do all of the

1910 * fault cleanup in the case of an error condition

1911 * including resetting the thread_interrupt_level

1912 */

                         error = vm_fault_check(object, m, first_m, interruptible_state, (type_of_fault == NULL) ? TRUE : FALSE);

1914

1915 if (error != VM_FAULT_SUCCESS) {

1916 return error;

1917 }

1918

1919 if (m == VM_PAGE_NULL) {

1920 m = vm_page_grab_options(grab_options);

1921

1922 if (m == VM_PAGE_NULL) {

1923 vm_fault_cleanup(object, VM_PAGE_NULL);

1924 thread_interrupt_level(interruptible_state);

1925

1926 return VM_FAULT_MEMORY_SHORTAGE;

1927 }

                                 vm_page_insert(m, object, vm_object_trunc_page(offset));

1929 }

                         if (fault_info->mark_zf_absent && no_zero_fill == TRUE) {

1931 m->vmp_absent = TRUE;

1932 }

1933

                         my_fault = vm_fault_zero_page(m, no_zero_fill);

1935

1936 break;

1937 } else {

1938 /*

1939 * Move on to the next object. Lock the next

1940 * object before unlocking the current one.

1941 */

                         if ((object != first_object) || must_be_resident) {

1943 vm_object_paging_end(object);

1944 }

1945

1946 offset += object->vo_shadow_offset;

1947 fault_info->lo_offset += object->vo_shadow_offset;

1948 fault_info->hi_offset += object->vo_shadow_offset;

1949 access_required = VM_PROT_READ;

1950

1951 vm_object_lock(next_object);

1952 vm_object_unlock(object);

1953

1954 object = next_object;

1955 vm_object_paging_begin(object);

1956 }

1957 }

1958

1959 /*

1960 * PAGE HAS BEEN FOUND.

1961 *

1962 * This page (m) is:

1963 * busy, so that we can play with it;

1964 * not absent, so that nobody else will fill it;

1965 * possibly eligible for pageout;

1966 *

1967 * The top-level page (first_m) is:

1968 * VM_PAGE_NULL if the page was found in the

1969 * top-level object;

1970 * busy, not absent, and ineligible for pageout.

1971 *

1972 * The current object (object) is locked. A paging

1973 * reference is held for the current and top-level

1974 * objects.

1975 */

1976

1977 #if TRACEFAULTPAGE

         dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);  /* (TEST/DEBUG) */

1979 #endif

1980 #if EXTRA_ASSERTIONS

         assert(m->vmp_busy && !m->vmp_absent);

1982 assert((first_m == VM_PAGE_NULL) ||

             (first_m->vmp_busy && !first_m->vmp_absent &&

             !first_m->vmp_active && !first_m->vmp_inactive && !first_m->vmp_secluded));

1985 #endif /* EXTRA_ASSERTIONS */

1986

1987 /*

1988 * If the page is being written, but isn't

1989 * already owned by the top-level object,

1990 * we have to copy it into a new page owned

1991 * by the top-level object.

1992 */

1993 if (object != first_object) {

1994 #if TRACEFAULTPAGE

                 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */

1996 #endif

1997 if (fault_type & VM_PROT_WRITE) {

1998 vm_page_t copy_m;

1999

2000 /*

2001 * We only really need to copy if we

2002 * want to write it.

2003 */

2004 assert(!must_be_resident);

2005

2006 /*

2007 * If we try to collapse first_object at this

2008 * point, we may deadlock when we try to get

2009 * the lock on an intermediate object (since we

2010 * have the bottom object locked). We can't

2011 * unlock the bottom object, because the page

2012 * we found may move (by collapse) if we do.

2013 *

2014 * Instead, we first copy the page. Then, when

2015 * we have no more use for the bottom object,

2016 * we unlock it and try to collapse.

2017 *

2018 * Note that we copy the page even if we didn't

2019 * need to... that's the breaks.

2020 */

2021

2022 /*

2023 * Allocate a page for the copy

2024 */

2025 copy_m = vm_page_grab_options(grab_options);

2026

2027 if (copy_m == VM_PAGE_NULL) {

2028 RELEASE_PAGE(m);

2029

2030 vm_fault_cleanup(object, first_m);

2031 thread_interrupt_level(interruptible_state);

2032

2033 return VM_FAULT_MEMORY_SHORTAGE;

2034 }

2035

2036 vm_page_copy(m, copy_m);

2037

2038 /*

2039 * If another map is truly sharing this

2040 * page with us, we have to flush all

2041 * uses of the original page, since we

2042 * can't distinguish those which want the

2043 * original from those which need the

2044 * new copy.

2045 *

2046 * XXXO If we know that only one map has

2047 * access to this page, then we could

2048 * avoid the pmap_disconnect() call.

2049 */

2050 if (m->vmp_pmapped) {

                                 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

2052 }

2053

2054 if (m->vmp_clustered) {

2055 VM_PAGE_COUNT_AS_PAGEIN(m);

2056 VM_PAGE_CONSUME_CLUSTERED(m);

2057 }

2058 assert(!m->vmp_cleaning);

2059

2060 /*

2061 * We no longer need the old page or object.

2062 */

2063 RELEASE_PAGE(m);

2064

2065 /*

2066 * This check helps with marking the object as having a sequential pattern

2067 * Normally we'll miss doing this below because this fault is about COW to

2068 * the first_object i.e. bring page in from disk, push to object above but

2069 * don't update the file object's sequential pattern.

2070 */

                         if (object->internal == FALSE) {

                                 vm_fault_is_sequential(object, offset, fault_info->behavior);

2073 }

2074

2075 vm_object_paging_end(object);

2076 vm_object_unlock(object);

2077

2078 my_fault = DBG_COW_FAULT;

2079 counter_inc(&vm_statistics_cow_faults);

                         DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);

2081 current_task()->cow_faults++;

2082

2083 object = first_object;

2084 offset = first_offset;

2085

2086 vm_object_lock(object);

2087 /*

2088 * get rid of the place holder

2089 * page that we soldered in earlier

2090 */

2091 VM_PAGE_FREE(first_m);

2092 first_m = VM_PAGE_NULL;

2093

2094 /*

2095 * and replace it with the

2096 * page we just copied into

2097 */

2098 assert(copy_m->vmp_busy);

                         vm_page_insert(copy_m, object, vm_object_trunc_page(offset));

2100 SET_PAGE_DIRTY(copy_m, TRUE);

2101

2102 m = copy_m;

2103 /*

2104 * Now that we've gotten the copy out of the

2105 * way, let's try to collapse the top object.

2106 * But we have to play ugly games with

2107 * paging_in_progress to do that...

2108 */

2109 vm_object_paging_end(object);

                         vm_object_collapse(object, vm_object_trunc_page(offset), TRUE);

2111 vm_object_paging_begin(object);

2112 } else {

2113 *protection &= (~VM_PROT_WRITE);

2114 }

2115 }

2116 /*

2117 * Now check whether the page needs to be pushed into the

2118 * copy object. The use of asymmetric copy on write for

2119 * shared temporary objects means that we may do two copies to

2120 * satisfy the fault; one above to get the page from a

2121 * shadowed object, and one here to push it into the copy.

2122 */

2123 try_failed_count = 0;

2124

         while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {

2126 vm_object_offset_t copy_offset;

2127 vm_page_t copy_m;

2128

2129 #if TRACEFAULTPAGE

                 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);    /* (TEST/DEBUG) */

2131 #endif

2132 /*

2133 * If the page is being written, but hasn't been

2134 * copied to the copy-object, we have to copy it there.

2135 */

                 if ((fault_type & VM_PROT_WRITE) == 0) {

2137 *protection &= ~VM_PROT_WRITE;

2138 break;

2139 }

2140

2141 /*

2142 * If the page was guaranteed to be resident,

2143 * we must have already performed the copy.

2144 */

2145 if (must_be_resident) {

2146 break;

2147 }

2148

2149 /*

2150 * Try to get the lock on the copy_object.

2151 */

                 if (!vm_object_lock_try(copy_object)) {

2153 vm_object_unlock(object);

2154 try_failed_count++;

2155

2156 mutex_pause(try_failed_count); /* wait a bit */

2157 vm_object_lock(object);

2158

2159 continue;

2160 }

2161 try_failed_count = 0;

2162

2163 /*

2164 * Make another reference to the copy-object,

2165 * to keep it from disappearing during the

2166 * copy.

2167 */

2168 vm_object_reference_locked(copy_object);

2169

2170 /*

2171 * Does the page exist in the copy?

2172 */

2173 copy_offset = first_offset - copy_object->vo_shadow_offset;

2174 copy_offset = vm_object_trunc_page(copy_offset);

2175

                 if (copy_object->vo_size <= copy_offset) {

2177 /*

2178 * Copy object doesn't cover this page -- do nothing.

2179 */

2180 ;

                 } else if ((copy_m = vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {

2182 /*

2183 * Page currently exists in the copy object

2184 */

2185 if (copy_m->vmp_busy) {

2186 /*

2187 * If the page is being brought

2188 * in, wait for it and then retry.

2189 */

2190 RELEASE_PAGE(m);

2191

2192 /*

2193 * take an extra ref so object won't die

2194 */

2195 vm_object_reference_locked(copy_object);

2196 vm_object_unlock(copy_object);

2197 vm_fault_cleanup(object, first_m);

2198

2199 vm_object_lock(copy_object);

                                 assert(copy_object->ref_count > 0);

2201 vm_object_lock_assert_exclusive(copy_object);

2202 copy_object->ref_count--;

                                 assert(copy_object->ref_count > 0);

                                 copy_m = vm_page_lookup(copy_object, copy_offset);

2205

                                 if (copy_m != VM_PAGE_NULL && copy_m->vmp_busy) {

2207 PAGE_ASSERT_WAIT(copy_m, interruptible);

2208

2209 vm_object_unlock(copy_object);

2210 wait_result = thread_block(THREAD_CONTINUE_NULL);

2211 vm_object_deallocate(copy_object);

2212

2213 goto backoff;

2214 } else {

2215 vm_object_unlock(copy_object);

2216 vm_object_deallocate(copy_object);

2217 thread_interrupt_level(interruptible_state);

2218

2219 return VM_FAULT_RETRY;

2220 }

2221 }

                 } else if (!PAGED_OUT(copy_object, copy_offset)) {

2223 /*

2224 * If PAGED_OUT is TRUE, then the page used to exist

2225 * in the copy-object, and has already been paged out.

2226 * We don't need to repeat this. If PAGED_OUT is

2227 * FALSE, then either we don't know (!pager_created,

2228 * for example) or it hasn't been paged out.

2229 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)

2230 * We must copy the page to the copy object.

2231 *

2232 * Allocate a page for the copy

2233 */

                         copy_m = vm_page_alloc(copy_object, copy_offset);

2235

2236 if (copy_m == VM_PAGE_NULL) {

2237 RELEASE_PAGE(m);

2238

2239 vm_object_lock_assert_exclusive(copy_object);

2240 copy_object->ref_count--;

                                 assert(copy_object->ref_count > 0);

2242

2243 vm_object_unlock(copy_object);

2244 vm_fault_cleanup(object, first_m);

2245 thread_interrupt_level(interruptible_state);

2246

2247 return VM_FAULT_MEMORY_SHORTAGE;

2248 }

2249 /*

2250 * Must copy page into copy-object.

2251 */

2252 vm_page_copy(m, copy_m);

2253

2254 /*

2255 * If the old page was in use by any users

2256 * of the copy-object, it must be removed

2257 * from all pmaps. (We can't know which

2258 * pmaps use it.)

2259 */

2260 if (m->vmp_pmapped) {

                                 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

2262 }

2263

2264 if (m->vmp_clustered) {

2265 VM_PAGE_COUNT_AS_PAGEIN(m);

2266 VM_PAGE_CONSUME_CLUSTERED(m);

2267 }

2268 /*

2269 * If there's a pager, then immediately

2270 * page out this page, using the "initialize"

2271 * option. Else, we use the copy.

2272 */

2273 if ((!copy_object->pager_ready)

                             || VM_COMPRESSOR_PAGER_STATE_GET(copy_object, copy_offset) == VM_EXTERNAL_STATE_ABSENT

2275 ) {

2276 vm_page_lockspin_queues();

2277 assert(!m->vmp_cleaning);

2278 vm_page_activate(copy_m);

2279 vm_page_unlock_queues();

2280

2281 SET_PAGE_DIRTY(copy_m, TRUE);

2282 PAGE_WAKEUP_DONE(copy_m);

2283 } else {

                                 assert(copy_m->vmp_busy == TRUE);

2285 assert(!m->vmp_cleaning);

2286

2287 /*

2288 * dirty is protected by the object lock

2289 */

2290 SET_PAGE_DIRTY(copy_m, TRUE);

2291

2292 /*

2293 * The page is already ready for pageout:

2294 * not on pageout queues and busy.

2295 * Unlock everything except the

2296 * copy_object itself.

2297 */

2298 vm_object_unlock(object);

2299

2300 /*

2301 * Write the page to the copy-object,

2302 * flushing it from the kernel.

2303 */

2304 vm_pageout_initialize_page(copy_m);

2305

2306 /*

2307 * Since the pageout may have

2308 * temporarily dropped the

2309 * copy_object's lock, we

2310 * check whether we'll have

2311 * to deallocate the hard way.

2312 */

                                 if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) {

2314 vm_object_unlock(copy_object);

2315 vm_object_deallocate(copy_object);

2316 vm_object_lock(object);

2317

2318 continue;

2319 }

2320 /*

2321 * Pick back up the old object's

2322 * lock. [It is safe to do so,

2323 * since it must be deeper in the

2324 * object tree.]

2325 */

2326 vm_object_lock(object);

2327 }

2328

2329 /*

2330 * Because we're pushing a page upward

2331 * in the object tree, we must restart

2332 * any faults that are waiting here.

2333 * [Note that this is an expansion of

2334 * PAGE_WAKEUP that uses the THREAD_RESTART

2335 * wait result]. Can't turn off the page's

2336 * busy bit because we're not done with it.

2337 */

2338 if (m->vmp_wanted) {

2339 m->vmp_wanted = FALSE;

                                 thread_wakeup_with_result((event_t) m, THREAD_RESTART);

2341 }

2342 }

2343 /*

2344 * The reference count on copy_object must be

2345 * at least 2: one for our extra reference,

2346 * and at least one from the outside world

2347 * (we checked that when we last locked

2348 * copy_object).

2349 */

2350 vm_object_lock_assert_exclusive(copy_object);

2351 copy_object->ref_count--;

                 assert(copy_object->ref_count > 0);

2353

2354 vm_object_unlock(copy_object);

2355

2356 break;

2357 }

2358

2359 done:

2360 *result_page = m;

2361 *top_page = first_m;

2362

2363 if (m != VM_PAGE_NULL) {

                 assert(VM_PAGE_OBJECT(m) == object);

2365

2366 retval = VM_FAULT_SUCCESS;

2367

2368 if (my_fault == DBG_PAGEIN_FAULT) {

2369 VM_PAGE_COUNT_AS_PAGEIN(m);

2370

2371 if (object->internal) {

2372 my_fault = DBG_PAGEIND_FAULT;

2373 } else {

2374 my_fault = DBG_PAGEINV_FAULT;

2375 }

2376

2377 /*

2378 * evaluate access pattern and update state

2379 * vm_fault_deactivate_behind depends on the

2380 * state being up to date

2381 */

                         vm_fault_is_sequential(object, offset, fault_info->behavior);

                         vm_fault_deactivate_behind(object, offset, fault_info->behavior);

                 } else if (type_of_fault == NULL && my_fault == DBG_CACHE_HIT_FAULT) {

2385 /*

2386 * we weren't called from vm_fault, so handle the

2387 * accounting here for hits in the cache

2388 */

2389 if (m->vmp_clustered) {

2390 VM_PAGE_COUNT_AS_PAGEIN(m);

2391 VM_PAGE_CONSUME_CLUSTERED(m);

2392 }

                         vm_fault_is_sequential(object, offset, fault_info->behavior);

                         vm_fault_deactivate_behind(object, offset, fault_info->behavior);

                 } else if (my_fault == DBG_COMPRESSOR_FAULT || my_fault == DBG_COMPRESSOR_SWAPIN_FAULT) {

2396 VM_STAT_DECOMPRESSIONS();

2397 }

2398 if (type_of_fault) {

2399 *type_of_fault = my_fault;

2400 }

2401 } else {

2402 retval = VM_FAULT_SUCCESS_NO_VM_PAGE;

2403 assert(first_m == VM_PAGE_NULL);

2404 assert(object == first_object);

2405 }

2406

2407 thread_interrupt_level(interruptible_state);

2408

2409 #if TRACEFAULTPAGE

         dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);       /* (TEST/DEBUG) */

2411 #endif

2412 return retval;

2413

2414 backoff:

2415 thread_interrupt_level(interruptible_state);

2416

2417 if (wait_result == THREAD_INTERRUPTED) {

2418 return VM_FAULT_INTERRUPTED;

2419 }

2420 return VM_FAULT_RETRY;

2421

2422 #undef RELEASE_PAGE

2423 }

2424

2425

/*
 * External hooks used by the code-signing violation path below.
 *
 * panic_on_cs_killed: when non-zero, a rejected page that belongs to a
 * shared-cache object causes a panic instead of a plain rejection.
 * proc_selfpid / proc_name_address: used to identify the faulting process
 * in the diagnostics printed when a page is rejected.
 */
extern int panic_on_cs_killed;
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);

/*
 * Code-signing statistics.
 *
 * cs_enter_tainted_rejected is bumped each time an invalid page is rejected.
 * cs_enter_tainted_accepted is presumably the matching count of tainted
 * pages that were let through -- NOTE(review): confirm against its users.
 */
unsigned long cs_enter_tainted_rejected = 0;
unsigned long cs_enter_tainted_accepted = 0;

2431

2432 /*

2433 * CODE SIGNING:

2434 * When soft faulting a page, we have to validate the page if:

2435 * 1. the page is being mapped in user space

2436 * 2. the page hasn't already been found to be "tainted"

2437 * 3. the page belongs to a code-signed object

2438 * 4. the page has not been validated yet or has been mapped for write.

2439 */

2440 static bool

2441 vm_fault_cs_need_validation(

2442 pmap_t pmap,

2443 vm_page_t page,

2444 vm_object_t page_obj,

2445 vm_map_size_t fault_page_size,

2446 vm_map_offset_t fault_phys_offset)

2447 {

2448 if (pmap == kernel_pmap) {

2449 /* 1 - not user space */

2450 return false;

2451 }

2452 if (!page_obj->code_signed) {

2453 /* 3 - page does not belong to a code-signed object */

2454 return false;

2455 }

2456 if (fault_page_size == PAGE_SIZE) {

2457 /* looking at the whole page */

                 assertf(fault_phys_offset == 0,

                     "fault_page_size 0x%llx fault_phys_offset 0x%llx\n",

2460 (uint64_t)fault_page_size,

2461 (uint64_t)fault_phys_offset);

                 if (page->vmp_cs_tainted == VMP_CS_ALL_TRUE) {

2463 /* 2 - page is all tainted */

2464 return false;

2465 }

                 if (page->vmp_cs_validated == VMP_CS_ALL_TRUE &&

2467 !page->vmp_wpmapped) {

2468 /* 4 - already fully validated and never mapped writable */

2469 return false;

2470 }

2471 } else {

2472 /* looking at a specific sub-page */

                 if (VMP_CS_TAINTED(page, fault_page_size, fault_phys_offset)) {

2474 /* 2 - sub-page was already marked as tainted */

2475 return false;

2476 }

                 if (VMP_CS_VALIDATED(page, fault_page_size, fault_phys_offset) &&

2478 !page->vmp_wpmapped) {

2479 /* 4 - already validated and never mapped writable */

2480 return false;

2481 }

2482 }

2483 /* page needs to be validated */

2484 return true;

2485 }

2486

2487

2488 static bool

2489 vm_fault_cs_page_immutable(

2490 vm_page_t m,

2491 vm_map_size_t fault_page_size,

2492 vm_map_offset_t fault_phys_offset,

2493 vm_prot_t prot __unused)

2494 {

         if (VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset)

2496 /*&& ((prot) & VM_PROT_EXECUTE)*/) {

2497 return true;

2498 }

2499 return false;

2500 }

2501

2502 static bool

2503 vm_fault_cs_page_nx(

2504 vm_page_t m,

2505 vm_map_size_t fault_page_size,

2506 vm_map_offset_t fault_phys_offset)

2507 {

         return VMP_CS_NX(m, fault_page_size, fault_phys_offset);

2509 }

2510

2511 /*

2512 * Check if the page being entered into the pmap violates code signing.

2513 */

2514 static kern_return_t

2515 vm_fault_cs_check_violation(

2516 bool cs_bypass,

2517 vm_object_t object,

2518 vm_page_t m,

2519 pmap_t pmap,

2520 vm_prot_t prot,

2521 vm_prot_t caller_prot,

2522 vm_map_size_t fault_page_size,

2523 vm_map_offset_t fault_phys_offset,

2524 vm_object_fault_info_t fault_info,

2525 bool map_is_switched,

2526 bool map_is_switch_protected,

2527 bool *cs_violation)

2528 {

2529 #if !PMAP_CS

2530 #pragma unused(caller_prot)

2531 #pragma unused(fault_info)

2532 #endif /* !PMAP_CS */

2533 int cs_enforcement_enabled;

2534 if (!cs_bypass &&

             vm_fault_cs_need_validation(pmap, m, object,

2536 fault_page_size, fault_phys_offset)) {

2537 vm_object_lock_assert_exclusive(object);

2538

                 if (VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset)) {

2540 vm_cs_revalidates++;

2541 }

2542

2543 /* VM map is locked, so 1 ref will remain on VM object -

2544 * so no harm if vm_page_validate_cs drops the object lock */

2545

                 vm_page_validate_cs(m, fault_page_size, fault_phys_offset);

2547 }

2548

2549 /* If the map is switched, and is switch-protected, we must protect

2550 * some pages from being write-faulted: immutable pages because by

2551 * definition they may not be written, and executable pages because that

2552 * would provide a way to inject unsigned code.

2553 * If the page is immutable, we can simply return. However, we can't

2554 * immediately determine whether a page is executable anywhere. But,

2555 * we can disconnect it everywhere and remove the executable protection

2556 * from the current map. We do that below right before we do the

2557 * PMAP_ENTER.

2558 */

2559 if (pmap == kernel_pmap) {

2560 /* kernel fault: cs_enforcement does not apply */

2561 cs_enforcement_enabled = 0;

2562 } else {

2563 cs_enforcement_enabled = pmap_get_vm_map_cs_enforced(pmap);

2564 }

2565

2566 if (cs_enforcement_enabled && map_is_switched &&

2567 map_is_switch_protected &&

             vm_fault_cs_page_immutable(m, fault_page_size, fault_phys_offset, prot) &&

2569 (prot & VM_PROT_WRITE)) {

2570 return KERN_CODESIGN_ERROR;

2571 }

2572

2573 if (cs_enforcement_enabled &&

             vm_fault_cs_page_nx(m, fault_page_size, fault_phys_offset) &&

2575 (prot & VM_PROT_EXECUTE)) {

2576 if (cs_debug) {

                         printf("page marked to be NX, not letting it be mapped EXEC\n");

2578 }

2579 return KERN_CODESIGN_ERROR;

2580 }

2581

2582 /* A page could be tainted, or pose a risk of being tainted later.

2583 * Check whether the receiving process wants it, and make it feel

2584 * the consequences (that hapens in cs_invalid_page()).

2585 * For CS Enforcement, two other conditions will

2586 * cause that page to be tainted as well:

2587 * - pmapping an unsigned page executable - this means unsigned code;

2588 * - writeable mapping of a validated page - the content of that page

2589 * can be changed without the kernel noticing, therefore unsigned

2590 * code can be created

2591 */

2592 if (cs_bypass) {

2593 /* code-signing is bypassed */

2594 *cs_violation = FALSE;

         } else if (VMP_CS_TAINTED(m, fault_page_size, fault_phys_offset)) {

2596 /* tainted page */

2597 *cs_violation = TRUE;

2598 } else if (!cs_enforcement_enabled) {

2599 /* no further code-signing enforcement */

2600 *cs_violation = FALSE;

         } else if (vm_fault_cs_page_immutable(m, fault_page_size, fault_phys_offset, prot) &&

2602 ((prot & VM_PROT_WRITE) ||

2603 m->vmp_wpmapped)) {

2604 /*

2605 * The page should be immutable, but is in danger of being

2606 * modified.

2607 * This is the case where we want policy from the code

2608 * directory - is the page immutable or not? For now we have

2609 * to assume that code pages will be immutable, data pages not.

2610 * We'll assume a page is a code page if it has a code directory

2611 * and we fault for execution.

2612 * That is good enough since if we faulted the code page for

2613 * writing in another map before, it is wpmapped; if we fault

2614 * it for writing in this map later it will also be faulted for

2615 * executing at the same time; and if we fault for writing in

2616 * another map later, we will disconnect it from this pmap so

2617 * we'll notice the change.

2618 */

2619 *cs_violation = TRUE;

         } else if (!VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset) &&

2621 (prot & VM_PROT_EXECUTE)

2622 ) {

2623 *cs_violation = TRUE;

2624 } else {

2625 *cs_violation = FALSE;

2626 }

2627 return KERN_SUCCESS;

2628 }

2629

2630 /*

2631 * Handles a code signing violation by either rejecting the page or forcing a disconnect.

2632 * @param must_disconnect This value will be set to true if the caller must disconnect

2633 * this page.

2634 * @return If this function does not return KERN_SUCCESS, the caller must abort the page fault.

2635 */

2636 static kern_return_t

2637 vm_fault_cs_handle_violation(

2638 vm_object_t object,

2639 vm_page_t m,

2640 pmap_t pmap,

2641 vm_prot_t prot,

2642 vm_map_offset_t vaddr,

2643 vm_map_size_t fault_page_size,

2644 vm_map_offset_t fault_phys_offset,

2645 bool map_is_switched,

2646 bool map_is_switch_protected,

2647 bool *must_disconnect)

2648 {

2649 #if !MACH_ASSERT

2650 #pragma unused(pmap)

2651 #pragma unused(map_is_switch_protected)

2652 #endif /* !MACH_ASSERT */

2653 /*

2654 * We will have a tainted page. Have to handle the special case

2655 * of a switched map now. If the map is not switched, standard

2656 * procedure applies - call cs_invalid_page().

2657 * If the map is switched, the real owner is invalid already.

2658 * There is no point in invalidating the switching process since

2659 * it will not be executing from the map. So we don't call

2660 * cs_invalid_page() in that case.

2661 */

2662 boolean_t reject_page, cs_killed;

2663 kern_return_t kr;

2664 if (map_is_switched) {

                 assert(pmap == vm_map_pmap(current_thread()->map));

                 assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE));

2667 reject_page = FALSE;

2668 } else {

                 if (cs_debug > 5) {

                         printf("vm_fault: signed: %s validate: %s tainted: %s wpmapped: %s prot: 0x%x\n",

                             object->code_signed ? "yes" : "no",

                             VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset) ? "yes" : "no",

                             VMP_CS_TAINTED(m, fault_page_size, fault_phys_offset) ? "yes" : "no",

                             m->vmp_wpmapped ? "yes" : "no",

2675 (int)prot);

2676 }

                 reject_page = cs_invalid_page((addr64_t) vaddr, &cs_killed);

2678 }

2679

2680 if (reject_page) {

2681 /* reject the invalid page: abort the page fault */

2682 int pid;

2683 const char *procname;

2684 task_t task;

2685 vm_object_t file_object, shadow;

2686 vm_object_offset_t file_offset;

2687 char *pathname, *filename;

2688 vm_size_t pathname_len, filename_len;

2689 boolean_t truncated_path;

2690 #define __PATH_MAX 1024

2691 struct timespec mtime, cs_mtime;

2692 int shadow_depth;

2693 os_reason_t codesigning_exit_reason = OS_REASON_NULL;

2694

2695 kr = KERN_CODESIGN_ERROR;

2696 cs_enter_tainted_rejected++;

2697

2698 /* get process name and pid */

2699 procname = "?";

2700 task = current_task();

2701 pid = proc_selfpid();

                 if (task->bsd_info != NULL) {

                         procname = proc_name_address(task->bsd_info);

2704 }

2705

2706 /* get file's VM object */

2707 file_object = object;

2708 file_offset = m->vmp_offset;

                 for (shadow = file_object->shadow,

2710 shadow_depth = 0;

2711 shadow != VM_OBJECT_NULL;

2712 shadow = file_object->shadow,

2713 shadow_depth++) {

2714 vm_object_lock_shared(shadow);

2715 if (file_object != object) {

2716 vm_object_unlock(file_object);

2717 }

2718 file_offset += file_object->vo_shadow_offset;

2719 file_object = shadow;

2720 }

2721

2722 mtime.tv_sec = 0;

2723 mtime.tv_nsec = 0;

2724 cs_mtime.tv_sec = 0;

2725 cs_mtime.tv_nsec = 0;

2726

2727 /* get file's pathname and/or filename */

2728 pathname = NULL;

2729 filename = NULL;

2730 pathname_len = 0;

2731 filename_len = 0;

2732 truncated_path = FALSE;

2733 /* no pager -> no file -> no pathname, use "<nil>" in that case */

                 if (file_object->pager != NULL) {

                         pathname = kheap_alloc(KHEAP_TEMP, __PATH_MAX * 2, Z_WAITOK);

2736 if (pathname) {

                                 pathname[0] = '\0';

2738 pathname_len = __PATH_MAX;

2739 filename = pathname + pathname_len;

2740 filename_len = __PATH_MAX;

2741

                                 if (vnode_pager_get_object_name(file_object->pager,

2743 pathname,

2744 pathname_len,

2745 filename,

2746 filename_len,

2747 &truncated_path) == KERN_SUCCESS) {

2748 /* safety first... */

                                         pathname[__PATH_MAX - 1] = '\0';

                                         filename[__PATH_MAX - 1] = '\0';

2751

2752 vnode_pager_get_object_mtime(file_object->pager,

2753 &mtime,

2754 &cs_mtime);

2755 } else {

                                         kheap_free(KHEAP_TEMP, pathname, __PATH_MAX * 2);

2757 pathname = NULL;

2758 filename = NULL;

2759 pathname_len = 0;

2760 filename_len = 0;

2761 truncated_path = FALSE;

2762 }

2763 }

2764 }

                 printf("CODE SIGNING: process %d[%s]: "

2766 "rejecting invalid page at address 0x%llx "

                     "from offset 0x%llx in file \"%s%s%s\" "

                     "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) "

                     "(signed:%d validated:%d tainted:%d nx:%d "

                     "wpmapped:%d dirty:%d depth:%d)\n",

2771 pid, procname, (addr64_t) vaddr,

2772 file_offset,

                     (pathname ? pathname : "<nil>"),

                     (truncated_path ? "/.../" : ""),

                     (truncated_path ? filename : ""),

2776 cs_mtime.tv_sec, cs_mtime.tv_nsec,

                     ((cs_mtime.tv_sec == mtime.tv_sec &&

2778 cs_mtime.tv_nsec == mtime.tv_nsec)

2779 ? "=="

2780 : "!="),

2781 mtime.tv_sec, mtime.tv_nsec,

2782 object->code_signed,

                     VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset),

                     VMP_CS_TAINTED(m, fault_page_size, fault_phys_offset),

                     VMP_CS_NX(m, fault_page_size, fault_phys_offset),

2786 m->vmp_wpmapped,

2787 m->vmp_dirty,

2788 shadow_depth);

2789

2790 /*

2791 * We currently only generate an exit reason if cs_invalid_page directly killed a process. If cs_invalid_page

2792 * did not kill the process (more the case on desktop), vm_fault_enter will not satisfy the fault and whether the

2793 * process dies is dependent on whether there is a signal handler registered for SIGSEGV and how that handler

2794 * will deal with the segmentation fault.

2795 */

2796 if (cs_killed) {

                         KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE,

                             pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_INVALID_PAGE, 0, 0);

2799

                         codesigning_exit_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_INVALID_PAGE);

2801 if (codesigning_exit_reason == NULL) {

                                 printf("vm_fault_enter: failed to allocate codesigning exit reason\n");

2803 } else {

2804 mach_vm_address_t data_addr = 0;

2805 struct codesigning_exit_reason_info *ceri = NULL;

                                 uint32_t reason_buffer_size_estimate = kcdata_estimate_required_buffer_size(1, sizeof(*ceri));

2807

                                 if (os_reason_alloc_buffer_noblock(codesigning_exit_reason, reason_buffer_size_estimate)) {

                                         printf("vm_fault_enter: failed to allocate buffer for codesigning exit reason\n");

2810 } else {

                                         if (KERN_SUCCESS == kcdata_get_memory_addr(&codesigning_exit_reason->osr_kcd_descriptor,

                                             EXIT_REASON_CODESIGNING_INFO, sizeof(*ceri), &data_addr)) {

2813 ceri = (struct codesigning_exit_reason_info *)data_addr;

                                                 static_assert(__PATH_MAX == sizeof(ceri->ceri_pathname));

2815

2816 ceri->ceri_virt_addr = vaddr;

2817 ceri->ceri_file_offset = file_offset;

2818 if (pathname) {

                                                         strncpy((char *)&ceri->ceri_pathname, pathname, sizeof(ceri->ceri_pathname));

2820 } else {

                                                         ceri->ceri_pathname[0] = '\0';

2822 }

2823 if (filename) {

                                                         strncpy((char *)&ceri->ceri_filename, filename, sizeof(ceri->ceri_filename));

2825 } else {

                                                         ceri->ceri_filename[0] = '\0';

2827 }

                                                 ceri->ceri_path_truncated = (truncated_path ? 1 : 0);

2829 ceri->ceri_codesig_modtime_secs = cs_mtime.tv_sec;

2830 ceri->ceri_codesig_modtime_nsecs = cs_mtime.tv_nsec;

2831 ceri->ceri_page_modtime_secs = mtime.tv_sec;

2832 ceri->ceri_page_modtime_nsecs = mtime.tv_nsec;

2833 ceri->ceri_object_codesigned = (object->code_signed);

                                                 ceri->ceri_page_codesig_validated = VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset);

                                                 ceri->ceri_page_codesig_tainted = VMP_CS_TAINTED(m, fault_page_size, fault_phys_offset);

                                                 ceri->ceri_page_codesig_nx = VMP_CS_NX(m, fault_page_size, fault_phys_offset);

2837 ceri->ceri_page_wpmapped = (m->vmp_wpmapped);

2838 ceri->ceri_page_slid = 0;

2839 ceri->ceri_page_dirty = (m->vmp_dirty);

2840 ceri->ceri_page_shadow_depth = shadow_depth;

2841 } else {

2842 #if DEBUG || DEVELOPMENT

2843 panic("vm_fault_enter: failed to allocate kcdata for codesigning exit reason");

2844 #else

                                                 printf("vm_fault_enter: failed to allocate kcdata for codesigning exit reason\n");

2846 #endif /* DEBUG || DEVELOPMENT */

2847 /* Free the buffer */

                                                 os_reason_alloc_buffer_noblock(codesigning_exit_reason, 0);

2849 }

2850 }

2851 }

2852

                         set_thread_exit_reason(current_thread(), codesigning_exit_reason, FALSE);

2854 }

2855 if (panic_on_cs_killed &&

2856 object->object_is_shared_cache) {

2857 char *tainted_contents;

2858 vm_map_offset_t src_vaddr;

                         src_vaddr = (vm_map_offset_t) phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m) << PAGE_SHIFT);

2860 tainted_contents = kalloc(PAGE_SIZE);

                         bcopy((const char *)src_vaddr, tainted_contents, PAGE_SIZE);

                         printf("CODE SIGNING: tainted page %p phys 0x%x phystokv 0x%llx copied to %p\n", m, VM_PAGE_GET_PHYS_PAGE(m), (uint64_t)src_vaddr, tainted_contents);

                         panic("CODE SIGNING: process %d[%s]: "

                             "rejecting invalid page (phys#0x%x) at address 0x%llx "

                             "from offset 0x%llx in file \"%s%s%s\" "

                             "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) "

                             "(signed:%d validated:%d tainted:%d nx:%d"

                             "wpmapped:%d dirty:%d depth:%d)\n",

2869 pid, procname,

2870 VM_PAGE_GET_PHYS_PAGE(m),

2871 (addr64_t) vaddr,

2872 file_offset,

                             (pathname ? pathname : "<nil>"),

                             (truncated_path ? "/.../" : ""),

                             (truncated_path ? filename : ""),

2876 cs_mtime.tv_sec, cs_mtime.tv_nsec,

                             ((cs_mtime.tv_sec == mtime.tv_sec &&

2878 cs_mtime.tv_nsec == mtime.tv_nsec)

2879 ? "=="

2880 : "!="),

2881 mtime.tv_sec, mtime.tv_nsec,

2882 object->code_signed,

                             VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset),

                             VMP_CS_TAINTED(m, fault_page_size, fault_phys_offset),

                             VMP_CS_NX(m, fault_page_size, fault_phys_offset),

2886 m->vmp_wpmapped,

2887 m->vmp_dirty,

2888 shadow_depth);

2889 }

2890

2891 if (file_object != object) {

2892 vm_object_unlock(file_object);

2893 }

                 if (pathname_len != 0) {

                         kheap_free(KHEAP_TEMP, pathname, __PATH_MAX * 2);

2896 pathname = NULL;

2897 filename = NULL;

2898 }

2899 } else {

2900 /* proceed with the invalid page */

2901 kr = KERN_SUCCESS;

                 if (!VMP_CS_VALIDATED(m, fault_page_size, fault_phys_offset) &&

2903 !object->code_signed) {

2904 /*

2905 * This page has not been (fully) validated but

2906 * does not belong to a code-signed object

2907 * so it should not be forcefully considered

2908 * as tainted.

2909 * We're just concerned about it here because

2910 * we've been asked to "execute" it but that

2911 * does not mean that it should cause other

2912 * accesses to fail.

2913 * This happens when a debugger sets a

2914 * breakpoint and we then execute code in

2915 * that page. Marking the page as "tainted"

2916 * would cause any inspection tool ("leaks",

2917 * "vmmap", "CrashReporter", ...) to get killed

2918 * due to code-signing violation on that page,

2919 * even though they're just reading it and not

2920 * executing from it.

2921 */

2922 } else {

2923 /*

2924 * Page might have been tainted before or not;

2925 * now it definitively is. If the page wasn't

2926 * tainted, we must disconnect it from all

2927 * pmaps later, to force existing mappings

2928 * through that code path for re-consideration

2929 * of the validity of that page.

2930 */

                         if (!VMP_CS_TAINTED(m, fault_page_size, fault_phys_offset)) {

2932 *must_disconnect = TRUE;

                                 VMP_CS_SET_TAINTED(m, fault_page_size, fault_phys_offset, TRUE);

2934 }

2935 }

2936 cs_enter_tainted_accepted++;

2937 }

2938 if (kr != KERN_SUCCESS) {

2939 if (cs_debug) {

                         printf("CODESIGNING: vm_fault_enter(0x%llx): "

2941 "*** INVALID PAGE ***\n",

2942 (long long)vaddr);

2943 }

2944 #if !SECURE_KERNEL

2945 if (cs_enforcement_panic) {

                         panic("CODESIGNING: panicking on invalid page\n");

2947 }

2948 #endif

2949 }

2950 return kr;

2951 }

2952

2953 /*

2954 * Check that the code signature is valid for the given page being inserted into

2955 * the pmap.

2956 *

2957 * @param must_disconnect This value will be set to true if the caller must disconnect

2958 * this page.

2959 * @return If this function does not return KERN_SUCCESS, the caller must abort the page fault.

2960 */

2961 static kern_return_t

2962 vm_fault_validate_cs(

2963 bool cs_bypass,

2964 vm_object_t object,

2965 vm_page_t m,

2966 pmap_t pmap,

2967 vm_map_offset_t vaddr,

2968 vm_prot_t prot,

2969 vm_prot_t caller_prot,

2970 vm_map_size_t fault_page_size,

2971 vm_map_offset_t fault_phys_offset,

2972 vm_object_fault_info_t fault_info,

2973 bool *must_disconnect)

2974 {

2975 bool map_is_switched, map_is_switch_protected, cs_violation;

2976 kern_return_t kr;

2977 /* Validate code signature if necessary. */

         map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) &&

             (pmap == vm_map_pmap(current_thread()->map)));

         map_is_switch_protected = current_thread()->map->switch_protect;

         kr = vm_fault_cs_check_violation(cs_bypass, object, m, pmap,

             prot, caller_prot, fault_page_size, fault_phys_offset, fault_info,

2983 map_is_switched, map_is_switch_protected, &cs_violation);

2984 if (kr != KERN_SUCCESS) {

2985 return kr;

2986 }

2987 if (cs_violation) {

                 kr = vm_fault_cs_handle_violation(object, m, pmap, prot, vaddr,

2989 fault_page_size, fault_phys_offset,

2990 map_is_switched, map_is_switch_protected, must_disconnect);

2991 }

2992 return kr;

2993 }

2994

2995 /*

2996 * Enqueue the page on the appropriate paging queue.

2997 */

2998 static void

2999 vm_fault_enqueue_page(

3000 vm_object_t object,

3001 vm_page_t m,

3002 bool wired,

3003 bool change_wiring,

3004 vm_tag_t wire_tag,

3005 bool no_cache,

3006 int *type_of_fault,

3007 kern_return_t kr)

3008 {

         assert((m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) || object != compressor_object);

3010 boolean_t page_queues_locked = FALSE;

3011 boolean_t previously_pmapped = m->vmp_pmapped;

3012 #define __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED() \

3013 MACRO_BEGIN \

3014 if (! page_queues_locked) { \

3015 page_queues_locked = TRUE; \

3016 vm_page_lockspin_queues(); \

3017 } \

3018 MACRO_END

3019 #define __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED() \

3020 MACRO_BEGIN \

3021 if (page_queues_locked) { \

3022 page_queues_locked = FALSE; \

3023 vm_page_unlock_queues(); \

3024 } \

3025 MACRO_END

3026

3027 #if CONFIG_BACKGROUND_QUEUE

3028 vm_page_update_background_state(m);

3029 #endif

         if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {

3031 /*

3032 * Compressor pages are neither wired

3033 * nor pageable and should never change.

3034 */

3035 assert(object == compressor_object);

3036 } else if (change_wiring) {

3037 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED();

3038

3039 if (wired) {

3040 if (kr == KERN_SUCCESS) {

                                 vm_page_wire(m, wire_tag, TRUE);

3042 }

3043 } else {

3044 vm_page_unwire(m, TRUE);

3045 }

3046 /* we keep the page queues lock, if we need it later */

3047 } else {

                 if (object->internal == TRUE) {

3049 /*

3050 * don't allow anonymous pages on

3051 * the speculative queues

3052 */

3053 no_cache = FALSE;

3054 }

3055 if (kr != KERN_SUCCESS) {

3056 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED();

3057 vm_page_deactivate(m);

3058 /* we keep the page queues lock, if we need it later */

                 } else if (((m->vmp_q_state == VM_PAGE_NOT_ON_Q) ||

3060 (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ||

3061 (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||

                     ((m->vmp_q_state != VM_PAGE_ON_THROTTLED_Q) && no_cache)) &&

3063 !VM_PAGE_WIRED(m)) {

3064 if (vm_page_local_q &&

3065 (*type_of_fault == DBG_COW_FAULT ||

3066 *type_of_fault == DBG_ZERO_FILL_FAULT)) {

3067 struct vpl *lq;

3068 uint32_t lid;

3069

                                 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);

3071

3072 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED();

3073 vm_object_lock_assert_exclusive(object);

3074

3075 /*

3076 * we got a local queue to stuff this

3077 * new page on...

3078 * its safe to manipulate local and

3079 * local_id at this point since we're

3080 * behind an exclusive object lock and

3081 * the page is not on any global queue.

3082 *

3083 * we'll use the current cpu number to

3084 * select the queue note that we don't

3085 * need to disable preemption... we're

3086 * going to be behind the local queue's

3087 * lock to do the real work

3088 */

3089 lid = cpu_number();

3090

                                 lq = zpercpu_get_cpu(vm_page_local_q, lid);

3092

3093 VPL_LOCK(&lq->vpl_lock);

3094

3095 vm_page_check_pageable_safe(m);

                                 vm_page_queue_enter(&lq->vpl_queue, m, vmp_pageq);

3097 m->vmp_q_state = VM_PAGE_ON_ACTIVE_LOCAL_Q;

3098 m->vmp_local_id = lid;

3099 lq->vpl_count++;

3100

3101 if (object->internal) {

3102 lq->vpl_internal_count++;

3103 } else {

3104 lq->vpl_external_count++;

3105 }

3106

3107 VPL_UNLOCK(&lq->vpl_lock);

3108

                                 if (lq->vpl_count > vm_page_local_q_soft_limit) {

3110 /*

3111 * we're beyond the soft limit

3112 * for the local queue

3113 * vm_page_reactivate_local will

3114 * 'try' to take the global page

3115 * queue lock... if it can't

3116 * that's ok... we'll let the

3117 * queue continue to grow up

3118 * to the hard limit... at that

3119 * point we'll wait for the

3120 * lock... once we've got the

3121 * lock, we'll transfer all of

3122 * the pages from the local

3123 * queue to the global active

3124 * queue

3125 */

                                         vm_page_reactivate_local(lid, FALSE, FALSE);

3127 }

3128 } else {

3129 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED();

3130

3131 /*

3132 * test again now that we hold the

3133 * page queue lock

3134 */

                                 if (!VM_PAGE_WIRED(m)) {

                                         if (m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {

3137 vm_page_queues_remove(m, FALSE);

3138

                                                 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);

                                                 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_fault_reactivated, 1);

3141 }

3142

                                         if (!VM_PAGE_ACTIVE_OR_INACTIVE(m) ||

3144 no_cache) {

3145 /*

3146 * If this is a no_cache mapping

3147 * and the page has never been

3148 * mapped before or was

3149 * previously a no_cache page,

3150 * then we want to leave pages

3151 * in the speculative state so

3152 * that they can be readily

3153 * recycled if free memory runs

3154 * low. Otherwise the page is

3155 * activated as normal.

3156 */

3157

3158 if (no_cache &&

3159 (!previously_pmapped ||

3160 m->vmp_no_cache)) {

3161 m->vmp_no_cache = TRUE;

3162

                                                         if (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q) {

3164 vm_page_speculate(m, FALSE);

3165 }

                                                 } else if (!VM_PAGE_ACTIVE_OR_INACTIVE(m)) {

3167 vm_page_activate(m);

3168 }

3169 }

3170 }

3171 /* we keep the page queues lock, if we need it later */

3172 }

3173 }

3174 }

3175 /* we're done with the page queues lock, if we ever took it */

3176 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED();

3177 }

3178

3179 /*

3180 * Sets the pmmpped, xpmapped, and wpmapped bits on the vm_page_t and updates accounting.

3181 * @return true if the page needs to be sync'ed via pmap_sync-page_data_physo

3182 * before being inserted into the pmap.

3183 */

3184 static bool

3185 vm_fault_enter_set_mapped(

3186 vm_object_t object,

3187 vm_page_t m,

3188 vm_prot_t prot,

3189 vm_prot_t fault_type)

3190 {

3191 bool page_needs_sync = false;

3192 /*

3193 * NOTE: we may only hold the vm_object lock SHARED

3194 * at this point, so we need the phys_page lock to

3195 * properly serialize updating the pmapped and

3196 * xpmapped bits

3197 */

         if ((prot & VM_PROT_EXECUTE) && !m->vmp_xpmapped) {

3199 ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);

3200

3201 pmap_lock_phys_page(phys_page);

3202 m->vmp_pmapped = TRUE;

3203

3204 if (!m->vmp_xpmapped) {

3205 m->vmp_xpmapped = TRUE;

3206

3207 pmap_unlock_phys_page(phys_page);

3208

3209 if (!object->internal) {

                                 OSAddAtomic(1, &vm_page_xpmapped_external_count);

3211 }

3212

3213 #if defined(__arm__) || defined(__arm64__)

3214 page_needs_sync = true;

3215 #else

3216 if (object->internal &&

3217 object->pager != NULL) {

3218 /*

3219 * This page could have been

3220 * uncompressed by the

3221 * compressor pager and its

3222 * contents might be only in

3223 * the data cache.

3224 * Since it's being mapped for

3225 * "execute" for the fist time,

3226 * make sure the icache is in

3227 * sync.

3228 */

3229 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

3230 page_needs_sync = true;

3231 }

3232 #endif

3233 } else {

3234 pmap_unlock_phys_page(phys_page);

3235 }

3236 } else {

                 if (m->vmp_pmapped == FALSE) {

3238 ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);

3239

3240 pmap_lock_phys_page(phys_page);

3241 m->vmp_pmapped = TRUE;

3242 pmap_unlock_phys_page(phys_page);

3243 }

3244 }

3245

3246 if (fault_type & VM_PROT_WRITE) {

                 if (m->vmp_wpmapped == FALSE) {

3248 vm_object_lock_assert_exclusive(object);

                         if (!object->internal && object->pager) {

                                 task_update_logical_writes(current_task(), PAGE_SIZE, TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));

3251 }

3252 m->vmp_wpmapped = TRUE;

3253 }

3254 }

3255 return page_needs_sync;

3256 }

3257

3258 /*

3259 * Try to enter the given page into the pmap.

3260 * Will retry without execute permission iff PMAP_CS is enabled and we encounter

3261 * a codesigning failure on a non-execute fault.

3262 */

3263 static kern_return_t

3264 vm_fault_attempt_pmap_enter(

3265 pmap_t pmap,

3266 vm_map_offset_t vaddr,

3267 vm_map_size_t fault_page_size,

3268 vm_map_offset_t fault_phys_offset,

3269 vm_page_t m,

3270 vm_prot_t *prot,

3271 vm_prot_t caller_prot,

3272 vm_prot_t fault_type,

3273 bool wired,

3274 int pmap_options)

3275 {

3276 #if !PMAP_CS

3277 #pragma unused(caller_prot)

3278 #endif /* !PMAP_CS */

3279 kern_return_t kr;

3280 if (fault_page_size != PAGE_SIZE) {

                 DEBUG4K_FAULT("pmap %p va 0x%llx pa 0x%llx (0x%llx+0x%llx) prot 0x%x fault_type 0x%x\n", pmap, (uint64_t)vaddr, (uint64_t)((((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT) + fault_phys_offset), (uint64_t)(((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT), (uint64_t)fault_phys_offset, *prot, fault_type);

3282 assertf((!(fault_phys_offset & FOURK_PAGE_MASK) &&

3283 fault_phys_offset < PAGE_SIZE),

                     "0x%llx\n", (uint64_t)fault_phys_offset);

3285 } else {

                 assertf(fault_phys_offset == 0,

                     "0x%llx\n", (uint64_t)fault_phys_offset);

3288 }

3289

3290 PMAP_ENTER_OPTIONS(pmap, vaddr,

3291 fault_phys_offset,

             m, *prot, fault_type, 0,

3293 wired,

3294 pmap_options,

3295 kr);

3296 return kr;

3297 }

3298

3299 /*

3300 * Enter the given page into the pmap.

3301 * The map must be locked shared.

3302 * The vm object must NOT be locked.

3303 *

3304 * @param need_retry if not null, avoid making a (potentially) blocking call into

3305 * the pmap layer. When such a call would be necessary, return true in this boolean instead.

3306 */

3307 static kern_return_t

3308 vm_fault_pmap_enter(

3309 pmap_t pmap,

3310 vm_map_offset_t vaddr,

3311 vm_map_size_t fault_page_size,

3312 vm_map_offset_t fault_phys_offset,

3313 vm_page_t m,

3314 vm_prot_t *prot,

3315 vm_prot_t caller_prot,

3316 vm_prot_t fault_type,

3317 bool wired,

3318 int pmap_options,

3319 boolean_t *need_retry)

3320 {

3321 kern_return_t kr;

3322 if (need_retry != NULL) {

3323 /*

3324 * Although we don't hold a lock on this object, we hold a lock

3325 * on the top object in the chain. To prevent a deadlock, we

3326 * can't allow the pmap layer to block.

3327 */

3328 pmap_options |= PMAP_OPTIONS_NOWAIT;

3329 }

         kr = vm_fault_attempt_pmap_enter(pmap, vaddr,

3331 fault_page_size, fault_phys_offset,

             m, prot, caller_prot, fault_type, wired, pmap_options);

3333 if (kr == KERN_RESOURCE_SHORTAGE) {

3334 if (need_retry) {

3335 /*

3336 * There's nothing we can do here since we hold the

3337 * lock on the top object in the chain. The caller

3338 * will need to deal with this by dropping that lock and retrying.

3339 */

3340 *need_retry = TRUE;

3341 vm_pmap_enter_retried++;

3342 }

3343 }

3344 return kr;

3345 }

3346

3347 /*

3348 * Enter the given page into the pmap.

3349 * The vm map must be locked shared.

3350 * The vm object must be locked exclusive, unless this is a soft fault.

3351 * For a soft fault, the object must be locked shared or exclusive.

3352 *

3353 * @param need_retry if not null, avoid making a (potentially) blocking call into

3354 * the pmap layer. When such a call would be necessary, return true in this boolean instead.

3355 */

3356 static kern_return_t

3357 vm_fault_pmap_enter_with_object_lock(

3358 vm_object_t object,

3359 pmap_t pmap,

3360 vm_map_offset_t vaddr,

3361 vm_map_size_t fault_page_size,

3362 vm_map_offset_t fault_phys_offset,

3363 vm_page_t m,

3364 vm_prot_t *prot,

3365 vm_prot_t caller_prot,

3366 vm_prot_t fault_type,

3367 bool wired,

3368 int pmap_options,

3369 boolean_t *need_retry)

3370 {

3371 kern_return_t kr;

3372 /*

3373 * Prevent a deadlock by not

3374 * holding the object lock if we need to wait for a page in

3375 * pmap_enter() - <rdar://problem/7138958>

3376 */

         kr = vm_fault_attempt_pmap_enter(pmap, vaddr,

3378 fault_page_size, fault_phys_offset,

             m, prot, caller_prot, fault_type, wired, pmap_options | PMAP_OPTIONS_NOWAIT);

3380 #if __x86_64__

3381 if (kr == KERN_INVALID_ARGUMENT &&

3382 pmap == PMAP_NULL &&

3383 wired) {

3384 /*

3385 * Wiring a page in a pmap-less VM map:

3386 * VMware's "vmmon" kernel extension does this

3387 * to grab pages.

3388 * Let it proceed even though the PMAP_ENTER() failed.

3389 */

3390 kr = KERN_SUCCESS;

3391 }

3392 #endif /* __x86_64__ */

3393

3394 if (kr == KERN_RESOURCE_SHORTAGE) {

3395 if (need_retry) {

3396 /*

3397 * this will be non-null in the case where we hold the lock

3398 * on the top-object in this chain... we can't just drop

3399 * the lock on the object we're inserting the page into

3400 * and recall the PMAP_ENTER since we can still cause

3401 * a deadlock if one of the critical paths tries to

3402 * acquire the lock on the top-object and we're blocked

3403 * in PMAP_ENTER waiting for memory... our only recourse

3404 * is to deal with it at a higher level where we can

3405 * drop both locks.

3406 */

3407 *need_retry = TRUE;

3408 vm_pmap_enter_retried++;

3409 goto done;

3410 }

3411 /*

3412 * The nonblocking version of pmap_enter did not succeed.

3413 * and we don't need to drop other locks and retry

3414 * at the level above us, so

3415 * use the blocking version instead. Requires marking

3416 * the page busy and unlocking the object

3417 */

3418 boolean_t was_busy = m->vmp_busy;

3419

3420 vm_object_lock_assert_exclusive(object);

3421

3422 m->vmp_busy = TRUE;

3423 vm_object_unlock(object);

3424

3425 PMAP_ENTER_OPTIONS(pmap, vaddr,

3426 fault_phys_offset,

3427 m, *prot, fault_type,

3428 0, wired,

3429 pmap_options, kr);

3430

                 assert(VM_PAGE_OBJECT(m) == object);

3432

3433 /* Take the object lock again. */

3434 vm_object_lock(object);

3435

3436 /* If the page was busy, someone else will wake it up.

3437 * Otherwise, we have to do it now. */

3438 assert(m->vmp_busy);

3439 if (!was_busy) {

3440 PAGE_WAKEUP_DONE(m);

3441 }

3442 vm_pmap_enter_blocked++;

3443 }

3444

3445 done:

3446 return kr;

3447 }

3448

3449 /*

3450 * Prepare to enter a page into the pmap by checking CS, protection bits,

3451 * and setting mapped bits on the page_t.

3452 * Does not modify the page's paging queue.

3453 *

3454 * page queue lock must NOT be held

3455 * m->vmp_object must be locked

3456 *

3457 * NOTE: m->vmp_object could be locked "shared" only if we are called

3458 * from vm_fault() as part of a soft fault.

3459 */

3460 static kern_return_t

3461 vm_fault_enter_prepare(

3462 vm_page_t m,

3463 pmap_t pmap,

3464 vm_map_offset_t vaddr,

3465 vm_prot_t *prot,

3466 vm_prot_t caller_prot,

3467 vm_map_size_t fault_page_size,

3468 vm_map_offset_t fault_phys_offset,

3469 boolean_t change_wiring,

3470 vm_prot_t fault_type,

3471 vm_object_fault_info_t fault_info,

3472 int *type_of_fault,

3473 bool *page_needs_data_sync)

3474 {

3475 kern_return_t kr;

3476 bool is_tainted = false;

3477 vm_object_t object;

3478 boolean_t cs_bypass = fault_info->cs_bypass;

3479

3480 object = VM_PAGE_OBJECT(m);

3481

3482 vm_object_lock_assert_held(object);

3483

3484 #if KASAN

3485 if (pmap == kernel_pmap) {

3486 kasan_notify_address(vaddr, PAGE_SIZE);

3487 }

3488 #endif

3489

3490 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

3491

3492 if (*type_of_fault == DBG_ZERO_FILL_FAULT) {

3493 vm_object_lock_assert_exclusive(object);

         } else if ((fault_type & VM_PROT_WRITE) == 0 &&

3495 !change_wiring &&

3496 (!m->vmp_wpmapped

3497 #if VM_OBJECT_ACCESS_TRACKING

3498 || object->access_tracking

3499 #endif /* VM_OBJECT_ACCESS_TRACKING */

3500 )) {

3501 /*

3502 * This is not a "write" fault, so we

3503 * might not have taken the object lock

3504 * exclusively and we might not be able

3505 * to update the "wpmapped" bit in

3506 * vm_fault_enter().

3507 * Let's just grant read access to

3508 * the page for now and we'll

3509 * soft-fault again if we need write

3510 * access later...

3511 */

3512

3513 /* This had better not be a JIT page. */

                 if (!pmap_has_prot_policy(pmap, fault_info->pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, *prot)) {

3515 *prot &= ~VM_PROT_WRITE;

3516 } else {

3517 assert(cs_bypass);

3518 }

3519 }

         if (m->vmp_pmapped == FALSE) {

3521 if (m->vmp_clustered) {

3522 if (*type_of_fault == DBG_CACHE_HIT_FAULT) {

3523 /*

3524 * found it in the cache, but this

3525 * is the first fault-in of the page (m->vmp_pmapped == FALSE)

3526 * so it must have come in as part of

3527 * a cluster... account 1 pagein against it

3528 */

3529 if (object->internal) {

3530 *type_of_fault = DBG_PAGEIND_FAULT;

3531 } else {

3532 *type_of_fault = DBG_PAGEINV_FAULT;

3533 }

3534

3535 VM_PAGE_COUNT_AS_PAGEIN(m);

3536 }

3537 VM_PAGE_CONSUME_CLUSTERED(m);

3538 }

3539 }

3540

3541 if (*type_of_fault != DBG_COW_FAULT) {

                 DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL);

3543

3544 if (pmap == kernel_pmap) {

                         DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL);

3546 }

3547 }

3548

         kr = vm_fault_validate_cs(cs_bypass, object, m, pmap, vaddr,

             *prot, caller_prot, fault_page_size, fault_phys_offset,

3551 fault_info, &is_tainted);

3552 if (kr == KERN_SUCCESS) {

3553 /*

3554 * We either have a good page, or a tainted page that has been accepted by the process.

3555 * In both cases the page will be entered into the pmap.

3556 */

                 *page_needs_data_sync = vm_fault_enter_set_mapped(object, m, *prot, fault_type);

                 if ((fault_type & VM_PROT_WRITE) && is_tainted) {

3559 /*

3560 * This page is tainted but we're inserting it anyways.

3561 * Since it's writeable, we need to disconnect it from other pmaps

3562 * now so those processes can take note.

3563 */

3564

3565 /*

3566 * We can only get here

3567 * because of the CSE logic

3568 */

                         assert(pmap_get_vm_map_cs_enforced(pmap));

                         pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

3571 /*

3572 * If we are faulting for a write, we can clear

3573 * the execute bit - that will ensure the page is

3574 * checked again before being executable, which

3575 * protects against a map switch.

3576 * This only happens the first time the page

3577 * gets tainted, so we won't get stuck here

3578 * to make an already writeable page executable.

3579 */

3580 if (!cs_bypass) {

                                 assert(!pmap_has_prot_policy(pmap, fault_info->pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, *prot));

3582 *prot &= ~VM_PROT_EXECUTE;

3583 }

3584 }

                 assert(VM_PAGE_OBJECT(m) == object);

3586

3587 #if VM_OBJECT_ACCESS_TRACKING

3588 if (object->access_tracking) {

                         DTRACE_VM2(access_tracking, vm_map_offset_t, vaddr, int, fault_type);

3590 if (fault_type & VM_PROT_WRITE) {

3591 object->access_tracking_writes++;

3592 vm_object_access_tracking_writes++;

3593 } else {

3594 object->access_tracking_reads++;

3595 vm_object_access_tracking_reads++;

3596 }

3597 }

3598 #endif /* VM_OBJECT_ACCESS_TRACKING */

3599 }

3600

3601 return kr;

3602 }

3603

3604 /*

3605 * page queue lock must NOT be held

3606 * m->vmp_object must be locked

3607 *

3608 * NOTE: m->vmp_object could be locked "shared" only if we are called

3609 * from vm_fault() as part of a soft fault. If so, we must be

3610 * careful not to modify the VM object in any way that is not

3611 * legal under a shared lock...

3612 */

3613 kern_return_t

3614 vm_fault_enter(

3615 vm_page_t m,

3616 pmap_t pmap,

3617 vm_map_offset_t vaddr,

3618 vm_map_size_t fault_page_size,

3619 vm_map_offset_t fault_phys_offset,

3620 vm_prot_t prot,

3621 vm_prot_t caller_prot,

3622 boolean_t wired,

3623 boolean_t change_wiring,

3624 vm_tag_t wire_tag,

3625 vm_object_fault_info_t fault_info,

3626 boolean_t *need_retry,

3627 int *type_of_fault)

3628 {

3629 kern_return_t kr;

3630 vm_object_t object;

3631 bool page_needs_data_sync;

3632 vm_prot_t fault_type;

3633 int pmap_options = fault_info->pmap_options;

3634

         if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {

3636 assert(m->vmp_fictitious);

3637 return KERN_SUCCESS;

3638 }

3639

3640 fault_type = change_wiring ? VM_PROT_NONE : caller_prot;

3641

         kr = vm_fault_enter_prepare(m, pmap, vaddr, &prot, caller_prot,

3643 fault_page_size, fault_phys_offset, change_wiring, fault_type,

3644 fault_info, type_of_fault, &page_needs_data_sync);

3645 object = VM_PAGE_OBJECT(m);

3646

         vm_fault_enqueue_page(object, m, wired, change_wiring, wire_tag, fault_info->no_cache, type_of_fault, kr);

3648

3649 if (kr == KERN_SUCCESS) {

3650 if (page_needs_data_sync) {

                         pmap_sync_page_data_phys(VM_PAGE_GET_PHYS_PAGE(m));

3652 }

3653

                 kr = vm_fault_pmap_enter_with_object_lock(object, pmap, vaddr,

3655 fault_page_size, fault_phys_offset, m,

                     &prot, caller_prot, fault_type, wired, pmap_options, need_retry);

3657 }

3658

3659 return kr;

3660 }

3661

3662 void

3663 vm_pre_fault(vm_map_offset_t vaddr, vm_prot_t prot)

3664 {

         if (pmap_find_phys(current_map()->pmap, vaddr) == 0) {

                 vm_fault(current_map(),      /* map */

3667 vaddr, /* vaddr */

3668 prot, /* fault_type */

3669 FALSE, /* change_wiring */

3670 VM_KERN_MEMORY_NONE, /* tag - not wiring */

3671 THREAD_UNINT, /* interruptible */

3672 NULL, /* caller_pmap */

3673 0 /* caller_pmap_addr */);

3674 }

3675 }

3676

3677

/*
 *	Routine:	vm_fault
 *	Purpose:
 *		Handle page faults, including pseudo-faults
 *		used to change the wiring status of pages.
 *	Returns:
 *		Explicit continuations have been removed.
 *	Implementation:
 *		vm_fault and vm_fault_page save mucho state
 *		in the moral equivalent of a closure.  The state
 *		structure is allocated when first entering vm_fault
 *		and deallocated when leaving vm_fault.
 */

/* Defined outside this file; used for the fault trace points. */
extern uint64_t get_current_unique_pid(void);

/*
 * Global counters.  NOTE(review): presumably track object collapses
 * attempted and skipped by the fault path - their users are not visible
 * next to these definitions, confirm before relying on this.
 */
unsigned long vm_fault_collapse_total = 0;
unsigned long vm_fault_collapse_skipped = 0;

3696

3697

3698 kern_return_t

3699 vm_fault_external(

3700 vm_map_t map,

3701 vm_map_offset_t vaddr,

3702 vm_prot_t fault_type,

3703 boolean_t change_wiring,

3704 int interruptible,

3705 pmap_t caller_pmap,

3706 vm_map_offset_t caller_pmap_addr)

3707 {

         return vm_fault_internal(map, vaddr, fault_type, change_wiring,

3709 change_wiring ? vm_tag_bt() : VM_KERN_MEMORY_NONE,

3710 interruptible, caller_pmap, caller_pmap_addr,

3711 NULL);

3712 }

3713

3714 kern_return_t

3715 vm_fault(

3716 vm_map_t map,

3717 vm_map_offset_t vaddr,

3718 vm_prot_t fault_type,

3719 boolean_t change_wiring,

3720 vm_tag_t wire_tag, /* if wiring must pass tag != VM_KERN_MEMORY_NONE */

3721 int interruptible,

3722 pmap_t caller_pmap,

3723 vm_map_offset_t caller_pmap_addr)

3724 {

         return vm_fault_internal(map, vaddr, fault_type, change_wiring, wire_tag,

3726 interruptible, caller_pmap, caller_pmap_addr,

3727 NULL);

3728 }

3729

3730 static boolean_t

3731 current_proc_is_privileged(void)

3732 {

         return csproc_get_platform_binary(current_proc());

3734 }

3735

/*
 * Global counter.  NOTE(review): the name suggests it counts pages copied
 * on a read fault - its users are not visible here, confirm against them.
 */
uint64_t vm_copied_on_read = 0;

3737

3738 /*

3739 * Cleanup after a vm_fault_enter.

3740 * At this point, the fault should either have failed (kr != KERN_SUCCESS)

3741 * or the page should be in the pmap and on the correct paging queue.

3742 *

3743 * Precondition:

3744 * map must be locked shared.

3745 * m_object must be locked.

3746 * If top_object != VM_OBJECT_NULL, it must be locked.

3747 * real_map must be locked.

3748 *

3749 * Postcondition:

3750 * map will be unlocked

3751 * m_object will be unlocked

3752 * top_object will be unlocked

3753 * If real_map != map, it will be unlocked

3754 */

3755 static void

3756 vm_fault_complete(

3757 vm_map_t map,

3758 vm_map_t real_map,

3759 vm_object_t object,

3760 vm_object_t m_object,

3761 vm_page_t m,

3762 vm_map_offset_t offset,

3763 vm_map_offset_t trace_real_vaddr,

3764 vm_object_fault_info_t fault_info,

3765 vm_prot_t caller_prot,

3766 #if CONFIG_DTRACE

3767 vm_map_offset_t real_vaddr,

3768 #else

3769 __unused vm_map_offset_t real_vaddr,

3770 #endif /* CONFIG_DTRACE */

3771 int type_of_fault,

3772 boolean_t need_retry,

3773 kern_return_t kr,

3774 ppnum_t *physpage_p,

3775 vm_prot_t prot,

3776 vm_object_t top_object,

3777 boolean_t need_collapse,

3778 vm_map_offset_t cur_offset,

3779 vm_prot_t fault_type,

3780 vm_object_t *written_on_object,

3781 memory_object_t *written_on_pager,

3782 vm_object_offset_t *written_on_offset)

3783 {

3784 int event_code = 0;

3785 vm_map_lock_assert_shared(map);

3786 vm_object_lock_assert_held(m_object);

3787 if (top_object != VM_OBJECT_NULL) {

3788 vm_object_lock_assert_held(top_object);

3789 }

3790 vm_map_lock_assert_held(real_map);

3791

3792 if (m_object->internal) {

                 event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_INTERNAL));

         } else if (m_object->object_is_shared_cache) {

                 event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_SHAREDCACHE));

3796 } else {

                 event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_EXTERNAL));

3798 }

3799

         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info->user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0);

3801 if (need_retry == FALSE) {

                 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_FAST), get_current_unique_pid(), 0, 0, 0, 0);

3803 }

         DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info->user_tag);

3805 if (kr == KERN_SUCCESS &&

3806 physpage_p != NULL) {

3807 /* for vm_map_wire_and_extract() */

                 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);

3809 if (prot & VM_PROT_WRITE) {

3810 vm_object_lock_assert_exclusive(m_object);

3811 m->vmp_dirty = TRUE;

3812 }

3813 }

3814

3815 if (top_object != VM_OBJECT_NULL) {

3816 /*

3817 * It's safe to drop the top object

3818 * now that we've done our

3819 * vm_fault_enter(). Any other fault

3820 * in progress for that virtual

3821 * address will either find our page

3822 * and translation or put in a new page

3823 * and translation.

3824 */

3825 vm_object_unlock(top_object);

3826 top_object = VM_OBJECT_NULL;

3827 }

3828

3829 if (need_collapse == TRUE) {

                 vm_object_collapse(object, vm_object_trunc_page(offset), TRUE);

3831 }

3832

3833 if (need_retry == FALSE &&

             (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT)) {

3835 /*

3836 * evaluate access pattern and update state

3837 * vm_fault_deactivate_behind depends on the

3838 * state being up to date

3839 */

                 vm_fault_is_sequential(m_object, cur_offset, fault_info->behavior);

3841

                 vm_fault_deactivate_behind(m_object, cur_offset, fault_info->behavior);

3843 }

3844 /*

3845 * That's it, clean up and return.

3846 */

3847 if (m->vmp_busy) {

3848 vm_object_lock_assert_exclusive(m_object);

3849 PAGE_WAKEUP_DONE(m);

3850 }

3851

         if (need_retry == FALSE && !m_object->internal && (fault_type & VM_PROT_WRITE)) {

3853 vm_object_paging_begin(m_object);

3854

3855 assert(*written_on_object == VM_OBJECT_NULL);

3856 *written_on_object = m_object;

3857 *written_on_pager = m_object->pager;

                 *written_on_offset = m_object->paging_offset + m->vmp_offset;

3859 }

3860 vm_object_unlock(object);

3861

3862 vm_map_unlock_read(map);

3863 if (real_map != map) {

3864 vm_map_unlock(real_map);

3865 }

3866 }

3867

3868 static inline int

 vm_fault_type_for_tracing(boolean_t need_copy_on_read, int type_of_fault)

3870 {

         if (need_copy_on_read && type_of_fault == DBG_COW_FAULT) {

3872 return DBG_COR_FAULT;

3873 }

3874 return type_of_fault;

3875 }

3876

3877 kern_return_t

3878 vm_fault_internal(

3879 vm_map_t map,

3880 vm_map_offset_t vaddr,

3881 vm_prot_t caller_prot,

3882 boolean_t change_wiring,

3883 vm_tag_t wire_tag, /* if wiring must pass tag != VM_KERN_MEMORY_NONE */

3884 int interruptible,

3885 pmap_t caller_pmap,

3886 vm_map_offset_t caller_pmap_addr,

3887 ppnum_t *physpage_p)

3888 {

3889 vm_map_version_t version; /* Map version for verificiation */

3890 boolean_t wired; /* Should mapping be wired down? */

3891 vm_object_t object; /* Top-level object */

3892 vm_object_offset_t offset; /* Top-level offset */

3893 vm_prot_t prot; /* Protection for mapping */

3894 vm_object_t old_copy_object; /* Saved copy object */

3895 vm_page_t result_page; /* Result of vm_fault_page */

3896 vm_page_t top_page; /* Placeholder page */

3897 kern_return_t kr;

3898

3899 vm_page_t m; /* Fast access to result_page */

3900 kern_return_t error_code;

3901 vm_object_t cur_object;

3902 vm_object_t m_object = NULL;

3903 vm_object_offset_t cur_offset;

3904 vm_page_t cur_m;

3905 vm_object_t new_object;

3906 int type_of_fault;

3907 pmap_t pmap;

3908 wait_interrupt_t interruptible_state;

3909 vm_map_t real_map = map;

3910 vm_map_t original_map = map;

3911 bool object_locks_dropped = FALSE;

3912 vm_prot_t fault_type;

3913 vm_prot_t original_fault_type;

3914 struct vm_object_fault_info fault_info = {};

3915 bool need_collapse = FALSE;

3916 boolean_t need_retry = FALSE;

3917 boolean_t *need_retry_ptr = NULL;

3918 uint8_t object_lock_type = 0;

3919 uint8_t cur_object_lock_type;

3920 vm_object_t top_object = VM_OBJECT_NULL;

3921 vm_object_t written_on_object = VM_OBJECT_NULL;

3922 memory_object_t written_on_pager = NULL;

3923 vm_object_offset_t written_on_offset = 0;

3924 int throttle_delay;

3925 int compressed_count_delta;

3926 uint8_t grab_options;

3927 bool need_copy;

3928 bool need_copy_on_read;

3929 vm_map_offset_t trace_vaddr;

3930 vm_map_offset_t trace_real_vaddr;

3931 vm_map_size_t fault_page_size;

3932 vm_map_size_t fault_page_mask;

3933 vm_map_offset_t fault_phys_offset;

3934 vm_map_offset_t real_vaddr;

3935 bool resilient_media_retry = FALSE;

3936 vm_object_t resilient_media_object = VM_OBJECT_NULL;

3937 vm_object_offset_t resilient_media_offset = (vm_object_offset_t)-1;

3938 bool page_needs_data_sync = false;

3939 /*

3940 * Was the VM object contended when vm_map_lookup_locked locked it?

3941 * If so, the zero fill path will drop the lock

3942 * NB: Ideally we would always drop the lock rather than rely on

3943 * this heuristic, but vm_object_unlock currently takes > 30 cycles.

3944 */

3945 bool object_is_contended = false;

3946

3947 real_vaddr = vaddr;

3948 trace_real_vaddr = vaddr;

3949

         if (VM_MAP_PAGE_SIZE(original_map) < PAGE_SIZE) {

3951 fault_phys_offset = (vm_map_offset_t)-1;

3952 fault_page_size = VM_MAP_PAGE_SIZE(original_map);

3953 fault_page_mask = VM_MAP_PAGE_MASK(original_map);

3954 if (fault_page_size < PAGE_SIZE) {

                         DEBUG4K_FAULT("map %p vaddr 0x%llx caller_prot 0x%x\n", map, (uint64_t)trace_real_vaddr, caller_prot);

                         vaddr = vm_map_trunc_page(vaddr, fault_page_mask);

3957 }

3958 } else {

3959 fault_phys_offset = 0;

3960 fault_page_size = PAGE_SIZE;

3961 fault_page_mask = PAGE_MASK;

                 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);

3963 }

3964

3965 if (map == kernel_map) {

3966 trace_vaddr = VM_KERNEL_ADDRHIDE(vaddr);

3967 trace_real_vaddr = VM_KERNEL_ADDRHIDE(trace_real_vaddr);

3968 } else {

3969 trace_vaddr = vaddr;

3970 }

3971

3972 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,

             (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,

             ((uint64_t)trace_vaddr >> 32),

3975 trace_vaddr,

3976 (map == kernel_map),

3977 0,

3978 0);

3979

         if (get_preemption_level() != 0) {

3981 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,

                     (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,

                     ((uint64_t)trace_vaddr >> 32),

3984 trace_vaddr,

3985 KERN_FAILURE,

3986 0,

3987 0);

3988

3989 return KERN_FAILURE;

3990 }

3991

3992 thread_t cthread = current_thread();

         bool      rtfault = (cthread->sched_mode == TH_MODE_REALTIME);

3994 uint64_t fstart = 0;

3995

3996 if (rtfault) {

3997 fstart = mach_continuous_time();

3998 }

3999

4000 interruptible_state = thread_interrupt_level(interruptible);

4001

4002 fault_type = (change_wiring ? VM_PROT_NONE : caller_prot);

4003

4004 counter_inc(&vm_statistics_faults);

         counter_inc(&current_task()->faults);

4006 original_fault_type = fault_type;

4007

4008 need_copy = FALSE;

4009 if (fault_type & VM_PROT_WRITE) {

4010 need_copy = TRUE;

4011 }

4012

4013 if (need_copy || change_wiring) {

4014 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4015 } else {

4016 object_lock_type = OBJECT_LOCK_SHARED;

4017 }

4018

4019 cur_object_lock_type = OBJECT_LOCK_SHARED;

4020

         if ((map == kernel_map) && (caller_prot & VM_PROT_WRITE)) {

4022 if (compressor_map) {

                         if ((vaddr >= vm_map_min(compressor_map)) && (vaddr < vm_map_max(compressor_map))) {

                                 panic("Write fault on compressor map, va: %p type: %u bounds: %p->%p", (void *) vaddr, caller_prot, (void *) vm_map_min(compressor_map), (void *) vm_map_max(compressor_map));

4025 }

4026 }

4027 }

4028 RetryFault:

4029 assert(written_on_object == VM_OBJECT_NULL);

4030

4031 /*

4032 * assume we will hit a page in the cache

4033 * otherwise, explicitly override with

4034 * the real fault type once we determine it

4035 */

4036 type_of_fault = DBG_CACHE_HIT_FAULT;

4037

4038 /*

4039 * Find the backing store object and offset into

4040 * it to begin the search.

4041 */

4042 fault_type = original_fault_type;

4043 map = original_map;

4044 vm_map_lock_read(map);

4045

4046 if (resilient_media_retry) {

4047 /*

4048 * If we have to insert a fake zero-filled page to hide

4049 * a media failure to provide the real page, we need to

4050 * resolve any pending copy-on-write on this mapping.

4051 * VM_PROT_COPY tells vm_map_lookup_locked() to deal

4052 * with that even if this is not a "write" fault.

4053 */

4054 need_copy = TRUE;

4055 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4056 }

4057

         kr = vm_map_lookup_locked(&map, vaddr,

             (fault_type | (need_copy ? VM_PROT_COPY : 0)),

4060 object_lock_type, &version,

             &object, &offset, &prot, &wired,

4062 &fault_info,

4063 &real_map,

4064 &object_is_contended);

4065

4066 if (kr != KERN_SUCCESS) {

4067 vm_map_unlock_read(map);

4068 goto done;

4069 }

4070

4071

4072 pmap = real_map->pmap;

4073 fault_info.interruptible = interruptible;

4074 fault_info.stealth = FALSE;

4075 fault_info.io_sync = FALSE;

4076 fault_info.mark_zf_absent = FALSE;

4077 fault_info.batch_pmap_op = FALSE;

4078

4079 if (resilient_media_retry) {

4080 /*

4081 * We're retrying this fault after having detected a media

4082 * failure from a "resilient_media" mapping.

4083 * Check that the mapping is still pointing at the object

4084 * that just failed to provide a page.

4085 */

4086 assert(resilient_media_object != VM_OBJECT_NULL);

                 assert(resilient_media_offset != (vm_object_offset_t)-1);

4088 if (object != VM_OBJECT_NULL &&

4089 object == resilient_media_object &&

4090 offset == resilient_media_offset &&

4091 fault_info.resilient_media) {

4092 /*

4093 * This mapping still points at the same object

4094 * and is still "resilient_media": proceed in

4095 * "recovery-from-media-failure" mode, where we'll

4096 * insert a zero-filled page in the top object.

4097 */

4098 // printf("RESILIENT_MEDIA %s:%d recovering for object %p offset 0x%llx\n", __FUNCTION__, __LINE__, object, offset);

4099 } else {

4100 /* not recovering: reset state */

4101 // printf("RESILIENT_MEDIA %s:%d no recovery resilient %d object %p/%p offset 0x%llx/0x%llx\n", __FUNCTION__, __LINE__, fault_info.resilient_media, object, resilient_media_object, offset, resilient_media_offset);

4102 resilient_media_retry = FALSE;

4103 /* release our extra reference on failed object */

4104 // printf("FBDP %s:%d resilient_media_object %p deallocate\n", __FUNCTION__, __LINE__, resilient_media_object);

4105 vm_object_deallocate(resilient_media_object);

4106 resilient_media_object = VM_OBJECT_NULL;

4107 resilient_media_offset = (vm_object_offset_t)-1;

4108 }

4109 } else {

4110 assert(resilient_media_object == VM_OBJECT_NULL);

4111 resilient_media_offset = (vm_object_offset_t)-1;

4112 }

4113

4114 /*

4115 * If the page is wired, we must fault for the current protection

4116 * value, to avoid further faults.

4117 */

4118 if (wired) {

4119 fault_type = prot | VM_PROT_WRITE;

4120 }

4121 if (wired || need_copy) {

4122 /*

4123 * since we're treating this fault as a 'write'

4124 * we must hold the top object lock exclusively

4125 */

4126 if (object_lock_type == OBJECT_LOCK_SHARED) {

4127 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4128

                         if (vm_object_lock_upgrade(object) == FALSE) {

4130 /*

4131 * couldn't upgrade, so explicitly

4132 * take the lock exclusively

4133 */

4134 vm_object_lock(object);

4135 }

4136 }

4137 }

4138

4139 #if VM_FAULT_CLASSIFY

4140 /*

4141 * Temporary data gathering code

4142 */

         vm_fault_classify(object, offset, fault_type);

4144 #endif

4145 /*

4146 * Fast fault code. The basic idea is to do as much as

4147 * possible while holding the map lock and object locks.

4148 * Busy pages are not used until the object lock has to

4149 * be dropped to do something (copy, zero fill, pmap enter).

4150 * Similarly, paging references aren't acquired until that

4151 * point, and object references aren't used.

4152 *

4153 * If we can figure out what to do

4154 * (zero fill, copy on write, pmap enter) while holding

4155 * the locks, then it gets done. Otherwise, we give up,

4156 * and use the original fault path (which doesn't hold

4157 * the map lock, and relies on busy pages).

4158 * The give up cases include:

4159 * - Have to talk to pager.

4160 * - Page is busy, absent or in error.

4161 * - Pager has locked out desired access.

4162 * - Fault needs to be restarted.

4163 * - Have to push page into copy object.

4164 *

4165 * The code is an infinite loop that moves one level down

4166 * the shadow chain each time. cur_object and cur_offset

4167 * refer to the current object being examined. object and offset

4168 * are the original object from the map. The loop is at the

4169 * top level if and only if object and cur_object are the same.

4170 *

4171 * Invariants: Map lock is held throughout. Lock is held on

4172 * original object and cur_object (if different) when

4173 * continuing or exiting loop.

4174 *

4175 */

4176

4177 #if defined(__arm64__)

4178 /*

4179 * Fail if reading an execute-only page in a

4180 * pmap that enforces execute-only protection.

4181 */

4182 if (fault_type == VM_PROT_READ &&

4183 (prot & VM_PROT_EXECUTE) &&

4184 !(prot & VM_PROT_READ) &&

4185 pmap_enforces_execute_only(pmap)) {

4186 vm_object_unlock(object);

4187 vm_map_unlock_read(map);

4188 if (real_map != map) {

4189 vm_map_unlock(real_map);

4190 }

4191 kr = KERN_PROTECTION_FAILURE;

4192 goto done;

4193 }

4194 #endif

4195

         fault_phys_offset = (vm_map_offset_t)offset - vm_map_trunc_page((vm_map_offset_t)offset, PAGE_MASK);

4197

4198 /*

4199 * If this page is to be inserted in a copy delay object

4200 * for writing, and if the object has a copy, then the

4201 * copy delay strategy is implemented in the slow fault page.

4202 */

         if (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&

             object->copy != VM_OBJECT_NULL && (fault_type & VM_PROT_WRITE)) {

4205 goto handle_copy_delay;

4206 }

4207

4208 cur_object = object;

4209 cur_offset = offset;

4210

4211 grab_options = 0;

4212 #if CONFIG_SECLUDED_MEMORY

4213 if (object->can_grab_secluded) {

4214 grab_options |= VM_PAGE_GRAB_SECLUDED;

4215 }

4216 #endif /* CONFIG_SECLUDED_MEMORY */

4217

4218 while (TRUE) {

4219 if (!cur_object->pager_created &&

4220 cur_object->phys_contiguous) { /* superpage */

4221 break;

4222 }

4223

4224 if (cur_object->blocked_access) {

4225 /*

4226 * Access to this VM object has been blocked.

4227 * Let the slow path handle it.

4228 */

4229 break;

4230 }

4231

                 m = vm_page_lookup(cur_object, vm_object_trunc_page(cur_offset));

4233 m_object = NULL;

4234

4235 if (m != VM_PAGE_NULL) {

4236 m_object = cur_object;

4237

4238 if (m->vmp_busy) {

4239 wait_result_t result;

4240

4241 /*

4242 * in order to do the PAGE_ASSERT_WAIT, we must

4243 * have object that 'm' belongs to locked exclusively

4244 */

4245 if (object != cur_object) {

4246 if (cur_object_lock_type == OBJECT_LOCK_SHARED) {

4247 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4248

                                                 if (vm_object_lock_upgrade(cur_object) == FALSE) {

4250 /*

4251 * couldn't upgrade so go do a full retry

4252 * immediately since we can no longer be

4253 * certain about cur_object (since we

4254 * don't hold a reference on it)...

4255 * first drop the top object lock

4256 */

4257 vm_object_unlock(object);

4258

4259 vm_map_unlock_read(map);

4260 if (real_map != map) {

4261 vm_map_unlock(real_map);

4262 }

4263

4264 goto RetryFault;

4265 }

4266 }

                                 } else if (object_lock_type == OBJECT_LOCK_SHARED) {

4268 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4269

                                         if (vm_object_lock_upgrade(object) == FALSE) {

4271 /*

4272 * couldn't upgrade, so explicitly take the lock

4273 * exclusively and go relookup the page since we

4274 * will have dropped the object lock and

4275 * a different thread could have inserted

4276 * a page at this offset

4277 * no need for a full retry since we're

4278 * at the top level of the object chain

4279 */

4280 vm_object_lock(object);

4281

4282 continue;

4283 }

4284 }

                                 if ((m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) && m_object->internal) {

4286 /*

4287 * m->vmp_busy == TRUE and the object is locked exclusively

4288 * if m->pageout_queue == TRUE after we acquire the

4289 * queues lock, we are guaranteed that it is stable on

4290 * the pageout queue and therefore reclaimable

4291 *

4292 * NOTE: this is only true for the internal pageout queue

4293 * in the compressor world

4294 */

4295 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

4296

4297 vm_page_lock_queues();

4298

                                         if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {

4300 vm_pageout_throttle_up(m);

4301 vm_page_unlock_queues();

4302

4303 PAGE_WAKEUP_DONE(m);

4304 goto reclaimed_from_pageout;

4305 }

4306 vm_page_unlock_queues();

4307 }

4308 if (object != cur_object) {

4309 vm_object_unlock(object);

4310 }

4311

4312 vm_map_unlock_read(map);

4313 if (real_map != map) {

4314 vm_map_unlock(real_map);

4315 }

4316

                                 result = PAGE_ASSERT_WAIT(m, interruptible);

4318

4319 vm_object_unlock(cur_object);

4320

4321 if (result == THREAD_WAITING) {

4322 result = thread_block(THREAD_CONTINUE_NULL);

4323 }

                                 if (result == THREAD_AWAKENED || result == THREAD_RESTART) {

4325 goto RetryFault;

4326 }

4327

4328 kr = KERN_ABORTED;

4329 goto done;

4330 }

4331 reclaimed_from_pageout:

4332 if (m->vmp_laundry) {

4333 if (object != cur_object) {

4334 if (cur_object_lock_type == OBJECT_LOCK_SHARED) {

4335 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4336

4337 vm_object_unlock(object);

4338 vm_object_unlock(cur_object);

4339

4340 vm_map_unlock_read(map);

4341 if (real_map != map) {

4342 vm_map_unlock(real_map);

4343 }

4344

4345 goto RetryFault;

4346 }

                                 } else if (object_lock_type == OBJECT_LOCK_SHARED) {

4348 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4349

                                         if (vm_object_lock_upgrade(object) == FALSE) {

4351 /*

4352 * couldn't upgrade, so explicitly take the lock

4353 * exclusively and go relookup the page since we

4354 * will have dropped the object lock and

4355 * a different thread could have inserted

4356 * a page at this offset

4357 * no need for a full retry since we're

4358 * at the top level of the object chain

4359 */

4360 vm_object_lock(object);

4361

4362 continue;

4363 }

4364 }

4365 vm_pageout_steal_laundry(m, FALSE);

4366 }

4367

                         if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {

4369 /*

4370 * Guard page: let the slow path deal with it

4371 */

4372 break;

4373 }

                         if (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent)) {

4375 /*

4376 * Unusual case... let the slow path deal with it

4377 */

4378 break;

4379 }

                         if (VM_OBJECT_PURGEABLE_FAULT_ERROR(m_object)) {

4381 if (object != cur_object) {

4382 vm_object_unlock(object);

4383 }

4384 vm_map_unlock_read(map);

4385 if (real_map != map) {

4386 vm_map_unlock(real_map);

4387 }

4388 vm_object_unlock(cur_object);

4389 kr = KERN_MEMORY_ERROR;

4390 goto done;

4391 }

                         assert(m_object == VM_PAGE_OBJECT(m));

4393

                         if (vm_fault_cs_need_validation(map->pmap, m, m_object,

4395 PAGE_SIZE, 0) ||

                             (physpage_p != NULL && (prot & VM_PROT_WRITE))) {

4397 upgrade_lock_and_retry:

4398 /*

4399 * We might need to validate this page

4400 * against its code signature, so we

4401 * want to hold the VM object exclusively.

4402 */

4403 if (object != cur_object) {

4404 if (cur_object_lock_type == OBJECT_LOCK_SHARED) {

4405 vm_object_unlock(object);

4406 vm_object_unlock(cur_object);

4407

4408 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4409

4410 vm_map_unlock_read(map);

4411 if (real_map != map) {

4412 vm_map_unlock(real_map);

4413 }

4414

4415 goto RetryFault;

4416 }

                                 } else if (object_lock_type == OBJECT_LOCK_SHARED) {

4418 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4419

                                         if (vm_object_lock_upgrade(object) == FALSE) {

4421 /*

4422 * couldn't upgrade, so explicitly take the lock

4423 * exclusively and go relookup the page since we

4424 * will have dropped the object lock and

4425 * a different thread could have inserted

4426 * a page at this offset

4427 * no need for a full retry since we're

4428 * at the top level of the object chain

4429 */

4430 vm_object_lock(object);

4431

4432 continue;

4433 }

4434 }

4435 }

4436 /*

4437 * Two cases of map in faults:

4438 * - At top level w/o copy object.

4439 * - Read fault anywhere.

4440 * --> must disallow write.

4441 */

4442

                         if (object == cur_object && object->copy == VM_OBJECT_NULL) {

4444 goto FastPmapEnter;

4445 }

4446

4447 if (!need_copy &&

4448 !fault_info.no_copy_on_read &&

4449 cur_object != object &&

4450 !cur_object->internal &&

4451 !cur_object->pager_trusted &&

4452 vm_protect_privileged_from_untrusted &&

4453 !((prot & VM_PROT_EXECUTE) &&

4454 cur_object->code_signed &&

                             pmap_get_vm_map_cs_enforced(caller_pmap ? caller_pmap : pmap)) &&

4456 current_proc_is_privileged()) {

4457 /*

4458 * We're faulting on a page in "object" and

4459 * went down the shadow chain to "cur_object"

4460 * to find out that "cur_object"'s pager

4461 * is not "trusted", i.e. we can not trust it

4462 * to always return the same contents.

4463 * Since the target is a "privileged" process,

4464 * let's treat this as a copy-on-read fault, as

4465 * if it was a copy-on-write fault.

4466 * Once "object" gets a copy of this page, it

4467 * won't have to rely on "cur_object" to

4468 * provide the contents again.

4469 *

4470 * This is done by setting "need_copy" and

4471 * retrying the fault from the top with the

4472 * appropriate locking.

4473 *

4474 * Special case: if the mapping is executable

4475 * and the untrusted object is code-signed and

4476 * the process is "cs_enforced", we do not

4477 * copy-on-read because that would break

4478 * code-signing enforcement expectations (an

4479 * executable page must belong to a code-signed

4480 * object) and we can rely on code-signing

4481 * to re-validate the page if it gets evicted

4482 * and paged back in.

4483 */

4484 // printf("COPY-ON-READ %s:%d map %p va 0x%llx page %p object %p offset 0x%llx UNTRUSTED: need copy-on-read!\n", __FUNCTION__, __LINE__, map, (uint64_t)vaddr, m, VM_PAGE_OBJECT(m), m->vmp_offset);

4485 vm_copied_on_read++;

4486 need_copy = TRUE;

4487

4488 vm_object_unlock(object);

4489 vm_object_unlock(cur_object);

4490 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4491 vm_map_unlock_read(map);

4492 if (real_map != map) {

4493 vm_map_unlock(real_map);

4494 }

4495 goto RetryFault;

4496 }

4497

                         if (!(fault_type & VM_PROT_WRITE) && !need_copy) {

                                 if (!pmap_has_prot_policy(pmap, fault_info.pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, prot)) {

4500 prot &= ~VM_PROT_WRITE;

4501 } else {

4502 /*

4503 * For a protection that the pmap cares

4504 * about, we must hand over the full

4505 * set of protections (so that the pmap

4506 * layer can apply any desired policy).

4507 * This means that cs_bypass must be

4508 * set, as this can force us to pass

4509 * RWX.

4510 */

4511 assert(fault_info.cs_bypass);

4512 }

4513

4514 if (object != cur_object) {

4515 /*

4516 * We still need to hold the top object

4517 * lock here to prevent a race between

4518 * a read fault (taking only "shared"

4519 * locks) and a write fault (taking

4520 * an "exclusive" lock on the top

4521 * object.

4522 * Otherwise, as soon as we release the

4523 * top lock, the write fault could

4524 * proceed and actually complete before

4525 * the read fault, and the copied page's

4526 * translation could then be overwritten

4527 * by the read fault's translation for

4528 * the original page.

4529 *

4530 * Let's just record what the top object

4531 * is and we'll release it later.

4532 */

4533 top_object = object;

4534

4535 /*

4536 * switch to the object that has the new page

4537 */

4538 object = cur_object;

4539 object_lock_type = cur_object_lock_type;

4540 }

4541 FastPmapEnter:

                                 assert(m_object == VM_PAGE_OBJECT(m));

4543

4544 /*

4545 * prepare for the pmap_enter...

4546 * object and map are both locked

4547 * m contains valid data

4548 * object == m->vmp_object

4549 * cur_object == NULL or it's been unlocked

4550 * no paging references on either object or cur_object

4551 */

                                 if (top_object != VM_OBJECT_NULL || object_lock_type != OBJECT_LOCK_EXCLUSIVE) {

4553 need_retry_ptr = &need_retry;

4554 } else {

4555 need_retry_ptr = NULL;

4556 }

4557

4558 if (fault_page_size < PAGE_SIZE) {

                                         DEBUG4K_FAULT("map %p original %p pmap %p va 0x%llx caller pmap %p va 0x%llx pa 0x%llx (0x%llx+0x%llx) prot 0x%x caller_prot 0x%x\n", map, original_map, pmap, (uint64_t)vaddr, caller_pmap, (uint64_t)caller_pmap_addr, (uint64_t)((((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT) + fault_phys_offset), (uint64_t)(((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT), (uint64_t)fault_phys_offset, prot, caller_prot);

4560 assertf((!(fault_phys_offset & FOURK_PAGE_MASK) &&

4561 fault_phys_offset < PAGE_SIZE),

                                             "0x%llx\n", (uint64_t)fault_phys_offset);

4563 } else {

                                         assertf(fault_phys_offset == 0,

                                             "0x%llx\n", (uint64_t)fault_phys_offset);

4566 }

4567

4568 if (caller_pmap) {

4569 kr = vm_fault_enter(m,

4570 caller_pmap,

4571 caller_pmap_addr,

4572 fault_page_size,

4573 fault_phys_offset,

4574 prot,

4575 caller_prot,

4576 wired,

4577 change_wiring,

4578 wire_tag,

4579 &fault_info,

4580 need_retry_ptr,

4581 &type_of_fault);

4582 } else {

4583 kr = vm_fault_enter(m,

4584 pmap,

4585 vaddr,

4586 fault_page_size,

4587 fault_phys_offset,

4588 prot,

4589 caller_prot,

4590 wired,

4591 change_wiring,

4592 wire_tag,

4593 &fault_info,

4594 need_retry_ptr,

4595 &type_of_fault);

4596 }

4597

4598 vm_fault_complete(

4599 map,

4600 real_map,

4601 object,

4602 m_object,

4603 m,

4604 offset,

4605 trace_real_vaddr,

4606 &fault_info,

4607 caller_prot,

4608 real_vaddr,

4609 vm_fault_type_for_tracing(need_copy_on_read, type_of_fault),

4610 need_retry,

4611 kr,

4612 physpage_p,

4613 prot,

4614 top_object,

4615 need_collapse,

4616 cur_offset,

4617 fault_type,

4618 &written_on_object,

4619 &written_on_pager,

4620 &written_on_offset);

4621 top_object = VM_OBJECT_NULL;

4622 if (need_retry == TRUE) {

4623 /*

4624 * vm_fault_enter couldn't complete the PMAP_ENTER...

4625 * at this point we don't hold any locks so it's safe

4626 * to ask the pmap layer to expand the page table to

4627 * accommodate this mapping... once expanded, we'll

4628 * re-drive the fault which should result in vm_fault_enter

4629 * being able to successfully enter the mapping this time around

4630 */

                                         (void)pmap_enter_options(

                                                 pmap, vaddr, 0, 0, 0, 0, 0,

4633 PMAP_OPTIONS_NOENTER, NULL);

4634

4635 need_retry = FALSE;

4636 goto RetryFault;

4637 }

4638 goto done;

4639 }

4640 /*

4641 * COPY ON WRITE FAULT

4642 */

4643 assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE);

4644

4645 /*

4646 * If objects match, then

4647 * object->copy must not be NULL (else control

4648 * would be in previous code block), and we

4649 * have a potential push into the copy object

4650 * with which we can't cope with here.

4651 */

4652 if (cur_object == object) {

4653 /*

4654 * must take the slow path to

4655 * deal with the copy push

4656 */

4657 break;

4658 }

4659

4660 /*

4661 * This is now a shadow based copy on write

4662 * fault -- it requires a copy up the shadow

4663 * chain.

4664 */

                         assert(m_object == VM_PAGE_OBJECT(m));

4666

4667 if ((cur_object_lock_type == OBJECT_LOCK_SHARED) &&

                             vm_fault_cs_need_validation(NULL, m, m_object,

4669 PAGE_SIZE, 0)) {

4670 goto upgrade_lock_and_retry;

4671 }

4672

4673 /*

4674 * Allocate a page in the original top level

4675 * object. Give up if allocate fails. Also

4676 * need to remember current page, as it's the

4677 * source of the copy.

4678 *

4679 * at this point we hold locks on both

4680 * object and cur_object... no need to take

4681 * paging refs or mark pages BUSY since

4682 * we don't drop either object lock until

4683 * the page has been copied and inserted

4684 */

4685 cur_m = m;

4686 m = vm_page_grab_options(grab_options);

4687 m_object = NULL;

4688

4689 if (m == VM_PAGE_NULL) {

4690 /*

4691 * no free page currently available...

4692 * must take the slow path

4693 */

4694 break;

4695 }

4696 /*

4697 * Now do the copy. Mark the source page busy...

4698 *

4699 * NOTE: This code holds the map lock across

4700 * the page copy.

4701 */

4702 vm_page_copy(cur_m, m);

                         vm_page_insert(m, object, vm_object_trunc_page(offset));

                         if (VM_MAP_PAGE_MASK(map) != PAGE_MASK) {

                                 DEBUG4K_FAULT("map %p vaddr 0x%llx page %p [%p 0x%llx] copied to %p [%p 0x%llx]\n", map, (uint64_t)vaddr, cur_m, VM_PAGE_OBJECT(cur_m), cur_m->vmp_offset, m, VM_PAGE_OBJECT(m), m->vmp_offset);

4706 }

4707 m_object = object;

4708 SET_PAGE_DIRTY(m, FALSE);

4709

4710 /*

4711 * Now cope with the source page and object

4712 */

                         if (object->ref_count > 1 && cur_m->vmp_pmapped) {

                                 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(cur_m));

                         } else if (VM_MAP_PAGE_SIZE(map) < PAGE_SIZE) {

4716 /*

4717 * We've copied the full 16K page but we're

4718 * about to call vm_fault_enter() only for

4719 * the 4K chunk we're faulting on. The other

4720 * three 4K chunks in that page could still

4721 * be pmapped in this pmap.

4722 * Since the VM object layer thinks that the

4723 * entire page has been dealt with and the

4724 * original page might no longer be needed,

4725 * it might collapse/bypass the original VM

4726 * object and free its pages, which would be

4727 * bad (and would trigger pmap_verify_free()

4728 * assertions) if the other 4K chunks are still

4729 * pmapped.

4730 */

4731 /*

4732 * XXX FBDP TODO4K: to be revisited

4733 * Technically, we need to pmap_disconnect()

4734 * only the target pmap's mappings for the 4K

4735 * chunks of this 16K VM page. If other pmaps

4736 * have PTEs on these chunks, that means that

4737 * the associated VM map must have a reference

4738 * on the VM object, so no need to worry about

4739 * those.

4740 * pmap_protect() for each 4K chunk would be

4741 * better but we'd have to check which chunks

4742 * are actually mapped before and after this

4743 * one.

4744 * A full-blown pmap_disconnect() is easier

4745 * for now but not efficient.

4746 */

                                 DEBUG4K_FAULT("pmap_disconnect() page %p object %p offset 0x%llx phys 0x%x\n", cur_m, VM_PAGE_OBJECT(cur_m), cur_m->vmp_offset, VM_PAGE_GET_PHYS_PAGE(cur_m));

                                 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(cur_m));

4749 }

4750

4751 if (cur_m->vmp_clustered) {

4752 VM_PAGE_COUNT_AS_PAGEIN(cur_m);

4753 VM_PAGE_CONSUME_CLUSTERED(cur_m);

                                 vm_fault_is_sequential(cur_object, cur_offset, fault_info.behavior);

4755 }

4756 need_collapse = TRUE;

4757

4758 if (!cur_object->internal &&

4759 cur_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY) {

4760 /*

4761 * The object from which we've just

4762 * copied a page is most probably backed

4763 * by a vnode. We don't want to waste too

4764 * much time trying to collapse the VM objects

4765 * and create a bottleneck when several tasks

4766 * map the same file.

4767 */

                                 if (cur_object->copy == object) {

4769 /*

4770 * Shared mapping or no COW yet.

4771 * We can never collapse a copy

4772 * object into its backing object.

4773 */

4774 need_collapse = FALSE;

                                 } else if (cur_object->copy == object->shadow &&

                                     object->shadow->resident_page_count == 0) {

4777 /*

4778 * Shared mapping after a COW occurred.

4779 */

4780 need_collapse = FALSE;

4781 }

4782 }

4783 vm_object_unlock(cur_object);

4784

4785 if (need_collapse == FALSE) {

4786 vm_fault_collapse_skipped++;

4787 }

4788 vm_fault_collapse_total++;

4789

4790 type_of_fault = DBG_COW_FAULT;

4791 counter_inc(&vm_statistics_cow_faults);

                         DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);

4793 current_task()->cow_faults++;

4794

4795 goto FastPmapEnter;

4796 } else {

4797 /*

4798 * No page at cur_object, cur_offset... m == NULL

4799 */

4800 if (cur_object->pager_created) {

4801 vm_external_state_t compressor_external_state = VM_EXTERNAL_STATE_UNKNOWN;

4802

                                 if (MUST_ASK_PAGER(cur_object, cur_offset, compressor_external_state) == TRUE) {

4804 int my_fault_type;

4805 uint8_t c_flags = C_DONT_BLOCK;

4806 bool insert_cur_object = FALSE;

4807

4808 /*

4809 * May have to talk to a pager...

4810 * if so, take the slow path by

4811 * doing a 'break' from the while (TRUE) loop

4812 *

4813 * external_state will only be set to VM_EXTERNAL_STATE_EXISTS

4814 * if the compressor is active and the page exists there

4815 */

4816 if (compressor_external_state != VM_EXTERNAL_STATE_EXISTS) {

4817 break;

4818 }

4819

                                         if (map == kernel_map || real_map == kernel_map) {

4821 /*

4822 * can't call into the compressor with the kernel_map

4823 * lock held, since the compressor may try to operate

4824 * on the kernel map in order to return an empty c_segment

4825 */

4826 break;

4827 }

4828 if (object != cur_object) {

4829 if (fault_type & VM_PROT_WRITE) {

4830 c_flags |= C_KEEP;

4831 } else {

4832 insert_cur_object = TRUE;

4833 }

4834 }

4835 if (insert_cur_object == TRUE) {

4836 if (cur_object_lock_type == OBJECT_LOCK_SHARED) {

4837 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4838

                                                         if (vm_object_lock_upgrade(cur_object) == FALSE) {

4840 /*

4841 * couldn't upgrade so go do a full retry

4842 * immediately since we can no longer be

4843 * certain about cur_object (since we

4844 * don't hold a reference on it)...

4845 * first drop the top object lock

4846 */

4847 vm_object_unlock(object);

4848

4849 vm_map_unlock_read(map);

4850 if (real_map != map) {

4851 vm_map_unlock(real_map);

4852 }

4853

4854 goto RetryFault;

4855 }

4856 }

                                         } else if (object_lock_type == OBJECT_LOCK_SHARED) {

4858 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

4859

4860 if (object != cur_object) {

4861 /*

4862 * we can't go for the upgrade on the top

4863 * lock since the upgrade may block waiting

4864 * for readers to drain... since we hold

4865 * cur_object locked at this point, waiting

4866 * for the readers to drain would represent

4867 * a lock order inversion since the lock order

4868 * for objects is the reference order in the

4869 * shadow chain

4870 */

4871 vm_object_unlock(object);

4872 vm_object_unlock(cur_object);

4873

4874 vm_map_unlock_read(map);

4875 if (real_map != map) {

4876 vm_map_unlock(real_map);

4877 }

4878

4879 goto RetryFault;

4880 }

                                                 if (vm_object_lock_upgrade(object) == FALSE) {

4882 /*

4883 * couldn't upgrade, so explicitly take the lock

4884 * exclusively and go relookup the page since we

4885 * will have dropped the object lock and

4886 * a different thread could have inserted

4887 * a page at this offset

4888 * no need for a full retry since we're

4889 * at the top level of the object chain

4890 */

4891 vm_object_lock(object);

4892

4893 continue;

4894 }

4895 }

4896 m = vm_page_grab_options(grab_options);

4897 m_object = NULL;

4898

4899 if (m == VM_PAGE_NULL) {

4900 /*

4901 * no free page currently available...

4902 * must take the slow path

4903 */

4904 break;

4905 }

4906

4907 /*

4908 * The object is and remains locked

4909 * so no need to take a

4910 * "paging_in_progress" reference.

4911 */

4912 bool shared_lock;

4913 if ((object == cur_object &&

4914 object_lock_type == OBJECT_LOCK_EXCLUSIVE) ||

4915 (object != cur_object &&

4916 cur_object_lock_type == OBJECT_LOCK_EXCLUSIVE)) {

4917 shared_lock = FALSE;

4918 } else {

4919 shared_lock = TRUE;

4920 }

4921

4922 kr = vm_compressor_pager_get(

4923 cur_object->pager,

4924 (vm_object_trunc_page(cur_offset)

4925 + cur_object->paging_offset),

4926 VM_PAGE_GET_PHYS_PAGE(m),

4927 &my_fault_type,

4928 c_flags,

4929 &compressed_count_delta);

4930

4931 vm_compressor_pager_count(

4932 cur_object->pager,

4933 compressed_count_delta,

4934 shared_lock,

4935 cur_object);

4936

4937 if (kr != KERN_SUCCESS) {

4938 vm_page_release(m, FALSE);

4939 m = VM_PAGE_NULL;

4940 }

4941 /*

4942 * If vm_compressor_pager_get() returns

4943 * KERN_MEMORY_FAILURE, then the

4944 * compressed data is permanently lost,

4945 * so return this error immediately.

4946 */

4947 if (kr == KERN_MEMORY_FAILURE) {

4948 if (object != cur_object) {

4949 vm_object_unlock(cur_object);

4950 }

4951 vm_object_unlock(object);

4952 vm_map_unlock_read(map);

4953 if (real_map != map) {

4954 vm_map_unlock(real_map);

4955 }

4956 goto done;

                                         } else if (kr != KERN_SUCCESS) {

4958 break;

4959 }

4960 m->vmp_dirty = TRUE;

4961

4962 /*

4963 * If the object is purgeable, its

4964 * owner's purgeable ledgers will be

4965 * updated in vm_page_insert() but the

4966 * page was also accounted for in a

4967 * "compressed purgeable" ledger, so

4968 * update that now.

4969 */

4970 if (object != cur_object &&

4971 !insert_cur_object) {

4972 /*

4973 * We're not going to insert

4974 * the decompressed page into

4975 * the object it came from.

4976 *

4977 * We're dealing with a

4978 * copy-on-write fault on

4979 * "object".

4980 * We're going to decompress

4981 * the page directly into the

4982 * target "object" while

4983 * keeping the compressed

4984 * page for "cur_object", so

4985 * no ledger update in that

4986 * case.

4987 */

                                         } else if (((cur_object->purgable ==

4989 VM_PURGABLE_DENY) &&

4990 (!cur_object->vo_ledger_tag)) ||

4991 (cur_object->vo_owner ==

4992 NULL)) {

4993 /*

4994 * "cur_object" is not purgeable

4995 * and is not ledger-tagged, or

4996 * there's no owner for it,

4997 * so no owner's ledgers to

4998 * update.

4999 */

5000 } else {

5001 /*

5002 * One less compressed

5003 * purgeable/tagged page for

5004 * cur_object's owner.

5005 */

5006 vm_object_owner_compressed_update(

5007 cur_object,

5008 -1);

5009 }

5010

5011 if (insert_cur_object) {

                                                 vm_page_insert(m, cur_object, vm_object_trunc_page(cur_offset));

5013 m_object = cur_object;

5014 } else {

                                                 vm_page_insert(m, object, vm_object_trunc_page(offset));

5016 m_object = object;

5017 }

5018

                                         if ((m_object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_USE_DEFAULT) {

5020 /*

5021 * If the page is not cacheable,

5022 * we can't let its contents

5023 * linger in the data cache

5024 * after the decompression.

5025 */

                                                 pmap_sync_page_attributes_phys(VM_PAGE_GET_PHYS_PAGE(m));

5027 }

5028

5029 type_of_fault = my_fault_type;

5030

5031 VM_STAT_DECOMPRESSIONS();

5032

5033 if (cur_object != object) {

5034 if (insert_cur_object) {

5035 top_object = object;

5036 /*

5037 * switch to the object that has the new page

5038 */

5039 object = cur_object;

5040 object_lock_type = cur_object_lock_type;

5041 } else {

5042 vm_object_unlock(cur_object);

5043 cur_object = object;

5044 }

5045 }

5046 goto FastPmapEnter;

5047 }

5048 /*

5049 * existence map present and indicates

5050 * that the pager doesn't have this page

5051 */

5052 }

                         if (cur_object->shadow == VM_OBJECT_NULL ||

5054 resilient_media_retry) {

5055 /*

5056 * Zero fill fault. Page gets

5057 * inserted into the original object.

5058 */

5059 if (cur_object->shadow_severed ||

5060 VM_OBJECT_PURGEABLE_FAULT_ERROR(cur_object) ||

5061 cur_object == compressor_object ||

5062 cur_object == kernel_object ||

5063 cur_object == vm_submap_object) {

5064 if (object != cur_object) {

5065 vm_object_unlock(cur_object);

5066 }

5067 vm_object_unlock(object);

5068

5069 vm_map_unlock_read(map);

5070 if (real_map != map) {

5071 vm_map_unlock(real_map);

5072 }

5073

5074 kr = KERN_MEMORY_ERROR;

5075 goto done;

5076 }

5077 if (cur_object != object) {

5078 vm_object_unlock(cur_object);

5079

5080 cur_object = object;

5081 }

5082 if (object_lock_type == OBJECT_LOCK_SHARED) {

5083 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

5084

                                         if (vm_object_lock_upgrade(object) == FALSE) {

5086 /*

5087 * couldn't upgrade so do a full retry on the fault

5088 * since we dropped the object lock which

5089 * could allow another thread to insert

5090 * a page at this offset

5091 */

5092 vm_map_unlock_read(map);

5093 if (real_map != map) {

5094 vm_map_unlock(real_map);

5095 }

5096

5097 goto RetryFault;

5098 }

5099 }

5100 if (!object->internal) {

                                         panic("%s:%d should not zero-fill page at offset 0x%llx in external object %p", __FUNCTION__, __LINE__, (uint64_t)offset, object);

5102 }

                                 m = vm_page_alloc(object, vm_object_trunc_page(offset));

5104 m_object = NULL;

5105

5106 if (m == VM_PAGE_NULL) {

5107 /*

5108 * no free page currently available...

5109 * must take the slow path

5110 */

5111 break;

5112 }

5113 m_object = object;

5114

5115 /*

5116 * Zeroing the page and entering it into the pmap

5117 * represents a significant amount of the zero fill fault handler's work.

5118 *

5119 * To improve fault scalability, we'll drop the object lock, if it appears contended,

5120 * now that we've inserted the page into the vm object.

5121 * Before dropping the lock, we need to check protection bits and set the

5122 * mapped bits on the page. Then we can mark the page busy, drop the lock,

5123 * zero it, and do the pmap enter. We'll need to reacquire the lock

5124 * to clear the busy bit and wake up any waiters.

5125 */

5126 vm_fault_cs_clear(m);

5127 m->vmp_pmapped = TRUE;

5128 if (map->no_zero_fill) {

5129 type_of_fault = DBG_NZF_PAGE_FAULT;

5130 } else {

5131 type_of_fault = DBG_ZERO_FILL_FAULT;

5132 }

5133 {

5134 pmap_t destination_pmap;

5135 vm_map_offset_t destination_pmap_vaddr;

5136 vm_prot_t enter_fault_type;

5137 if (caller_pmap) {

5138 destination_pmap = caller_pmap;

5139 destination_pmap_vaddr = caller_pmap_addr;

5140 } else {

5141 destination_pmap = pmap;

5142 destination_pmap_vaddr = vaddr;

5143 }

5144 if (change_wiring) {

5145 enter_fault_type = VM_PROT_NONE;

5146 } else {

5147 enter_fault_type = caller_prot;

5148 }

5149 kr = vm_fault_enter_prepare(m,

5150 destination_pmap,

5151 destination_pmap_vaddr,

5152 &prot,

5153 caller_prot,

5154 fault_page_size,

5155 fault_phys_offset,

5156 change_wiring,

5157 enter_fault_type,

5158 &fault_info,

5159 &type_of_fault,

5160 &page_needs_data_sync);

5161 if (kr != KERN_SUCCESS) {

5162 goto zero_fill_cleanup;

5163 }

5164

5165 if (object_is_contended) {

5166 /*

5167 * At this point the page is in the vm object, but not on a paging queue.

5168 * Since it's accessible to another thread but its contents are invalid

5169 * (it hasn't been zeroed) mark it busy before dropping the object lock.

5170 */

5171 m->vmp_busy = TRUE;

5172 vm_object_unlock(object);

5173 }

5174 if (type_of_fault == DBG_ZERO_FILL_FAULT) {

5175 /*

5176 * Now zero fill page...

5177 * the page is probably going to

5178 * be written soon, so don't bother

5179 * to clear the modified bit

5180 *

5181 * NOTE: This code holds the map

5182 * lock across the zero fill.

5183 */

5184 vm_page_zero_fill(m);

5185 counter_inc(&vm_statistics_zero_fill_count);

                                                 DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);

5187 }

5188 if (page_needs_data_sync) {

                                                 pmap_sync_page_data_phys(VM_PAGE_GET_PHYS_PAGE(m));

5190 }

5191

5192 if (top_object != VM_OBJECT_NULL) {

5193 need_retry_ptr = &need_retry;

5194 } else {

5195 need_retry_ptr = NULL;

5196 }

5197 if (object_is_contended) {

                                                 kr = vm_fault_pmap_enter(destination_pmap, destination_pmap_vaddr,

5199 fault_page_size, fault_phys_offset,

                                                     m, &prot, caller_prot, enter_fault_type, wired,

5201 fault_info.pmap_options, need_retry_ptr);

5202 vm_object_lock(object);

5203 } else {

                                                 kr = vm_fault_pmap_enter_with_object_lock(object, destination_pmap, destination_pmap_vaddr,

5205 fault_page_size, fault_phys_offset,

                                                     m, &prot, caller_prot, enter_fault_type, wired,

5207 fault_info.pmap_options, need_retry_ptr);

5208 }

5209 }

5210 zero_fill_cleanup:

5211 if (!VM_DYNAMIC_PAGING_ENABLED() &&

5212 (object->purgable == VM_PURGABLE_DENY ||

5213 object->purgable == VM_PURGABLE_NONVOLATILE ||

5214 object->purgable == VM_PURGABLE_VOLATILE)) {

5215 vm_page_lockspin_queues();

5216 if (!VM_DYNAMIC_PAGING_ENABLED()) {

5217 vm_fault_enqueue_throttled_locked(m);

5218 }

5219 vm_page_unlock_queues();

5220 }

                                 vm_fault_enqueue_page(object, m, wired, change_wiring, wire_tag, fault_info.no_cache, &type_of_fault, kr);

5222

5223 vm_fault_complete(

5224 map,

5225 real_map,

5226 object,

5227 m_object,

5228 m,

5229 offset,

5230 trace_real_vaddr,

5231 &fault_info,

5232 caller_prot,

5233 real_vaddr,

5234 type_of_fault,

5235 need_retry,

5236 kr,

5237 physpage_p,

5238 prot,

5239 top_object,

5240 need_collapse,

5241 cur_offset,

5242 fault_type,

5243 &written_on_object,

5244 &written_on_pager,

5245 &written_on_offset);

5246 top_object = VM_OBJECT_NULL;

5247 if (need_retry == TRUE) {

5248 /*

5249 * vm_fault_enter couldn't complete the PMAP_ENTER...

5250 * at this point we don't hold any locks so it's safe

5251 * to ask the pmap layer to expand the page table to

5252 * accommodate this mapping... once expanded, we'll

5253 * re-drive the fault which should result in vm_fault_enter

5254 * being able to successfully enter the mapping this time around

5255 */

                                         (void)pmap_enter_options(

                                                 pmap, vaddr, 0, 0, 0, 0, 0,

5258 PMAP_OPTIONS_NOENTER, NULL);

5259

5260 need_retry = FALSE;

5261 goto RetryFault;

5262 }

5263 goto done;

5264 }

5265 /*

5266 * On to the next level in the shadow chain

5267 */

5268 cur_offset += cur_object->vo_shadow_offset;

5269 new_object = cur_object->shadow;

                         fault_phys_offset = cur_offset - vm_object_trunc_page(cur_offset);

5271

5272 /*

5273 * take the new_object's lock with the indicated state

5274 */

5275 if (cur_object_lock_type == OBJECT_LOCK_SHARED) {

5276 vm_object_lock_shared(new_object);

5277 } else {

5278 vm_object_lock(new_object);

5279 }

5280

5281 if (cur_object != object) {

5282 vm_object_unlock(cur_object);

5283 }

5284

5285 cur_object = new_object;

5286

5287 continue;

5288 }

5289 }

5290 /*

5291 * Cleanup from fast fault failure. Drop any object

5292 * lock other than original and drop map lock.

5293 */

5294 if (object != cur_object) {

5295 vm_object_unlock(cur_object);

5296 }

5297

5298 /*

5299 * must own the object lock exclusively at this point

5300 */

5301 if (object_lock_type == OBJECT_LOCK_SHARED) {

5302 object_lock_type = OBJECT_LOCK_EXCLUSIVE;

5303

                 if (vm_object_lock_upgrade(object) == FALSE) {

5305 /*

5306 * couldn't upgrade, so explicitly

5307 * take the lock exclusively

5308 * no need to retry the fault at this

5309 * point since "vm_fault_page" will

5310 * completely re-evaluate the state

5311 */

5312 vm_object_lock(object);

5313 }

5314 }

5315

5316 handle_copy_delay:

5317 vm_map_unlock_read(map);

5318 if (real_map != map) {

5319 vm_map_unlock(real_map);

5320 }

5321

         if (__improbable(object == compressor_object ||

5323 object == kernel_object ||

5324 object == vm_submap_object)) {

5325 /*

5326 * These objects are explicitly managed and populated by the

5327 * kernel. The virtual ranges backed by these objects should

5328 * either have wired pages or "holes" that are not supposed to

5329 * be accessed at all until they get explicitly populated.

5330 * We should never have to resolve a fault on a mapping backed

5331 * by one of these VM objects and providing a zero-filled page

5332 * would be wrong here, so let's fail the fault and let the

5333 * caller crash or recover.

5334 */

5335 vm_object_unlock(object);

5336 kr = KERN_MEMORY_ERROR;

5337 goto done;

5338 }

5339

5340 assert(object != compressor_object);

5341 assert(object != kernel_object);

5342 assert(object != vm_submap_object);

5343

5344 if (resilient_media_retry) {

5345 /*

5346 * We could get here if we failed to get a free page

5347 * to zero-fill and had to take the slow path again.

5348 * Reset our "recovery-from-failed-media" state.

5349 */

5350 assert(resilient_media_object != VM_OBJECT_NULL);

                 assert(resilient_media_offset != (vm_object_offset_t)-1);

5352 /* release our extra reference on failed object */

5353 // printf("FBDP %s:%d resilient_media_object %p deallocate\n", __FUNCTION__, __LINE__, resilient_media_object);

5354 vm_object_deallocate(resilient_media_object);

5355 resilient_media_object = VM_OBJECT_NULL;

5356 resilient_media_offset = (vm_object_offset_t)-1;

5357 resilient_media_retry = FALSE;

5358 }

5359

5360 /*

5361 * Make a reference to this object to

5362 * prevent its disposal while we are messing with

5363 * it. Once we have the reference, the map is free

5364 * to be diddled. Since objects reference their

5365 * shadows (and copies), they will stay around as well.

5366 */

5367 vm_object_reference_locked(object);

5368 vm_object_paging_begin(object);

5369

         set_thread_pagein_error(cthread, 0);

5371 error_code = 0;

5372

5373 result_page = VM_PAGE_NULL;

         kr = vm_fault_page(object, offset, fault_type,

5375 (change_wiring && !wired),

5376 FALSE, /* page not looked up */

5377 &prot, &result_page, &top_page,

5378 &type_of_fault,

5379 &error_code, map->no_zero_fill,

5380 FALSE, &fault_info);

5381

5382 /*

5383 * if kr != VM_FAULT_SUCCESS, then the paging reference

5384 * has been dropped and the object unlocked... the ref_count

5385 * is still held

5386 *

5387 * if kr == VM_FAULT_SUCCESS, then the paging reference

5388 * is still held along with the ref_count on the original object

5389 *

5390 * the object is returned locked with a paging reference

5391 *

5392 * if top_page != NULL, then it's BUSY and the

5393 * object it belongs to has a paging reference

5394 * but is returned unlocked

5395 */

5396 if (kr != VM_FAULT_SUCCESS &&

5397 kr != VM_FAULT_SUCCESS_NO_VM_PAGE) {

5398 if (kr == VM_FAULT_MEMORY_ERROR &&

5399 fault_info.resilient_media) {

                         assertf(object->internal, "object %p", object);

5401 /*

5402 * This fault failed but the mapping was

5403 * "media resilient", so we'll retry the fault in

5404 * recovery mode to get a zero-filled page in the

5405 * top object.

5406 * Keep the reference on the failing object so

5407 * that we can check that the mapping is still

5408 * pointing to it when we retry the fault.

5409 */

5410 // printf("RESILIENT_MEDIA %s:%d: object %p offset 0x%llx recover from media error 0x%x kr 0x%x top_page %p result_page %p\n", __FUNCTION__, __LINE__, object, offset, error_code, kr, top_page, result_page);

5411 assert(!resilient_media_retry); /* no double retry */

5412 assert(resilient_media_object == VM_OBJECT_NULL);

                         assert(resilient_media_offset == (vm_object_offset_t)-1);

5414 resilient_media_retry = TRUE;

5415 resilient_media_object = object;

5416 resilient_media_offset = offset;

5417 // printf("FBDP %s:%d resilient_media_object %p offset 0x%llx kept reference\n", __FUNCTION__, __LINE__, resilient_media_object, resilient_mmedia_offset);

5418 goto RetryFault;

5419 } else {

5420 /*

5421 * we didn't succeed, lose the object reference

5422 * immediately.

5423 */

5424 vm_object_deallocate(object);

5425 object = VM_OBJECT_NULL; /* no longer valid */

5426 }

5427

5428 /*

5429 * See why we failed, and take corrective action.

5430 */

5431 switch (kr) {

5432 case VM_FAULT_MEMORY_SHORTAGE:

                         if (vm_page_wait((change_wiring) ?

5434 THREAD_UNINT :

5435 THREAD_ABORTSAFE)) {

5436 goto RetryFault;

5437 }

5438 OS_FALLTHROUGH;

5439 case VM_FAULT_INTERRUPTED:

5440 kr = KERN_ABORTED;

5441 goto done;

5442 case VM_FAULT_RETRY:

5443 goto RetryFault;

5444 case VM_FAULT_MEMORY_ERROR:

5445 if (error_code) {

5446 kr = error_code;

5447 } else {

5448 kr = KERN_MEMORY_ERROR;

5449 }

5450 goto done;

5451 default:

                         panic("vm_fault: unexpected error 0x%x from "

                             "vm_fault_page()\n", kr);

5454 }

5455 }

5456 m = result_page;

5457 m_object = NULL;

5458

5459 if (m != VM_PAGE_NULL) {

5460 m_object = VM_PAGE_OBJECT(m);

5461 assert((change_wiring && !wired) ?

5462 (top_page == VM_PAGE_NULL) :

                     ((top_page == VM_PAGE_NULL) == (m_object == object)));

5464 }

5465

/*
 * RELEASE_PAGE(m)
 *
 * Disposal of a page handed back by vm_fault_page() when that page
 * is not going to be entered into the physical map.
 *
 * The macro first issues PAGE_WAKEUP_DONE() on the page.  Then, if
 * VM_PAGE_PAGEABLE(m) is false, it takes the page queues lock
 * (spin variant), tests VM_PAGE_PAGEABLE(m) a second time now that
 * the lock is held, activates the page if the test is still false,
 * and drops the page queues lock again.
 *
 * "m" is expanded several times, so pass a plain page pointer with
 * no side effects.
 */
#define RELEASE_PAGE(m)                                 \
	MACRO_BEGIN                                     \
	PAGE_WAKEUP_DONE(m);                            \
	if (!VM_PAGE_PAGEABLE(m)) {                     \
	        vm_page_lockspin_queues();              \
	        if (!VM_PAGE_PAGEABLE(m)) {             \
	                vm_page_activate(m);            \
	        }                                       \
	        vm_page_unlock_queues();                \
	}                                               \
	MACRO_END

5480

5481

5482 object_locks_dropped = FALSE;

5483 /*

5484 * We must verify that the maps have not changed

5485 * since our last lookup. vm_map_verify() needs the

5486 * map lock (shared) but we are holding object locks.

5487 * So we do a try_lock() first and, if that fails, we

5488 * drop the object locks and go in for the map lock again.

5489 */

         if (!vm_map_try_lock_read(original_map)) {

5491 if (m != VM_PAGE_NULL) {

5492 old_copy_object = m_object->copy;

5493 vm_object_unlock(m_object);

5494 } else {

5495 old_copy_object = VM_OBJECT_NULL;

5496 vm_object_unlock(object);

5497 }

5498

5499 object_locks_dropped = TRUE;

5500

5501 vm_map_lock_read(original_map);

5502 }

5503

         if ((map != original_map) || !vm_map_verify(map, &version)) {

5505 if (object_locks_dropped == FALSE) {

5506 if (m != VM_PAGE_NULL) {

5507 old_copy_object = m_object->copy;

5508 vm_object_unlock(m_object);

5509 } else {

5510 old_copy_object = VM_OBJECT_NULL;

5511 vm_object_unlock(object);

5512 }

5513

5514 object_locks_dropped = TRUE;

5515 }

5516

5517 /*

5518 * no object locks are held at this point

5519 */

5520 vm_object_t retry_object;

5521 vm_object_offset_t retry_offset;

5522 vm_prot_t retry_prot;

5523

5524 /*

5525 * To avoid trying to write_lock the map while another

5526 * thread has it read_locked (in vm_map_pageable), we

5527 * do not try for write permission. If the page is

5528 * still writable, we will get write permission. If it

5529 * is not, or has been marked needs_copy, we enter the

5530 * mapping without write permission, and will merely

5531 * take another fault.

5532 */

5533 map = original_map;

5534

                 kr = vm_map_lookup_locked(&map, vaddr,

5536 fault_type & ~VM_PROT_WRITE,

5537 OBJECT_LOCK_EXCLUSIVE, &version,

5538 &retry_object, &retry_offset, &retry_prot,

5539 &wired,

5540 &fault_info,

5541 &real_map,

5542 NULL);

5543 pmap = real_map->pmap;

5544

5545 if (kr != KERN_SUCCESS) {

5546 vm_map_unlock_read(map);

5547

5548 if (m != VM_PAGE_NULL) {

                                 assert(VM_PAGE_OBJECT(m) == m_object);

5550

5551 /*

5552 * retake the lock so that

5553 * we can drop the paging reference

5554 * in vm_fault_cleanup and do the

5555 * PAGE_WAKEUP_DONE in RELEASE_PAGE

5556 */

5557 vm_object_lock(m_object);

5558

5559 RELEASE_PAGE(m);

5560

5561 vm_fault_cleanup(m_object, top_page);

5562 } else {

5563 /*

5564 * retake the lock so that

5565 * we can drop the paging reference

5566 * in vm_fault_cleanup

5567 */

5568 vm_object_lock(object);

5569

5570 vm_fault_cleanup(object, top_page);

5571 }

5572 vm_object_deallocate(object);

5573

5574 goto done;

5575 }

5576 vm_object_unlock(retry_object);

5577

                 if ((retry_object != object) || (retry_offset != offset)) {

5579 vm_map_unlock_read(map);

5580 if (real_map != map) {

5581 vm_map_unlock(real_map);

5582 }

5583

5584 if (m != VM_PAGE_NULL) {

                                 assert(VM_PAGE_OBJECT(m) == m_object);

5586

5587 /*

5588 * retake the lock so that

5589 * we can drop the paging reference

5590 * in vm_fault_cleanup and do the

5591 * PAGE_WAKEUP_DONE in RELEASE_PAGE

5592 */

5593 vm_object_lock(m_object);

5594

5595 RELEASE_PAGE(m);

5596

5597 vm_fault_cleanup(m_object, top_page);

5598 } else {

5599 /*

5600 * retake the lock so that

5601 * we can drop the paging reference

5602 * in vm_fault_cleanup

5603 */

5604 vm_object_lock(object);

5605

5606 vm_fault_cleanup(object, top_page);

5607 }

5608 vm_object_deallocate(object);

5609

5610 goto RetryFault;

5611 }

5612 /*

5613 * Check whether the protection has changed or the object

5614 * has been copied while we left the map unlocked.

5615 */

                 if (pmap_has_prot_policy(pmap, fault_info.pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, retry_prot)) {

5617 /* If the pmap layer cares, pass the full set. */

5618 prot = retry_prot;

5619 } else {

5620 prot &= retry_prot;

5621 }

5622 }

5623

5624 if (object_locks_dropped == TRUE) {

5625 if (m != VM_PAGE_NULL) {

5626 vm_object_lock(m_object);

5627

                         if (m_object->copy != old_copy_object) {

5629 /*

5630 * The copy object changed while the top-level object

5631 * was unlocked, so take away write permission.

5632 */

                                 assert(!pmap_has_prot_policy(pmap, fault_info.pmap_options & PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE, prot));

5634 prot &= ~VM_PROT_WRITE;

5635 }

5636 } else {

5637 vm_object_lock(object);

5638 }

5639

5640 object_locks_dropped = FALSE;

5641 }

5642

5643 if (!need_copy &&

5644 !fault_info.no_copy_on_read &&

5645 m != VM_PAGE_NULL &&

5646 VM_PAGE_OBJECT(m) != object &&

             !VM_PAGE_OBJECT(m)->pager_trusted &&

5648 vm_protect_privileged_from_untrusted &&

5649 !((prot & VM_PROT_EXECUTE) &&

5650 VM_PAGE_OBJECT(m)->code_signed &&

             pmap_get_vm_map_cs_enforced(caller_pmap ? caller_pmap : pmap)) &&

5652 current_proc_is_privileged()) {

5653 /*

5654 * We found the page we want in an "untrusted" VM object

5655 * down the shadow chain. Since the target is "privileged"

5656 * we want to perform a copy-on-read of that page, so that the

5657 * mapped object gets a stable copy and does not have to

5658 * rely on the "untrusted" object to provide the same

5659 * contents if the page gets reclaimed and has to be paged

5660 * in again later on.

5661 *

5662 * Special case: if the mapping is executable and the untrusted

5663 * object is code-signed and the process is "cs_enforced", we

5664 * do not copy-on-read because that would break code-signing

5665 * enforcement expectations (an executable page must belong

5666 * to a code-signed object) and we can rely on code-signing

5667 * to re-validate the page if it gets evicted and paged back in.

5668 */

5669 // printf("COPY-ON-READ %s:%d map %p vaddr 0x%llx obj %p offset 0x%llx found page %p (obj %p offset 0x%llx) UNTRUSTED -> need copy-on-read\n", __FUNCTION__, __LINE__, map, (uint64_t)vaddr, object, offset, m, VM_PAGE_OBJECT(m), m->vmp_offset);

5670 vm_copied_on_read++;

5671 need_copy_on_read = TRUE;

5672 need_copy = TRUE;

5673 } else {

5674 need_copy_on_read = FALSE;

5675 }

5676

5677 /*

5678 * If we want to wire down this page, but no longer have

5679 * adequate permissions, we must start all over.

5680 * If we decided to copy-on-read, we must also start all over.

5681 */

         if ((wired && (fault_type != (prot | VM_PROT_WRITE))) ||

5683 need_copy_on_read) {

5684 vm_map_unlock_read(map);

5685 if (real_map != map) {

5686 vm_map_unlock(real_map);

5687 }

5688

5689 if (m != VM_PAGE_NULL) {

                         assert(VM_PAGE_OBJECT(m) == m_object);

5691

5692 RELEASE_PAGE(m);

5693

5694 vm_fault_cleanup(m_object, top_page);

5695 } else {

5696 vm_fault_cleanup(object, top_page);

5697 }

5698

5699 vm_object_deallocate(object);

5700

5701 goto RetryFault;

5702 }

5703 if (m != VM_PAGE_NULL) {

5704 /*

5705 * Put this page into the physical map.

5706 * We had to do the unlock above because pmap_enter

5707 * may cause other faults. The page may be on

5708 * the pageout queues. If the pageout daemon comes

5709 * across the page, it will remove it from the queues.

5710 */

5711 if (fault_page_size < PAGE_SIZE) {

                         DEBUG4K_FAULT("map %p original %p pmap %p va 0x%llx pa 0x%llx(0x%llx+0x%llx) prot 0x%x caller_prot 0x%x\n", map, original_map, pmap, (uint64_t)vaddr, (uint64_t)((((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT) + fault_phys_offset), (uint64_t)(((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(m)) << PAGE_SHIFT), (uint64_t)fault_phys_offset, prot, caller_prot);

5713 assertf((!(fault_phys_offset & FOURK_PAGE_MASK) &&

5714 fault_phys_offset < PAGE_SIZE),

                             "0x%llx\n", (uint64_t)fault_phys_offset);

5716 } else {

                         assertf(fault_phys_offset == 0,

                             "0x%llx\n", (uint64_t)fault_phys_offset);

5719 }

5720 if (caller_pmap) {

5721 kr = vm_fault_enter(m,

5722 caller_pmap,

5723 caller_pmap_addr,

5724 fault_page_size,

5725 fault_phys_offset,

5726 prot,

5727 caller_prot,

5728 wired,

5729 change_wiring,

5730 wire_tag,

5731 &fault_info,

5732 NULL,

5733 &type_of_fault);

5734 } else {

5735 kr = vm_fault_enter(m,

5736 pmap,

5737 vaddr,

5738 fault_page_size,

5739 fault_phys_offset,

5740 prot,

5741 caller_prot,

5742 wired,

5743 change_wiring,

5744 wire_tag,

5745 &fault_info,

5746 NULL,

5747 &type_of_fault);

5748 }

                 assert(VM_PAGE_OBJECT(m) == m_object);

5750

5751 {

5752 int event_code = 0;

5753

5754 if (m_object->internal) {

                                 event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_INTERNAL));

                         } else if (m_object->object_is_shared_cache) {

                                 event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_SHAREDCACHE));

5758 } else {

                                 event_code = (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_ADDR_EXTERNAL));

5760 }

5761

                         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | vm_fault_type_for_tracing(need_copy_on_read, type_of_fault), m->vmp_offset, get_current_unique_pid(), 0);

                         KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_SLOW), get_current_unique_pid(), 0, 0, 0, 0);

5764

                         DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag);

5766 }

5767 if (kr != KERN_SUCCESS) {

5768 /* abort this page fault */

5769 vm_map_unlock_read(map);

5770 if (real_map != map) {

5771 vm_map_unlock(real_map);

5772 }

5773 PAGE_WAKEUP_DONE(m);

5774 vm_fault_cleanup(m_object, top_page);

5775 vm_object_deallocate(object);

5776 goto done;

5777 }

5778 if (physpage_p != NULL) {

5779 /* for vm_map_wire_and_extract() */

                         *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);

5781 if (prot & VM_PROT_WRITE) {

5782 vm_object_lock_assert_exclusive(m_object);

5783 m->vmp_dirty = TRUE;

5784 }

5785 }

5786 } else {

5787 vm_map_entry_t entry;

5788 vm_map_offset_t laddr;

5789 vm_map_offset_t ldelta, hdelta;

5790

5791 /*

5792 * do a pmap block mapping from the physical address

5793 * in the object

5794 */

5795

5796 if (real_map != map) {

5797 vm_map_unlock(real_map);

5798 }

5799

5800 if (original_map != map) {

5801 vm_map_unlock_read(map);

5802 vm_map_lock_read(original_map);

5803 map = original_map;

5804 }

5805 real_map = map;

5806

5807 laddr = vaddr;

5808 hdelta = 0xFFFFF000;

5809 ldelta = 0xFFFFF000;

5810

                 while (vm_map_lookup_entry(map, laddr, &entry)) {

                         if (ldelta > (laddr - entry->vme_start)) {

5813 ldelta = laddr - entry->vme_start;

5814 }

                         if (hdelta > (entry->vme_end - laddr)) {

5816 hdelta = entry->vme_end - laddr;

5817 }

5818 if (entry->is_sub_map) {

5819 laddr = ((laddr - entry->vme_start)

5820 + VME_OFFSET(entry));

                                 vm_map_lock_read(VME_SUBMAP(entry));

5822

5823 if (map != real_map) {

5824 vm_map_unlock_read(map);

5825 }

5826 if (entry->use_pmap) {

5827 vm_map_unlock_read(real_map);

5828 real_map = VME_SUBMAP(entry);

5829 }

5830 map = VME_SUBMAP(entry);

5831 } else {

5832 break;

5833 }

5834 }

5835

                 if (vm_map_lookup_entry(map, laddr, &entry) &&

                     (VME_OBJECT(entry) != NULL) &&

                     (VME_OBJECT(entry) == object)) {

5839 uint16_t superpage;

5840

5841 if (!object->pager_created &&

5842 object->phys_contiguous &&

                             VME_OFFSET(entry) == 0 &&

                             (entry->vme_end - entry->vme_start == object->vo_size) &&

                             VM_MAP_PAGE_ALIGNED(entry->vme_start, (object->vo_size - 1))) {

5846 superpage = VM_MEM_SUPERPAGE;

5847 } else {

5848 superpage = 0;

5849 }

5850

5851 if (superpage && physpage_p) {

5852 /* for vm_map_wire_and_extract() */

5853 *physpage_p = (ppnum_t)

5854 ((((vm_map_offset_t)

5855 object->vo_shadow_offset)

5856 + VME_OFFSET(entry)

5857 + (laddr - entry->vme_start))

5858 >> PAGE_SHIFT);

5859 }

5860

5861 if (caller_pmap) {

5862 /*

5863 * Set up a block mapped area

5864 */

                                 assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT));

5866 kr = pmap_map_block(caller_pmap,

5867 (addr64_t)(caller_pmap_addr - ldelta),

                                     (ppnum_t)((((vm_map_offset_t) (VME_OBJECT(entry)->vo_shadow_offset)) +

                                     VME_OFFSET(entry) + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT),

                                     (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot,

                                     (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);

5872

5873 if (kr != KERN_SUCCESS) {

5874 goto cleanup;

5875 }

5876 } else {

5877 /*

5878 * Set up a block mapped area

5879 */

                                 assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT));

                                 kr = pmap_map_block(real_map->pmap,

5882 (addr64_t)(vaddr - ldelta),

                                     (ppnum_t)((((vm_map_offset_t)(VME_OBJECT(entry)->vo_shadow_offset)) +

                                     VME_OFFSET(entry) + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT),

                                     (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot,

                                     (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);

5887

5888 if (kr != KERN_SUCCESS) {

5889 goto cleanup;

5890 }

5891 }

5892 }

5893 }

5894

5895 /*

5896 * Success

5897 */

5898 kr = KERN_SUCCESS;

5899

5900 /*

5901 * TODO: could most of the done cases just use cleanup?

5902 */

5903 cleanup:

5904 /*

5905 * Unlock everything, and return

5906 */

5907 vm_map_unlock_read(map);

5908 if (real_map != map) {

5909 vm_map_unlock(real_map);

5910 }

5911

5912 if (m != VM_PAGE_NULL) {

                 assert(VM_PAGE_OBJECT(m) == m_object);

5914

                 if (!m_object->internal && (fault_type & VM_PROT_WRITE)) {

5916 vm_object_paging_begin(m_object);

5917

5918 assert(written_on_object == VM_OBJECT_NULL);

5919 written_on_object = m_object;

5920 written_on_pager = m_object->pager;

                         written_on_offset = m_object->paging_offset + m->vmp_offset;

5922 }

5923 PAGE_WAKEUP_DONE(m);

5924

5925 vm_fault_cleanup(m_object, top_page);

5926 } else {

5927 vm_fault_cleanup(object, top_page);

5928 }

5929

5930 vm_object_deallocate(object);

5931

5932 #undef RELEASE_PAGE

5933

5934 done:

5935 thread_interrupt_level(interruptible_state);

5936

5937 if (resilient_media_object != VM_OBJECT_NULL) {

5938 assert(resilient_media_retry);

                 assert(resilient_media_offset != (vm_object_offset_t)-1);

5940 /* release extra reference on failed object */

5941 // printf("FBDP %s:%d resilient_media_object %p deallocate\n", __FUNCTION__, __LINE__, resilient_media_object);

5942 vm_object_deallocate(resilient_media_object);

5943 resilient_media_object = VM_OBJECT_NULL;

5944 resilient_media_offset = (vm_object_offset_t)-1;

5945 resilient_media_retry = FALSE;

5946 }

5947 assert(!resilient_media_retry);

5948

5949 /*

5950 * Only I/O throttle on faults which cause a pagein/swapin.

5951 */

         if ((type_of_fault == DBG_PAGEIND_FAULT) || (type_of_fault == DBG_PAGEINV_FAULT) || (type_of_fault == DBG_COMPRESSOR_SWAPIN_FAULT)) {

5953 throttle_lowpri_io(1);

5954 } else {

                 if (kr == KERN_SUCCESS && type_of_fault != DBG_CACHE_HIT_FAULT && type_of_fault != DBG_GUARD_FAULT) {

                         if ((throttle_delay = vm_page_throttled(TRUE))) {

5957 if (vm_debug_events) {

5958 if (type_of_fault == DBG_COMPRESSOR_FAULT) {

                                                 VM_DEBUG_EVENT(vmf_compressordelay, VMF_COMPRESSORDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);

                                         } else if (type_of_fault == DBG_COW_FAULT) {

                                                 VM_DEBUG_EVENT(vmf_cowdelay, VMF_COWDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);

5962 } else {

                                                 VM_DEBUG_EVENT(vmf_zfdelay, VMF_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);

5964 }

5965 }

5966 delay(throttle_delay);

5967 }

5968 }

5969 }

5970

5971 if (written_on_object) {

                 vnode_pager_dirtied(written_on_pager, written_on_offset, written_on_offset + PAGE_SIZE_64);

5973

5974 vm_object_lock(written_on_object);

5975 vm_object_paging_end(written_on_object);

5976 vm_object_unlock(written_on_object);

5977

5978 written_on_object = VM_OBJECT_NULL;

5979 }

5980

5981 if (rtfault) {

                 vm_record_rtfault(cthread, fstart, trace_vaddr, type_of_fault);

5983 }

5984

5985 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,

             (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,

             ((uint64_t)trace_vaddr >> 32),

5988 trace_vaddr,

5989 kr,

5990 vm_fault_type_for_tracing(need_copy_on_read, type_of_fault),

5991 0);

5992

         if (fault_page_size < PAGE_SIZE && kr != KERN_SUCCESS) {

                 DEBUG4K_FAULT("map %p original %p vaddr 0x%llx -> 0x%x\n", map, original_map, (uint64_t)trace_real_vaddr, kr);

5995 }

5996

5997 return kr;

5998 }

5999

6000 /*

6001 * vm_fault_wire:

6002 *

6003 * Wire down a range of virtual addresses in a map.

6004 */

6005 kern_return_t

6006 vm_fault_wire(

6007 vm_map_t map,

6008 vm_map_entry_t entry,

6009 vm_prot_t prot,

6010 vm_tag_t wire_tag,

6011 pmap_t pmap,

6012 vm_map_offset_t pmap_addr,

6013 ppnum_t *physpage_p)

6014 {

6015 vm_map_offset_t va;

6016 vm_map_offset_t end_addr = entry->vme_end;

6017 kern_return_t rc;

6018 vm_map_size_t effective_page_size;

6019

6020 assert(entry->in_transition);

6021

         if ((VME_OBJECT(entry) != NULL) &&

6023 !entry->is_sub_map &&

6024 VME_OBJECT(entry)->phys_contiguous) {

6025 return KERN_SUCCESS;

6026 }

6027

6028 /*

6029 * Inform the physical mapping system that the

6030 * range of addresses may not fault, so that

6031 * page tables and such can be locked down as well.

6032 */

6033

6034 pmap_pageable(pmap, pmap_addr,

             pmap_addr + (end_addr - entry->vme_start), FALSE);

6036

6037 /*

6038 * We simulate a fault to get the page and enter it

6039 * in the physical map.

6040 */

6041

         effective_page_size = MIN(VM_MAP_PAGE_SIZE(map), PAGE_SIZE);

         for (va = entry->vme_start;

6044 va < end_addr;

6045 va += effective_page_size) {

                 rc = vm_fault_wire_fast(map, va, prot, wire_tag, entry, pmap,

6047 pmap_addr + (va - entry->vme_start),

6048 physpage_p);

6049 if (rc != KERN_SUCCESS) {

                         rc = vm_fault_internal(map, va, prot, TRUE, wire_tag,

6051 ((pmap == kernel_pmap)

6052 ? THREAD_UNINT

6053 : THREAD_ABORTSAFE),

6054 pmap,

6055 (pmap_addr +

6056 (va - entry->vme_start)),

6057 physpage_p);

                         DTRACE_VM2(softlock, int, 1, (uint64_t *), NULL);

6059 }

6060

6061 if (rc != KERN_SUCCESS) {

6062 struct vm_map_entry tmp_entry = *entry;

6063

6064 /* unwire wired pages */

6065 tmp_entry.vme_end = va;

6066 vm_fault_unwire(map,

                             &tmp_entry, FALSE, pmap, pmap_addr);

6068

6069 return rc;

6070 }

6071 }

6072 return KERN_SUCCESS;

6073 }

6074

6075 /*

6076 * vm_fault_unwire:

6077 *

6078 * Unwire a range of virtual addresses in a map.

6079 */

6080 void

6081 vm_fault_unwire(

6082 vm_map_t map,

6083 vm_map_entry_t entry,

6084 boolean_t deallocate,

6085 pmap_t pmap,

6086 vm_map_offset_t pmap_addr)

6087 {

6088 vm_map_offset_t va;

6089 vm_map_offset_t end_addr = entry->vme_end;

6090 vm_object_t object;

6091 struct vm_object_fault_info fault_info = {};

6092 unsigned int unwired_pages;

6093 vm_map_size_t effective_page_size;

6094

         object = (entry->is_sub_map) ? VM_OBJECT_NULL : VME_OBJECT(entry);

6096

6097 /*

6098 * If it's marked phys_contiguous, then vm_fault_wire() didn't actually

6099 * do anything since such memory is wired by default. So we don't have

6100 * anything to undo here.

6101 */

6102

         if (object != VM_OBJECT_NULL && object->phys_contiguous) {

6104 return;

6105 }

6106

6107 fault_info.interruptible = THREAD_UNINT;

6108 fault_info.behavior = entry->behavior;

         fault_info.user_tag = VME_ALIAS(entry);

6110 if (entry->iokit_acct ||

             (!entry->is_sub_map && !entry->use_pmap)) {

6112 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;

6113 }

         fault_info.lo_offset = VME_OFFSET(entry);

         fault_info.hi_offset = (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);

6116 fault_info.no_cache = entry->no_cache;

6117 fault_info.stealth = TRUE;

6118

6119 unwired_pages = 0;

6120

6121 /*

6122 * Since the pages are wired down, we must be able to

6123 * get their mappings from the physical map system.

6124 */

6125

         effective_page_size = MIN(VM_MAP_PAGE_SIZE(map), PAGE_SIZE);

         for (va = entry->vme_start;

6128 va < end_addr;

6129 va += effective_page_size) {

6130 if (object == VM_OBJECT_NULL) {

6131 if (pmap) {

6132 pmap_change_wiring(pmap,

                                     pmap_addr + (va - entry->vme_start), FALSE);

6134 }

                         (void) vm_fault(map, va, VM_PROT_NONE,

                             TRUE, VM_KERN_MEMORY_NONE, THREAD_UNINT, pmap, pmap_addr);

6137 } else {

6138 vm_prot_t prot;

6139 vm_page_t result_page;

6140 vm_page_t top_page;

6141 vm_object_t result_object;

6142 vm_fault_return_t result;

6143

6144 /* cap cluster size at maximum UPL size */

6145 upl_size_t cluster_size;

                         if (os_sub_overflow(end_addr, va, &cluster_size)) {

                                 cluster_size = 0 - (upl_size_t)PAGE_SIZE;

6148 }

6149 fault_info.cluster_size = cluster_size;

6150

6151 do {

6152 prot = VM_PROT_NONE;

6153

6154 vm_object_lock(object);

6155 vm_object_paging_begin(object);

6156 result_page = VM_PAGE_NULL;

6157 result = vm_fault_page(

6158 object,

6159 (VME_OFFSET(entry) +

6160 (va - entry->vme_start)),

6161 VM_PROT_NONE, TRUE,

6162 FALSE, /* page not looked up */

6163 &prot, &result_page, &top_page,

                                         (int *)0,

6165 NULL, map->no_zero_fill,

6166 FALSE, &fault_info);

                         } while (result == VM_FAULT_RETRY);

6168

6169 /*

6170 * If this was a mapping to a file on a device that has been forcibly

6171 * unmounted, then we won't get a page back from vm_fault_page(). Just

6172 * move on to the next one in case the remaining pages are mapped from

6173 * different objects. During a forced unmount, the object is terminated

6174 * so the alive flag will be false if this happens. A forced unmount will

6175 * will occur when an external disk is unplugged before the user does an

6176 * eject, so we don't want to panic in that situation.

6177 */

6178

                         if (result == VM_FAULT_MEMORY_ERROR && !object->alive) {

6180 continue;

6181 }

6182

6183 if (result == VM_FAULT_MEMORY_ERROR &&

6184 object == kernel_object) {

6185 /*

6186 * This must have been allocated with

6187 * KMA_KOBJECT and KMA_VAONLY and there's

6188 * no physical page at this offset.

6189 * We're done (no page to free).

6190 */

6191 assert(deallocate);

6192 continue;

6193 }

6194

6195 if (result != VM_FAULT_SUCCESS) {

6196 panic("vm_fault_unwire: failure");

6197 }

6198

6199 result_object = VM_PAGE_OBJECT(result_page);

6200

6201 if (deallocate) {

                                 assert(VM_PAGE_GET_PHYS_PAGE(result_page) !=

6203 vm_page_fictitious_addr);

                                 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(result_page));

                                 if (VM_PAGE_WIRED(result_page)) {

6206 unwired_pages++;

6207 }

6208 VM_PAGE_FREE(result_page);

6209 } else {

                                 if ((pmap) && (VM_PAGE_GET_PHYS_PAGE(result_page) != vm_page_guard_addr)) {

6211 pmap_change_wiring(pmap,

                                             pmap_addr + (va - entry->vme_start), FALSE);

6213 }

6214

6215

                                 if (VM_PAGE_WIRED(result_page)) {

6217 vm_page_lockspin_queues();

6218 vm_page_unwire(result_page, TRUE);

6219 vm_page_unlock_queues();

6220 unwired_pages++;

6221 }

6222 if (entry->zero_wired_pages) {

                                         pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(result_page));

6224 entry->zero_wired_pages = FALSE;

6225 }

6226

6227 PAGE_WAKEUP_DONE(result_page);

6228 }

6229 vm_fault_cleanup(result_object, top_page);

6230 }

6231 }

6232

6233 /*

6234 * Inform the physical mapping system that the range

6235 * of addresses may fault, so that page tables and

6236 * such may be unwired themselves.

6237 */

6238

6239 pmap_pageable(pmap, pmap_addr,

             pmap_addr + (end_addr - entry->vme_start), TRUE);

6241

6242 if (kernel_object == object) {

6243 /*

6244 * Would like to make user_tag in vm_object_fault_info

6245 * vm_tag_t (unsigned short) but user_tag derives its value from

6246 * VME_ALIAS(entry) at a few places and VME_ALIAS, in turn, casts

6247 * to an _unsigned int_ which is used by non-fault_info paths throughout the

6248 * code at many places.

6249 *

6250 * So, for now, an explicit truncation to unsigned short (vm_tag_t).

6251 */

                 assertf((fault_info.user_tag & VME_ALIAS_MASK) == fault_info.user_tag,

                     "VM Tag truncated from 0x%x to 0x%x\n", fault_info.user_tag, (fault_info.user_tag & VME_ALIAS_MASK));

                 vm_tag_update_size((vm_tag_t) fault_info.user_tag, -ptoa_64(unwired_pages));

6255 }

6256 }

6257

6258 /*

6259 * vm_fault_wire_fast:

6260 *

6261 * Handle common case of a wire down page fault at the given address.

6262 * If successful, the page is inserted into the associated physical map.

6263 * The map entry is passed in to avoid the overhead of a map lookup.

6264 *

6265 * NOTE: the given address should be truncated to the

6266 * proper page address.

6267 *

6268 * KERN_SUCCESS is returned if the page fault is handled; otherwise,

6269 * a standard error specifying why the fault is fatal is returned.

6270 *

6271 * The map in question must be referenced, and remains so.

6272 * Caller has a read lock on the map.

6273 *

6274 * This is a stripped version of vm_fault() for wiring pages. Anything

6275 * other than the common case will return KERN_FAILURE, and the caller

6276 * is expected to call vm_fault().

6277 */

6278 static kern_return_t

6279 vm_fault_wire_fast(

6280 __unused vm_map_t map,

6281 vm_map_offset_t va,

6282 __unused vm_prot_t caller_prot,

6283 vm_tag_t wire_tag,

6284 vm_map_entry_t entry,

6285 pmap_t pmap,

6286 vm_map_offset_t pmap_addr,

6287 ppnum_t *physpage_p)

6288 {

6289 vm_object_t object;

6290 vm_object_offset_t offset;

6291 vm_page_t m;

6292 vm_prot_t prot;

6293 thread_t thread = current_thread();

6294 int type_of_fault;

6295 kern_return_t kr;

6296 vm_map_size_t fault_page_size;

6297 vm_map_offset_t fault_phys_offset;

6298 struct vm_object_fault_info fault_info = {};

6299

6300 counter_inc(&vm_statistics_faults);

6301

         if (thread != THREAD_NULL && thread->task != TASK_NULL) {

                 counter_inc(&thread->task->faults);

6304 }

6305

6306 /*

6307 * Recovery actions

6308 */

6309

6310 #undef RELEASE_PAGE

6311 #define RELEASE_PAGE(m) { \

6312 PAGE_WAKEUP_DONE(m); \

6313 vm_page_lockspin_queues(); \

6314 vm_page_unwire(m, TRUE); \

6315 vm_page_unlock_queues(); \

6316 }

6317

6318

6319 #undef UNLOCK_THINGS

6320 #define UNLOCK_THINGS { \

6321 vm_object_paging_end(object); \

6322 vm_object_unlock(object); \

6323 }

6324

6325 #undef UNLOCK_AND_DEALLOCATE

6326 #define UNLOCK_AND_DEALLOCATE { \

6327 UNLOCK_THINGS; \

6328 vm_object_deallocate(object); \

6329 }

6330 /*

6331 * Give up and have caller do things the hard way.

6332 */

6333

6334 #define GIVE_UP { \

6335 UNLOCK_AND_DEALLOCATE; \

6336 return(KERN_FAILURE); \

6337 }

6338

6339

6340 /*

6341 * If this entry is not directly to a vm_object, bail out.

6342 */

6343 if (entry->is_sub_map) {

6344 assert(physpage_p == NULL);

6345 return KERN_FAILURE;

6346 }

6347

6348 /*

6349 * Find the backing store object and offset into it.

6350 */

6351

6352 object = VME_OBJECT(entry);

         offset = (va - entry->vme_start) + VME_OFFSET(entry);

6354 prot = entry->protection;

6355

6356 /*

6357 * Make a reference to this object to prevent its

6358 * disposal while we are messing with it.

6359 */

6360

6361 vm_object_lock(object);

6362 vm_object_reference_locked(object);

6363 vm_object_paging_begin(object);

6364

6365 /*

6366 * INVARIANTS (through entire routine):

6367 *

6368 * 1) At all times, we must either have the object

6369 * lock or a busy page in some object to prevent

6370 * some other thread from trying to bring in

6371 * the same page.

6372 *

6373 * 2) Once we have a busy page, we must remove it from

6374 * the pageout queues, so that the pageout daemon

6375 * will not grab it away.

6376 *

6377 */

6378

6379 /*

6380 * Look for page in top-level object. If it's not there or

6381 * there's something going on, give up.

6382 */

         m = vm_page_lookup(object, vm_object_trunc_page(offset));

         if ((m == VM_PAGE_NULL) || (m->vmp_busy) ||

             (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {

6386 GIVE_UP;

6387 }

6388 if (m->vmp_fictitious &&

6389 VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) {

6390 /*

6391 * Guard pages are fictitious pages and are never

6392 * entered into a pmap, so let's say it's been wired...

6393 */

6394 kr = KERN_SUCCESS;

6395 goto done;

6396 }

6397

6398 /*

6399 * Wire the page down now. All bail outs beyond this

6400 * point must unwire the page.

6401 */

6402

6403 vm_page_lockspin_queues();

         vm_page_wire(m, wire_tag, TRUE);

6405 vm_page_unlock_queues();

6406

6407 /*

6408 * Mark page busy for other threads.

6409 */

6410 assert(!m->vmp_busy);

6411 m->vmp_busy = TRUE;

6412 assert(!m->vmp_absent);

6413

6414 /*

6415 * Give up if the page is being written and there's a copy object

6416 */

         if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {

6418 RELEASE_PAGE(m);

6419 GIVE_UP;

6420 }

6421

         fault_info.user_tag = VME_ALIAS(entry);

6423 fault_info.pmap_options = 0;

6424 if (entry->iokit_acct ||

             (!entry->is_sub_map && !entry->use_pmap)) {

6426 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;

6427 }

6428

         fault_page_size = MIN(VM_MAP_PAGE_SIZE(map), PAGE_SIZE);

         fault_phys_offset = offset - vm_object_trunc_page(offset);

6431

6432 /*

6433 * Put this page into the physical map.

6434 */

6435 type_of_fault = DBG_CACHE_HIT_FAULT;

6436 kr = vm_fault_enter(m,

6437 pmap,

6438 pmap_addr,

6439 fault_page_size,

6440 fault_phys_offset,

6441 prot,

6442 prot,

6443 TRUE, /* wired */

6444 FALSE, /* change_wiring */

6445 wire_tag,

6446 &fault_info,

6447 NULL,

6448 &type_of_fault);

6449 if (kr != KERN_SUCCESS) {

6450 RELEASE_PAGE(m);

6451 GIVE_UP;

6452 }

6453

6454 done:

6455 /*

6456 * Unlock everything, and return

6457 */

6458

6459 if (physpage_p) {

6460 /* for vm_map_wire_and_extract() */

6461 if (kr == KERN_SUCCESS) {

                         assert(object == VM_PAGE_OBJECT(m));

                         *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);

6464 if (prot & VM_PROT_WRITE) {

6465 vm_object_lock_assert_exclusive(object);

6466 m->vmp_dirty = TRUE;

6467 }

6468 } else {

6469 *physpage_p = 0;

6470 }

6471 }

6472

6473 PAGE_WAKEUP_DONE(m);

6474 UNLOCK_AND_DEALLOCATE;

6475

6476 return kr;

6477 }

6478

6479 /*

6480 * Routine: vm_fault_copy_cleanup

6481 * Purpose:

6482 * Release a page used by vm_fault_copy.

6483 */

6484

6485 static void

6486 vm_fault_copy_cleanup(

6487 vm_page_t page,

6488 vm_page_t top_page)

6489 {

6490 vm_object_t object = VM_PAGE_OBJECT(page);

6491

6492 vm_object_lock(object);

6493 PAGE_WAKEUP_DONE(page);

         if (!VM_PAGE_PAGEABLE(page)) {

6495 vm_page_lockspin_queues();

                 if (!VM_PAGE_PAGEABLE(page)) {

6497 vm_page_activate(page);

6498 }

6499 vm_page_unlock_queues();

6500 }

6501 vm_fault_cleanup(object, top_page);

6502 }

6503

6504 static void

6505 vm_fault_copy_dst_cleanup(

6506 vm_page_t page)

6507 {

6508 vm_object_t object;

6509

6510 if (page != VM_PAGE_NULL) {

6511 object = VM_PAGE_OBJECT(page);

6512 vm_object_lock(object);

6513 vm_page_lockspin_queues();

6514 vm_page_unwire(page, TRUE);

6515 vm_page_unlock_queues();

6516 vm_object_paging_end(object);

6517 vm_object_unlock(object);

6518 }

6519 }

6520

6521 /*

6522 * Routine: vm_fault_copy

6523 *

6524 * Purpose:

6525 * Copy pages from one virtual memory object to another --

6526 * neither the source nor destination pages need be resident.

6527 *

6528 * Before actually copying a page, the version associated with

6529 * the destination address map will be verified.

6530 *

6531 * In/out conditions:

6532 * The caller must hold a reference, but not a lock, to

6533 * each of the source and destination objects and to the

6534 * destination map.

6535 *

6536 * Results:

6537 * Returns KERN_SUCCESS if no errors were encountered in

6538 * reading or writing the data. Returns KERN_INTERRUPTED if

6539 * the operation was interrupted (only possible if the

6540 * "interruptible" argument is asserted). Other return values

6541 * indicate a permanent error in copying the data.

6542 *

6543 * The actual amount of data copied will be returned in the

6544 * "copy_size" argument. In the event that the destination map

6545 * verification failed, this amount may be less than the amount

6546 * requested.

6547 */

6548 kern_return_t

6549 vm_fault_copy(

6550 vm_object_t src_object,

6551 vm_object_offset_t src_offset,

6552 vm_map_size_t *copy_size, /* INOUT */

6553 vm_object_t dst_object,

6554 vm_object_offset_t dst_offset,

6555 vm_map_t dst_map,

6556 vm_map_version_t *dst_version,

6557 int interruptible)

6558 {

6559 vm_page_t result_page;

6560

6561 vm_page_t src_page;

6562 vm_page_t src_top_page;

6563 vm_prot_t src_prot;

6564

6565 vm_page_t dst_page;

6566 vm_page_t dst_top_page;

6567 vm_prot_t dst_prot;

6568

6569 vm_map_size_t amount_left;

6570 vm_object_t old_copy_object;

6571 vm_object_t result_page_object = NULL;

6572 kern_return_t error = 0;

6573 vm_fault_return_t result;

6574

6575 vm_map_size_t part_size;

6576 struct vm_object_fault_info fault_info_src = {};

6577 struct vm_object_fault_info fault_info_dst = {};

6578

6579 /*

6580 * In order not to confuse the clustered pageins, align

6581 * the different offsets on a page boundary.

6582 */

6583

6584 #define RETURN(x) \

6585 MACRO_BEGIN \

6586 *copy_size -= amount_left; \

6587 MACRO_RETURN(x); \

6588 MACRO_END

6589

6590 amount_left = *copy_size;

6591

6592 fault_info_src.interruptible = interruptible;

6593 fault_info_src.behavior = VM_BEHAVIOR_SEQUENTIAL;

         fault_info_src.lo_offset = vm_object_trunc_page(src_offset);

         fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left;

6596 fault_info_src.stealth = TRUE;

6597

6598 fault_info_dst.interruptible = interruptible;

6599 fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL;

         fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset);

         fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left;

6602 fault_info_dst.stealth = TRUE;

6603

6604 do { /* while (amount_left > 0) */

6605 /*

6606 * There may be a deadlock if both source and destination

6607 * pages are the same. To avoid this deadlock, the copy must

6608 * start by getting the destination page in order to apply

6609 * COW semantics if any.

6610 */

6611

6612 RetryDestinationFault:;

6613

6614 dst_prot = VM_PROT_WRITE | VM_PROT_READ;

6615

6616 vm_object_lock(dst_object);

6617 vm_object_paging_begin(dst_object);

6618

6619 /* cap cluster size at maximum UPL size */

6620 upl_size_t cluster_size;

                 if (os_convert_overflow(amount_left, &cluster_size)) {

                         cluster_size = 0 - (upl_size_t)PAGE_SIZE;

6623 }

6624 fault_info_dst.cluster_size = cluster_size;

6625

6626 dst_page = VM_PAGE_NULL;

6627 result = vm_fault_page(dst_object,

6628 vm_object_trunc_page(dst_offset),

6629 VM_PROT_WRITE | VM_PROT_READ,

6630 FALSE,

6631 FALSE, /* page not looked up */

6632 &dst_prot, &dst_page, &dst_top_page,

                     (int *)0,

6634 &error,

6635 dst_map->no_zero_fill,

6636 FALSE, &fault_info_dst);

6637 switch (result) {

6638 case VM_FAULT_SUCCESS:

6639 break;

6640 case VM_FAULT_RETRY:

6641 goto RetryDestinationFault;

6642 case VM_FAULT_MEMORY_SHORTAGE:

                         if (vm_page_wait(interruptible)) {

6644 goto RetryDestinationFault;

6645 }

6646 OS_FALLTHROUGH;

6647 case VM_FAULT_INTERRUPTED:

6648 RETURN(MACH_SEND_INTERRUPTED);

6649 case VM_FAULT_SUCCESS_NO_VM_PAGE:

6650 /* success but no VM page: fail the copy */

6651 vm_object_paging_end(dst_object);

6652 vm_object_unlock(dst_object);

6653 OS_FALLTHROUGH;

6654 case VM_FAULT_MEMORY_ERROR:

6655 if (error) {

6656 return error;

6657 } else {

6658 return KERN_MEMORY_ERROR;

6659 }

6660 default:

                         panic("vm_fault_copy: unexpected error 0x%x from "

                             "vm_fault_page()\n", result);

6663 }

                 assert((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

6665

                 assert(dst_object == VM_PAGE_OBJECT(dst_page));

6667 old_copy_object = dst_object->copy;

6668

6669 /*

6670 * There exists the possibility that the source and

6671 * destination page are the same. But we can't

6672 * easily determine that now. If they are the

6673 * same, the call to vm_fault_page() for the

6674 * destination page will deadlock. To prevent this we

6675 * wire the page so we can drop busy without having

6676 * the page daemon steal the page. We clean up the

6677 * top page but keep the paging reference on the object

6678 * holding the dest page so it doesn't go away.

6679 */

6680

6681 vm_page_lockspin_queues();

                 vm_page_wire(dst_page, VM_KERN_MEMORY_OSFMK, TRUE);

6683 vm_page_unlock_queues();

6684 PAGE_WAKEUP_DONE(dst_page);

6685 vm_object_unlock(dst_object);

6686

6687 if (dst_top_page != VM_PAGE_NULL) {

6688 vm_object_lock(dst_object);

6689 VM_PAGE_FREE(dst_top_page);

6690 vm_object_paging_end(dst_object);

6691 vm_object_unlock(dst_object);

6692 }

6693

6694 RetrySourceFault:;

6695

6696 if (src_object == VM_OBJECT_NULL) {

6697 /*

6698 * No source object. We will just

6699 * zero-fill the page in dst_object.

6700 */

6701 src_page = VM_PAGE_NULL;

6702 result_page = VM_PAGE_NULL;

6703 } else {

6704 vm_object_lock(src_object);

6705 src_page = vm_page_lookup(src_object,

6706 vm_object_trunc_page(src_offset));

6707 if (src_page == dst_page) {

6708 src_prot = dst_prot;

6709 result_page = VM_PAGE_NULL;

6710 } else {

6711 src_prot = VM_PROT_READ;

6712 vm_object_paging_begin(src_object);

6713

6714 /* cap cluster size at maximum UPL size */

                                 if (os_convert_overflow(amount_left, &cluster_size)) {

                                         cluster_size = 0 - (upl_size_t)PAGE_SIZE;

6717 }

6718 fault_info_src.cluster_size = cluster_size;

6719

6720 result_page = VM_PAGE_NULL;

6721 result = vm_fault_page(

6722 src_object,

6723 vm_object_trunc_page(src_offset),

6724 VM_PROT_READ, FALSE,

6725 FALSE, /* page not looked up */

6726 &src_prot,

6727 &result_page, &src_top_page,

                                         (int *)0, &error, FALSE,

6729 FALSE, &fault_info_src);

6730

6731 switch (result) {

6732 case VM_FAULT_SUCCESS:

6733 break;

6734 case VM_FAULT_RETRY:

6735 goto RetrySourceFault;

6736 case VM_FAULT_MEMORY_SHORTAGE:

                                         if (vm_page_wait(interruptible)) {

6738 goto RetrySourceFault;

6739 }

6740 OS_FALLTHROUGH;

6741 case VM_FAULT_INTERRUPTED:

6742 vm_fault_copy_dst_cleanup(dst_page);

6743 RETURN(MACH_SEND_INTERRUPTED);

6744 case VM_FAULT_SUCCESS_NO_VM_PAGE:

6745 /* success but no VM page: fail */

6746 vm_object_paging_end(src_object);

6747 vm_object_unlock(src_object);

6748 OS_FALLTHROUGH;

6749 case VM_FAULT_MEMORY_ERROR:

6750 vm_fault_copy_dst_cleanup(dst_page);

6751 if (error) {

6752 return error;

6753 } else {

6754 return KERN_MEMORY_ERROR;

6755 }

6756 default:

6757 panic("vm_fault_copy(2): unexpected "

6758 "error 0x%x from "

                                             "vm_fault_page()\n", result);

6760 }

6761

6762 result_page_object = VM_PAGE_OBJECT(result_page);

6763 assert((src_top_page == VM_PAGE_NULL) ==

6764 (result_page_object == src_object));

6765 }

                         assert((src_prot & VM_PROT_READ) != VM_PROT_NONE);

6767 vm_object_unlock(result_page_object);

6768 }

6769

6770 vm_map_lock_read(dst_map);

6771

                 if (!vm_map_verify(dst_map, dst_version)) {

6773 vm_map_unlock_read(dst_map);

                         if (result_page != VM_PAGE_NULL && src_page != dst_page) {

6775 vm_fault_copy_cleanup(result_page, src_top_page);

6776 }

6777 vm_fault_copy_dst_cleanup(dst_page);

6778 break;

6779 }

                 assert(dst_object == VM_PAGE_OBJECT(dst_page));

6781

6782 vm_object_lock(dst_object);

6783

                 if (dst_object->copy != old_copy_object) {

6785 vm_object_unlock(dst_object);

6786 vm_map_unlock_read(dst_map);

                         if (result_page != VM_PAGE_NULL && src_page != dst_page) {

6788 vm_fault_copy_cleanup(result_page, src_top_page);

6789 }

6790 vm_fault_copy_dst_cleanup(dst_page);

6791 break;

6792 }

6793 vm_object_unlock(dst_object);

6794

6795 /*

6796 * Copy the page, and note that it is dirty

6797 * immediately.

6798 */

6799

                 if (!page_aligned(src_offset) ||

6801 !page_aligned(dst_offset) ||

6802 !page_aligned(amount_left)) {

6803 vm_object_offset_t src_po,

6804 dst_po;

6805

                         src_po = src_offset - vm_object_trunc_page(src_offset);

                         dst_po = dst_offset - vm_object_trunc_page(dst_offset);

6808

6809 if (dst_po > src_po) {

6810 part_size = PAGE_SIZE - dst_po;

6811 } else {

6812 part_size = PAGE_SIZE - src_po;

6813 }

6814 if (part_size > (amount_left)) {

6815 part_size = amount_left;

6816 }

6817

6818 if (result_page == VM_PAGE_NULL) {

                                 assert((vm_offset_t) dst_po == dst_po);

                                 assert((vm_size_t) part_size == part_size);

6821 vm_page_part_zero_fill(dst_page,

6822 (vm_offset_t) dst_po,

6823 (vm_size_t) part_size);

6824 } else {

                                 assert((vm_offset_t) src_po == src_po);

                                 assert((vm_offset_t) dst_po == dst_po);

                                 assert((vm_size_t) part_size == part_size);

6828 vm_page_part_copy(result_page,

6829 (vm_offset_t) src_po,

6830 dst_page,

6831 (vm_offset_t) dst_po,

6832 (vm_size_t)part_size);

6833 if (!dst_page->vmp_dirty) {

6834 vm_object_lock(dst_object);

6835 SET_PAGE_DIRTY(dst_page, TRUE);

6836 vm_object_unlock(dst_object);

6837 }

6838 }

6839 } else {

6840 part_size = PAGE_SIZE;

6841

6842 if (result_page == VM_PAGE_NULL) {

6843 vm_page_zero_fill(dst_page);

6844 } else {

6845 vm_object_lock(result_page_object);

6846 vm_page_copy(result_page, dst_page);

6847 vm_object_unlock(result_page_object);

6848

6849 if (!dst_page->vmp_dirty) {

6850 vm_object_lock(dst_object);

6851 SET_PAGE_DIRTY(dst_page, TRUE);

6852 vm_object_unlock(dst_object);

6853 }

6854 }

6855 }

6856

6857 /*

6858 * Unlock everything, and return

6859 */

6860

6861 vm_map_unlock_read(dst_map);

6862

                 if (result_page != VM_PAGE_NULL && src_page != dst_page) {

6864 vm_fault_copy_cleanup(result_page, src_top_page);

6865 }

6866 vm_fault_copy_dst_cleanup(dst_page);

6867

6868 amount_left -= part_size;

6869 src_offset += part_size;

6870 dst_offset += part_size;

         } while (amount_left > 0);

6872

6873 RETURN(KERN_SUCCESS);

6874 #undef RETURN

6875

6876 /*NOTREACHED*/

6877 }

6878

6879 #if VM_FAULT_CLASSIFY

6880 /*

6881 * Temporary statistics gathering support.

6882 */

6883

6884 /*

6885 * Statistics arrays:

6886 */

6887 #define VM_FAULT_TYPES_MAX 5

6888 #define VM_FAULT_LEVEL_MAX 8

6889

6890 int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

6891

6892 #define VM_FAULT_TYPE_ZERO_FILL 0

6893 #define VM_FAULT_TYPE_MAP_IN 1

6894 #define VM_FAULT_TYPE_PAGER 2

6895 #define VM_FAULT_TYPE_COPY 3

6896 #define VM_FAULT_TYPE_OTHER 4

6897

6898

6899 void

6900 vm_fault_classify(vm_object_t object,

6901 vm_object_offset_t offset,

6902 vm_prot_t fault_type)

6903 {

         int             type, level = 0;

6905 vm_page_t m;

6906

6907 while (TRUE) {

                 m = vm_page_lookup(object, offset);

6909 if (m != VM_PAGE_NULL) {

                         if (m->vmp_busy || m->vmp_error || m->vmp_restart || m->vmp_absent) {

6911 type = VM_FAULT_TYPE_OTHER;

6912 break;

6913 }

                         if (((fault_type & VM_PROT_WRITE) == 0) ||

                             ((level == 0) && object->copy == VM_OBJECT_NULL)) {

6916 type = VM_FAULT_TYPE_MAP_IN;

6917 break;

6918 }

6919 type = VM_FAULT_TYPE_COPY;

6920 break;

6921 } else {

6922 if (object->pager_created) {

6923 type = VM_FAULT_TYPE_PAGER;

6924 break;

6925 }

                         if (object->shadow == VM_OBJECT_NULL) {

6927 type = VM_FAULT_TYPE_ZERO_FILL;

6928 break;

6929 }

6930

6931 offset += object->vo_shadow_offset;

6932 object = object->shadow;

6933 level++;

6934 continue;

6935 }

6936 }

6937

6938 if (level > VM_FAULT_LEVEL_MAX) {

6939 level = VM_FAULT_LEVEL_MAX;

6940 }

6941

         vm_fault_stats[type][level] += 1;

6943

6944 return;

6945 }

6946

6947 /* cleanup routine to call from debugger */

6948

6949 void

6950 vm_fault_classify_init(void)

6951 {

6952 int type, level;

6953

         for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {

                 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {

                         vm_fault_stats[type][level] = 0;

6957 }

6958 }

6959

6960 return;

6961 }

6962 #endif /* VM_FAULT_CLASSIFY */

6963

/*
 * kdp_lightweight_fault:
 *
 * Debugger-context helper that resolves a page-aligned address in "map" to
 * a physical address without taking any locks.
 *
 * Returns the physical address of the backing page (or, for a page held by
 * the compressor, kdp_compressor_decompressed_page_paddr after a successful
 * vm_compressor_pager_get()), and 0 whenever the lookup cannot be completed:
 * unaligned address, map or object lock currently held exclusive, no map
 * entry, submap entry, no VM object, paging/activity in progress on an
 * object with a pager, non-default WIMG bits, a page in one of the
 * transient/unusual states tested below, or the end of the shadow chain.
 *
 * Panics if not_in_kdp is set, i.e. when called outside the debugger.
 */
6964 vm_offset_t
6965 kdp_lightweight_fault(vm_map_t map, vm_offset_t cur_target_addr)
6966 {
6967 vm_map_entry_t entry;
6968 vm_object_t object;
6969 vm_offset_t object_offset;
6970 vm_page_t m;
6971 int compressor_external_state, compressed_count_delta;
         int             compressor_flags = (C_DONT_BLOCK | C_KEEP | C_KDP);
6973 int my_fault_type = VM_PROT_READ;
6974 kern_return_t kr;
6975 int effective_page_mask, effective_page_size;
6976
         /* use the map's page geometry when it is smaller than the kernel's */
         if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
6978 effective_page_mask = VM_MAP_PAGE_MASK(map);
6979 effective_page_size = VM_MAP_PAGE_SIZE(map);
6980 } else {
6981 effective_page_mask = PAGE_MASK;
6982 effective_page_size = PAGE_SIZE;
6983 }
         /* NOTE(review): effective_page_size is assigned but not read again in this function */
6984
6985 if (not_in_kdp) {
6986 panic("kdp_lightweight_fault called from outside of debugger context");
6987 }
6988
6989 assert(map != VM_MAP_NULL);
6990
         /* asserted, then also checked at run time so the alignment test survives when assert() does nothing */
         assert((cur_target_addr & effective_page_mask) == 0);
         if ((cur_target_addr & effective_page_mask) != 0) {
6993 return 0;
6994 }
6995
         /* the map is being modified by its exclusive holder: do not walk it */
         if (kdp_lck_rw_lock_is_acquired_exclusive(&map->lock)) {
6997 return 0;
6998 }
6999
         if (!vm_map_lookup_entry(map, cur_target_addr, &entry)) {
7001 return 0;
7002 }
7003
         /* submaps are not descended into */
7004 if (entry->is_sub_map) {
7005 return 0;
7006 }
7007
7008 object = VME_OBJECT(entry);
7009 if (object == VM_OBJECT_NULL) {
7010 return 0;
7011 }
7012
         /* translate the map address into an offset within the entry's object */
         object_offset = cur_target_addr - entry->vme_start + VME_OFFSET(entry);
7014
         /* walk the shadow chain until a page is found or a bail-out condition is hit */
7015 while (TRUE) {
                 if (kdp_lck_rw_lock_is_acquired_exclusive(&object->Lock)) {
7017 return 0;
7018 }
7019
                 if (object->pager_created && (object->paging_in_progress ||
7021 object->activity_in_progress)) {
7022 return 0;
7023 }
7024
                 m = kdp_vm_page_lookup(object, vm_object_trunc_page(object_offset));
7026
7027 if (m != VM_PAGE_NULL) {
                         /* resident page: only a default-cacheability page in a quiescent state is returned */
                         if ((object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
7029 return 0;
7030 }
7031
                         if (m->vmp_laundry || m->vmp_busy || m->vmp_free_when_done || m->vmp_absent || m->vmp_error || m->vmp_cleaning ||
                             m->vmp_overwriting || m->vmp_restart || m->vmp_unusual) {
7034 return 0;
7035 }
7036
7037 assert(!m->vmp_private);
7038 if (m->vmp_private) {
7039 return 0;
7040 }
7041
7042 assert(!m->vmp_fictitious);
7043 if (m->vmp_fictitious) {
7044 return 0;
7045 }
7046
                         assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
                         if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
7049 return 0;
7050 }
7051
                         return ptoa(VM_PAGE_GET_PHYS_PAGE(m));
7053 }
7054
7055 compressor_external_state = VM_EXTERNAL_STATE_UNKNOWN;
7056
                 /*
                  * Not resident here.  If the pager reports that it holds the
                  * page, fetch it with the debugger flags (C_DONT_BLOCK, C_KEEP,
                  * C_KDP) into kdp_compressor_decompressed_page_ppnum and return
                  * that page's address; a failed fetch returns 0.
                  */
                 if (object->pager_created && MUST_ASK_PAGER(object, object_offset, compressor_external_state)) {
7058 if (compressor_external_state == VM_EXTERNAL_STATE_EXISTS) {
                                 kr = vm_compressor_pager_get(object->pager,
                                     vm_object_trunc_page(object_offset + object->paging_offset),
7061 kdp_compressor_decompressed_page_ppnum, &my_fault_type,
7062 compressor_flags, &compressed_count_delta);
7063 if (kr == KERN_SUCCESS) {
7064 return kdp_compressor_decompressed_page_paddr;
7065 } else {
7066 return 0;
7067 }
7068 }
7069 }
7070
                 /* end of the shadow chain: nothing backs this address */
                 if (object->shadow == VM_OBJECT_NULL) {
7072 return 0;
7073 }
7074
                 /* descend to the shadow object, rebasing the offset */
7075 object_offset += object->vo_shadow_offset;
7076 object = object->shadow;
7077 }
7078 }

7079

7080 /*
7081 * vm_page_validate_cs_fast():
7082 * Performs a few quick checks to determine if the page's code signature
7083 * really needs to be fully validated. It could:
7084 * 1. have been modified (i.e. automatically tainted),
7085 * 2. have already been validated,
7086 * 3. have already been found to be tainted,
7087 * 4. no longer have a backing store.
7088 * Returns FALSE if the page needs to be fully validated.
 *
 * Returns TRUE in every other case, i.e. when the caller has nothing
 * further to do.
 *
 * "fault_page_size" and "fault_phys_offset" are passed unchanged to the
 * VMP_CS_* accessors.
 *
 * The page's object must be locked (asserted held on entry); every path
 * that modifies the page, and the path that returns FALSE, additionally
 * asserts the lock is held exclusive.
7089 */
7090 static boolean_t
7091 vm_page_validate_cs_fast(
7092 vm_page_t page,
7093 vm_map_size_t fault_page_size,
7094 vm_map_offset_t fault_phys_offset)
7095 {
7096 vm_object_t object;
7097
7098 object = VM_PAGE_OBJECT(page);
7099 vm_object_lock_assert_held(object);
7100
7101 if (page->vmp_wpmapped &&
             !VMP_CS_TAINTED(page, fault_page_size, fault_phys_offset)) {
7103 /*
7104 * This page was mapped for "write" access sometime in the
7105 * past and could still be modifiable in the future.
7106 * Consider it tainted.
7107 * [ If the page was already found to be "tainted", no
7108 * need to re-validate. ]
7109 */
7110 vm_object_lock_assert_exclusive(object);
                 /* mark both validated and tainted so the check just below returns TRUE */
                 VMP_CS_SET_VALIDATED(page, fault_page_size, fault_phys_offset, TRUE);
                 VMP_CS_SET_TAINTED(page, fault_page_size, fault_phys_offset, TRUE);
7113 if (cs_debug) {
                         printf("CODESIGNING: %s: "
                             "page %p obj %p off 0x%llx "
7116 "was modified\n",
7117 __FUNCTION__,
7118 page, object, page->vmp_offset);
7119 }
7120 vm_cs_validated_dirtied++;
7121 }
7122
         /* already validated or already tainted: nothing more to check */
         if (VMP_CS_VALIDATED(page, fault_page_size, fault_phys_offset) ||
             VMP_CS_TAINTED(page, fault_page_size, fault_phys_offset)) {
7125 return TRUE;
7126 }
7127 vm_object_lock_assert_exclusive(object);
7128
7129 #if CHECK_CS_VALIDATION_BITMAP
7130 kern_return_t kr;
7131
         /* a KERN_SUCCESS from the bitmap check marks the whole page validated and untainted */
7132 kr = vnode_pager_cs_check_validation_bitmap(
7133 object->pager,
7134 page->vmp_offset + object->paging_offset,
7135 CS_BITMAP_CHECK);
7136 if (kr == KERN_SUCCESS) {
7137 page->vmp_cs_validated = VMP_CS_ALL_TRUE;
7138 page->vmp_cs_tainted = VMP_CS_ALL_FALSE;
7139 vm_cs_bitmap_validated++;
7140 return TRUE;
7141 }
7142 #endif /* CHECK_CS_VALIDATION_BITMAP */
7143
         if (!object->alive || object->terminating || object->pager == NULL) {
7145 /*
7146 * The object is terminating and we don't have its pager
7147 * so we can't validate the data...
7148 */
7149 return TRUE;
7150 }
7151
7152 /* we need to really validate this page */
7153 vm_object_lock_assert_exclusive(object);
7154 return FALSE;
7155 }

7156

7157 void

7158 vm_page_validate_cs_mapped_slow(

7159 vm_page_t page,

7160 const void *kaddr)

7161 {

7162 vm_object_t object;

7163 memory_object_offset_t mo_offset;

7164 memory_object_t pager;

7165 struct vnode *vnode;

7166 int validated, tainted, nx;

7167

7168 assert(page->vmp_busy);

7169 object = VM_PAGE_OBJECT(page);

7170 vm_object_lock_assert_exclusive(object);

7171

7172 vm_cs_validates++;

7173

7174 /*

7175 * Since we get here to validate a page that was brought in by

7176 * the pager, we know that this pager is all setup and ready

7177 * by now.

7178 */

7179 assert(object->code_signed);

7180 assert(!object->internal);

         assert(object->pager != NULL);

7182 assert(object->pager_ready);

7183

7184 pager = object->pager;

7185 assert(object->paging_in_progress);

7186 vnode = vnode_pager_lookup_vnode(pager);

         mo_offset = page->vmp_offset + object->paging_offset;

7188

7189 /* verify the SHA1 hash for this page */

7190 validated = 0;

7191 tainted = 0;

7192 nx = 0;

7193 cs_validate_page(vnode,

7194 pager,

7195 mo_offset,

             (const void *)((const char *)kaddr),

7197 &validated,

7198 &tainted,

7199 &nx);

7200

7201 page->vmp_cs_validated |= validated;

7202 page->vmp_cs_tainted |= tainted;

7203 page->vmp_cs_nx |= nx;

7204

7205 #if CHECK_CS_VALIDATION_BITMAP

         if (page->vmp_cs_validated == VMP_CS_ALL_TRUE &&

7207 page->vmp_cs_tainted == VMP_CS_ALL_FALSE) {

7208 vnode_pager_cs_check_validation_bitmap(object->pager,

7209 mo_offset,

7210 CS_BITMAP_SET);

7211 }

7212 #endif /* CHECK_CS_VALIDATION_BITMAP */

7213 }

7214

7215 void

7216 vm_page_validate_cs_mapped(

7217 vm_page_t page,

7218 vm_map_size_t fault_page_size,

7219 vm_map_offset_t fault_phys_offset,

7220 const void *kaddr)

7221 {

         if (!vm_page_validate_cs_fast(page, fault_page_size, fault_phys_offset)) {

7223 vm_page_validate_cs_mapped_slow(page, kaddr);

7224 }

7225 }

7226

/*
 * vm_page_map_and_validate_cs:
 *
 * Map "page" read-only into the kernel address space, run the full
 * code-signature validation on it via vm_page_validate_cs_mapped_slow(),
 * then undo the mapping if one had to be created.
 *
 * "object" must be the page's object, code-signed and locked exclusive
 * (asserted).  The page is kept busy and a paging reference is held on the
 * object for the duration; the busy bit is released again only if this
 * routine was the one to set it.  Panics if the page cannot be mapped.
 */
7227 static void
7228 vm_page_map_and_validate_cs(
7229 vm_object_t object,
7230 vm_page_t page)
7231 {
7232 vm_object_offset_t offset;
7233 vm_map_offset_t koffset;
7234 vm_map_size_t ksize;
7235 vm_offset_t kaddr;
7236 kern_return_t kr;
7237 boolean_t busy_page;
7238 boolean_t need_unmap;
7239
7240 vm_object_lock_assert_exclusive(object);
7241
7242 assert(object->code_signed);
7243 offset = page->vmp_offset;
7244
         /* remember whether the page was already busy so only our own busy bit is released */
7245 busy_page = page->vmp_busy;
7246 if (!busy_page) {
7247 /* keep page busy while we map (and unlock) the VM object */
7248 page->vmp_busy = TRUE;
7249 }
7250
7251 /*
7252 * Take a paging reference on the VM object
7253 * to protect it from collapse or bypass,
7254 * and keep it from disappearing too.
7255 */
7256 vm_object_paging_begin(object);
7257
7258 /* map the page in the kernel address space */
7259 ksize = PAGE_SIZE_64;
7260 koffset = 0;
7261 need_unmap = FALSE;
7262 kr = vm_paging_map_object(page,
7263 object,
7264 offset,
7265 VM_PROT_READ,
7266 FALSE, /* can't unlock object ! */
7267 &ksize,
7268 &koffset,
7269 &need_unmap);
7270 if (kr != KERN_SUCCESS) {
                 panic("%s: could not map page: 0x%x\n", __FUNCTION__, kr);
7272 }
         kaddr = CAST_DOWN(vm_offset_t, koffset);
7274
7275 /* validate the mapped page */
         vm_page_validate_cs_mapped_slow(page, (const void *) kaddr);
7277
         /* the page must still be busy, still belong to "object", and the lock still be ours */
7278 assert(page->vmp_busy);
         assert(object == VM_PAGE_OBJECT(page));
7280 vm_object_lock_assert_exclusive(object);
7281
7282 if (!busy_page) {
7283 PAGE_WAKEUP_DONE(page);
7284 }
7285 if (need_unmap) {
7286 /* unmap the page from the kernel address space */
                 vm_paging_unmap_object(object, koffset, koffset + ksize);
7288 koffset = 0;
7289 ksize = 0;
7290 kaddr = 0;
7291 }
7292 vm_object_paging_end(object);
7293 }

7294

7295 void

7296 vm_page_validate_cs(

7297 vm_page_t page,

7298 vm_map_size_t fault_page_size,

7299 vm_map_offset_t fault_phys_offset)

7300 {

7301 vm_object_t object;

7302

7303 object = VM_PAGE_OBJECT(page);

7304 vm_object_lock_assert_held(object);

7305

         if (vm_page_validate_cs_fast(page, fault_page_size, fault_phys_offset)) {

7307 return;

7308 }

7309 vm_page_map_and_validate_cs(object, page);

7310 }

7311

7312 void

7313 vm_page_validate_cs_mapped_chunk(

7314 vm_page_t page,

7315 const void *kaddr,

7316 vm_offset_t chunk_offset,

7317 vm_size_t chunk_size,

7318 boolean_t *validated_p,

7319 unsigned *tainted_p)

7320 {

7321 vm_object_t object;

7322 vm_object_offset_t offset, offset_in_page;

7323 memory_object_t pager;

7324 struct vnode *vnode;

7325 boolean_t validated;

7326 unsigned tainted;

7327

7328 *validated_p = FALSE;

7329 *tainted_p = 0;

7330

7331 assert(page->vmp_busy);

7332 object = VM_PAGE_OBJECT(page);

7333 vm_object_lock_assert_exclusive(object);

7334

7335 assert(object->code_signed);

7336 offset = page->vmp_offset;

7337

         if (!object->alive || object->terminating || object->pager == NULL) {

7339 /*

7340 * The object is terminating and we don't have its pager

7341 * so we can't validate the data...

7342 */

7343 return;

7344 }

7345 /*

7346 * Since we get here to validate a page that was brought in by

7347 * the pager, we know that this pager is all setup and ready

7348 * by now.

7349 */

7350 assert(!object->internal);

         assert(object->pager != NULL);

7352 assert(object->pager_ready);

7353

7354 pager = object->pager;

7355 assert(object->paging_in_progress);

7356 vnode = vnode_pager_lookup_vnode(pager);

7357

7358 /* verify the signature for this chunk */

7359 offset_in_page = chunk_offset;

7360 assert(offset_in_page < PAGE_SIZE);

7361

7362 tainted = 0;

7363 validated = cs_validate_range(vnode,

7364 pager,

7365 (object->paging_offset +

7366 offset +

7367 offset_in_page),

             (const void *)((const char *)kaddr

7369 + offset_in_page),

7370 chunk_size,

7371 &tainted);

7372 if (validated) {

7373 *validated_p = TRUE;

7374 }

7375 if (tainted) {

7376 *tainted_p = tainted;

7377 }

7378 }

7379

/*
 * Take the vm_rtfr_slock spin lock.  vm_record_rtfault() and
 * vmrtf_extract() hold it around their accesses to vmrtfrs.
 */
7380 static void
7381 vm_rtfrecord_lock(void)
7382 {
7383 lck_spin_lock(&vm_rtfr_slock);
7384 }

7385

/*
 * Release the vm_rtfr_slock spin lock taken by vm_rtfrecord_lock().
 */
7386 static void
7387 vm_rtfrecord_unlock(void)
7388 {
7389 lck_spin_unlock(&vm_rtfr_slock);
7390 }

7391

/*
 * Size in bytes of vmrtf_num_records records of type vm_rtfault_record_t.
 * The product is converted to the unsigned int return type.
 */
7392 unsigned int
7393 vmrtfaultinfo_bufsz(void)
7394 {
         return vmrtf_num_records * sizeof(vm_rtfault_record_t);
7396 }

7397

7398 #include <kern/backtrace.h>

7399

/*
 * vm_record_rtfault:
 *
 * Append one record describing a fault taken by "cthread" to the vmrtfrs
 * record array.
 *
 *   fstart        - start time of the fault; must be non-zero and not later
 *                   than the mach_continuous_time() sampled on entry
 *                   (asserted)
 *   fault_vaddr   - faulting address, stored as rtfaddr
 *   type_of_fault - stored as rtftype
 *
 * The record also carries the fault duration, the user PC from a
 * single-frame backtrace (0 if it could not be captured), the unique pid
 * returned by get_current_unique_pid() and the thread id.  The array is
 * used as a ring: after slot vmrtfr_maxi the write index wraps to 0.
 */
7400 __attribute__((noinline))
7401 static void
 vm_record_rtfault(thread_t cthread, uint64_t fstart, vm_map_offset_t fault_vaddr, int type_of_fault)
7403 {
7404 uint64_t fend = mach_continuous_time();
7405
7406 uint64_t cfpc = 0;
7407 uint64_t ctid = cthread->thread_id;
7408 uint64_t cupid = get_current_unique_pid();
7409
7410 uintptr_t bpc = 0;
7411 int btr = 0;
7412 bool u64 = false;
7413
7414 /* Capture a single-frame backtrace; this extracts just the program
7415 * counter at the point of the fault into "bpc", and should perform no
7416 * further user stack traversals, thus avoiding copyin()s and further
7417 * faults.
7418 */
         unsigned int bfrs = backtrace_thread_user(cthread, &bpc, 1U, &btr, &u64, NULL, false);
7420
         /* keep the PC only if the backtrace reported no error and produced a frame */
         if ((btr == 0) && (bfrs > 0)) {
7422 cfpc = bpc;
7423 }
7424
         assert((fstart != 0) && fend >= fstart);
7426 vm_rtfrecord_lock();
         assert(vmrtfrs.vmrtfr_curi <= vmrtfrs.vmrtfr_maxi);
7428
7429 vmrtfrs.vmrtf_total++;
         /* claim the current slot and advance the write index */
         vm_rtfault_record_t *cvmr = &vmrtfrs.vm_rtf_records[vmrtfrs.vmrtfr_curi++];
7431
7432 cvmr->rtfabstime = fstart;
7433 cvmr->rtfduration = fend - fstart;
7434 cvmr->rtfaddr = fault_vaddr;
7435 cvmr->rtfpc = cfpc;
7436 cvmr->rtftype = type_of_fault;
7437 cvmr->rtfupid = cupid;
7438 cvmr->rtftid = ctid;
7439
         /* wrap around once the last slot (index vmrtfr_maxi) has been used */
         if (vmrtfrs.vmrtfr_curi > vmrtfrs.vmrtfr_maxi) {
7441 vmrtfrs.vmrtfr_curi = 0;
7442 }
7443
7444 vm_rtfrecord_unlock();
7445 }

7446

7447 int

 vmrtf_extract(uint64_t cupid, __unused boolean_t isroot, unsigned long vrecordsz, void *vrecords, unsigned long *vmrtfrv)

7449 {

7450 vm_rtfault_record_t *cvmrd = vrecords;

7451 size_t residue = vrecordsz;

7452 size_t numextracted = 0;

7453 boolean_t early_exit = FALSE;

7454

7455 vm_rtfrecord_lock();

7456

         for (int vmfi = 0; vmfi <= vmrtfrs.vmrtfr_maxi; vmfi++) {

                 if (residue < sizeof(vm_rtfault_record_t)) {

7459 early_exit = TRUE;

7460 break;

7461 }

7462

                 if (vmrtfrs.vm_rtf_records[vmfi].rtfupid != cupid) {

7464 #if DEVELOPMENT || DEBUG

7465 if (isroot == FALSE) {

7466 continue;

7467 }

7468 #else

7469 continue;

7470 #endif /* DEVDEBUG */

7471 }

7472

                 *cvmrd = vmrtfrs.vm_rtf_records[vmfi];

7474 cvmrd++;

7475 residue -= sizeof(vm_rtfault_record_t);

7476 numextracted++;

7477 }

7478

7479 vm_rtfrecord_unlock();

7480

7481 *vmrtfrv = numextracted;

7482 return early_exit;

7483 }

7484

7485 /*
7486 * Only allow one diagnosis to be in flight at a time, to avoid
7487 * creating too much additional memory usage.
 *
 * vmtc_diagnosing is the in-flight flag: vmtc_text_page_diagnose_setup()
 * swaps it 0 -> 1 and vmtc_text_page_diagnose() swaps it back 1 -> 0.
7488 */
7489 static volatile uint_t vmtc_diagnosing;
7490 unsigned int vmtc_total; /* calls to vmtc_text_page_diagnose_setup() */
7491 unsigned int vmtc_undiagnosed; /* skipped: another diagnosis in flight, or a copyin() failed */
7492 unsigned int vmtc_not_eligible;
7493 unsigned int vmtc_copyin_fail; /* copyin() of the page contents failed */
7494 unsigned int vmtc_not_found; /* before/after copies of the page were identical */
7495 unsigned int vmtc_one_bit_flip; /* the only difference was a single flipped bit */
 /*
  * Histogram of differing-byte counts: bucket b counts diagnoses with at
  * most 2^b differing bytes.  Single bit flips are also counted in bucket
  * 0, and anything above 2^MAX_TRACK_POWER2 lands in the last bucket.
  */
 unsigned int vmtc_byte_counts[MAX_TRACK_POWER2 + 1];
7497
7498 #if DEVELOPMENT || DEBUG
7499 /*
7500 * Keep around the last diagnosed corruption buffers to aid in debugging.
7501 */
7502 static size_t vmtc_last_buffer_size;
7503 static uint64_t *vmtc_last_before_buffer = NULL;
7504 static uint64_t *vmtc_last_after_buffer = NULL;
7505 #endif /* DEVELOPMENT || DEBUG */

7506

7507 /*

7508 * Set things up so we can diagnose a potential text page corruption.

7509 */

7510 static uint64_t *

7511 vmtc_text_page_diagnose_setup(

7512 vm_map_offset_t code_addr)

7513 {

7514 uint64_t *buffer;

         size_t          size = MIN(vm_map_page_size(current_map()), PAGE_SIZE);

7516

         (void)OSAddAtomic(1, &vmtc_total);

7518

7519 /*

7520 * If another is being diagnosed, skip this one.

7521 */

         if (!OSCompareAndSwap(0, 1, &vmtc_diagnosing)) {

                 (void)OSAddAtomic(1, &vmtc_undiagnosed);

7524 return NULL;

7525 }

7526

7527 /*

7528 * Get the contents of the corrupt page.

7529 */

         buffer = kheap_alloc(KHEAP_DEFAULT, size, Z_WAITOK);

         if (copyin((user_addr_t)vm_map_trunc_page(code_addr, size - 1), buffer, size) != 0) {

7532 /* copyin error, so undo things */

                 kheap_free(KHEAP_DEFAULT, buffer, size);

                 (void)OSAddAtomic(1, &vmtc_undiagnosed);

7535 ++vmtc_copyin_fail;

                 if (!OSCompareAndSwap(1, 0, &vmtc_diagnosing)) {

7537 panic("Bad compare and swap in setup!");

7538 }

7539 return NULL;

7540 }

7541 return buffer;

7542 }

7543

7544 /*

7545 * Diagnose the text page by comparing its contents with

7546 * the one we've previously saved.

7547 */

7548 static void

7549 vmtc_text_page_diagnose(

7550 vm_map_offset_t code_addr,

7551 uint64_t *old_code_buffer)

7552 {

7553 uint64_t *new_code_buffer;

         size_t          size = MIN(vm_map_page_size(current_map()), PAGE_SIZE);

         uint_t          count = (uint_t)size / sizeof(uint64_t);

7556 uint_t diff_count = 0;

7557 bool bit_flip = false;

7558 uint_t b;

7559 uint64_t *new;

7560 uint64_t *old;

7561

         new_code_buffer = kheap_alloc(KHEAP_DEFAULT, size, Z_WAITOK);

         if (copyin((user_addr_t)vm_map_trunc_page(code_addr, size - 1), new_code_buffer, size) != 0) {

7564 /* copyin error, so undo things */

                 (void)OSAddAtomic(1, &vmtc_undiagnosed);

7566 ++vmtc_copyin_fail;

7567 goto done;

7568 }

7569

7570 new = new_code_buffer;

7571 old = old_code_buffer;

         for (; count-- > 0; ++new, ++old) {

                 if (*new == *old) {

7574 continue;

7575 }

7576

7577 /*

7578 * On first diff, check for a single bit flip

7579 */

                 if (diff_count == 0) {

                         uint64_t x = (*new ^ *old);

                         assert(x != 0);

                         if ((x & (x - 1)) == 0) {

7584 bit_flip = true;

7585 ++diff_count;

7586 continue;

7587 }

7588 }

7589

7590 /*

7591 * count up the number of different bytes.

7592 */

                 for (b = 0; b < sizeof(uint64_t); ++b) {

                         char *n = (char *)new;

                         char *o = (char *)old;

                         if (n[b] != o[b]) {

7597 ++diff_count;

7598 }

7599 }

7600

7601 /* quit counting when too many */

                 if (diff_count > (1 << MAX_TRACK_POWER2)) {

7603 break;

7604 }

7605 }

7606

         if (diff_count > 1) {

7608 bit_flip = false;

7609 }

7610

         if (diff_count == 0) {

7612 ++vmtc_not_found;

7613 } else if (bit_flip) {

7614 ++vmtc_one_bit_flip;

7615 ++vmtc_byte_counts[0];

7616 } else {

                 for (b = 0; b <= MAX_TRACK_POWER2; ++b) {

                         if (diff_count <= (1 << b)) {

7619 ++vmtc_byte_counts[b];

7620 break;

7621 }

7622 }

                 if (diff_count > (1 << MAX_TRACK_POWER2)) {

7624 ++vmtc_byte_counts[MAX_TRACK_POWER2];

7625 }

7626 }

7627

7628 done:

7629 /*

7630 * Free up the code copy buffers, but save the last

7631 * set on development / debug kernels in case they

7632 * can provide evidence for debugging memory stomps.

7633 */

7634 #if DEVELOPMENT || DEBUG

7635 if (vmtc_last_before_buffer != NULL) {

                 kheap_free(KHEAP_DEFAULT, vmtc_last_before_buffer, vmtc_last_buffer_size);

7637 }

7638 if (vmtc_last_after_buffer != NULL) {

                 kheap_free(KHEAP_DEFAULT, vmtc_last_after_buffer, vmtc_last_buffer_size);

7640 }

7641 vmtc_last_before_buffer = old_code_buffer;

7642 vmtc_last_after_buffer = new_code_buffer;

7643 vmtc_last_buffer_size = size;

7644 #else /* DEVELOPMENT || DEBUG */

         kheap_free(KHEAP_DEFAULT, new_code_buffer, size);

         kheap_free(KHEAP_DEFAULT, old_code_buffer, size);

7647 #endif /* DEVELOPMENT || DEBUG */

7648

7649 /*

7650 * We're finished, so clear the diagnosing flag.

7651 */

         if (!OSCompareAndSwap(1, 0, &vmtc_diagnosing)) {

7653 panic("Bad compare and swap in diagnose!");

7654 }

7655 }

7656

7657 /*
7658 * For the given map, virt address, find the object, offset, and page.
7659 * This has to lookup the map entry, verify protections, walk any shadow chains.
7660 * If found, returns with the object locked.
 *
 * On KERN_SUCCESS, *ret_object is the (still locked) object that holds the
 * page, *ret_offset the page-truncated offset within it and *ret_page the
 * page itself, which is neither busy nor marked for restart.
 *
 * Returns KERN_FAILURE, with no object lock held and the out parameters
 * untouched, when the lookup fails, the mapping is not executable, or no
 * object in the shadow chain holds the page.
7661 */
7662 static kern_return_t
7663 vmtc_revalidate_lookup(
7664 vm_map_t map,
7665 vm_map_offset_t vaddr,
7666 vm_object_t *ret_object,
7667 vm_object_offset_t *ret_offset,
7668 vm_page_t *ret_page)
7669 {
7670 vm_object_t object;
7671 vm_object_offset_t offset;
7672 vm_page_t page;
7673 kern_return_t kr = KERN_SUCCESS;
7674 uint8_t object_lock_type = OBJECT_LOCK_EXCLUSIVE;
7675 vm_map_version_t version;
7676 boolean_t wired;
7677 struct vm_object_fault_info fault_info = {};
7678 vm_map_t real_map = NULL;
7679 vm_prot_t prot;
7680 vm_object_t shadow;
7681
7682 /*
7683 * Find the object/offset for the given location/map.
7684 * Note this returns with the object locked.
7685 */
7686 restart:
7687 vm_map_lock_read(map);
7688 object = VM_OBJECT_NULL; /* in case we come around the restart path */
         /*
          * NOTE(review): "map" is passed by address, so the lookup may
          * replace it; a later "goto restart" then starts from the updated
          * value with the original vaddr -- confirm that is intended.
          */
         kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
             object_lock_type, &version, &object, &offset, &prot, &wired,
7691 &fault_info, &real_map, NULL);
7692 vm_map_unlock_read(map);
         if (real_map != NULL && real_map != map) {
7694 vm_map_unlock(real_map);
7695 }
7696
7697 /*
7698 * If there's no mapping here, or if we fail because the page
7699 * wasn't mapped executable, we can ignore this.
7700 */
7701 if (kr != KERN_SUCCESS ||
7702 object == NULL ||
7703 !(prot & VM_PROT_EXECUTE)) {
7704 kr = KERN_FAILURE;
7705 goto done;
7706 }
7707
7708 /*
7709 * Chase down any shadow chains to find the actual page.
7710 */
7711 for (;;) {
7712 /*
7713 * See if the page is on the current object.
7714 */
                 page = vm_page_lookup(object, vm_object_trunc_page(offset));
7716 if (page != NULL) {
7717 /* restart the lookup */
7718 if (page->vmp_restart) {
7719 vm_object_unlock(object);
7720 goto restart;
7721 }
7722
7723 /*
7724 * If this page is busy, we need to wait for it.
7725 */
7726 if (page->vmp_busy) {
                                 /* after the sleep, drop the lock and redo the whole lookup from the map */
                                 PAGE_SLEEP(object, page, TRUE);
7728 vm_object_unlock(object);
7729 goto restart;
7730 }
7731 break;
7732 }
7733
7734 /*
7735 * If the object doesn't have the page and
7736 * has no shadow, then we can quit.
7737 */
7738 shadow = object->shadow;
7739 if (shadow == NULL) {
7740 kr = KERN_FAILURE;
7741 goto done;
7742 }
7743
7744 /*
7745 * Move to the next object
7746 */
7747 offset += object->vo_shadow_offset;
         /* lock the shadow before releasing the current object */
7748 vm_object_lock(shadow);
7749 vm_object_unlock(object);
7750 object = shadow;
7751 shadow = VM_OBJECT_NULL;
7752 }
7753 *ret_object = object;
         *ret_offset = vm_object_trunc_page(offset);
7755 *ret_page = page;
7756
7757 done:
         /* on failure the caller gets no locked object back */
         if (kr != KERN_SUCCESS && object != NULL) {
7759 vm_object_unlock(object);
7760 }
7761 return kr;
7762 }

7763

7764 /*

7765 * Check if a page is wired, needs extra locking.

7766 */

7767 static bool

7768 is_page_wired(vm_page_t page)

7769 {

7770 bool result;

7771 vm_page_lock_queues();

7772 result = VM_PAGE_WIRED(page);

7773 vm_page_unlock_queues();

7774 return result;

7775 }

7776

7777 /*

7778 * A fatal process error has occurred in the given task.

7779 * Recheck the code signing of the text page at the given

7780 * address to check for a text page corruption.

7781 *

7782 * Returns KERN_FAILURE if a page was found to be corrupt

7783 * by failing to match its code signature. KERN_SUCCESS

7784 * means the page is either valid or we don't have the

7785 * information to say it's corrupt.

7786 */

7787 kern_return_t

7788 revalidate_text_page(task_t task, vm_map_offset_t code_addr)

7789 {

7790 kern_return_t kr;

7791 vm_map_t map;

7792 vm_object_t object = NULL;

7793 vm_object_offset_t offset;

7794 vm_page_t page = NULL;

7795 struct vnode *vnode;

7796 bool do_invalidate = false;

7797 uint64_t *diagnose_buffer = NULL;

7798

7799 map = task->map;

         if (task->map == NULL) {

7801 return KERN_SUCCESS;

7802 }

7803

         kr = vmtc_revalidate_lookup(map, code_addr, &object, &offset, &page);

7805 if (kr != KERN_SUCCESS) {

7806 goto done;

7807 }

7808

7809 /*

7810 * The object needs to have a pager.

7811 */

         if (object->pager == NULL) {

7813 goto done;

7814 }

7815

7816 /*

7817 * Needs to be a vnode backed page to have a signature.

7818 */

         vnode = vnode_pager_lookup_vnode(object->pager);

7820 if (vnode == NULL) {

7821 goto done;

7822 }

7823

7824 /*

7825 * Object checks to see if we should proceed.

7826 */

         if (!object->code_signed ||     /* no code signature to check */

7828 object->internal || /* internal objects aren't signed */

7829 object->terminating || /* the object and its pages are already going away */

7830 !object->pager_ready) { /* this should happen, but check shouldn't hurt */

7831 goto done;

7832 }

7833

7834 /*

7835 * Check the code signature of the page in question.

7836 */

7837 vm_page_map_and_validate_cs(object, page);

7838

7839 /*

7840 * At this point:

7841 * vmp_cs_validated |= validated (set if a code signature exists)

7842 * vmp_cs_tainted |= tainted (set if code signature violation)

7843 * vmp_cs_nx |= nx; ??

7844 *

7845 * if vmp_pmapped then have to pmap_disconnect..

7846 * other flags to check on object or page?

7847 */

         if (page->vmp_cs_tainted != VMP_CS_ALL_FALSE) {

7849 #if DEBUG || DEVELOPMENT

7850 /*

7851 * On development builds, a boot-arg can be used to cause

7852 * a panic, instead of a quiet repair.

7853 */

7854 if (vmtc_panic_instead) {

                         panic("Text page corruption detected: vm_page_t 0x%llx\n", (long long)(uintptr_t)page);

7856 }

7857 #endif /* DEBUG || DEVELOPMENT */

7858

7859 /*

7860 * We're going to invalidate this page. Mark it as busy so we can

7861 * drop the object lock and use copyin() to save its contents.

7862 */

7863 do_invalidate = true;

7864 assert(!page->vmp_busy);

7865 page->vmp_busy = TRUE;

7866 vm_object_unlock(object);

7867 diagnose_buffer = vmtc_text_page_diagnose_setup(code_addr);

7868 }

7869

7870 done:

7871 if (do_invalidate) {

7872 vm_object_lock(object);

7873 assert(page->vmp_busy);

                 assert(VM_PAGE_OBJECT(page) == object);      /* Since the page was busy, this shouldn't change */

                 assert(page->vmp_offset == offset);

7876 PAGE_WAKEUP_DONE(page); /* make no longer busy */

7877

7878 /*

7879 * Invalidate, i.e. toss, the corrupted page.

7880 */

7881 if (!page->vmp_cleaning &&

7882 !page->vmp_laundry &&

7883 !page->vmp_fictitious &&

7884 !page->vmp_precious &&

7885 !page->vmp_absent &&

7886 !page->vmp_error &&

7887 !page->vmp_dirty &&

7888 !is_page_wired(page)) {

7889 if (page->vmp_pmapped) {

                                 int refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(page));

7891 if (refmod & VM_MEM_MODIFIED) {

7892 SET_PAGE_DIRTY(page, FALSE);

7893 }

7894 if (refmod & VM_MEM_REFERENCED) {

7895 page->vmp_reference = TRUE;

7896 }

7897 }

7898 /* If the page seems intentionally modified, don't trash it. */

7899 if (!page->vmp_dirty) {

7900 VM_PAGE_FREE(page);

7901 } else {

                                 (void)OSAddAtomic(1, &vmtc_not_eligible);

7903 }

7904 } else {

                         (void)OSAddAtomic(1, &vmtc_not_eligible);

7906 }

7907 vm_object_unlock(object);

7908

7909 /*

7910 * Now try to diagnose the type of failure by faulting

7911 * in a new copy and diff'ing it with what we saved.

7912 */

7913 if (diagnose_buffer) {

7914 vmtc_text_page_diagnose(code_addr, diagnose_buffer);

7915 }

7916 return KERN_FAILURE;

7917 }

7918

7919 if (object != NULL) {

7920 vm_object_unlock(object);

7921 }

7922 return KERN_SUCCESS;

7923 }

7924

#if DEBUG || DEVELOPMENT
/*
 * For implementing unit tests - ask the pmap to corrupt a text page.
 * We have to find the page, to get the physical address, then invoke
 * the pmap.
 *
 * va is an address in the current task's map.
 *
 * Returns:
 *	KERN_FAILURE	the current task has no map, or the page is in a
 *			state we can't work with (each reason is logged)
 *	otherwise	the error from the page lookup, or the result of
 *			pmap_test_text_corruption()
 */
extern kern_return_t vm_corrupt_text_addr(uintptr_t);

kern_return_t
vm_corrupt_text_addr(uintptr_t va)
{
	task_t                  task = current_task();
	vm_map_t                map;
	kern_return_t           kr = KERN_SUCCESS;
	vm_object_t             object = VM_OBJECT_NULL;
	vm_object_offset_t      offset;
	vm_page_t               page = NULL;
	pmap_paddr_t            pa;

	map = task->map;
	if (map == NULL) {
		printf("corrupt_text_addr: no map\n");
		return KERN_FAILURE;
	}

	kr = vmtc_revalidate_lookup(map, (vm_map_offset_t)va, &object, &offset, &page);
	if (kr != KERN_SUCCESS) {
		printf("corrupt_text_addr: page lookup failed\n");
		return kr;
	}

	/* get the physical address to use: page base plus offset within the page */
	pa = ptoa(VM_PAGE_GET_PHYS_PAGE(page)) + (va - vm_object_trunc_page(va));

	/*
	 * Check we have something we can work with.
	 * Due to racing with pageout as we enter the sysctl,
	 * it's theoretically possible to have the page disappear, just
	 * before the lookup.
	 *
	 * NOTE(review): the original note called this "highly likely to
	 * happen often"; presumably "unlikely" was meant - confirm.
	 * Radar 72857482 was filed to bubble up the error here to the
	 * sysctl result and have the test not FAIL in that case.
	 *
	 * Every failing condition is logged; none of them short-circuits
	 * the others.
	 */
	if (page->vmp_busy) {
		printf("corrupt_text_addr: vmp_busy\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_cleaning) {
		printf("corrupt_text_addr: vmp_cleaning\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_laundry) {
		/* previously mis-reported as "vmp_cleaning" */
		printf("corrupt_text_addr: vmp_laundry\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_fictitious) {
		printf("corrupt_text_addr: vmp_fictitious\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_precious) {
		printf("corrupt_text_addr: vmp_precious\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_absent) {
		printf("corrupt_text_addr: vmp_absent\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_error) {
		printf("corrupt_text_addr: vmp_error\n");
		kr = KERN_FAILURE;
	}
	if (page->vmp_dirty) {
		printf("corrupt_text_addr: vmp_dirty\n");
		kr = KERN_FAILURE;
	}
	if (is_page_wired(page)) {
		printf("corrupt_text_addr: wired\n");
		kr = KERN_FAILURE;
	}
	if (!page->vmp_pmapped) {
		printf("corrupt_text_addr: !vmp_pmapped\n");
		kr = KERN_FAILURE;
	}

	if (kr == KERN_SUCCESS) {
		printf("corrupt_text_addr: using physaddr 0x%llx\n", (long long)pa);
		kr = pmap_test_text_corruption(pa);
		if (kr != KERN_SUCCESS) {
			printf("corrupt_text_addr: pmap error %d\n", kr);
		}
	} else {
		/* dump everything we computed to help debug the failed attempt */
		printf("corrupt_text_addr: object %p\n", object);
		printf("corrupt_text_addr: offset 0x%llx\n", (uint64_t)offset);
		printf("corrupt_text_addr: va 0x%llx\n", (uint64_t)va);
		printf("corrupt_text_addr: vm_object_trunc_page(va) 0x%llx\n", (uint64_t)vm_object_trunc_page(va));
		printf("corrupt_text_addr: vm_page_t %p\n", page);
		printf("corrupt_text_addr: ptoa(PHYS_PAGE) 0x%llx\n", (uint64_t)ptoa(VM_PAGE_GET_PHYS_PAGE(page)));
		printf("corrupt_text_addr: using physaddr 0x%llx\n", (uint64_t)pa);
	}

	/* the object is unlocked here once we are done with the page */
	if (object != VM_OBJECT_NULL) {
		vm_object_unlock(object);
	}
	return kr;
}
#endif /* DEBUG || DEVELOPMENT */