]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/mappings.c
xnu-517.tar.gz
[apple/xnu.git] / osfmk / ppc / mappings.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * This file is used to maintain the virtual to real mappings for a PowerPC machine.
27 * The code herein is primarily used to bridge between the pmap layer and the hardware layer.
28 * Currently, some of the function of this module is contained within pmap.c. We may want to move
29 * all of this into it (or most anyway) for the sake of performance. We shall see as we write it.
30 *
31 * We also depend upon the structure of the phys_entry control block. We do put some processor
32 * specific stuff in there.
33 *
34 */
35
36 #include <cpus.h>
37 #include <debug.h>
38 #include <mach_kgdb.h>
39 #include <mach_vm_debug.h>
40 #include <db_machine_commands.h>
41
42 #include <kern/thread.h>
43 #include <kern/thread_act.h>
44 #include <mach/vm_attributes.h>
45 #include <mach/vm_param.h>
46 #include <vm/vm_fault.h>
47 #include <vm/vm_kern.h>
48 #include <vm/vm_map.h>
49 #include <vm/vm_page.h>
50 #include <kern/spl.h>
51
52 #include <kern/misc_protos.h>
53 #include <ppc/exception.h>
54 #include <ppc/misc_protos.h>
55 #include <ppc/proc_reg.h>
56
57 #include <vm/pmap.h>
58 #include <ppc/pmap.h>
59 #include <ppc/mem.h>
60
61 #include <ppc/new_screen.h>
62 #include <ppc/Firmware.h>
63 #include <ppc/mappings.h>
64 #include <ddb/db_output.h>
65
66 #include <console/video_console.h> /* (TEST/DEBUG) */
67
#define PERFTIMES 0

/* Submap used by mapping_adjust for mapping-block allocations (kmem_alloc_wired/kmem_free) */
vm_map_t		mapping_map = VM_MAP_NULL;

unsigned int	incrVSID = 0;						/* VSID increment value (computed in mapping_init) */
unsigned int	mappingdeb0 = 0;					/* Debug counters -- see mappingdeb usage elsewhere */
unsigned int	mappingdeb1 = 0;
int ppc_max_adrsp;									/* Maximum address spaces (set from maxAdrSp in mapping_init) */

addr64_t		*mapdebug;							/* (BRINGUP) */
extern unsigned int DebugWork;						/* (BRINGUP) */

extern unsigned int	hash_table_size;				/* Size of the hardware hash table; used to size incrVSID */

void mapping_verify(void);
void mapping_phys_unused(ppnum_t pa);
84
/*
 *	ppc_prot translates from the mach representation of protections to the PPC version.
 *	We also allow for a direct setting of the protection bits. This extends the mach
 *	concepts to allow the greater control we need for Virtual Machines (VMM).
 *	Calculation of it like this saves a memory reference - and maybe a couple of microseconds.
 *	It eliminates the use of this table:
 *	unsigned char ppc_prot[16] = { 0, 3, 2, 2, 3, 3, 2, 2, 0, 1, 2, 3, 0, 1, 2, 3 };
 *
 *	Each 2-bit field of the constant 0xE4E4AFAC holds the PPC PP bits for one of the
 *	16 possible inputs; shifting right by (p * 2) selects the field for input p.
 */

#define ppc_prot(p) ((0xE4E4AFAC >> ((p) << 1)) & 3)	/* (p) parenthesized so compound arguments (e.g. a|b) expand correctly */
95
96 /*
97 * About PPC VSID generation:
98 *
99 * This function is called to generate an address space ID. This space ID must be unique within
100 * the system. For the PowerPC, it is used to build the VSID. We build a VSID in the following
101 * way: space ID << 4 | segment. Since a VSID is 24 bits, and out of that, we reserve the last
102 * 4, so, we can have 2^20 (2M) unique IDs. Each pmap has a unique space ID, so we should be able
103 * to have 2M pmaps at a time, which we couldn't, we'd run out of memory way before then. The
104 * problem is that only a certain number of pmaps are kept in a free list and if that is full,
105  * they are released. This causes us to lose track of what space IDs are free to be reused.
106 * We can do 4 things: 1) not worry about it, 2) keep all free pmaps, 3) rebuild all mappings
107 * when the space ID wraps, or 4) scan the list of pmaps and find a free one.
108 *
109 * Yet another consideration is the hardware use of the VSID. It is used as part of the hash
110 * calculation for virtual address lookup. An improperly chosen value could potentially cause
111 * too many hashes to hit the same bucket, causing PTEG overflows. The actual hash function
112 * is (page index XOR vsid) mod number of ptegs. For a 32MB machine, using the suggested
113 * hash table size, there are 2^12 (8192) PTEGs. Remember, though, that the bottom 4 bits
114  * are reserved for the segment number, which means that we really have 2^(12-4) = 512 space IDs
115 * before we start hashing to the same buckets with the same vaddrs. Also, within a space ID,
116 * every 8192 pages (32MB) within a segment will hash to the same bucket. That's 8 collisions
117 * per segment. So, a scan of every page for 256MB would fill 32 PTEGs completely, but
118 * with no overflow. I don't think that this is a problem.
119 *
120  * There may be a problem with the space ID, though. A new space ID is generated (mainly)
121 * whenever there is a fork. There shouldn't really be any problem because (for a 32MB
122 * machine) we can have 512 pmaps and still not have hash collisions for the same address.
123 * The potential problem, though, is if we get long-term pmaps that have space IDs that are
124 * the same modulo 512. We can reduce this problem by having the segment number be bits
125 * 0-3 of the space ID rather than 20-23. Doing this means that, in effect, corresponding
126 * vaddrs in different segments hash to the same PTEG. While this is somewhat of a problem,
127  * I don't think that it is as significant as the other, so, I'll make the space ID
128 * with segment first.
129 *
130 * The final, and biggest problem is the wrap, which will happen every 2^20 space IDs.
131 * While this is a problem that should only happen in periods counted in weeks, it can and
132 * will happen. This is assuming a monotonically increasing space ID. If we were to search
133 * for an inactive space ID, there could not be a wrap until there was 2^20 concurrent space IDs.
134 * That's pretty unlikely to happen. There couldn't be enough storage to support a million tasks.
135 *
136 * So, what we do is to keep all active pmaps in a chain (anchored from kernel_pmap and
137 * locked by free_pmap_lock) that is sorted in VSID sequence order.
138 *
139 * Whenever we need a VSID, we walk the list looking for the next in the sequence from
140  * the last that was freed. Then we allocate that.
141 *
142 * NOTE: We must be called with interruptions off and free_pmap_lock held.
143 *
144 */
145
146 /*
147 * mapping_init();
148 * Do anything that needs to be done before the mapping system can be used.
149 * Hash table must be initialized before we call this.
150 *
151 * Calculate the SID increment. Currently we use size^(1/2) + size^(1/4) + 1;
152 */
153
154 void mapping_init(void) {
155
156 unsigned int tmp, maxeff, rwidth;
157
158 ppc_max_adrsp = maxAdrSp; /* Set maximum address spaces */
159
160 maxeff = 32; /* Assume 32-bit */
161 if(per_proc_info[0].pf.Available & pf64Bit) maxeff = 64; /* Is this a 64-bit machine? */
162
163 rwidth = per_proc_info[0].pf.pfMaxVAddr - maxAdrSpb; /* Reduce address width by width of address space ID */
164 if(rwidth > maxeff) rwidth = maxeff; /* If we still have more virtual than effective, clamp at effective */
165
166 vm_max_address = 0xFFFFFFFFFFFFFFFFULL >> (64 - rwidth); /* Get maximum effective address supported */
167 vm_max_physical = 0xFFFFFFFFFFFFFFFFULL >> (64 - per_proc_info[0].pf.pfMaxPAddr); /* Get maximum physical address supported */
168
169 if(per_proc_info[0].pf.Available & pf64Bit) { /* Are we 64 bit? */
170 tmp = 12; /* Size of hash space */
171 }
172 else {
173 __asm__ volatile("cntlzw %0, %1" : "=r" (tmp) : "r" (hash_table_size)); /* Get number of leading 0s */
174 tmp = 32 - tmp; /* Size of hash space */
175 }
176
177 incrVSID = 1 << ((tmp + 1) >> 1); /* Get ceiling of sqrt of table size */
178 incrVSID |= 1 << ((tmp + 1) >> 2); /* Get ceiling of quadroot of table size */
179 incrVSID |= 1; /* Set bit and add 1 */
180
181 return;
182
183 }
184
185
186 /*
187 * mapping_remove(pmap_t pmap, addr64_t va);
188 * Given a pmap and virtual address, this routine finds the mapping and unmaps it.
189 * The mapping block will be added to
190 * the free list. If the free list threshold is reached, garbage collection will happen.
191 *
192 * We also pass back the next higher mapped address. This is done so that the higher level
193 * pmap_remove function can release a range of addresses simply by calling mapping_remove
194 * in a loop until it finishes the range or is returned a vaddr of 0.
195 *
196 * Note that if the mapping is not found, we return the next VA ORed with 1
197 *
198 */
199
200 addr64_t mapping_remove(pmap_t pmap, addr64_t va) { /* Remove a single mapping for this VADDR
201 Returns TRUE if a mapping was found to remove */
202
203 mapping *mp;
204 addr64_t nextva;
205
206 disable_preemption(); /* Don't change threads */
207
208 while(1) { /* Keep trying until we truely fail */
209 mp = hw_rem_map(pmap, va, &nextva); /* Remove a mapping from this pmap */
210 if(((unsigned int)mp & mapRetCode) != mapRtRemove) break; /* If it is gone, we are done */
211 }
212
213 enable_preemption(); /* Thread change ok */
214
215 if(!mp) return (nextva | 1); /* Nothing found to unmap */
216
217 if((unsigned int)mp & mapRetCode) { /* Was there a failure? */
218
219 panic("mapping_remove: hw_rem_map failed - pmap = %08X, va = %016llX, code = %08X\n",
220 pmap, va, mp);
221 }
222
223 mapping_free(mp); /* Add mapping to the free list */
224
225 return nextva; /* Tell them we did it */
226 }
227
228 /*
229 * mapping_make(pmap, va, pa, flags, size, prot) - map a virtual address to a real one
230 *
231 * This routine takes the given parameters, builds a mapping block, and queues it into the
232 * correct lists.
233 *
234 * pmap (virtual address) is the pmap to map into
235 * va (virtual address) is the 64-bit virtual address that is being mapped
236 * pa (physical page number) is the physical page number (i.e., physcial address >> 12). This is
237 * a 32-bit quantity.
238 * Flags:
239 * block if 1, mapping is a block, size parameter is used. Note: we do not keep
240 * reference and change information or allow protection changes of blocks.
241 * any changes must first unmap and then remap the area.
242 * use attribute Use specified attributes for map, not defaults for physical page
243 * perm Mapping is permanent
244 * cache inhibited Cache inhibited (used if use attribute or block set )
245 * guarded Guarded access (used if use attribute or block set )
246 * size size of block (not used if not block)
247 * prot VM protection bits
248 * attr Cachability/Guardedness
249 *
250 * Returns 0 if mapping was successful. Returns vaddr that overlaps/collides.
251 * Returns 1 for any other failure.
252 *
253 * Note that we make an assumption that all memory in the range 0f 0x0000000080000000 to 0x00000000FFFFFFFF is reserved
254 * for I/O and default the cache attrubutes appropriately. The caller is free to set whatever they want however.
255 *
256 * If there is any physical page that is not found in the physent table, the mapping is forced to be a
257 * block mapping of length 1. This keeps us from trying to update a physent during later mapping use,
258 * e.g., fault handling.
259 *
260 *
261 */
262
263 addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, unsigned int size, vm_prot_t prot) { /* Make an address mapping */
264
265 register mapping *mp;
266 addr64_t colladdr;
267 unsigned int pindex, mflags, pattr, wimg;
268 phys_entry *physent;
269 int i, nlists;
270
271 disable_preemption(); /* Don't change threads */
272
273 pindex = 0;
274
275 mflags = 0x01000000; /* Start building mpFlags field (busy count = 1) */
276
277 if(!(flags & mmFlgBlock)) { /* Is this a block map? */
278
279 size = 1; /* Set size to 1 page if not block */
280
281 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
282 if(!physent) { /* Did we find the physical page? */
283 mflags |= mpBlock; /* Force this to a block if no physent */
284 size = 1; /* Force size to 1 page */
285 pattr = 0; /* Assume normal, non-I/O memory */
286 if((pa & 0xFFF80000) == 0x00080000) pattr = mmFlgCInhib | mmFlgGuarded; /* If this page is in I/O range, set I/O attributes */
287 }
288 else pattr = ((physent->ppLink & (ppI | ppG)) >> 4); /* Get the default attributes from physent */
289
290 if(flags & mmFlgUseAttr) pattr = flags & (mmFlgCInhib | mmFlgGuarded); /* Use requested attributes */
291 }
292 else { /* This is a block */
293
294 pattr = flags & (mmFlgCInhib | mmFlgGuarded); /* Use requested attributes */
295 mflags |= mpBlock; /* Show that this is a block */
296 }
297
298 wimg = 0x2; /* Set basic PPC wimg to 0b0010 - Coherent */
299 if(pattr & mmFlgCInhib) wimg |= 0x4; /* Add cache inhibited if we need to */
300 if(pattr & mmFlgGuarded) wimg |= 0x1; /* Add guarded if we need to */
301
302 mflags = mflags | (pindex << 16); /* Stick in the physical entry table index */
303
304 if(flags & mmFlgPerm) mflags |= mpPerm; /* Set permanent mapping */
305
306 size = size - 1; /* Change size to offset */
307 if(size > 0xFFFF) return 1; /* Leave if size is too big */
308
309 nlists = mapSetLists(pmap); /* Set number of lists this will be on */
310
311 mp = mapping_alloc(nlists); /* Get a spare mapping block with this many lists */
312
313 /* the mapping is zero except that the mpLists field is set */
314 mp->mpFlags |= mflags; /* Add in the rest of the flags to mpLists */
315 mp->mpSpace = pmap->space; /* Set the address space/pmap lookup ID */
316 mp->mpBSize = size; /* Set the size */
317 mp->mpPte = 0; /* Set the PTE invalid */
318 mp->mpPAddr = pa; /* Set the physical page number */
319 mp->mpVAddr = (va & ~mpHWFlags) | (wimg << 3) | ppc_prot(prot); /* Add the protection and attributes to the field */
320
321 while(1) { /* Keep trying... */
322 colladdr = hw_add_map(pmap, mp); /* Go add the mapping to the pmap */
323 if(!colladdr) { /* All is ok... */
324 enable_preemption(); /* Ok to switch around here */
325 return 0; /* Return... */
326 }
327
328 if((colladdr & mapRetCode) == mapRtRemove) { /* Is our target being removed? */
329 (void)mapping_remove(pmap, colladdr); /* Yes, go help out */
330 continue; /* Try to add it now */
331 }
332
333 if((colladdr & mapRetCode) == mapRtMapDup) { /* Is our target already mapped (collision mapping must be identical)? */
334 mapping_free(mp); /* Return mapping to the free list */
335 enable_preemption(); /* Ok to switch around here */
336 return 0; /* Normal return */
337 }
338
339 if(colladdr != mapRtBadLk) { /* Did it collide? */
340 mapping_free(mp); /* Yeah, toss the pending mapping */
341 enable_preemption(); /* Ok to switch around here */
342 return colladdr; /* Pass back the overlapping address */
343 }
344
345 panic("mapping_make: hw_add_map failed - code = %08X, pmap = %08X, va = %016llX, mapping = %08X\n",
346 colladdr, pmap, va, mp); /* Die dead */
347 }
348
349 return 1; /* Leave... */
350 }
351
352
353 /*
354 * mapping *mapping_find(pmap, va, *nextva, full) - Finds a mapping
355 *
356 * Looks up the vaddr and returns the mapping and the next mapped va
357 * If full is true, it will descend through all nested pmaps to find actual mapping
358 *
359 * Must be called with interruptions disabled or we can hang trying to remove found mapping.
360 *
361 * Returns 0 if not found and the virtual address of the mapping if it is
362 * Note that the mappings busy count is bumped. It is the responsibility of the caller
363 * to drop the count. If this is not done, any attempt to remove the mapping will hang.
364 *
365 * NOTE: The nextva field is not valid when full is TRUE.
366 *
367 *
368 */
369
370 mapping *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) { /* Make an address mapping */
371
372 register mapping *mp;
373 addr64_t curva;
374 pmap_t curpmap;
375 int nestdepth;
376
377 curpmap = pmap; /* Remember entry */
378 nestdepth = 0; /* Set nest depth */
379 curva = (addr64_t)va; /* Set current va */
380
381 while(1) {
382
383 mp = hw_find_map(curpmap, curva, nextva); /* Find the mapping for this address */
384 if((unsigned int)mp == mapRtBadLk) { /* Did we lock up ok? */
385 panic("mapping_find: pmap lock failure - rc = %08X, pmap = %08X\n", mp, curpmap); /* Die... */
386 }
387
388 if(!mp || !(mp->mpFlags & mpNest) || !full) break; /* Are we a nest or are we only going one deep? */
389
390 if(mp->mpFlags & mpSpecial) { /* Don't chain through a special mapping */
391 mp = 0; /* Set not found */
392 break;
393 }
394
395 if(nestdepth++ > 64) { /* Have we nested too far down? */
396 panic("mapping_find: too many nested pmaps - va = %016llX, curva = %016llX, pmap = %08X, curpmap = %08X\n",
397 va, curva, pmap, curpmap);
398 }
399
400 curva = curva + mp->mpNestReloc; /* Relocate va to new pmap */
401 curpmap = (pmap_t) pmapTrans[mp->mpSpace].pmapVAddr; /* Get the address of the nested pmap */
402 mapping_drop_busy(mp); /* We have everything we need from the mapping */
403
404 }
405
406 return mp; /* Return the mapping if we found one */
407 }
408
409 /*
410 * kern_return_t mapping_protect(pmap_t pmap, addt_t va, vm_prot_t prot, addr64_t *nextva) - change the protection of a virtual page
411 *
412 * This routine takes a pmap and virtual address and changes
413 * the protection. If there are PTEs associated with the mappings, they will be invalidated before
414 * the protection is changed.
415 *
416 * We return success if we change the protection or if there is no page mapped at va. We return failure if
417  * the va corresponds to a block mapped area or the mapping is permanent.
418 *
419 *
420 */
421
422 int mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) { /* Change protection of a virtual page */
423
424 int ret;
425
426 ret = hw_protect(pmap, va, ppc_prot(prot), nextva); /* Try to change the protect here */
427
428 switch (ret) { /* Decode return code */
429
430 case mapRtOK: /* Changed */
431 case mapRtNotFnd: /* Didn't find it */
432 return mapRtOK; /* Ok, return... */
433 break;
434
435 case mapRtBlock: /* Block map, just ignore request */
436 case mapRtNest: /* Nested pmap, just ignore request */
437 return ret; /* Pass back return code */
438 break;
439
440 default:
441 panic("mapping_protect: hw_protect failed - rc = %d, pmap = %08X, va = %016llX\n", ret, pmap, va);
442
443 }
444
445 }
446
447 /*
448 * void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) - change the protection of a physical page
449 *
450 * This routine takes a physical entry and runs through all mappings attached to it and changes
451 * the protection. If there are PTEs associated with the mappings, they will be invalidated before
452 * the protection is changed. There is no limitation on changes, e.g.,
453 * higher to lower, lower to higher.
454 *
455 * Any mapping that is marked permanent is not changed
456 *
457 * Phys_entry is unlocked.
458 */
459
460 void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) { /* Change protection of all mappings to page */
461
462 unsigned int pindex;
463 phys_entry *physent;
464
465 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
466 if(!physent) { /* Did we find the physical page? */
467 panic("mapping_protect_phys: invalid physical page %08X\n", pa);
468 }
469
470 hw_walk_phys(physent, hwpSPrtPhy, hwpSPrtMap, hwpNoop, ppc_prot(prot)); /* Set the new protection for page and mappings */
471
472 return; /* Leave... */
473 }
474
475
476 /*
477 * void mapping_clr_mod(ppnum_t pa) - clears the change bit of a physical page
478 *
479 * This routine takes a physical entry and runs through all mappings attached to it and turns
480 * off the change bit.
481 */
482
483 void mapping_clr_mod(ppnum_t pa) { /* Clears the change bit of a physical page */
484
485 unsigned int pindex;
486 phys_entry *physent;
487
488 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
489 if(!physent) { /* Did we find the physical page? */
490 panic("mapping_clr_mod: invalid physical page %08X\n", pa);
491 }
492
493 hw_walk_phys(physent, hwpNoop, hwpCCngMap, hwpCCngPhy, 0); /* Clear change for page and mappings */
494 return; /* Leave... */
495 }
496
497
498 /*
499 * void mapping_set_mod(ppnum_t pa) - set the change bit of a physical page
500 *
501 * This routine takes a physical entry and runs through all mappings attached to it and turns
502 * on the change bit.
503 */
504
505 void mapping_set_mod(ppnum_t pa) { /* Sets the change bit of a physical page */
506
507 unsigned int pindex;
508 phys_entry *physent;
509
510 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
511 if(!physent) { /* Did we find the physical page? */
512 panic("mapping_set_mod: invalid physical page %08X\n", pa);
513 }
514
515 hw_walk_phys(physent, hwpNoop, hwpSCngMap, hwpSCngPhy, 0); /* Set change for page and mappings */
516 return; /* Leave... */
517 }
518
519
520 /*
521 * void mapping_clr_ref(ppnum_t pa) - clears the reference bit of a physical page
522 *
523 * This routine takes a physical entry and runs through all mappings attached to it and turns
524 * off the reference bit.
525 */
526
527 void mapping_clr_ref(ppnum_t pa) { /* Clears the reference bit of a physical page */
528
529 unsigned int pindex;
530 phys_entry *physent;
531
532 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
533 if(!physent) { /* Did we find the physical page? */
534 panic("mapping_clr_ref: invalid physical page %08X\n", pa);
535 }
536
537 hw_walk_phys(physent, hwpNoop, hwpCRefMap, hwpCRefPhy, 0); /* Clear reference for page and mappings */
538 return; /* Leave... */
539 }
540
541
542 /*
543 * void mapping_set_ref(ppnum_t pa) - set the reference bit of a physical page
544 *
545 * This routine takes a physical entry and runs through all mappings attached to it and turns
546 * on the reference bit.
547 */
548
549 void mapping_set_ref(ppnum_t pa) { /* Sets the reference bit of a physical page */
550
551 unsigned int pindex;
552 phys_entry *physent;
553
554 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
555 if(!physent) { /* Did we find the physical page? */
556 panic("mapping_set_ref: invalid physical page %08X\n", pa);
557 }
558
559 hw_walk_phys(physent, hwpNoop, hwpSRefMap, hwpSRefPhy, 0); /* Set reference for page and mappings */
560 return; /* Leave... */
561 }
562
563
564 /*
565 * void mapping_tst_mod(ppnum_t pa) - test the change bit of a physical page
566 *
567 * This routine takes a physical entry and runs through all mappings attached to it and tests
568 * the changed bit.
569 */
570
571 boolean_t mapping_tst_mod(ppnum_t pa) { /* Tests the change bit of a physical page */
572
573 unsigned int pindex, rc;
574 phys_entry *physent;
575
576 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
577 if(!physent) { /* Did we find the physical page? */
578 panic("mapping_tst_mod: invalid physical page %08X\n", pa);
579 }
580
581 rc = hw_walk_phys(physent, hwpTCngPhy, hwpTCngMap, hwpNoop, 0); /* Set change for page and mappings */
582 return ((rc & (unsigned long)ppC) != 0); /* Leave with change bit */
583 }
584
585
586 /*
587 * void mapping_tst_ref(ppnum_t pa) - tests the reference bit of a physical page
588 *
589 * This routine takes a physical entry and runs through all mappings attached to it and tests
590 * the reference bit.
591 */
592
593 boolean_t mapping_tst_ref(ppnum_t pa) { /* Tests the reference bit of a physical page */
594
595 unsigned int pindex, rc;
596 phys_entry *physent;
597
598 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
599 if(!physent) { /* Did we find the physical page? */
600 panic("mapping_tst_ref: invalid physical page %08X\n", pa);
601 }
602
603 rc = hw_walk_phys(physent, hwpTRefPhy, hwpTRefMap, hwpNoop, 0); /* Test reference for page and mappings */
604 return ((rc & (unsigned long)ppR) != 0); /* Leave with reference bit */
605 }
606
607
608 /*
609 * phys_ent *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) - tests the reference bit of a physical page
610 *
611 * This routine takes a physical page number and returns the phys_entry associated with it. It also
612 * calculates the bank address associated with the entry
613 * the reference bit.
614 */
615
616 phys_entry *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) { /* Finds the physical entry for the page */
617
618 phys_entry *physent;
619 int i;
620
621 for(i = 0; i < pmap_mem_regions_count; i++) { /* Walk through the list */
622 if(!(unsigned int)pmap_mem_regions[i].mrPhysTab) continue; /* Skip any empty lists */
623 if((pp < pmap_mem_regions[i].mrStart) || (pp > pmap_mem_regions[i].mrEnd)) continue; /* This isn't ours */
624
625 *pindex = (i * sizeof(mem_region_t)) / 4; /* Make the word index to this list */
626
627 return &pmap_mem_regions[i].mrPhysTab[pp - pmap_mem_regions[i].mrStart]; /* Return the physent pointer */
628 }
629
630 return (phys_entry *)0; /* Shucks, can't find it... */
631
632 }
633
634
635
636
637 /*
638 * mapping_adjust(void) - Releases free mapping blocks and/or allocates new ones
639 *
640 * This routine frees any mapping blocks queued to mapCtl.mapcrel. It also checks
641 * the number of free mappings remaining, and if below a threshold, replenishes them.
642 * The list will be replenshed from mapCtl.mapcrel if there are enough. Otherwise,
643 * a new one is allocated.
644 *
645 * This routine allocates and/or frees memory and must be called from a safe place.
646 * Currently, vm_pageout_scan is the safest place.
647 */
648
thread_call_t mapping_adjust_call;				/* Deferred-call handle so vm_pageout can schedule us */
static thread_call_data_t mapping_adjust_call_data;

void mapping_adjust(void) {						/* Adjust free mappings */

	kern_return_t	retr = KERN_SUCCESS;
	mappingblok	*mb, *mbn;
	spl_t		s;
	int	allocsize, i;
	extern int vm_page_free_count;

	/* First call (or tiny minimum): size the free-mapping floor to 1/16 of physical pages */
	if(mapCtl.mapcmin <= MAPPERBLOK) {
		mapCtl.mapcmin = (sane_size / PAGE_SIZE) / 16;

#if DEBUG
		kprintf("mapping_adjust: minimum entries rqrd = %08X\n", mapCtl.mapcmin);
		kprintf("mapping_adjust: free = %08X; in use = %08X; release = %08X\n",
		  mapCtl.mapcfree, mapCtl.mapcinuse, mapCtl.mapcreln);
#endif
	}

	s = splhigh();							/* Don't bother from now on */
	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */
		panic("mapping_adjust - timeout getting control lock (1)\n");	/* Tell all and die */
	}

	/* Lazily set up the thread call the first time through */
	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t)mapping_adjust,
				  (thread_call_param_t)NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}

	while(1) {							/* Keep going until we've got enough */

		allocsize = mapCtl.mapcmin - mapCtl.mapcfree;		/* Figure out how much we need */
		if(allocsize < 1) break;				/* Leave if we have all we need */

		if((unsigned int)(mbn = mapCtl.mapcrel)) {		/* Can we rescue a free one? */
			mapCtl.mapcrel = mbn->nextblok;			/* Dequeue it */
			mapCtl.mapcreln--;				/* Back off the count */
			allocsize = MAPPERBLOK;				/* Show we allocated one block */
		}
		else {							/* No free ones, try to get it */

			allocsize = (allocsize + MAPPERBLOK - 1) / MAPPERBLOK;	/* Get the number of pages we need */

			/* NOTE: we drop the control lock and splhigh around the allocation, then
			   reacquire below -- mapCtl fields may have changed while unlocked */
			hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);	/* Unlock our stuff */
			splx(s);					/* Restore 'rupts */

			for(; allocsize > 0; allocsize >>= 1) {		/* Try allocating in descending halves */
				retr = kmem_alloc_wired(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE * allocsize);	/* Find a virtual address to use */
				if((retr != KERN_SUCCESS) && (allocsize == 1)) {	/* Did we find any memory at all? */
					break;
				}
				if(retr == KERN_SUCCESS) break;		/* We got some memory, bail out... */
			}

			allocsize = allocsize * MAPPERBLOK;		/* Convert pages to number of maps allocated */
			s = splhigh();					/* Don't bother from now on */
			if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */
				panic("mapping_adjust - timeout getting control lock (2)\n");	/* Tell all and die */
			}
		}

		if (retr != KERN_SUCCESS)
			break;						/* Fail to alocate, bail out... */
		for(; allocsize > 0; allocsize -= MAPPERBLOK) {		/* Release one block at a time */
			mapping_free_init((vm_offset_t)mbn, 0, 1);	/* Initialize a non-permanent block */
			mbn = (mappingblok *)((unsigned int)mbn + PAGE_SIZE);	/* Point to the next slot */
		}

		/* Track the high-water mark of total mappings ever available */
		if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
			mapCtl.mapcmaxalloc = mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1));
	}

	if(mapCtl.mapcholdoff) {					/* Should we hold off this release? */
		mapCtl.mapcrecurse = 0;					/* We are done now */
		hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);		/* Unlock our stuff */
		splx(s);						/* Restore 'rupts */
		return;							/* Return... */
	}

	/* Detach the entire pending-release chain under the lock, then free the pages
	   after unlocking so kmem_free is never called at splhigh with the lock held */
	mbn = mapCtl.mapcrel;						/* Get first pending release block */
	mapCtl.mapcrel = 0;						/* Dequeue them */
	mapCtl.mapcreln = 0;						/* Set count to 0 */

	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);			/* Unlock our stuff */
	splx(s);							/* Restore 'rupts */

	while((unsigned int)mbn) {					/* Toss 'em all */
		mb = mbn->nextblok;					/* Get the next */

		kmem_free(mapping_map, (vm_offset_t) mbn, PAGE_SIZE);	/* Release this mapping block */

		mbn = mb;						/* Chain to the next */
	}

	__asm__ volatile("eieio");					/* Make sure all is well */
	mapCtl.mapcrecurse = 0;						/* We are done now */
	return;
}
751
752 /*
753 * mapping_free(mapping *mp) - release a mapping to the free list
754 *
755 * This routine takes a mapping and adds it to the free list.
756 * If this mapping make the block non-empty, we queue it to the free block list.
757 * NOTE: we might want to queue it to the end to keep quelch the pathalogical
758 * case when we get a mapping and free it repeatedly causing the block to chain and unchain.
759 * If this release fills a block and we are above the threshold, we release the block
760 */
761
762 void mapping_free(struct mapping *mp) { /* Release a mapping */
763
764 mappingblok *mb, *mbn;
765 spl_t s;
766 unsigned int full, mindx, lists;
767
768 mindx = ((unsigned int)mp & (PAGE_SIZE - 1)) >> 6; /* Get index to mapping */
769 mb = (mappingblok *)((unsigned int)mp & -PAGE_SIZE); /* Point to the mapping block */
770 lists = (mp->mpFlags & mpLists); /* get #lists */
771 if ((lists == 0) || (lists > kSkipListMaxLists)) /* panic if out of range */
772 panic("mapping_free: mpLists invalid\n");
773
774 #if 0
775 mp->mpFlags = 0x99999999; /* (BRINGUP) */
776 mp->mpSpace = 0x9999; /* (BRINGUP) */
777 mp->mpBSize = 0x9999; /* (BRINGUP) */
778 mp->mpPte = 0x99999998; /* (BRINGUP) */
779 mp->mpPAddr = 0x99999999; /* (BRINGUP) */
780 mp->mpVAddr = 0x9999999999999999ULL; /* (BRINGUP) */
781 mp->mpAlias = 0x9999999999999999ULL; /* (BRINGUP) */
782 mp->mpList0 = 0x9999999999999999ULL; /* (BRINGUP) */
783 mp->mpList[0] = 0x9999999999999999ULL; /* (BRINGUP) */
784 mp->mpList[1] = 0x9999999999999999ULL; /* (BRINGUP) */
785 mp->mpList[2] = 0x9999999999999999ULL; /* (BRINGUP) */
786
787 if(lists > mpBasicLists) { /* (BRINGUP) */
788 mp->mpList[3] = 0x9999999999999999ULL; /* (BRINGUP) */
789 mp->mpList[4] = 0x9999999999999999ULL; /* (BRINGUP) */
790 mp->mpList[5] = 0x9999999999999999ULL; /* (BRINGUP) */
791 mp->mpList[6] = 0x9999999999999999ULL; /* (BRINGUP) */
792 mp->mpList[7] = 0x9999999999999999ULL; /* (BRINGUP) */
793 mp->mpList[8] = 0x9999999999999999ULL; /* (BRINGUP) */
794 mp->mpList[9] = 0x9999999999999999ULL; /* (BRINGUP) */
795 mp->mpList[10] = 0x9999999999999999ULL; /* (BRINGUP) */
796 }
797 #endif
798
799
800 s = splhigh(); /* Don't bother from now on */
801 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
802 panic("mapping_free - timeout getting control lock\n"); /* Tell all and die */
803 }
804
805 full = !(mb->mapblokfree[0] | mb->mapblokfree[1]); /* See if full now */
806 mb->mapblokfree[mindx >> 5] |= (0x80000000 >> (mindx & 31)); /* Flip on the free bit */
807 if ( lists > mpBasicLists ) { /* if big block, lite the 2nd bit too */
808 mindx++;
809 mb->mapblokfree[mindx >> 5] |= (0x80000000 >> (mindx & 31));
810 mapCtl.mapcfree++;
811 mapCtl.mapcinuse--;
812 }
813
814 if(full) { /* If it was full before this: */
815 mb->nextblok = mapCtl.mapcnext; /* Move head of list to us */
816 mapCtl.mapcnext = mb; /* Chain us to the head of the list */
817 if(!((unsigned int)mapCtl.mapclast))
818 mapCtl.mapclast = mb;
819 }
820
821 mapCtl.mapcfree++; /* Bump free count */
822 mapCtl.mapcinuse--; /* Decriment in use count */
823
824 mapCtl.mapcfreec++; /* Count total calls */
825
826 if(mapCtl.mapcfree > mapCtl.mapcmin) { /* Should we consider releasing this? */
827 if(((mb->mapblokfree[0] | 0x80000000) & mb->mapblokfree[1]) == 0xFFFFFFFF) { /* See if empty now */
828
829 if(mapCtl.mapcnext == mb) { /* Are we first on the list? */
830 mapCtl.mapcnext = mb->nextblok; /* Unchain us */
831 if(!((unsigned int)mapCtl.mapcnext)) mapCtl.mapclast = 0; /* If last, remove last */
832 }
833 else { /* We're not first */
834 for(mbn = mapCtl.mapcnext; mbn != 0; mbn = mbn->nextblok) { /* Search for our block */
835 if(mbn->nextblok == mb) break; /* Is the next one our's? */
836 }
837 if(!mbn) panic("mapping_free: attempt to release mapping block (%08X) not on list\n", mp);
838 mbn->nextblok = mb->nextblok; /* Dequeue us */
839 if(mapCtl.mapclast == mb) mapCtl.mapclast = mbn; /* If last, make our predecessor last */
840 }
841
842 if(mb->mapblokflags & mbPerm) { /* Is this permanently assigned? */
843 mb->nextblok = mapCtl.mapcnext; /* Move chain head to us */
844 mapCtl.mapcnext = mb; /* Chain us to the head */
845 if(!((unsigned int)mb->nextblok)) mapCtl.mapclast = mb; /* If last, make us so */
846 }
847 else {
848 mapCtl.mapcfree -= MAPPERBLOK; /* Remove the block from the free count */
849 mapCtl.mapcreln++; /* Count on release list */
850 mb->nextblok = mapCtl.mapcrel; /* Move pointer */
851 mapCtl.mapcrel = mb; /* Chain us in front */
852 }
853 }
854 }
855
856 if(mapCtl.mapcreln > MAPFRTHRSH) { /* Do we have way too many releasable mappings? */
857 if(hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) { /* Make sure we aren't recursing */
858 thread_call_enter(mapping_adjust_call); /* Go toss some */
859 }
860 }
861 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
862 splx(s); /* Restore 'rupts */
863
864 return; /* Bye, dude... */
865 }
866
867
868 /*
869 * mapping_alloc(lists) - obtain a mapping from the free list
870 *
871 * This routine takes a mapping off of the free list and returns its address.
 * The mapping is zeroed, and its mpLists count is set.  The caller passes in
 * the number of skiplists it would prefer; if this number is greater than
 * mpBasicLists (ie, 4) then we need to allocate a 128-byte mapping, which is
 * just two consecutive free entries coalesced into one.  If we cannot find
 * two consecutive free entries, we clamp the list count down to mpBasicLists
 * and return a basic 64-byte node.  Our caller never knows the difference.
878 *
879 * If this allocation empties a block, we remove it from the free list.
880 * If this allocation drops the total number of free entries below a threshold,
881 * we allocate a new block.
882 *
883 */
884
885 mapping *mapping_alloc(int lists) { /* Obtain a mapping */
886
887 register mapping *mp;
888 mappingblok *mb, *mbn;
889 spl_t s;
890 int mindx;
891 kern_return_t retr;
892 int big = (lists > mpBasicLists); /* set flag if big block req'd */
893 pmap_t refpmap, ckpmap;
894 unsigned int space, i;
895 int ref_count;
896 addr64_t va, nextva;
897 extern pmap_t free_pmap_list;
898 extern int free_pmap_count;
899 decl_simple_lock_data(extern,free_pmap_lock)
900 boolean_t found_mapping;
901 boolean_t do_rescan;
902
903 s = splhigh(); /* Don't bother from now on */
904 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
905 panic("mapping_alloc - timeout getting control lock\n"); /* Tell all and die */
906 }
907
908 if(!((unsigned int)mapCtl.mapcnext)) { /* Are there any free mappings? */
909
910 /*
911 * No free mappings. First, there may be some mapping blocks on the "to be released"
912 * list. If so, rescue one. Otherwise, try to steal a couple blocks worth.
913 */
914
915 if(mbn = mapCtl.mapcrel) { /* Try to rescue a block from impending doom */
916 mapCtl.mapcrel = mbn->nextblok; /* Pop the queue */
917 mapCtl.mapcreln--; /* Back off the count */
918 mapping_free_init((vm_offset_t)mbn, 0, 1); /* Initialize a non-permanent block */
919 goto rescued;
920 }
921
922 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);
923
924 simple_lock(&free_pmap_lock);
925
926 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
927 panic("mapping_alloc - timeout getting control lock\n"); /* Tell all and die */
928 }
929
930 if (!((unsigned int)mapCtl.mapcnext)) {
931
932 refpmap = (pmap_t)cursor_pmap->pmap_link.next;
933 space = mapCtl.mapcflush.spacenum;
934 while (refpmap != cursor_pmap) {
935 if(((pmap_t)(refpmap->pmap_link.next))->spaceNum > space) break;
936 refpmap = (pmap_t)refpmap->pmap_link.next;
937 }
938
939 ckpmap = refpmap;
940 va = mapCtl.mapcflush.addr;
941 found_mapping = FALSE;
942
943 while (mapCtl.mapcfree <= (MAPPERBLOK*2)) {
944
945 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);
946
947 ckpmap = (pmap_t)ckpmap->pmap_link.next;
948
949 if ((ckpmap->stats.resident_count != 0) && (ckpmap != kernel_pmap)) {
950 do_rescan = TRUE;
951 for (i=0;i<8;i++) {
952 mp = hw_purge_map(ckpmap, va, &nextva);
953
954 if((unsigned int)mp & mapRetCode) {
955 panic("mapping_alloc: hw_purge_map failed - pmap = %08X, va = %16llX, code = %08X\n", ckpmap, va, mp);
956 }
957
958 if(!mp) {
959 if (do_rescan)
960 do_rescan = FALSE;
961 else
962 break;
963 } else {
964 mapping_free(mp);
965 found_mapping = TRUE;
966 }
967
968 va = nextva;
969 }
970 }
971
972 if (ckpmap == refpmap) {
973 if (found_mapping == FALSE)
974 panic("no valid pmap to purge mappings\n");
975 else
976 found_mapping = FALSE;
977 }
978
979 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
980 panic("mapping_alloc - timeout getting control lock\n"); /* Tell all and die */
981 }
982
983 }
984
985 mapCtl.mapcflush.spacenum = ckpmap->spaceNum;
986 mapCtl.mapcflush.addr = nextva;
987 }
988
989 simple_unlock(&free_pmap_lock);
990 }
991
992 rescued:
993
994 mb = mapCtl.mapcnext;
995
996 if ( big ) { /* if we need a big (128-byte) mapping */
997 mapCtl.mapcbig++; /* count attempts to allocate a big mapping */
998 mbn = NULL; /* this will be prev ptr */
999 mindx = 0;
1000 while( mb ) { /* loop over mapping blocks with free entries */
1001 mindx = mapalc2(mb); /* try for 2 consequtive free bits in this block */
1002
1003 if ( mindx ) break; /* exit loop if we found them */
1004 mbn = mb; /* remember previous block */
1005 mb = mb->nextblok; /* move on to next block */
1006 }
1007 if ( mindx == 0 ) { /* if we couldn't find 2 consequtive bits... */
1008 mapCtl.mapcbigfails++; /* count failures */
1009 big = 0; /* forget that we needed a big mapping */
1010 lists = mpBasicLists; /* clamp list count down to the max in a 64-byte mapping */
1011 mb = mapCtl.mapcnext; /* back to the first block with a free entry */
1012 }
1013 else { /* if we did find a big mapping */
1014 mapCtl.mapcfree--; /* Decrement free count twice */
1015 mapCtl.mapcinuse++; /* Bump in use count twice */
1016 if ( mindx < 0 ) { /* if we just used the last 2 free bits in this block */
1017 if (mbn) { /* if this wasn't the first block */
1018 mindx = -mindx; /* make positive */
1019 mbn->nextblok = mb->nextblok; /* unlink this one from the middle of block list */
1020 if (mb == mapCtl.mapclast) { /* if we emptied last block */
1021 mapCtl.mapclast = mbn; /* then prev block is now last */
1022 }
1023 }
1024 }
1025 }
1026 }
1027
1028 if ( !big ) { /* if we need a small (64-byte) mapping */
1029 if(!(mindx = mapalc1(mb))) /* Allocate a 1-bit slot */
1030 panic("mapping_alloc - empty mapping block detected at %08X\n", mb);
1031 }
1032
1033 if(mindx < 0) { /* Did we just take the last one */
1034 mindx = -mindx; /* Make positive */
1035 mapCtl.mapcnext = mb->nextblok; /* Remove us from the list */
1036 if(!((unsigned int)mapCtl.mapcnext)) mapCtl.mapclast = 0; /* Removed the last one */
1037 }
1038
1039 mapCtl.mapcfree--; /* Decrement free count */
1040 mapCtl.mapcinuse++; /* Bump in use count */
1041
1042 mapCtl.mapcallocc++; /* Count total calls */
1043
1044 /*
1045 * Note: in the following code, we will attempt to rescue blocks only one at a time.
1046 * Eventually, after a few more mapping_alloc calls, we will catch up. If there are none
1047 * rescueable, we will kick the misc scan who will allocate some for us. We only do this
1048 * if we haven't already done it.
1049 * For early boot, we are set up to only rescue one block at a time. This is because we prime
1050 * the release list with as much as we need until threads start.
1051 */
1052
1053 if(mapCtl.mapcfree < mapCtl.mapcmin) { /* See if we need to replenish */
1054 if(mbn = mapCtl.mapcrel) { /* Try to rescue a block from impending doom */
1055 mapCtl.mapcrel = mbn->nextblok; /* Pop the queue */
1056 mapCtl.mapcreln--; /* Back off the count */
1057 mapping_free_init((vm_offset_t)mbn, 0, 1); /* Initialize a non-permanent block */
1058 }
1059 else { /* We need to replenish */
1060 if (mapCtl.mapcfree < (mapCtl.mapcmin / 4)) {
1061 if(hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) { /* Make sure we aren't recursing */
1062 thread_call_enter(mapping_adjust_call); /* Go allocate some more */
1063 }
1064 }
1065 }
1066 }
1067
1068 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1069 splx(s); /* Restore 'rupts */
1070
1071 mp = &((mapping *)mb)[mindx]; /* Point to the allocated mapping */
1072 mp->mpFlags = lists; /* set the list count */
1073
1074
1075 return mp; /* Send it back... */
1076 }
1077
1078
1079 void
1080 consider_mapping_adjust()
1081 {
1082 spl_t s;
1083
1084 s = splhigh(); /* Don't bother from now on */
1085 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
1086 panic("consider_mapping_adjust -- lock timeout\n");
1087 }
1088
1089 if (mapCtl.mapcfree < (mapCtl.mapcmin / 4)) {
1090 if(hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) { /* Make sure we aren't recursing */
1091 thread_call_enter(mapping_adjust_call); /* Go allocate some more */
1092 }
1093 }
1094
1095 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1096 splx(s); /* Restore 'rupts */
1097
1098 }
1099
1100
1101
1102 /*
1103 * void mapping_free_init(mb, perm) - Adds a block of storage to the free mapping list
1104 *
1105 * The mapping block is a page size area on a page boundary. It contains 1 header and 63
1106 * mappings. This call adds and initializes a block for use. Mappings come in two sizes,
1107 * 64 and 128 bytes (the only difference is the number of skip-lists.) When we allocate a
 * 128-byte mapping we just look for two consecutive free 64-byte mappings, so most of the
 * code only deals with "basic" 64-byte mappings.  This works for two reasons:
 * - Only one in 256 mappings is big, so they are rare.
 * - If we cannot find two consecutive free mappings, we just return a small one.
 *   There is no problem with doing this, except a minor performance degradation.
1113 * Therefore, all counts etc in the mapping control structure are in units of small blocks.
1114 *
1115 * The header contains a chain link, bit maps, a virtual to real translation mask, and
1116 * some statistics. Bit maps map each slot on the page (bit 0 is not used because it
1117 * corresponds to the header). The translation mask is the XOR of the virtual and real
1118 * addresses (needless to say, the block must be wired).
1119 *
1120 * We handle these mappings the same way as saveareas: the block is only on the chain so
1121 * long as there are free entries in it.
1122 *
1123 * Empty blocks are garbage collected when there are at least mapCtl.mapcmin pages worth of free
1124 * mappings. Blocks marked PERM won't ever be released.
1125 *
1126 * If perm is negative, the mapping is initialized, but immediately queued to the mapCtl.mapcrel
1127 * list. We do this only at start up time. This is done because we only allocate blocks
1128 * in the pageout scan and it doesn't start up until after we run out of the initial mappings.
1129 * Therefore, we need to preallocate a bunch, but we don't want them to be permanent. If we put
1130 * them on the release queue, the allocate routine will rescue them. Then when the
1131 * pageout scan starts, all extra ones will be released.
1132 *
1133 */
1134
1135
1136 void mapping_free_init(vm_offset_t mbl, int perm, boolean_t locked) {
1137 /* Set's start and end of a block of mappings
1138 perm indicates if the block can be released
1139 or goes straight to the release queue .
1140 locked indicates if the lock is held already */
1141
1142 mappingblok *mb;
1143 spl_t s;
1144 int i;
1145 addr64_t raddr;
1146 ppnum_t pp;
1147
1148 mb = (mappingblok *)mbl; /* Start of area */
1149
1150 if(perm >= 0) { /* See if we need to initialize the block */
1151 if(perm) {
1152 raddr = (addr64_t)((unsigned int)mbl); /* Perm means V=R */
1153 mb->mapblokflags = mbPerm; /* Set perm */
1154 // mb->mapblokflags |= (unsigned int)mb; /* (BRINGUP) */
1155 }
1156 else {
1157 pp = pmap_find_phys(kernel_pmap, (addr64_t)mbl); /* Get the physical page */
1158 if(!pp) { /* What gives? Where's the page? */
1159 panic("mapping_free_init: could not find translation for vaddr %016llX\n", (addr64_t)mbl);
1160 }
1161
1162 raddr = (addr64_t)pp << 12; /* Convert physical page to physical address */
1163 mb->mapblokflags = 0; /* Set not perm */
1164 // mb->mapblokflags |= (unsigned int)mb; /* (BRINGUP) */
1165 }
1166
1167 mb->mapblokvrswap = raddr ^ (addr64_t)((unsigned int)mbl); /* Form translation mask */
1168
1169 mb->mapblokfree[0] = 0x7FFFFFFF; /* Set first 32 (minus 1) free */
1170 mb->mapblokfree[1] = 0xFFFFFFFF; /* Set next 32 free */
1171 }
1172
1173 s = splhigh(); /* Don't bother from now on */
1174 if(!locked) { /* Do we need the lock? */
1175 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
1176 panic("mapping_free_init: timeout getting control lock\n"); /* Tell all and die */
1177 }
1178 }
1179
1180 if(perm < 0) { /* Direct to release queue? */
1181 mb->nextblok = mapCtl.mapcrel; /* Move forward pointer */
1182 mapCtl.mapcrel = mb; /* Queue us on in */
1183 mapCtl.mapcreln++; /* Count the free block */
1184 }
1185 else { /* Add to the free list */
1186
1187 mb->nextblok = 0; /* We always add to the end */
1188 mapCtl.mapcfree += MAPPERBLOK; /* Bump count */
1189
1190 if(!((unsigned int)mapCtl.mapcnext)) { /* First entry on list? */
1191 mapCtl.mapcnext = mapCtl.mapclast = mb; /* Chain to us */
1192 }
1193 else { /* We are not the first */
1194 mapCtl.mapclast->nextblok = mb; /* Point the last to us */
1195 mapCtl.mapclast = mb; /* We are now last */
1196 }
1197 }
1198
1199 if(!locked) { /* Do we need to unlock? */
1200 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1201 }
1202
1203 splx(s); /* Restore 'rupts */
1204 return; /* All done, leave... */
1205 }
1206
1207
1208 /*
 * void mapping_prealloc(unsigned int) - Preallocates mappings for large request
1210 *
1211 * No locks can be held, because we allocate memory here.
1212 * This routine needs a corresponding mapping_relpre call to remove the
1213 * hold off flag so that the adjust routine will free the extra mapping
1214 * blocks on the release list. I don't like this, but I don't know
1215 * how else to do this for now...
1216 *
1217 */
1218
1219 void mapping_prealloc(unsigned int size) { /* Preallocates mapppings for large request */
1220
1221 int nmapb, i;
1222 kern_return_t retr;
1223 mappingblok *mbn;
1224 spl_t s;
1225
1226 s = splhigh(); /* Don't bother from now on */
1227 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
1228 panic("mapping_prealloc - timeout getting control lock\n"); /* Tell all and die */
1229 }
1230
1231 nmapb = (size >> 12) + mapCtl.mapcmin; /* Get number of entries needed for this and the minimum */
1232
1233 mapCtl.mapcholdoff++; /* Bump the hold off count */
1234
1235 if((nmapb = (nmapb - mapCtl.mapcfree)) <= 0) { /* Do we already have enough? */
1236 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1237 splx(s); /* Restore 'rupts */
1238 return;
1239 }
1240 if (!hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) { /* Make sure we aren't recursing */
1241 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1242 splx(s); /* Restore 'rupts */
1243 return;
1244 }
1245 nmapb = (nmapb + MAPPERBLOK - 1) / MAPPERBLOK; /* Get number of blocks to get */
1246
1247 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1248 splx(s); /* Restore 'rupts */
1249
1250 for(i = 0; i < nmapb; i++) { /* Allocate 'em all */
1251 retr = kmem_alloc_wired(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE); /* Find a virtual address to use */
1252 if(retr != KERN_SUCCESS) /* Did we get some memory? */
1253 break;
1254 mapping_free_init((vm_offset_t)mbn, -1, 0); /* Initialize on to the release queue */
1255 }
1256 if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
1257 mapCtl.mapcmaxalloc = mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1));
1258
1259 mapCtl.mapcrecurse = 0; /* We are done now */
1260 }
1261
1262 /*
1263 * void mapping_relpre(void) - Releases preallocation release hold off
1264 *
1265 * This routine removes the
1266 * hold off flag so that the adjust routine will free the extra mapping
1267 * blocks on the release list. I don't like this, but I don't know
1268 * how else to do this for now...
1269 *
1270 */
1271
1272 void mapping_relpre(void) { /* Releases release hold off */
1273
1274 spl_t s;
1275
1276 s = splhigh(); /* Don't bother from now on */
1277 if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) { /* Lock the control header */
1278 panic("mapping_relpre - timeout getting control lock\n"); /* Tell all and die */
1279 }
1280 if(--mapCtl.mapcholdoff < 0) { /* Back down the hold off count */
1281 panic("mapping_relpre: hold-off count went negative\n");
1282 }
1283
1284 hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */
1285 splx(s); /* Restore 'rupts */
1286 }
1287
1288 /*
1289 * void mapping_free_prime(void) - Primes the mapping block release list
1290 *
1291 * See mapping_free_init.
1292 * No locks can be held, because we allocate memory here.
1293 * One processor running only.
1294 *
1295 */
1296
1297 void mapping_free_prime(void) { /* Primes the mapping block release list */
1298
1299 int nmapb, i;
1300 kern_return_t retr;
1301 mappingblok *mbn;
1302 vm_offset_t mapping_min;
1303
1304 retr = kmem_suballoc(kernel_map, &mapping_min, sane_size / 16,
1305 FALSE, TRUE, &mapping_map);
1306
1307 if (retr != KERN_SUCCESS)
1308 panic("mapping_free_prime: kmem_suballoc failed");
1309
1310
1311 nmapb = (mapCtl.mapcfree + mapCtl.mapcinuse + MAPPERBLOK - 1) / MAPPERBLOK; /* Get permanent allocation */
1312 nmapb = nmapb * 4; /* Get 4 times our initial allocation */
1313
1314 #if DEBUG
1315 kprintf("mapping_free_prime: free = %08X; in use = %08X; priming = %08X\n",
1316 mapCtl.mapcfree, mapCtl.mapcinuse, nmapb);
1317 #endif
1318
1319 for(i = 0; i < nmapb; i++) { /* Allocate 'em all */
1320 retr = kmem_alloc_wired(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE); /* Find a virtual address to use */
1321 if(retr != KERN_SUCCESS) { /* Did we get some memory? */
1322 panic("Whoops... Not a bit of wired memory left for anyone\n");
1323 }
1324 mapping_free_init((vm_offset_t)mbn, -1, 0); /* Initialize onto release queue */
1325 }
1326 if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
1327 mapCtl.mapcmaxalloc = mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1));
1328 }
1329
1330
1331
1332 mapping_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
1333 vm_size_t *alloc_size, int *collectable, int *exhaustable)
1334 {
1335 *count = mapCtl.mapcinuse;
1336 *cur_size = ((PAGE_SIZE / (MAPPERBLOK + 1)) * (mapCtl.mapcinuse + mapCtl.mapcfree)) + (PAGE_SIZE * mapCtl.mapcreln);
1337 *max_size = (PAGE_SIZE / (MAPPERBLOK + 1)) * mapCtl.mapcmaxalloc;
1338 *elem_size = (PAGE_SIZE / (MAPPERBLOK + 1));
1339 *alloc_size = PAGE_SIZE;
1340
1341 *collectable = 1;
1342 *exhaustable = 0;
1343 }
1344
1345
1346 /*
1347 * addr64_t mapping_p2v(pmap_t pmap, ppnum_t pa) - Finds first virtual mapping of a physical page in a space
1348 *
 * First looks up the physical entry associated with the physical page.  Then searches the alias
1350 * list for a matching pmap. It grabs the virtual address from the mapping, drops busy, and returns
1351 * that.
1352 *
1353 */
1354
1355 addr64_t mapping_p2v(pmap_t pmap, ppnum_t pa) { /* Finds first virtual mapping of a physical page in a space */
1356
1357 spl_t s;
1358 mapping *mp;
1359 unsigned int pindex;
1360 phys_entry *physent;
1361 addr64_t va;
1362
1363 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
1364 if(!physent) { /* Did we find the physical page? */
1365 panic("mapping_p2v: invalid physical page %08X\n", pa);
1366 }
1367
1368 s = splhigh(); /* Make sure interruptions are disabled */
1369
1370 mp = (mapping *) hw_find_space(physent, pmap->space); /* Go find the first mapping to the page from the requested pmap */
1371
1372 if(mp) { /* Did we find one? */
1373 va = mp->mpVAddr & -4096; /* If so, get the cleaned up vaddr */
1374 mapping_drop_busy(mp); /* Go ahead and relase the mapping now */
1375 }
1376 else va = 0; /* Return failure */
1377
1378 splx(s); /* Restore 'rupts */
1379
1380 return va; /* Bye, bye... */
1381
1382 }
1383
1384 /*
1385 * phystokv(addr)
1386 *
1387 * Convert a physical address to a kernel virtual address if
1388 * there is a mapping, otherwise return NULL
1389 */
1390
1391 vm_offset_t phystokv(vm_offset_t pa) {
1392
1393 addr64_t va;
1394 ppnum_t pp;
1395
1396 pp = pa >> 12; /* Convert to a page number */
1397
1398 if(!(va = mapping_p2v(kernel_pmap, pp))) {
1399 return 0; /* Can't find it, return 0... */
1400 }
1401
1402 return (va | (pa & (PAGE_SIZE - 1))); /* Build and return VADDR... */
1403
1404 }
1405
1406 /*
1407 * kvtophys(addr)
1408 *
1409 * Convert a kernel virtual address to a physical address
1410 */
1411 vm_offset_t kvtophys(vm_offset_t va) {
1412
1413 return pmap_extract(kernel_pmap, va); /* Find mapping and lock the physical entry for this mapping */
1414
1415 }
1416
1417 /*
1418 * void ignore_zero_fault(boolean_t) - Sets up to ignore or honor any fault on
1419 * page 0 access for the current thread.
1420 *
1421 * If parameter is TRUE, faults are ignored
1422 * If parameter is FALSE, faults are honored
1423 *
1424 */
1425
1426 void ignore_zero_fault(boolean_t type) { /* Sets up to ignore or honor any fault on page 0 access for the current thread */
1427
1428 if(type) current_act()->mact.specFlags |= ignoreZeroFault; /* Ignore faults on page 0 */
1429 else current_act()->mact.specFlags &= ~ignoreZeroFault; /* Honor faults on page 0 */
1430
1431 return; /* Return the result or 0... */
1432 }
1433
1434
1435 /*
1436 * Copies data between a physical page and a virtual page, or 2 physical. This is used to
1437 * move data from the kernel to user state. Note that the "which" parm
1438 * says which of the parameters is physical and if we need to flush sink/source.
1439 * Note that both addresses may be physicical but only one may be virtual
1440 *
1441 * The rules are that the size can be anything. Either address can be on any boundary
1442 * and span pages. The physical data must be congiguous as must the virtual.
1443 *
1444 * We can block when we try to resolve the virtual address at each page boundary.
1445 * We don't check protection on the physical page.
1446 *
1447 * Note that we will not check the entire range and if a page translation fails,
1448 * we will stop with partial contents copied.
1449 *
1450 */
1451
1452 kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int which) {
1453
1454 vm_map_t map;
1455 kern_return_t ret;
1456 addr64_t pa, nextva, vaddr, paddr;
1457 register mapping *mp;
1458 spl_t s;
1459 unsigned int sz, left, lop, csize;
1460 int needtran, bothphys;
1461 unsigned int pindex;
1462 phys_entry *physent;
1463 vm_prot_t prot;
1464 int orig_which;
1465
1466 orig_which = which;
1467
1468 map = (which & cppvKmap) ? kernel_map : current_map_fast();
1469
1470 if((which & (cppvPsrc | cppvPsnk)) == 0 ) { /* Make sure that only one is virtual */
1471 panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */
1472 }
1473
1474 bothphys = 1; /* Assume both are physical */
1475
1476 if(!(which & cppvPsnk)) { /* Is there a virtual page here? */
1477 vaddr = sink; /* Sink side is virtual */
1478 bothphys = 0; /* Show both aren't physical */
1479 prot = VM_PROT_READ | VM_PROT_WRITE; /* Sink always must be read/write */
1480 } else if(!(which & cppvPsrc)) { /* Source side is virtual */
1481 vaddr = source; /* Source side is virtual */
1482 bothphys = 0; /* Show both aren't physical */
1483 prot = VM_PROT_READ; /* Virtual source is always read only */
1484 }
1485
1486 needtran = 1; /* Show we need to map the virtual the first time */
1487 s = splhigh(); /* Don't bother me */
1488
1489 while(size) {
1490
1491 if(!bothphys && (needtran || !(vaddr & 4095LL))) { /* If first time or we stepped onto a new page, we need to translate */
1492 if(!needtran) { /* If this is not the first translation, we need to drop the old busy */
1493 mapping_drop_busy(mp); /* Release the old mapping now */
1494 }
1495 needtran = 0;
1496
1497 while(1) {
1498 mp = mapping_find(map->pmap, vaddr, &nextva, 1); /* Find and busy the mapping */
1499 if(!mp) { /* Was it there? */
1500 if(per_proc_info[cpu_number()].istackptr == 0)
1501 panic("copypv: No vaild mapping on memory %s %x", "RD", vaddr);
1502
1503 splx(s); /* Restore the interrupt level */
1504 ret = vm_fault(map, trunc_page_32((vm_offset_t)vaddr), prot, FALSE, FALSE, NULL, 0); /* Didn't find it, try to fault it in... */
1505
1506 if(ret != KERN_SUCCESS)return KERN_FAILURE; /* Didn't find any, return no good... */
1507
1508 s = splhigh(); /* Don't bother me */
1509 continue; /* Go try for the map again... */
1510
1511 }
1512 if (mp->mpVAddr & mpI) { /* cache inhibited, so force the appropriate page to be flushed before */
1513 if (which & cppvPsrc) /* and after the copy to avoid cache paradoxes */
1514 which |= cppvFsnk;
1515 else
1516 which |= cppvFsrc;
1517 } else
1518 which = orig_which;
1519
1520 /* Note that we have to have the destination writable. So, if we already have it, or we are mapping the source,
1521 we can just leave.
1522 */
1523 if((which & cppvPsnk) || !(mp->mpVAddr & 1)) break; /* We got it mapped R/W or the source is not virtual, leave... */
1524
1525 mapping_drop_busy(mp); /* Go ahead and release the mapping for now */
1526 if(per_proc_info[cpu_number()].istackptr == 0)
1527 panic("copypv: No vaild mapping on memory %s %x", "RDWR", vaddr);
1528 splx(s); /* Restore the interrupt level */
1529
1530 ret = vm_fault(map, trunc_page_32((vm_offset_t)vaddr), VM_PROT_READ | VM_PROT_WRITE, FALSE, FALSE, NULL, 0); /* check for a COW area */
1531 if (ret != KERN_SUCCESS) return KERN_FAILURE; /* We couldn't get it R/W, leave in disgrace... */
1532 s = splhigh(); /* Don't bother me */
1533 }
1534 paddr = ((addr64_t)mp->mpPAddr << 12) + (vaddr - (mp->mpVAddr & -4096LL)); /* construct the physical address... this calculation works */
1535 /* properly on both single page and block mappings */
1536 if(which & cppvPsrc) sink = paddr; /* If source is physical, then the sink is virtual */
1537 else source = paddr; /* Otherwise the source is */
1538 }
1539
1540 lop = (unsigned int)(4096LL - (sink & 4095LL)); /* Assume sink smallest */
1541 if(lop > (unsigned int)(4096LL - (source & 4095LL))) lop = (unsigned int)(4096LL - (source & 4095LL)); /* No, source is smaller */
1542
1543 csize = size; /* Assume we can copy it all */
1544 if(lop < size) csize = lop; /* Nope, we can't do it all */
1545
1546 if(which & cppvFsrc) flush_dcache64(source, csize, 1); /* If requested, flush source before move */
1547 if(which & cppvFsnk) flush_dcache64(sink, csize, 1); /* If requested, flush sink before move */
1548
1549 bcopy_physvir(source, sink, csize); /* Do a physical copy, virtually */
1550
1551 if(which & cppvFsrc) flush_dcache64(source, csize, 1); /* If requested, flush source after move */
1552 if(which & cppvFsnk) flush_dcache64(sink, csize, 1); /* If requested, flush sink after move */
1553
1554 /*
1555 * Note that for certain ram disk flavors, we may be copying outside of known memory.
1556 * Therefore, before we try to mark it modifed, we check if it exists.
1557 */
1558
1559 if( !(which & cppvNoModSnk)) {
1560 physent = mapping_phys_lookup(sink >> 12, &pindex); /* Get physical entry for sink */
1561 if(physent) mapping_set_mod((ppnum_t)(sink >> 12)); /* Make sure we know that it is modified */
1562 }
1563 if( !(which & cppvNoRefSrc)) {
1564 physent = mapping_phys_lookup(source >> 12, &pindex); /* Get physical entry for source */
1565 if(physent) mapping_set_ref((ppnum_t)(source >> 12)); /* Make sure we know that it is modified */
1566 }
1567 size = size - csize; /* Calculate what is left */
1568 vaddr = vaddr + csize; /* Move to next sink address */
1569 source = source + csize; /* Bump source to next physical address */
1570 sink = sink + csize; /* Bump sink to next physical address */
1571 }
1572
1573 if(!bothphys) mapping_drop_busy(mp); /* Go ahead and release the mapping of the virtual page if any */
1574 splx(s); /* Open up for interrupts */
1575
1576 return KERN_SUCCESS;
1577 }
1578
1579
1580 /*
1581 * Debug code
1582 */
1583
1584 void mapping_verify(void) {
1585
1586 spl_t s;
1587 mappingblok *mb, *mbn;
1588 int relncnt;
1589 unsigned int dumbodude;
1590
1591 dumbodude = 0;
1592
1593 s = splhigh(); /* Don't bother from now on */
1594
1595 mbn = 0; /* Start with none */
1596 for(mb = mapCtl.mapcnext; mb; mb = mb->nextblok) { /* Walk the free chain */
1597 if((mappingblok *)(mb->mapblokflags & 0x7FFFFFFF) != mb) { /* Is tag ok? */
1598 panic("mapping_verify: flags tag bad, free chain; mb = %08X, tag = %08X\n", mb, mb->mapblokflags);
1599 }
1600 mbn = mb; /* Remember the last one */
1601 }
1602
1603 if(mapCtl.mapcnext && (mapCtl.mapclast != mbn)) { /* Do we point to the last one? */
1604 panic("mapping_verify: last pointer bad; mb = %08X, mapclast = %08X\n", mb, mapCtl.mapclast);
1605 }
1606
1607 relncnt = 0; /* Clear count */
1608 for(mb = mapCtl.mapcrel; mb; mb = mb->nextblok) { /* Walk the release chain */
1609 dumbodude |= mb->mapblokflags; /* Just touch it to make sure it is mapped */
1610 relncnt++; /* Count this one */
1611 }
1612
1613 if(mapCtl.mapcreln != relncnt) { /* Is the count on release queue ok? */
1614 panic("mapping_verify: bad release queue count; mapcreln = %d, cnt = %d, ignore this = %08X\n", mapCtl.mapcreln, relncnt, dumbodude);
1615 }
1616
1617 splx(s); /* Restore 'rupts */
1618
1619 return;
1620 }
1621
1622 void mapping_phys_unused(ppnum_t pa) {
1623
1624 unsigned int pindex;
1625 phys_entry *physent;
1626
1627 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
1628 if(!physent) return; /* Did we find the physical page? */
1629
1630 if(!(physent->ppLink & ~(ppLock | ppN | ppFlags))) return; /* No one else is here */
1631
1632 panic("mapping_phys_unused: physical page (%08X) in use, physent = %08X\n", pa, physent);
1633
1634 }
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644