]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/shadow.c
xnu-792.22.5.tar.gz
[apple/xnu.git] / bsd / dev / vn / shadow.c
1
2 /*
3 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. The rights granted to you under the License
11 * may not be used to create, or enable the creation or redistribution of,
12 * unlawful or unlicensed copies of an Apple operating system, or to
13 * circumvent, violate, or enable the circumvention or violation of, any
14 * terms of an Apple operating system software license agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 *
19 * The Original Code and all software distributed under the License are
20 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
21 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
22 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
24 * Please see the License for the specific language governing rights and
25 * limitations under the License.
26 *
27 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 */
29
30 /*
31 * shadow.c
32 *
33 * Implement copy-on-write shadow map to allow a disk image to be
34 * mounted read-only, yet be writable by transferring writes to a
35 * "shadow" file. Subsequent reads from blocks that have been
36 * written will then go the "shadow" file.
37 *
38 * The map has two parts:
39 * 1) a bit map to track which blocks have been written
40 * 2) a band map to map a "band" within the original file to a corresponding
41 * "band" in the shadow file. Each band has the same size.
42 *
43 * The band map is used to ensure that blocks that are contiguous in the
44 * original file will remain contiguous in the shadow file.
45 *
46 * For debugging purposes, this file can be compiled standalone using:
47 * cc -o shadow shadow.c -DTEST_SHADOW
48 */
49
50 /*
51 * Modification History
52 *
53 * December 21, 2001 Dieter Siegmund (dieter@apple.com)
54 * - initial revision
55 */
56 #include <sys/param.h>
57 #include <sys/types.h>
58 #include <mach/boolean.h>
59
60 #include <string.h>
61
62 #ifdef TEST_SHADOW
63 #include <unistd.h>
64 #include <stdlib.h>
65 #define my_malloc(a) malloc(a)
66 #define my_free(a) free(a)
67 #else /* !TEST_SHADOW */
68 #include <sys/malloc.h>
69 #define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK)
70 #define my_free(a) FREE(a, M_TEMP)
71 #include <libkern/libkern.h>
72 #endif /* TEST_SHADOW */
73
74 #include "shadow.h"
75
76 #define ULONG_ALL_ONES ((u_long)(-1))
77 #define USHORT_ALL_ONES ((u_short)(-1))
78 #define UCHAR_ALL_ONES ((u_char)(-1))
79
80 #define my_trunc(value, divisor) ((value) / (divisor) * (divisor))
81
82 /* a band size of 128K can represent a file up to 8GB */
83 #define BAND_SIZE_DEFAULT_POWER_2 17
84 #define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2)
85
86 typedef u_short band_number_t;
87 #define BAND_ZERO ((band_number_t)0)
88 #define BAND_MAX ((band_number_t)65535)
89
90 struct shadow_map {
91 u_long blocks_per_band;/* size in blocks */
92 u_long block_size;
93 u_char * block_bitmap; /* 1 bit per block; 1=written */
94 band_number_t * bands; /* band map array */
95 u_long file_size_blocks; /* size of file in bands */
96 u_long shadow_size_bands; /* size of shadow in bands */
97 u_long next_band; /* next free band */
98 u_long zeroth_band; /* special-case 0th band */
99 };
100
101
102 typedef struct {
103 u_long byte;
104 u_long bit;
105 } bitmap_offset_t;
106
107 static __inline__ u_char
108 bit(int b)
109 {
110 return ((u_char)(1 << b));
111 }
112
113 /*
114 * Function: bits_lower
115 * Purpose:
116 * Return a byte value in which bits numbered lower than 'b' are set.
117 */
118 static __inline__ u_char
119 bits_lower(int b)
120 {
121 return ((u_char)(bit(b) - 1));
122 }
123
124 /*
125 * Function: byte_set_bits
126 * Purpose:
127 * Set the given range of bits within a byte.
128 */
129 static __inline__ u_char
130 byte_set_bits(int start, int end)
131 {
132 return ((u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end))));
133 }
134
135 static __inline__ bitmap_offset_t
136 bitmap_offset(off_t where)
137 {
138 bitmap_offset_t b;
139
140 b.byte = where / NBBY;
141 b.bit = where % NBBY;
142 return (b);
143 }
144
145 /*
146 * Function: bitmap_set
147 *
148 * Purpose:
149 * Set the given range of bits.
150 *
151 * This algorithm tries to set the extents using the biggest
152 * units, using longs, then a short, then a byte, then bits.
153 */
154 static void
155 bitmap_set(u_char * map, u_long start_bit, u_long bit_count)
156 {
157 bitmap_offset_t start;
158 bitmap_offset_t end;
159
160 start = bitmap_offset(start_bit);
161 end = bitmap_offset(start_bit + bit_count);
162 if (start.byte < end.byte) {
163 u_long n_bytes;
164
165 if (start.bit) {
166 map[start.byte] |= byte_set_bits(start.bit, NBBY - 1);
167 start.bit = 0;
168 start.byte++;
169 if (start.byte == end.byte)
170 goto end;
171 }
172
173 n_bytes = end.byte - start.byte;
174
175 while (n_bytes >= (sizeof(u_long))) {
176 *((u_long *)(map + start.byte)) = ULONG_ALL_ONES;
177 start.byte += sizeof(u_long);
178 n_bytes -= sizeof(u_long);
179 }
180 if (n_bytes >= sizeof(u_short)) {
181 *((u_short *)(map + start.byte)) = USHORT_ALL_ONES;
182 start.byte += sizeof(u_short);
183 n_bytes -= sizeof(u_short);
184 }
185 if (n_bytes == 1) {
186 map[start.byte] = UCHAR_ALL_ONES;
187 start.byte++;
188 n_bytes = 0;
189 }
190 }
191
192 end:
193 if (end.bit > start.bit) {
194 map[start.byte] |= byte_set_bits(start.bit, end.bit - 1);
195 }
196
197 return;
198 }
199
200 /*
201 * Function: bitmap_get
202 *
203 * Purpose:
204 * Return the number of bits in the range that are the same e.g.
205 * 11101 returns 3 because the first 3 bits are the same (1's), whereas
206 * 001100 returns 2 because the first 2 bits are the same.
207 * This algorithm tries to count things in as big a chunk as possible,
208 * first aligning to a byte offset, then trying to count longs, a short,
209 * a byte, then any remaining bits to find the bit that is different.
210 */
211
212 static u_long
213 bitmap_get(u_char * map, u_long start_bit, u_long bit_count,
214 boolean_t * ret_is_set)
215 {
216 u_long count;
217 int i;
218 boolean_t is_set;
219 bitmap_offset_t start;
220 bitmap_offset_t end;
221
222 start = bitmap_offset(start_bit);
223 end = bitmap_offset(start_bit + bit_count);
224
225 is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE;
226 count = 0;
227
228 if (start.byte < end.byte) {
229 u_long n_bytes;
230
231 if (start.bit) { /* try to align to a byte */
232 for (i = start.bit; i < NBBY; i++) {
233 boolean_t this_is_set;
234
235 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
236 if (this_is_set != is_set) {
237 goto done; /* found bit that was different, we're done */
238 }
239 count++;
240 }
241 start.bit = 0; /* made it to the next byte */
242 start.byte++;
243 if (start.byte == end.byte)
244 goto end; /* no more bytes, check for any leftover bits */
245 }
246 /* calculate how many bytes are left in the range */
247 n_bytes = end.byte - start.byte;
248
249 /* check for 4 bytes of the same bits */
250 while (n_bytes >= sizeof(u_long)) {
251 u_long * valPtr = (u_long *)(map + start.byte);
252 if ((is_set && *valPtr == ULONG_ALL_ONES)
253 || (!is_set && *valPtr == 0)) {
254 count += sizeof(*valPtr) * NBBY;
255 start.byte += sizeof(*valPtr);
256 n_bytes -= sizeof(*valPtr);
257 }
258 else
259 break; /* bits differ */
260
261 }
262 /* check for 2 bytes of the same bits */
263 if (n_bytes >= sizeof(u_short)) {
264 u_short * valPtr = (u_short *)(map + start.byte);
265
266 if ((is_set && *valPtr == USHORT_ALL_ONES)
267 || (!is_set && (*valPtr == 0))) {
268 count += sizeof(*valPtr) * NBBY;
269 start.byte += sizeof(*valPtr);
270 n_bytes -= sizeof(*valPtr);
271 }
272 }
273
274 /* check for 1 byte of the same bits */
275 if (n_bytes) {
276 if ((is_set && map[start.byte] == UCHAR_ALL_ONES)
277 || (!is_set && map[start.byte] == 0)) {
278 count += NBBY;
279 start.byte++;
280 n_bytes--;
281 }
282 /* we found bits that were different, find the first one */
283 if (n_bytes) {
284 for (i = 0; i < NBBY; i++) {
285 boolean_t this_is_set;
286
287 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
288 if (this_is_set != is_set) {
289 break;
290 }
291 count++;
292 }
293 goto done;
294 }
295 }
296 }
297
298 end:
299 for (i = start.bit; i < (int)end.bit; i++) {
300 boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
301
302 if (this_is_set != is_set) {
303 break;
304 }
305 count++;
306 }
307
308 done:
309 *ret_is_set = is_set;
310 return (count);
311 }
312
313 static __inline__ band_number_t
314 shadow_map_block_to_band(shadow_map_t * map, unsigned long block)
315 {
316 return (block / map->blocks_per_band);
317 }
318
319 /*
320 * Function: shadow_map_mapped_band
321 * Purpose:
322 * Return the mapped band for the given band.
323 * If map_it is FALSE, and the band is not mapped, return FALSE.
324 * If map_it is TRUE, then this function will always return TRUE.
325 */
326 static boolean_t
327 shadow_map_mapped_band(shadow_map_t * map, band_number_t band,
328 boolean_t map_it, band_number_t * mapped_band)
329 {
330 boolean_t is_mapped = FALSE;
331
332 if (band == map->zeroth_band) {
333 *mapped_band = BAND_ZERO;
334 is_mapped = TRUE;
335 }
336 else {
337 *mapped_band = map->bands[band];
338 if (*mapped_band == BAND_ZERO) {
339 if (map_it) {
340 /* grow the file */
341 if (map->next_band == 0) {
342 /* remember the zero'th band */
343 map->zeroth_band = band;
344 }
345 *mapped_band = map->bands[band] = map->next_band++;
346 is_mapped = TRUE;
347 }
348 }
349 else {
350 is_mapped = TRUE;
351 }
352 }
353 return (is_mapped);
354 }
355
356 /*
357 * Function: shadow_map_contiguous
358 *
359 * Purpose:
360 * Return the first offset within the range position..(position + count)
361 * that is not a contiguous mapped band.
362 *
363 * If called with is_write = TRUE, this function will map bands as it goes.
364 */
365 static u_long
366 shadow_map_contiguous(shadow_map_t * map, u_long start_block,
367 u_long num_blocks, boolean_t is_write)
368 {
369 band_number_t band = shadow_map_block_to_band(map, start_block);
370 u_long end_block = start_block + num_blocks;
371 boolean_t is_mapped;
372 band_number_t mapped_band;
373 u_long ret_end_block = end_block;
374 u_long p;
375
376 is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band);
377 if (is_write == FALSE && is_mapped == FALSE) {
378 static int happened = 0;
379 /* this can't happen */
380 if (happened == 0) {
381 printf("shadow_map_contiguous: this can't happen!\n");
382 happened = 1;
383 }
384 return (start_block);
385 }
386 for (p = my_trunc(start_block + map->blocks_per_band,
387 map->blocks_per_band);
388 p < end_block; p += map->blocks_per_band) {
389 band_number_t next_mapped_band;
390
391 band++;
392 is_mapped = shadow_map_mapped_band(map, band, is_write,
393 &next_mapped_band);
394 if (is_write == FALSE && is_mapped == FALSE) {
395 return (p);
396 }
397 if ((mapped_band + 1) != next_mapped_band) {
398 /* not contiguous */
399 ret_end_block = p;
400 break;
401 }
402 mapped_band = next_mapped_band;
403 }
404 return (ret_end_block);
405 }
406
407
408 /*
409 * Function: block_bitmap_size
410 * Purpose:
411 * The number of bytes required in a block bitmap to represent a file of size
412 * file_size.
413 *
414 * The bytes required is the number of blocks in the file,
415 * divided by the number of bits per byte.
416 * Note:
417 * An 8GB file requires (assuming 512 byte block):
418 * 2^33 / 2^9 / 2^3 = 2^21 = 2MB
419 * of bitmap space. This is a non-trival amount of memory,
420 * particularly since most of the bits will be zero.
421 * A sparse bitmap would really help in this case.
422 */
423 static __inline__ u_long
424 block_bitmap_size(off_t file_size, u_long block_size)
425 {
426 off_t blocks = howmany(file_size, block_size);
427 return (howmany(blocks, NBBY));
428 }
429
430 /*
431 * Function: shadow_map_read
432 *
433 * Purpose:
434 * Calculate the block offset within the shadow to read, and the number
435 * blocks to read. The input values (block_offset, block_count) refer
436 * to the original file.
437 *
438 * The output values (*incr_block_offset, *incr_block_count) refer to the
439 * shadow file if the return value is TRUE. They refer to the original
440 * file if the return value is FALSE.
441
442 * Blocks within a band may or may not have been written, in addition,
443 * Bands are not necessarily contiguous, therefore:
444 * *incr_block_count <= block_count
445 * The caller must be prepared to call this function interatively
446 * to complete the whole i/o.
447 * Returns:
448 * TRUE if the shadow file should be read, FALSE if the original file
449 * should be read.
450 */
451 boolean_t
452 shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count,
453 u_long * incr_block_offset, u_long * incr_block_count)
454 {
455 boolean_t written = FALSE;
456 u_long n_blocks;
457
458 if (block_offset >= map->file_size_blocks
459 || (block_offset + block_count) > map->file_size_blocks) {
460 printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n",
461 block_offset, block_count, map->file_size_blocks);
462 *incr_block_count = 0;
463 }
464 n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count,
465 &written);
466 if (written == FALSE) {
467 *incr_block_count = n_blocks;
468 *incr_block_offset = block_offset;
469 }
470 else { /* start has been written, and therefore mapped */
471 band_number_t mapped_band;
472 u_long band_limit;
473
474 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
475 *incr_block_offset = mapped_band * map->blocks_per_band
476 + (block_offset % map->blocks_per_band);
477 band_limit
478 = shadow_map_contiguous(map, block_offset, block_count, FALSE);
479 *incr_block_count = band_limit - block_offset;
480 if (*incr_block_count > n_blocks) {
481 *incr_block_count = n_blocks;
482 }
483 }
484 return (written);
485 }
486
487 /*
488 * Function: shadow_map_write
489 *
490 * Purpose:
491 * Calculate the block offset within the shadow to write, and the number
492 * blocks to write. The input values (block_offset, block_count) refer
493 * to the original file. The output values
494 * (*incr_block_offset, *incr_block_count) refer to the shadow file.
495 *
496 * Bands are not necessarily contiguous, therefore:
497 * *incr_block_count <= block_count
498 * The caller must be prepared to call this function interatively
499 * to complete the whole i/o.
500 * Returns:
501 * TRUE if the shadow file was grown, FALSE otherwise.
502 */
503 boolean_t
504 shadow_map_write(shadow_map_t * map, u_long block_offset,
505 u_long block_count, u_long * incr_block_offset,
506 u_long * incr_block_count)
507 {
508 u_long band_limit;
509 band_number_t mapped_band;
510 boolean_t shadow_grew = FALSE;
511
512 if (block_offset >= map->file_size_blocks
513 || (block_offset + block_count) > map->file_size_blocks) {
514 printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n",
515 block_offset, block_count, map->file_size_blocks);
516 *incr_block_count = 0;
517 }
518
519 band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE);
520 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
521 *incr_block_offset = mapped_band * map->blocks_per_band
522 + (block_offset % map->blocks_per_band);
523 *incr_block_count = band_limit - block_offset;
524
525 /* mark these blocks as written */
526 bitmap_set(map->block_bitmap, block_offset, *incr_block_count);
527
528 if (map->next_band > map->shadow_size_bands) {
529 map->shadow_size_bands = map->next_band;
530 shadow_grew = TRUE;
531 }
532 return (shadow_grew);
533 }
534
535 boolean_t
536 shadow_map_is_written(shadow_map_t * map, u_long block_offset)
537 {
538 bitmap_offset_t b;
539
540 b = bitmap_offset(block_offset);
541 return ((map->block_bitmap[b.byte] & bit(b.bit)) ? TRUE : FALSE);
542 }
543
544 /*
545 * Function: shadow_map_shadow_size
546 *
547 * Purpose:
548 * To return the size of the shadow file in blocks.
549 */
550 u_long
551 shadow_map_shadow_size(shadow_map_t * map)
552 {
553 return (map->shadow_size_bands * map->blocks_per_band);
554 }
555
556 /*
557 * Function: shadow_map_create
558 *
559 * Purpose:
560 * Allocate the dynamic data for keeping track of the shadow dirty blocks
561 * and the band mapping table.
562 * Returns:
563 * NULL if an error occurred.
564 */
565 shadow_map_t *
566 shadow_map_create(off_t file_size, off_t shadow_size,
567 u_long band_size, u_long block_size)
568 {
569 void * block_bitmap = 0;
570 u_long bitmap_size;
571 band_number_t * bands = 0;
572 shadow_map_t * map;
573 u_long n_bands = 0;
574
575 if (band_size == 0) {
576 band_size = BAND_SIZE_DEFAULT;
577 }
578
579 n_bands = howmany(file_size, band_size);
580 if (n_bands > (BAND_MAX + 1)) {
581 printf("file is too big: %ld > %d\n",
582 n_bands, BAND_MAX);
583 goto failure;
584 }
585
586 /* create a block bitmap, one bit per block */
587 bitmap_size = block_bitmap_size(file_size, block_size);
588 block_bitmap = my_malloc(bitmap_size);
589 if (block_bitmap == NULL) {
590 printf("failed to allocate bitmap\n");
591 goto failure;
592 }
593 bzero(block_bitmap, bitmap_size);
594
595 /* get the band map */
596 bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t));
597 if (bands == NULL) {
598 printf("failed to allocate bands\n");
599 goto failure;
600 }
601 bzero(bands, n_bands * sizeof(band_number_t));
602
603 map = my_malloc(sizeof(*map));
604 if (map == NULL) {
605 printf("failed to allocate map\n");
606 goto failure;
607 }
608 map->blocks_per_band = band_size / block_size;
609 map->block_bitmap = block_bitmap;
610 map->bands = bands;
611 map->file_size_blocks = n_bands * map->blocks_per_band;
612 map->next_band = 0;
613 map->zeroth_band = -1;
614 map->shadow_size_bands = howmany(shadow_size, band_size);
615 map->block_size = block_size;
616 return (map);
617
618 failure:
619 if (block_bitmap)
620 my_free(block_bitmap);
621 if (bands)
622 my_free(bands);
623 return (NULL);
624 }
625
626 /*
627 * Function: shadow_map_free
628 * Purpose:
629 * Frees the data structure to deal with the shadow map.
630 */
631 void
632 shadow_map_free(shadow_map_t * map)
633 {
634 if (map->block_bitmap)
635 my_free(map->block_bitmap);
636 if (map->bands)
637 my_free(map->bands);
638 map->block_bitmap = 0;
639 map->bands = 0;
640 my_free(map);
641 return;
642 }
643
644 #ifdef TEST_SHADOW
645 #define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512)
646
647 enum {
648 ReadRequest,
649 WriteRequest,
650 };
651
652 typedef struct {
653 int type;
654 u_long offset;
655 u_long count;
656 } block_request_t;
657
658 int
659 main()
660 {
661 shadow_map_t * map;
662 int i;
663 block_request_t requests[] = {
664 { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 },
665 { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 },
666 { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3},
667 { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 },
668 { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1),
669 BAND_SIZE_BLOCKS * 2},
670 { 0, 0 },
671 };
672
673 map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512);
674 if (map == NULL) {
675 printf("shadow_map_create failed\n");
676 exit(1);
677 }
678 for (i = 0; TRUE; i++) {
679 u_long offset;
680 u_long resid;
681 boolean_t shadow_grew;
682 boolean_t read_shadow;
683
684 if (requests[i].count == 0) {
685 break;
686 }
687 offset = requests[i].offset;
688 resid = requests[i].count;
689 printf("\n%s REQUEST (%ld, %ld)\n",
690 requests[i].type == WriteRequest ? "WRITE" : "READ",
691 offset, resid);
692 switch (requests[i].type) {
693 case WriteRequest:
694 while (resid > 0) {
695 u_long this_offset;
696 u_long this_count;
697
698 shadow_grew = shadow_map_write(map, offset,
699 resid,
700 &this_offset,
701 &this_count);
702 printf("\t(%ld, %ld) => (%ld, %ld)",
703 offset, resid, this_offset, this_count);
704 resid -= this_count;
705 offset += this_count;
706 if (shadow_grew) {
707 printf(" shadow grew to %ld", shadow_map_shadow_size(map));
708 }
709 printf("\n");
710 }
711 break;
712 case ReadRequest:
713 while (resid > 0) {
714 u_long this_offset;
715 u_long this_count;
716
717 read_shadow = shadow_map_read(map, offset,
718 resid,
719 &this_offset,
720 &this_count);
721 printf("\t(%ld, %ld) => (%ld, %ld)%s\n",
722 offset, resid, this_offset, this_count,
723 read_shadow ? " from shadow" : "");
724 if (this_count == 0) {
725 printf("this_count is 0, aborting\n");
726 break;
727 }
728 resid -= this_count;
729 offset += this_count;
730 }
731 break;
732 default:
733 break;
734 }
735 }
736 if (map) {
737 shadow_map_free(map);
738 }
739 exit(0);
740 return (0);
741 }
742 #endif