]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/shadow.c
ebaa362e441bddfaa60f30bd58692311f65b966c
[apple/xnu.git] / bsd / dev / vn / shadow.c
1
2 /*
3 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24
25 /*
26 * shadow.c
27 *
28 * Implement copy-on-write shadow map to allow a disk image to be
29 * mounted read-only, yet be writable by transferring writes to a
30 * "shadow" file. Subsequent reads from blocks that have been
31 * written will then go the "shadow" file.
32 *
33 * The map has two parts:
34 * 1) a bit map to track which blocks have been written
35 * 2) a band map to map a "band" within the original file to a corresponding
36 * "band" in the shadow file. Each band has the same size.
37 *
38 * The band map is used to ensure that blocks that are contiguous in the
39 * original file will remain contiguous in the shadow file.
40 *
41 * For debugging purposes, this file can be compiled standalone using:
42 * cc -o shadow shadow.c -DTEST_SHADOW
43 */
44
45 /*
46 * Modification History
47 *
48 * December 21, 2001 Dieter Siegmund (dieter@apple.com)
49 * - initial revision
50 */
51 #include <sys/param.h>
52 #include <sys/types.h>
53 #include <mach/boolean.h>
54
55 #include <string.h>
56
57 #ifdef TEST_SHADOW
58 #include <unistd.h>
59 #include <stdlib.h>
60 #define my_malloc(a) malloc(a)
61 #define my_free(a) free(a)
62 #else /* !TEST_SHADOW */
63 #include <sys/malloc.h>
64 #define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK)
65 #define my_free(a) FREE(a, M_TEMP)
66 #include <libkern/libkern.h>
67 #endif /* TEST_SHADOW */
68
69 #include "shadow.h"
70
71 #define ULONG_ALL_ONES ((u_long)(-1))
72 #define USHORT_ALL_ONES ((u_short)(-1))
73 #define UCHAR_ALL_ONES ((u_char)(-1))
74
75 #define my_trunc(value, divisor) ((value) / (divisor) * (divisor))
76
77 /* a band size of 128K can represent a file up to 8GB */
78 #define BAND_SIZE_DEFAULT_POWER_2 17
79 #define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2)
80
81 typedef u_short band_number_t;
82 #define BAND_ZERO ((band_number_t)0)
83 #define BAND_MAX ((band_number_t)65535)
84
85 struct shadow_map {
86 u_long blocks_per_band;/* size in blocks */
87 u_long block_size;
88 u_char * block_bitmap; /* 1 bit per block; 1=written */
89 band_number_t * bands; /* band map array */
90 u_long file_size_blocks; /* size of file in bands */
91 u_long shadow_size_bands; /* size of shadow in bands */
92 u_long next_band; /* next free band */
93 u_long zeroth_band; /* special-case 0th band */
94 };
95
96
97 typedef struct {
98 u_long byte;
99 u_long bit;
100 } bitmap_offset_t;
101
102 static __inline__ u_char
103 bit(int b)
104 {
105 return ((u_char)(1 << b));
106 }
107
108 /*
109 * Function: bits_lower
110 * Purpose:
111 * Return a byte value in which bits numbered lower than 'b' are set.
112 */
113 static __inline__ u_char
114 bits_lower(int b)
115 {
116 return ((u_char)(bit(b) - 1));
117 }
118
119 /*
120 * Function: byte_set_bits
121 * Purpose:
122 * Set the given range of bits within a byte.
123 */
124 static __inline__ u_char
125 byte_set_bits(int start, int end)
126 {
127 return ((u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end))));
128 }
129
130 static __inline__ bitmap_offset_t
131 bitmap_offset(off_t where)
132 {
133 bitmap_offset_t b;
134
135 b.byte = where / NBBY;
136 b.bit = where % NBBY;
137 return (b);
138 }
139
140 /*
141 * Function: bitmap_set
142 *
143 * Purpose:
144 * Set the given range of bits.
145 *
146 * This algorithm tries to set the extents using the biggest
147 * units, using longs, then a short, then a byte, then bits.
148 */
149 static void
150 bitmap_set(u_char * map, u_long start_bit, u_long bit_count)
151 {
152 bitmap_offset_t start;
153 bitmap_offset_t end;
154
155 start = bitmap_offset(start_bit);
156 end = bitmap_offset(start_bit + bit_count);
157 if (start.byte < end.byte) {
158 u_long n_bytes;
159
160 if (start.bit) {
161 map[start.byte] |= byte_set_bits(start.bit, NBBY - 1);
162 start.bit = 0;
163 start.byte++;
164 if (start.byte == end.byte)
165 goto end;
166 }
167
168 n_bytes = end.byte - start.byte;
169
170 while (n_bytes >= (sizeof(u_long))) {
171 *((u_long *)(map + start.byte)) = ULONG_ALL_ONES;
172 start.byte += sizeof(u_long);
173 n_bytes -= sizeof(u_long);
174 }
175 if (n_bytes >= sizeof(u_short)) {
176 *((u_short *)(map + start.byte)) = USHORT_ALL_ONES;
177 start.byte += sizeof(u_short);
178 n_bytes -= sizeof(u_short);
179 }
180 if (n_bytes == 1) {
181 map[start.byte] = UCHAR_ALL_ONES;
182 start.byte++;
183 n_bytes = 0;
184 }
185 }
186
187 end:
188 if (end.bit > start.bit) {
189 map[start.byte] |= byte_set_bits(start.bit, end.bit - 1);
190 }
191
192 return;
193 }
194
195 /*
196 * Function: bitmap_get
197 *
198 * Purpose:
199 * Return the number of bits in the range that are the same e.g.
200 * 11101 returns 3 because the first 3 bits are the same (1's), whereas
201 * 001100 returns 2 because the first 2 bits are the same.
202 * This algorithm tries to count things in as big a chunk as possible,
203 * first aligning to a byte offset, then trying to count longs, a short,
204 * a byte, then any remaining bits to find the bit that is different.
205 */
206
207 static u_long
208 bitmap_get(u_char * map, u_long start_bit, u_long bit_count,
209 boolean_t * ret_is_set)
210 {
211 u_long count;
212 int i;
213 boolean_t is_set;
214 bitmap_offset_t start;
215 bitmap_offset_t end;
216
217 start = bitmap_offset(start_bit);
218 end = bitmap_offset(start_bit + bit_count);
219
220 is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE;
221 count = 0;
222
223 if (start.byte < end.byte) {
224 u_long n_bytes;
225
226 if (start.bit) { /* try to align to a byte */
227 for (i = start.bit; i < NBBY; i++) {
228 boolean_t this_is_set;
229
230 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
231 if (this_is_set != is_set) {
232 goto done; /* found bit that was different, we're done */
233 }
234 count++;
235 }
236 start.bit = 0; /* made it to the next byte */
237 start.byte++;
238 if (start.byte == end.byte)
239 goto end; /* no more bytes, check for any leftover bits */
240 }
241 /* calculate how many bytes are left in the range */
242 n_bytes = end.byte - start.byte;
243
244 /* check for 4 bytes of the same bits */
245 while (n_bytes >= sizeof(u_long)) {
246 u_long * valPtr = (u_long *)(map + start.byte);
247 if ((is_set && *valPtr == ULONG_ALL_ONES)
248 || (!is_set && *valPtr == 0)) {
249 count += sizeof(*valPtr) * NBBY;
250 start.byte += sizeof(*valPtr);
251 n_bytes -= sizeof(*valPtr);
252 }
253 else
254 break; /* bits differ */
255
256 }
257 /* check for 2 bytes of the same bits */
258 if (n_bytes >= sizeof(u_short)) {
259 u_short * valPtr = (u_short *)(map + start.byte);
260
261 if ((is_set && *valPtr == USHORT_ALL_ONES)
262 || (!is_set && (*valPtr == 0))) {
263 count += sizeof(*valPtr) * NBBY;
264 start.byte += sizeof(*valPtr);
265 n_bytes -= sizeof(*valPtr);
266 }
267 }
268
269 /* check for 1 byte of the same bits */
270 if (n_bytes) {
271 if ((is_set && map[start.byte] == UCHAR_ALL_ONES)
272 || (!is_set && map[start.byte] == 0)) {
273 count += NBBY;
274 start.byte++;
275 n_bytes--;
276 }
277 /* we found bits that were different, find the first one */
278 if (n_bytes) {
279 for (i = 0; i < NBBY; i++) {
280 boolean_t this_is_set;
281
282 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
283 if (this_is_set != is_set) {
284 break;
285 }
286 count++;
287 }
288 goto done;
289 }
290 }
291 }
292
293 end:
294 for (i = start.bit; i < (int)end.bit; i++) {
295 boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
296
297 if (this_is_set != is_set) {
298 break;
299 }
300 count++;
301 }
302
303 done:
304 *ret_is_set = is_set;
305 return (count);
306 }
307
308 static __inline__ band_number_t
309 shadow_map_block_to_band(shadow_map_t * map, unsigned long block)
310 {
311 return (block / map->blocks_per_band);
312 }
313
314 /*
315 * Function: shadow_map_mapped_band
316 * Purpose:
317 * Return the mapped band for the given band.
318 * If map_it is FALSE, and the band is not mapped, return FALSE.
319 * If map_it is TRUE, then this function will always return TRUE.
320 */
321 static boolean_t
322 shadow_map_mapped_band(shadow_map_t * map, band_number_t band,
323 boolean_t map_it, band_number_t * mapped_band)
324 {
325 boolean_t is_mapped = FALSE;
326
327 if (band == map->zeroth_band) {
328 *mapped_band = BAND_ZERO;
329 is_mapped = TRUE;
330 }
331 else {
332 *mapped_band = map->bands[band];
333 if (*mapped_band == BAND_ZERO) {
334 if (map_it) {
335 /* grow the file */
336 if (map->next_band == 0) {
337 /* remember the zero'th band */
338 map->zeroth_band = band;
339 }
340 *mapped_band = map->bands[band] = map->next_band++;
341 is_mapped = TRUE;
342 }
343 }
344 else {
345 is_mapped = TRUE;
346 }
347 }
348 return (is_mapped);
349 }
350
351 /*
352 * Function: shadow_map_contiguous
353 *
354 * Purpose:
355 * Return the first offset within the range position..(position + count)
356 * that is not a contiguous mapped band.
357 *
358 * If called with is_write = TRUE, this function will map bands as it goes.
359 */
360 static u_long
361 shadow_map_contiguous(shadow_map_t * map, u_long start_block,
362 u_long num_blocks, boolean_t is_write)
363 {
364 band_number_t band = shadow_map_block_to_band(map, start_block);
365 u_long end_block = start_block + num_blocks;
366 boolean_t is_mapped;
367 band_number_t mapped_band;
368 u_long ret_end_block = end_block;
369 u_long p;
370
371 is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band);
372 if (is_write == FALSE && is_mapped == FALSE) {
373 static int happened = 0;
374 /* this can't happen */
375 if (happened == 0) {
376 printf("shadow_map_contiguous: this can't happen!\n");
377 happened = 1;
378 }
379 return (start_block);
380 }
381 for (p = my_trunc(start_block + map->blocks_per_band,
382 map->blocks_per_band);
383 p < end_block; p += map->blocks_per_band) {
384 band_number_t next_mapped_band;
385
386 band++;
387 is_mapped = shadow_map_mapped_band(map, band, is_write,
388 &next_mapped_band);
389 if (is_write == FALSE && is_mapped == FALSE) {
390 return (p);
391 }
392 if ((mapped_band + 1) != next_mapped_band) {
393 /* not contiguous */
394 ret_end_block = p;
395 break;
396 }
397 mapped_band = next_mapped_band;
398 }
399 return (ret_end_block);
400 }
401
402
403 /*
404 * Function: block_bitmap_size
405 * Purpose:
406 * The number of bytes required in a block bitmap to represent a file of size
407 * file_size.
408 *
409 * The bytes required is the number of blocks in the file,
410 * divided by the number of bits per byte.
411 * Note:
412 * An 8GB file requires (assuming 512 byte block):
413 * 2^33 / 2^9 / 2^3 = 2^21 = 2MB
414 * of bitmap space. This is a non-trival amount of memory,
415 * particularly since most of the bits will be zero.
416 * A sparse bitmap would really help in this case.
417 */
418 static __inline__ u_long
419 block_bitmap_size(off_t file_size, u_long block_size)
420 {
421 off_t blocks = howmany(file_size, block_size);
422 return (howmany(blocks, NBBY));
423 }
424
425 /*
426 * Function: shadow_map_read
427 *
428 * Purpose:
429 * Calculate the block offset within the shadow to read, and the number
430 * blocks to read. The input values (block_offset, block_count) refer
431 * to the original file.
432 *
433 * The output values (*incr_block_offset, *incr_block_count) refer to the
434 * shadow file if the return value is TRUE. They refer to the original
435 * file if the return value is FALSE.
436
437 * Blocks within a band may or may not have been written, in addition,
438 * Bands are not necessarily contiguous, therefore:
439 * *incr_block_count <= block_count
440 * The caller must be prepared to call this function interatively
441 * to complete the whole i/o.
442 * Returns:
443 * TRUE if the shadow file should be read, FALSE if the original file
444 * should be read.
445 */
446 boolean_t
447 shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count,
448 u_long * incr_block_offset, u_long * incr_block_count)
449 {
450 boolean_t written = FALSE;
451 u_long n_blocks;
452
453 if (block_offset >= map->file_size_blocks
454 || (block_offset + block_count) > map->file_size_blocks) {
455 printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n",
456 block_offset, block_count, map->file_size_blocks);
457 *incr_block_count = 0;
458 }
459 n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count,
460 &written);
461 if (written == FALSE) {
462 *incr_block_count = n_blocks;
463 *incr_block_offset = block_offset;
464 }
465 else { /* start has been written, and therefore mapped */
466 band_number_t mapped_band;
467 u_long band_limit;
468
469 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
470 *incr_block_offset = mapped_band * map->blocks_per_band
471 + (block_offset % map->blocks_per_band);
472 band_limit
473 = shadow_map_contiguous(map, block_offset, block_count, FALSE);
474 *incr_block_count = band_limit - block_offset;
475 if (*incr_block_count > n_blocks) {
476 *incr_block_count = n_blocks;
477 }
478 }
479 return (written);
480 }
481
482 /*
483 * Function: shadow_map_write
484 *
485 * Purpose:
486 * Calculate the block offset within the shadow to write, and the number
487 * blocks to write. The input values (block_offset, block_count) refer
488 * to the original file. The output values
489 * (*incr_block_offset, *incr_block_count) refer to the shadow file.
490 *
491 * Bands are not necessarily contiguous, therefore:
492 * *incr_block_count <= block_count
493 * The caller must be prepared to call this function interatively
494 * to complete the whole i/o.
495 * Returns:
496 * TRUE if the shadow file was grown, FALSE otherwise.
497 */
498 boolean_t
499 shadow_map_write(shadow_map_t * map, u_long block_offset,
500 u_long block_count, u_long * incr_block_offset,
501 u_long * incr_block_count)
502 {
503 u_long band_limit;
504 band_number_t mapped_band;
505 boolean_t shadow_grew = FALSE;
506
507 if (block_offset >= map->file_size_blocks
508 || (block_offset + block_count) > map->file_size_blocks) {
509 printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n",
510 block_offset, block_count, map->file_size_blocks);
511 *incr_block_count = 0;
512 }
513
514 band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE);
515 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
516 *incr_block_offset = mapped_band * map->blocks_per_band
517 + (block_offset % map->blocks_per_band);
518 *incr_block_count = band_limit - block_offset;
519
520 /* mark these blocks as written */
521 bitmap_set(map->block_bitmap, block_offset, *incr_block_count);
522
523 if (map->next_band > map->shadow_size_bands) {
524 map->shadow_size_bands = map->next_band;
525 shadow_grew = TRUE;
526 }
527 return (shadow_grew);
528 }
529
530 boolean_t
531 shadow_map_is_written(shadow_map_t * map, u_long block_offset)
532 {
533 bitmap_offset_t b;
534
535 b = bitmap_offset(block_offset);
536 return ((map->block_bitmap[b.byte] & bit(b.bit)) ? TRUE : FALSE);
537 }
538
539 /*
540 * Function: shadow_map_shadow_size
541 *
542 * Purpose:
543 * To return the size of the shadow file in blocks.
544 */
545 u_long
546 shadow_map_shadow_size(shadow_map_t * map)
547 {
548 return (map->shadow_size_bands * map->blocks_per_band);
549 }
550
551 /*
552 * Function: shadow_map_create
553 *
554 * Purpose:
555 * Allocate the dynamic data for keeping track of the shadow dirty blocks
556 * and the band mapping table.
557 * Returns:
558 * NULL if an error occurred.
559 */
560 shadow_map_t *
561 shadow_map_create(off_t file_size, off_t shadow_size,
562 u_long band_size, u_long block_size)
563 {
564 void * block_bitmap = 0;
565 u_long bitmap_size;
566 band_number_t * bands = 0;
567 shadow_map_t * map;
568 u_long n_bands = 0;
569
570 if (band_size == 0) {
571 band_size = BAND_SIZE_DEFAULT;
572 }
573
574 n_bands = howmany(file_size, band_size);
575 if (n_bands > (BAND_MAX + 1)) {
576 printf("file is too big: %ld > %d\n",
577 n_bands, BAND_MAX);
578 goto failure;
579 }
580
581 /* create a block bitmap, one bit per block */
582 bitmap_size = block_bitmap_size(file_size, block_size);
583 block_bitmap = my_malloc(bitmap_size);
584 if (block_bitmap == NULL) {
585 printf("failed to allocate bitmap\n");
586 goto failure;
587 }
588 bzero(block_bitmap, bitmap_size);
589
590 /* get the band map */
591 bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t));
592 if (bands == NULL) {
593 printf("failed to allocate bands\n");
594 goto failure;
595 }
596 bzero(bands, n_bands * sizeof(band_number_t));
597
598 map = my_malloc(sizeof(*map));
599 if (map == NULL) {
600 printf("failed to allocate map\n");
601 goto failure;
602 }
603 map->blocks_per_band = band_size / block_size;
604 map->block_bitmap = block_bitmap;
605 map->bands = bands;
606 map->file_size_blocks = n_bands * map->blocks_per_band;
607 map->next_band = 0;
608 map->zeroth_band = -1;
609 map->shadow_size_bands = howmany(shadow_size, band_size);
610 map->block_size = block_size;
611 return (map);
612
613 failure:
614 if (block_bitmap)
615 my_free(block_bitmap);
616 if (bands)
617 my_free(bands);
618 return (NULL);
619 }
620
621 /*
622 * Function: shadow_map_free
623 * Purpose:
624 * Frees the data structure to deal with the shadow map.
625 */
626 void
627 shadow_map_free(shadow_map_t * map)
628 {
629 if (map->block_bitmap)
630 my_free(map->block_bitmap);
631 if (map->bands)
632 my_free(map->bands);
633 map->block_bitmap = 0;
634 map->bands = 0;
635 my_free(map);
636 return;
637 }
638
639 #ifdef TEST_SHADOW
640 #define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512)
641
642 enum {
643 ReadRequest,
644 WriteRequest,
645 };
646
647 typedef struct {
648 int type;
649 u_long offset;
650 u_long count;
651 } block_request_t;
652
653 int
654 main()
655 {
656 shadow_map_t * map;
657 int i;
658 block_request_t requests[] = {
659 { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 },
660 { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 },
661 { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3},
662 { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 },
663 { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1),
664 BAND_SIZE_BLOCKS * 2},
665 { 0, 0 },
666 };
667
668 map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512);
669 if (map == NULL) {
670 printf("shadow_map_create failed\n");
671 exit(1);
672 }
673 for (i = 0; TRUE; i++) {
674 u_long offset;
675 u_long resid;
676 boolean_t shadow_grew;
677 boolean_t read_shadow;
678
679 if (requests[i].count == 0) {
680 break;
681 }
682 offset = requests[i].offset;
683 resid = requests[i].count;
684 printf("\n%s REQUEST (%ld, %ld)\n",
685 requests[i].type == WriteRequest ? "WRITE" : "READ",
686 offset, resid);
687 switch (requests[i].type) {
688 case WriteRequest:
689 while (resid > 0) {
690 u_long this_offset;
691 u_long this_count;
692
693 shadow_grew = shadow_map_write(map, offset,
694 resid,
695 &this_offset,
696 &this_count);
697 printf("\t(%ld, %ld) => (%ld, %ld)",
698 offset, resid, this_offset, this_count);
699 resid -= this_count;
700 offset += this_count;
701 if (shadow_grew) {
702 printf(" shadow grew to %ld", shadow_map_shadow_size(map));
703 }
704 printf("\n");
705 }
706 break;
707 case ReadRequest:
708 while (resid > 0) {
709 u_long this_offset;
710 u_long this_count;
711
712 read_shadow = shadow_map_read(map, offset,
713 resid,
714 &this_offset,
715 &this_count);
716 printf("\t(%ld, %ld) => (%ld, %ld)%s\n",
717 offset, resid, this_offset, this_count,
718 read_shadow ? " from shadow" : "");
719 if (this_count == 0) {
720 printf("this_count is 0, aborting\n");
721 break;
722 }
723 resid -= this_count;
724 offset += this_count;
725 }
726 break;
727 default:
728 break;
729 }
730 }
731 if (map) {
732 shadow_map_free(map);
733 }
734 exit(0);
735 return (0);
736 }
737 #endif