]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/shadow.c
xnu-344.23.tar.gz
[apple/xnu.git] / bsd / dev / vn / shadow.c
1
2 /*
3 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * The contents of this file constitute Original Code as defined in and
8 * are subject to the Apple Public Source License Version 1.1 (the
9 * "License"). You may not use this file except in compliance with the
10 * License. Please obtain a copy of the License at
11 * http://www.apple.com/publicsource and read it before using this file.
12 *
13 * This Original Code and all software distributed under the License are
14 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
18 * License for the specific language governing rights and limitations
19 * under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * shadow.c
26 *
27 * Implement copy-on-write shadow map to allow a disk image to be
28 * mounted read-only, yet be writable by transferring writes to a
29 * "shadow" file. Subsequent reads from blocks that have been
30 * written will then go the "shadow" file.
31 *
32 * The map has two parts:
33 * 1) a bit map to track which blocks have been written
34 * 2) a band map to map a "band" within the original file to a corresponding
35 * "band" in the shadow file. Each band has the same size.
36 *
37 * The band map is used to ensure that blocks that are contiguous in the
38 * original file will remain contiguous in the shadow file.
39 *
40 * For debugging purposes, this file can be compiled standalone using:
41 * cc -o shadow shadow.c -DTEST_SHADOW
42 */
43
44 /*
45 * Modification History
46 *
47 * December 21, 2001 Dieter Siegmund (dieter@apple.com)
48 * - initial revision
49 */
50 #include <sys/param.h>
51 #include <sys/types.h>
52 #include <mach/boolean.h>
53
54 #include <string.h>
55
56 #ifdef TEST_SHADOW
57 #include <unistd.h>
58 #include <stdlib.h>
59 #define my_malloc(a) malloc(a)
60 #define my_free(a) free(a)
61 #else TEST_SHADOW
62 #include <sys/malloc.h>
63 #define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK)
64 #define my_free(a) FREE(a, M_TEMP)
65 #endif TEST_SHADOW
66
67 #include "shadow.h"
68
69 #define ULONG_ALL_ONES ((u_long)(-1))
70 #define USHORT_ALL_ONES ((u_short)(-1))
71 #define UCHAR_ALL_ONES ((u_char)(-1))
72
73 #define my_trunc(value, divisor) ((value) / (divisor) * (divisor))
74
75 /* a band size of 128K can represent a file up to 8GB */
76 #define BAND_SIZE_DEFAULT_POWER_2 17
77 #define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2)
78
79 typedef u_short band_number_t;
80 #define BAND_ZERO ((band_number_t)0)
81 #define BAND_MAX ((band_number_t)65535)
82
83 struct shadow_map {
84 u_long blocks_per_band;/* size in blocks */
85 u_long block_size;
86 u_char * block_bitmap; /* 1 bit per block; 1=written */
87 band_number_t * bands; /* band map array */
88 u_long file_size_blocks; /* size of file in bands */
89 u_long shadow_size_bands; /* size of shadow in bands */
90 u_long next_band; /* next free band */
91 u_long zeroth_band; /* special-case 0th band */
92 };
93
94
95 typedef struct {
96 u_long byte;
97 u_long bit;
98 } bitmap_offset_t;
99
100 static __inline__ u_char
101 bit(int b)
102 {
103 return ((u_char)(1 << b));
104 }
105
106 /*
107 * Function: bits_lower
108 * Purpose:
109 * Return a byte value in which bits numbered lower than 'b' are set.
110 */
111 static __inline__ u_char
112 bits_lower(int b)
113 {
114 return ((u_char)(bit(b) - 1));
115 }
116
117 /*
118 * Function: byte_set_bits
119 * Purpose:
120 * Set the given range of bits within a byte.
121 */
122 static __inline__ u_char
123 byte_set_bits(int start, int end)
124 {
125 return ((u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end))));
126 }
127
128 static __inline__ bitmap_offset_t
129 bitmap_offset(off_t where)
130 {
131 bitmap_offset_t b;
132
133 b.byte = where / NBBY;
134 b.bit = where % NBBY;
135 return (b);
136 }
137
138 /*
139 * Function: bitmap_set
140 *
141 * Purpose:
142 * Set the given range of bits.
143 *
144 * This algorithm tries to set the extents using the biggest
145 * units, using longs, then a short, then a byte, then bits.
146 */
147 static void
148 bitmap_set(u_char * map, u_long start_bit, u_long bit_count)
149 {
150 bitmap_offset_t start;
151 bitmap_offset_t end;
152
153 start = bitmap_offset(start_bit);
154 end = bitmap_offset(start_bit + bit_count);
155 if (start.byte < end.byte) {
156 u_long n_bytes;
157
158 if (start.bit) {
159 map[start.byte] |= byte_set_bits(start.bit, NBBY - 1);
160 start.bit = 0;
161 start.byte++;
162 if (start.byte == end.byte)
163 goto end;
164 }
165
166 n_bytes = end.byte - start.byte;
167
168 while (n_bytes >= (sizeof(u_long))) {
169 *((u_long *)(map + start.byte)) = ULONG_ALL_ONES;
170 start.byte += sizeof(u_long);
171 n_bytes -= sizeof(u_long);
172 }
173 if (n_bytes >= sizeof(u_short)) {
174 *((u_short *)(map + start.byte)) = USHORT_ALL_ONES;
175 start.byte += sizeof(u_short);
176 n_bytes -= sizeof(u_short);
177 }
178 if (n_bytes == 1) {
179 map[start.byte] = UCHAR_ALL_ONES;
180 start.byte++;
181 n_bytes = 0;
182 }
183 }
184
185 end:
186 if (end.bit > start.bit) {
187 map[start.byte] |= byte_set_bits(start.bit, end.bit - 1);
188 }
189
190 return;
191 }
192
193 /*
194 * Function: bitmap_get
195 *
196 * Purpose:
197 * Return the number of bits in the range that are the same e.g.
198 * 11101 returns 3 because the first 3 bits are the same (1's), whereas
199 * 001100 returns 2 because the first 2 bits are the same.
200 * This algorithm tries to count things in as big a chunk as possible,
201 * first aligning to a byte offset, then trying to count longs, a short,
202 * a byte, then any remaining bits to find the bit that is different.
203 */
204
205 static u_long
206 bitmap_get(u_char * map, u_long start_bit, u_long bit_count,
207 boolean_t * ret_is_set)
208 {
209 u_long count;
210 int i;
211 boolean_t is_set;
212 bitmap_offset_t start;
213 bitmap_offset_t end;
214
215 start = bitmap_offset(start_bit);
216 end = bitmap_offset(start_bit + bit_count);
217
218 is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE;
219 count = 0;
220
221 if (start.byte < end.byte) {
222 u_long n_bytes;
223
224 if (start.bit) { /* try to align to a byte */
225 for (i = start.bit; i < NBBY; i++) {
226 boolean_t this_is_set;
227
228 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
229 if (this_is_set != is_set) {
230 goto done; /* found bit that was different, we're done */
231 }
232 count++;
233 }
234 start.bit = 0; /* made it to the next byte */
235 start.byte++;
236 if (start.byte == end.byte)
237 goto end; /* no more bytes, check for any leftover bits */
238 }
239 /* calculate how many bytes are left in the range */
240 n_bytes = end.byte - start.byte;
241
242 /* check for 4 bytes of the same bits */
243 while (n_bytes >= sizeof(u_long)) {
244 u_long * valPtr = (u_long *)(map + start.byte);
245 if ((is_set && *valPtr == ULONG_ALL_ONES)
246 || (!is_set && *valPtr == 0)) {
247 count += sizeof(*valPtr) * NBBY;
248 start.byte += sizeof(*valPtr);
249 n_bytes -= sizeof(*valPtr);
250 }
251 else
252 break; /* bits differ */
253
254 }
255 /* check for 2 bytes of the same bits */
256 if (n_bytes >= sizeof(u_short)) {
257 u_short * valPtr = (u_short *)(map + start.byte);
258
259 if ((is_set && *valPtr == USHORT_ALL_ONES)
260 || (!is_set && (*valPtr == 0))) {
261 count += sizeof(*valPtr) * NBBY;
262 start.byte += sizeof(*valPtr);
263 n_bytes -= sizeof(*valPtr);
264 }
265 }
266
267 /* check for 1 byte of the same bits */
268 if (n_bytes) {
269 if ((is_set && map[start.byte] == UCHAR_ALL_ONES)
270 || (!is_set && map[start.byte] == 0)) {
271 count += NBBY;
272 start.byte++;
273 n_bytes--;
274 }
275 /* we found bits that were different, find the first one */
276 if (n_bytes) {
277 for (i = 0; i < NBBY; i++) {
278 boolean_t this_is_set;
279
280 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
281 if (this_is_set != is_set) {
282 break;
283 }
284 count++;
285 }
286 goto done;
287 }
288 }
289 }
290
291 end:
292 for (i = start.bit; i < end.bit; i++) {
293 boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
294
295 if (this_is_set != is_set) {
296 break;
297 }
298 count++;
299 }
300
301 done:
302 *ret_is_set = is_set;
303 return (count);
304 }
305
306 static __inline__ band_number_t
307 shadow_map_block_to_band(shadow_map_t * map, unsigned long block)
308 {
309 return (block / map->blocks_per_band);
310 }
311
312 /*
313 * Function: shadow_map_mapped_band
314 * Purpose:
315 * Return the mapped band for the given band.
316 * If map_it is FALSE, and the band is not mapped, return FALSE.
317 * If map_it is TRUE, then this function will always return TRUE.
318 */
319 static boolean_t
320 shadow_map_mapped_band(shadow_map_t * map, band_number_t band,
321 boolean_t map_it, band_number_t * mapped_band)
322 {
323 boolean_t is_mapped = FALSE;
324
325 if (band == map->zeroth_band) {
326 *mapped_band = BAND_ZERO;
327 is_mapped = TRUE;
328 }
329 else {
330 *mapped_band = map->bands[band];
331 if (*mapped_band == BAND_ZERO) {
332 if (map_it) {
333 /* grow the file */
334 if (map->next_band == 0) {
335 /* remember the zero'th band */
336 map->zeroth_band = band;
337 }
338 *mapped_band = map->bands[band] = map->next_band++;
339 is_mapped = TRUE;
340 }
341 }
342 else {
343 is_mapped = TRUE;
344 }
345 }
346 return (is_mapped);
347 }
348
349 /*
350 * Function: shadow_map_contiguous
351 *
352 * Purpose:
353 * Return the first offset within the range position..(position + count)
354 * that is not a contiguous mapped band.
355 *
356 * If called with is_write = TRUE, this function will map bands as it goes.
357 */
358 static u_long
359 shadow_map_contiguous(shadow_map_t * map, u_long start_block,
360 u_long num_blocks, boolean_t is_write)
361 {
362 band_number_t band = shadow_map_block_to_band(map, start_block);
363 u_long end_block = start_block + num_blocks;
364 boolean_t is_mapped;
365 band_number_t mapped_band;
366 u_long ret_end_block = end_block;
367 u_long p;
368
369 is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band);
370 if (is_write == FALSE && is_mapped == FALSE) {
371 static int happened = 0;
372 /* this can't happen */
373 if (happened == 0) {
374 printf("shadow_map_contiguous: this can't happen!\n");
375 happened = 1;
376 }
377 return (start_block);
378 }
379 for (p = my_trunc(start_block + map->blocks_per_band,
380 map->blocks_per_band);
381 p < end_block; p += map->blocks_per_band) {
382 band_number_t next_mapped_band;
383
384 band++;
385 is_mapped = shadow_map_mapped_band(map, band, is_write,
386 &next_mapped_band);
387 if (is_write == FALSE && is_mapped == FALSE) {
388 return (p);
389 }
390 if ((mapped_band + 1) != next_mapped_band) {
391 /* not contiguous */
392 ret_end_block = p;
393 break;
394 }
395 mapped_band = next_mapped_band;
396 }
397 return (ret_end_block);
398 }
399
400
401 /*
402 * Function: block_bitmap_size
403 * Purpose:
404 * The number of bytes required in a block bitmap to represent a file of size
405 * file_size.
406 *
407 * The bytes required is the number of blocks in the file,
408 * divided by the number of bits per byte.
409 * Note:
410 * An 8GB file requires (assuming 512 byte block):
411 * 2^33 / 2^9 / 2^3 = 2^21 = 2MB
412 * of bitmap space. This is a non-trival amount of memory,
413 * particularly since most of the bits will be zero.
414 * A sparse bitmap would really help in this case.
415 */
416 static __inline__ u_long
417 block_bitmap_size(off_t file_size, u_long block_size)
418 {
419 off_t blocks = howmany(file_size, block_size);
420 return (howmany(blocks, NBBY));
421 }
422
423 /*
424 * Function: shadow_map_read
425 *
426 * Purpose:
427 * Calculate the block offset within the shadow to read, and the number
428 * blocks to read. The input values (block_offset, block_count) refer
429 * to the original file.
430 *
431 * The output values (*incr_block_offset, *incr_block_count) refer to the
432 * shadow file if the return value is TRUE. They refer to the original
433 * file if the return value is FALSE.
434
435 * Blocks within a band may or may not have been written, in addition,
436 * Bands are not necessarily contiguous, therefore:
437 * *incr_block_count <= block_count
438 * The caller must be prepared to call this function interatively
439 * to complete the whole i/o.
440 * Returns:
441 * TRUE if the shadow file should be read, FALSE if the original file
442 * should be read.
443 */
444 boolean_t
445 shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count,
446 u_long * incr_block_offset, u_long * incr_block_count)
447 {
448 boolean_t written = FALSE;
449 u_long n_blocks;
450
451 if (block_offset >= map->file_size_blocks
452 || (block_offset + block_count) > map->file_size_blocks) {
453 printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n",
454 block_offset, block_count, map->file_size_blocks);
455 *incr_block_count = 0;
456 }
457 n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count,
458 &written);
459 if (written == FALSE) {
460 *incr_block_count = n_blocks;
461 *incr_block_offset = block_offset;
462 }
463 else { /* start has been written, and therefore mapped */
464 band_number_t mapped_band;
465 u_long band_limit;
466
467 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
468 *incr_block_offset = mapped_band * map->blocks_per_band
469 + (block_offset % map->blocks_per_band);
470 band_limit
471 = shadow_map_contiguous(map, block_offset, block_count, FALSE);
472 *incr_block_count = band_limit - block_offset;
473 if (*incr_block_count > n_blocks) {
474 *incr_block_count = n_blocks;
475 }
476 }
477 return (written);
478 }
479
480 /*
481 * Function: shadow_map_write
482 *
483 * Purpose:
484 * Calculate the block offset within the shadow to write, and the number
485 * blocks to write. The input values (block_offset, block_count) refer
486 * to the original file. The output values
487 * (*incr_block_offset, *incr_block_count) refer to the shadow file.
488 *
489 * Bands are not necessarily contiguous, therefore:
490 * *incr_block_count <= block_count
491 * The caller must be prepared to call this function interatively
492 * to complete the whole i/o.
493 * Returns:
494 * TRUE if the shadow file was grown, FALSE otherwise.
495 */
496 boolean_t
497 shadow_map_write(shadow_map_t * map, u_long block_offset,
498 u_long block_count, u_long * incr_block_offset,
499 u_long * incr_block_count)
500 {
501 u_long band_limit;
502 band_number_t mapped_band;
503 boolean_t shadow_grew = FALSE;
504
505 if (block_offset >= map->file_size_blocks
506 || (block_offset + block_count) > map->file_size_blocks) {
507 printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n",
508 block_offset, block_count, map->file_size_blocks);
509 *incr_block_count = 0;
510 }
511
512 band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE);
513 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
514 *incr_block_offset = mapped_band * map->blocks_per_band
515 + (block_offset % map->blocks_per_band);
516 *incr_block_count = band_limit - block_offset;
517
518 /* mark these blocks as written */
519 bitmap_set(map->block_bitmap, block_offset, *incr_block_count);
520
521 if (map->next_band > map->shadow_size_bands) {
522 map->shadow_size_bands = map->next_band;
523 shadow_grew = TRUE;
524 }
525 return (shadow_grew);
526 }
527
528 /*
529 * Function: shadow_map_shadow_size
530 *
531 * Purpose:
532 * To return the size of the shadow file in blocks.
533 */
534 u_long
535 shadow_map_shadow_size(shadow_map_t * map)
536 {
537 return (map->shadow_size_bands * map->blocks_per_band);
538 }
539
540 /*
541 * Function: shadow_map_create
542 *
543 * Purpose:
544 * Allocate the dynamic data for keeping track of the shadow dirty blocks
545 * and the band mapping table.
546 * Returns:
547 * NULL if an error occurred.
548 */
549 shadow_map_t *
550 shadow_map_create(off_t file_size, off_t shadow_size,
551 u_long band_size, u_long block_size)
552 {
553 void * block_bitmap = 0;
554 u_long bitmap_size;
555 band_number_t * bands = 0;
556 shadow_map_t * map;
557 u_long n_bands = 0;
558
559 if (band_size == 0) {
560 band_size = BAND_SIZE_DEFAULT;
561 }
562
563 n_bands = howmany(file_size, band_size);
564 if (n_bands > (BAND_MAX + 1)) {
565 printf("file is too big: %ld > %d\n",
566 n_bands, BAND_MAX);
567 goto failure;
568 }
569
570 /* create a block bitmap, one bit per block */
571 bitmap_size = block_bitmap_size(file_size, block_size);
572 block_bitmap = my_malloc(bitmap_size);
573 if (block_bitmap == NULL) {
574 printf("failed to allocate bitmap\n");
575 goto failure;
576 }
577 bzero(block_bitmap, bitmap_size);
578
579 /* get the band map */
580 bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t));
581 if (bands == NULL) {
582 printf("failed to allocate bands\n");
583 goto failure;
584 }
585 bzero(bands, n_bands * sizeof(band_number_t));
586
587 map = my_malloc(sizeof(*map));
588 if (map == NULL) {
589 printf("failed to allocate map\n");
590 goto failure;
591 }
592 map->blocks_per_band = band_size / block_size;
593 map->block_bitmap = block_bitmap;
594 map->bands = bands;
595 map->file_size_blocks = n_bands * map->blocks_per_band;
596 map->next_band = 0;
597 map->zeroth_band = -1;
598 map->shadow_size_bands = howmany(shadow_size, band_size);
599 map->block_size = block_size;
600 return (map);
601
602 failure:
603 if (block_bitmap)
604 my_free(block_bitmap);
605 if (bands)
606 my_free(bands);
607 return (NULL);
608 }
609
610 /*
611 * Function: shadow_map_free
612 * Purpose:
613 * Frees the data structure to deal with the shadow map.
614 */
615 void
616 shadow_map_free(shadow_map_t * map)
617 {
618 if (map->block_bitmap)
619 my_free(map->block_bitmap);
620 if (map->bands)
621 my_free(map->bands);
622 map->block_bitmap = 0;
623 map->bands = 0;
624 my_free(map);
625 return;
626 }
627
628 #ifdef TEST_SHADOW
629 #define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512)
630
631 enum {
632 ReadRequest,
633 WriteRequest,
634 };
635
636 typedef struct {
637 int type;
638 u_long offset;
639 u_long count;
640 } block_request_t;
641
642 int
643 main()
644 {
645 shadow_map_t * map;
646 int i;
647 block_request_t requests[] = {
648 { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 },
649 { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 },
650 { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3},
651 { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 },
652 { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1),
653 BAND_SIZE_BLOCKS * 2},
654 { 0, 0 },
655 };
656
657 map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512);
658 if (map == NULL) {
659 printf("shadow_map_create failed\n");
660 exit(1);
661 }
662 for (i = 0; TRUE; i++) {
663 u_long offset;
664 u_long resid;
665 boolean_t shadow_grew;
666 boolean_t read_shadow;
667
668 if (requests[i].count == 0) {
669 break;
670 }
671 offset = requests[i].offset;
672 resid = requests[i].count;
673 printf("\n%s REQUEST (%ld, %ld)\n",
674 requests[i].type == WriteRequest ? "WRITE" : "READ",
675 offset, resid);
676 switch (requests[i].type) {
677 case WriteRequest:
678 while (resid > 0) {
679 u_long this_offset;
680 u_long this_count;
681
682 shadow_grew = shadow_map_write(map, offset,
683 resid,
684 &this_offset,
685 &this_count);
686 printf("\t(%ld, %ld) => (%ld, %ld)",
687 offset, resid, this_offset, this_count);
688 resid -= this_count;
689 offset += this_count;
690 if (shadow_grew) {
691 printf(" shadow grew to %ld", shadow_map_shadow_size(map));
692 }
693 printf("\n");
694 }
695 break;
696 case ReadRequest:
697 while (resid > 0) {
698 u_long this_offset;
699 u_long this_count;
700
701 read_shadow = shadow_map_read(map, offset,
702 resid,
703 &this_offset,
704 &this_count);
705 printf("\t(%ld, %ld) => (%ld, %ld)%s\n",
706 offset, resid, this_offset, this_count,
707 read_shadow ? " from shadow" : "");
708 if (this_count == 0) {
709 printf("this_count is 0, aborting\n");
710 break;
711 }
712 resid -= this_count;
713 offset += this_count;
714 }
715 break;
716 default:
717 break;
718 }
719 }
720 if (map) {
721 shadow_map_free(map);
722 }
723 exit(0);
724 return (0);
725 }
726 #endif