]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/vn/shadow.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / dev / vn / shadow.c
CommitLineData
9bccf70c
A
1
2/*
3 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4 *
8ad349bb 5 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
9bccf70c 6 *
8ad349bb
A
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. The rights granted to you under the
11 * License may not be used to create, or enable the creation or
12 * redistribution of, unlawful or unlicensed copies of an Apple operating
13 * system, or to circumvent, violate, or enable the circumvention or
14 * violation of, any terms of an Apple operating system software license
15 * agreement.
16 *
17 * Please obtain a copy of the License at
18 * http://www.opensource.apple.com/apsl/ and read it before using this
19 * file.
20 *
21 * The Original Code and all software distributed under the License are
22 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
23 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
24 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
26 * Please see the License for the specific language governing rights and
27 * limitations under the License.
28 *
29 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
9bccf70c
A
30 */
31
32/*
33 * shadow.c
34 *
35 * Implement copy-on-write shadow map to allow a disk image to be
36 * mounted read-only, yet be writable by transferring writes to a
37 * "shadow" file. Subsequent reads from blocks that have been
38 * written will then go the "shadow" file.
39 *
40 * The map has two parts:
41 * 1) a bit map to track which blocks have been written
42 * 2) a band map to map a "band" within the original file to a corresponding
43 * "band" in the shadow file. Each band has the same size.
44 *
45 * The band map is used to ensure that blocks that are contiguous in the
46 * original file will remain contiguous in the shadow file.
47 *
48 * For debugging purposes, this file can be compiled standalone using:
49 * cc -o shadow shadow.c -DTEST_SHADOW
50 */
51
52/*
53 * Modification History
54 *
55 * December 21, 2001 Dieter Siegmund (dieter@apple.com)
56 * - initial revision
57 */
58#include <sys/param.h>
59#include <sys/types.h>
60#include <mach/boolean.h>
61
62#include <string.h>
63
64#ifdef TEST_SHADOW
65#include <unistd.h>
66#include <stdlib.h>
67#define my_malloc(a) malloc(a)
68#define my_free(a) free(a)
55e303ae 69#else /* !TEST_SHADOW */
9bccf70c
A
70#include <sys/malloc.h>
71#define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK)
72#define my_free(a) FREE(a, M_TEMP)
91447636 73#include <libkern/libkern.h>
55e303ae 74#endif /* TEST_SHADOW */
9bccf70c
A
75
76#include "shadow.h"
77
78#define ULONG_ALL_ONES ((u_long)(-1))
79#define USHORT_ALL_ONES ((u_short)(-1))
80#define UCHAR_ALL_ONES ((u_char)(-1))
81
82#define my_trunc(value, divisor) ((value) / (divisor) * (divisor))
83
84/* a band size of 128K can represent a file up to 8GB */
85#define BAND_SIZE_DEFAULT_POWER_2 17
86#define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2)
87
88typedef u_short band_number_t;
89#define BAND_ZERO ((band_number_t)0)
90#define BAND_MAX ((band_number_t)65535)
91
92struct shadow_map {
93 u_long blocks_per_band;/* size in blocks */
94 u_long block_size;
95 u_char * block_bitmap; /* 1 bit per block; 1=written */
96 band_number_t * bands; /* band map array */
97 u_long file_size_blocks; /* size of file in bands */
98 u_long shadow_size_bands; /* size of shadow in bands */
99 u_long next_band; /* next free band */
100 u_long zeroth_band; /* special-case 0th band */
101};
102
103
104typedef struct {
105 u_long byte;
106 u_long bit;
107} bitmap_offset_t;
108
109static __inline__ u_char
110bit(int b)
111{
112 return ((u_char)(1 << b));
113}
114
115/*
116 * Function: bits_lower
117 * Purpose:
118 * Return a byte value in which bits numbered lower than 'b' are set.
119 */
120static __inline__ u_char
121bits_lower(int b)
122{
123 return ((u_char)(bit(b) - 1));
124}
125
126/*
127 * Function: byte_set_bits
128 * Purpose:
129 * Set the given range of bits within a byte.
130 */
131static __inline__ u_char
132byte_set_bits(int start, int end)
133{
134 return ((u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end))));
135}
136
137static __inline__ bitmap_offset_t
138bitmap_offset(off_t where)
139{
140 bitmap_offset_t b;
141
142 b.byte = where / NBBY;
143 b.bit = where % NBBY;
144 return (b);
145}
146
147/*
148 * Function: bitmap_set
149 *
150 * Purpose:
151 * Set the given range of bits.
152 *
153 * This algorithm tries to set the extents using the biggest
154 * units, using longs, then a short, then a byte, then bits.
155 */
156static void
157bitmap_set(u_char * map, u_long start_bit, u_long bit_count)
158{
159 bitmap_offset_t start;
160 bitmap_offset_t end;
161
162 start = bitmap_offset(start_bit);
163 end = bitmap_offset(start_bit + bit_count);
164 if (start.byte < end.byte) {
165 u_long n_bytes;
166
167 if (start.bit) {
168 map[start.byte] |= byte_set_bits(start.bit, NBBY - 1);
169 start.bit = 0;
170 start.byte++;
171 if (start.byte == end.byte)
172 goto end;
173 }
174
175 n_bytes = end.byte - start.byte;
176
177 while (n_bytes >= (sizeof(u_long))) {
178 *((u_long *)(map + start.byte)) = ULONG_ALL_ONES;
179 start.byte += sizeof(u_long);
180 n_bytes -= sizeof(u_long);
181 }
182 if (n_bytes >= sizeof(u_short)) {
183 *((u_short *)(map + start.byte)) = USHORT_ALL_ONES;
184 start.byte += sizeof(u_short);
185 n_bytes -= sizeof(u_short);
186 }
187 if (n_bytes == 1) {
188 map[start.byte] = UCHAR_ALL_ONES;
189 start.byte++;
190 n_bytes = 0;
191 }
192 }
193
194 end:
195 if (end.bit > start.bit) {
196 map[start.byte] |= byte_set_bits(start.bit, end.bit - 1);
197 }
198
199 return;
200}
201
202/*
203 * Function: bitmap_get
204 *
205 * Purpose:
206 * Return the number of bits in the range that are the same e.g.
207 * 11101 returns 3 because the first 3 bits are the same (1's), whereas
208 * 001100 returns 2 because the first 2 bits are the same.
209 * This algorithm tries to count things in as big a chunk as possible,
210 * first aligning to a byte offset, then trying to count longs, a short,
211 * a byte, then any remaining bits to find the bit that is different.
212 */
213
214static u_long
215bitmap_get(u_char * map, u_long start_bit, u_long bit_count,
216 boolean_t * ret_is_set)
217{
218 u_long count;
219 int i;
220 boolean_t is_set;
221 bitmap_offset_t start;
222 bitmap_offset_t end;
223
224 start = bitmap_offset(start_bit);
225 end = bitmap_offset(start_bit + bit_count);
226
227 is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE;
228 count = 0;
229
230 if (start.byte < end.byte) {
231 u_long n_bytes;
232
233 if (start.bit) { /* try to align to a byte */
234 for (i = start.bit; i < NBBY; i++) {
235 boolean_t this_is_set;
236
237 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
238 if (this_is_set != is_set) {
239 goto done; /* found bit that was different, we're done */
240 }
241 count++;
242 }
243 start.bit = 0; /* made it to the next byte */
244 start.byte++;
245 if (start.byte == end.byte)
246 goto end; /* no more bytes, check for any leftover bits */
247 }
248 /* calculate how many bytes are left in the range */
249 n_bytes = end.byte - start.byte;
250
251 /* check for 4 bytes of the same bits */
252 while (n_bytes >= sizeof(u_long)) {
253 u_long * valPtr = (u_long *)(map + start.byte);
254 if ((is_set && *valPtr == ULONG_ALL_ONES)
255 || (!is_set && *valPtr == 0)) {
256 count += sizeof(*valPtr) * NBBY;
257 start.byte += sizeof(*valPtr);
258 n_bytes -= sizeof(*valPtr);
259 }
260 else
261 break; /* bits differ */
262
263 }
264 /* check for 2 bytes of the same bits */
265 if (n_bytes >= sizeof(u_short)) {
266 u_short * valPtr = (u_short *)(map + start.byte);
267
268 if ((is_set && *valPtr == USHORT_ALL_ONES)
269 || (!is_set && (*valPtr == 0))) {
270 count += sizeof(*valPtr) * NBBY;
271 start.byte += sizeof(*valPtr);
272 n_bytes -= sizeof(*valPtr);
273 }
274 }
275
276 /* check for 1 byte of the same bits */
277 if (n_bytes) {
278 if ((is_set && map[start.byte] == UCHAR_ALL_ONES)
279 || (!is_set && map[start.byte] == 0)) {
280 count += NBBY;
281 start.byte++;
282 n_bytes--;
283 }
284 /* we found bits that were different, find the first one */
285 if (n_bytes) {
286 for (i = 0; i < NBBY; i++) {
287 boolean_t this_is_set;
288
289 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
290 if (this_is_set != is_set) {
291 break;
292 }
293 count++;
294 }
295 goto done;
296 }
297 }
298 }
299
300 end:
91447636 301 for (i = start.bit; i < (int)end.bit; i++) {
9bccf70c
A
302 boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
303
304 if (this_is_set != is_set) {
305 break;
306 }
307 count++;
308 }
309
310 done:
311 *ret_is_set = is_set;
312 return (count);
313}
314
315static __inline__ band_number_t
316shadow_map_block_to_band(shadow_map_t * map, unsigned long block)
317{
318 return (block / map->blocks_per_band);
319}
320
321/*
322 * Function: shadow_map_mapped_band
323 * Purpose:
324 * Return the mapped band for the given band.
325 * If map_it is FALSE, and the band is not mapped, return FALSE.
326 * If map_it is TRUE, then this function will always return TRUE.
327 */
328static boolean_t
329shadow_map_mapped_band(shadow_map_t * map, band_number_t band,
330 boolean_t map_it, band_number_t * mapped_band)
331{
332 boolean_t is_mapped = FALSE;
333
334 if (band == map->zeroth_band) {
335 *mapped_band = BAND_ZERO;
336 is_mapped = TRUE;
337 }
338 else {
339 *mapped_band = map->bands[band];
340 if (*mapped_band == BAND_ZERO) {
341 if (map_it) {
342 /* grow the file */
343 if (map->next_band == 0) {
344 /* remember the zero'th band */
345 map->zeroth_band = band;
346 }
347 *mapped_band = map->bands[band] = map->next_band++;
348 is_mapped = TRUE;
349 }
350 }
351 else {
352 is_mapped = TRUE;
353 }
354 }
355 return (is_mapped);
356}
357
358/*
359 * Function: shadow_map_contiguous
360 *
361 * Purpose:
362 * Return the first offset within the range position..(position + count)
363 * that is not a contiguous mapped band.
364 *
365 * If called with is_write = TRUE, this function will map bands as it goes.
366 */
367static u_long
368shadow_map_contiguous(shadow_map_t * map, u_long start_block,
369 u_long num_blocks, boolean_t is_write)
370{
371 band_number_t band = shadow_map_block_to_band(map, start_block);
372 u_long end_block = start_block + num_blocks;
373 boolean_t is_mapped;
374 band_number_t mapped_band;
375 u_long ret_end_block = end_block;
376 u_long p;
377
378 is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band);
379 if (is_write == FALSE && is_mapped == FALSE) {
380 static int happened = 0;
381 /* this can't happen */
382 if (happened == 0) {
383 printf("shadow_map_contiguous: this can't happen!\n");
384 happened = 1;
385 }
386 return (start_block);
387 }
388 for (p = my_trunc(start_block + map->blocks_per_band,
389 map->blocks_per_band);
390 p < end_block; p += map->blocks_per_band) {
391 band_number_t next_mapped_band;
392
393 band++;
394 is_mapped = shadow_map_mapped_band(map, band, is_write,
395 &next_mapped_band);
396 if (is_write == FALSE && is_mapped == FALSE) {
397 return (p);
398 }
399 if ((mapped_band + 1) != next_mapped_band) {
400 /* not contiguous */
401 ret_end_block = p;
402 break;
403 }
404 mapped_band = next_mapped_band;
405 }
406 return (ret_end_block);
407}
408
409
410/*
411 * Function: block_bitmap_size
412 * Purpose:
413 * The number of bytes required in a block bitmap to represent a file of size
414 * file_size.
415 *
416 * The bytes required is the number of blocks in the file,
417 * divided by the number of bits per byte.
418 * Note:
419 * An 8GB file requires (assuming 512 byte block):
420 * 2^33 / 2^9 / 2^3 = 2^21 = 2MB
421 * of bitmap space. This is a non-trival amount of memory,
422 * particularly since most of the bits will be zero.
423 * A sparse bitmap would really help in this case.
424 */
425static __inline__ u_long
426block_bitmap_size(off_t file_size, u_long block_size)
427{
428 off_t blocks = howmany(file_size, block_size);
429 return (howmany(blocks, NBBY));
430}
431
432/*
433 * Function: shadow_map_read
434 *
435 * Purpose:
436 * Calculate the block offset within the shadow to read, and the number
437 * blocks to read. The input values (block_offset, block_count) refer
438 * to the original file.
439 *
440 * The output values (*incr_block_offset, *incr_block_count) refer to the
441 * shadow file if the return value is TRUE. They refer to the original
442 * file if the return value is FALSE.
443
444 * Blocks within a band may or may not have been written, in addition,
445 * Bands are not necessarily contiguous, therefore:
446 * *incr_block_count <= block_count
447 * The caller must be prepared to call this function interatively
448 * to complete the whole i/o.
449 * Returns:
450 * TRUE if the shadow file should be read, FALSE if the original file
451 * should be read.
452 */
453boolean_t
454shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count,
455 u_long * incr_block_offset, u_long * incr_block_count)
456{
457 boolean_t written = FALSE;
458 u_long n_blocks;
459
460 if (block_offset >= map->file_size_blocks
461 || (block_offset + block_count) > map->file_size_blocks) {
462 printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n",
463 block_offset, block_count, map->file_size_blocks);
464 *incr_block_count = 0;
465 }
466 n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count,
467 &written);
468 if (written == FALSE) {
469 *incr_block_count = n_blocks;
470 *incr_block_offset = block_offset;
471 }
472 else { /* start has been written, and therefore mapped */
473 band_number_t mapped_band;
474 u_long band_limit;
475
476 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
477 *incr_block_offset = mapped_band * map->blocks_per_band
478 + (block_offset % map->blocks_per_band);
479 band_limit
480 = shadow_map_contiguous(map, block_offset, block_count, FALSE);
481 *incr_block_count = band_limit - block_offset;
482 if (*incr_block_count > n_blocks) {
483 *incr_block_count = n_blocks;
484 }
485 }
486 return (written);
487}
488
489/*
490 * Function: shadow_map_write
491 *
492 * Purpose:
493 * Calculate the block offset within the shadow to write, and the number
494 * blocks to write. The input values (block_offset, block_count) refer
495 * to the original file. The output values
496 * (*incr_block_offset, *incr_block_count) refer to the shadow file.
497 *
498 * Bands are not necessarily contiguous, therefore:
499 * *incr_block_count <= block_count
500 * The caller must be prepared to call this function interatively
501 * to complete the whole i/o.
502 * Returns:
503 * TRUE if the shadow file was grown, FALSE otherwise.
504 */
505boolean_t
506shadow_map_write(shadow_map_t * map, u_long block_offset,
507 u_long block_count, u_long * incr_block_offset,
508 u_long * incr_block_count)
509{
510 u_long band_limit;
511 band_number_t mapped_band;
512 boolean_t shadow_grew = FALSE;
513
514 if (block_offset >= map->file_size_blocks
515 || (block_offset + block_count) > map->file_size_blocks) {
516 printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n",
517 block_offset, block_count, map->file_size_blocks);
518 *incr_block_count = 0;
519 }
520
521 band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE);
522 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
523 *incr_block_offset = mapped_band * map->blocks_per_band
524 + (block_offset % map->blocks_per_band);
525 *incr_block_count = band_limit - block_offset;
526
527 /* mark these blocks as written */
528 bitmap_set(map->block_bitmap, block_offset, *incr_block_count);
529
530 if (map->next_band > map->shadow_size_bands) {
531 map->shadow_size_bands = map->next_band;
532 shadow_grew = TRUE;
533 }
534 return (shadow_grew);
535}
536
91447636
A
537boolean_t
538shadow_map_is_written(shadow_map_t * map, u_long block_offset)
539{
540 bitmap_offset_t b;
541
542 b = bitmap_offset(block_offset);
543 return ((map->block_bitmap[b.byte] & bit(b.bit)) ? TRUE : FALSE);
544}
545
9bccf70c
A
546/*
547 * Function: shadow_map_shadow_size
548 *
549 * Purpose:
550 * To return the size of the shadow file in blocks.
551 */
552u_long
553shadow_map_shadow_size(shadow_map_t * map)
554{
555 return (map->shadow_size_bands * map->blocks_per_band);
556}
557
558/*
559 * Function: shadow_map_create
560 *
561 * Purpose:
562 * Allocate the dynamic data for keeping track of the shadow dirty blocks
563 * and the band mapping table.
564 * Returns:
565 * NULL if an error occurred.
566 */
567shadow_map_t *
568shadow_map_create(off_t file_size, off_t shadow_size,
569 u_long band_size, u_long block_size)
570{
571 void * block_bitmap = 0;
572 u_long bitmap_size;
573 band_number_t * bands = 0;
574 shadow_map_t * map;
575 u_long n_bands = 0;
576
577 if (band_size == 0) {
578 band_size = BAND_SIZE_DEFAULT;
579 }
580
581 n_bands = howmany(file_size, band_size);
582 if (n_bands > (BAND_MAX + 1)) {
583 printf("file is too big: %ld > %d\n",
584 n_bands, BAND_MAX);
585 goto failure;
586 }
587
588 /* create a block bitmap, one bit per block */
589 bitmap_size = block_bitmap_size(file_size, block_size);
590 block_bitmap = my_malloc(bitmap_size);
591 if (block_bitmap == NULL) {
592 printf("failed to allocate bitmap\n");
593 goto failure;
594 }
595 bzero(block_bitmap, bitmap_size);
596
597 /* get the band map */
598 bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t));
599 if (bands == NULL) {
600 printf("failed to allocate bands\n");
601 goto failure;
602 }
603 bzero(bands, n_bands * sizeof(band_number_t));
604
605 map = my_malloc(sizeof(*map));
606 if (map == NULL) {
607 printf("failed to allocate map\n");
608 goto failure;
609 }
610 map->blocks_per_band = band_size / block_size;
611 map->block_bitmap = block_bitmap;
612 map->bands = bands;
613 map->file_size_blocks = n_bands * map->blocks_per_band;
614 map->next_band = 0;
615 map->zeroth_band = -1;
616 map->shadow_size_bands = howmany(shadow_size, band_size);
617 map->block_size = block_size;
618 return (map);
619
620 failure:
621 if (block_bitmap)
622 my_free(block_bitmap);
623 if (bands)
624 my_free(bands);
625 return (NULL);
626}
627
628/*
629 * Function: shadow_map_free
630 * Purpose:
631 * Frees the data structure to deal with the shadow map.
632 */
633void
634shadow_map_free(shadow_map_t * map)
635{
636 if (map->block_bitmap)
637 my_free(map->block_bitmap);
638 if (map->bands)
639 my_free(map->bands);
640 map->block_bitmap = 0;
641 map->bands = 0;
642 my_free(map);
643 return;
644}
645
646#ifdef TEST_SHADOW
647#define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512)
648
649enum {
650 ReadRequest,
651 WriteRequest,
652};
653
654typedef struct {
655 int type;
656 u_long offset;
657 u_long count;
658} block_request_t;
659
660int
661main()
662{
663 shadow_map_t * map;
664 int i;
665 block_request_t requests[] = {
666 { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 },
667 { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 },
668 { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3},
669 { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 },
670 { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1),
671 BAND_SIZE_BLOCKS * 2},
672 { 0, 0 },
673 };
674
675 map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512);
676 if (map == NULL) {
677 printf("shadow_map_create failed\n");
678 exit(1);
679 }
680 for (i = 0; TRUE; i++) {
681 u_long offset;
682 u_long resid;
683 boolean_t shadow_grew;
684 boolean_t read_shadow;
685
686 if (requests[i].count == 0) {
687 break;
688 }
689 offset = requests[i].offset;
690 resid = requests[i].count;
691 printf("\n%s REQUEST (%ld, %ld)\n",
692 requests[i].type == WriteRequest ? "WRITE" : "READ",
693 offset, resid);
694 switch (requests[i].type) {
695 case WriteRequest:
696 while (resid > 0) {
697 u_long this_offset;
698 u_long this_count;
699
700 shadow_grew = shadow_map_write(map, offset,
701 resid,
702 &this_offset,
703 &this_count);
704 printf("\t(%ld, %ld) => (%ld, %ld)",
705 offset, resid, this_offset, this_count);
706 resid -= this_count;
707 offset += this_count;
708 if (shadow_grew) {
709 printf(" shadow grew to %ld", shadow_map_shadow_size(map));
710 }
711 printf("\n");
712 }
713 break;
714 case ReadRequest:
715 while (resid > 0) {
716 u_long this_offset;
717 u_long this_count;
718
719 read_shadow = shadow_map_read(map, offset,
720 resid,
721 &this_offset,
722 &this_count);
723 printf("\t(%ld, %ld) => (%ld, %ld)%s\n",
724 offset, resid, this_offset, this_count,
725 read_shadow ? " from shadow" : "");
726 if (this_count == 0) {
727 printf("this_count is 0, aborting\n");
728 break;
729 }
730 resid -= this_count;
731 offset += this_count;
732 }
733 break;
734 default:
735 break;
736 }
737 }
738 if (map) {
739 shadow_map_free(map);
740 }
741 exit(0);
742 return (0);
743}
744#endif