/*
 * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "fsck_hfs.h"
#include "cache.h"

/* Number of Buf_t structures to preallocate (value guarded in case the
 * header already provides it) */
#ifndef MAXBUFS
#define MAXBUFS 48
#endif
/*
 * Allocate an unused cache block.
 */
void *CacheAllocBlock (Cache_t *cache);

/*
 * Release an active cache block.
 */
static int CacheFreeBlock( Cache_t *cache, Tag_t *tag );

/*
 * Obtain a cache block. If one already exists, it is returned. Otherwise a
 * new one is created and inserted into the cache.
 */
int CacheLookup (Cache_t *cache, uint64_t off, Tag_t **tag);

/*
 * Perform a direct read on the file.
 */
int CacheRawRead (Cache_t *cache, uint64_t off, uint32_t len, void *buf);

/*
 * Perform a direct write on the file.
 */
int CacheRawWrite (Cache_t *cache, uint64_t off, uint32_t len, void *buf);

/*
 * Flush, and optionally remove, all cache blocks that intersect
 * a given range.
 */
static int CacheFlushRange( Cache_t *cache, uint64_t start, uint64_t len, int remove);

/*
 * Initializes the LRU data structures.
 */
static int LRUInit (LRU_t *lru);

/*
 * Shutdown the LRU.
 *
 * NOTE: This is typically a no-op, since the cache manager will be clearing
 *       all cache tags.
 */
static int LRUDestroy (LRU_t *lru);

/*
 * Registers data activity on the given node. If the node is already in the
 * LRU, it is moved to the front. Otherwise, it is inserted at the front.
 *
 * NOTE: If the node is not in the LRU, we assume that its pointers are NULL.
 */
static int LRUHit (LRU_t *lru, LRUNode_t *node, int age);

/*
 * Chooses a buffer to release.
 *
 * TODO: Under extreme conditions, it should be possible to release the buffer
 *       of an actively referenced cache buffer, leaving the tag behind as a
 *       placeholder. This would be required for implementing 2Q-LRU
 *       operation.
 */
static int LRUEvict (LRU_t *lru, LRUNode_t *node);
/*
 * CalculateCacheSizes
 *
 * Determine the cache size values that should be used to initialize the cache.
 * If the requested value does not validate according to the conditions described
 * below, it is adjusted.
 *
 * If no input values are provided, use default values for cache size
 * and cache block size.
 *
 * Cache size should be -
 *		a. greater than or equal to minimum cache size
 *		b. less than or equal to maximum cache size.  The maximum cache size
 *		   is limited by the maximum value that can be allocated using malloc
 *		   or mmap (maximum value for size_t)
 *		c. a multiple of cache block size
 *
 * Output:
 *		*calcBlockSize:   the size of the blocks in the cache
 *		*calcTotalBlocks: the number of blocks in the cache
 */
void CalculateCacheSizes(uint64_t cacheSize, uint32_t *calcBlockSize, uint32_t *calcTotalBlocks, char cache_debug)
{
	uint32_t blockSize = DefaultCacheBlockSize;
	const size_t max_size_t = ~0;	/* Maximum value represented by size_t */

	if (cacheSize == 0) {
		/* Simple case - no user cache size, use default values */
		*calcBlockSize = DefaultCacheBlockSize;
		*calcTotalBlocks = DefaultCacheBlocks;
	} else {
		/* User provided cache size - check with minimum and maximum values */
		if (cacheSize < MinCacheSize) {
			cacheSize = MinCacheSize;
		}
		if (cacheSize > max_size_t ||
		    cacheSize > MaxCacheSize) {
			if (cache_debug)
				printf ("\tCache size should be greater than %uM and less than %luM\n", MinCacheSize/(1024*1024), max_size_t/(1024*1024));
			cacheSize = MaxCacheSize;
		}

		/* Cache size should be a multiple of cache block size */
		if (cacheSize % blockSize) {
			if (cache_debug)
				printf ("\tCache size should be multiple of cache block size (currently %uK)\n", blockSize/1024);
			cacheSize = (cacheSize / blockSize) * blockSize;
		}

		*calcBlockSize = blockSize;
		*calcTotalBlocks = cacheSize / blockSize;
	}
}
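
/*
 * Illustrative sketch (not part of the original file): how the adjustment
 * rules above play out for a caller. The request below is assumed to be
 * larger than MinCacheSize and not a multiple of the block size, so it is
 * rounded down to one. The function and variable names are hypothetical.
 */
#if 0
static void ExampleCacheSizing(void)
{
	uint32_t blockSize, totalBlocks;

	/* A request of 32 MB + 1000 bytes is rounded down to a multiple
	 * of DefaultCacheBlockSize */
	CalculateCacheSizes((32 * 1024 * 1024) + 1000, &blockSize, &totalBlocks, 1);

	/* blockSize * totalBlocks is now the largest multiple of blockSize
	 * that is <= the requested size */
	printf("cache = %u blocks of %u bytes\n", totalBlocks, blockSize);
}
#endif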
/*
 * Initializes the cache for use.  If preTouch is non-zero, the cache memory will
 * be iterated through, with one byte per page touched.  (This is to ensure that
 * the memory is actually created, and is used to avoid deadlocking due to swapping
 * during a live verify of the boot volume.)
 */
int CacheInit (Cache_t *cache, int fdRead, int fdWrite, uint32_t devBlockSize,
	       uint32_t cacheBlockSize, uint32_t cacheTotalBlocks, uint32_t hashSize, int preTouch)
{
	void **		temp;
	uint32_t	i;
	Buf_t *		buf;

	memset (cache, 0x00, sizeof (Cache_t));

	cache->FD_R = fdRead;
	cache->FD_W = fdWrite;
	cache->DevBlockSize = devBlockSize;
	/* CacheFlush requires cleared cache->Hash */
	cache->Hash = (Tag_t **) calloc( 1, (sizeof (Tag_t *) * hashSize) );
	cache->HashSize = hashSize;
	cache->BlockSize = cacheBlockSize;

	/* Allocate the cache memory */
	/* Break out of the loop on success, or when the proposed cache is < MinCacheSize */
	while (1) {
		cache->FreeHead = mmap (NULL,
					cacheTotalBlocks * cacheBlockSize,
					PROT_READ | PROT_WRITE,
					MAP_ANON | MAP_PRIVATE,
					-1, 0);
		if (cache->FreeHead == (void *)-1) {
			if ((cacheTotalBlocks * cacheBlockSize) <= MinCacheSize) {
				if (debug)
					printf("\tTried to allocate %dK, minimum is %dK\n",
						(cacheTotalBlocks * cacheBlockSize) / 1024,
						MinCacheSize / 1024);
				break;
			}
			if (debug)
				printf("\tFailed to allocate %uK for cache; trying %uK\n",
					(cacheTotalBlocks * cacheBlockSize) / 1024,
					(cacheTotalBlocks * cacheBlockSize / 2) / 1024);
			CalculateCacheSizes((cacheTotalBlocks * cacheBlockSize) / 2, &cacheBlockSize, &cacheTotalBlocks, debug);
			continue;
		} else {
			if (debug) {
				printf ("\tUsing cacheBlockSize=%uK cacheTotalBlock=%u cacheSize=%uK.\n", cacheBlockSize/1024, cacheTotalBlocks, (cacheBlockSize/1024) * cacheTotalBlocks);
			}
			break;
		}
	}
	if (cache->FreeHead == (void*)-1) {
		if (debug)
			printf("%s(%d): FreeHead = -1\n", __FUNCTION__, __LINE__);
		return (ENOMEM);
	}

	/* If necessary, touch a byte in each page */
	if (preTouch) {
		size_t pageSize = getpagesize();
		unsigned char *ptr = (unsigned char *)cache->FreeHead;
		unsigned char *end = ptr + (cacheTotalBlocks * cacheBlockSize);
		while (ptr < end) {
			*ptr = 0;
			ptr += pageSize;
		}
	}

	/* Initialize the cache memory free list */
	temp = cache->FreeHead;
	for (i = 0; i < cacheTotalBlocks - 1; i++) {
		*temp = ((char *)temp + cacheBlockSize);
		temp  = (void **)((char *)temp + cacheBlockSize);
	}
	*temp = NULL;
	cache->FreeSize = cacheTotalBlocks;

	buf = (Buf_t *)malloc(sizeof(Buf_t) * MAXBUFS);
	if (buf == NULL) {
		if (debug)
			printf("%s(%d): malloc(%zu) failed\n", __FUNCTION__, __LINE__, sizeof(Buf_t) * MAXBUFS);
		return (ENOMEM);
	}

	memset (&buf[0], 0x00, sizeof (Buf_t) * MAXBUFS);
	for (i = 1 ; i < MAXBUFS ; i++) {
		(&buf[i-1])->Next = &buf[i];
	}
	cache->FreeBufs = &buf[0];

	if (debug) {
		printf( "%s - cacheTotalBlocks %d cacheBlockSize %d hashSize %d \n",
			__FUNCTION__, cacheTotalBlocks, cacheBlockSize, hashSize );
		printf( "%s - cache memory %d \n", __FUNCTION__, (cacheTotalBlocks * cacheBlockSize) );
	}

	return (LRUInit (&cache->LRU));
}
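
/*
 * Illustrative sketch (not part of the original file): a typical
 * initialization sequence. fsck_hfs computes its own sizes and hash size;
 * the device path handling, the 512-byte device block size, and the
 * 257-bucket hash here are hypothetical.
 */
#if 0
#include <fcntl.h>

static int ExampleCacheSetup(const char *devPath, Cache_t *cache)
{
	uint32_t blockSize, totalBlocks;
	int fd = open(devPath, O_RDWR);

	if (fd < 0)
		return errno;

	/* Let the cache pick its defaults, then initialize using the same
	 * descriptor for reads and writes, with pre-touched pages */
	CalculateCacheSizes(0, &blockSize, &totalBlocks, 0);
	return CacheInit(cache, fd, fd, 512, blockSize, totalBlocks, 257, 1);
}
#endif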
/*
 * Shutdown the cache.
 */
int CacheDestroy (Cache_t *cache)
{
	if (debug) {
		/* Print cache report */
		printf ("Cache Report:\n");
		printf ("\tRead Requests:  %d\n", cache->ReqRead);
		printf ("\tWrite Requests: %d\n", cache->ReqWrite);
		printf ("\tDisk Reads:     %d\n", cache->DiskRead);
		printf ("\tDisk Writes:    %d\n", cache->DiskWrite);
		printf ("\tSpans:          %d\n", cache->Span);
	}

	/* Shutdown the LRU */
	LRUDestroy (&cache->LRU);

	/* I'm lazy, I'll come back to it :P */
	return (EOK);
}
/*
 * Reads a range of bytes from the cache, returning a pointer to a buffer
 * containing the requested bytes.
 *
 * NOTE: The returned buffer may directly refer to a cache block, or an
 *       anonymous buffer. Do not make any assumptions about the nature of
 *       the returned buffer, except that it is contiguous.
 */
int CacheRead (Cache_t *cache, uint64_t off, uint32_t len, Buf_t **bufp)
{
	Tag_t *		tag;
	Buf_t *		searchBuf;
	Buf_t *		buf;
	uint32_t	coff = (off % cache->BlockSize);
	uint64_t	cblk = (off - coff);
	int			error;

	/* Check for conflicts with other bufs */
	searchBuf = cache->ActiveBufs;
	while (searchBuf != NULL) {
		if ((searchBuf->Offset >= off) && (searchBuf->Offset < off + len)) {
			if (debug)
				printf ("ERROR: CacheRead: Deadlock (searchBuff = <%llu, %u>, off = %llu, off+len = %llu)\n", searchBuf->Offset, searchBuf->Length, off, off+len);
			return (EDEADLK);
		}

		searchBuf = searchBuf->Next;
	}

	/* get a free buffer */
	if ((buf = cache->FreeBufs) == NULL) {
		if (debug)
			printf ("ERROR: CacheRead: no more bufs!\n");
		return (ENOBUFS);
	}
	cache->FreeBufs = buf->Next;

	/* Clear the buf structure */
	buf->Next	= NULL;
	buf->Prev	= NULL;
	buf->Flags	= 0;
	buf->Offset	= off;
	buf->Length	= len;
	buf->Buffer	= NULL;

	/* If this is unaligned or spans multiple cache blocks */
	if ((cblk / cache->BlockSize) != ((off + len - 1) / cache->BlockSize)) {
		buf->Flags |= BUF_SPAN;
	}
	/* Fetch the first cache block */
	if (debug)
		printf("%s(%d): Looking up cache block %llu for offset %llu, cache blockSize %u\n", __FUNCTION__, __LINE__, cblk, off, cache->BlockSize);
	error = CacheLookup (cache, cblk, &tag);
	if (error != EOK) {
		printf ("ERROR: CacheRead: CacheLookup error %d\n", error);
		return (error);
	}

	/* If we live nicely inside a cache block */
	if (!(buf->Flags & BUF_SPAN)) {
		/* Offset the buffer into the cache block */
		buf->Buffer = tag->Buffer + coff;

		/* Bump the cache block's reference count */
		tag->Refs++;

		/* Kick the node into the right queue */
		LRUHit (&cache->LRU, (LRUNode_t *)tag, 0);

	/* Otherwise, things get ugly */
	} else {
		uint32_t	boff;	/* Offset into the buffer */
		uint32_t	blen;	/* Space to fill in the buffer */
		uint32_t	temp;

		/* Allocate a temp buffer */
		buf->Buffer = (void *)malloc (len);
		if (buf->Buffer == NULL) {
			if (debug)
				printf ("ERROR: CacheRead: No Memory\n");
			return (ENOMEM);
		}

		/* Blit the first chunk into the buffer */
		boff = cache->BlockSize - coff;
		blen = len - boff;
		if (debug)
			printf("INFO: memcpy(%p, %p + %u, %u)\n", buf->Buffer, tag->Buffer, coff, boff);
		memcpy (buf->Buffer, tag->Buffer + coff, boff);

		/* Bump the cache block's reference count */
		tag->Refs++;

		/* Kick the node into the right queue */
		LRUHit (&cache->LRU, (LRUNode_t *)tag, 0);

		/* Next cache block */
		cblk += cache->BlockSize;

		/* Read data a cache block at a time */
		while (blen) {
			/* Fetch the next cache block */
			error = CacheLookup (cache, cblk, &tag);
			if (error != EOK) {
				/* Free the allocated buffer */
				free (buf->Buffer);
				buf->Buffer = NULL;

				/* Release all the held tags */
				cblk -= cache->BlockSize;
				while (cblk >= off) {
					if (CacheLookup (cache, cblk, &tag) != EOK) {
						fprintf (stderr, "CacheRead: Unrecoverable error\n");
						exit (-1);
					}
					tag->Refs--;

					/* Kick the node into the right queue */
					LRUHit (&cache->LRU, (LRUNode_t *)tag, 0);
					cblk -= cache->BlockSize;
				}

				return (error);
			}

			/* Blit the cache block into the buffer */
			temp = ((blen > cache->BlockSize) ? cache->BlockSize : blen);
			if (debug)
				printf ("INFO: memcpy(%p + %u, %p, %u)\n", buf->Buffer, boff, tag->Buffer, temp);
			memcpy (buf->Buffer + boff, tag->Buffer, temp);

			/* Update counters */
			boff += temp;
			blen -= temp;
			tag->Refs++;

			/* Advance to the next cache block */
			cblk += cache->BlockSize;

			/* Kick the node into the right queue */
			LRUHit (&cache->LRU, (LRUNode_t *)tag, 0);
		}

		/* Count the spanned access */
		cache->Span++;
	}

	/* Attach to head of active buffers list */
	if (cache->ActiveBufs != NULL) {
		buf->Next = cache->ActiveBufs;
		buf->Prev = NULL;

		cache->ActiveBufs->Prev = buf;
	}
	cache->ActiveBufs = buf;

	/* Update counters */
	cache->ReqRead++;
	*bufp = buf;

	return (EOK);
}
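
/*
 * Illustrative sketch (not part of the original file): every successful
 * CacheRead must be balanced by CacheRelease (or CacheWrite), since both
 * spanned and direct reads hold references on the underlying cache blocks.
 * The offsets, lengths, and function name here are hypothetical.
 */
#if 0
static int ExampleReadBlock(Cache_t *cache, uint64_t off, uint32_t len)
{
	Buf_t *buf;
	int error = CacheRead(cache, off, len, &buf);

	if (error != EOK)
		return error;

	/* ... inspect buf->Buffer (contiguous, len bytes) ... */

	/* Drop the references; age = 0 keeps the blocks hot in the LRU */
	return CacheRelease(cache, buf, 0);
}
#endif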
/*
 * All of the uses of kLockWrite need to be audited for
 * when the journal replay is writing.
 */
/*
 * Writes a buffer through the cache.
 */
int CacheWrite ( Cache_t *cache, Buf_t *buf, int age, uint32_t writeOptions )
{
	Tag_t *		tag;
	uint32_t	coff = (buf->Offset % cache->BlockSize);
	uint64_t	cblk = (buf->Offset - coff);
	int			error;

	/* Fetch the first cache block */
	error = CacheLookup (cache, cblk, &tag);
	if (error != EOK) return (error);

	/* If the buffer was a direct reference */
	if (!(buf->Flags & BUF_SPAN)) {
		/* Commit the dirty block */
		if ( (writeOptions & (kLazyWrite | kLockWrite)) != 0 ) {
			/* Copy flags to tag */
			tag->Flags |= (writeOptions & (kLazyWrite | kLockWrite));
		} else {
			error = CacheRawWrite (cache,
					       tag->Offset,
					       cache->BlockSize,
					       tag->Buffer);
			if (error != EOK) return (error);
		}

		/* Release the reference */
		if ((writeOptions & kLockWrite) == 0)
			tag->Refs--;

		/* Kick the node into the right queue */
		LRUHit (&cache->LRU, (LRUNode_t *)tag, age);

	/* Otherwise, we do the ugly thing again */
	} else {
		uint32_t	boff;	/* Offset into the buffer */
		uint32_t	blen;	/* Space to fill in the buffer */
		uint32_t	temp;

		/* Blit the first chunk back into the cache */
		boff = cache->BlockSize - coff;
		blen = buf->Length - boff;
		memcpy (tag->Buffer + coff, buf->Buffer, boff);

		/* Commit the dirty block */
		if ( (writeOptions & (kLazyWrite | kLockWrite)) != 0 ) {
			/* flag this for lazy write */
			tag->Flags |= (writeOptions & (kLazyWrite | kLockWrite));
		} else {
			error = CacheRawWrite (cache,
					       tag->Offset,
					       cache->BlockSize,
					       tag->Buffer);
			if (error != EOK) return (error);
		}

		/* Release the cache block reference */
		if ((writeOptions & kLockWrite) == 0)
			tag->Refs--;

		/* Kick the node into the right queue */
		LRUHit (&cache->LRU, (LRUNode_t *)tag, age);

		/* Next cache block */
		cblk += cache->BlockSize;

		/* Write data a cache block at a time */
		while (blen) {
			/* Fetch the next cache block */
			error = CacheLookup (cache, cblk, &tag);
			/* We must go through with the write regardless */
			if (error != EOK) return (error);

			/* Blit the next buffer chunk back into the cache */
			temp = ((blen > cache->BlockSize) ? cache->BlockSize : blen);
			memcpy (tag->Buffer, buf->Buffer + boff, temp);

			/* Commit the dirty block */
			if ( (writeOptions & (kLazyWrite | kLockWrite)) != 0 ) {
				/* flag this for lazy write */
				tag->Flags |= (writeOptions & (kLazyWrite | kLockWrite));
			} else {
				error = CacheRawWrite (cache,
						       tag->Offset,
						       cache->BlockSize,
						       tag->Buffer);
				if (error != EOK) return (error);
			}

			/* Update counters */
			boff += temp;
			blen -= temp;
			if ((writeOptions & kLockWrite) == 0)
				tag->Refs--;

			/* Kick the node into the right queue */
			LRUHit (&cache->LRU, (LRUNode_t *)tag, age);

			/* And go to the next cache block */
			cblk += cache->BlockSize;
		}

		/* Release the anonymous buffer */
		free (buf->Buffer);
	}

	/* Detach the buffer */
	if (buf->Next != NULL)
		buf->Next->Prev = buf->Prev;
	if (buf->Prev != NULL)
		buf->Prev->Next = buf->Next;
	if (cache->ActiveBufs == buf)
		cache->ActiveBufs = buf->Next;

	/* Clear the buffer and put it back on free list */
	memset (buf, 0x00, sizeof (Buf_t));
	buf->Next = cache->FreeBufs;
	cache->FreeBufs = buf;

	/* Update counters */
	cache->ReqWrite++;

	return (EOK);
}
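
/*
 * Illustrative sketch (not part of the original file): passing kLazyWrite
 * defers the disk I/O; the block is only flagged dirty and is flushed later
 * by CacheFlush, CacheFlushRange, or CacheFreeBlock. Passing 0 writes
 * through immediately. The modification shown is hypothetical.
 */
#if 0
static int ExampleModifyBlock(Cache_t *cache, uint64_t off, uint32_t len)
{
	Buf_t *buf;
	int error = CacheRead(cache, off, len, &buf);

	if (error != EOK)
		return error;

	((u_char *)buf->Buffer)[0] ^= 0xFF;	/* mutate the first byte */

	/* Mark dirty and defer the write instead of writing through */
	return CacheWrite(cache, buf, 0, kLazyWrite);
}
#endif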
/*
 * Releases a clean buffer.
 *
 * NOTE: We don't verify whether it's dirty or not.
 */
int CacheRelease (Cache_t *cache, Buf_t *buf, int age)
{
	Tag_t *		tag;
	uint32_t	coff = (buf->Offset % cache->BlockSize);
	uint64_t	cblk = (buf->Offset - coff);
	int			error;

	/* Fetch the first cache block */
	error = CacheLookup (cache, cblk, &tag);
	if (error != EOK) {
		printf ("ERROR: CacheRelease: CacheLookup error\n");
		return (error);
	}

	/* If the buffer was a direct reference */
	if (!(buf->Flags & BUF_SPAN)) {
		/* Release the reference */
		if ((tag->Flags & kLockWrite) == 0) {
			tag->Refs--;
		}

		/* Kick the node into the right queue */
		LRUHit (&cache->LRU, (LRUNode_t *)tag, age);

	/* Otherwise, we do the ugly thing again */
	} else {
		uint32_t	blen;	/* Space to fill in the buffer */

		/* Blit the first chunk back into the cache */
		blen = buf->Length - cache->BlockSize + coff;

		/* Release the cache block reference */
		if ((tag->Flags & kLockWrite) == 0) {
			tag->Refs--;
		}

		/* Kick the node into the right queue */
		LRUHit (&cache->LRU, (LRUNode_t *)tag, age);

		/* Next cache block */
		cblk += cache->BlockSize;

		/* Release cache blocks one at a time */
		while (blen) {
			/* Fetch the next cache block */
			error = CacheLookup (cache, cblk, &tag);
			/* We must go through with the release regardless */
			if (error != EOK) return (error);

			/* Update counters */
			blen -= ((blen > cache->BlockSize) ? cache->BlockSize : blen);
			if ((tag->Flags & kLockWrite) == 0)
				tag->Refs--;

			/* Kick the node into the right queue */
			LRUHit (&cache->LRU, (LRUNode_t *)tag, age);

			/* Advance to the next block */
			cblk += cache->BlockSize;
		}

		/* Release the anonymous buffer */
		free (buf->Buffer);
	}

	/* Detach the buffer */
	if (buf->Next != NULL)
		buf->Next->Prev = buf->Prev;
	if (buf->Prev != NULL)
		buf->Prev->Next = buf->Next;
	if (cache->ActiveBufs == buf)
		cache->ActiveBufs = buf->Next;

	/* Clear the buffer and put it back on free list */
	memset (buf, 0x00, sizeof (Buf_t));
	buf->Next = cache->FreeBufs;
	cache->FreeBufs = buf;

	return (EOK);
}
/*
 * Disposes of a particular buffer.
 */
int CacheRemove (Cache_t *cache, Tag_t *tag)
{
	int error;

	/* Make sure it's not busy */
	if (tag->Refs) return (EBUSY);

	/* Detach the tag */
	if (tag->Next != NULL)
		tag->Next->Prev = tag->Prev;
	if (tag->Prev != NULL)
		tag->Prev->Next = tag->Next;
	else
		cache->Hash[tag->Offset % cache->HashSize] = tag->Next;

	/* Make sure the head node doesn't have a back pointer */
	if ((cache->Hash[tag->Offset % cache->HashSize] != NULL) &&
	    (cache->Hash[tag->Offset % cache->HashSize]->Prev != NULL)) {
		printf ("ERROR: CacheRemove: Corrupt hash chain\n");
	}

	/* Release its buffer (if it has one) */
	if (tag->Buffer != NULL)
	{
		error = CacheFreeBlock (cache, tag);
		if ( EOK != error )
			return( error );
	}

	/* Zero the tag (for easy debugging) */
	memset (tag, 0x00, sizeof (Tag_t));

	/* Free the tag */
	free (tag);

	return (EOK);
}
/*
 * Only dispose of the buffer, leave the tag intact.
 */
int CacheEvict (Cache_t *cache, Tag_t *tag)
{
	int error;

	/* Make sure it's not busy */
	if (tag->Refs) return (EBUSY);

	/* Release the buffer */
	if (tag->Buffer != NULL)
	{
		error = CacheFreeBlock (cache, tag);
		if ( EOK != error )
			return( error );
	}
	tag->Buffer = NULL;

	return (EOK);
}
/*
 * Allocate an unused cache block.
 */
void *CacheAllocBlock (Cache_t *cache)
{
	void *	temp;

	if (cache->FreeHead == NULL)
		return (NULL);
	if (cache->FreeSize == 0)
		return (NULL);

	temp = cache->FreeHead;
	cache->FreeHead = *((void **)cache->FreeHead);
	cache->FreeSize--;

	return (temp);
}
/*
 * Release an active cache block.
 */
static int
CacheFreeBlock( Cache_t *cache, Tag_t *tag )
{
	int	error;

	if ( (tag->Flags & kLazyWrite) != 0 )
	{
		/* this cache block has been marked for lazy write - do it now */
		error = CacheRawWrite( cache,
				       tag->Offset,
				       cache->BlockSize,
				       tag->Buffer );
		if ( EOK != error )
		{
			if ( debug )
				printf( "%s - CacheRawWrite failed with error %d \n", __FUNCTION__, error );
			return ( error );
		}
		tag->Flags &= ~kLazyWrite;
	}

	if ((tag->Flags & kLockWrite) == 0)
	{
		*((void **)tag->Buffer) = cache->FreeHead;
		cache->FreeHead = (void **)tag->Buffer;
		cache->FreeSize++;
	}
	return (EOK);
}
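
/*
 * Illustrative note (not part of the original file): the free list is
 * threaded through the free blocks themselves; the first sizeof(void *)
 * bytes of each free block hold the pointer to the next free block, so no
 * separate bookkeeping allocation is needed. A minimal sketch of the
 * push/pop pair used by CacheFreeBlock and CacheAllocBlock above:
 */
#if 0
static void FreeListExample(Cache_t *cache, void *block)
{
	/* push: store the old head in the block, then point head at it */
	*((void **)block) = cache->FreeHead;
	cache->FreeHead = (void **)block;
	cache->FreeSize++;

	/* pop: advance head through the pointer stored in the first word */
	block = cache->FreeHead;
	cache->FreeHead = *((void **)cache->FreeHead);
	cache->FreeSize--;
}
#endif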
/*
 * Write out any blocks that are marked for lazy write.
 */
int
CacheFlush( Cache_t *cache )
{
	int			error;
	uint32_t	i;
	Tag_t *		myTagPtr;

	for ( i = 0; i < cache->HashSize; i++ )
	{
		myTagPtr = cache->Hash[ i ];

		while ( NULL != myTagPtr )
		{
			if ( (myTagPtr->Flags & kLazyWrite) != 0 )
			{
				/* this cache block has been marked for lazy write - do it now */
				error = CacheRawWrite( cache,
						       myTagPtr->Offset,
						       cache->BlockSize,
						       myTagPtr->Buffer );
				if ( EOK != error )
				{
					if ( debug )
						printf( "%s - CacheRawWrite failed with error %d \n", __FUNCTION__, error );
					return( error );
				}
				myTagPtr->Flags &= ~kLazyWrite;
			}
			myTagPtr = myTagPtr->Next;
		} /* while */
	} /* for */

	return( EOK );
}
/*
 * Return true if the two given ranges intersect.
 */
static int
RangeIntersect(uint64_t start1, uint64_t len1, uint64_t start2, uint64_t len2)
{
	uint64_t end1 = start1 + len1 - 1;
	uint64_t end2 = start2 + len2 - 1;

	if (end1 < start2 || start1 > end2)
		return 0;
	else
		return 1;
}
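
/*
 * Illustrative check (not part of the original file): the test above is
 * inclusive of end points, so two ranges that share even a single byte
 * intersect. A minimal sketch:
 */
#if 0
#include <assert.h>
static void RangeIntersectExample(void)
{
	assert(RangeIntersect(0, 4096, 4095, 10) == 1);	/* share byte 4095 */
	assert(RangeIntersect(0, 4096, 4096, 10) == 0);	/* adjacent, no overlap */
}
#endif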
/*
 * Flush, and optionally remove, all cache blocks that intersect
 * a given range.
 */
static int
CacheFlushRange( Cache_t *cache, uint64_t start, uint64_t len, int remove)
{
	int			error;
	uint32_t	i;
	Tag_t *		currentTag, *nextTag;

	for ( i = 0; i < cache->HashSize; i++ )
	{
		currentTag = cache->Hash[ i ];

		while ( NULL != currentTag )
		{
			/* Keep track of the next block, in case we remove the current block */
			nextTag = currentTag->Next;

			if ( currentTag->Flags & kLazyWrite &&
			     RangeIntersect(currentTag->Offset, cache->BlockSize, start, len))
			{
				error = CacheRawWrite( cache,
						       currentTag->Offset,
						       cache->BlockSize,
						       currentTag->Buffer );
				if ( EOK != error )
				{
					if ( debug )
						printf( "%s - CacheRawWrite failed with error %d \n", __FUNCTION__, error );
					return error;
				}
				currentTag->Flags &= ~kLazyWrite;

				if ( remove && ((currentTag->Flags & kLockWrite) == 0))
					CacheRemove( cache, currentTag );
			}

			currentTag = nextTag;
		} /* while */
	} /* for */

	return EOK;
} /* CacheFlushRange */
/* Function: CacheCopyDiskBlocks
 *
 * Description: Perform direct disk block copy from from_offset to to_offset
 * of the given length.
 *
 * The function flushes the cache blocks intersecting with disk blocks
 * belonging to from_offset. Invalidating the disk blocks belonging to
 * to_offset from the cache would have been sufficient, but its block
 * start and end might not lie on cache block size boundary. Therefore we
 * flush the disk blocks belonging to to_offset to the disk as well.
 *
 * The function performs raw read and write on the disk of cache block size,
 * with the exception of the last operation.
 *
 * Note that the data written to disk does not exist in cache after
 * this function. This function however ensures that if the device
 * offset being read/written on disk existed in cache, it is invalidated and
 * written to disk before performing any read/write operation.
 *
 * Input:
 *	1. cache - pointer to cache.
 *	2. from_offset - disk offset to copy from.
 *	3. to_offset - disk offset to copy to.
 *	4. len - length in bytes to be copied. Note that this length should be
 *	   a multiple of disk block size, else read/write will return error.
 *
 * Output:
 *	zero (EOK) on success.
 *	On failure, non-zero value.
 *	Known error values:
 *		ENOMEM - insufficient memory to allocate intermediate copy buffer.
 *		EINVAL - the length of data to read/write is not a multiple of
 *			 device block size, or
 *			 the device offset is not a multiple of device block size
 *		ENXIO  - invalid disk offset
 */
int CacheCopyDiskBlocks (Cache_t *cache, uint64_t from_offset, uint64_t to_offset, uint32_t len)
{
	uint32_t	i;
	int			error = EOK;
	char *		tmpBuffer = NULL;
	uint32_t	ioReqCount;
	uint32_t	numberOfBuffersToWrite;

	/* Return error if the length of data to be copied, or either disk
	 * offset, is not a multiple of the device block size
	 */
	if ((len % cache->DevBlockSize) ||
	    (from_offset % cache->DevBlockSize) ||
	    (to_offset % cache->DevBlockSize)) {
		error = EINVAL;
		goto out;
	}

	/* Flush contents of from_offset on the disk */
	error = CacheFlushRange(cache, from_offset, len, 1);
	if (error != EOK) goto out;

	/* Flush contents of to_offset on the disk */
	error = CacheFlushRange(cache, to_offset, len, 1);
	if (error != EOK) goto out;

	/* Allocate temporary buffer for reading/writing, currently
	 * set to block size of cache.
	 */
	tmpBuffer = malloc(cache->BlockSize);
	if (!tmpBuffer) {
		if (debug)
			printf("%s(%d): malloc(%zd) failed\n", __FUNCTION__, __LINE__, (size_t)cache->BlockSize);
		error = ENOMEM;
		goto out;
	}

	ioReqCount = cache->BlockSize;
	numberOfBuffersToWrite = (len + ioReqCount - 1) / ioReqCount;

	for (i=0; i<numberOfBuffersToWrite; i++) {
		if (i == (numberOfBuffersToWrite - 1)) {
			/* last buffer may be a partial block */
			ioReqCount = len - (i * cache->BlockSize);
		}

		/* Read data */
		error = CacheRawRead (cache, from_offset, ioReqCount, tmpBuffer);
		if (error != EOK) goto out;

		/* Write data */
		error = CacheRawWrite (cache, to_offset, ioReqCount, tmpBuffer);
		if (error != EOK) goto out;

		if (debug) {
			printf ("%s: Copying %d bytes from %qd to %qd\n", __FUNCTION__, ioReqCount, from_offset, to_offset);
		}

		/* Increment offsets with data read/written */
		from_offset += ioReqCount;
		to_offset += ioReqCount;
	}

out:
	if (tmpBuffer) {
		free (tmpBuffer);
	}
	return error;
}
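
/*
 * Illustrative sketch (not part of the original file): copying one device
 * block, for instance duplicating a header to an alternate location. The
 * offsets are hypothetical; all three values must be multiples of
 * cache->DevBlockSize.
 */
#if 0
static int ExampleCopyBlock(Cache_t *cache)
{
	/* copy 512 bytes from byte offset 1024 to byte offset 2048 */
	return CacheCopyDiskBlocks(cache, 1024, 2048, 512);
}
#endif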
/* Function: CacheWriteBufferToDisk
 *
 * Description: Write data on disk starting at the given offset for up to write_len.
 * The data from the given buffer up to buf_len is written to the disk starting
 * at the given offset. If the amount of data written on disk is greater than
 * the length of buffer, all the remaining data is written as zeros.
 *
 * If no buffer is provided or if length of buffer is zero, the function
 * writes zeros on disk from offset up to write_len bytes.
 *
 * The function requires the length of buffer to be either equal to or less
 * than the data to be written on disk. It also requires that the length
 * of data to be written on disk is a multiple of device block size.
 *
 * Note that the data written to disk does not exist in cache after
 * this function. This function however ensures that if the device
 * offset being written on disk existed in cache, it is invalidated and
 * written to disk before performing any read/write operation.
 *
 * Input:
 *	1. cache - pointer to cache
 *	2. offset - offset on disk to write data of buffer
 *	3. buffer - pointer to data to be written on disk
 *	4. len - length of buffer to be written on disk.
 *
 * Output:
 *	zero (EOK) on success.
 *	On failure, non-zero value.
 *	Known error values:
 *		ENOMEM - insufficient memory to allocate intermediate copy buffer.
 *		EINVAL - the length of data to read/write is not a multiple of
 *			 device block size, or
 *			 the device offset is not a multiple of device block size, or
 *			 the length of data to be written on disk is less than
 *			 the length of buffer.
 *		ENXIO  - invalid disk offset
 */
int CacheWriteBufferToDisk (Cache_t *cache, uint64_t offset, uint32_t write_len, u_char *buffer, uint32_t buf_len)
{
	int			error = EOK;
	u_char *	write_buffer = NULL;
	uint32_t	io_count;
	uint32_t	buf_offset;
	uint32_t	bytes_remain;
	uint8_t		zero_fill = false;

	/* Check if buffer is provided */
	if (buffer == NULL) {
		buf_len = 0;
	}

	/* Return error if length of data to be written on disk is
	 * less than the length of the buffer to be written, or
	 * is not a multiple of device block size, or offset to write
	 * is not a multiple of device block size
	 */
	if ((write_len % cache->DevBlockSize) ||
	    (offset % cache->DevBlockSize) ||
	    (write_len < buf_len)) {
		error = EINVAL;
		goto out;
	}

	/* Flush cache contents of offset range to be written on the disk */
	error = CacheFlushRange(cache, offset, write_len, 1);
	if (error != EOK) {
		goto out;
	}

	/* Calculate correct size of buffer to be written each time */
	io_count = (write_len < cache->BlockSize) ? write_len : cache->BlockSize;

	/* Allocate temporary buffer to write data to disk */
	write_buffer = malloc (io_count);
	if (!write_buffer) {
		if (debug)
			printf("%s(%d): malloc(%zd) failed\n", __FUNCTION__, __LINE__, (size_t)cache->BlockSize);
		error = ENOMEM;
		goto out;
	}

	/* Read offset in data buffer to be written to disk */
	buf_offset = 0;

	while (write_len) {
		/* The last buffer might be less than io_count bytes */
		if (write_len < io_count) {
			io_count = write_len;
		}

		/* Check whether data buffer was written completely to disk */
		if (buf_offset < buf_len) {
			/* Calculate the bytes from data buffer still to be written */
			bytes_remain = buf_len - buf_offset;

			if (bytes_remain >= io_count) {
				/* Bytes remaining is greater than bytes written in one
				 * IO request. Limit bytes read from data buffer in this
				 * pass to the bytes written in one IO request
				 */
				bytes_remain = io_count;

				/* Copy data from data buffer to write buffer */
				memcpy (write_buffer, buffer, bytes_remain);
			} else {
				/* Bytes remaining is less than bytes written in one
				 * IO request. Zero fill the remaining write buffer.
				 */

				/* Copy data from data buffer to write buffer */
				memcpy (write_buffer, buffer, bytes_remain);

				/* Zero fill rest of the buffer, if any */
				memset (write_buffer + bytes_remain, 0, io_count - bytes_remain);

				zero_fill = true;
			}

			buf_offset += bytes_remain;
			buffer += bytes_remain;
		} else {
			/* Do not zero fill the buffer if we have already done it */
			if (zero_fill == false) {
				/* Zero fill entire write buffer */
				memset (write_buffer, 0, io_count);
				zero_fill = true;
			}
		}

		/* Write data */
		error = CacheRawWrite (cache, offset, io_count, write_buffer);
		if (error != EOK) goto out;

		offset += io_count;
		write_len -= io_count;
	}

out:
	/* If we allocated a temporary buffer, deallocate it */
	if (write_buffer != NULL) {
		free (write_buffer);
	}
	return error;
}
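
/*
 * Illustrative sketch (not part of the original file): zeroing a range on
 * disk by passing no buffer, then writing a short header padded with zeros.
 * The sizes and names are hypothetical; write_len must be a multiple of the
 * device block size, and hdrLen must not exceed it.
 */
#if 0
static int ExampleZeroAndStamp(Cache_t *cache, u_char *hdr, uint32_t hdrLen)
{
	int error;

	/* Write 4 KB of zeros at offset 0 */
	error = CacheWriteBufferToDisk(cache, 0, 4096, NULL, 0);
	if (error != EOK)
		return error;

	/* Write hdrLen bytes of hdr at offset 4096; the remainder of the
	 * 4 KB range is zero-filled */
	return CacheWriteBufferToDisk(cache, 4096, 4096, hdr, hdrLen);
}
#endif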
/*
 * Obtain a cache block. If one already exists, it is returned. Otherwise a
 * new one is created and inserted into the cache.
 */
int CacheLookup (Cache_t *cache, uint64_t off, Tag_t **tag)
{
	Tag_t *		temp;
	uint32_t	hash = off % cache->HashSize;
	int			error;

	/* Search the hash table */
	temp = cache->Hash[hash];
	while (temp != NULL) {
		if (temp->Offset == off) break;
		temp = temp->Next;
	}

	/* If it's a hit */
	if (temp != NULL) {
		/* Perform MTF if necessary */
		if (cache->Hash[hash] != temp) {
			/* Disconnect the tag */
			if (temp->Next != NULL)
				temp->Next->Prev = temp->Prev;
			temp->Prev->Next = temp->Next;
		}

	/* Otherwise, it's a miss */
	} else {
		/* Allocate a new tag */
		temp = (Tag_t *)calloc (sizeof (Tag_t), 1);	/* We really only need to zero the
								   LRU portion though */
		temp->Offset = off;

		/* Kick the tag onto the LRU */
		//LRUHit (&cache->LRU, (LRUNode_t *)temp, 0);
	}

	/* Insert at the head (if it's not already there) */
	if (cache->Hash[hash] != temp) {
		temp->Prev = NULL;
		temp->Next = cache->Hash[hash];
		if (temp->Next != NULL)
			temp->Next->Prev = temp;
		cache->Hash[hash] = temp;
	}

	/* Make sure there's a buffer */
	if (temp->Buffer == NULL) {
		/* Find a free buffer */
		temp->Buffer = CacheAllocBlock (cache);
		if (temp->Buffer == NULL) {
			/* Try to evict a buffer */
			error = LRUEvict (&cache->LRU, (LRUNode_t *)temp);
			if (error != EOK) return (error);

			/* Try again */
			temp->Buffer = CacheAllocBlock (cache);
			if (temp->Buffer == NULL) {
				if (debug)
					printf("%s(%d): CacheAllocBlock failed (FreeHead = %p, FreeSize = %u)\n", __FUNCTION__, __LINE__, cache->FreeHead, cache->FreeSize);
				return (ENOMEM);
			}
		}

		/* Load the block from disk */
		error = CacheRawRead (cache, off, cache->BlockSize, temp->Buffer);
		if (error != EOK) return (error);
	}

	if (temp && temp->Flags & kLockWrite) {
		fprintf(stderr, "CacheLookup(%p, %llu, %p): Found cache-locked block\n", cache, off, tag);
	}

	/* Return the tag */
	*tag = temp;

	return (EOK);
}
/*
 * Perform a direct read on the file.
 */
int CacheRawRead (Cache_t *cache, uint64_t off, uint32_t len, void *buf)
{
	off_t	result;
	ssize_t	nread;

	/* Both offset and length must be multiples of the device block size */
	if (off % cache->DevBlockSize) return (EINVAL);
	if (len % cache->DevBlockSize) return (EINVAL);

	/* Seek to the position */
	errno = 0;
	result = lseek (cache->FD_R, off, SEEK_SET);
	if (result == (off_t)-1 && errno != 0)
		return errno;
	if (result != off) return (ENXIO);

	/* Read into the buffer */
	if (debug)
		printf("%s: offset %llu, len %u\n", __FUNCTION__, off, len);
	nread = read (cache->FD_R, buf, len);
	if (nread == -1) return (errno);
	if (nread == 0) return (ENXIO);

	/* Update counters */
	cache->DiskRead++;

	return (EOK);
}
/*
 * Perform a direct write on the file.
 */
int CacheRawWrite (Cache_t *cache, uint64_t off, uint32_t len, void *buf)
{
	off_t	result;
	ssize_t	nwritten;

	/* Both offset and length must be multiples of the device block size */
	if (off % cache->DevBlockSize) return (EINVAL);
	if (len % cache->DevBlockSize) return (EINVAL);

	/* Seek to the position */
	errno = 0;
	result = lseek (cache->FD_W, off, SEEK_SET);
	if (result == (off_t)-1 && errno != 0) return (errno);
	if (result != off) return (ENXIO);

	/* Write from the buffer */
	nwritten = write (cache->FD_W, buf, len);
	if (nwritten == -1) return (errno);
	if (nwritten == 0) return (ENXIO);

	/* Update counters */
	cache->DiskWrite++;

	return (EOK);
}
/*
 * Initializes the LRU data structures.
 */
static int LRUInit (LRU_t *lru)
{
	/* Make the dummy nodes point to themselves */
	lru->Head.Next = &lru->Head;
	lru->Head.Prev = &lru->Head;

	lru->Busy.Next = &lru->Busy;
	lru->Busy.Prev = &lru->Busy;

	return (EOK);
}
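
/*
 * Illustrative note (not part of the original file): Head and Busy are
 * circular lists with dummy sentinel nodes, so insertion and removal never
 * need NULL checks at the ends. An empty list is one whose sentinel points
 * to itself, which is exactly what LRUInit establishes. A minimal sketch:
 */
#if 0
static int LRUIsEmpty(LRU_t *lru)
{
	return (lru->Head.Next == &lru->Head);
}
#endif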
/*
 * Shutdown the LRU.
 *
 * NOTE: This is typically a no-op, since the cache manager will be clearing
 *       all cache tags.
 */
static int LRUDestroy (LRU_t *lru)
{
	/* Nothing to do */
	return (EOK);
}
/*
 * Registers data activity on the given node. If the node is already in the
 * LRU, it is moved to the front. Otherwise, it is inserted at the front.
 *
 * NOTE: If the node is not in the LRU, we assume that its pointers are NULL.
 */
static int LRUHit (LRU_t *lru, LRUNode_t *node, int age)
{
	/* Handle existing nodes */
	if ((node->Next != NULL) && (node->Prev != NULL)) {
		/* Detach the node */
		node->Next->Prev = node->Prev;
		node->Prev->Next = node->Next;
	}

	/* If it's busy (we can't evict it) */
	if (((Tag_t *)node)->Refs) {
		/* Insert at the head of the Busy queue */
		node->Next = lru->Busy.Next;
		node->Prev = &lru->Busy;

	} else if (age) {
		/* Insert at the tail of the LRU (first in line for eviction) */
		node->Next = &lru->Head;
		node->Prev = lru->Head.Prev;

	} else {
		/* Insert at the head of the LRU */
		node->Next = lru->Head.Next;
		node->Prev = &lru->Head;
	}

	node->Next->Prev = node;
	node->Prev->Next = node;

	return (EOK);
}
/*
 * Chooses a buffer to release.
 *
 * TODO: Under extreme conditions, it should be possible to release the buffer
 *       of an actively referenced cache buffer, leaving the tag behind as a
 *       placeholder. This would be required for implementing 2Q-LRU
 *       operation.
 *
 * NOTE: Make sure we never evict the node we're trying to find a buffer for!
 */
static int LRUEvict (LRU_t *lru, LRUNode_t *node)
{
	LRUNode_t *	temp;

	/* Find a victim */
	while (1) {
		/* Grab the tail */
		temp = lru->Head.Prev;

		/* Stop if we're empty */
		if (temp == &lru->Head) {
			if (debug)
				printf("%s(%d): empty?\n", __FUNCTION__, __LINE__);
			return (ENOMEM);
		}

		/* Detach the tail */
		temp->Next->Prev = temp->Prev;
		temp->Prev->Next = temp->Next;

		/* If it's not busy, we have a victim */
		if (!((Tag_t *)temp)->Refs) break;

		/* Insert at the head of the Busy queue */
		temp->Next = lru->Busy.Next;
		temp->Prev = &lru->Busy;

		temp->Next->Prev = temp;
		temp->Prev->Next = temp;

		/* And try again */
	}

	/* Remove the tag */
	CacheRemove ((Cache_t *)lru, (Tag_t *)temp);

	return (EOK);
}
/*
 * Dump the cache contents.
 * If nobody else calls it, it gets optimized out.  Annoying and yet useful.
 */
void
dumpCache(Cache_t *cache)
{
	uint32_t i;
	uint32_t numEntries = 0;

	printf("Cache:\n");
	printf("\tDevBlockSize = %u\n", cache->DevBlockSize);
	printf("\tCache Block Size = %u\n", cache->BlockSize);
	printf("\tHash Size = %u\n", cache->HashSize);
	printf("\tHash Table:\n");
	for (i = 0; i < cache->HashSize; i++) {
		Tag_t *tag;

		for (tag = cache->Hash[i]; tag; tag = tag->Next) {
			numEntries++;
			printf("\t\tOffset %llu, refs %u, Flags %#x (%skLazyWrite, %skLockWrite)\n",
			       tag->Offset, tag->Refs, tag->Flags,
			       (tag->Flags & kLazyWrite) ? "" : "no ",
			       (tag->Flags & kLockWrite) ? "" : "no ");
		}
	}
	printf("\tNumber of entries: %u\n", numEntries);
	return;
}