]>
git.saurik.com Git - redis.git/blob - src/db.c
   5 /*----------------------------------------------------------------------------- 
   7  *----------------------------------------------------------------------------*/ 
   9 /* Important notes on lookup and disk store. 
  11  * When disk store is enabled, a lookup can hit several different cases. 
  13  * a) The key is in memory: 
  14  *    - If the key is not in IO_SAVEINPROG state we can access it. 
  15  *      If it is just in IO_SAVE state, the key is in the IO queue 
  16  *      but cannot yet be accessed by the IO thread (it first has to be 
  17  *      translated into an IO Job by the cache cron function). 
  18  *    - If the key is in IO_SAVEINPROG state we can't touch it and have 
  19  *      to block until the pending operations complete. 
  20  * b) The key is not in memory: 
  21  *    - If it's marked as non existing on disk as well (negative cache) 
  22  *      we don't need to perform the disk access. 
  23  *    - If the key MAY EXIST, but is not in memory, and it is marked as IO_SAVE 
  24  *      then the key can only be a deleted one: IO_SAVE keys are never 
  25  *      evicted (dirty state), so the only possibility is that it was deleted. 
  26  *    - If the key MAY EXIST we need to load it in a blocking way. 
  27  *      We check that the key is not in IO_SAVEINPROG state before accessing 
  28  *      the disk object. If it is in this state, we wait. */ 
  31 void lookupWaitBusyKey(redisDb 
*db
, robj 
*key
) { 
  32     /* FIXME: wait just for this key, not everything */ 
  33     waitEmptyIOJobsQueue(); 
  34     processAllPendingIOJobs(); 
  35     redisAssert((cacheScheduleIOGetFlags(db
,key
) & REDIS_IO_SAVEINPROG
) == 0); 
  38 robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
  39     dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
  41         robj 
*val 
= dictGetEntryVal(de
); 
  43         /* Update the access time for the aging algorithm. 
  44          * Don't do it if we have a saving child, as this will trigger 
  45          * a copy on write madness. */ 
  46         if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
  47             val
->lru 
= server
.lruclock
; 
  49         if (server
.ds_enabled 
&& 
  50             cacheScheduleIOGetFlags(db
,key
) & REDIS_IO_SAVEINPROG
) 
  52             /* Need to wait for the key to get unbusy */ 
  53             redisLog(REDIS_WARNING
,"Lookup found a key in SAVEINPROG state. Waiting. (Key was in the cache)"); 
  54             lookupWaitBusyKey(db
,key
); 
  56         server
.stat_keyspace_hits
++; 
  62         /* Key not found in the in memory hash table, but if disk store is 
  63          * enabled we may have this key on disk. If so load it in memory 
  64          * in a blocking way. */ 
  65         if (server
.ds_enabled 
&& cacheKeyMayExist(db
,key
)) { 
  66             long flags 
= cacheScheduleIOGetFlags(db
,key
); 
  68             /* The key is not in cache, but it has a SAVE op in queue? 
  69              * The only possibility is that the key was deleted, since 
  70              * dirty keys are not evicted. */ 
  71             if (flags 
& REDIS_IO_SAVE
) { 
  72                 server
.stat_keyspace_misses
++; 
  76             /* At this point we need to blocking load the key in memory. 
  77              * The first thing we do is waiting here if the key is busy. */ 
  78             if (flags 
& REDIS_IO_SAVEINPROG
) { 
  79                 redisLog(REDIS_WARNING
,"Lookup found a key in SAVEINPROG state. Waiting (while force loading)."); 
  80                 lookupWaitBusyKey(db
,key
); 
  83             redisLog(REDIS_DEBUG
,"Force loading key %s via lookup", key
->ptr
); 
  84             val 
= dsGet(db
,key
,&expire
); 
  86                 int retval 
= dbAdd(db
,key
,val
); 
  87                 redisAssert(retval 
== REDIS_OK
); 
  88                 if (expire 
!= -1) setExpire(db
,key
,expire
); 
  89                 server
.stat_keyspace_hits
++; 
  92                 cacheSetKeyDoesNotExist(db
,key
); 
  95         server
.stat_keyspace_misses
++; 
 100 robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
 101     expireIfNeeded(db
,key
); 
 102     return lookupKey(db
,key
); 
 105 robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
 106     expireIfNeeded(db
,key
); 
 107     return lookupKey(db
,key
); 
 110 robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
 111     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
 112     if (!o
) addReply(c
,reply
); 
 116 robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
 117     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
 118     if (!o
) addReply(c
,reply
); 
 122 /* Add the key to the DB. If the key already exists REDIS_ERR is returned, 
 123  * otherwise REDIS_OK is returned, and the caller should increment the 
 124  * refcount of 'val'. */ 
 125 int dbAdd(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
 126     /* Perform a lookup before adding the key, as we need to copy the 
 128     if (dictFind(db
->dict
, key
->ptr
) != NULL
) { 
 131         sds copy 
= sdsdup(key
->ptr
); 
 132         dictAdd(db
->dict
, copy
, val
); 
 133         if (server
.ds_enabled
) cacheSetKeyMayExist(db
,key
); 
 138 /* If the key does not exist, this is just like dbAdd(). Otherwise 
 139  * the value associated to the key is replaced with the new one. 
 141  * On update (key already existed) 0 is returned. Otherwise 1. */ 
 142 int dbReplace(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
 146     if ((oldval 
= dictFetchValue(db
->dict
,key
->ptr
)) == NULL
) { 
 147         sds copy 
= sdsdup(key
->ptr
); 
 148         dictAdd(db
->dict
, copy
, val
); 
 151         dictReplace(db
->dict
, key
->ptr
, val
); 
 154     if (server
.ds_enabled
) cacheSetKeyMayExist(db
,key
); 
 158 int dbExists(redisDb 
*db
, robj 
*key
) { 
 159     return dictFind(db
->dict
,key
->ptr
) != NULL
; 
 162 /* Return a random key, in form of a Redis object. 
 163  * If there are no keys, NULL is returned. 
 165  * The function makes sure to return keys not already expired. */ 
 166 robj 
*dbRandomKey(redisDb 
*db
) { 
 167     struct dictEntry 
*de
; 
 173         de 
= dictGetRandomKey(db
->dict
); 
 174         if (de 
== NULL
) return NULL
; 
 176         key 
= dictGetEntryKey(de
); 
 177         keyobj 
= createStringObject(key
,sdslen(key
)); 
 178         if (dictFind(db
->expires
,key
)) { 
 179             if (expireIfNeeded(db
,keyobj
)) { 
 180                 decrRefCount(keyobj
); 
 181                 continue; /* search for another key. This expired. */ 
 188 /* Delete a key, value, and associated expiration entry if any, from the DB */ 
 189 int dbDelete(redisDb 
*db
, robj 
*key
) { 
 190     /* If diskstore is enabled make sure to awake waiting clients for this key 
 191      * as it is not really useful to wait for a key already deleted to be 
 192      * loaded from disk. */ 
 193     if (server
.ds_enabled
) { 
 194         handleClientsBlockedOnSwappedKey(db
,key
); 
 195         cacheSetKeyDoesNotExist(db
,key
); 
 198     /* Deleting an entry from the expires dict will not free the sds of 
 199      * the key, because it is shared with the main dictionary. */ 
 200     if (dictSize(db
->expires
) > 0) dictDelete(db
->expires
,key
->ptr
); 
 201     return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
; 
 204 /* Empty the whole database */ 
 205 long long emptyDb() { 
 207     long long removed 
= 0; 
 209     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
 210         removed 
+= dictSize(server
.db
[j
].dict
); 
 211         dictEmpty(server
.db
[j
].dict
); 
 212         dictEmpty(server
.db
[j
].expires
); 
 217 int selectDb(redisClient 
*c
, int id
) { 
 218     if (id 
< 0 || id 
>= server
.dbnum
) 
 220     c
->db 
= &server
.db
[id
]; 
 224 /*----------------------------------------------------------------------------- 
 225  * Hooks for key space changes. 
 227  * Every time a key in the database is modified the function 
 228  * signalModifiedKey() is called. 
 230  * Every time a DB is flushed the function signalFlushDb() is called. 
 231  *----------------------------------------------------------------------------*/ 
 233 void signalModifiedKey(redisDb 
*db
, robj 
*key
) { 
 234     touchWatchedKey(db
,key
); 
 235     if (server
.ds_enabled
) 
 236         cacheScheduleIO(db
,key
,REDIS_IO_SAVE
); 
 239 void signalFlushedDb(int dbid
) { 
 240     touchWatchedKeysOnFlush(dbid
); 
 243 /*----------------------------------------------------------------------------- 
 244  * Type agnostic commands operating on the key space 
 245  *----------------------------------------------------------------------------*/ 
 247 void flushdbCommand(redisClient 
*c
) { 
 248     server
.dirty 
+= dictSize(c
->db
->dict
); 
 249     signalFlushedDb(c
->db
->id
); 
 250     dictEmpty(c
->db
->dict
); 
 251     dictEmpty(c
->db
->expires
); 
 252     if (server
.ds_enabled
) dsFlushDb(c
->db
->id
); 
 253     addReply(c
,shared
.ok
); 
 256 void flushallCommand(redisClient 
*c
) { 
 258     server
.dirty 
+= emptyDb(); 
 259     addReply(c
,shared
.ok
); 
 260     if (server
.bgsavechildpid 
!= -1) { 
 261         kill(server
.bgsavechildpid
,SIGKILL
); 
 262         rdbRemoveTempFile(server
.bgsavechildpid
); 
 264     if (server
.ds_enabled
) 
 267         rdbSave(server
.dbfilename
); 
 271 void delCommand(redisClient 
*c
) { 
 274     for (j 
= 1; j 
< c
->argc
; j
++) { 
 275         if (server
.ds_enabled
) { 
 276             lookupKeyRead(c
->db
,c
->argv
[j
]); 
 277             /* FIXME: this can be optimized a lot, no real need to load 
 278              * a possibly huge value. */ 
 280         if (dbDelete(c
->db
,c
->argv
[j
])) { 
 281             signalModifiedKey(c
->db
,c
->argv
[j
]); 
 284         } else if (server
.ds_enabled
) { 
 285             if (cacheKeyMayExist(c
->db
,c
->argv
[j
]) && 
 286                 dsExists(c
->db
,c
->argv
[j
])) 
 288                 cacheScheduleIO(c
->db
,c
->argv
[j
],REDIS_IO_SAVE
); 
 293     addReplyLongLong(c
,deleted
); 
 296 void existsCommand(redisClient 
*c
) { 
 297     expireIfNeeded(c
->db
,c
->argv
[1]); 
 298     if (dbExists(c
->db
,c
->argv
[1])) { 
 299         addReply(c
, shared
.cone
); 
 301         addReply(c
, shared
.czero
); 
 305 void selectCommand(redisClient 
*c
) { 
 306     int id 
= atoi(c
->argv
[1]->ptr
); 
 308     if (selectDb(c
,id
) == REDIS_ERR
) { 
 309         addReplyError(c
,"invalid DB index"); 
 311         addReply(c
,shared
.ok
); 
 315 void randomkeyCommand(redisClient 
*c
) { 
 318     if ((key 
= dbRandomKey(c
->db
)) == NULL
) { 
 319         addReply(c
,shared
.nullbulk
); 
 327 void keysCommand(redisClient 
*c
) { 
 330     sds pattern 
= c
->argv
[1]->ptr
; 
 331     int plen 
= sdslen(pattern
), allkeys
; 
 332     unsigned long numkeys 
= 0; 
 333     void *replylen 
= addDeferredMultiBulkLength(c
); 
 335     di 
= dictGetIterator(c
->db
->dict
); 
 336     allkeys 
= (pattern
[0] == '*' && pattern
[1] == '\0'); 
 337     while((de 
= dictNext(di
)) != NULL
) { 
 338         sds key 
= dictGetEntryKey(de
); 
 341         if (allkeys 
|| stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
 342             keyobj 
= createStringObject(key
,sdslen(key
)); 
 343             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
 344                 addReplyBulk(c
,keyobj
); 
 347             decrRefCount(keyobj
); 
 350     dictReleaseIterator(di
); 
 351     setDeferredMultiBulkLength(c
,replylen
,numkeys
); 
 354 void dbsizeCommand(redisClient 
*c
) { 
 355     addReplyLongLong(c
,dictSize(c
->db
->dict
)); 
 358 void lastsaveCommand(redisClient 
*c
) { 
 359     addReplyLongLong(c
,server
.lastsave
); 
 362 void typeCommand(redisClient 
*c
) { 
 366     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
 371         case REDIS_STRING
: type 
= "string"; break; 
 372         case REDIS_LIST
: type 
= "list"; break; 
 373         case REDIS_SET
: type 
= "set"; break; 
 374         case REDIS_ZSET
: type 
= "zset"; break; 
 375         case REDIS_HASH
: type 
= "hash"; break; 
 376         default: type 
= "unknown"; break; 
 379     addReplyStatus(c
,type
); 
 382 void saveCommand(redisClient 
*c
) { 
 383     if (server
.bgsavechildpid 
!= -1) { 
 384         addReplyError(c
,"Background save already in progress"); 
 387     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
 388         addReply(c
,shared
.ok
); 
 390         addReply(c
,shared
.err
); 
 394 void bgsaveCommand(redisClient 
*c
) { 
 395     if (server
.bgsavechildpid 
!= -1) { 
 396         addReplyError(c
,"Background save already in progress"); 
 399     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
 400         addReplyStatus(c
,"Background saving started"); 
 402         addReply(c
,shared
.err
); 
 406 void shutdownCommand(redisClient 
*c
) { 
 407     if (prepareForShutdown() == REDIS_OK
) 
 409     addReplyError(c
,"Errors trying to SHUTDOWN. Check logs."); 
 412 void renameGenericCommand(redisClient 
*c
, int nx
) { 
 415     /* To use the same key as src and dst is probably an error */ 
 416     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
 417         addReply(c
,shared
.sameobjecterr
); 
 421     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
 425     if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) { 
 428             addReply(c
,shared
.czero
); 
 431         dbReplace(c
->db
,c
->argv
[2],o
); 
 433     dbDelete(c
->db
,c
->argv
[1]); 
 434     signalModifiedKey(c
->db
,c
->argv
[1]); 
 435     signalModifiedKey(c
->db
,c
->argv
[2]); 
 437     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
 440 void renameCommand(redisClient 
*c
) { 
 441     renameGenericCommand(c
,0); 
 444 void renamenxCommand(redisClient 
*c
) { 
 445     renameGenericCommand(c
,1); 
 448 void moveCommand(redisClient 
*c
) { 
 453     /* Obtain source and target DB pointers */ 
 456     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
 457         addReply(c
,shared
.outofrangeerr
); 
 461     selectDb(c
,srcid
); /* Back to the source DB */ 
 463     /* If the user is moving using as target the same 
 464      * DB as the source DB it is probably an error. */ 
 466         addReply(c
,shared
.sameobjecterr
); 
 470     /* Check if the element exists and get a reference */ 
 471     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
 473         addReply(c
,shared
.czero
); 
 477     /* Try to add the element to the target DB */ 
 478     if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) { 
 479         addReply(c
,shared
.czero
); 
 484     /* OK! key moved, free the entry in the source DB */ 
 485     dbDelete(src
,c
->argv
[1]); 
 487     addReply(c
,shared
.cone
); 
 490 /*----------------------------------------------------------------------------- 
 492  *----------------------------------------------------------------------------*/ 
 494 int removeExpire(redisDb 
*db
, robj 
*key
) { 
 495     /* An expire may only be removed if there is a corresponding entry in the 
 496      * main dict. Otherwise, the key will never be freed. */ 
 497     redisAssert(dictFind(db
->dict
,key
->ptr
) != NULL
); 
 498     return dictDelete(db
->expires
,key
->ptr
) == DICT_OK
; 
 501 void setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
 504     /* Reuse the sds from the main dict in the expire dict */ 
 505     de 
= dictFind(db
->dict
,key
->ptr
); 
 506     redisAssert(de 
!= NULL
); 
 507     dictReplace(db
->expires
,dictGetEntryKey(de
),(void*)when
); 
 510 /* Return the expire time of the specified key, or -1 if no expire 
 511  * is associated with this key (i.e. the key is non volatile) */ 
 512 time_t getExpire(redisDb 
*db
, robj 
*key
) { 
 515     /* No expire? return ASAP */ 
 516     if (dictSize(db
->expires
) == 0 || 
 517        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1; 
 519     /* The entry was found in the expire dict, this means it should also 
 520      * be present in the main dict (safety check). */ 
 521     redisAssert(dictFind(db
->dict
,key
->ptr
) != NULL
); 
 522     return (time_t) dictGetEntryVal(de
); 
 525 /* Propagate expires into slaves and the AOF file. 
 526  * When a key expires in the master, a DEL operation for this key is sent 
 527  * to all the slaves and the AOF file if enabled. 
 529  * This way the key expiry is centralized in one place, and since both 
 530  * AOF and the master->slave link guarantee operation ordering, everything 
 531  * will be consistent even if we allow write operations against expiring keys. */ 
 533 void propagateExpire(redisDb 
*db
, robj 
*key
) { 
 536     argv
[0] = createStringObject("DEL",3); 
 540     if (server
.appendonly
) 
 541         feedAppendOnlyFile(server
.delCommand
,db
->id
,argv
,2); 
 542     if (listLength(server
.slaves
)) 
 543         replicationFeedSlaves(server
.slaves
,db
->id
,argv
,2); 
 545     decrRefCount(argv
[0]); 
 546     decrRefCount(argv
[1]); 
 549 int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
 550     time_t when 
= getExpire(db
,key
); 
 552     /* If we are running in the context of a slave, return ASAP: 
 553      * the slave key expiration is controlled by the master that will 
 554      * send us synthesized DEL operations for expired keys. 
 556      * Still we try to return the right information to the caller,  
 557      * that is, 0 if we think the key should be still valid, 1 if 
 558      * we think the key is expired at this time. */ 
 559     if (server
.masterhost 
!= NULL
) { 
 560         return time(NULL
) > when
; 
 563     if (when 
< 0) return 0; 
 565     /* Return when this key has not expired */ 
 566     if (time(NULL
) <= when
) return 0; 
 569     server
.stat_expiredkeys
++; 
 570     propagateExpire(db
,key
); 
 571     return dbDelete(db
,key
); 
 574 /*----------------------------------------------------------------------------- 
 576  *----------------------------------------------------------------------------*/ 
 578 void expireGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*param
, long offset
) { 
 582     if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return; 
 586     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
 588         addReply(c
,shared
.czero
); 
 592         if (dbDelete(c
->db
,key
)) server
.dirty
++; 
 593         addReply(c
, shared
.cone
); 
 594         signalModifiedKey(c
->db
,key
); 
 597         time_t when 
= time(NULL
)+seconds
; 
 598         setExpire(c
->db
,key
,when
); 
 599         addReply(c
,shared
.cone
); 
 600         signalModifiedKey(c
->db
,key
); 
 606 void expireCommand(redisClient 
*c
) { 
 607     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0); 
 610 void expireatCommand(redisClient 
*c
) { 
 611     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
)); 
 614 void ttlCommand(redisClient 
*c
) { 
 615     time_t expire
, ttl 
= -1; 
 617     expire 
= getExpire(c
->db
,c
->argv
[1]); 
 619         ttl 
= (expire
-time(NULL
)); 
 620         if (ttl 
< 0) ttl 
= -1; 
 622     addReplyLongLong(c
,(long long)ttl
); 
 625 void persistCommand(redisClient 
*c
) { 
 628     de 
= dictFind(c
->db
->dict
,c
->argv
[1]->ptr
); 
 630         addReply(c
,shared
.czero
); 
 632         if (removeExpire(c
->db
,c
->argv
[1])) { 
 633             addReply(c
,shared
.cone
); 
 636             addReply(c
,shared
.czero
);