]> git.saurik.com Git - redis.git/blob - src/db.c
brainstorming with myself in dscache.c comments
[redis.git] / src / db.c
1 #include "redis.h"
2
3 #include <signal.h>
4
5 /*-----------------------------------------------------------------------------
6 * C-level DB API
7 *----------------------------------------------------------------------------*/
8
9 robj *lookupKey(redisDb *db, robj *key) {
10 dictEntry *de = dictFind(db->dict,key->ptr);
11 if (de) {
12 robj *val = dictGetEntryVal(de);
13
14 /* Update the access time for the aging algorithm.
15 * Don't do it if we have a saving child, as this will trigger
16 * a copy on write madness. */
17 if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1)
18 val->lru = server.lruclock;
19
20 if (server.ds_enabled && val->storage == REDIS_DS_SAVING) {
21 /* FIXME: change this code to just wait for our object to
22 * get out of the IO Job. */
23 waitEmptyIOJobsQueue();
24 processAllPendingIOJobs();
25 redisAssert(val->storage != REDIS_DS_SAVING);
26 }
27 server.stat_keyspace_hits++;
28 return val;
29 } else {
30 time_t expire;
31 robj *val;
32
33 /* Key not found in the in memory hash table, but if disk store is
34 * enabled we may have this key on disk. If so load it in memory
35 * in a blocking way.
36 *
37 * FIXME: race condition here. If there was an already scheduled
38 * async loading of this key, what may happen is that the old
39 * key is loaded in memory if this gets deleted in the meantime. */
40 if (server.ds_enabled && cacheKeyMayExist(db,key)) {
41 val = dsGet(db,key,&expire);
42 if (val) {
43 int retval = dbAdd(db,key,val);
44 redisAssert(retval == REDIS_OK);
45 if (expire != -1) setExpire(db,key,expire);
46 server.stat_keyspace_hits++;
47 return val;
48 }
49 }
50 server.stat_keyspace_misses++;
51 return NULL;
52 }
53 }
54
55 robj *lookupKeyRead(redisDb *db, robj *key) {
56 expireIfNeeded(db,key);
57 return lookupKey(db,key);
58 }
59
60 robj *lookupKeyWrite(redisDb *db, robj *key) {
61 expireIfNeeded(db,key);
62 return lookupKey(db,key);
63 }
64
65 robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
66 robj *o = lookupKeyRead(c->db, key);
67 if (!o) addReply(c,reply);
68 return o;
69 }
70
71 robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
72 robj *o = lookupKeyWrite(c->db, key);
73 if (!o) addReply(c,reply);
74 return o;
75 }
76
77 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
78 * otherwise REDIS_OK is returned, and the caller should increment the
79 * refcount of 'val'. */
80 int dbAdd(redisDb *db, robj *key, robj *val) {
81 /* Perform a lookup before adding the key, as we need to copy the
82 * key value. */
83 if (dictFind(db->dict, key->ptr) != NULL) {
84 return REDIS_ERR;
85 } else {
86 sds copy = sdsdup(key->ptr);
87 dictAdd(db->dict, copy, val);
88 if (server.ds_enabled) {
89 /* FIXME: remove entry from negative cache */
90 }
91 return REDIS_OK;
92 }
93 }
94
95 /* If the key does not exist, this is just like dbAdd(). Otherwise
96 * the value associated to the key is replaced with the new one.
97 *
98 * On update (key already existed) 0 is returned. Otherwise 1. */
99 int dbReplace(redisDb *db, robj *key, robj *val) {
100 if (dictFind(db->dict,key->ptr) == NULL) {
101 sds copy = sdsdup(key->ptr);
102 dictAdd(db->dict, copy, val);
103 return 1;
104 } else {
105 dictReplace(db->dict, key->ptr, val);
106 return 0;
107 }
108 }
109
110 int dbExists(redisDb *db, robj *key) {
111 return dictFind(db->dict,key->ptr) != NULL;
112 }
113
114 /* Return a random key, in form of a Redis object.
115 * If there are no keys, NULL is returned.
116 *
117 * The function makes sure to return keys not already expired. */
118 robj *dbRandomKey(redisDb *db) {
119 struct dictEntry *de;
120
121 while(1) {
122 sds key;
123 robj *keyobj;
124
125 de = dictGetRandomKey(db->dict);
126 if (de == NULL) return NULL;
127
128 key = dictGetEntryKey(de);
129 keyobj = createStringObject(key,sdslen(key));
130 if (dictFind(db->expires,key)) {
131 if (expireIfNeeded(db,keyobj)) {
132 decrRefCount(keyobj);
133 continue; /* search for another key. This expired. */
134 }
135 }
136 return keyobj;
137 }
138 }
139
140 /* Delete a key, value, and associated expiration entry if any, from the DB */
141 int dbDelete(redisDb *db, robj *key) {
142 /* If VM is enabled make sure to awake waiting clients for this key:
143 * deleting the key will kill the I/O thread bringing the key from swap
144 * to memory, so the client will never be notified and unblocked if we
145 * don't do it now. */
146 if (server.ds_enabled) handleClientsBlockedOnSwappedKey(db,key);
147
148 /* FIXME: we need to delete the IO Job loading the key, or simply we can
149 * wait for it to finish. */
150
151 /* Deleting an entry from the expires dict will not free the sds of
152 * the key, because it is shared with the main dictionary. */
153 if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
154 return dictDelete(db->dict,key->ptr) == DICT_OK;
155 }
156
157 /* Empty the whole database */
158 long long emptyDb() {
159 int j;
160 long long removed = 0;
161
162 for (j = 0; j < server.dbnum; j++) {
163 removed += dictSize(server.db[j].dict);
164 dictEmpty(server.db[j].dict);
165 dictEmpty(server.db[j].expires);
166 }
167 return removed;
168 }
169
170 int selectDb(redisClient *c, int id) {
171 if (id < 0 || id >= server.dbnum)
172 return REDIS_ERR;
173 c->db = &server.db[id];
174 return REDIS_OK;
175 }
176
177 /*-----------------------------------------------------------------------------
178 * Hooks for key space changes.
179 *
180 * Every time a key in the database is modified the function
181 * signalModifiedKey() is called.
182 *
183 * Every time a DB is flushed the function signalFlushedDb() is called.
184 *----------------------------------------------------------------------------*/
185
186 void signalModifiedKey(redisDb *db, robj *key) {
187 touchWatchedKey(db,key);
188 if (server.ds_enabled)
189 cacheScheduleForFlush(db,key);
190 }
191
192 void signalFlushedDb(int dbid) {
193 touchWatchedKeysOnFlush(dbid);
194 if (server.ds_enabled)
195 dsFlushDb(dbid);
196 }
197
198 /*-----------------------------------------------------------------------------
199 * Type agnostic commands operating on the key space
200 *----------------------------------------------------------------------------*/
201
202 void flushdbCommand(redisClient *c) {
203 server.dirty += dictSize(c->db->dict);
204 signalFlushedDb(c->db->id);
205 dictEmpty(c->db->dict);
206 dictEmpty(c->db->expires);
207 addReply(c,shared.ok);
208 }
209
210 void flushallCommand(redisClient *c) {
211 signalFlushedDb(-1);
212 server.dirty += emptyDb();
213 addReply(c,shared.ok);
214 if (server.bgsavechildpid != -1) {
215 kill(server.bgsavechildpid,SIGKILL);
216 rdbRemoveTempFile(server.bgsavechildpid);
217 }
218 rdbSave(server.dbfilename);
219 server.dirty++;
220 }
221
222 void delCommand(redisClient *c) {
223 int deleted = 0, j;
224
225 for (j = 1; j < c->argc; j++) {
226 if (server.ds_enabled) {
227 lookupKeyRead(c->db,c->argv[j]);
228 /* FIXME: this can be optimized a lot, no real need to load
229 * a possibly huge value. */
230 }
231 if (dbDelete(c->db,c->argv[j])) {
232 signalModifiedKey(c->db,c->argv[j]);
233 server.dirty++;
234 deleted++;
235 } else if (server.ds_enabled) {
236 if (cacheKeyMayExist(c->db,c->argv[j]) &&
237 dsExists(c->db,c->argv[j]))
238 {
239 cacheScheduleForFlush(c->db,c->argv[j]);
240 deleted = 1;
241 }
242 }
243 }
244 addReplyLongLong(c,deleted);
245 }
246
247 void existsCommand(redisClient *c) {
248 expireIfNeeded(c->db,c->argv[1]);
249 if (dbExists(c->db,c->argv[1])) {
250 addReply(c, shared.cone);
251 } else {
252 addReply(c, shared.czero);
253 }
254 }
255
256 void selectCommand(redisClient *c) {
257 int id = atoi(c->argv[1]->ptr);
258
259 if (selectDb(c,id) == REDIS_ERR) {
260 addReplyError(c,"invalid DB index");
261 } else {
262 addReply(c,shared.ok);
263 }
264 }
265
266 void randomkeyCommand(redisClient *c) {
267 robj *key;
268
269 if ((key = dbRandomKey(c->db)) == NULL) {
270 addReply(c,shared.nullbulk);
271 return;
272 }
273
274 addReplyBulk(c,key);
275 decrRefCount(key);
276 }
277
278 void keysCommand(redisClient *c) {
279 dictIterator *di;
280 dictEntry *de;
281 sds pattern = c->argv[1]->ptr;
282 int plen = sdslen(pattern), allkeys;
283 unsigned long numkeys = 0;
284 void *replylen = addDeferredMultiBulkLength(c);
285
286 di = dictGetIterator(c->db->dict);
287 allkeys = (pattern[0] == '*' && pattern[1] == '\0');
288 while((de = dictNext(di)) != NULL) {
289 sds key = dictGetEntryKey(de);
290 robj *keyobj;
291
292 if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) {
293 keyobj = createStringObject(key,sdslen(key));
294 if (expireIfNeeded(c->db,keyobj) == 0) {
295 addReplyBulk(c,keyobj);
296 numkeys++;
297 }
298 decrRefCount(keyobj);
299 }
300 }
301 dictReleaseIterator(di);
302 setDeferredMultiBulkLength(c,replylen,numkeys);
303 }
304
305 void dbsizeCommand(redisClient *c) {
306 addReplyLongLong(c,dictSize(c->db->dict));
307 }
308
309 void lastsaveCommand(redisClient *c) {
310 addReplyLongLong(c,server.lastsave);
311 }
312
313 void typeCommand(redisClient *c) {
314 robj *o;
315 char *type;
316
317 o = lookupKeyRead(c->db,c->argv[1]);
318 if (o == NULL) {
319 type = "none";
320 } else {
321 switch(o->type) {
322 case REDIS_STRING: type = "string"; break;
323 case REDIS_LIST: type = "list"; break;
324 case REDIS_SET: type = "set"; break;
325 case REDIS_ZSET: type = "zset"; break;
326 case REDIS_HASH: type = "hash"; break;
327 default: type = "unknown"; break;
328 }
329 }
330 addReplyStatus(c,type);
331 }
332
333 void saveCommand(redisClient *c) {
334 if (server.bgsavechildpid != -1) {
335 addReplyError(c,"Background save already in progress");
336 return;
337 }
338 if (rdbSave(server.dbfilename) == REDIS_OK) {
339 addReply(c,shared.ok);
340 } else {
341 addReply(c,shared.err);
342 }
343 }
344
345 void bgsaveCommand(redisClient *c) {
346 if (server.bgsavechildpid != -1) {
347 addReplyError(c,"Background save already in progress");
348 return;
349 }
350 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
351 addReplyStatus(c,"Background saving started");
352 } else {
353 addReply(c,shared.err);
354 }
355 }
356
357 void shutdownCommand(redisClient *c) {
358 if (prepareForShutdown() == REDIS_OK)
359 exit(0);
360 addReplyError(c,"Errors trying to SHUTDOWN. Check logs.");
361 }
362
363 void renameGenericCommand(redisClient *c, int nx) {
364 robj *o;
365
366 /* To use the same key as src and dst is probably an error */
367 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
368 addReply(c,shared.sameobjecterr);
369 return;
370 }
371
372 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
373 return;
374
375 incrRefCount(o);
376 if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) {
377 if (nx) {
378 decrRefCount(o);
379 addReply(c,shared.czero);
380 return;
381 }
382 dbReplace(c->db,c->argv[2],o);
383 }
384 dbDelete(c->db,c->argv[1]);
385 signalModifiedKey(c->db,c->argv[1]);
386 signalModifiedKey(c->db,c->argv[2]);
387 server.dirty++;
388 addReply(c,nx ? shared.cone : shared.ok);
389 }
390
391 void renameCommand(redisClient *c) {
392 renameGenericCommand(c,0);
393 }
394
395 void renamenxCommand(redisClient *c) {
396 renameGenericCommand(c,1);
397 }
398
399 void moveCommand(redisClient *c) {
400 robj *o;
401 redisDb *src, *dst;
402 int srcid;
403
404 /* Obtain source and target DB pointers */
405 src = c->db;
406 srcid = c->db->id;
407 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
408 addReply(c,shared.outofrangeerr);
409 return;
410 }
411 dst = c->db;
412 selectDb(c,srcid); /* Back to the source DB */
413
414 /* If the user is moving using as target the same
415 * DB as the source DB it is probably an error. */
416 if (src == dst) {
417 addReply(c,shared.sameobjecterr);
418 return;
419 }
420
421 /* Check if the element exists and get a reference */
422 o = lookupKeyWrite(c->db,c->argv[1]);
423 if (!o) {
424 addReply(c,shared.czero);
425 return;
426 }
427
428 /* Try to add the element to the target DB */
429 if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) {
430 addReply(c,shared.czero);
431 return;
432 }
433 incrRefCount(o);
434
435 /* OK! key moved, free the entry in the source DB */
436 dbDelete(src,c->argv[1]);
437 server.dirty++;
438 addReply(c,shared.cone);
439 }
440
441 /*-----------------------------------------------------------------------------
442 * Expires API
443 *----------------------------------------------------------------------------*/
444
445 int removeExpire(redisDb *db, robj *key) {
446 /* An expire may only be removed if there is a corresponding entry in the
447 * main dict. Otherwise, the key will never be freed. */
448 redisAssert(dictFind(db->dict,key->ptr) != NULL);
449 return dictDelete(db->expires,key->ptr) == DICT_OK;
450 }
451
452 void setExpire(redisDb *db, robj *key, time_t when) {
453 dictEntry *de;
454
455 /* Reuse the sds from the main dict in the expire dict */
456 de = dictFind(db->dict,key->ptr);
457 redisAssert(de != NULL);
458 dictReplace(db->expires,dictGetEntryKey(de),(void*)when);
459 }
460
461 /* Return the expire time of the specified key, or -1 if no expire
462 * is associated with this key (i.e. the key is non volatile) */
463 time_t getExpire(redisDb *db, robj *key) {
464 dictEntry *de;
465
466 /* No expire? return ASAP */
467 if (dictSize(db->expires) == 0 ||
468 (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
469
470 /* The entry was found in the expire dict, this means it should also
471 * be present in the main dict (safety check). */
472 redisAssert(dictFind(db->dict,key->ptr) != NULL);
473 return (time_t) dictGetEntryVal(de);
474 }
475
476 /* Propagate expires into slaves and the AOF file.
477 * When a key expires in the master, a DEL operation for this key is sent
478 * to all the slaves and the AOF file if enabled.
479 *
480 * This way the key expiry is centralized in one place, and since both
481 * AOF and the master->slave link guarantee operation ordering, everything
482 * will be consistent even if we allow write operations against expiring
483 * keys. */
484 void propagateExpire(redisDb *db, robj *key) {
485 robj *argv[2];
486
487 argv[0] = createStringObject("DEL",3);
488 argv[1] = key;
489 incrRefCount(key);
490
491 if (server.appendonly)
492 feedAppendOnlyFile(server.delCommand,db->id,argv,2);
493 if (listLength(server.slaves))
494 replicationFeedSlaves(server.slaves,db->id,argv,2);
495
496 decrRefCount(argv[0]);
497 decrRefCount(argv[1]);
498 }
499
500 int expireIfNeeded(redisDb *db, robj *key) {
501 time_t when = getExpire(db,key);
502
503 /* If we are running in the context of a slave, return ASAP:
504 * the slave key expiration is controlled by the master that will
505 * send us synthesized DEL operations for expired keys.
506 *
507 * Still we try to return the right information to the caller,
508 * that is, 0 if we think the key should be still valid, 1 if
509 * we think the key is expired at this time. */
510 if (server.masterhost != NULL) {
511 return time(NULL) > when;
512 }
513
514 if (when < 0) return 0;
515
516 /* Return when this key has not expired */
517 if (time(NULL) <= when) return 0;
518
519 /* Delete the key */
520 server.stat_expiredkeys++;
521 propagateExpire(db,key);
522 return dbDelete(db,key);
523 }
524
525 /*-----------------------------------------------------------------------------
526 * Expires Commands
527 *----------------------------------------------------------------------------*/
528
529 void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) {
530 dictEntry *de;
531 long seconds;
532
533 if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return;
534
535 seconds -= offset;
536
537 de = dictFind(c->db->dict,key->ptr);
538 if (de == NULL) {
539 addReply(c,shared.czero);
540 return;
541 }
542 if (seconds <= 0) {
543 if (dbDelete(c->db,key)) server.dirty++;
544 addReply(c, shared.cone);
545 signalModifiedKey(c->db,key);
546 return;
547 } else {
548 time_t when = time(NULL)+seconds;
549 setExpire(c->db,key,when);
550 addReply(c,shared.cone);
551 signalModifiedKey(c->db,key);
552 server.dirty++;
553 return;
554 }
555 }
556
557 void expireCommand(redisClient *c) {
558 expireGenericCommand(c,c->argv[1],c->argv[2],0);
559 }
560
561 void expireatCommand(redisClient *c) {
562 expireGenericCommand(c,c->argv[1],c->argv[2],time(NULL));
563 }
564
565 void ttlCommand(redisClient *c) {
566 time_t expire, ttl = -1;
567
568 expire = getExpire(c->db,c->argv[1]);
569 if (expire != -1) {
570 ttl = (expire-time(NULL));
571 if (ttl < 0) ttl = -1;
572 }
573 addReplyLongLong(c,(long long)ttl);
574 }
575
576 void persistCommand(redisClient *c) {
577 dictEntry *de;
578
579 de = dictFind(c->db->dict,c->argv[1]->ptr);
580 if (de == NULL) {
581 addReply(c,shared.czero);
582 } else {
583 if (removeExpire(c->db,c->argv[1])) {
584 addReply(c,shared.cone);
585 server.dirty++;
586 } else {
587 addReply(c,shared.czero);
588 }
589 }
590 }