git.saurik.com Git - redis.git/blob - src/db.c
Object approximated LRU algorithm enhanced / fixed / refactored. This is used for...
[redis.git] / src / db.c
1 #include "redis.h"
2
3 #include <signal.h>
4
5 /*-----------------------------------------------------------------------------
6 * C-level DB API
7 *----------------------------------------------------------------------------*/
8
9 robj *lookupKey(redisDb *db, robj *key) {
10 dictEntry *de = dictFind(db->dict,key->ptr);
11 if (de) {
12 robj *val = dictGetEntryVal(de);
13
14 /* Update the access time for the aging algorithm. */
15 val->lru = server.lruclock;
16
17 if (server.vm_enabled) {
18 if (val->storage == REDIS_VM_MEMORY ||
19 val->storage == REDIS_VM_SWAPPING)
20 {
21 /* If we were swapping the object out, cancel the operation */
22 if (val->storage == REDIS_VM_SWAPPING)
23 vmCancelThreadedIOJob(val);
24 } else {
25 int notify = (val->storage == REDIS_VM_LOADING);
26
27 /* Our value was swapped on disk. Bring it at home. */
28 redisAssert(val->type == REDIS_VMPOINTER);
29 val = vmLoadObject(val);
30 dictGetEntryVal(de) = val;
31
32 /* Clients blocked by the VM subsystem may be waiting for
33 * this key... */
34 if (notify) handleClientsBlockedOnSwappedKey(db,key);
35 }
36 }
37 return val;
38 } else {
39 return NULL;
40 }
41 }
42
43 robj *lookupKeyRead(redisDb *db, robj *key) {
44 expireIfNeeded(db,key);
45 return lookupKey(db,key);
46 }
47
48 robj *lookupKeyWrite(redisDb *db, robj *key) {
49 expireIfNeeded(db,key);
50 return lookupKey(db,key);
51 }
52
53 robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
54 robj *o = lookupKeyRead(c->db, key);
55 if (!o) addReply(c,reply);
56 return o;
57 }
58
59 robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
60 robj *o = lookupKeyWrite(c->db, key);
61 if (!o) addReply(c,reply);
62 return o;
63 }
64
65 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
66 * otherwise REDIS_OK is returned, and the caller should increment the
67 * refcount of 'val'. */
68 int dbAdd(redisDb *db, robj *key, robj *val) {
69 /* Perform a lookup before adding the key, as we need to copy the
70 * key value. */
71 if (dictFind(db->dict, key->ptr) != NULL) {
72 return REDIS_ERR;
73 } else {
74 sds copy = sdsdup(key->ptr);
75 dictAdd(db->dict, copy, val);
76 return REDIS_OK;
77 }
78 }
79
80 /* If the key does not exist, this is just like dbAdd(). Otherwise
81 * the value associated to the key is replaced with the new one.
82 *
83 * On update (key already existed) 0 is returned. Otherwise 1. */
84 int dbReplace(redisDb *db, robj *key, robj *val) {
85 if (dictFind(db->dict,key->ptr) == NULL) {
86 sds copy = sdsdup(key->ptr);
87 dictAdd(db->dict, copy, val);
88 return 1;
89 } else {
90 dictReplace(db->dict, key->ptr, val);
91 return 0;
92 }
93 }
94
95 int dbExists(redisDb *db, robj *key) {
96 return dictFind(db->dict,key->ptr) != NULL;
97 }
98
99 /* Return a random key, in form of a Redis object.
100 * If there are no keys, NULL is returned.
101 *
102 * The function makes sure to return keys not already expired. */
103 robj *dbRandomKey(redisDb *db) {
104 struct dictEntry *de;
105
106 while(1) {
107 sds key;
108 robj *keyobj;
109
110 de = dictGetRandomKey(db->dict);
111 if (de == NULL) return NULL;
112
113 key = dictGetEntryKey(de);
114 keyobj = createStringObject(key,sdslen(key));
115 if (dictFind(db->expires,key)) {
116 if (expireIfNeeded(db,keyobj)) {
117 decrRefCount(keyobj);
118 continue; /* search for another key. This expired. */
119 }
120 }
121 return keyobj;
122 }
123 }
124
125 /* Delete a key, value, and associated expiration entry if any, from the DB */
126 int dbDelete(redisDb *db, robj *key) {
127 /* If VM is enabled make sure to awake waiting clients for this key:
128 * deleting the key will kill the I/O thread bringing the key from swap
129 * to memory, so the client will never be notified and unblocked if we
130 * don't do it now. */
131 if (server.vm_enabled) handleClientsBlockedOnSwappedKey(db,key);
132 /* Deleting an entry from the expires dict will not free the sds of
133 * the key, because it is shared with the main dictionary. */
134 if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
135 return dictDelete(db->dict,key->ptr) == DICT_OK;
136 }
137
138 /* Empty the whole database */
139 long long emptyDb() {
140 int j;
141 long long removed = 0;
142
143 for (j = 0; j < server.dbnum; j++) {
144 removed += dictSize(server.db[j].dict);
145 dictEmpty(server.db[j].dict);
146 dictEmpty(server.db[j].expires);
147 }
148 return removed;
149 }
150
151 int selectDb(redisClient *c, int id) {
152 if (id < 0 || id >= server.dbnum)
153 return REDIS_ERR;
154 c->db = &server.db[id];
155 return REDIS_OK;
156 }
157
158 /*-----------------------------------------------------------------------------
159 * Type agnostic commands operating on the key space
160 *----------------------------------------------------------------------------*/
161
162 void flushdbCommand(redisClient *c) {
163 server.dirty += dictSize(c->db->dict);
164 touchWatchedKeysOnFlush(c->db->id);
165 dictEmpty(c->db->dict);
166 dictEmpty(c->db->expires);
167 addReply(c,shared.ok);
168 }
169
170 void flushallCommand(redisClient *c) {
171 touchWatchedKeysOnFlush(-1);
172 server.dirty += emptyDb();
173 addReply(c,shared.ok);
174 if (server.bgsavechildpid != -1) {
175 kill(server.bgsavechildpid,SIGKILL);
176 rdbRemoveTempFile(server.bgsavechildpid);
177 }
178 rdbSave(server.dbfilename);
179 server.dirty++;
180 }
181
182 void delCommand(redisClient *c) {
183 int deleted = 0, j;
184
185 for (j = 1; j < c->argc; j++) {
186 if (dbDelete(c->db,c->argv[j])) {
187 touchWatchedKey(c->db,c->argv[j]);
188 server.dirty++;
189 deleted++;
190 }
191 }
192 addReplyLongLong(c,deleted);
193 }
194
195 void existsCommand(redisClient *c) {
196 expireIfNeeded(c->db,c->argv[1]);
197 if (dbExists(c->db,c->argv[1])) {
198 addReply(c, shared.cone);
199 } else {
200 addReply(c, shared.czero);
201 }
202 }
203
204 void selectCommand(redisClient *c) {
205 int id = atoi(c->argv[1]->ptr);
206
207 if (selectDb(c,id) == REDIS_ERR) {
208 addReplyError(c,"invalid DB index");
209 } else {
210 addReply(c,shared.ok);
211 }
212 }
213
214 void randomkeyCommand(redisClient *c) {
215 robj *key;
216
217 if ((key = dbRandomKey(c->db)) == NULL) {
218 addReply(c,shared.nullbulk);
219 return;
220 }
221
222 addReplyBulk(c,key);
223 decrRefCount(key);
224 }
225
226 void keysCommand(redisClient *c) {
227 dictIterator *di;
228 dictEntry *de;
229 sds pattern = c->argv[1]->ptr;
230 int plen = sdslen(pattern), allkeys;
231 unsigned long numkeys = 0;
232 void *replylen = addDeferredMultiBulkLength(c);
233
234 di = dictGetIterator(c->db->dict);
235 allkeys = (pattern[0] == '*' && pattern[1] == '\0');
236 while((de = dictNext(di)) != NULL) {
237 sds key = dictGetEntryKey(de);
238 robj *keyobj;
239
240 if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) {
241 keyobj = createStringObject(key,sdslen(key));
242 if (expireIfNeeded(c->db,keyobj) == 0) {
243 addReplyBulk(c,keyobj);
244 numkeys++;
245 }
246 decrRefCount(keyobj);
247 }
248 }
249 dictReleaseIterator(di);
250 setDeferredMultiBulkLength(c,replylen,numkeys);
251 }
252
253 void dbsizeCommand(redisClient *c) {
254 addReplyLongLong(c,dictSize(c->db->dict));
255 }
256
257 void lastsaveCommand(redisClient *c) {
258 addReplyLongLong(c,server.lastsave);
259 }
260
261 void typeCommand(redisClient *c) {
262 robj *o;
263 char *type;
264
265 o = lookupKeyRead(c->db,c->argv[1]);
266 if (o == NULL) {
267 type = "none";
268 } else {
269 switch(o->type) {
270 case REDIS_STRING: type = "string"; break;
271 case REDIS_LIST: type = "list"; break;
272 case REDIS_SET: type = "set"; break;
273 case REDIS_ZSET: type = "zset"; break;
274 case REDIS_HASH: type = "hash"; break;
275 default: type = "unknown"; break;
276 }
277 }
278 addReplyStatus(c,type);
279 }
280
281 void saveCommand(redisClient *c) {
282 if (server.bgsavechildpid != -1) {
283 addReplyError(c,"Background save already in progress");
284 return;
285 }
286 if (rdbSave(server.dbfilename) == REDIS_OK) {
287 addReply(c,shared.ok);
288 } else {
289 addReply(c,shared.err);
290 }
291 }
292
293 void bgsaveCommand(redisClient *c) {
294 if (server.bgsavechildpid != -1) {
295 addReplyError(c,"Background save already in progress");
296 return;
297 }
298 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
299 addReplyStatus(c,"Background saving started");
300 } else {
301 addReply(c,shared.err);
302 }
303 }
304
305 void shutdownCommand(redisClient *c) {
306 if (prepareForShutdown() == REDIS_OK)
307 exit(0);
308 addReplyError(c,"Errors trying to SHUTDOWN. Check logs.");
309 }
310
311 void renameGenericCommand(redisClient *c, int nx) {
312 robj *o;
313
314 /* To use the same key as src and dst is probably an error */
315 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
316 addReply(c,shared.sameobjecterr);
317 return;
318 }
319
320 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
321 return;
322
323 incrRefCount(o);
324 if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) {
325 if (nx) {
326 decrRefCount(o);
327 addReply(c,shared.czero);
328 return;
329 }
330 dbReplace(c->db,c->argv[2],o);
331 }
332 dbDelete(c->db,c->argv[1]);
333 touchWatchedKey(c->db,c->argv[1]);
334 touchWatchedKey(c->db,c->argv[2]);
335 server.dirty++;
336 addReply(c,nx ? shared.cone : shared.ok);
337 }
338
339 void renameCommand(redisClient *c) {
340 renameGenericCommand(c,0);
341 }
342
343 void renamenxCommand(redisClient *c) {
344 renameGenericCommand(c,1);
345 }
346
347 void moveCommand(redisClient *c) {
348 robj *o;
349 redisDb *src, *dst;
350 int srcid;
351
352 /* Obtain source and target DB pointers */
353 src = c->db;
354 srcid = c->db->id;
355 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
356 addReply(c,shared.outofrangeerr);
357 return;
358 }
359 dst = c->db;
360 selectDb(c,srcid); /* Back to the source DB */
361
362 /* If the user is moving using as target the same
363 * DB as the source DB it is probably an error. */
364 if (src == dst) {
365 addReply(c,shared.sameobjecterr);
366 return;
367 }
368
369 /* Check if the element exists and get a reference */
370 o = lookupKeyWrite(c->db,c->argv[1]);
371 if (!o) {
372 addReply(c,shared.czero);
373 return;
374 }
375
376 /* Try to add the element to the target DB */
377 if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) {
378 addReply(c,shared.czero);
379 return;
380 }
381 incrRefCount(o);
382
383 /* OK! key moved, free the entry in the source DB */
384 dbDelete(src,c->argv[1]);
385 server.dirty++;
386 addReply(c,shared.cone);
387 }
388
389 /*-----------------------------------------------------------------------------
390 * Expires API
391 *----------------------------------------------------------------------------*/
392
393 int removeExpire(redisDb *db, robj *key) {
394 /* An expire may only be removed if there is a corresponding entry in the
395 * main dict. Otherwise, the key will never be freed. */
396 redisAssert(dictFind(db->dict,key->ptr) != NULL);
397 return dictDelete(db->expires,key->ptr) == DICT_OK;
398 }
399
400 void setExpire(redisDb *db, robj *key, time_t when) {
401 dictEntry *de;
402
403 /* Reuse the sds from the main dict in the expire dict */
404 de = dictFind(db->dict,key->ptr);
405 redisAssert(de != NULL);
406 dictReplace(db->expires,dictGetEntryKey(de),(void*)when);
407 }
408
409 /* Return the expire time of the specified key, or -1 if no expire
410 * is associated with this key (i.e. the key is non volatile) */
411 time_t getExpire(redisDb *db, robj *key) {
412 dictEntry *de;
413
414 /* No expire? return ASAP */
415 if (dictSize(db->expires) == 0 ||
416 (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
417
418 /* The entry was found in the expire dict, this means it should also
419 * be present in the main dict (safety check). */
420 redisAssert(dictFind(db->dict,key->ptr) != NULL);
421 return (time_t) dictGetEntryVal(de);
422 }
423
424 /* Propagate expires into slaves and the AOF file.
425 * When a key expires in the master, a DEL operation for this key is sent
426 * to all the slaves and the AOF file if enabled.
427 *
428 * This way the key expiry is centralized in one place, and since both
429 * AOF and the master->slave link guarantee operation ordering, everything
430 * will be consistent even if we allow write operations against expiring
431 * keys. */
432 void propagateExpire(redisDb *db, robj *key) {
433 struct redisCommand *cmd;
434 robj *argv[2];
435
436 cmd = lookupCommand("del");
437 argv[0] = createStringObject("DEL",3);
438 argv[1] = key;
439 incrRefCount(key);
440
441 if (server.appendonly)
442 feedAppendOnlyFile(cmd,db->id,argv,2);
443 if (listLength(server.slaves))
444 replicationFeedSlaves(server.slaves,db->id,argv,2);
445
446 decrRefCount(argv[0]);
447 decrRefCount(argv[1]);
448 }
449
450 int expireIfNeeded(redisDb *db, robj *key) {
451 time_t when = getExpire(db,key);
452
453 /* If we are running in the context of a slave, return ASAP:
454 * the slave key expiration is controlled by the master that will
455 * send us synthesized DEL operations for expired keys.
456 *
457 * Still we try to return the right information to the caller,
458 * that is, 0 if we think the key should be still valid, 1 if
459 * we think the key is expired at this time. */
460 if (server.masterhost != NULL) {
461 return time(NULL) > when;
462 }
463
464 if (when < 0) return 0;
465
466 /* Return when this key has not expired */
467 if (time(NULL) <= when) return 0;
468
469 /* Delete the key */
470 server.stat_expiredkeys++;
471 server.dirty++;
472 propagateExpire(db,key);
473 return dbDelete(db,key);
474 }
475
476 /*-----------------------------------------------------------------------------
477 * Expires Commands
478 *----------------------------------------------------------------------------*/
479
480 void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) {
481 dictEntry *de;
482 long seconds;
483
484 if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return;
485
486 seconds -= offset;
487
488 de = dictFind(c->db->dict,key->ptr);
489 if (de == NULL) {
490 addReply(c,shared.czero);
491 return;
492 }
493 if (seconds <= 0) {
494 if (dbDelete(c->db,key)) server.dirty++;
495 addReply(c, shared.cone);
496 touchWatchedKey(c->db,key);
497 return;
498 } else {
499 time_t when = time(NULL)+seconds;
500 setExpire(c->db,key,when);
501 addReply(c,shared.cone);
502 touchWatchedKey(c->db,key);
503 server.dirty++;
504 return;
505 }
506 }
507
508 void expireCommand(redisClient *c) {
509 expireGenericCommand(c,c->argv[1],c->argv[2],0);
510 }
511
512 void expireatCommand(redisClient *c) {
513 expireGenericCommand(c,c->argv[1],c->argv[2],time(NULL));
514 }
515
516 void ttlCommand(redisClient *c) {
517 time_t expire, ttl = -1;
518
519 expire = getExpire(c->db,c->argv[1]);
520 if (expire != -1) {
521 ttl = (expire-time(NULL));
522 if (ttl < 0) ttl = -1;
523 }
524 addReplyLongLong(c,(long long)ttl);
525 }
526
527 void persistCommand(redisClient *c) {
528 dictEntry *de;
529
530 de = dictFind(c->db->dict,c->argv[1]->ptr);
531 if (de == NULL) {
532 addReply(c,shared.czero);
533 } else {
534 if (removeExpire(c->db,c->argv[1])) {
535 addReply(c,shared.cone);
536 server.dirty++;
537 } else {
538 addReply(c,shared.czero);
539 }
540 }
541 }