X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/5043dff351f4ad9095f66bbef8e08d363cd959a0..48f0308ad327d3bf4a89c1fc1ca977b7ed6f0f96:/redis.c?ds=sidebyside diff --git a/redis.c b/redis.c index e26e676f..375a31f8 100644 --- a/redis.c +++ b/redis.c @@ -58,7 +58,8 @@ #include #include #include -#if defined(__sun) && defined(__GNUC__) + +#if defined(__sun) #include "solarisfixes.h" #endif @@ -189,6 +190,11 @@ #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */ +/* Append only defines */ +#define APPENDFSYNC_NO 0 +#define APPENDFSYNC_ALWAYS 1 +#define APPENDFSYNC_EVERYSEC 2 + /*================================= Data types ============================== */ /* A redis object, that is a type able to hold a string / list / set */ @@ -260,6 +266,11 @@ struct redisServer { int maxidletime; int dbnum; int daemonize; + int appendonly; + int appendfsync; + time_t lastfsync; + int appendfd; + int appendseldb; char *pidfile; int bgsaveinprogress; pid_t bgsavechildpid; @@ -268,6 +279,7 @@ struct redisServer { char *logfile; char *bindaddr; char *dbfilename; + char *appendfilename; char *requirepass; int shareobjects; /* Replication related */ @@ -322,7 +334,7 @@ typedef struct zskiplistNode { typedef struct zskiplist { struct zskiplistNode *header, *tail; - long length; + unsigned long length; int level; } zskiplist; @@ -363,6 +375,7 @@ static void incrRefCount(robj *o); static int rdbSaveBackground(char *filename); static robj *createStringObject(char *ptr, size_t len); static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc); +static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc); static int syncWithMaster(void); static robj *tryObjectSharing(robj *o); static int tryObjectEncoding(robj *o); @@ -439,6 +452,7 @@ static void infoCommand(redisClient *c); static void mgetCommand(redisClient *c); static void monitorCommand(redisClient *c); static void expireCommand(redisClient *c); +static void expireatCommand(redisClient *c); static void getsetCommand(redisClient *c); static void ttlCommand(redisClient *c); static void slaveofCommand(redisClient *c); @@ -449,9 +463,10 @@ static void zaddCommand(redisClient *c); static void zrangeCommand(redisClient *c); static void zrangebyscoreCommand(redisClient *c); static void zrevrangeCommand(redisClient *c); -static void zlenCommand(redisClient *c); +static void zcardCommand(redisClient *c); static void zremCommand(redisClient *c); static void zscoreCommand(redisClient *c); +static void zremrangebyscoreCommand(redisClient *c); /*================================= Globals ================================= */ @@ -492,10 +507,11 @@ static struct redisCommand cmdTable[] = { {"smembers",sinterCommand,2,REDIS_CMD_INLINE}, {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM}, {"zrem",zremCommand,3,REDIS_CMD_BULK}, + {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE}, {"zrange",zrangeCommand,4,REDIS_CMD_INLINE}, {"zrangebyscore",zrangebyscoreCommand,4,REDIS_CMD_INLINE}, {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE}, - {"zlen",zlenCommand,2,REDIS_CMD_INLINE}, + {"zcard",zcardCommand,2,REDIS_CMD_INLINE}, {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM}, {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM}, {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM}, @@ -508,6 +524,7 @@ static struct redisCommand cmdTable[] = { {"rename",renameCommand,3,REDIS_CMD_INLINE}, {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE}, {"expire",expireCommand,3,REDIS_CMD_INLINE}, + {"expireat",expireatCommand,3,REDIS_CMD_INLINE}, {"keys",keysCommand,2,REDIS_CMD_INLINE}, {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE}, {"auth",authCommand,2,REDIS_CMD_INLINE}, @@ -668,7 +685,7 @@ static void redisLog(int level, const char *fmt, ...) { time_t now; now = time(NULL); - strftime(buf,64,"%d %b %H:%M:%S",gmtime(&now)); + strftime(buf,64,"%d %b %H:%M:%S",localtime(&now)); fprintf(fp,"%s %c ",buf,c[level]); vfprintf(fp, fmt, ap); fprintf(fp,"\n"); @@ -1018,8 +1035,14 @@ static void initServerConfig() { server.bindaddr = NULL; server.glueoutputbuf = 1; server.daemonize = 0; + server.appendonly = 0; + server.appendfsync = APPENDFSYNC_EVERYSEC; + server.lastfsync = time(NULL); + server.appendfd = -1; + server.appendseldb = -1; /* Make sure the first time will not match */ server.pidfile = "/var/run/redis.pid"; server.dbfilename = "dump.rdb"; + server.appendfilename = "appendonly.log"; server.requirepass = NULL; server.shareobjects = 0; server.sharingpoolsize = 1024; @@ -1079,6 +1102,15 @@ static void initServer() { server.stat_numconnections = 0; server.stat_starttime = time(NULL); aeCreateTimeEvent(server.el, 1000, serverCron, NULL, NULL); + + if (server.appendonly) { + server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT); + if (server.appendfd == -1) { + redisLog(REDIS_WARNING, "Can't open the append-only file: %s", + strerror(errno)); + exit(1); + } + } } /* Empty the whole database */ @@ -1218,6 +1250,21 @@ static void loadServerConfig(char *filename) { if ((server.daemonize = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) { + if ((server.appendonly = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } + } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) { + if (strcasecmp(argv[1],"no")) { + server.appendfsync = APPENDFSYNC_NO; + } else if (strcasecmp(argv[1],"always")) { + server.appendfsync = APPENDFSYNC_ALWAYS; + } else if (strcasecmp(argv[1],"everysec")) { + server.appendfsync = APPENDFSYNC_EVERYSEC; + } else { + err = "argument must be 'no', 'always' or 'everysec'"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { server.requirepass = zstrdup(argv[1]); } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) { @@ -1533,6 +1580,8 @@ static int processCommand(redisClient *c) { /* Exec the command */ dirty = server.dirty; cmd->proc(c); + if (server.appendonly != 0) + feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc); if (server.dirty-dirty != 0 && listLength(server.slaves)) replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc); if (listLength(server.monitors)) @@ -1617,6 +1666,63 @@ static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int di if (outv != static_outv) zfree(outv); } +/* TODO: translate EXPIREs into EXPIRETOs */ +static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) { + sds buf = sdsempty(); + int j; + ssize_t nwritten; + time_t now; + + /* The DB this command was targetting is not the same as the last command + * we appendend. To issue a SELECT command is needed. */ + if (dictid != server.appendseldb) { + char seldb[64]; + + snprintf(seldb,sizeof(seldb),"%d",dictid); + buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n", + strlen(seldb),seldb); + server.appendseldb = dictid; + } + /* Append the actual command */ + buf = sdscatprintf(buf,"*%d\r\n",argc); + for (j = 0; j < argc; j++) { + robj *o = argv[j]; + + if (o->encoding != REDIS_ENCODING_RAW) + o = getDecodedObject(o); + buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr)); + buf = sdscatlen(buf,o->ptr,sdslen(o->ptr)); + buf = sdscatlen(buf,"\r\n",2); + if (o != argv[j]) + decrRefCount(o); + } + /* We want to perform a single write. This should be guaranteed atomic + * at least if the filesystem we are writing is a real physical one. + * While this will save us against the server being killed I don't think + * there is much to do about the whole server stopping for power problems + * or alike */ + nwritten = write(server.appendfd,buf,sdslen(buf)); + if (nwritten != (unsigned)sdslen(buf)) { + /* Ooops, we are in troubles. The best thing to do for now is + * to simply exit instead to give the illusion that everything is + * working as expected. */ + if (nwritten == -1) { + redisLog(REDIS_WARNING,"Aborting on error writing to the append-only file: %s",strerror(errno)); + } else { + redisLog(REDIS_WARNING,"Aborting on short write while writing to the append-only file: %s",strerror(errno)); + } + abort(); + } + now = time(NULL); + if (server.appendfsync == APPENDFSYNC_ALWAYS || + (server.appendfsync == APPENDFSYNC_EVERYSEC && + now-server.lastfsync > 1)) + { + fsync(server.appendfd); /* Let's try to get this data on the disk */ + server.lastfsync = now; + } +} + static void processInputBuffer(redisClient *c) { again: if (c->bulklen == -1) { @@ -2718,7 +2824,7 @@ static void authCommand(redisClient *c) { addReply(c,shared.ok); } else { c->authenticated = 0; - addReply(c,shared.err); + addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); } } @@ -3938,6 +4044,49 @@ static int zslDelete(zskiplist *zsl, double score, robj *obj) { return 0; /* not found */ } +/* Delete all the elements with score between min and max from the skiplist. + * Min and mx are inclusive, so a score >= min || score <= max is deleted. + * Note that this function takes the reference to the hash table view of the + * sorted set, in order to remove the elements from the hash table too. */ +static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) { + zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; + unsigned long removed = 0; + int i; + + x = zsl->header; + for (i = zsl->level-1; i >= 0; i--) { + while (x->forward[i] && x->forward[i]->score < min) + x = x->forward[i]; + update[i] = x; + } + /* We may have multiple elements with the same score, what we need + * is to find the element with both the right score and object. */ + x = x->forward[0]; + while (x && x->score <= max) { + zskiplistNode *next; + + for (i = 0; i < zsl->level; i++) { + if (update[i]->forward[i] != x) break; + update[i]->forward[i] = x->forward[i]; + } + if (x->forward[0]) { + x->forward[0]->backward = (x->backward == zsl->header) ? + NULL : x->backward; + } else { + zsl->tail = x->backward; + } + next = x->forward[0]; + dictDelete(dict,x->obj); + zslFreeNode(x); + while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL) + zsl->level--; + zsl->length--; + removed++; + x = next; + } + return removed; /* not found */ +} + /* Find the first node having a score equal or greater than the specified one. * Returns NULL if there is no match. */ static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) { @@ -4041,6 +4190,30 @@ static void zremCommand(redisClient *c) { } } +static void zremrangebyscoreCommand(redisClient *c) { + double min = strtod(c->argv[2]->ptr,NULL); + double max = strtod(c->argv[3]->ptr,NULL); + robj *zsetobj; + zset *zs; + + zsetobj = lookupKeyWrite(c->db,c->argv[1]); + if (zsetobj == NULL) { + addReply(c,shared.czero); + } else { + long deleted; + + if (zsetobj->type != REDIS_ZSET) { + addReply(c,shared.wrongtypeerr); + return; + } + zs = zsetobj->ptr; + deleted = zslDeleteRange(zs->zsl,min,max,zs->dict); + if (htNeedsResize(zs->dict)) dictResize(zs->dict); + server.dirty += deleted; + addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted)); + } +} + static void zrangeGenericCommand(redisClient *c, int reverse) { robj *o; int start = atoi(c->argv[2]->ptr); @@ -4153,7 +4326,7 @@ static void zrangebyscoreCommand(redisClient *c) { } } -static void zlenCommand(redisClient *c) { +static void zcardCommand(redisClient *c) { robj *o; zset *zs; @@ -4578,7 +4751,7 @@ static void infoCommand(redisClient *c) { server.masterport, (server.replstate == REDIS_REPL_CONNECTED) ? "up" : "down", - (int)(time(NULL)-server.master->lastinteraction) + server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1 ); } for (j = 0; j < server.dbnum; j++) { @@ -4666,21 +4839,21 @@ static int deleteIfVolatile(redisDb *db, robj *key) { return dictDelete(db->dict,key) == DICT_OK; } -static void expireCommand(redisClient *c) { +static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) { dictEntry *de; - int seconds = atoi(c->argv[2]->ptr); - de = dictFind(c->db->dict,c->argv[1]); + de = dictFind(c->db->dict,key); if (de == NULL) { addReply(c,shared.czero); return; } - if (seconds <= 0) { - addReply(c, shared.czero); + if (seconds < 0) { + if (deleteKey(c->db,key)) server.dirty++; + addReply(c, shared.cone); return; } else { time_t when = time(NULL)+seconds; - if (setExpire(c->db,c->argv[1],when)) { + if (setExpire(c->db,key,when)) { addReply(c,shared.cone); server.dirty++; } else { @@ -4690,6 +4863,14 @@ static void expireCommand(redisClient *c) { } } +static void expireCommand(redisClient *c) { + expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)); +} + +static void expireatCommand(redisClient *c) { + expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL)); +} + static void ttlCommand(redisClient *c) { time_t expire; int ttl = -1; @@ -5001,6 +5182,11 @@ static int syncWithMaster(void) { strerror(errno)); return REDIS_ERR; } + if (buf[0] != '$') { + close(fd); + redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); + return REDIS_ERR; + } dumpsize = atoi(buf+1); redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize); /* Read the bulk write data on a temp file */ @@ -5237,6 +5423,7 @@ static struct redisFunctionSym symsTable[] = { {"mgetCommand", (unsigned long)mgetCommand}, {"monitorCommand", (unsigned long)monitorCommand}, {"expireCommand", (unsigned long)expireCommand}, +{"expireatCommand", (unsigned long)expireatCommand}, {"getsetCommand", (unsigned long)getsetCommand}, {"ttlCommand", (unsigned long)ttlCommand}, {"slaveofCommand", (unsigned long)slaveofCommand},