X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/4763ecc9add311467c9c3852a81664a0b3005919..fd535c58623852e480906311c48b136d3d2646bb:/src/cluster.c diff --git a/src/cluster.c b/src/cluster.c index 0b55b107..1c0b1c23 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1014,11 +1014,24 @@ int clusterNodeGetSlotBit(clusterNode *n, int slot) { * If the slot is already assigned to another instance this is considered * an error and REDIS_ERR is returned. */ int clusterAddSlot(clusterNode *n, int slot) { - redisAssert(clusterNodeSetSlotBit(n,slot) == 0); + if (clusterNodeSetSlotBit(n,slot) != 0) + return REDIS_ERR; server.cluster.slots[slot] = n; return REDIS_OK; } +/* Delete the specified slot marking it as unassigned. + * Returns REDIS_OK if the slot was assigned, otherwise if the slot was + * already unassigned REDIS_ERR is returned. */ +int clusterDelSlot(int slot) { + clusterNode *n = server.cluster.slots[slot]; + + if (!n) return REDIS_ERR; + redisAssert(clusterNodeClearSlotBit(n,slot) == 1); + server.cluster.slots[slot] = NULL; + return REDIS_OK; +} + /* ----------------------------------------------------------------------------- * Cluster state evaluation function * -------------------------------------------------------------------------- */ @@ -1128,6 +1141,18 @@ sds clusterGenNodesDescription(void) { return ci; } +int getSlotOrReply(redisClient *c, robj *o) { + long long slot; + + if (getLongLongFromObject(o,&slot) != REDIS_OK || + slot < 0 || slot > REDIS_CLUSTER_SLOTS) + { + addReplyError(c,"Invalid or out of range slot"); + return -1; + } + return (int) slot; +} + void clusterCommand(redisClient *c) { if (server.cluster_enabled == 0) { addReplyError(c,"This instance has cluster support disabled"); @@ -1165,24 +1190,26 @@ void clusterCommand(redisClient *c) { o = createObject(REDIS_STRING,ci); addReplyBulk(c,o); decrRefCount(o); - } else if (!strcasecmp(c->argv[1]->ptr,"addslots") && c->argc >= 3) { - int j; - long long slot; + } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") || + !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3) { + int j, slot; unsigned char *slots = zmalloc(REDIS_CLUSTER_SLOTS); + int del = !strcasecmp(c->argv[1]->ptr,"delslots"); memset(slots,0,REDIS_CLUSTER_SLOTS); /* Check that all the arguments are parsable and that all the * slots are not already busy. */ for (j = 2; j < c->argc; j++) { - if (getLongLongFromObject(c->argv[j],&slot) != REDIS_OK || - slot < 0 || slot > REDIS_CLUSTER_SLOTS) - { - addReplyError(c,"Invalid or out of range slot index"); + if ((slot = getSlotOrReply(c,c->argv[j])) == -1) { zfree(slots); return; } - if (server.cluster.slots[slot]) { - addReplyErrorFormat(c,"Slot %lld is already busy", slot); + if (del && server.cluster.slots[slot] == NULL) { + addReplyErrorFormat(c,"Slot %d is already unassigned", slot); + zfree(slots); + return; + } else if (!del && server.cluster.slots[slot]) { + addReplyErrorFormat(c,"Slot %d is already busy", slot); zfree(slots); return; } @@ -1195,8 +1222,15 @@ void clusterCommand(redisClient *c) { } for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) { if (slots[j]) { - int retval = clusterAddSlot(server.cluster.myself,j); - + int retval; + + /* If this slot was set as importing we can clear this + * state as now we are the real owner of the slot. */ + if (server.cluster.importing_slots_from[j]) + server.cluster.importing_slots_from[j] = NULL; + + retval = del ? clusterDelSlot(j) : + clusterAddSlot(server.cluster.myself,j); redisAssert(retval == REDIS_OK); } } @@ -1208,22 +1242,16 @@ void clusterCommand(redisClient *c) { /* SETSLOT 10 MIGRATING */ /* SETSLOT 10 IMPORTING */ /* SETSLOT 10 STABLE */ - long long aux; - unsigned int slot; + int slot; clusterNode *n; - if (getLongLongFromObjectOrReply(c,c->argv[2],&aux,NULL) != REDIS_OK) - return; - if (aux < 0 || aux >= REDIS_CLUSTER_SLOTS) { - addReplyError(c,"Slot out of range"); - return; - } - slot = (unsigned int) aux; - if (server.cluster.slots[slot] != server.cluster.myself) { - addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); - return; - } + if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return; + if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) { + if (server.cluster.slots[slot] != server.cluster.myself) { + addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); + return; + } if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { addReplyErrorFormat(c,"I don't know about node %s", (char*)c->argv[4]->ptr); @@ -1231,6 +1259,11 @@ void clusterCommand(redisClient *c) { } server.cluster.migrating_slots_to[slot] = n; } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) { + if (server.cluster.slots[slot] == server.cluster.myself) { + addReplyErrorFormat(c, + "I'm already the owner of hash slot %u",slot); + return; + } if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { addReplyErrorFormat(c,"I don't know about node %s", (char*)c->argv[3]->ptr); @@ -1238,7 +1271,40 @@ void clusterCommand(redisClient *c) { } server.cluster.importing_slots_from[slot] = n; } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) { + /* CLUSTER SETSLOT STABLE */ server.cluster.importing_slots_from[slot] = NULL; + server.cluster.migrating_slots_to[slot] = NULL; + } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 4) { + /* CLUSTER SETSLOT NODE */ + clusterNode *n = clusterLookupNode(c->argv[4]->ptr); + + if (!n) addReplyErrorFormat(c,"Unknown node %s", + (char*)c->argv[4]->ptr); + /* If this hash slot was served by 'myself' before to switch + * make sure there are no longer local keys for this hash slot. */ + if (server.cluster.slots[slot] == server.cluster.myself && + n != server.cluster.myself) + { + int numkeys; + robj **keys; + + keys = zmalloc(sizeof(robj*)*1); + numkeys = GetKeysInSlot(slot, keys, 1); + zfree(keys); + if (numkeys == 0) { + addReplyErrorFormat(c, "Can't assign hashslot %d to a different node while I still hold keys for this hash slot.", slot); + return; + } + } + /* If this node was the slot owner and the slot was marked as + * migrating, assigning the slot to another node will clear + * the migratig status. */ + if (server.cluster.slots[slot] == server.cluster.myself && + server.cluster.migrating_slots_to[slot]) + server.cluster.migrating_slots_to[slot] = NULL; + + clusterDelSlot(slot); + clusterAddSlot(n,slot); } else { addReplyError(c,"Invalid CLUSTER SETSLOT action or number of arguments"); return; @@ -1317,11 +1383,10 @@ void clusterCommand(redisClient *c) { /* RESTORE key ttl serialized-value */ void restoreCommand(redisClient *c) { - FILE *fp; - char buf[64]; - robj *o; - unsigned char *data; long ttl; + rio payload; + int type; + robj *obj; /* Make sure this key does not already exist here... */ if (dbExists(c->db,c->argv[1])) { @@ -1337,44 +1402,16 @@ void restoreCommand(redisClient *c) { return; } - /* rdbLoadObject() only works against file descriptors so we need to - * dump the serialized object into a file and reload. */ - snprintf(buf,sizeof(buf),"redis-restore-%d.tmp",getpid()); - fp = fopen(buf,"w+"); - if (!fp) { - redisLog(REDIS_WARNING,"Can't open tmp file for RESTORE: %s", - strerror(errno)); - addReplyErrorFormat(c,"RESTORE failed, tmp file creation error: %s", - strerror(errno)); - return; - } - unlink(buf); - - /* Write the actual data and rewind the file */ - data = (unsigned char*) c->argv[3]->ptr; - if (fwrite(data+1,sdslen((sds)data)-1,1,fp) != 1) { - redisLog(REDIS_WARNING,"Can't write against tmp file for RESTORE: %s", - strerror(errno)); - addReplyError(c,"RESTORE failed, tmp file I/O error."); - fclose(fp); - return; - } - rewind(fp); - - /* Finally create the object from the serialized dump and - * store it at the specified key. */ - if ((data[0] > 4 && data[0] < 9) || - data[0] > 11 || - (o = rdbLoadObject(data[0],fp)) == NULL) + payload = rioInitWithBuffer(c->argv[3]->ptr); + if (((type = rdbLoadObjectType(&payload)) == -1) || + ((obj = rdbLoadObject(type,&payload)) == NULL)) { - addReplyError(c,"Bad data format."); - fclose(fp); + addReplyError(c,"Bad data format"); return; } - fclose(fp); /* Create the key and set the TTL if any */ - dbAdd(c->db,c->argv[1],o); + dbAdd(c->db,c->argv[1],obj); if (ttl) setExpire(c->db,c->argv[1],time(NULL)+ttl); addReply(c,shared.ok); } @@ -1384,12 +1421,9 @@ void migrateCommand(redisClient *c) { int fd; long timeout; long dbid; - char buf[64]; - FILE *fp; time_t ttl; robj *o; - unsigned char type; - off_t payload_len; + rio cmd, payload; /* Sanity check */ if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK) @@ -1419,54 +1453,41 @@ void migrateCommand(redisClient *c) { return; } - /* Create temp file */ - snprintf(buf,sizeof(buf),"redis-migrate-%d.tmp",getpid()); - fp = fopen(buf,"w+"); - if (!fp) { - redisLog(REDIS_WARNING,"Can't open tmp file for MIGRATE: %s", - strerror(errno)); - addReplyErrorFormat(c,"MIGRATE failed, tmp file creation error: %s.", - strerror(errno)); - return; - } - unlink(buf); - - /* Build the SELECT + RESTORE query writing it in our temp file. */ - if (fwriteBulkCount(fp,'*',2) == 0) goto file_wr_err; - if (fwriteBulkString(fp,"SELECT",6) == 0) goto file_wr_err; - if (fwriteBulkLongLong(fp,dbid) == 0) goto file_wr_err; + cmd = rioInitWithBuffer(sdsempty()); + redisAssert(rioWriteBulkCount(&cmd,'*',2)); + redisAssert(rioWriteBulkString(&cmd,"SELECT",6)); + redisAssert(rioWriteBulkLongLong(&cmd,dbid)); ttl = getExpire(c->db,c->argv[3]); - type = o->type; - if (fwriteBulkCount(fp,'*',4) == 0) goto file_wr_err; - if (fwriteBulkString(fp,"RESTORE",7) == 0) goto file_wr_err; - if (fwriteBulkObject(fp,c->argv[3]) == 0) goto file_wr_err; - if (fwriteBulkLongLong(fp, (ttl == -1) ? 0 : ttl) == 0) goto file_wr_err; + redisAssert(rioWriteBulkCount(&cmd,'*',4)); + redisAssert(rioWriteBulkString(&cmd,"RESTORE",7)); + redisAssert(c->argv[3]->encoding == REDIS_ENCODING_RAW); + redisAssert(rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr))); + redisAssert(rioWriteBulkLongLong(&cmd,(ttl == -1) ? 0 : ttl)); /* Finally the last argument that is the serailized object payload - * in the form: . */ - payload_len = rdbSavedObjectLen(o); - if (fwriteBulkCount(fp,'$',payload_len+1) == 0) goto file_wr_err; - if (fwrite(&type,1,1,fp) == 0) goto file_wr_err; - if (rdbSaveObject(fp,o) == -1) goto file_wr_err; - if (fwrite("\r\n",2,1,fp) == 0) goto file_wr_err; - - /* Tranfer the query to the other node */ - rewind(fp); + * in the form: . */ + payload = rioInitWithBuffer(sdsempty()); + redisAssert(rdbSaveObjectType(&payload,o)); + redisAssert(rdbSaveObject(&payload,o) != -1); + redisAssert(rioWriteBulkString(&cmd,payload.io.buffer.ptr,sdslen(payload.io.buffer.ptr))); + sdsfree(payload.io.buffer.ptr); + + /* Tranfer the query to the other node in 64K chunks. */ { - char buf[4096]; - size_t nread; - - while ((nread = fread(buf,1,sizeof(buf),fp)) != 0) { - int nwritten; - - nwritten = syncWrite(fd,buf,nread,timeout); - if (nwritten != (signed)nread) goto socket_wr_err; + sds buf = cmd.io.buffer.ptr; + size_t pos = 0, towrite; + int nwritten = 0; + + while ((towrite = sdslen(buf)-pos) > 0) { + towrite = (towrite > (64*1024) ? (64*1024) : towrite); + nwritten = syncWrite(fd,buf+nwritten,towrite,timeout); + if (nwritten != (signed)towrite) goto socket_wr_err; + pos += nwritten; } - if (ferror(fp)) goto file_rd_err; } - /* Read back the reply */ + /* Read back the reply. */ { char buf1[1024]; char buf2[1024]; @@ -1475,7 +1496,7 @@ void migrateCommand(redisClient *c) { if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0) goto socket_rd_err; if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0) - goto socket_rd_err; + goto socket_rd_err; if (buf1[0] == '-' || buf2[0] == '-') { addReplyErrorFormat(c,"Target instance replied with error: %s", (buf1[0] == '-') ? buf1+1 : buf2+1); @@ -1484,25 +1505,8 @@ void migrateCommand(redisClient *c) { addReply(c,shared.ok); } } - fclose(fp); - close(fd); - return; -file_wr_err: - redisLog(REDIS_WARNING,"Can't write on tmp file for MIGRATE: %s", - strerror(errno)); - addReplyErrorFormat(c,"MIGRATE failed, tmp file write error: %s.", - strerror(errno)); - fclose(fp); - close(fd); - return; - -file_rd_err: - redisLog(REDIS_WARNING,"Can't read from tmp file for MIGRATE: %s", - strerror(errno)); - addReplyErrorFormat(c,"MIGRATE failed, tmp file read error: %s.", - strerror(errno)); - fclose(fp); + sdsfree(cmd.io.buffer.ptr); close(fd); return; @@ -1511,7 +1515,7 @@ socket_wr_err: strerror(errno)); addReplyErrorFormat(c,"MIGRATE failed, writing to target node: %s.", strerror(errno)); - fclose(fp); + sdsfree(cmd.io.buffer.ptr); close(fd); return; @@ -1520,7 +1524,7 @@ socket_rd_err: strerror(errno)); addReplyErrorFormat(c,"MIGRATE failed, reading from target node: %s.", strerror(errno)); - fclose(fp); + sdsfree(cmd.io.buffer.ptr); close(fd); return; } @@ -1529,74 +1533,26 @@ socket_rd_err: * DUMP is actually not used by Redis Cluster but it is the obvious * complement of RESTORE and can be useful for different applications. */ void dumpCommand(redisClient *c) { - char buf[64]; - FILE *fp; robj *o, *dumpobj; - sds dump = NULL; - off_t payload_len; - unsigned int type; + rio payload; /* Check if the key is here. */ if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { addReply(c,shared.nullbulk); return; } - - /* Create temp file */ - snprintf(buf,sizeof(buf),"redis-dump-%d.tmp",getpid()); - fp = fopen(buf,"w+"); - if (!fp) { - redisLog(REDIS_WARNING,"Can't open tmp file for MIGRATE: %s", - strerror(errno)); - addReplyErrorFormat(c,"DUMP failed, tmp file creation error: %s.", - strerror(errno)); - return; - } - unlink(buf); - - /* Dump the serailized object and read it back in memory. - * We prefix it with a one byte containing the type ID. - * This is the serialization format understood by RESTORE. */ - if (rdbSaveObject(fp,o) == -1) goto file_wr_err; - payload_len = ftello(fp); - if (fseeko(fp,0,SEEK_SET) == -1) goto file_rd_err; - dump = sdsnewlen(NULL,payload_len+1); - if (payload_len && fread(dump+1,payload_len,1,fp) != 1) goto file_rd_err; - fclose(fp); - type = o->type; - if (type == REDIS_LIST && o->encoding == REDIS_ENCODING_ZIPLIST) - type = REDIS_LIST_ZIPLIST; - else if (type == REDIS_HASH && o->encoding == REDIS_ENCODING_ZIPMAP) - type = REDIS_HASH_ZIPMAP; - else if (type == REDIS_SET && o->encoding == REDIS_ENCODING_INTSET) - type = REDIS_SET_INTSET; - else - type = o->type; - dump[0] = type; + + /* Serialize the object in a RDB-like format. It consist of an object type + * byte followed by the serialized object. This is understood by RESTORE. */ + payload = rioInitWithBuffer(sdsempty()); + redisAssert(rdbSaveObjectType(&payload,o)); + redisAssert(rdbSaveObject(&payload,o)); /* Transfer to the client */ - dumpobj = createObject(REDIS_STRING,dump); + dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr); addReplyBulk(c,dumpobj); decrRefCount(dumpobj); return; - -file_wr_err: - redisLog(REDIS_WARNING,"Can't write on tmp file for DUMP: %s", - strerror(errno)); - addReplyErrorFormat(c,"DUMP failed, tmp file write error: %s.", - strerror(errno)); - sdsfree(dump); - fclose(fp); - return; - -file_rd_err: - redisLog(REDIS_WARNING,"Can't read from tmp file for DUMP: %s", - strerror(errno)); - addReplyErrorFormat(c,"DUMP failed, tmp file read error: %s.", - strerror(errno)); - sdsfree(dump); - fclose(fp); - return; } /* -----------------------------------------------------------------------------