if (server.cluster.slots[j] == NULL ||
server.cluster.slots[j]->flags & REDIS_NODE_FAIL)
{
- server.cluster.slots[j] = sender;
+ clusterAddSlot(sender,j);
update_state = update_config = 1;
}
}
* If the slot is already assigned to another instance this is considered
* an error and REDIS_ERR is returned. */
int clusterAddSlot(clusterNode *n, int slot) {
- redisAssert(clusterNodeSetSlotBit(n,slot) == 0);
+ if (clusterNodeSetSlotBit(n,slot) != 0)
+ return REDIS_ERR;
server.cluster.slots[slot] = n;
return REDIS_OK;
}
+/* Delete the specified slot marking it as unassigned.
+ * Returns REDIS_OK if the slot was assigned, otherwise if the slot was
+ * already unassigned REDIS_ERR is returned. */
+int clusterDelSlot(int slot) {
+ clusterNode *n = server.cluster.slots[slot];
+
+ if (!n) return REDIS_ERR;
+ redisAssert(clusterNodeClearSlotBit(n,slot) == 1);
+ server.cluster.slots[slot] = NULL;
+ return REDIS_OK;
+}
+
/* -----------------------------------------------------------------------------
* Cluster state evaluation function
* -------------------------------------------------------------------------- */
ci = sdscatprintf(ci,"%ld %ld %s",
(long) node->ping_sent,
(long) node->pong_received,
- node->link ? "connected" : "disconnected");
+ (node->link || node->flags & REDIS_NODE_MYSELF) ?
+ "connected" : "disconnected");
/* Slots served by this instance */
start = -1;
return ci;
}
+int getSlotOrReply(redisClient *c, robj *o) {
+ long long slot;
+
+ if (getLongLongFromObject(o,&slot) != REDIS_OK ||
+ slot < 0 || slot > REDIS_CLUSTER_SLOTS)
+ {
+ addReplyError(c,"Invalid or out of range slot");
+ return -1;
+ }
+ return (int) slot;
+}
+
void clusterCommand(redisClient *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
o = createObject(REDIS_STRING,ci);
addReplyBulk(c,o);
decrRefCount(o);
- } else if (!strcasecmp(c->argv[1]->ptr,"addslots") && c->argc >= 3) {
- int j;
- long long slot;
+ } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") ||
+ !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3) {
+ int j, slot;
unsigned char *slots = zmalloc(REDIS_CLUSTER_SLOTS);
+ int del = !strcasecmp(c->argv[1]->ptr,"delslots");
memset(slots,0,REDIS_CLUSTER_SLOTS);
/* Check that all the arguments are parsable and that all the
* slots are not already busy. */
for (j = 2; j < c->argc; j++) {
- if (getLongLongFromObject(c->argv[j],&slot) != REDIS_OK ||
- slot < 0 || slot > REDIS_CLUSTER_SLOTS)
- {
- addReplyError(c,"Invalid or out of range slot index");
+ if ((slot = getSlotOrReply(c,c->argv[j])) == -1) {
zfree(slots);
return;
}
- if (server.cluster.slots[slot]) {
- addReplyErrorFormat(c,"Slot %lld is already busy", slot);
+ if (del && server.cluster.slots[slot] == NULL) {
+ addReplyErrorFormat(c,"Slot %d is already unassigned", slot);
+ zfree(slots);
+ return;
+ } else if (!del && server.cluster.slots[slot]) {
+ addReplyErrorFormat(c,"Slot %d is already busy", slot);
zfree(slots);
return;
}
}
for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
if (slots[j]) {
- int retval = clusterAddSlot(server.cluster.myself,j);
-
+ int retval;
+
+ /* If this slot was set as importing we can clear this
+ * state as now we are the real owner of the slot. */
+ if (server.cluster.importing_slots_from[j])
+ server.cluster.importing_slots_from[j] = NULL;
+
+ retval = del ? clusterDelSlot(j) :
+ clusterAddSlot(server.cluster.myself,j);
redisAssert(retval == REDIS_OK);
}
}
clusterSaveConfigOrDie();
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"setslot") && c->argc >= 4) {
- /* SETSLOT 10 MIGRATING <instance ID> */
- /* SETSLOT 10 IMPORTING <instance ID> */
+ /* SETSLOT 10 MIGRATING <node ID> */
+ /* SETSLOT 10 IMPORTING <node ID> */
/* SETSLOT 10 STABLE */
- long long aux;
- unsigned int slot;
+ /* SETSLOT 10 NODE <node ID> */
+ int slot;
clusterNode *n;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&aux,NULL) != REDIS_OK)
- return;
- if (aux < 0 || aux >= REDIS_CLUSTER_SLOTS) {
- addReplyError(c,"Slot out of range");
- return;
- }
- slot = (unsigned int) aux;
- if (server.cluster.slots[slot] != server.cluster.myself) {
- addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot);
- return;
- }
+ if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return;
+
if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) {
+ if (server.cluster.slots[slot] != server.cluster.myself) {
+ addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot);
+ return;
+ }
if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) {
addReplyErrorFormat(c,"I don't know about node %s",
(char*)c->argv[4]->ptr);
}
server.cluster.migrating_slots_to[slot] = n;
} else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) {
+ if (server.cluster.slots[slot] == server.cluster.myself) {
+ addReplyErrorFormat(c,
+ "I'm already the owner of hash slot %u",slot);
+ return;
+ }
if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) {
addReplyErrorFormat(c,"I don't know about node %s",
(char*)c->argv[3]->ptr);
}
server.cluster.importing_slots_from[slot] = n;
} else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) {
+ /* CLUSTER SETSLOT <SLOT> STABLE */
server.cluster.importing_slots_from[slot] = NULL;
+ server.cluster.migrating_slots_to[slot] = NULL;
+ } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 5) {
+ /* CLUSTER SETSLOT <SLOT> NODE <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[4]->ptr);
+
+ if (!n) addReplyErrorFormat(c,"Unknown node %s",
+ (char*)c->argv[4]->ptr);
+ /* If this hash slot was served by 'myself' before to switch
+ * make sure there are no longer local keys for this hash slot. */
+ if (server.cluster.slots[slot] == server.cluster.myself &&
+ n != server.cluster.myself)
+ {
+ int numkeys;
+ robj **keys;
+
+ keys = zmalloc(sizeof(robj*)*1);
+ numkeys = GetKeysInSlot(slot, keys, 1);
+ zfree(keys);
+ if (numkeys != 0) {
+ addReplyErrorFormat(c, "Can't assign hashslot %d to a different node while I still hold keys for this hash slot.", slot);
+ return;
+ }
+ }
+ /* If this node was the slot owner and the slot was marked as
+ * migrating, assigning the slot to another node will clear
+ * the migratig status. */
+ if (server.cluster.slots[slot] == server.cluster.myself &&
+ server.cluster.migrating_slots_to[slot])
+ server.cluster.migrating_slots_to[slot] = NULL;
+
+ /* If this node was importing this slot, assigning the slot to
+ * itself also clears the importing status. */
+ if (n == server.cluster.myself && server.cluster.importing_slots_from[slot])
+ server.cluster.importing_slots_from[slot] = NULL;
+
+ clusterDelSlot(slot);
+ clusterAddSlot(n,slot);
} else {
addReplyError(c,"Invalid CLUSTER SETSLOT action or number of arguments");
return;
/* RESTORE key ttl serialized-value */
void restoreCommand(redisClient *c) {
- FILE *fp;
- char buf[64];
- robj *o;
- unsigned char *data;
long ttl;
+ rio payload;
+ int type;
+ robj *obj;
/* Make sure this key does not already exist here... */
- if (dbExists(c->db,c->argv[1])) {
+ if (lookupKeyWrite(c->db,c->argv[1]) != NULL) {
addReplyError(c,"Target key name is busy.");
return;
}
return;
}
- /* rdbLoadObject() only works against file descriptors so we need to
- * dump the serialized object into a file and reload. */
- snprintf(buf,sizeof(buf),"redis-restore-%d.tmp",getpid());
- fp = fopen(buf,"w+");
- if (!fp) {
- redisLog(REDIS_WARNING,"Can't open tmp file for RESTORE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"RESTORE failed, tmp file creation error: %s",
- strerror(errno));
- return;
- }
- unlink(buf);
-
- /* Write the actual data and rewind the file */
- data = (unsigned char*) c->argv[3]->ptr;
- if (fwrite(data+1,sdslen((sds)data)-1,1,fp) != 1) {
- redisLog(REDIS_WARNING,"Can't write against tmp file for RESTORE: %s",
- strerror(errno));
- addReplyError(c,"RESTORE failed, tmp file I/O error.");
- fclose(fp);
- return;
- }
- rewind(fp);
-
- /* Finally create the object from the serialized dump and
- * store it at the specified key. */
- if ((data[0] > 4 && data[0] < 9) ||
- data[0] > 11 ||
- (o = rdbLoadObject(data[0],fp)) == NULL)
+ rioInitWithBuffer(&payload,c->argv[3]->ptr);
+ if (((type = rdbLoadObjectType(&payload)) == -1) ||
+ ((obj = rdbLoadObject(type,&payload)) == NULL))
{
- addReplyError(c,"Bad data format.");
- fclose(fp);
+ addReplyError(c,"Bad data format");
return;
}
- fclose(fp);
/* Create the key and set the TTL if any */
- dbAdd(c->db,c->argv[1],o);
+ dbAdd(c->db,c->argv[1],obj);
if (ttl) setExpire(c->db,c->argv[1],time(NULL)+ttl);
addReply(c,shared.ok);
+ server.dirty++;
}
/* MIGRATE host port key dbid timeout */
int fd;
long timeout;
long dbid;
- char buf[64];
- FILE *fp;
time_t ttl;
robj *o;
- unsigned char type;
- off_t payload_len;
+ rio cmd, payload;
/* Sanity check */
if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK)
return;
}
- /* Create temp file */
- snprintf(buf,sizeof(buf),"redis-migrate-%d.tmp",getpid());
- fp = fopen(buf,"w+");
- if (!fp) {
- redisLog(REDIS_WARNING,"Can't open tmp file for MIGRATE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"MIGRATE failed, tmp file creation error: %s.",
- strerror(errno));
- return;
- }
- unlink(buf);
-
- /* Build the SELECT + RESTORE query writing it in our temp file. */
- if (fwriteBulkCount(fp,'*',2) == 0) goto file_wr_err;
- if (fwriteBulkString(fp,"SELECT",6) == 0) goto file_wr_err;
- if (fwriteBulkLongLong(fp,dbid) == 0) goto file_wr_err;
+ rioInitWithBuffer(&cmd,sdsempty());
+ redisAssert(rioWriteBulkCount(&cmd,'*',2));
+ redisAssert(rioWriteBulkString(&cmd,"SELECT",6));
+ redisAssert(rioWriteBulkLongLong(&cmd,dbid));
ttl = getExpire(c->db,c->argv[3]);
- type = o->type;
- if (fwriteBulkCount(fp,'*',4) == 0) goto file_wr_err;
- if (fwriteBulkString(fp,"RESTORE",7) == 0) goto file_wr_err;
- if (fwriteBulkObject(fp,c->argv[3]) == 0) goto file_wr_err;
- if (fwriteBulkLongLong(fp, (ttl == -1) ? 0 : ttl) == 0) goto file_wr_err;
+ redisAssert(rioWriteBulkCount(&cmd,'*',4));
+ redisAssert(rioWriteBulkString(&cmd,"RESTORE",7));
+ redisAssert(c->argv[3]->encoding == REDIS_ENCODING_RAW);
+ redisAssert(rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr)));
+ redisAssert(rioWriteBulkLongLong(&cmd,(ttl == -1) ? 0 : ttl));
/* Finally the last argument that is the serailized object payload
- * in the form: <type><rdb-serailized-object>. */
- payload_len = rdbSavedObjectLen(o);
- if (fwriteBulkCount(fp,'$',payload_len+1) == 0) goto file_wr_err;
- if (fwrite(&type,1,1,fp) == 0) goto file_wr_err;
- if (rdbSaveObject(fp,o) == -1) goto file_wr_err;
- if (fwrite("\r\n",2,1,fp) == 0) goto file_wr_err;
-
- /* Tranfer the query to the other node */
- rewind(fp);
+ * in the form: <type><rdb-serialized-object>. */
+ rioInitWithBuffer(&payload,sdsempty());
+ redisAssert(rdbSaveObjectType(&payload,o));
+ redisAssert(rdbSaveObject(&payload,o) != -1);
+ redisAssert(rioWriteBulkString(&cmd,payload.io.buffer.ptr,sdslen(payload.io.buffer.ptr)));
+ sdsfree(payload.io.buffer.ptr);
+
+ /* Tranfer the query to the other node in 64K chunks. */
{
- char buf[4096];
- size_t nread;
-
- while ((nread = fread(buf,1,sizeof(buf),fp)) != 0) {
- int nwritten;
-
- nwritten = syncWrite(fd,buf,nread,timeout);
- if (nwritten != (signed)nread) goto socket_wr_err;
+ sds buf = cmd.io.buffer.ptr;
+ size_t pos = 0, towrite;
+ int nwritten = 0;
+
+ while ((towrite = sdslen(buf)-pos) > 0) {
+ towrite = (towrite > (64*1024) ? (64*1024) : towrite);
+ nwritten = syncWrite(fd,buf+nwritten,towrite,timeout);
+ if (nwritten != (signed)towrite) goto socket_wr_err;
+ pos += nwritten;
}
- if (ferror(fp)) goto file_rd_err;
}
- /* Read back the reply */
+ /* Read back the reply. */
{
char buf1[1024];
char buf2[1024];
if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0)
goto socket_rd_err;
if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0)
- goto socket_rd_err;
+ goto socket_rd_err;
if (buf1[0] == '-' || buf2[0] == '-') {
addReplyErrorFormat(c,"Target instance replied with error: %s",
(buf1[0] == '-') ? buf1+1 : buf2+1);
} else {
+ robj *aux;
+
dbDelete(c->db,c->argv[3]);
addReply(c,shared.ok);
+ server.dirty++;
+
+ /* Translate MIGRATE as DEL for replication/AOF. */
+ aux = createStringObject("DEL",2);
+ rewriteClientCommandVector(c,2,aux,c->argv[3]);
+ decrRefCount(aux);
}
}
- fclose(fp);
- close(fd);
- return;
-
-file_wr_err:
- redisLog(REDIS_WARNING,"Can't write on tmp file for MIGRATE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"MIGRATE failed, tmp file write error: %s.",
- strerror(errno));
- fclose(fp);
- close(fd);
- return;
-file_rd_err:
- redisLog(REDIS_WARNING,"Can't read from tmp file for MIGRATE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"MIGRATE failed, tmp file read error: %s.",
- strerror(errno));
- fclose(fp);
+ sdsfree(cmd.io.buffer.ptr);
close(fd);
return;
strerror(errno));
addReplyErrorFormat(c,"MIGRATE failed, writing to target node: %s.",
strerror(errno));
- fclose(fp);
+ sdsfree(cmd.io.buffer.ptr);
close(fd);
return;
strerror(errno));
addReplyErrorFormat(c,"MIGRATE failed, reading from target node: %s.",
strerror(errno));
- fclose(fp);
+ sdsfree(cmd.io.buffer.ptr);
close(fd);
return;
}
* DUMP is actually not used by Redis Cluster but it is the obvious
* complement of RESTORE and can be useful for different applications. */
void dumpCommand(redisClient *c) {
- char buf[64];
- FILE *fp;
robj *o, *dumpobj;
- sds dump = NULL;
- off_t payload_len;
- unsigned int type;
+ rio payload;
/* Check if the key is here. */
if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
addReply(c,shared.nullbulk);
return;
}
-
- /* Create temp file */
- snprintf(buf,sizeof(buf),"redis-dump-%d.tmp",getpid());
- fp = fopen(buf,"w+");
- if (!fp) {
- redisLog(REDIS_WARNING,"Can't open tmp file for MIGRATE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"DUMP failed, tmp file creation error: %s.",
- strerror(errno));
- return;
- }
- unlink(buf);
-
- /* Dump the serailized object and read it back in memory.
- * We prefix it with a one byte containing the type ID.
- * This is the serialization format understood by RESTORE. */
- if (rdbSaveObject(fp,o) == -1) goto file_wr_err;
- payload_len = ftello(fp);
- if (fseeko(fp,0,SEEK_SET) == -1) goto file_rd_err;
- dump = sdsnewlen(NULL,payload_len+1);
- if (payload_len && fread(dump+1,payload_len,1,fp) != 1) goto file_rd_err;
- fclose(fp);
- type = o->type;
- if (type == REDIS_LIST && o->encoding == REDIS_ENCODING_ZIPLIST)
- type = REDIS_LIST_ZIPLIST;
- else if (type == REDIS_HASH && o->encoding == REDIS_ENCODING_ZIPMAP)
- type = REDIS_HASH_ZIPMAP;
- else if (type == REDIS_SET && o->encoding == REDIS_ENCODING_INTSET)
- type = REDIS_SET_INTSET;
- else
- type = o->type;
- dump[0] = type;
+
+ /* Serialize the object in a RDB-like format. It consist of an object type
+ * byte followed by the serialized object. This is understood by RESTORE. */
+ rioInitWithBuffer(&payload,sdsempty());
+ redisAssert(rdbSaveObjectType(&payload,o));
+ redisAssert(rdbSaveObject(&payload,o));
/* Transfer to the client */
- dumpobj = createObject(REDIS_STRING,dump);
+ dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr);
addReplyBulk(c,dumpobj);
decrRefCount(dumpobj);
return;
-
-file_wr_err:
- redisLog(REDIS_WARNING,"Can't write on tmp file for DUMP: %s",
- strerror(errno));
- addReplyErrorFormat(c,"DUMP failed, tmp file write error: %s.",
- strerror(errno));
- sdsfree(dump);
- fclose(fp);
- return;
-
-file_rd_err:
- redisLog(REDIS_WARNING,"Can't read from tmp file for DUMP: %s",
- strerror(errno));
- addReplyErrorFormat(c,"DUMP failed, tmp file read error: %s.",
- strerror(errno));
- sdsfree(dump);
- fclose(fp);
- return;
}
/* -----------------------------------------------------------------------------