X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/0ba2932298e3e432f32441ebc5d95a86158d706c..cb16b6c3899d0696a7e633c29f764e06b222b2fe:/src/cluster.c diff --git a/src/cluster.c b/src/cluster.c index c4a56f4d..3e65af78 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -24,12 +24,10 @@ void clusterGetRandomName(char *p) { char *charset = "0123456789abcdef"; int j; - if (!fp) { - redisLog(REDIS_WARNING, - "Unrecovarable error: can't open /dev/urandom:%s" ,strerror(errno)); - exit(1); + if (fp == NULL || fread(p,REDIS_CLUSTER_NAMELEN,1,fp) == 0) { + for (j = 0; j < REDIS_CLUSTER_NAMELEN; j++) + p[j] = rand(); } - fread(p,REDIS_CLUSTER_NAMELEN,1,fp); for (j = 0; j < REDIS_CLUSTER_NAMELEN; j++) p[j] = charset[p[j] & 0x0F]; fclose(fp); @@ -1016,11 +1014,24 @@ int clusterNodeGetSlotBit(clusterNode *n, int slot) { * If the slot is already assigned to another instance this is considered * an error and REDIS_ERR is returned. */ int clusterAddSlot(clusterNode *n, int slot) { - redisAssert(clusterNodeSetSlotBit(n,slot) == 0); + if (clusterNodeSetSlotBit(n,slot) != 0) + return REDIS_ERR; server.cluster.slots[slot] = n; return REDIS_OK; } +/* Delete the specified slot marking it as unassigned. + * Returns REDIS_OK if the slot was assigned, otherwise if the slot was + * already unassigned REDIS_ERR is returned. */ +int clusterDelSlot(int slot) { + clusterNode *n = server.cluster.slots[slot]; + + if (!n) return REDIS_ERR; + redisAssert(clusterNodeClearSlotBit(n,slot) == 1); + server.cluster.slots[slot] = NULL; + return REDIS_OK; +} + /* ----------------------------------------------------------------------------- * Cluster state evaluation function * -------------------------------------------------------------------------- */ @@ -1130,6 +1141,18 @@ sds clusterGenNodesDescription(void) { return ci; } +int getSlotOrReply(redisClient *c, robj *o) { + long long slot; + + if (getLongLongFromObject(o,&slot) != REDIS_OK || + slot < 0 || slot > REDIS_CLUSTER_SLOTS) + { + addReplyError(c,"Invalid or out of range slot"); + return -1; + } + return (int) slot; +} + void clusterCommand(redisClient *c) { if (server.cluster_enabled == 0) { addReplyError(c,"This instance has cluster support disabled"); @@ -1167,24 +1190,26 @@ void clusterCommand(redisClient *c) { o = createObject(REDIS_STRING,ci); addReplyBulk(c,o); decrRefCount(o); - } else if (!strcasecmp(c->argv[1]->ptr,"addslots") && c->argc >= 3) { - int j; - long long slot; + } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") || + !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3) { + int j, slot; unsigned char *slots = zmalloc(REDIS_CLUSTER_SLOTS); + int del = !strcasecmp(c->argv[1]->ptr,"delslots"); memset(slots,0,REDIS_CLUSTER_SLOTS); /* Check that all the arguments are parsable and that all the * slots are not already busy. */ for (j = 2; j < c->argc; j++) { - if (getLongLongFromObject(c->argv[j],&slot) != REDIS_OK || - slot < 0 || slot > REDIS_CLUSTER_SLOTS) - { - addReplyError(c,"Invalid or out of range slot index"); + if ((slot = getSlotOrReply(c,c->argv[j])) == -1) { zfree(slots); return; } - if (server.cluster.slots[slot]) { - addReplyErrorFormat(c,"Slot %lld is already busy", slot); + if (del && server.cluster.slots[slot] == NULL) { + addReplyErrorFormat(c,"Slot %d is already unassigned", slot); + zfree(slots); + return; + } else if (!del && server.cluster.slots[slot]) { + addReplyErrorFormat(c,"Slot %d is already busy", slot); zfree(slots); return; } @@ -1197,8 +1222,15 @@ void clusterCommand(redisClient *c) { } for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) { if (slots[j]) { - int retval = clusterAddSlot(server.cluster.myself,j); - + int retval; + + /* If this slot was set as importing we can clear this + * state as now we are the real owner of the slot. */ + if (server.cluster.importing_slots_from[j]) + server.cluster.importing_slots_from[j] = NULL; + + retval = del ? clusterDelSlot(j) : + clusterAddSlot(server.cluster.myself,j); redisAssert(retval == REDIS_OK); } } @@ -1210,22 +1242,16 @@ void clusterCommand(redisClient *c) { /* SETSLOT 10 MIGRATING */ /* SETSLOT 10 IMPORTING */ /* SETSLOT 10 STABLE */ - long long aux; - unsigned int slot; + int slot; clusterNode *n; - if (getLongLongFromObjectOrReply(c,c->argv[2],&aux,NULL) != REDIS_OK) - return; - if (aux < 0 || aux >= REDIS_CLUSTER_SLOTS) { - addReplyError(c,"Slot out of range"); - return; - } - slot = (unsigned int) aux; - if (server.cluster.slots[slot] != server.cluster.myself) { - addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); - return; - } + if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return; + if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) { + if (server.cluster.slots[slot] != server.cluster.myself) { + addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); + return; + } if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { addReplyErrorFormat(c,"I don't know about node %s", (char*)c->argv[4]->ptr); @@ -1233,6 +1259,11 @@ void clusterCommand(redisClient *c) { } server.cluster.migrating_slots_to[slot] = n; } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) { + if (server.cluster.slots[slot] == server.cluster.myself) { + addReplyErrorFormat(c, + "I'm already the owner of hash slot %u",slot); + return; + } if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) { addReplyErrorFormat(c,"I don't know about node %s", (char*)c->argv[3]->ptr); @@ -1240,9 +1271,43 @@ void clusterCommand(redisClient *c) { } server.cluster.importing_slots_from[slot] = n; } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) { + /* CLUSTER SETSLOT STABLE */ server.cluster.importing_slots_from[slot] = NULL; + server.cluster.migrating_slots_to[slot] = NULL; + } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 4) { + /* CLUSTER SETSLOT NODE */ + clusterNode *n = clusterLookupNode(c->argv[4]->ptr); + + if (!n) addReplyErrorFormat(c,"Unknown node %s", + (char*)c->argv[4]->ptr); + /* If this hash slot was served by 'myself' before to switch + * make sure there are no longer local keys for this hash slot. */ + if (server.cluster.slots[slot] == server.cluster.myself && + n != server.cluster.myself) + { + int numkeys; + robj **keys; + + keys = zmalloc(sizeof(robj*)*1); + numkeys = GetKeysInSlot(slot, keys, 1); + zfree(keys); + if (numkeys == 0) { + addReplyErrorFormat(c, "Can't assign hashslot %d to a different node while I still hold keys for this hash slot.", slot); + return; + } + } + /* If this node was the slot owner and the slot was marked as + * migrating, assigning the slot to another node will clear + * the migratig status. */ + if (server.cluster.slots[slot] == server.cluster.myself && + server.cluster.migrating_slots_to[slot]) + server.cluster.migrating_slots_to[slot] = NULL; + + clusterDelSlot(slot); + clusterAddSlot(n,slot); } else { addReplyError(c,"Invalid CLUSTER SETSLOT action or number of arguments"); + return; } clusterSaveConfigOrDie(); addReply(c,shared.ok); @@ -1607,12 +1672,19 @@ file_rd_err: /* Return the pointer to the cluster node that is able to serve the query * as all the keys belong to hash slots for which the node is in charge. * - * If keys in query spawn multiple nodes NULL is returned. */ -clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot) { + * If the returned node should be used only for this request, the *ask + * integer is set to '1', otherwise to '0'. This is used in order to + * let the caller know if we should reply with -MOVED or with -ASK. + * + * If the request contains more than a single key NULL is returned, + * however a request with more then a key argument where the key is always + * the same is valid, like in: RPOPLPUSH mylist mylist.*/ +clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask) { clusterNode *n = NULL; + robj *firstkey = NULL; multiState *ms, _ms; multiCmd mc; - int i; + int i, slot = 0; /* We handle all the cases as if they were EXEC commands, so we have * a common code path for everything */ @@ -1622,7 +1694,9 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg if (!(c->flags & REDIS_MULTI)) return server.cluster.myself; ms = &c->mstate; } else { - /* Create a fake Multi State structure, with just one command */ + /* In order to have a single codepath create a fake Multi State + * structure if the client is not in MULTI/EXEC state, this way + * we have a single codepath below. */ ms = &_ms; _ms.commands = &mc; _ms.count = 1; @@ -1631,6 +1705,8 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg mc.cmd = cmd; } + /* Check that all the keys are the same key, and get the slot and + * node for this key. */ for (i = 0; i < ms->count; i++) { struct redisCommand *mcmd; robj **margv; @@ -1641,26 +1717,50 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg margv = ms->commands[i].argv; keyindex = getKeysFromCommand(mcmd,margv,margc,&numkeys, - REDIS_GETKEYS_PRELOAD); + REDIS_GETKEYS_ALL); for (j = 0; j < numkeys; j++) { - int slot = keyHashSlot((char*)margv[keyindex[j]]->ptr, - sdslen(margv[keyindex[j]]->ptr)); - struct clusterNode *slotnode; - - slotnode = server.cluster.slots[slot]; - if (hashslot) *hashslot = slot; - /* Node not assigned? (Should never happen actually - * if we reached this function). - * Different node than the previous one? - * Return NULL, the cluster can't serve multi-node requests */ - if (slotnode == NULL || (n && slotnode != n)) { - getKeysFreeResult(keyindex); - return NULL; + if (firstkey == NULL) { + /* This is the first key we see. Check what is the slot + * and node. */ + firstkey = margv[keyindex[j]]; + + slot = keyHashSlot((char*)firstkey->ptr, sdslen(firstkey->ptr)); + n = server.cluster.slots[slot]; + redisAssert(n != NULL); } else { - n = slotnode; + /* If it is not the first key, make sure it is exactly + * the same key as the first we saw. */ + if (!equalStringObjects(firstkey,margv[keyindex[j]])) { + decrRefCount(firstkey); + getKeysFreeResult(keyindex); + return NULL; + } } } getKeysFreeResult(keyindex); } - return (n == NULL) ? server.cluster.myself : n; + if (ask) *ask = 0; /* This is the default. Set to 1 if needed later. */ + /* No key at all in command? then we can serve the request + * without redirections. */ + if (n == NULL) return server.cluster.myself; + if (hashslot) *hashslot = slot; + /* This request is about a slot we are migrating into another instance? + * Then we need to check if we have the key. If we have it we can reply. + * If instead is a new key, we pass the request to the node that is + * receiving the slot. */ + if (n == server.cluster.myself && + server.cluster.migrating_slots_to[slot] != NULL) + { + if (lookupKeyRead(&server.db[0],firstkey) == NULL) { + if (ask) *ask = 1; + return server.cluster.migrating_slots_to[slot]; + } + } + /* Handle the case in which we are receiving this hash slot from + * another instance, so we'll accept the query even if in the table + * it is assigned to a different node. */ + if (server.cluster.importing_slots_from[slot] != NULL) + return server.cluster.myself; + /* It's not a -ASK case. Base case: just return the right node. */ + return n; }