X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/1793752d97d72c82ce237b461165d5a06c44587e..a54d9805ee1c46d574aab8e349fd778d3a4bc67b:/src/cluster.c?ds=inline diff --git a/src/cluster.c b/src/cluster.c index 6d117aca..4e3cf746 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -107,6 +107,10 @@ int clusterLoadConfig(char *filename) { clusterNodeAddSlave(master,n); } + /* Set ping sent / pong received timestamps */ + if (atoi(argv[4])) n->ping_sent = time(NULL); + if (atoi(argv[5])) n->pong_received = time(NULL); + /* Populate hash slots served by this instance. */ for (j = 7; j < argc; j++) { int start, stop; @@ -130,6 +134,7 @@ int clusterLoadConfig(char *filename) { redisAssert(server.cluster.myself != NULL); redisLog(REDIS_NOTICE,"Node configuration loaded, I'm %.40s", server.cluster.myself->name); + clusterUpdateState(); return REDIS_OK; fmterr: @@ -627,7 +632,8 @@ int clusterProcessPacket(clusterLink *link) { clusterNode *failing; failing = clusterLookupNode(hdr->data.fail.about.nodename); - if (failing && !(failing->flags & REDIS_NODE_FAIL)) { + if (failing && !(failing->flags & (REDIS_NODE_FAIL|REDIS_NODE_MYSELF))) + { redisLog(REDIS_NOTICE, "FAIL message received from %.40s about %.40s", hdr->sender, hdr->data.fail.about.nodename); @@ -915,22 +921,34 @@ void clusterCron(void) { int delay; if (node->flags & - (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE| - REDIS_NODE_FAIL)) continue; + (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE)) + continue; /* Check only if we already sent a ping and did not received * a reply yet. */ if (node->ping_sent == 0 || node->ping_sent <= node->pong_received) continue; delay = time(NULL) - node->pong_received; - if (node->flags & REDIS_NODE_PFAIL) { + if (delay < server.cluster.node_timeout) { /* The PFAIL condition can be reversed without external * help if it is not transitive (that is, if it does not - * turn into a FAIL state). */ - if (delay < server.cluster.node_timeout) + * turn into a FAIL state). + * + * The FAIL condition is also reversible if there are no slaves + * for this host, so no slave election should be in progress. + * + * TODO: consider all the implications of resurrecting a + * FAIL node. */ + if (node->flags & REDIS_NODE_PFAIL) { node->flags &= ~REDIS_NODE_PFAIL; + } else if (node->flags & REDIS_NODE_FAIL && !node->numslaves) { + node->flags &= ~REDIS_NODE_FAIL; + clusterUpdateState(); + } } else { - if (delay >= server.cluster.node_timeout) { + /* Timeout reached. Set the noad se possibly failing if it is + * not already in this state. */ + if (!(node->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL))) { redisLog(REDIS_DEBUG,"*** NODE %.40s possibly failing", node->name); node->flags |= REDIS_NODE_PFAIL; @@ -975,7 +993,7 @@ int clusterNodeGetSlotBit(clusterNode *n, int slot) { * an error and REDIS_ERR is returned. */ int clusterAddSlot(clusterNode *n, int slot) { redisAssert(clusterNodeSetSlotBit(n,slot) == 0); - server.cluster.slots[slot] = server.cluster.myself; + server.cluster.slots[slot] = n; printf("SLOT %d added to %.40s\n", slot, n->name); return REDIS_OK; } @@ -1175,11 +1193,13 @@ void clusterCommand(redisClient *c) { "cluster_slots_ok:%d\r\n" "cluster_slots_pfail:%d\r\n" "cluster_slots_fail:%d\r\n" + "cluster_known_nodes:%lu\r\n" , statestr[server.cluster.state], slots_assigned, slots_ok, slots_pfail, - slots_fail + slots_fail, + dictSize(server.cluster.nodes) ); addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n", (unsigned long)sdslen(info)));