update state when FAIL is cleared from a node

[redis.git] / src / cluster.c
diff --git a/src/cluster.c b/src/cluster.c

index 6d117acadf306ee548eee7c3abbe9cdcf5aef6de..70c34aab884632c134ba587bcae6449e18e8aa79 100644 (file)
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -107,6 +107,10 @@ int clusterLoadConfig(char *filename) {
              clusterNodeAddSlave(master,n);
          }
  
+        /* Set ping sent / pong received timestamps */
+        if (atoi(argv[4])) n->ping_sent = time(NULL);
+        if (atoi(argv[5])) n->pong_received = time(NULL);
+
          /* Populate hash slots served by this instance. */
          for (j = 7; j < argc; j++) {
              int start, stop;
@@ -130,6 +134,7 @@ int clusterLoadConfig(char *filename) {
      redisAssert(server.cluster.myself != NULL);
      redisLog(REDIS_NOTICE,"Node configuration loaded, I'm %.40s",
          server.cluster.myself->name);
+    clusterUpdateState();
      return REDIS_OK;
  
  fmterr:
@@ -627,7 +632,8 @@ int clusterProcessPacket(clusterLink *link) {
          clusterNode *failing;
  
          failing = clusterLookupNode(hdr->data.fail.about.nodename);
-        if (failing && !(failing->flags & REDIS_NODE_FAIL)) {
+        if (failing && !(failing->flags & (REDIS_NODE_FAIL|REDIS_NODE_MYSELF)))
+        {
              redisLog(REDIS_NOTICE,
                  "FAIL message received from %.40s about %.40s",
                  hdr->sender, hdr->data.fail.about.nodename);
@@ -915,22 +921,34 @@ void clusterCron(void) {
          int delay;
  
          if (node->flags &
-            (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE|
-             REDIS_NODE_FAIL)) continue;
+            (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE))
+                continue;
          /* Check only if we already sent a ping and did not received
           * a reply yet. */
          if (node->ping_sent == 0 ||
              node->ping_sent <= node->pong_received) continue;
  
          delay = time(NULL) - node->pong_received;
-        if (node->flags & REDIS_NODE_PFAIL) {
+        if (delay < server.cluster.node_timeout) {
              /* The PFAIL condition can be reversed without external
               * help if it is not transitive (that is, if it does not
-             * turn into a FAIL state). */
-            if (delay < server.cluster.node_timeout)
+             * turn into a FAIL state).
+             *
+             * The FAIL condition is also reversible if there are no slaves
+             * for this host, so no slave election should be in progress.
+             *
+             * TODO: consider all the implications of resurrecting a
+             * FAIL node. */
+            if (node->flags & REDIS_NODE_PFAIL) {
                  node->flags &= ~REDIS_NODE_PFAIL;
+            } else if (node->flags & REDIS_NODE_FAIL && !node->numslaves) {
+                node->flags &= ~REDIS_NODE_FAIL;
+                clusterUpdateState();
+            }
          } else {
-            if (delay >= server.cluster.node_timeout) {
+            /* Timeout reached. Set the node as possibly failing if it is
+             * not already in this state. */
+            if (!(node->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL))) {
                  redisLog(REDIS_DEBUG,"*** NODE %.40s possibly failing",
                      node->name);
                  node->flags |= REDIS_NODE_PFAIL;
@@ -975,7 +993,7 @@ int clusterNodeGetSlotBit(clusterNode *n, int slot) {
   * an error and REDIS_ERR is returned. */
  int clusterAddSlot(clusterNode *n, int slot) {
      redisAssert(clusterNodeSetSlotBit(n,slot) == 0);
-    server.cluster.slots[slot] = server.cluster.myself;
+    server.cluster.slots[slot] = n;
      printf("SLOT %d added to %.40s\n", slot, n->name);
      return REDIS_OK;
  }