]> git.saurik.com Git - redis.git/blobdiff - src/sentinel.c
Sentinel: ability to execute notification scripts.
[redis.git] / src / sentinel.c
index 8ec5d151b99f6e14388b2d3e918aee5dc2d3dd6b..227fb69386d1e61a6b221d8d40d187abfa32d714 100644 (file)
@@ -37,6 +37,8 @@
 #include <arpa/inet.h>
 #include <sys/socket.h>
 
+extern char **environ;
+
 #define REDIS_SENTINEL_PORT 26379
 
 /* ======================== Sentinel global state =========================== */
@@ -169,7 +171,7 @@ typedef struct sentinelRedisInstance {
     struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
     /* Scripts executed to notify admin or reconfigure clients: when they
      * are set to NULL no script is executed. */
-    char *notify_script;
+    char *notification_script;
     char *client_reconfig_script;
 } sentinelRedisInstance;
 
@@ -290,6 +292,8 @@ int yesnotoi(char *s);
 void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c);
 void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c);
 const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
+void sentinelAbortFailover(sentinelRedisInstance *ri);
+void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
 
 /* ========================= Dictionary types =============================== */
 
@@ -402,7 +406,29 @@ void releaseSentinelAddr(sentinelAddr *sa) {
 /* =========================== Events notification ========================== */
 
 void sentinelCallNotificationScript(char *scriptpath, char *type, char *msg) {
-    /* TODO: implement it. */
+    pid_t pid = fork();
+
+    if (pid == -1) {
+        /* Parent on error. */
+        sentinelEvent(REDIS_WARNING,"-notification-script-error",NULL,
+                      "#can't fork: %s",strerror(errno));
+        return;
+    } else if (pid == 0) {
+        /* Child */
+        char *argv[4];
+
+        argv[0] = scriptpath;
+        argv[1] = type;
+        argv[2] = msg;
+        argv[3] = NULL;
+        execve(scriptpath,argv,environ);
+        /* If we are here an error occurred. */
+        sentinelEvent(REDIS_WARNING,"-notification-script-error",NULL,
+                      "#execve(2): %s",strerror(errno));
+        _exit(1);
+    } else {
+        sentinelEvent(REDIS_DEBUG,"+child",NULL,"%ld",(long)pid);
+    }
 }
 
 /* Send an event to log, pub/sub, user notification script.
@@ -479,8 +505,9 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
     if (level == REDIS_WARNING && ri != NULL) {
         sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
                                          ri : ri->master;
-        if (master->notify_script) {
-            sentinelCallNotificationScript(master->notify_script,type,msg);
+        if (master->notification_script) {
+            sentinelCallNotificationScript(master->notification_script,
+                                           type,msg);
         }
     }
 }
@@ -583,7 +610,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
     ri->failover_start_time = 0;
     ri->failover_timeout = SENTINEL_DEFAULT_FAILOVER_TIMEOUT;
     ri->promoted_slave = NULL;
-    ri->notify_script = NULL;
+    ri->notification_script = NULL;
     ri->client_reconfig_script = NULL;
 
     /* Add into the right table. */
@@ -607,7 +634,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
     /* Free other resources. */
     sdsfree(ri->name);
     sdsfree(ri->runid);
-    sdsfree(ri->notify_script);
+    sdsfree(ri->notification_script);
     sdsfree(ri->client_reconfig_script);
     sdsfree(ri->slave_master_host);
     sdsfree(ri->leader);
@@ -880,6 +907,21 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
         ri = sentinelGetMasterByName(argv[1]);
         if (!ri) return "No such master with specified name.";
         ri->parallel_syncs = atoi(argv[2]);
+   } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) {
+        /* notification-script <name> <path> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        if (access(argv[2],X_OK) == -1)
+            return "Notification script seems non existing or non executable.";
+        ri->notification_script = sdsnew(argv[2]);
+   } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) {
+        /* client-reconfig-script <name> <path> */
+        ri = sentinelGetMasterByName(argv[1]);
+        if (!ri) return "No such master with specified name.";
+        if (access(argv[2],X_OK) == -1)
+            return "Client reconfiguration script seems non existing or "
+                   "non executable.";
+        ri->client_reconfig_script = sdsnew(argv[2]);
     } else {
         return "Unrecognized sentinel configuration statement.";
     }
@@ -2110,13 +2152,8 @@ void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
     sentinelRedisInstance *slave = sentinelSelectSlave(ri);
 
     if (slave == NULL) {
-        sentinelEvent(REDIS_WARNING,"-no-good-slave",ri,
-            "%@ #retrying in %d seconds",
-            (SENTINEL_FAILOVER_FIXED_DELAY+
-             SENTINEL_FAILOVER_MAX_RANDOM_DELAY)/1000);
-        ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
-        ri->failover_start_time = mstime() + SENTINEL_FAILOVER_FIXED_DELAY +
-                                  SENTINEL_FAILOVER_MAX_RANDOM_DELAY;
+        sentinelEvent(REDIS_WARNING,"-failover-abort-no-good-slave",ri,"%@");
+        sentinelAbortFailover(ri);
     } else {
         sentinelEvent(REDIS_WARNING,"+selected-slave",slave,"%@");
         slave->flags |= SRI_PROMOTED;
@@ -2337,40 +2374,38 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
     }
 }
 
-/* The following is called only for master instances and will abort the
- * failover process if:
- *
- * 1) The failover is in progress.
- * 2) We already promoted a slave.
- * 3) The promoted slave is in extended SDOWN condition.
+/* Abort a failover in progress with the following steps:
+ * 1) If this instance is the leader send a SLAVEOF command to all the already
+ *    reconfigured slaves if any to configure them to replicate with the
+ *    original master.
+ * 2) For both leaders and observers: clear the failover flags and state in
+ *    the master instance.
+ * 3) If there is already a promoted slave and we are the leader, and this
+ *    slave is not DISCONNECTED, try to reconfigure it to replicate
+ *    back to the master as well, sending a best effort SLAVEOF command.
  */
-void sentinelAbortFailoverIfNeeded(sentinelRedisInstance *ri) {
+void sentinelAbortFailover(sentinelRedisInstance *ri) {
+    char master_port[32];
     dictIterator *di;
     dictEntry *de;
 
-    /* Failover is in progress? Do we have a promoted slave? */
-    if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS) || !ri->promoted_slave) return;
-
-    /* Is the promoted slave into an extended SDOWN state? */
-    if (!(ri->promoted_slave->flags & SRI_S_DOWN) ||
-        (mstime() - ri->promoted_slave->s_down_since_time) <
-        (ri->down_after_period * SENTINEL_EXTENDED_SDOWN_MULTIPLIER)) return;
-
-    sentinelEvent(REDIS_WARNING,"-failover-abort-x-sdown",ri->promoted_slave,"%@");
+    redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
+    ll2string(master_port,sizeof(master_port),ri->addr->port);
 
     /* Clear failover related flags from slaves.
      * Also if we are the leader make sure to send SLAVEOF commands to all the
      * already reconfigured slaves in order to turn them back into slaves of
      * the original master. */
-
     di = dictGetIterator(ri->slaves);
     while((de = dictNext(di)) != NULL) {
         sentinelRedisInstance *slave = dictGetVal(de);
-        if (ri->flags & SRI_I_AM_THE_LEADER) {
-            char master_port[32];
+        if ((ri->flags & SRI_I_AM_THE_LEADER) &&
+            !(slave->flags & SRI_DISCONNECTED) &&
+             (slave->flags & (SRI_PROMOTED|SRI_RECONF_SENT|SRI_RECONF_INPROG|
+                              SRI_RECONF_DONE)))
+        {
             int retval;
 
-            ll2string(master_port,sizeof(master_port),ri->addr->port);
             retval = redisAsyncCommand(slave->cc,
                 sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
                     ri->addr->ip,
@@ -2385,8 +2420,30 @@ void sentinelAbortFailoverIfNeeded(sentinelRedisInstance *ri) {
     ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS|SRI_I_AM_THE_LEADER);
     ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
     ri->failover_state_change_time = mstime();
-    ri->promoted_slave->flags &= ~SRI_PROMOTED;
-    ri->promoted_slave = NULL;
+    if (ri->promoted_slave) {
+        ri->promoted_slave->flags &= ~SRI_PROMOTED;
+        ri->promoted_slave = NULL;
+    }
+}
+
+/* The following is called only for master instances and will abort the
+ * failover process if:
+ *
+ * 1) The failover is in progress.
+ * 2) We already promoted a slave.
+ * 3) The promoted slave is in extended SDOWN condition.
+ */
+void sentinelAbortFailoverIfNeeded(sentinelRedisInstance *ri) {
+    /* Failover is in progress? Do we have a promoted slave? */
+    if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS) || !ri->promoted_slave) return;
+
+    /* Is the promoted slave in an extended SDOWN state? */
+    if (!(ri->promoted_slave->flags & SRI_S_DOWN) ||
+        (mstime() - ri->promoted_slave->s_down_since_time) <
+        (ri->down_after_period * SENTINEL_EXTENDED_SDOWN_MULTIPLIER)) return;
+
+    sentinelEvent(REDIS_WARNING,"-failover-abort-x-sdown",ri->promoted_slave,"%@");
+    sentinelAbortFailover(ri);
 }
 
 /* ======================== SENTINEL timer handler ==========================
@@ -2495,8 +2552,27 @@ void sentinelCheckTiltCondition(void) {
     sentinel.previous_time = mstime();
 }
 
+/* Reap terminated children left by notification and client reconfiguration
+ * scripts. wait3() returns -1 when no child exists, so only pid > 0 counts. */
+void sentinelHandleChildren(void) {
+    int statloc;
+    pid_t pid;
+
+    while ((pid = wait3(&statloc,WNOHANG,NULL)) > 0) {
+        int exitcode = WEXITSTATUS(statloc);
+        int bysignal = 0;
+
+        if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
+        sentinelEvent(REDIS_DEBUG,"-child",NULL,"%ld %d %d",
+            (long)pid, exitcode, bysignal);
+
+        /* TODO: remove client reconfiguration scripts from the queue. */
+    }
+}
+
 void sentinelTimer(void) {
     sentinelCheckTiltCondition();
     sentinelHandleDictOfRedisInstances(sentinel.masters);
+    sentinelHandleChildren();
 }