#include <arpa/inet.h>
#include <sys/socket.h>
+extern char **environ;
+
#define REDIS_SENTINEL_PORT 26379
/* ======================== Sentinel global state =========================== */
struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
/* Scripts executed to notify admin or reconfigure clients: when they
* are set to NULL no script is executed. */
- char *notify_script;
+ char *notification_script;
char *client_reconfig_script;
} sentinelRedisInstance;
void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c);
void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c);
const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
+void sentinelAbortFailover(sentinelRedisInstance *ri);
+void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
/* ========================= Dictionary types =============================== */
/* =========================== Events notification ========================== */
void sentinelCallNotificationScript(char *scriptpath, char *type, char *msg) {
- /* TODO: implement it. */
+ pid_t pid = fork();
+
+ if (pid == -1) {
+ /* Parent on error. */
+ sentinelEvent(REDIS_WARNING,"-notification-script-error",NULL,
+ "#can't fork: %s",strerror(errno));
+ return;
+ } else if (pid == 0) {
+ /* Child */
+ char *argv[4];
+
+ argv[0] = scriptpath;
+ argv[1] = type;
+ argv[2] = msg;
+ argv[3] = NULL;
+ execve(scriptpath,argv,environ);
+ /* If we are here an error occurred. */
+ sentinelEvent(REDIS_WARNING,"-notification-script-error",NULL,
+ "#execve(2): %s",strerror(errno));
+ _exit(1);
+ } else {
+ sentinelEvent(REDIS_DEBUG,"+child",NULL,"%ld",(long)pid);
+ }
}
/* Send an event to log, pub/sub, user notification script.
if (level == REDIS_WARNING && ri != NULL) {
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
ri : ri->master;
- if (master->notify_script) {
- sentinelCallNotificationScript(master->notify_script,type,msg);
+ if (master->notification_script) {
+ sentinelCallNotificationScript(master->notification_script,
+ type,msg);
}
}
}
ri->failover_start_time = 0;
ri->failover_timeout = SENTINEL_DEFAULT_FAILOVER_TIMEOUT;
ri->promoted_slave = NULL;
- ri->notify_script = NULL;
+ ri->notification_script = NULL;
ri->client_reconfig_script = NULL;
/* Add into the right table. */
/* Free other resources. */
sdsfree(ri->name);
sdsfree(ri->runid);
- sdsfree(ri->notify_script);
+ sdsfree(ri->notification_script);
sdsfree(ri->client_reconfig_script);
sdsfree(ri->slave_master_host);
sdsfree(ri->leader);
ri = sentinelGetMasterByName(argv[1]);
if (!ri) return "No such master with specified name.";
ri->parallel_syncs = atoi(argv[2]);
+ } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) {
+ /* notification-script <name> <path> */
+ ri = sentinelGetMasterByName(argv[1]);
+ if (!ri) return "No such master with specified name.";
+ if (access(argv[2],X_OK) == -1)
+ return "Notification script seems non existing or non executable.";
+ ri->notification_script = sdsnew(argv[2]);
+ } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) {
+ /* client-reconfig-script <name> <path> */
+ ri = sentinelGetMasterByName(argv[1]);
+ if (!ri) return "No such master with specified name.";
+ if (access(argv[2],X_OK) == -1)
+ return "Client reconfiguration script seems non existing or "
+ "non executable.";
+ ri->client_reconfig_script = sdsnew(argv[2]);
} else {
return "Unrecognized sentinel configuration statement.";
}
sentinelRedisInstance *slave = sentinelSelectSlave(ri);
if (slave == NULL) {
- sentinelEvent(REDIS_WARNING,"-no-good-slave",ri,
- "%@ #retrying in %d seconds",
- (SENTINEL_FAILOVER_FIXED_DELAY+
- SENTINEL_FAILOVER_MAX_RANDOM_DELAY)/1000);
- ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
- ri->failover_start_time = mstime() + SENTINEL_FAILOVER_FIXED_DELAY +
- SENTINEL_FAILOVER_MAX_RANDOM_DELAY;
+ sentinelEvent(REDIS_WARNING,"-failover-abort-no-good-slave",ri,"%@");
+ sentinelAbortFailover(ri);
} else {
sentinelEvent(REDIS_WARNING,"+selected-slave",slave,"%@");
slave->flags |= SRI_PROMOTED;
}
}
-/* The following is called only for master instances and will abort the
- * failover process if:
- *
- * 1) The failover is in progress.
- * 2) We already promoted a slave.
- * 3) The promoted slave is in extended SDOWN condition.
+/* Abort a failover in progress with the following steps:
+ * 1) If this instance is the leaer send a SLAVEOF command to all the already
+ * reconfigured slaves if any to configure them to replicate with the
+ * original master.
+ * 2) For both leaders and observers: clear the failover flags and state in
+ * the master instance.
+ * 3) If there is already a promoted slave and we are the leader, and this
+ * slave is not DISCONNECTED, try to reconfigure it to replicate
+ * back to the master as well, sending a best effort SLAVEOF command.
*/
-void sentinelAbortFailoverIfNeeded(sentinelRedisInstance *ri) {
+void sentinelAbortFailover(sentinelRedisInstance *ri) {
+ char master_port[32];
dictIterator *di;
dictEntry *de;
- /* Failover is in progress? Do we have a promoted slave? */
- if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS) || !ri->promoted_slave) return;
-
- /* Is the promoted slave into an extended SDOWN state? */
- if (!(ri->promoted_slave->flags & SRI_S_DOWN) ||
- (mstime() - ri->promoted_slave->s_down_since_time) <
- (ri->down_after_period * SENTINEL_EXTENDED_SDOWN_MULTIPLIER)) return;
-
- sentinelEvent(REDIS_WARNING,"-failover-abort-x-sdown",ri->promoted_slave,"%@");
+ redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
+ ll2string(master_port,sizeof(master_port),ri->addr->port);
/* Clear failover related flags from slaves.
* Also if we are the leader make sure to send SLAVEOF commands to all the
* already reconfigured slaves in order to turn them back into slaves of
* the original master. */
-
di = dictGetIterator(ri->slaves);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *slave = dictGetVal(de);
- if (ri->flags & SRI_I_AM_THE_LEADER) {
- char master_port[32];
+ if ((ri->flags & SRI_I_AM_THE_LEADER) &&
+ !(slave->flags & SRI_DISCONNECTED) &&
+ (slave->flags & (SRI_PROMOTED|SRI_RECONF_SENT|SRI_RECONF_INPROG|
+ SRI_RECONF_DONE)))
+ {
int retval;
- ll2string(master_port,sizeof(master_port),ri->addr->port);
retval = redisAsyncCommand(slave->cc,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
ri->addr->ip,
ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS|SRI_I_AM_THE_LEADER);
ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
ri->failover_state_change_time = mstime();
- ri->promoted_slave->flags &= ~SRI_PROMOTED;
- ri->promoted_slave = NULL;
+ if (ri->promoted_slave) {
+ ri->promoted_slave->flags &= ~SRI_PROMOTED;
+ ri->promoted_slave = NULL;
+ }
+}
+
+/* The following is called only for master instances and will abort the
+ * failover process if:
+ *
+ * 1) The failover is in progress.
+ * 2) We already promoted a slave.
+ * 3) The promoted slave is in extended SDOWN condition.
+ */
+void sentinelAbortFailoverIfNeeded(sentinelRedisInstance *ri) {
+ /* Failover is in progress? Do we have a promoted slave? */
+ if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS) || !ri->promoted_slave) return;
+
+ /* Is the promoted slave into an extended SDOWN state? */
+ if (!(ri->promoted_slave->flags & SRI_S_DOWN) ||
+ (mstime() - ri->promoted_slave->s_down_since_time) <
+ (ri->down_after_period * SENTINEL_EXTENDED_SDOWN_MULTIPLIER)) return;
+
+ sentinelEvent(REDIS_WARNING,"-failover-abort-x-sdown",ri->promoted_slave,"%@");
+ sentinelAbortFailover(ri);
}
/* ======================== SENTINEL timer handler ==========================
sentinel.previous_time = mstime();
}
+/* Handle terminated childs resulting from calls to notifications and client
+ * reconfigurations scripts. */
+void sentinelHandleChildren(void) {
+ int statloc;
+ pid_t pid;
+
+ if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
+ int exitcode = WEXITSTATUS(statloc);
+ int bysignal = 0;
+
+ if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
+ sentinelEvent(REDIS_DEBUG,"-child",NULL,"%ld %d %d",
+ (long)pid, exitcode, bysignal);
+
+ /* TODO: remove client reconfiguration scripts from the queue. */
+ }
+}
+
void sentinelTimer(void) {
sentinelCheckTiltCondition();
sentinelHandleDictOfRedisInstances(sentinel.masters);
+ sentinelHandleChildren();
}