/* Redis Sentinel implementation
- * -----------------------------
*
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
#define SRI_RECONF_INPROG (1<<12) /* Slave synchronization in progress. */
#define SRI_RECONF_DONE (1<<13) /* Slave synchronized with new master. */
#define SRI_FORCE_FAILOVER (1<<14) /* Force failover with master up. */
+#define SRI_SCRIPT_KILL_SENT (1<<15) /* SCRIPT KILL already sent on -BUSY */
#define SENTINEL_INFO_PERIOD 10000
#define SENTINEL_PING_PERIOD 1000
dict *slaves; /* Slaves for this master instance. */
int quorum; /* Number of sentinels that need to agree on failure. */
int parallel_syncs; /* How many slaves to reconfigure at same time. */
+ char *auth_pass; /* Password to use for AUTH against master & slaves. */
/* Slave specific. */
mstime_t master_link_down_time; /* Slave replication link down time. */
sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master);
void sentinelScheduleScriptExecution(char *path, ...);
void sentinelStartFailover(sentinelRedisInstance *master, int state);
+void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata);
/* ========================= Dictionary types =============================== */
/* =========================== Initialization =============================== */
void sentinelCommand(redisClient *c);
+void sentinelInfoCommand(redisClient *c);
struct redisCommand sentinelcmds[] = {
{"ping",pingCommand,1,"",0,NULL,0,0,0,0,0},
{"subscribe",subscribeCommand,-2,"",0,NULL,0,0,0,0,0},
{"unsubscribe",unsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
{"psubscribe",psubscribeCommand,-2,"",0,NULL,0,0,0,0,0},
- {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0}
+ {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
+ {"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0}
};
/* This function overwrites a few normal Redis config default with Sentinel
sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
sentinelRedisInstance *ri;
sentinelAddr *addr;
- dict *table;
+ dict *table = NULL;
char slavename[128], *sdsname;
redisAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
ri->down_after_period = master ? master->down_after_period :
SENTINEL_DOWN_AFTER_PERIOD;
ri->master_link_down_time = 0;
+ ri->auth_pass = NULL;
ri->slave_priority = SENTINEL_DEFAULT_SLAVE_PRIORITY;
ri->slave_reconf_sent_time = 0;
ri->slave_master_host = NULL;
sdsfree(ri->client_reconfig_script);
sdsfree(ri->slave_master_host);
sdsfree(ri->leader);
+ sdsfree(ri->auth_pass);
releaseSentinelAddr(ri->addr);
/* Clear state into the master if needed. */
return "Client reconfiguration script seems non existing or "
"non executable.";
ri->client_reconfig_script = sdsnew(argv[2]);
+ } else if (!strcasecmp(argv[0],"auth-pass") && argc == 3) {
+ /* auth-pass <name> <password> */
+ ri = sentinelGetMasterByName(argv[1]);
+ if (!ri) return "No such master with specified name.";
+ ri->auth_pass = sdsnew(argv[2]);
} else {
return "Unrecognized sentinel configuration statement.";
}
sentinelDisconnectInstanceFromContext(c);
}
+/* Send the AUTH command with the specified master password if needed.
+ * Note that for slaves the password set for the master is used.
+ *
+ * We don't check at all if the command was successfully transmitted
+ * to the instance as if it fails Sentinel will detect the instance down,
+ * will disconnect and reconnect the link and so forth. */
+void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
+ char *auth_pass = (ri->flags & SRI_MASTER) ? ri->auth_pass :
+ ri->master->auth_pass;
+
+ if (auth_pass)
+ redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL, "AUTH %s",
+ auth_pass);
+}
+
/* Create the async connections for the specified instance if the instance
* is disconnected. Note that the SRI_DISCONNECTED flag is set even if just
* one of the two links (commands and pub/sub) is missing. */
sentinelLinkEstablishedCallback);
redisAsyncSetDisconnectCallback(ri->cc,
sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,ri->cc);
}
}
/* Pub / Sub */
sentinelLinkEstablishedCallback);
redisAsyncSetDisconnectCallback(ri->pc,
sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,ri->pc);
/* Now we subscribe to the Sentinels "Hello" channel. */
retval = redisAsyncCommand(ri->pc,
sentinelReceiveHelloMessages, NULL, "SUBSCRIBE %s",
SENTINEL_MASTER_LINK_STATUS_UP :
SENTINEL_MASTER_LINK_STATUS_DOWN;
}
+
+ /* slave_priority:<priority> */
+ if (sdslen(l) >= 15 && !memcmp(l,"slave_priority:",15))
+ ri->slave_priority = atoi(l+15);
}
}
ri->info_refresh = mstime();
/* Act if a master turned into a slave. */
if ((ri->flags & SRI_MASTER) && role == SRI_SLAVE) {
- if (first_runid && ri->slave_master_host) {
+ if ((first_runid || runid_changed) && ri->slave_master_host) {
/* If it is the first time we receive INFO from it, but it's
* a slave while it was configured as a master, we want to monitor
* its master instead. */
if (!(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
(runid_changed || first_runid))
{
- /* If a slave turned into maser but:
+ /* If a slave turned into master but:
*
* 1) Failover not in progress.
* 2) RunID hs changed, or its the first time we see an INFO output.
strncmp(r->str,"MASTERDOWN",10) == 0)
{
ri->last_avail_time = mstime();
+ } else {
+ /* Send a SCRIPT KILL command if the instance appears to be
+ * down because of a busy script. */
+ if (strncmp(r->str,"BUSY",4) == 0 &&
+ (ri->flags & SRI_S_DOWN) &&
+ !(ri->flags & SRI_SCRIPT_KILL_SENT))
+ {
+ redisAsyncCommand(ri->cc,
+ sentinelDiscardReplyCallback, NULL, "SCRIPT KILL");
+ ri->flags |= SRI_SCRIPT_KILL_SENT;
+ }
}
}
ri->last_pong_time = mstime();
addReplyBulkCString(c,"master-port");
addReplyBulkLongLong(c,ri->slave_master_port);
fields++;
+
+ addReplyBulkCString(c,"slave-priority");
+ addReplyBulkLongLong(c,ri->slave_priority);
+ fields++;
}
/* Only sentinels */
ri = sentinelGetMasterByName(c->argv[2]->ptr);
if (ri == NULL) {
addReply(c,shared.nullmultibulk);
+ } else if (ri->info_refresh == 0) {
+ addReplySds(c,sdsnew("-IDONTKNOW I have not enough information to reply. Please ask another Sentinel.\r\n"));
} else {
sentinelAddr *addr = ri->addr;
(char*)c->argv[1]->ptr);
}
+void sentinelInfoCommand(redisClient *c) {
+ char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
+ sds info = sdsempty();
+ int defsections = !strcasecmp(section,"default");
+ int sections = 0;
+
+ if (c->argc > 2) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ if (!strcasecmp(section,"server") || defsections) {
+ if (sections++) info = sdscat(info,"\r\n");
+ sds serversection = genRedisInfoString("server");
+ info = sdscatlen(info,serversection,sdslen(serversection));
+ sdsfree(serversection);
+ }
+
+ if (!strcasecmp(section,"sentinel") || defsections) {
+ dictIterator *di;
+ dictEntry *de;
+ int master_id = 0;
+
+ if (sections++) info = sdscat(info,"\r\n");
+ info = sdscatprintf(info,
+ "# Sentinel\r\n"
+ "sentinel_masters:%lu\r\n"
+ "sentinel_tilt:%d\r\n"
+ "sentinel_running_scripts:%d\r\n"
+ "sentinel_scripts_queue_length:%ld\r\n",
+ dictSize(sentinel.masters),
+ sentinel.tilt,
+ sentinel.running_scripts,
+ listLength(sentinel.scripts_queue));
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ char *status = "ok";
+
+ if (ri->flags & SRI_O_DOWN) status = "odown";
+ else if (ri->flags & SRI_S_DOWN) status = "sdown";
+ info = sdscatprintf(info,
+ "master%d:name=%s,status=%s,address=%s:%d,"
+ "slaves=%lu,sentinels=%lu\r\n",
+ master_id++, ri->name, status,
+ ri->addr->ip, ri->addr->port,
+ dictSize(ri->slaves),
+ dictSize(ri->sentinels)+1);
+ }
+ dictReleaseIterator(di);
+ }
+
+ addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
+ (unsigned long)sdslen(info)));
+ addReplySds(c,info);
+ addReply(c,shared.crlf);
+}
+
/* ===================== SENTINEL availability checks ======================= */
/* Is this instance down from our point of view? */
/* Is subjectively up */
if (ri->flags & SRI_S_DOWN) {
sentinelEvent(REDIS_WARNING,"-sdown",ri,"%@");
- ri->flags &= ~SRI_S_DOWN;
+ ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
}
}
}
* 3) info_refresh more recent than SENTINEL_INFO_VALIDITY_TIME.
* 4) master_link_down_time no more than:
* (now - master->s_down_since_time) + (master->down_after_period * 10).
+ * 5) Slave priority can't be zero, otherwise the slave is discareded.
*
* Among all the slaves matching the above conditions we select the slave
* with lower slave_priority. If priority is the same we select the slave
int compareSlavesForPromotion(const void *a, const void *b) {
sentinelRedisInstance **sa = (sentinelRedisInstance **)a,
**sb = (sentinelRedisInstance **)b;
+ char *sa_runid, *sb_runid;
+
if ((*sa)->slave_priority != (*sb)->slave_priority)
return (*sa)->slave_priority - (*sb)->slave_priority;
- return strcasecmp((*sa)->runid,(*sb)->runid);
+
+ /* If priority is the same, select the slave with that has the
+ * lexicographically smaller runid. Note that we try to handle runid
+ * == NULL as there are old Redis versions that don't publish runid in
+ * INFO. A NULL runid is considered bigger than any other runid. */
+ sa_runid = (*sa)->runid;
+ sb_runid = (*sb)->runid;
+ if (sa_runid == NULL && sb_runid == NULL) return 0;
+ else if (sa_runid == NULL) return 1; /* a > b */
+ else if (sb_runid == NULL) return -1; /* a < b */
+ return strcasecmp(sa_runid, sb_runid);
}
sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN|SRI_DISCONNECTED)) continue;
if (slave->last_avail_time < info_validity_time) continue;
+ if (slave->slave_priority == 0) continue;
/* If the master is in SDOWN state we get INFO for slaves every second.
* Otherwise we get it with the usual period so we need to account for