]> git.saurik.com Git - redis.git/blob - src/sentinel.c
Sentinel: client reconfiguration script execution.
[redis.git] / src / sentinel.c
1 /* Redis Sentinel implementation
2 * -----------------------------
3 *
4 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * * Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Redis nor the names of its contributors may be used
16 * to endorse or promote products derived from this software without
17 * specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "redis.h"
33 #include "hiredis.h"
34 #include "async.h"
35
36 #include <ctype.h>
37 #include <arpa/inet.h>
38 #include <sys/socket.h>
39 #include <sys/wait.h>
40
41 extern char **environ;
42
43 #define REDIS_SENTINEL_PORT 26379
44
45 /* ======================== Sentinel global state =========================== */
46
/* Time value expressed in milliseconds. */
typedef long long mstime_t;

/* Network endpoint of an instance: an ip:port pair. */
typedef struct sentinelAddr {
    char *ip;   /* IP address string (an sds, see createSentinelAddr()). */
    int port;   /* Port number. */
} sentinelAddr;
54
/* Flags stored in sentinelRedisInstance.flags. The first three describe the
 * kind of instance being monitored, the others describe its current state. */
#define SRI_MASTER               (1<<0)
#define SRI_SLAVE                (1<<1)
#define SRI_SENTINEL             (1<<2)
#define SRI_DISCONNECTED         (1<<3)
#define SRI_S_DOWN               (1<<4)  /* Subjectively down (no quorum). */
#define SRI_O_DOWN               (1<<5)  /* Objectively down (quorum reached). */
#define SRI_MASTER_DOWN          (1<<6)  /* A Sentinel with this flag set
                                            thinks that its master is down. */
/* SRI_CAN_FAILOVER has two meanings depending on the instance type:
 * - on an SRI_MASTER instance: we are allowed to perform the failover for
 *   this master.
 * - on an SRI_SENTINEL instance: that Sentinel is allowed to perform the
 *   failover on its master. */
#define SRI_CAN_FAILOVER         (1<<7)
#define SRI_FAILOVER_IN_PROGRESS (1<<8)  /* Failover is in progress for this
                                            master. */
#define SRI_I_AM_THE_LEADER      (1<<9)  /* We are the leader for this master. */
#define SRI_PROMOTED             (1<<10) /* Slave selected for promotion. */
#define SRI_RECONF_SENT          (1<<11) /* SLAVEOF <newmaster> sent. */
#define SRI_RECONF_INPROG        (1<<12) /* Slave synchronization in progress. */
#define SRI_RECONF_DONE          (1<<13) /* Slave synchronized with new master. */

/* Periods, defaults and limits. */
#define SENTINEL_INFO_PERIOD 10000
#define SENTINEL_PING_PERIOD 1000
#define SENTINEL_ASK_PERIOD 1000
#define SENTINEL_PUBLISH_PERIOD 5000
#define SENTINEL_DOWN_AFTER_PERIOD 30000
#define SENTINEL_HELLO_CHANNEL "__sentinel__:hello"
#define SENTINEL_TILT_TRIGGER 2000
#define SENTINEL_TILT_PERIOD (SENTINEL_PING_PERIOD*30)
#define SENTINEL_DEFAULT_SLAVE_PRIORITY 100
#define SENTINEL_PROMOTION_RETRY_PERIOD 30000
#define SENTINEL_SLAVE_RECONF_RETRY_PERIOD 10000
#define SENTINEL_DEFAULT_PARALLEL_SYNCS 1
#define SENTINEL_MIN_LINK_RECONNECT_PERIOD 15000
#define SENTINEL_DEFAULT_FAILOVER_TIMEOUT (60*15*1000)
#define SENTINEL_MAX_PENDING_COMMANDS 100
#define SENTINEL_EXTENDED_SDOWN_MULTIPLIER 10

/* For how many milliseconds a piece of information is considered valid.
 * This applies, for instance, to the replies received for
 * SENTINEL IS-MASTER-DOWN-BY-ADDR. */
#define SENTINEL_INFO_VALIDITY_TIME 5000
#define SENTINEL_FAILOVER_FIXED_DELAY 5000
#define SENTINEL_FAILOVER_MAX_RANDOM_DELAY 10000

/* States of the failover state machine. */
#define SENTINEL_FAILOVER_STATE_NONE 0               /* No failover in progress. */
#define SENTINEL_FAILOVER_STATE_WAIT_START 1         /* Wait for failover_start_time. */
#define SENTINEL_FAILOVER_STATE_SELECT_SLAVE 2       /* Select slave to promote. */
#define SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE 3 /* Slave -> Master. */
#define SENTINEL_FAILOVER_STATE_WAIT_PROMOTION 4     /* Wait slave to change role. */
#define SENTINEL_FAILOVER_STATE_RECONF_SLAVES 5      /* SLAVEOF newmaster. */
#define SENTINEL_FAILOVER_STATE_WAIT_NEXT_SLAVE 6    /* Wait replication. */
#define SENTINEL_FAILOVER_STATE_ALERT_CLIENTS 7      /* Run user script. */
#define SENTINEL_FAILOVER_STATE_WAIT_ALERT_SCRIPT 8  /* Wait script exec. */
#define SENTINEL_FAILOVER_STATE_DETECT_END 9         /* Check for failover end. */
#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG 10     /* Monitor promoted slave. */

/* Values for sentinelRedisInstance.slave_master_link_status. */
#define SENTINEL_MASTER_LINK_STATUS_UP 0
#define SENTINEL_MASTER_LINK_STATUS_DOWN 1

/* Generic flags that can be passed to different functions. */
#define SENTINEL_NO_FLAGS 0
#define SENTINEL_GENERATE_EVENT 1
#define SENTINEL_LEADER 2
#define SENTINEL_OBSERVER 4

/* Script execution: job flags and limits. */
#define SENTINEL_SCRIPT_NONE 0
#define SENTINEL_SCRIPT_RUNNING 1
#define SENTINEL_SCRIPT_MAX_QUEUE 256
#define SENTINEL_SCRIPT_MAX_RUNNING 16
#define SENTINEL_SCRIPT_MAX_RUNTIME 60000 /* 60 seconds max exec time. */
#define SENTINEL_SCRIPT_MAX_RETRY 10
#define SENTINEL_SCRIPT_RETRY_DELAY 30000 /* 30 seconds between retries. */
130
131 typedef struct sentinelRedisInstance {
132 int flags; /* See SRI_... defines */
133 char *name; /* Master name from the point of view of this sentinel. */
134 char *runid; /* run ID of this instance. */
135 sentinelAddr *addr; /* Master host. */
136 redisAsyncContext *cc; /* Hiredis context for commands. */
137 redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
138 int pending_commands; /* Number of commands sent waiting for a reply. */
139 mstime_t cc_conn_time; /* cc connection time. */
140 mstime_t pc_conn_time; /* pc connection time. */
141 mstime_t pc_last_activity; /* Last time we received any message. */
142 mstime_t last_avail_time; /* Last time the instance replied to ping with
143 a reply we consider valid. */
144 mstime_t last_pong_time; /* Last time the instance replied to ping,
145 whatever the reply was. That's used to check
146 if the link is idle and must be reconnected. */
147 mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
148 mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
149 we received an hello from this Sentinel
150 via Pub/Sub. */
151 mstime_t last_master_down_reply_time; /* Time of last reply to
152 SENTINEL is-master-down command. */
153 mstime_t s_down_since_time; /* Subjectively down since time. */
154 mstime_t o_down_since_time; /* Objectively down since time. */
155 mstime_t down_after_period; /* Consider it down after that period. */
156 mstime_t info_refresh; /* Time at which we received INFO output from it. */
157
158 /* Master specific. */
159 dict *sentinels; /* Other sentinels monitoring the same master. */
160 dict *slaves; /* Slaves for this master instance. */
161 int quorum; /* Number of sentinels that need to agree on failure. */
162 int parallel_syncs; /* How many slaves to reconfigure at same time. */
163
164 /* Slave specific. */
165 mstime_t master_link_down_time; /* Slave replication link down time. */
166 int slave_priority; /* Slave priority according to its INFO output. */
167 mstime_t slave_reconf_sent_time; /* Time at which we sent SLAVE OF <new> */
168 struct sentinelRedisInstance *master; /* Master instance if SRI_SLAVE is set. */
169 char *slave_master_host; /* Master host as reported by INFO */
170 int slave_master_port; /* Master port as reported by INFO */
171 int slave_master_link_status; /* Master link status as reported by INFO */
172 /* Failover */
173 char *leader; /* If this is a master instance, this is the runid of
174 the Sentinel that should perform the failover. If
175 this is a Sentinel, this is the runid of the Sentinel
176 that this other Sentinel is voting as leader.
177 This field is valid only if SRI_MASTER_DOWN is
178 set on the Sentinel instance. */
179 int failover_state; /* See SENTINEL_FAILOVER_STATE_* defines. */
180 mstime_t failover_state_change_time;
181 mstime_t failover_start_time; /* When to start to failover if leader. */
182 mstime_t failover_timeout; /* Max time to refresh failover state. */
183 struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */
184 /* Scripts executed to notify admin or reconfigure clients: when they
185 * are set to NULL no script is executed. */
186 char *notification_script;
187 char *client_reconfig_script;
188 } sentinelRedisInstance;
189
190 /* Main state. */
191 struct sentinelState {
192 dict *masters; /* Dictionary of master sentinelRedisInstances.
193 Key is the instance name, value is the
194 sentinelRedisInstance structure pointer. */
195 int tilt; /* Are we in TILT mode? */
196 int running_scripts; /* Number of scripts in execution right now. */
197 mstime_t tilt_start_time; /* When TITL started. */
198 mstime_t previous_time; /* Time last time we ran the time handler. */
199 list *scripts_queue; /* Queue of user scripts to execute. */
200 } sentinel;
201
202 /* A script execution job. */
203 typedef struct sentinelScriptJob {
204 int flags; /* Script job flags: SENTINEL_SCRIPT_* */
205 int retry_num; /* Number of times we tried to execute it. */
206 char **argv; /* Arguments to call the script. */
207 mstime_t start_time; /* Script execution time if the script is running,
208 otherwise 0 if we are allowed to retry the
209 execution at any time. If the script is not
210 running and it's not 0, it means: do not run
211 before the specified time. */
212 pid_t pid; /* Script execution pid. */
213 } sentinelScriptJob;
214
215 /* ======================= hiredis ae.c adapters =============================
216 * Note: this implementation is taken from hiredis/adapters/ae.h, however
217 * we have our modified copy for Sentinel in order to use our allocator
218 * and to have full control over how the adapter works. */
219
220 typedef struct redisAeEvents {
221 redisAsyncContext *context;
222 aeEventLoop *loop;
223 int fd;
224 int reading, writing;
225 } redisAeEvents;
226
227 static void redisAeReadEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
228 ((void)el); ((void)fd); ((void)mask);
229
230 redisAeEvents *e = (redisAeEvents*)privdata;
231 redisAsyncHandleRead(e->context);
232 }
233
234 static void redisAeWriteEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
235 ((void)el); ((void)fd); ((void)mask);
236
237 redisAeEvents *e = (redisAeEvents*)privdata;
238 redisAsyncHandleWrite(e->context);
239 }
240
241 static void redisAeAddRead(void *privdata) {
242 redisAeEvents *e = (redisAeEvents*)privdata;
243 aeEventLoop *loop = e->loop;
244 if (!e->reading) {
245 e->reading = 1;
246 aeCreateFileEvent(loop,e->fd,AE_READABLE,redisAeReadEvent,e);
247 }
248 }
249
250 static void redisAeDelRead(void *privdata) {
251 redisAeEvents *e = (redisAeEvents*)privdata;
252 aeEventLoop *loop = e->loop;
253 if (e->reading) {
254 e->reading = 0;
255 aeDeleteFileEvent(loop,e->fd,AE_READABLE);
256 }
257 }
258
259 static void redisAeAddWrite(void *privdata) {
260 redisAeEvents *e = (redisAeEvents*)privdata;
261 aeEventLoop *loop = e->loop;
262 if (!e->writing) {
263 e->writing = 1;
264 aeCreateFileEvent(loop,e->fd,AE_WRITABLE,redisAeWriteEvent,e);
265 }
266 }
267
268 static void redisAeDelWrite(void *privdata) {
269 redisAeEvents *e = (redisAeEvents*)privdata;
270 aeEventLoop *loop = e->loop;
271 if (e->writing) {
272 e->writing = 0;
273 aeDeleteFileEvent(loop,e->fd,AE_WRITABLE);
274 }
275 }
276
/* hiredis hook: remove both file events and free the adapter state. */
static void redisAeCleanup(void *privdata) {
    redisAeDelRead(privdata);
    redisAeDelWrite(privdata);
    zfree(privdata);
}
283
284 static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
285 redisContext *c = &(ac->c);
286 redisAeEvents *e;
287
288 /* Nothing should be attached when something is already attached */
289 if (ac->ev.data != NULL)
290 return REDIS_ERR;
291
292 /* Create container for context and r/w events */
293 e = (redisAeEvents*)zmalloc(sizeof(*e));
294 e->context = ac;
295 e->loop = loop;
296 e->fd = c->fd;
297 e->reading = e->writing = 0;
298
299 /* Register functions to start/stop listening for events */
300 ac->ev.addRead = redisAeAddRead;
301 ac->ev.delRead = redisAeDelRead;
302 ac->ev.addWrite = redisAeAddWrite;
303 ac->ev.delWrite = redisAeDelWrite;
304 ac->ev.cleanup = redisAeCleanup;
305 ac->ev.data = e;
306
307 return REDIS_OK;
308 }
309
310 /* ============================= Prototypes ================================= */
311
312 void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status);
313 void sentinelDisconnectCallback(const redisAsyncContext *c, int status);
314 void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata);
315 sentinelRedisInstance *sentinelGetMasterByName(char *name);
316 char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master);
317 char *sentinelGetObjectiveLeader(sentinelRedisInstance *master);
318 int yesnotoi(char *s);
319 void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c);
320 void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c);
321 const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
322 void sentinelAbortFailover(sentinelRedisInstance *ri);
323 void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
324 sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master);
325 void sentinelScheduleScriptExecution(char *path, ...);
326
327 /* ========================= Dictionary types =============================== */
328
329 unsigned int dictSdsHash(const void *key);
330 int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
331 void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
332
/* Dictionary value destructor: releases the instance stored as value.
 * 'privdata' is not used. */
void dictInstancesValDestructor (void *privdata, void *obj) {
    ((void)privdata);
    releaseSentinelRedisInstance(obj);
}
336
337 /* Instance name (sds) -> instance (sentinelRedisInstance pointer)
338 *
339 * also used for: sentinelRedisInstance->sentinels dictionary that maps
340 * sentinels ip:port to last seen time in Pub/Sub hello message. */
341 dictType instancesDictType = {
342 dictSdsHash, /* hash function */
343 NULL, /* key dup */
344 NULL, /* val dup */
345 dictSdsKeyCompare, /* key compare */
346 NULL, /* key destructor */
347 dictInstancesValDestructor /* val destructor */
348 };
349
350 /* Instance runid (sds) -> votes (long casted to void*)
351 *
352 * This is useful into sentinelGetObjectiveLeader() function in order to
353 * count the votes and understand who is the leader. */
354 dictType leaderVotesDictType = {
355 dictSdsHash, /* hash function */
356 NULL, /* key dup */
357 NULL, /* val dup */
358 dictSdsKeyCompare, /* key compare */
359 NULL, /* key destructor */
360 NULL /* val destructor */
361 };
362
363 /* =========================== Initialization =============================== */
364
365 void sentinelCommand(redisClient *c);
366
367 struct redisCommand sentinelcmds[] = {
368 {"ping",pingCommand,1,"",0,NULL,0,0,0,0,0},
369 {"sentinel",sentinelCommand,-2,"",0,NULL,0,0,0,0,0},
370 {"subscribe",subscribeCommand,-2,"",0,NULL,0,0,0,0,0},
371 {"unsubscribe",unsubscribeCommand,-1,"",0,NULL,0,0,0,0,0},
372 {"psubscribe",psubscribeCommand,-2,"",0,NULL,0,0,0,0,0},
373 {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0}
374 };
375
376 /* This function overwrites a few normal Redis config default with Sentinel
377 * specific defaults. */
378 void initSentinelConfig(void) {
379 server.port = REDIS_SENTINEL_PORT;
380 }
381
382 /* Perform the Sentinel mode initialization. */
383 void initSentinel(void) {
384 int j;
385
386 /* Remove usual Redis commands from the command table, then just add
387 * the SENTINEL command. */
388 dictEmpty(server.commands);
389 for (j = 0; j < sizeof(sentinelcmds)/sizeof(sentinelcmds[0]); j++) {
390 int retval;
391 struct redisCommand *cmd = sentinelcmds+j;
392
393 retval = dictAdd(server.commands, sdsnew(cmd->name), cmd);
394 redisAssert(retval == DICT_OK);
395 }
396
397 /* Initialize various data structures. */
398 sentinel.masters = dictCreate(&instancesDictType,NULL);
399 sentinel.tilt = 0;
400 sentinel.tilt_start_time = mstime();
401 sentinel.previous_time = mstime();
402 sentinel.running_scripts = 0;
403 sentinel.scripts_queue = listCreate();
404 }
405
406 /* ============================== sentinelAddr ============================== */
407
408 /* Create a sentinelAddr object and return it on success.
409 * On error NULL is returned and errno is set to:
410 * ENOENT: Can't resolve the hostname.
411 * EINVAL: Invalid port number.
412 */
413 sentinelAddr *createSentinelAddr(char *hostname, int port) {
414 char buf[32];
415 sentinelAddr *sa;
416
417 if (port <= 0 || port > 65535) {
418 errno = EINVAL;
419 return NULL;
420 }
421 if (anetResolve(NULL,hostname,buf) == ANET_ERR) {
422 errno = ENOENT;
423 return NULL;
424 }
425 sa = zmalloc(sizeof(*sa));
426 sa->ip = sdsnew(buf);
427 sa->port = port;
428 return sa;
429 }
430
431 /* Free a Sentinel address. Can't fail. */
432 void releaseSentinelAddr(sentinelAddr *sa) {
433 sdsfree(sa->ip);
434 zfree(sa);
435 }
436
437 /* =========================== Events notification ========================== */
438
439 /* Send an event to log, pub/sub, user notification script.
440 *
441 * 'level' is the log level for logging. Only REDIS_WARNING events will trigger
442 * the execution of the user notification script.
443 *
444 * 'type' is the message type, also used as a pub/sub channel name.
445 *
446 * 'ri', is the redis instance target of this event if applicable, and is
447 * used to obtain the path of the notification script to execute.
448 *
449 * The remaining arguments are printf-alike.
450 * If the format specifier starts with the two characters "%@" then ri is
451 * not NULL, and the message is prefixed with an instance identifier in the
452 * following format:
453 *
454 * <instance type> <instance name> <ip> <port>
455 *
456 * If the instance type is not master, than the additional string is
457 * added to specify the originating master:
458 *
459 * @ <master name> <master ip> <master port>
460 *
461 * Any other specifier after "%@" is processed by printf itself.
462 */
463 void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
464 const char *fmt, ...) {
465 va_list ap;
466 char msg[REDIS_MAX_LOGMSG_LEN];
467 robj *channel, *payload;
468
469 /* Handle %@ */
470 if (fmt[0] == '%' && fmt[1] == '@') {
471 sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
472 NULL : ri->master;
473
474 if (master) {
475 snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d",
476 sentinelRedisInstanceTypeStr(ri),
477 ri->name, ri->addr->ip, ri->addr->port,
478 master->name, master->addr->ip, master->addr->port);
479 } else {
480 snprintf(msg, sizeof(msg), "%s %s %s %d",
481 sentinelRedisInstanceTypeStr(ri),
482 ri->name, ri->addr->ip, ri->addr->port);
483 }
484 fmt += 2;
485 } else {
486 msg[0] = '\0';
487 }
488
489 /* Use vsprintf for the rest of the formatting if any. */
490 if (fmt[0] != '\0') {
491 va_start(ap, fmt);
492 vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), fmt, ap);
493 va_end(ap);
494 }
495
496 /* Log the message if the log level allows it to be logged. */
497 if (level >= server.verbosity)
498 redisLog(level,"%s %s",type,msg);
499
500 /* Publish the message via Pub/Sub if it's not a debugging one. */
501 if (level != REDIS_DEBUG) {
502 channel = createStringObject(type,strlen(type));
503 payload = createStringObject(msg,strlen(msg));
504 pubsubPublishMessage(channel,payload);
505 decrRefCount(channel);
506 decrRefCount(payload);
507 }
508
509 /* Call the notification script if applicable. */
510 if (level == REDIS_WARNING && ri != NULL) {
511 sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
512 ri : ri->master;
513 if (master->notification_script) {
514 sentinelScheduleScriptExecution(master->notification_script,
515 type,msg,NULL);
516 }
517 }
518 }
519
520 /* ============================ script execution ============================ */
521
522 /* Release a script job structure and all the associated data. */
523 void sentinelReleaseScriptJob(sentinelScriptJob *sj) {
524 int j = 0;
525
526 while(sj->argv[j]) sdsfree(sj->argv[j++]);
527 zfree(sj->argv);
528 zfree(sj);
529 }
530
531 #define SENTINEL_SCRIPT_MAX_ARGS 16
532 void sentinelScheduleScriptExecution(char *path, ...) {
533 va_list ap;
534 char *argv[SENTINEL_SCRIPT_MAX_ARGS+1];
535 int argc = 1;
536 sentinelScriptJob *sj;
537
538 va_start(ap, path);
539 while(argc < SENTINEL_SCRIPT_MAX_ARGS) {
540 argv[argc] = va_arg(ap,char*);
541 if (!argv[argc]) break;
542 argv[argc] = sdsnew(argv[argc]); /* Copy the string. */
543 argc++;
544 }
545 va_end(ap);
546 argv[0] = sdsnew(path);
547
548 sj = zmalloc(sizeof(*sj));
549 sj->flags = SENTINEL_SCRIPT_NONE;
550 sj->retry_num = 0;
551 sj->argv = zmalloc(sizeof(char*)*(argc+1));
552 sj->start_time = 0;
553 sj->pid = 0;
554 memcpy(sj->argv,argv,sizeof(char*)*(argc+1));
555
556 listAddNodeTail(sentinel.scripts_queue,sj);
557
558 /* Remove the oldest non running script if we already hit the limit. */
559 if (listLength(sentinel.scripts_queue) > SENTINEL_SCRIPT_MAX_QUEUE) {
560 listNode *ln;
561 listIter li;
562
563 listRewind(sentinel.scripts_queue,&li);
564 while ((ln = listNext(&li)) != NULL) {
565 sj = ln->value;
566
567 if (sj->flags & SENTINEL_SCRIPT_RUNNING) continue;
568 /* The first node is the oldest as we add on tail. */
569 listDelNode(sentinel.scripts_queue,ln);
570 sentinelReleaseScriptJob(sj);
571 break;
572 }
573 redisAssert(listLength(sentinel.scripts_queue) <=
574 SENTINEL_SCRIPT_MAX_QUEUE);
575 }
576 }
577
578 /* Lookup a script in the scripts queue via pid, and returns the list node
579 * (so that we can easily remove it from the queue if needed). */
580 listNode *sentinelGetScriptListNodeByPid(pid_t pid) {
581 listNode *ln;
582 listIter li;
583
584 listRewind(sentinel.scripts_queue,&li);
585 while ((ln = listNext(&li)) != NULL) {
586 sentinelScriptJob *sj = ln->value;
587
588 if ((sj->flags & SENTINEL_SCRIPT_RUNNING) && sj->pid == pid)
589 return ln;
590 }
591 return NULL;
592 }
593
594 /* Run pending scripts if we are not already at max number of running
595 * scripts. */
596 void sentinelRunPendingScripts(void) {
597 listNode *ln;
598 listIter li;
599 mstime_t now = mstime();
600
601 /* Find jobs that are not running and run them, from the top to the
602 * tail of the queue, so we run older jobs first. */
603 listRewind(sentinel.scripts_queue,&li);
604 while (sentinel.running_scripts < SENTINEL_SCRIPT_MAX_RUNNING &&
605 (ln = listNext(&li)) != NULL)
606 {
607 sentinelScriptJob *sj = ln->value;
608 pid_t pid;
609
610 /* Skip if already running. */
611 if (sj->flags & SENTINEL_SCRIPT_RUNNING) continue;
612
613 /* Skip if it's a retry, but not enough time has elapsed. */
614 if (sj->start_time && sj->start_time > now) continue;
615
616 sj->flags |= SENTINEL_SCRIPT_RUNNING;
617 sj->start_time = mstime();
618 sj->retry_num++;
619 pid = fork();
620
621 if (pid == -1) {
622 /* Parent (fork error).
623 * We report fork errors as signal 99, in order to unify the
624 * reporting with other kind of errors. */
625 sentinelEvent(REDIS_WARNING,"-script-error",NULL,
626 "%s %d %d", sj->argv[0], 99, 0);
627 sj->flags &= ~SENTINEL_SCRIPT_RUNNING;
628 sj->pid = 0;
629 } else if (pid == 0) {
630 /* Child */
631 execve(sj->argv[0],sj->argv,environ);
632 /* If we are here an error occurred. */
633 _exit(2); /* Don't retry execution. */
634 } else {
635 sentinel.running_scripts++;
636 sj->pid = pid;
637 sentinelEvent(REDIS_DEBUG,"+script-child",NULL,"%ld",(long)pid);
638 }
639 }
640 }
641
642 /* How much to delay the execution of a script that we need to retry after
643 * an error?
644 *
645 * We double the retry delay for every further retry we do. So for instance
646 * if RETRY_DELAY is set to 30 seconds and the max number of retries is 10
647 * starting from the second attempt to execute the script the delays are:
648 * 30 sec, 60 sec, 2 min, 4 min, 8 min, 16 min, 32 min, 64 min, 128 min. */
649 mstime_t sentinelScriptRetryDelay(int retry_num) {
650 mstime_t delay = SENTINEL_SCRIPT_RETRY_DELAY;
651
652 while (retry_num-- > 1) delay *= 2;
653 return delay;
654 }
655
656 /* Check for scripts that terminated, and remove them from the queue if the
657 * script terminated successfully. If instead the script was terminated by
658 * a signal, or returned exit code "1", it is scheduled to run again if
659 * the max number of retries did not already elapsed. */
660 void sentinelCollectTerminatedScripts(void) {
661 int statloc;
662 pid_t pid;
663
664 while ((pid = wait3(&statloc,WNOHANG,NULL)) > 0) {
665 int exitcode = WEXITSTATUS(statloc);
666 int bysignal = 0;
667 listNode *ln;
668 sentinelScriptJob *sj;
669
670 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
671 sentinelEvent(REDIS_DEBUG,"-script-child",NULL,"%ld %d %d",
672 (long)pid, exitcode, bysignal);
673
674 ln = sentinelGetScriptListNodeByPid(pid);
675 if (ln == NULL) {
676 redisLog(REDIS_WARNING,"wait3() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
677 continue;
678 }
679 sj = ln->value;
680
681 /* If the script was terminated by a signal or returns an
682 * exit code of "1" (that means: please retry), we reschedule it
683 * if the max number of retries is not already reached. */
684 if ((bysignal || exitcode == 1) &&
685 sj->retry_num != SENTINEL_SCRIPT_MAX_RETRY)
686 {
687 sj->flags &= ~SENTINEL_SCRIPT_RUNNING;
688 sj->pid = 0;
689 sj->start_time = mstime() +
690 sentinelScriptRetryDelay(sj->retry_num);
691 } else {
692 /* Otherwise let's remove the script, but log the event if the
693 * execution did not terminated in the best of the ways. */
694 if (bysignal || exitcode != 0) {
695 sentinelEvent(REDIS_WARNING,"-script-error",NULL,
696 "%s %d %d", sj->argv[0], bysignal, exitcode);
697 }
698 listDelNode(sentinel.scripts_queue,ln);
699 sentinelReleaseScriptJob(sj);
700 sentinel.running_scripts--;
701 }
702 }
703 }
704
705 /* Kill scripts in timeout, they'll be collected by the
706 * sentinelCollectTerminatedScripts() function. */
707 void sentinelKillTimedoutScripts(void) {
708 listNode *ln;
709 listIter li;
710 mstime_t now = mstime();
711
712 listRewind(sentinel.scripts_queue,&li);
713 while ((ln = listNext(&li)) != NULL) {
714 sentinelScriptJob *sj = ln->value;
715
716 if (sj->flags & SENTINEL_SCRIPT_RUNNING &&
717 (now - sj->start_time) > SENTINEL_SCRIPT_MAX_RUNTIME)
718 {
719 sentinelEvent(REDIS_WARNING,"-script-timeout",NULL,"%s %ld",
720 sj->argv[0], (long)sj->pid);
721 kill(sj->pid,SIGKILL);
722 }
723 }
724 }
725
726 /* Implements SENTINEL PENDING-SCRIPTS command. */
727 void sentinelPendingScriptsCommand(redisClient *c) {
728 listNode *ln;
729 listIter li;
730
731 addReplyMultiBulkLen(c,listLength(sentinel.scripts_queue));
732 listRewind(sentinel.scripts_queue,&li);
733 while ((ln = listNext(&li)) != NULL) {
734 sentinelScriptJob *sj = ln->value;
735 int j = 0;
736
737 addReplyMultiBulkLen(c,10);
738
739 addReplyBulkCString(c,"argv");
740 while (sj->argv[j]) j++;
741 addReplyMultiBulkLen(c,j);
742 j = 0;
743 while (sj->argv[j]) addReplyBulkCString(c,sj->argv[j++]);
744
745 addReplyBulkCString(c,"flags");
746 addReplyBulkCString(c,
747 (sj->flags & SENTINEL_SCRIPT_RUNNING) ? "running" : "scheduled");
748
749 addReplyBulkCString(c,"pid");
750 addReplyBulkLongLong(c,sj->pid);
751
752 if (sj->flags & SENTINEL_SCRIPT_RUNNING) {
753 addReplyBulkCString(c,"run-time");
754 addReplyBulkLongLong(c,mstime() - sj->start_time);
755 } else {
756 mstime_t delay = sj->start_time ? (sj->start_time-mstime()) : 0;
757 if (delay < 0) delay = 0;
758 addReplyBulkCString(c,"run-delay");
759 addReplyBulkLongLong(c,delay);
760 }
761
762 addReplyBulkCString(c,"retry-num");
763 addReplyBulkLongLong(c,sj->retry_num);
764 }
765 }
766
767 /* This function calls, if any, the client reconfiguration script with the
768 * following parameters:
769 *
770 * <master-name> <role> <state> <from-ip> <from-port> <to-ip> <to-port>
771 *
772 * It is called every time a failover starts, ends, or is aborted.
773 *
774 * <state> is "start", "end" or "abort".
775 * <role> is either "leader" or "observer".
776 *
777 * from/to fields are respectively master -> promoted slave addresses for
778 * "start" and "end", or the reverse (promoted slave -> master) in case of
779 * "abort".
780 */
781 void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, char *state, sentinelAddr *from, sentinelAddr *to) {
782 char fromport[32], toport[32];
783
784 if (master->client_reconfig_script == NULL) return;
785 ll2string(fromport,sizeof(fromport),from->port);
786 ll2string(toport,sizeof(toport),to->port);
787 sentinelScheduleScriptExecution(master->client_reconfig_script,
788 master->name,
789 (role == SENTINEL_LEADER) ? "leader" : "observer",
790 state, from->ip, fromport, to->ip, toport);
791 }
792
793 /* ========================== sentinelRedisInstance ========================= */
794
795 /* Create a redis instance, the following fields must be populated by the
796 * caller if needed:
797 * runid: set to NULL but will be populated once INFO output is received.
798 * info_refresh: is set to 0 to mean that we never received INFO so far.
799 *
800 * If SRI_MASTER is set into initial flags the instance is added to
801 * sentinel.masters table.
802 *
803 * if SRI_SLAVE or SRI_SENTINEL is set then 'master' must be not NULL and the
804 * instance is added into master->slaves or master->sentinels table.
805 *
806 * If the instance is a slave or sentinel, the name parameter is ignored and
807 * is created automatically as hostname:port.
808 *
809 * The function fails if hostname can't be resolved or port is out of range.
810 * When this happens NULL is returned and errno is set accordingly to the
811 * createSentinelAddr() function.
812 *
813 * The function may also fail and return NULL with errno set to EBUSY if
814 * a master or slave with the same name already exists. */
815 sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
816 sentinelRedisInstance *ri;
817 sentinelAddr *addr;
818 dict *table;
819 char slavename[128], *sdsname;
820
821 redisAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
822 redisAssert((flags & SRI_MASTER) || master != NULL);
823
824 /* Check address validity. */
825 addr = createSentinelAddr(hostname,port);
826 if (addr == NULL) return NULL;
827
828 /* For slaves and sentinel we use ip:port as name. */
829 if (flags & (SRI_SLAVE|SRI_SENTINEL)) {
830 snprintf(slavename,sizeof(slavename),"%s:%d",hostname,port);
831 name = slavename;
832 }
833
834 /* Make sure the entry is not duplicated. This may happen when the same
835 * name for a master is used multiple times inside the configuration or
836 * if we try to add multiple times a slave or sentinel with same ip/port
837 * to a master. */
838 if (flags & SRI_MASTER) table = sentinel.masters;
839 else if (flags & SRI_SLAVE) table = master->slaves;
840 else if (flags & SRI_SENTINEL) table = master->sentinels;
841 sdsname = sdsnew(name);
842 if (dictFind(table,sdsname)) {
843 sdsfree(sdsname);
844 errno = EBUSY;
845 return NULL;
846 }
847
848 /* Create the instance object. */
849 ri = zmalloc(sizeof(*ri));
850 /* Note that all the instances are started in the disconnected state,
851 * the event loop will take care of connecting them. */
852 ri->flags = flags | SRI_DISCONNECTED;
853 ri->name = sdsname;
854 ri->runid = NULL;
855 ri->addr = addr;
856 ri->cc = NULL;
857 ri->pc = NULL;
858 ri->pending_commands = 0;
859 ri->cc_conn_time = 0;
860 ri->pc_conn_time = 0;
861 ri->pc_last_activity = 0;
862 ri->last_avail_time = mstime();
863 ri->last_pong_time = mstime();
864 ri->last_pub_time = mstime();
865 ri->last_hello_time = mstime();
866 ri->last_master_down_reply_time = mstime();
867 ri->s_down_since_time = 0;
868 ri->o_down_since_time = 0;
869 ri->down_after_period = master ? master->down_after_period :
870 SENTINEL_DOWN_AFTER_PERIOD;
871 ri->master_link_down_time = 0;
872 ri->slave_priority = SENTINEL_DEFAULT_SLAVE_PRIORITY;
873 ri->slave_reconf_sent_time = 0;
874 ri->slave_master_host = NULL;
875 ri->slave_master_port = 0;
876 ri->slave_master_link_status = SENTINEL_MASTER_LINK_STATUS_DOWN;
877 ri->sentinels = dictCreate(&instancesDictType,NULL);
878 ri->quorum = quorum;
879 ri->parallel_syncs = SENTINEL_DEFAULT_PARALLEL_SYNCS;
880 ri->master = master;
881 ri->slaves = dictCreate(&instancesDictType,NULL);
882 ri->info_refresh = 0;
883
884 /* Failover state. */
885 ri->leader = NULL;
886 ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
887 ri->failover_state_change_time = 0;
888 ri->failover_start_time = 0;
889 ri->failover_timeout = SENTINEL_DEFAULT_FAILOVER_TIMEOUT;
890 ri->promoted_slave = NULL;
891 ri->notification_script = NULL;
892 ri->client_reconfig_script = NULL;
893
894 /* Add into the right table. */
895 dictAdd(table, ri->name, ri);
896 return ri;
897 }
898
899 /* Release this instance and all its slaves, sentinels, hiredis connections.
900 * This function also takes care of unlinking the instance from the main
901 * masters table (if it is a master) or from its master sentinels/slaves table
902 * if it is a slave or sentinel. */
903 void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
904 /* Release all its slaves or sentinels if any. */
905 dictRelease(ri->sentinels);
906 dictRelease(ri->slaves);
907
908 /* Release hiredis connections. */
909 if (ri->cc) sentinelKillLink(ri,ri->cc);
910 if (ri->pc) sentinelKillLink(ri,ri->pc);
911
912 /* Free other resources. */
913 sdsfree(ri->name);
914 sdsfree(ri->runid);
915 sdsfree(ri->notification_script);
916 sdsfree(ri->client_reconfig_script);
917 sdsfree(ri->slave_master_host);
918 sdsfree(ri->leader);
919 releaseSentinelAddr(ri->addr);
920
921 /* Clear state into the master if needed. */
922 if ((ri->flags & SRI_SLAVE) && (ri->flags & SRI_PROMOTED) && ri->master)
923 ri->master->promoted_slave = NULL;
924
925 zfree(ri);
926 }
927
928 /* Lookup a slave in a master Redis instance, by ip and port. */
929 sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
930 sentinelRedisInstance *ri, char *ip, int port)
931 {
932 sds key;
933 sentinelRedisInstance *slave;
934
935 redisAssert(ri->flags & SRI_MASTER);
936 key = sdscatprintf(sdsempty(),"%s:%d",ip,port);
937 slave = dictFetchValue(ri->slaves,key);
938 sdsfree(key);
939 return slave;
940 }
941
942 /* Return the name of the type of the instance as a string. */
943 const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
944 if (ri->flags & SRI_MASTER) return "master";
945 else if (ri->flags & SRI_SLAVE) return "slave";
946 else if (ri->flags & SRI_SENTINEL) return "sentinel";
947 else return "unknown";
948 }
949
950 /* This function removes all the instances found in the dictionary of instances
951 * 'd', having either:
952 *
953 * 1) The same ip/port as specified.
954 * 2) The same runid.
955 *
956 * "1" and "2" don't need to verify at the same time, just one is enough.
957 * If "runid" is NULL it is not checked.
958 * Similarly if "ip" is NULL it is not checked.
959 *
960 * This function is useful because every time we add a new Sentinel into
961 * a master's Sentinels dictionary, we want to be very sure about not
962 * having duplicated instances for any reason. This is so important because
963 * we use those other sentinels in order to run our quorum protocol to
964 * understand if it's time to proceeed with the fail over.
965 *
966 * Making sure no duplication is possible we greately improve the robustness
967 * of the quorum (otherwise we may end counting the same instance multiple
968 * times for some reason).
969 *
970 * The function returns the number of Sentinels removed. */
971 int removeMatchingSentinelsFromMaster(sentinelRedisInstance *master, char *ip, int port, char *runid) {
972 dictIterator *di;
973 dictEntry *de;
974 int removed = 0;
975
976 di = dictGetSafeIterator(master->sentinels);
977 while((de = dictNext(di)) != NULL) {
978 sentinelRedisInstance *ri = dictGetVal(de);
979
980 if ((ri->runid && runid && strcmp(ri->runid,runid) == 0) ||
981 (ip && strcmp(ri->addr->ip,ip) == 0 && port == ri->addr->port))
982 {
983 dictDelete(master->sentinels,ri->name);
984 removed++;
985 }
986 }
987 dictReleaseIterator(di);
988 return removed;
989 }
990
991 /* Search an instance with the same runid, ip and port into a dictionary
992 * of instances. Return NULL if not found, otherwise return the instance
993 * pointer.
994 *
995 * runid or ip can be NULL. In such a case the search is performed only
996 * by the non-NULL field. */
997 sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid) {
998 dictIterator *di;
999 dictEntry *de;
1000 sentinelRedisInstance *instance = NULL;
1001
1002 redisAssert(ip || runid); /* User must pass at least one search param. */
1003 di = dictGetIterator(instances);
1004 while((de = dictNext(di)) != NULL) {
1005 sentinelRedisInstance *ri = dictGetVal(de);
1006
1007 if (runid && !ri->runid) continue;
1008 if ((runid == NULL || strcmp(ri->runid, runid) == 0) &&
1009 (ip == NULL || (strcmp(ri->addr->ip, ip) == 0 &&
1010 ri->addr->port == port)))
1011 {
1012 instance = ri;
1013 break;
1014 }
1015 }
1016 dictReleaseIterator(di);
1017 return instance;
1018 }
1019
1020 /* Simple master lookup by name */
1021 sentinelRedisInstance *sentinelGetMasterByName(char *name) {
1022 sentinelRedisInstance *ri;
1023 sds sdsname = sdsnew(name);
1024
1025 ri = dictFetchValue(sentinel.masters,sdsname);
1026 sdsfree(sdsname);
1027 return ri;
1028 }
1029
1030 /* Add the specified flags to all the instances in the specified dictionary. */
1031 void sentinelAddFlagsToDictOfRedisInstances(dict *instances, int flags) {
1032 dictIterator *di;
1033 dictEntry *de;
1034
1035 di = dictGetIterator(instances);
1036 while((de = dictNext(di)) != NULL) {
1037 sentinelRedisInstance *ri = dictGetVal(de);
1038 ri->flags |= flags;
1039 }
1040 dictReleaseIterator(di);
1041 }
1042
1043 /* Remove the specified flags to all the instances in the specified
1044 * dictionary. */
1045 void sentinelDelFlagsToDictOfRedisInstances(dict *instances, int flags) {
1046 dictIterator *di;
1047 dictEntry *de;
1048
1049 di = dictGetIterator(instances);
1050 while((de = dictNext(di)) != NULL) {
1051 sentinelRedisInstance *ri = dictGetVal(de);
1052 ri->flags &= ~flags;
1053 }
1054 dictReleaseIterator(di);
1055 }
1056
1057 /* Reset the state of a monitored master:
1058 * 1) Remove all slaves.
1059 * 2) Remove all sentinels.
1060 * 3) Remove most of the flags resulting from runtime operations.
1061 * 4) Reset timers to their default value.
1062 * 5) In the process of doing this undo the failover if in progress.
1063 * 6) Disconnect the connections with the master (will reconnect automatically).
1064 */
1065 void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
1066 redisAssert(ri->flags & SRI_MASTER);
1067 dictRelease(ri->slaves);
1068 dictRelease(ri->sentinels);
1069 ri->slaves = dictCreate(&instancesDictType,NULL);
1070 ri->sentinels = dictCreate(&instancesDictType,NULL);
1071 if (ri->cc) sentinelKillLink(ri,ri->cc);
1072 if (ri->pc) sentinelKillLink(ri,ri->pc);
1073 ri->flags &= SRI_MASTER|SRI_CAN_FAILOVER|SRI_DISCONNECTED;
1074 if (ri->leader) {
1075 sdsfree(ri->leader);
1076 ri->leader = NULL;
1077 }
1078 ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
1079 ri->failover_state_change_time = 0;
1080 ri->failover_start_time = 0;
1081 ri->promoted_slave = NULL;
1082 sdsfree(ri->runid);
1083 sdsfree(ri->slave_master_host);
1084 ri->runid = NULL;
1085 ri->slave_master_host = NULL;
1086 ri->last_avail_time = mstime();
1087 ri->last_pong_time = mstime();
1088 if (flags & SENTINEL_GENERATE_EVENT)
1089 sentinelEvent(REDIS_WARNING,"+reset-master",ri,"%@");
1090 }
1091
1092 /* Call sentinelResetMaster() on every master with a name matching the specified
1093 * pattern. */
1094 int sentinelResetMastersByPattern(char *pattern, int flags) {
1095 dictIterator *di;
1096 dictEntry *de;
1097 int reset = 0;
1098
1099 di = dictGetIterator(sentinel.masters);
1100 while((de = dictNext(di)) != NULL) {
1101 sentinelRedisInstance *ri = dictGetVal(de);
1102
1103 if (ri->name) {
1104 if (stringmatch(pattern,ri->name,0)) {
1105 sentinelResetMaster(ri,flags);
1106 reset++;
1107 }
1108 }
1109 }
1110 dictReleaseIterator(di);
1111 return reset;
1112 }
1113
1114 /* Reset the specified master with sentinelResetMaster(), and also change
1115 * the ip:port address, but take the name of the instance unmodified.
1116 *
1117 * This is used to handle the +switch-master and +redirect-to-master events.
1118 *
1119 * The function returns REDIS_ERR if the address can't be resolved for some
1120 * reason. Otherwise REDIS_OK is returned.
1121 *
1122 * TODO: make this reset so that original sentinels are re-added with
1123 * same ip / port / runid.
1124 */
1125
1126 int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, int port) {
1127 sentinelAddr *oldaddr, *newaddr;
1128
1129 newaddr = createSentinelAddr(ip,port);
1130 if (newaddr == NULL) return REDIS_ERR;
1131 sentinelResetMaster(master,SENTINEL_NO_FLAGS);
1132 oldaddr = master->addr;
1133 master->addr = newaddr;
1134 /* Release the old address at the end so we are safe even if the function
1135 * gets the master->addr->ip and master->addr->port as arguments. */
1136 releaseSentinelAddr(oldaddr);
1137 return REDIS_OK;
1138 }
1139
1140 /* ============================ Config handling ============================= */
1141 char *sentinelHandleConfiguration(char **argv, int argc) {
1142 sentinelRedisInstance *ri;
1143
1144 if (!strcasecmp(argv[0],"monitor") && argc == 5) {
1145 /* monitor <name> <host> <port> <quorum> */
1146 int quorum = atoi(argv[4]);
1147
1148 if (quorum <= 0) return "Quorum must be 1 or greater.";
1149 if (createSentinelRedisInstance(argv[1],SRI_MASTER,argv[2],
1150 atoi(argv[3]),quorum,NULL) == NULL)
1151 {
1152 switch(errno) {
1153 case EBUSY: return "Duplicated master name.";
1154 case ENOENT: return "Can't resolve master instance hostname.";
1155 case EINVAL: return "Invalid port number";
1156 }
1157 }
1158 } else if (!strcasecmp(argv[0],"down-after-milliseconds") && argc == 3) {
1159 /* down-after-milliseconds <name> <milliseconds> */
1160 ri = sentinelGetMasterByName(argv[1]);
1161 if (!ri) return "No such master with specified name.";
1162 ri->down_after_period = atoi(argv[2]);
1163 if (ri->down_after_period <= 0)
1164 return "negative or zero time parameter.";
1165 } else if (!strcasecmp(argv[0],"failover-timeout") && argc == 3) {
1166 /* failover-timeout <name> <milliseconds> */
1167 ri = sentinelGetMasterByName(argv[1]);
1168 if (!ri) return "No such master with specified name.";
1169 ri->failover_timeout = atoi(argv[2]);
1170 if (ri->failover_timeout <= 0)
1171 return "negative or zero time parameter.";
1172 } else if (!strcasecmp(argv[0],"can-failover") && argc == 3) {
1173 /* can-failover <name> <yes/no> */
1174 int yesno = yesnotoi(argv[2]);
1175
1176 ri = sentinelGetMasterByName(argv[1]);
1177 if (!ri) return "No such master with specified name.";
1178 if (yesno == -1) return "Argument must be either yes or no.";
1179 if (yesno)
1180 ri->flags |= SRI_CAN_FAILOVER;
1181 else
1182 ri->flags &= ~SRI_CAN_FAILOVER;
1183 } else if (!strcasecmp(argv[0],"parallel-syncs") && argc == 3) {
1184 /* parallel-syncs <name> <milliseconds> */
1185 ri = sentinelGetMasterByName(argv[1]);
1186 if (!ri) return "No such master with specified name.";
1187 ri->parallel_syncs = atoi(argv[2]);
1188 } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) {
1189 /* notification-script <name> <path> */
1190 ri = sentinelGetMasterByName(argv[1]);
1191 if (!ri) return "No such master with specified name.";
1192 if (access(argv[2],X_OK) == -1)
1193 return "Notification script seems non existing or non executable.";
1194 ri->notification_script = sdsnew(argv[2]);
1195 } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) {
1196 /* client-reconfig-script <name> <path> */
1197 ri = sentinelGetMasterByName(argv[1]);
1198 if (!ri) return "No such master with specified name.";
1199 if (access(argv[2],X_OK) == -1)
1200 return "Client reconfiguration script seems non existing or "
1201 "non executable.";
1202 ri->client_reconfig_script = sdsnew(argv[2]);
1203 } else {
1204 return "Unrecognized sentinel configuration statement.";
1205 }
1206 return NULL;
1207 }
1208
1209 /* ====================== hiredis connection handling ======================= */
1210
1211 /* Completely disconnect an hiredis link from an instance. */
1212 void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c) {
1213 if (ri->cc == c) {
1214 ri->cc = NULL;
1215 ri->pending_commands = 0;
1216 }
1217 if (ri->pc == c) ri->pc = NULL;
1218 c->data = NULL;
1219 ri->flags |= SRI_DISCONNECTED;
1220 redisAsyncFree(c);
1221 }
1222
1223 /* This function takes an hiredis context that is in an error condition
1224 * and make sure to mark the instance as disconnected performing the
1225 * cleanup needed.
1226 *
1227 * Note: we don't free the hiredis context as hiredis will do it for us
1228 * for async conenctions. */
1229 void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c) {
1230 sentinelRedisInstance *ri = c->data;
1231 int pubsub;
1232
1233 if (ri == NULL) return; /* The instance no longer exists. */
1234
1235 pubsub = (ri->pc == c);
1236 sentinelEvent(REDIS_DEBUG, pubsub ? "-pubsub-link" : "-cmd-link", ri,
1237 "%@ #%s", c->errstr);
1238 if (pubsub)
1239 ri->pc = NULL;
1240 else
1241 ri->cc = NULL;
1242 ri->flags |= SRI_DISCONNECTED;
1243 }
1244
1245 void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
1246 if (status != REDIS_OK) {
1247 sentinelDisconnectInstanceFromContext(c);
1248 } else {
1249 sentinelRedisInstance *ri = c->data;
1250 int pubsub = (ri->pc == c);
1251
1252 sentinelEvent(REDIS_DEBUG, pubsub ? "+pubsub-link" : "+cmd-link", ri,
1253 "%@");
1254 }
1255 }
1256
1257 void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
1258 sentinelDisconnectInstanceFromContext(c);
1259 }
1260
1261 /* Create the async connections for the specified instance if the instance
1262 * is disconnected. Note that the SRI_DISCONNECTED flag is set even if just
1263 * one of the two links (commands and pub/sub) is missing. */
1264 void sentinelReconnectInstance(sentinelRedisInstance *ri) {
1265 if (!(ri->flags & SRI_DISCONNECTED)) return;
1266
1267 /* Commands connection. */
1268 if (ri->cc == NULL) {
1269 ri->cc = redisAsyncConnect(ri->addr->ip,ri->addr->port);
1270 if (ri->cc->err) {
1271 sentinelEvent(REDIS_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
1272 ri->cc->errstr);
1273 sentinelKillLink(ri,ri->cc);
1274 } else {
1275 ri->cc_conn_time = mstime();
1276 ri->cc->data = ri;
1277 redisAeAttach(server.el,ri->cc);
1278 redisAsyncSetConnectCallback(ri->cc,
1279 sentinelLinkEstablishedCallback);
1280 redisAsyncSetDisconnectCallback(ri->cc,
1281 sentinelDisconnectCallback);
1282 }
1283 }
1284 /* Pub / Sub */
1285 if ((ri->flags & SRI_MASTER) && ri->pc == NULL) {
1286 ri->pc = redisAsyncConnect(ri->addr->ip,ri->addr->port);
1287 if (ri->pc->err) {
1288 sentinelEvent(REDIS_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
1289 ri->pc->errstr);
1290 sentinelKillLink(ri,ri->pc);
1291 } else {
1292 int retval;
1293
1294 ri->pc_conn_time = mstime();
1295 ri->pc->data = ri;
1296 redisAeAttach(server.el,ri->pc);
1297 redisAsyncSetConnectCallback(ri->pc,
1298 sentinelLinkEstablishedCallback);
1299 redisAsyncSetDisconnectCallback(ri->pc,
1300 sentinelDisconnectCallback);
1301 /* Now we subscribe to the Sentinels "Hello" channel. */
1302 retval = redisAsyncCommand(ri->pc,
1303 sentinelReceiveHelloMessages, NULL, "SUBSCRIBE %s",
1304 SENTINEL_HELLO_CHANNEL);
1305 if (retval != REDIS_OK) {
1306 /* If we can't subscribe, the Pub/Sub connection is useless
1307 * and we can simply disconnect it and try again. */
1308 sentinelKillLink(ri,ri->pc);
1309 return;
1310 }
1311 }
1312 }
1313 /* Clear the DISCONNECTED flags only if we have both the connections
1314 * (or just the commands connection if this is a slave or a
1315 * sentinel instance). */
1316 if (ri->cc && (ri->flags & (SRI_SLAVE|SRI_SENTINEL) || ri->pc))
1317 ri->flags &= ~SRI_DISCONNECTED;
1318 }
1319
1320 /* ======================== Redis instances pinging ======================== */
1321
1322 /* Process the INFO output from masters. */
1323 void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
1324 sds *lines;
1325 int numlines, j;
1326 int role = 0;
1327 int runid_changed = 0; /* true if runid changed. */
1328 int first_runid = 0; /* true if this is the first runid we receive. */
1329
1330 /* The following fields must be reset to a given value in the case they
1331 * are not found at all in the INFO output. */
1332 ri->master_link_down_time = 0;
1333
1334 /* Process line by line. */
1335 lines = sdssplitlen(info,strlen(info),"\r\n",2,&numlines);
1336 for (j = 0; j < numlines; j++) {
1337 sentinelRedisInstance *slave;
1338 sds l = lines[j];
1339
1340 /* run_id:<40 hex chars>*/
1341 if (sdslen(l) >= 47 && !memcmp(l,"run_id:",7)) {
1342 if (ri->runid == NULL) {
1343 ri->runid = sdsnewlen(l+7,40);
1344 first_runid = 1;
1345 } else {
1346 if (strncmp(ri->runid,l+7,40) != 0) {
1347 runid_changed = 1;
1348 sentinelEvent(REDIS_NOTICE,"+reboot",ri,"%@");
1349 sdsfree(ri->runid);
1350 ri->runid = sdsnewlen(l+7,40);
1351 }
1352 }
1353 }
1354
1355 /* slave0:<ip>,<port>,<state> */
1356 if ((ri->flags & SRI_MASTER) &&
1357 sdslen(l) >= 7 &&
1358 !memcmp(l,"slave",5) && isdigit(l[5]))
1359 {
1360 char *ip, *port, *end;
1361
1362 ip = strchr(l,':'); if (!ip) continue;
1363 ip++; /* Now ip points to start of ip address. */
1364 port = strchr(ip,','); if (!port) continue;
1365 *port = '\0'; /* nul term for easy access. */
1366 port++; /* Now port points to start of port number. */
1367 end = strchr(port,','); if (!end) continue;
1368 *end = '\0'; /* nul term for easy access. */
1369
1370 /* Check if we already have this slave into our table,
1371 * otherwise add it. */
1372 if (sentinelRedisInstanceLookupSlave(ri,ip,atoi(port)) == NULL) {
1373 if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,ip,
1374 atoi(port), ri->quorum,ri)) != NULL)
1375 {
1376 sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
1377 }
1378 }
1379 }
1380
1381 /* master_link_down_since_seconds:<seconds> */
1382 if (sdslen(l) >= 32 &&
1383 !memcmp(l,"master_link_down_since_seconds",30))
1384 {
1385 ri->master_link_down_time = strtoll(l+31,NULL,10)*1000;
1386 }
1387
1388 /* role:<role> */
1389 if (!memcmp(l,"role:master",11)) role = SRI_MASTER;
1390 else if (!memcmp(l,"role:slave",10)) role = SRI_SLAVE;
1391
1392 if (role == SRI_SLAVE) {
1393 /* master_host:<host> */
1394 if (sdslen(l) >= 12 && !memcmp(l,"master_host:",12)) {
1395 sdsfree(ri->slave_master_host);
1396 ri->slave_master_host = sdsnew(l+12);
1397 }
1398
1399 /* master_port:<port> */
1400 if (sdslen(l) >= 12 && !memcmp(l,"master_port:",12))
1401 ri->slave_master_port = atoi(l+12);
1402
1403 /* master_link_status:<status> */
1404 if (sdslen(l) >= 19 && !memcmp(l,"master_link_status:",19)) {
1405 ri->slave_master_link_status =
1406 (strcasecmp(l+19,"up") == 0) ?
1407 SENTINEL_MASTER_LINK_STATUS_UP :
1408 SENTINEL_MASTER_LINK_STATUS_DOWN;
1409 }
1410 }
1411 }
1412 ri->info_refresh = mstime();
1413 sdsfreesplitres(lines,numlines);
1414
1415 /* ---------------------------- Acting half ----------------------------- */
1416 if (sentinel.tilt) return;
1417
1418 /* Act if a master turned into a slave. */
1419 if ((ri->flags & SRI_MASTER) && role == SRI_SLAVE) {
1420 if (first_runid && ri->slave_master_host) {
1421 /* If it is the first time we receive INFO from it, but it's
1422 * a slave while it was configured as a master, we want to monitor
1423 * its master instead. */
1424 sentinelEvent(REDIS_WARNING,"+redirect-to-master",ri,
1425 "%s %s %d %s %d",
1426 ri->name, ri->addr->ip, ri->addr->port,
1427 ri->slave_master_host, ri->slave_master_port);
1428 sentinelResetMasterAndChangeAddress(ri,ri->slave_master_host,
1429 ri->slave_master_port);
1430 return;
1431 }
1432 }
1433
1434 /* Act if a slave turned into a master. */
1435 if ((ri->flags & SRI_SLAVE) && role == SRI_MASTER) {
1436 if (!(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
1437 (runid_changed || first_runid))
1438 {
1439 /* If a slave turned into maser but:
1440 *
1441 * 1) Failover not in progress.
1442 * 2) RunID hs changed, or its the first time we see an INFO output.
1443 *
1444 * We assume this is a reboot with a wrong configuration.
1445 * Log the event and remove the slave. */
1446 int retval;
1447
1448 sentinelEvent(REDIS_WARNING,"-slave-restart-as-master",ri,"%@ #removing it from the attached slaves");
1449 retval = dictDelete(ri->master->slaves,ri->name);
1450 redisAssert(retval == REDIS_OK);
1451 return;
1452 } else if (ri->flags & SRI_PROMOTED) {
1453 /* If this is a promoted slave we can change state to the
1454 * failover state machine. */
1455 if ((ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
1456 (ri->master->flags & SRI_I_AM_THE_LEADER) &&
1457 (ri->master->failover_state ==
1458 SENTINEL_FAILOVER_STATE_WAIT_PROMOTION))
1459 {
1460 ri->master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES;
1461 ri->master->failover_state_change_time = mstime();
1462 sentinelEvent(REDIS_WARNING,"+promoted-slave",ri,"%@");
1463 sentinelEvent(REDIS_WARNING,"+failover-state-reconf-slaves",
1464 ri->master,"%@");
1465 sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER,
1466 "start",ri->master->addr,ri->addr);
1467 }
1468 } else if (!(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) ||
1469 ((ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
1470 (ri->master->flags & SRI_I_AM_THE_LEADER) &&
1471 ri->master->failover_state ==
1472 SENTINEL_FAILOVER_STATE_WAIT_START))
1473 {
1474 /* No failover in progress? Then it is the start of a failover
1475 * and we are an observer.
1476 *
1477 * We also do that if we are a leader doing a failover, in wait
1478 * start, but well, somebody else started before us. */
1479
1480 if (ri->master->flags & SRI_FAILOVER_IN_PROGRESS) {
1481 sentinelEvent(REDIS_WARNING,"-failover-abort-race",
1482 ri->master, "%@");
1483 sentinelAbortFailover(ri->master);
1484 }
1485
1486 ri->master->flags |= SRI_FAILOVER_IN_PROGRESS;
1487 sentinelEvent(REDIS_WARNING,"+failover-detected",ri->master,"%@");
1488 ri->master->failover_state = SENTINEL_FAILOVER_STATE_DETECT_END;
1489 ri->master->failover_state_change_time = mstime();
1490 ri->master->promoted_slave = ri;
1491 ri->flags |= SRI_PROMOTED;
1492 sentinelCallClientReconfScript(ri->master,SENTINEL_OBSERVER,
1493 "start", ri->master->addr,ri->addr);
1494 /* We are an observer, so we can only assume that the leader
1495 * is reconfiguring the slave instances. For this reason we
1496 * set all the instances as RECONF_SENT waiting for progresses
1497 * on this side. */
1498 sentinelAddFlagsToDictOfRedisInstances(ri->master->slaves,
1499 SRI_RECONF_SENT);
1500 }
1501 }
1502
1503 /* Detect if the slave that is in the process of being reconfigured
1504 * changed state. */
1505 if ((ri->flags & SRI_SLAVE) && role == SRI_SLAVE &&
1506 (ri->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)))
1507 {
1508 /* SRI_RECONF_SENT -> SRI_RECONF_INPROG. */
1509 if ((ri->flags & SRI_RECONF_SENT) &&
1510 ri->slave_master_host &&
1511 strcmp(ri->slave_master_host,
1512 ri->master->promoted_slave->addr->ip) == 0 &&
1513 ri->slave_master_port == ri->master->promoted_slave->addr->port)
1514 {
1515 ri->flags &= ~SRI_RECONF_SENT;
1516 ri->flags |= SRI_RECONF_INPROG;
1517 sentinelEvent(REDIS_NOTICE,"+slave-reconf-inprog",ri,"%@");
1518 }
1519
1520 /* SRI_RECONF_INPROG -> SRI_RECONF_DONE */
1521 if ((ri->flags & SRI_RECONF_INPROG) &&
1522 ri->slave_master_link_status == SENTINEL_MASTER_LINK_STATUS_UP)
1523 {
1524 ri->flags &= ~SRI_RECONF_INPROG;
1525 ri->flags |= SRI_RECONF_DONE;
1526 sentinelEvent(REDIS_NOTICE,"+slave-reconf-done",ri,"%@");
1527 /* If we are moving forward (a new slave is now configured)
1528 * we update the change_time as we are conceptually passing
1529 * to the next slave. */
1530 ri->failover_state_change_time = mstime();
1531 }
1532 }
1533 }
1534
1535 void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
1536 sentinelRedisInstance *ri = c->data;
1537 redisReply *r;
1538
1539 if (ri) ri->pending_commands--;
1540 if (!reply || !ri) return;
1541 r = reply;
1542
1543 if (r->type == REDIS_REPLY_STRING) {
1544 sentinelRefreshInstanceInfo(ri,r->str);
1545 }
1546 }
1547
1548 /* Just discard the reply. We use this when we are not monitoring the return
1549 * value of the command but its effects directly. */
1550 void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
1551 sentinelRedisInstance *ri = c->data;
1552
1553 if (ri) ri->pending_commands--;
1554 }
1555
1556 void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
1557 sentinelRedisInstance *ri = c->data;
1558 redisReply *r;
1559
1560 if (ri) ri->pending_commands--;
1561 if (!reply || !ri) return;
1562 r = reply;
1563
1564 if (r->type == REDIS_REPLY_STATUS ||
1565 r->type == REDIS_REPLY_ERROR) {
1566 /* Update the "instance available" field only if this is an
1567 * acceptable reply. */
1568 if (strncmp(r->str,"PONG",4) == 0 ||
1569 strncmp(r->str,"LOADING",7) == 0 ||
1570 strncmp(r->str,"MASTERDOWN",10) == 0)
1571 {
1572 ri->last_avail_time = mstime();
1573 }
1574 }
1575 ri->last_pong_time = mstime();
1576 }
1577
1578 /* This is called when we get the reply about the PUBLISH command we send
1579 * to the master to advertise this sentinel. */
1580 void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
1581 sentinelRedisInstance *ri = c->data;
1582 redisReply *r;
1583
1584 if (ri) ri->pending_commands--;
1585 if (!reply || !ri) return;
1586 r = reply;
1587
1588 /* Only update pub_time if we actually published our message. Otherwise
1589 * we'll retry against in 100 milliseconds. */
1590 if (r->type != REDIS_REPLY_ERROR)
1591 ri->last_pub_time = mstime();
1592 }
1593
1594 /* This is our Pub/Sub callback for the Hello channel. It's useful in order
1595 * to discover other sentinels attached at the same master. */
1596 void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
1597 sentinelRedisInstance *ri = c->data;
1598 redisReply *r;
1599
1600 if (!reply || !ri) return;
1601 r = reply;
1602
1603 /* Update the last activity in the pubsub channel. Note that since we
1604 * receive our messages as well this timestamp can be used to detect
1605 * if the link is probably diconnected even if it seems otherwise. */
1606 ri->pc_last_activity = mstime();
1607
1608 /* Sanity check in the reply we expect, so that the code that follows
1609 * can avoid to check for details. */
1610 if (r->type != REDIS_REPLY_ARRAY ||
1611 r->elements != 3 ||
1612 r->element[0]->type != REDIS_REPLY_STRING ||
1613 r->element[1]->type != REDIS_REPLY_STRING ||
1614 r->element[2]->type != REDIS_REPLY_STRING ||
1615 strcmp(r->element[0]->str,"message") != 0) return;
1616
1617 /* We are not interested in meeting ourselves */
1618 if (strstr(r->element[2]->str,server.runid) != NULL) return;
1619
1620 {
1621 int numtokens, port, removed, canfailover;
1622 char **token = sdssplitlen(r->element[2]->str,
1623 r->element[2]->len,
1624 ":",1,&numtokens);
1625 sentinelRedisInstance *sentinel;
1626
1627 if (numtokens == 4) {
1628 /* First, try to see if we already have this sentinel. */
1629 port = atoi(token[1]);
1630 canfailover = atoi(token[3]);
1631 sentinel = getSentinelRedisInstanceByAddrAndRunID(
1632 ri->sentinels,token[0],port,token[2]);
1633
1634 if (!sentinel) {
1635 /* If not, remove all the sentinels that have the same runid
1636 * OR the same ip/port, because it's either a restart or a
1637 * network topology change. */
1638 removed = removeMatchingSentinelsFromMaster(ri,token[0],port,
1639 token[2]);
1640 if (removed) {
1641 sentinelEvent(REDIS_NOTICE,"-dup-sentinel",ri,
1642 "%@ #duplicate of %s:%d or %s",
1643 token[0],port,token[2]);
1644 }
1645
1646 /* Add the new sentinel. */
1647 sentinel = createSentinelRedisInstance(NULL,SRI_SENTINEL,
1648 token[0],port,ri->quorum,ri);
1649 if (sentinel) {
1650 sentinelEvent(REDIS_NOTICE,"+sentinel",sentinel,"%@");
1651 /* The runid is NULL after a new instance creation and
1652 * for Sentinels we don't have a later chance to fill it,
1653 * so do it now. */
1654 sentinel->runid = sdsnew(token[2]);
1655 }
1656 }
1657
1658 /* Update the state of the Sentinel. */
1659 if (sentinel) {
1660 sentinel->last_hello_time = mstime();
1661 if (canfailover)
1662 sentinel->flags |= SRI_CAN_FAILOVER;
1663 else
1664 sentinel->flags &= ~SRI_CAN_FAILOVER;
1665 }
1666 }
1667 sdsfreesplitres(token,numtokens);
1668 }
1669 }
1670
1671 void sentinelPingInstance(sentinelRedisInstance *ri) {
1672 mstime_t now = mstime();
1673 mstime_t info_period;
1674 int retval;
1675
1676 /* Return ASAP if we have already a PING or INFO already pending, or
1677 * in the case the instance is not properly connected. */
1678 if (ri->flags & SRI_DISCONNECTED) return;
1679
1680 /* For INFO, PING, PUBLISH that are not critical commands to send we
1681 * also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
1682 * want to use a lot of memory just because a link is not working
1683 * properly (note that anyway there is a redundant protection about this,
1684 * that is, the link will be disconnected and reconnected if a long
1685 * timeout condition is detected. */
1686 if (ri->pending_commands >= SENTINEL_MAX_PENDING_COMMANDS) return;
1687
1688 /* If this is a slave of a master in O_DOWN condition we start sending
1689 * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
1690 * period. In this state we want to closely monitor slaves in case they
1691 * are turned into masters by another Sentinel, or by the sysadmin. */
1692 if ((ri->flags & SRI_SLAVE) &&
1693 (ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS))) {
1694 info_period = 1000;
1695 } else {
1696 info_period = SENTINEL_INFO_PERIOD;
1697 }
1698
1699 if ((ri->flags & SRI_SENTINEL) == 0 &&
1700 (ri->info_refresh == 0 ||
1701 (now - ri->info_refresh) > info_period))
1702 {
1703 /* Send INFO to masters and slaves, not sentinels. */
1704 retval = redisAsyncCommand(ri->cc,
1705 sentinelInfoReplyCallback, NULL, "INFO");
1706 if (retval != REDIS_OK) return;
1707 ri->pending_commands++;
1708 } else if ((now - ri->last_pong_time) > SENTINEL_PING_PERIOD) {
1709 /* Send PING to all the three kinds of instances. */
1710 retval = redisAsyncCommand(ri->cc,
1711 sentinelPingReplyCallback, NULL, "PING");
1712 if (retval != REDIS_OK) return;
1713 ri->pending_commands++;
1714 } else if ((ri->flags & SRI_MASTER) &&
1715 (now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD)
1716 {
1717 /* PUBLISH hello messages only to masters. */
1718 struct sockaddr_in sa;
1719 socklen_t salen = sizeof(sa);
1720
1721 if (getsockname(ri->cc->c.fd,(struct sockaddr*)&sa,&salen) != -1) {
1722 char myaddr[128];
1723
1724 snprintf(myaddr,sizeof(myaddr),"%s:%d:%s:%d",
1725 inet_ntoa(sa.sin_addr), server.port, server.runid,
1726 (ri->flags & SRI_CAN_FAILOVER) != 0);
1727 retval = redisAsyncCommand(ri->cc,
1728 sentinelPublishReplyCallback, NULL, "PUBLISH %s %s",
1729 SENTINEL_HELLO_CHANNEL,myaddr);
1730 if (retval != REDIS_OK) return;
1731 ri->pending_commands++;
1732 }
1733 }
1734 }
1735
1736 /* =========================== SENTINEL command ============================= */
1737
1738 const char *sentinelFailoverStateStr(int state) {
1739 switch(state) {
1740 case SENTINEL_FAILOVER_STATE_NONE: return "none";
1741 case SENTINEL_FAILOVER_STATE_WAIT_START: return "wait_start";
1742 case SENTINEL_FAILOVER_STATE_SELECT_SLAVE: return "select_slave";
1743 case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE: return "send_slaveof_noone";
1744 case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION: return "wait_promotion";
1745 case SENTINEL_FAILOVER_STATE_RECONF_SLAVES: return "reconf_slaves";
1746 case SENTINEL_FAILOVER_STATE_ALERT_CLIENTS: return "alert_clients";
1747 case SENTINEL_FAILOVER_STATE_DETECT_END: return "detect_end";
1748 case SENTINEL_FAILOVER_STATE_UPDATE_CONFIG: return "update_config";
1749 default: return "unknown";
1750 }
1751 }
1752
1753 /* Redis instance to Redis protocol representation. */
1754 void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
1755 char *flags = sdsempty();
1756 void *mbl;
1757 int fields = 0;
1758
1759 mbl = addDeferredMultiBulkLength(c);
1760
1761 addReplyBulkCString(c,"name");
1762 addReplyBulkCString(c,ri->name);
1763 fields++;
1764
1765 addReplyBulkCString(c,"ip");
1766 addReplyBulkCString(c,ri->addr->ip);
1767 fields++;
1768
1769 addReplyBulkCString(c,"port");
1770 addReplyBulkLongLong(c,ri->addr->port);
1771 fields++;
1772
1773 addReplyBulkCString(c,"runid");
1774 addReplyBulkCString(c,ri->runid ? ri->runid : "");
1775 fields++;
1776
1777 addReplyBulkCString(c,"flags");
1778 if (ri->flags & SRI_S_DOWN) flags = sdscat(flags,"s_down,");
1779 if (ri->flags & SRI_O_DOWN) flags = sdscat(flags,"o_down,");
1780 if (ri->flags & SRI_MASTER) flags = sdscat(flags,"master,");
1781 if (ri->flags & SRI_SLAVE) flags = sdscat(flags,"slave,");
1782 if (ri->flags & SRI_SENTINEL) flags = sdscat(flags,"sentinel,");
1783 if (ri->flags & SRI_DISCONNECTED) flags = sdscat(flags,"disconnected,");
1784 if (ri->flags & SRI_MASTER_DOWN) flags = sdscat(flags,"master_down,");
1785 if (ri->flags & SRI_FAILOVER_IN_PROGRESS)
1786 flags = sdscat(flags,"failover_in_progress,");
1787 if (ri->flags & SRI_I_AM_THE_LEADER)
1788 flags = sdscat(flags,"i_am_the_leader,");
1789 if (ri->flags & SRI_PROMOTED) flags = sdscat(flags,"promoted,");
1790 if (ri->flags & SRI_RECONF_SENT) flags = sdscat(flags,"reconf_sent,");
1791 if (ri->flags & SRI_RECONF_INPROG) flags = sdscat(flags,"reconf_inprog,");
1792 if (ri->flags & SRI_RECONF_DONE) flags = sdscat(flags,"reconf_done,");
1793
1794 if (sdslen(flags) != 0) flags = sdsrange(flags,0,-2); /* remove last "," */
1795 addReplyBulkCString(c,flags);
1796 sdsfree(flags);
1797 fields++;
1798
1799 addReplyBulkCString(c,"pending-commands");
1800 addReplyBulkLongLong(c,ri->pending_commands);
1801 fields++;
1802
1803 if (ri->flags & SRI_FAILOVER_IN_PROGRESS) {
1804 addReplyBulkCString(c,"failover-state");
1805 addReplyBulkCString(c,(char*)sentinelFailoverStateStr(ri->failover_state));
1806 fields++;
1807 }
1808
1809 addReplyBulkCString(c,"last-ok-ping-reply");
1810 addReplyBulkLongLong(c,mstime() - ri->last_avail_time);
1811 fields++;
1812
1813 addReplyBulkCString(c,"last-ping-reply");
1814 addReplyBulkLongLong(c,mstime() - ri->last_pong_time);
1815 fields++;
1816
1817 if (ri->flags & SRI_S_DOWN) {
1818 addReplyBulkCString(c,"s-down-time");
1819 addReplyBulkLongLong(c,mstime()-ri->s_down_since_time);
1820 fields++;
1821 }
1822
1823 if (ri->flags & SRI_O_DOWN) {
1824 addReplyBulkCString(c,"o-down-time");
1825 addReplyBulkLongLong(c,mstime()-ri->o_down_since_time);
1826 fields++;
1827 }
1828
1829 /* Masters and Slaves */
1830 if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
1831 addReplyBulkCString(c,"info-refresh");
1832 addReplyBulkLongLong(c,mstime() - ri->info_refresh);
1833 fields++;
1834 }
1835
1836 /* Only masters */
1837 if (ri->flags & SRI_MASTER) {
1838 addReplyBulkCString(c,"num-slaves");
1839 addReplyBulkLongLong(c,dictSize(ri->slaves));
1840 fields++;
1841
1842 addReplyBulkCString(c,"num-other-sentinels");
1843 addReplyBulkLongLong(c,dictSize(ri->sentinels));
1844 fields++;
1845
1846 addReplyBulkCString(c,"quorum");
1847 addReplyBulkLongLong(c,ri->quorum);
1848 fields++;
1849 }
1850
1851 /* Only slaves */
1852 if (ri->flags & SRI_SLAVE) {
1853 addReplyBulkCString(c,"master-link-down-time");
1854 addReplyBulkLongLong(c,ri->master_link_down_time);
1855 fields++;
1856
1857 addReplyBulkCString(c,"master-link-status");
1858 addReplyBulkCString(c,
1859 (ri->slave_master_link_status == SENTINEL_MASTER_LINK_STATUS_UP) ?
1860 "ok" : "err");
1861 fields++;
1862
1863 addReplyBulkCString(c,"master-host");
1864 addReplyBulkCString(c,
1865 ri->slave_master_host ? ri->slave_master_host : "?");
1866 fields++;
1867
1868 addReplyBulkCString(c,"master-port");
1869 addReplyBulkLongLong(c,ri->slave_master_port);
1870 fields++;
1871 }
1872
1873 /* Only sentinels */
1874 if (ri->flags & SRI_SENTINEL) {
1875 addReplyBulkCString(c,"last-hello-message");
1876 addReplyBulkLongLong(c,mstime() - ri->last_hello_time);
1877 fields++;
1878
1879 addReplyBulkCString(c,"can-failover-its-master");
1880 addReplyBulkLongLong(c,(ri->flags & SRI_CAN_FAILOVER) != 0);
1881 fields++;
1882
1883 if (ri->flags & SRI_MASTER_DOWN) {
1884 addReplyBulkCString(c,"subjective-leader");
1885 addReplyBulkCString(c,ri->leader ? ri->leader : "?");
1886 fields++;
1887 }
1888 }
1889
1890 setDeferredMultiBulkLength(c,mbl,fields*2);
1891 }
1892
1893 /* Output a number of instances contanined inside a dictionary as
1894 * Redis protocol. */
1895 void addReplyDictOfRedisInstances(redisClient *c, dict *instances) {
1896 dictIterator *di;
1897 dictEntry *de;
1898
1899 di = dictGetIterator(instances);
1900 addReplyMultiBulkLen(c,dictSize(instances));
1901 while((de = dictNext(di)) != NULL) {
1902 sentinelRedisInstance *ri = dictGetVal(de);
1903
1904 addReplySentinelRedisInstance(c,ri);
1905 }
1906 dictReleaseIterator(di);
1907 }
1908
1909 /* Lookup the named master into sentinel.masters.
1910 * If the master is not found reply to the client with an error and returns
1911 * NULL. */
1912 sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(redisClient *c,
1913 robj *name)
1914 {
1915 sentinelRedisInstance *ri;
1916
1917 ri = dictFetchValue(sentinel.masters,c->argv[2]->ptr);
1918 if (!ri) {
1919 addReplyError(c,"No such master with that name");
1920 return NULL;
1921 }
1922 return ri;
1923 }
1924
1925 void sentinelCommand(redisClient *c) {
1926 if (!strcasecmp(c->argv[1]->ptr,"masters")) {
1927 /* SENTINEL MASTERS */
1928 if (c->argc != 2) goto numargserr;
1929
1930 addReplyDictOfRedisInstances(c,sentinel.masters);
1931 } else if (!strcasecmp(c->argv[1]->ptr,"slaves")) {
1932 /* SENTINEL SLAVES <master-name> */
1933 sentinelRedisInstance *ri;
1934
1935 if (c->argc != 3) goto numargserr;
1936 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL)
1937 return;
1938 addReplyDictOfRedisInstances(c,ri->slaves);
1939 } else if (!strcasecmp(c->argv[1]->ptr,"sentinels")) {
1940 /* SENTINEL SENTINELS <master-name> */
1941 sentinelRedisInstance *ri;
1942
1943 if (c->argc != 3) goto numargserr;
1944 if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2])) == NULL)
1945 return;
1946 addReplyDictOfRedisInstances(c,ri->sentinels);
1947 } else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
1948 /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> */
1949 sentinelRedisInstance *ri;
1950 char *leader = NULL;
1951 long port;
1952 int isdown = 0;
1953
1954 if (c->argc != 4) goto numargserr;
1955 if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != REDIS_OK)
1956 return;
1957 ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.masters,
1958 c->argv[2]->ptr,port,NULL);
1959
1960 /* It exists? Is actually a master? Is subjectively down? It's down.
1961 * Note: if we are in tilt mode we always reply with "0". */
1962 if (!sentinel.tilt && ri && (ri->flags & SRI_S_DOWN) &&
1963 (ri->flags & SRI_MASTER))
1964 isdown = 1;
1965 if (ri) leader = sentinelGetSubjectiveLeader(ri);
1966
1967 /* Reply with a two-elements multi-bulk reply: down state, leader. */
1968 addReplyMultiBulkLen(c,2);
1969 addReply(c, isdown ? shared.cone : shared.czero);
1970 addReplyBulkCString(c, leader ? leader : "?");
1971 if (leader) sdsfree(leader);
1972 } else if (!strcasecmp(c->argv[1]->ptr,"reset")) {
1973 /* SENTINEL RESET <pattern> */
1974 if (c->argc != 3) goto numargserr;
1975 addReplyLongLong(c,sentinelResetMastersByPattern(c->argv[2]->ptr,SENTINEL_GENERATE_EVENT));
1976 } else if (!strcasecmp(c->argv[1]->ptr,"get-master-addr-by-name")) {
1977 /* SENTINEL GET-MASTER-ADDR-BY-NAME <master-name> */
1978 sentinelRedisInstance *ri;
1979
1980 if (c->argc != 3) goto numargserr;
1981 ri = sentinelGetMasterByName(c->argv[2]->ptr);
1982 if (ri == NULL) {
1983 addReply(c,shared.nullmultibulk);
1984 } else {
1985 sentinelAddr *addr = ri->addr;
1986
1987 if ((ri->flags & SRI_FAILOVER_IN_PROGRESS) && ri->promoted_slave)
1988 addr = ri->promoted_slave->addr;
1989 addReplyMultiBulkLen(c,2);
1990 addReplyBulkCString(c,addr->ip);
1991 addReplyBulkLongLong(c,addr->port);
1992 }
1993 } else if (!strcasecmp(c->argv[1]->ptr,"pending-scripts")) {
1994 /* SENTINEL PENDING-SCRIPTS */
1995
1996 if (c->argc != 2) goto numargserr;
1997 sentinelPendingScriptsCommand(c);
1998 } else {
1999 addReplyErrorFormat(c,"Unknown sentinel subcommand '%s'",
2000 (char*)c->argv[1]->ptr);
2001 }
2002 return;
2003
2004 numargserr:
2005 addReplyErrorFormat(c,"Wrong number of commands for 'sentinel %s'",
2006 (char*)c->argv[1]->ptr);
2007 }
2008
2009 /* ===================== SENTINEL availability checks ======================= */
2010
2011 /* Is this instance down from our point of view? */
2012 void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
2013 mstime_t elapsed = mstime() - ri->last_avail_time;
2014
2015 /* Check if we are in need for a reconnection of one of the
2016 * links, because we are detecting low activity.
2017 *
2018 * 1) Check if the command link seems connected, was connected not less
2019 * than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have an
2020 * idle time that is greater than down_after_period / 2 seconds. */
2021 if (ri->cc &&
2022 (mstime() - ri->cc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
2023 (mstime() - ri->last_pong_time) > (ri->down_after_period/2))
2024 {
2025 sentinelKillLink(ri,ri->cc);
2026 }
2027
2028 /* 2) Check if the pubsub link seems connected, was connected not less
2029 * than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
2030 * activity in the Pub/Sub channel for more than
2031 * SENTINEL_PUBLISH_PERIOD * 3.
2032 */
2033 if (ri->pc &&
2034 (mstime() - ri->pc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
2035 (mstime() - ri->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
2036 {
2037 sentinelKillLink(ri,ri->pc);
2038 }
2039
2040 /* Update the subjectively down flag. */
2041 if (elapsed > ri->down_after_period) {
2042 /* Is subjectively down */
2043 if ((ri->flags & SRI_S_DOWN) == 0) {
2044 sentinelEvent(REDIS_WARNING,"+sdown",ri,"%@");
2045 ri->s_down_since_time = mstime();
2046 ri->flags |= SRI_S_DOWN;
2047 }
2048 } else {
2049 /* Is subjectively up */
2050 if (ri->flags & SRI_S_DOWN) {
2051 sentinelEvent(REDIS_WARNING,"-sdown",ri,"%@");
2052 ri->flags &= ~SRI_S_DOWN;
2053 }
2054 }
2055 }
2056
2057 /* Is this instance down accordingly to the configured quorum? */
2058 void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
2059 dictIterator *di;
2060 dictEntry *de;
2061 int quorum = 0, odown = 0;
2062
2063 if (master->flags & SRI_S_DOWN) {
2064 /* Is down for enough sentinels? */
2065 quorum = 1; /* the current sentinel. */
2066 /* Count all the other sentinels. */
2067 di = dictGetIterator(master->sentinels);
2068 while((de = dictNext(di)) != NULL) {
2069 sentinelRedisInstance *ri = dictGetVal(de);
2070
2071 if (ri->flags & SRI_MASTER_DOWN) quorum++;
2072 }
2073 dictReleaseIterator(di);
2074 if (quorum >= master->quorum) odown = 1;
2075 }
2076
2077 /* Set the flag accordingly to the outcome. */
2078 if (odown) {
2079 if ((master->flags & SRI_O_DOWN) == 0) {
2080 sentinelEvent(REDIS_WARNING,"+odown",master,"%@ #quorum %d/%d",
2081 quorum, master->quorum);
2082 master->flags |= SRI_O_DOWN;
2083 master->o_down_since_time = mstime();
2084 }
2085 } else {
2086 if (master->flags & SRI_O_DOWN) {
2087 sentinelEvent(REDIS_WARNING,"-odown",master,"%@");
2088 master->flags &= ~SRI_O_DOWN;
2089 }
2090 }
2091 }
2092
2093 /* Receive the SENTINEL is-master-down-by-addr reply, see the
2094 * sentinelAskMasterStateToOtherSentinels() function for more information. */
2095 void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
2096 sentinelRedisInstance *ri = c->data;
2097 redisReply *r;
2098
2099 if (ri) ri->pending_commands--;
2100 if (!reply || !ri) return;
2101 r = reply;
2102
2103 /* Ignore every error or unexpected reply.
2104 * Note that if the command returns an error for any reason we'll
2105 * end clearing the SRI_MASTER_DOWN flag for timeout anyway. */
2106 if (r->type == REDIS_REPLY_ARRAY && r->elements == 2 &&
2107 r->element[0]->type == REDIS_REPLY_INTEGER &&
2108 r->element[1]->type == REDIS_REPLY_STRING)
2109 {
2110 ri->last_master_down_reply_time = mstime();
2111 if (r->element[0]->integer == 1) {
2112 ri->flags |= SRI_MASTER_DOWN;
2113 } else {
2114 ri->flags &= ~SRI_MASTER_DOWN;
2115 }
2116 sdsfree(ri->leader);
2117 ri->leader = sdsnew(r->element[1]->str);
2118 }
2119 }
2120
2121 /* If we think (subjectively) the master is down, we start sending
2122 * SENTINEL IS-MASTER-DOWN-BY-ADDR requests to other sentinels
2123 * in order to get the replies that allow to reach the quorum and
2124 * possibly also mark the master as objectively down. */
2125 void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master) {
2126 dictIterator *di;
2127 dictEntry *de;
2128
2129 di = dictGetIterator(master->sentinels);
2130 while((de = dictNext(di)) != NULL) {
2131 sentinelRedisInstance *ri = dictGetVal(de);
2132 mstime_t elapsed = mstime() - ri->last_master_down_reply_time;
2133 char port[32];
2134 int retval;
2135
2136 /* If the master state from other sentinel is too old, we clear it. */
2137 if (elapsed > SENTINEL_INFO_VALIDITY_TIME) {
2138 ri->flags &= ~SRI_MASTER_DOWN;
2139 sdsfree(ri->leader);
2140 ri->leader = NULL;
2141 }
2142
2143 /* Only ask if master is down to other sentinels if:
2144 *
2145 * 1) We believe it is down, or there is a failover in progress.
2146 * 2) Sentinel is connected.
2147 * 3) We did not received the info within SENTINEL_ASK_PERIOD ms. */
2148 if ((master->flags & (SRI_S_DOWN|SRI_FAILOVER_IN_PROGRESS)) == 0)
2149 continue;
2150 if (ri->flags & SRI_DISCONNECTED) continue;
2151 if (mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
2152 continue;
2153
2154 /* Ask */
2155 ll2string(port,sizeof(port),master->addr->port);
2156 retval = redisAsyncCommand(ri->cc,
2157 sentinelReceiveIsMasterDownReply, NULL,
2158 "SENTINEL is-master-down-by-addr %s %s",
2159 master->addr->ip, port);
2160 if (retval == REDIS_OK) ri->pending_commands++;
2161 }
2162 dictReleaseIterator(di);
2163 }
2164
2165 /* =============================== FAILOVER ================================= */
2166
2167 /* Given a master get the "subjective leader", that is, among all the sentinels
2168 * with given characteristics, the one with the lexicographically smaller
2169 * runid. The characteristics required are:
2170 *
2171 * 1) Has SRI_CAN_FAILOVER flag.
2172 * 2) Is not disconnected.
2173 * 3) Recently answered to our ping (no longer than
2174 * SENTINEL_INFO_VALIDITY_TIME milliseconds ago).
2175 *
2176 * The function returns a pointer to an sds string representing the runid of the
2177 * leader sentinel instance (from our point of view). Otherwise NULL is
2178 * returned if there are no suitable sentinels.
2179 */
2180
/* qsort() comparison function for arrays of C strings holding run IDs.
 *
 * 'a' and 'b' point to the array elements, that is, to pointers to the
 * strings. The strings are compared ignoring case, and the return value is
 * less than, equal to, or greater than zero accordingly. The arguments are
 * accessed as pointers to const, without casting the qualifier away. */
int compareRunID(const void *a, const void *b) {
    const char *const *runid_a = a;
    const char *const *runid_b = b;

    return strcasecmp(*runid_a, *runid_b);
}
2185
2186 char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master) {
2187 dictIterator *di;
2188 dictEntry *de;
2189 char **instance =
2190 zmalloc(sizeof(char*)*(dictSize(master->sentinels)+1));
2191 int instances = 0;
2192 char *leader = NULL;
2193
2194 if (master->flags & SRI_CAN_FAILOVER) {
2195 /* Add myself if I'm a Sentinel that can failover this master. */
2196 instance[instances++] = server.runid;
2197 }
2198
2199 di = dictGetIterator(master->sentinels);
2200 while((de = dictNext(di)) != NULL) {
2201 sentinelRedisInstance *ri = dictGetVal(de);
2202 mstime_t lag = mstime() - ri->last_avail_time;
2203
2204 if (lag > SENTINEL_INFO_VALIDITY_TIME ||
2205 !(ri->flags & SRI_CAN_FAILOVER) ||
2206 (ri->flags & SRI_DISCONNECTED) ||
2207 ri->runid == NULL)
2208 continue;
2209 instance[instances++] = ri->runid;
2210 }
2211 dictReleaseIterator(di);
2212
2213 /* If we have at least one instance passing our checks, order the array
2214 * by runid. */
2215 if (instances) {
2216 qsort(instance,instances,sizeof(char*),compareRunID);
2217 leader = sdsnew(instance[0]);
2218 }
2219 zfree(instance);
2220 return leader;
2221 }
2222
/* A candidate leader paired with the number of votes it received. */
struct sentinelLeader {
    char *runid;            /* Run ID of the candidate. */
    unsigned long votes;    /* Votes counted for this run ID. */
};
2227
2228 /* Helper function for sentinelGetObjectiveLeader, increment the counter
2229 * relative to the specified runid. */
2230 void sentinelObjectiveLeaderIncr(dict *counters, char *runid) {
2231 dictEntry *de = dictFind(counters,runid);
2232 uint64_t oldval;
2233
2234 if (de) {
2235 oldval = dictGetUnsignedIntegerVal(de);
2236 dictSetUnsignedIntegerVal(de,oldval+1);
2237 } else {
2238 de = dictAddRaw(counters,runid);
2239 redisAssert(de != NULL);
2240 dictSetUnsignedIntegerVal(de,1);
2241 }
2242 }
2243
2244 /* Scan all the Sentinels attached to this master to check what is the
2245 * most voted leader among Sentinels. */
2246 char *sentinelGetObjectiveLeader(sentinelRedisInstance *master) {
2247 dict *counters;
2248 dictIterator *di;
2249 dictEntry *de;
2250 unsigned int voters = 0, voters_quorum;
2251 char *myvote;
2252 char *winner = NULL;
2253
2254 redisAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS));
2255 counters = dictCreate(&leaderVotesDictType,NULL);
2256
2257 /* Count my vote. */
2258 myvote = sentinelGetSubjectiveLeader(master);
2259 if (myvote) {
2260 sentinelObjectiveLeaderIncr(counters,myvote);
2261 voters++;
2262 }
2263
2264 /* Count other sentinels votes */
2265 di = dictGetIterator(master->sentinels);
2266 while((de = dictNext(di)) != NULL) {
2267 sentinelRedisInstance *ri = dictGetVal(de);
2268 if (ri->leader == NULL) continue;
2269 /* If the failover is not already in progress we are only interested
2270 * in Sentinels that believe the master is down. Otherwise the leader
2271 * selection is useful for the "failover-takedown" when the original
2272 * leader fails. In that case we consider all the voters. */
2273 if (!(master->flags & SRI_FAILOVER_IN_PROGRESS) &&
2274 !(ri->flags & SRI_MASTER_DOWN)) continue;
2275 sentinelObjectiveLeaderIncr(counters,ri->leader);
2276 voters++;
2277 }
2278 dictReleaseIterator(di);
2279 voters_quorum = voters/2+1;
2280
2281 /* Check what's the winner. For the winner to win, it needs two conditions:
2282 * 1) Absolute majority between voters (50% + 1).
2283 * 2) And anyway at least master->quorum votes. */
2284 {
2285 uint64_t max_votes = 0; /* Max votes so far. */
2286
2287 di = dictGetIterator(counters);
2288 while((de = dictNext(di)) != NULL) {
2289 uint64_t votes = dictGetUnsignedIntegerVal(de);
2290
2291 if (max_votes < votes) {
2292 max_votes = votes;
2293 winner = dictGetKey(de);
2294 }
2295 }
2296 dictReleaseIterator(di);
2297 if (winner && (max_votes < voters_quorum || max_votes < master->quorum))
2298 winner = NULL;
2299 }
2300 winner = winner ? sdsnew(winner) : NULL;
2301 sdsfree(myvote);
2302 dictRelease(counters);
2303 return winner;
2304 }
2305
2306 /* This function checks if there are the conditions to start the failover,
2307 * that is:
2308 *
2309 * 1) Enough time has passed since O_DOWN.
2310 * 2) The master is marked as SRI_CAN_FAILOVER, so we can failover it.
2311 * 3) We are the objectively leader for this master.
2312 *
2313 * If the conditions are met we flag the master as SRI_FAILOVER_IN_PROGRESS
2314 * and SRI_I_AM_THE_LEADER.
2315 */
2316 void sentinelStartFailover(sentinelRedisInstance *master) {
2317 char *leader;
2318 int isleader;
2319
2320 /* We can't failover if the master is not in O_DOWN state or if
2321 * there is not already a failover in progress (to perform the
2322 * takedown if the leader died) or if this Sentinel is not allowed
2323 * to start a failover. */
2324 if (!(master->flags & SRI_CAN_FAILOVER) ||
2325 !(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS))) return;
2326
2327 leader = sentinelGetObjectiveLeader(master);
2328 isleader = leader && strcasecmp(leader,server.runid) == 0;
2329 sdsfree(leader);
2330
2331 /* If I'm not the leader, I can't failover for sure. */
2332 if (!isleader) return;
2333
2334 /* If the failover is already in progress there are two options... */
2335 if (master->flags & SRI_FAILOVER_IN_PROGRESS) {
2336 if (master->flags & SRI_I_AM_THE_LEADER) {
2337 /* 1) I'm flagged as leader so I already started the failover.
2338 * Just return. */
2339 return;
2340 } else {
2341 mstime_t elapsed = mstime() - master->failover_state_change_time;
2342
2343 /* 2) I'm the new leader, but I'm not flagged as leader in the
2344 * master: I did not started the failover, but the original
2345 * leader has no longer the leadership.
2346 *
2347 * In this case if the failover appears to be lagging
2348 * for at least 25% of the configured failover timeout,
2349 * I can assume I can take control. Otherwise
2350 * it's better to return and wait more. */
2351 if (elapsed < (master->failover_timeout/4)) return;
2352 sentinelEvent(REDIS_WARNING,"+failover-takedown",master,"%@");
2353 /* We have already an elected slave if we are in
2354 * FAILOVER_IN_PROGRESS state, that is, the slave that we
2355 * observed turning into a master. */
2356 master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES;
2357 /* As an observer we flagged all the slaves as RECONF_SENT but
2358 * now we are in charge of actually sending the reconfiguration
2359 * command so let's clear this flag for all the instances. */
2360 sentinelDelFlagsToDictOfRedisInstances(master->slaves,
2361 SRI_RECONF_SENT);
2362 }
2363 } else {
2364 /* Brand new failover as SRI_FAILOVER_IN_PROGRESS was not set.
2365 *
2366 * Do we have a slave to promote? Otherwise don't start a failover
2367 * at all. */
2368 if (sentinelSelectSlave(master) == NULL) return;
2369 master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
2370 }
2371
2372 master->flags |= SRI_FAILOVER_IN_PROGRESS|SRI_I_AM_THE_LEADER;
2373 sentinelEvent(REDIS_WARNING,"+failover-triggered",master,"%@");
2374
2375 /* Pick a random delay if it's a fresh failover (WAIT_START), and not
2376 * a recovery of a failover started by another sentinel. */
2377 if (master->failover_state == SENTINEL_FAILOVER_STATE_WAIT_START) {
2378 master->failover_start_time = mstime() +
2379 SENTINEL_FAILOVER_FIXED_DELAY +
2380 (rand() % SENTINEL_FAILOVER_MAX_RANDOM_DELAY);
2381 sentinelEvent(REDIS_WARNING,"+failover-state-wait-start",master,
2382 "%@ #starting in %lld milliseconds",
2383 master->failover_start_time-mstime());
2384 }
2385 master->failover_state_change_time = mstime();
2386 }
2387
2388 /* Select a suitable slave to promote. The current algorithm only uses
2389 * the following parameters:
2390 *
2391 * 1) None of the following conditions: S_DOWN, O_DOWN, DISCONNECTED.
2392 * 2) last_avail_time more recent than SENTINEL_INFO_VALIDITY_TIME.
2393 * 3) info_refresh more recent than SENTINEL_INFO_VALIDITY_TIME.
2394 * 4) master_link_down_time no more than:
2395 * (now - master->s_down_since_time) + (master->down_after_period * 10).
2396 *
2397 * Among all the slaves matching the above conditions we select the slave
2398 * with lower slave_priority. If priority is the same we select the slave
2399 * with lexicographically smaller runid.
2400 *
2401 * The function returns the pointer to the selected slave, otherwise
2402 * NULL if no suitable slave was found.
2403 */
2404
2405 int compareSlavesForPromotion(const void *a, const void *b) {
2406 sentinelRedisInstance **sa = (sentinelRedisInstance **)a,
2407 **sb = (sentinelRedisInstance **)b;
2408 if ((*sa)->slave_priority != (*sb)->slave_priority)
2409 return (*sa)->slave_priority - (*sb)->slave_priority;
2410 return strcasecmp((*sa)->runid,(*sb)->runid);
2411 }
2412
2413 sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
2414 sentinelRedisInstance **instance =
2415 zmalloc(sizeof(instance[0])*dictSize(master->slaves));
2416 sentinelRedisInstance *selected = NULL;
2417 int instances = 0;
2418 dictIterator *di;
2419 dictEntry *de;
2420 mstime_t max_master_down_time;
2421
2422 max_master_down_time = (mstime() - master->s_down_since_time) +
2423 (master->down_after_period * 10);
2424
2425 di = dictGetIterator(master->slaves);
2426 while((de = dictNext(di)) != NULL) {
2427 sentinelRedisInstance *slave = dictGetVal(de);
2428 mstime_t info_validity_time = mstime()-SENTINEL_INFO_VALIDITY_TIME;
2429
2430 if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN|SRI_DISCONNECTED)) continue;
2431 if (slave->last_avail_time < info_validity_time) continue;
2432 if (slave->info_refresh < info_validity_time) continue;
2433 if (slave->master_link_down_time > max_master_down_time) continue;
2434 instance[instances++] = slave;
2435 }
2436 dictReleaseIterator(di);
2437 if (instances) {
2438 qsort(instance,instances,sizeof(sentinelRedisInstance*),
2439 compareSlavesForPromotion);
2440 selected = instance[0];
2441 }
2442 zfree(instance);
2443 return selected;
2444 }
2445
2446 /* ---------------- Failover state machine implementation ------------------- */
2447 void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
2448 /* If we in "wait start" but the master is no longer in ODOWN nor in
2449 * SDOWN condition we abort the failover. This is important as it
2450 * prevents a useless failover in a a notable case of netsplit, where
2451 * the senitnels are split from the redis instances. In this case
2452 * the failover will not start while there is the split because no
2453 * good slave can be reached. However when the split is resolved, we
2454 * can go to waitstart if the slave is back rechable a few milliseconds
2455 * before the master is. In that case when the master is back online
2456 * we cancel the failover. */
2457 if ((ri->flags & (SRI_S_DOWN|SRI_O_DOWN)) == 0) {
2458 sentinelEvent(REDIS_WARNING,"-failover-abort-master-is-back",
2459 ri,"%@");
2460 sentinelAbortFailover(ri);
2461 return;
2462 }
2463
2464 /* Start the failover going to the next state if enough time has
2465 * elapsed. */
2466 if (mstime() >= ri->failover_start_time) {
2467 ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
2468 ri->failover_state_change_time = mstime();
2469 sentinelEvent(REDIS_WARNING,"+failover-state-select-slave",ri,"%@");
2470 }
2471 }
2472
2473 void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
2474 sentinelRedisInstance *slave = sentinelSelectSlave(ri);
2475
2476 if (slave == NULL) {
2477 sentinelEvent(REDIS_WARNING,"-failover-abort-no-good-slave",ri,"%@");
2478 sentinelAbortFailover(ri);
2479 } else {
2480 sentinelEvent(REDIS_WARNING,"+selected-slave",slave,"%@");
2481 slave->flags |= SRI_PROMOTED;
2482 ri->promoted_slave = slave;
2483 ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE;
2484 ri->failover_state_change_time = mstime();
2485 sentinelEvent(REDIS_NOTICE,"+failover-state-send-slaveof-noone",
2486 slave, "%@");
2487 }
2488 }
2489
2490 void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
2491 int retval;
2492
2493 if (ri->promoted_slave->flags & SRI_DISCONNECTED) return;
2494
2495 /* Send SLAVEOF NO ONE command to turn the slave into a master.
2496 * We actually register a generic callback for this command as we don't
2497 * really care about the reply. We check if it worked indirectly observing
2498 * if INFO returns a different role (master instead of slave). */
2499 retval = redisAsyncCommand(ri->promoted_slave->cc,
2500 sentinelDiscardReplyCallback, NULL, "SLAVEOF NO ONE");
2501 if (retval != REDIS_OK) return;
2502 ri->promoted_slave->pending_commands++;
2503 sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion",
2504 ri->promoted_slave,"%@");
2505 ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
2506 ri->failover_state_change_time = mstime();
2507 }
2508
2509 /* We actually wait for promotion indirectly checking with INFO when the
2510 * slave turns into a master. */
2511 void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
2512 mstime_t elapsed = mstime() - ri->failover_state_change_time;
2513
2514 if (elapsed >= SENTINEL_PROMOTION_RETRY_PERIOD) {
2515 sentinelEvent(REDIS_WARNING,"-promotion-timeout",ri->promoted_slave,
2516 "%@");
2517 sentinelEvent(REDIS_WARNING,"+failover-state-select-slave",ri,"%@");
2518 ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
2519 ri->failover_state_change_time = mstime();
2520 ri->promoted_slave->flags &= ~SRI_PROMOTED;
2521 ri->promoted_slave = NULL;
2522 }
2523 }
2524
2525 void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
2526 int not_reconfigured = 0, timeout = 0;
2527 dictIterator *di;
2528 dictEntry *de;
2529 mstime_t elapsed = mstime() - master->failover_state_change_time;
2530
2531 /* We can't consider failover finished if the promoted slave is
2532 * not reachable. */
2533 if (master->promoted_slave == NULL ||
2534 master->promoted_slave->flags & SRI_S_DOWN) return;
2535
2536 /* The failover terminates once all the reachable slaves are properly
2537 * configured. */
2538 di = dictGetIterator(master->slaves);
2539 while((de = dictNext(di)) != NULL) {
2540 sentinelRedisInstance *slave = dictGetVal(de);
2541
2542 if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue;
2543 if (slave->flags & SRI_S_DOWN) continue;
2544 not_reconfigured++;
2545 }
2546 dictReleaseIterator(di);
2547
2548 /* Force end of failover on timeout. */
2549 if (elapsed > master->failover_timeout) {
2550 not_reconfigured = 0;
2551 timeout = 1;
2552 sentinelEvent(REDIS_WARNING,"+failover-end-for-timeout",master,"%@");
2553 }
2554
2555 if (not_reconfigured == 0) {
2556 int role = (master->flags & SRI_I_AM_THE_LEADER) ? SENTINEL_LEADER :
2557 SENTINEL_OBSERVER;
2558
2559 sentinelEvent(REDIS_WARNING,"+failover-end",master,"%@");
2560 master->failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG;
2561 master->failover_state_change_time = mstime();
2562 sentinelCallClientReconfScript(master,role,"end",master->addr,
2563 master->promoted_slave->addr);
2564 }
2565
2566 /* If I'm the leader it is a good idea to send a best effort SLAVEOF
2567 * command to all the slaves still not reconfigured to replicate with
2568 * the new master. */
2569 if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) {
2570 dictIterator *di;
2571 dictEntry *de;
2572 char master_port[32];
2573
2574 ll2string(master_port,sizeof(master_port),
2575 master->promoted_slave->addr->port);
2576
2577 di = dictGetIterator(master->slaves);
2578 while((de = dictNext(di)) != NULL) {
2579 sentinelRedisInstance *slave = dictGetVal(de);
2580 int retval;
2581
2582 if (slave->flags &
2583 (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
2584
2585 retval = redisAsyncCommand(slave->cc,
2586 sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
2587 master->promoted_slave->addr->ip,
2588 master_port);
2589 if (retval == REDIS_OK) {
2590 sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@");
2591 slave->flags |= SRI_RECONF_SENT;
2592 }
2593 }
2594 dictReleaseIterator(di);
2595 }
2596 }
2597
2598 /* Send SLAVE OF <new master address> to all the remaining slaves that
2599 * still don't appear to have the configuration updated. */
2600 void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
2601 dictIterator *di;
2602 dictEntry *de;
2603 int in_progress = 0;
2604
2605 di = dictGetIterator(master->slaves);
2606 while((de = dictNext(di)) != NULL) {
2607 sentinelRedisInstance *slave = dictGetVal(de);
2608
2609 if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG))
2610 in_progress++;
2611 }
2612 dictReleaseIterator(di);
2613
2614 di = dictGetIterator(master->slaves);
2615 while(in_progress < master->parallel_syncs &&
2616 (de = dictNext(di)) != NULL)
2617 {
2618 sentinelRedisInstance *slave = dictGetVal(de);
2619 int retval;
2620 char master_port[32];
2621
2622 /* Skip the promoted slave, and already configured slaves. */
2623 if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue;
2624
2625 /* Clear the SRI_RECONF_SENT flag if too much time elapsed without
2626 * the slave moving forward to the next state. */
2627 if ((slave->flags & SRI_RECONF_SENT) &&
2628 (mstime() - slave->slave_reconf_sent_time) >
2629 SENTINEL_SLAVE_RECONF_RETRY_PERIOD)
2630 {
2631 sentinelEvent(REDIS_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
2632 slave->flags &= ~SRI_RECONF_SENT;
2633 }
2634
2635 /* Nothing to do for instances that are disconnected or already
2636 * in RECONF_SENT state. */
2637 if (slave->flags & (SRI_DISCONNECTED|SRI_RECONF_SENT|SRI_RECONF_INPROG))
2638 continue;
2639
2640 /* Send SLAVEOF <new master>. */
2641 ll2string(master_port,sizeof(master_port),
2642 master->promoted_slave->addr->port);
2643 retval = redisAsyncCommand(slave->cc,
2644 sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
2645 master->promoted_slave->addr->ip,
2646 master_port);
2647 if (retval == REDIS_OK) {
2648 slave->flags |= SRI_RECONF_SENT;
2649 slave->pending_commands++;
2650 slave->slave_reconf_sent_time = mstime();
2651 sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@");
2652 in_progress++;
2653 }
2654 }
2655 dictReleaseIterator(di);
2656 sentinelFailoverDetectEnd(master);
2657 }
2658
2659 /* This function is called when the slave is in
2660 * SENTINEL_FAILOVER_STATE_UPDATE_CONFIG state. In this state we need
2661 * to remove it from the master table and add the promoted slave instead.
2662 *
2663 * If there are no promoted slaves as this instance is unique, we remove
2664 * and re-add it with the same address to trigger a complete state
2665 * refresh. */
2666 void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
2667 sentinelRedisInstance *ref = master->promoted_slave ?
2668 master->promoted_slave : master;
2669
2670 sentinelEvent(REDIS_WARNING,"+switch-master",master,"%s %s %d %s %d",
2671 master->name, master->addr->ip, master->addr->port,
2672 ref->addr->ip, ref->addr->port);
2673
2674 sentinelResetMasterAndChangeAddress(master,ref->addr->ip,ref->addr->port);
2675 }
2676
2677 void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
2678 redisAssert(ri->flags & SRI_MASTER);
2679
2680 if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;
2681
2682 switch(ri->failover_state) {
2683 case SENTINEL_FAILOVER_STATE_WAIT_START:
2684 sentinelFailoverWaitStart(ri);
2685 break;
2686 case SENTINEL_FAILOVER_STATE_SELECT_SLAVE:
2687 sentinelFailoverSelectSlave(ri);
2688 break;
2689 case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE:
2690 sentinelFailoverSendSlaveOfNoOne(ri);
2691 break;
2692 case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION:
2693 sentinelFailoverWaitPromotion(ri);
2694 break;
2695 case SENTINEL_FAILOVER_STATE_RECONF_SLAVES:
2696 sentinelFailoverReconfNextSlave(ri);
2697 break;
2698 case SENTINEL_FAILOVER_STATE_DETECT_END:
2699 sentinelFailoverDetectEnd(ri);
2700 break;
2701 }
2702 }
2703
2704 /* Abort a failover in progress with the following steps:
2705 * 1) If this instance is the leaer send a SLAVEOF command to all the already
2706 * reconfigured slaves if any to configure them to replicate with the
2707 * original master.
2708 * 2) For both leaders and observers: clear the failover flags and state in
2709 * the master instance.
2710 * 3) If there is already a promoted slave and we are the leader, and this
2711 * slave is not DISCONNECTED, try to reconfigure it to replicate
2712 * back to the master as well, sending a best effort SLAVEOF command.
2713 */
2714 void sentinelAbortFailover(sentinelRedisInstance *ri) {
2715 char master_port[32];
2716 dictIterator *di;
2717 dictEntry *de;
2718 int sentinel_role;
2719
2720 redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
2721 ll2string(master_port,sizeof(master_port),ri->addr->port);
2722
2723 /* Clear failover related flags from slaves.
2724 * Also if we are the leader make sure to send SLAVEOF commands to all the
2725 * already reconfigured slaves in order to turn them back into slaves of
2726 * the original master. */
2727 di = dictGetIterator(ri->slaves);
2728 while((de = dictNext(di)) != NULL) {
2729 sentinelRedisInstance *slave = dictGetVal(de);
2730 if ((ri->flags & SRI_I_AM_THE_LEADER) &&
2731 !(slave->flags & SRI_DISCONNECTED) &&
2732 (slave->flags & (SRI_PROMOTED|SRI_RECONF_SENT|SRI_RECONF_INPROG|
2733 SRI_RECONF_DONE)))
2734 {
2735 int retval;
2736
2737 retval = redisAsyncCommand(slave->cc,
2738 sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
2739 ri->addr->ip,
2740 master_port);
2741 if (retval == REDIS_OK)
2742 sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@");
2743 }
2744 slave->flags &= ~(SRI_RECONF_SENT|SRI_RECONF_INPROG|SRI_RECONF_DONE);
2745 }
2746 dictReleaseIterator(di);
2747
2748 sentinel_role = (ri->flags & SRI_I_AM_THE_LEADER) ? SENTINEL_LEADER :
2749 SENTINEL_OBSERVER;
2750 ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS|SRI_I_AM_THE_LEADER);
2751 ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
2752 ri->failover_state_change_time = mstime();
2753 if (ri->promoted_slave) {
2754 sentinelCallClientReconfScript(ri,sentinel_role,"abort",
2755 ri->promoted_slave->addr,ri->addr);
2756 ri->promoted_slave->flags &= ~SRI_PROMOTED;
2757 ri->promoted_slave = NULL;
2758 }
2759 }
2760
2761 /* The following is called only for master instances and will abort the
2762 * failover process if:
2763 *
2764 * 1) The failover is in progress.
2765 * 2) We already promoted a slave.
2766 * 3) The promoted slave is in extended SDOWN condition.
2767 */
2768 void sentinelAbortFailoverIfNeeded(sentinelRedisInstance *ri) {
2769 /* Failover is in progress? Do we have a promoted slave? */
2770 if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS) || !ri->promoted_slave) return;
2771
2772 /* Is the promoted slave into an extended SDOWN state? */
2773 if (!(ri->promoted_slave->flags & SRI_S_DOWN) ||
2774 (mstime() - ri->promoted_slave->s_down_since_time) <
2775 (ri->down_after_period * SENTINEL_EXTENDED_SDOWN_MULTIPLIER)) return;
2776
2777 sentinelEvent(REDIS_WARNING,"-failover-abort-x-sdown",ri->promoted_slave,"%@");
2778 sentinelAbortFailover(ri);
2779 }
2780
/* ======================== SENTINEL timer handler ==========================
 * This is the "main" of our Sentinel, since Sentinel is completely
 * non-blocking in design. The function is called every second.
 * -------------------------------------------------------------------------- */
2785
2786 /* Perform scheduled operations for the specified Redis instance. */
2787 void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
2788 /* ========== MONITORING HALF ============ */
2789 /* Every kind of instance */
2790 sentinelReconnectInstance(ri);
2791 sentinelPingInstance(ri);
2792
2793 /* Masters and slaves */
2794 if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
2795 /* Nothing so far. */
2796 }
2797
2798 /* Only masters */
2799 if (ri->flags & SRI_MASTER) {
2800 sentinelAskMasterStateToOtherSentinels(ri);
2801 }
2802
2803 /* ============== ACTING HALF ============= */
2804 /* We don't proceed with the acting half if we are in TILT mode.
2805 * TILT happens when we find something odd with the time, like a
2806 * sudden change in the clock. */
2807 if (sentinel.tilt) {
2808 if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
2809 sentinel.tilt = 0;
2810 sentinelEvent(REDIS_WARNING,"-tilt",NULL,"#tilt mode exited");
2811 }
2812
2813 /* Every kind of instance */
2814 sentinelCheckSubjectivelyDown(ri);
2815
2816 /* Masters and slaves */
2817 if (ri->flags & (SRI_MASTER|SRI_SLAVE)) {
2818 /* Nothing so far. */
2819 }
2820
2821 /* Only masters */
2822 if (ri->flags & SRI_MASTER) {
2823 sentinelCheckObjectivelyDown(ri);
2824 sentinelStartFailover(ri);
2825 sentinelFailoverStateMachine(ri);
2826 sentinelAbortFailoverIfNeeded(ri);
2827 }
2828 }
2829
2830 /* Perform scheduled operations for all the instances in the dictionary.
2831 * Recursively call the function against dictionaries of slaves. */
2832 void sentinelHandleDictOfRedisInstances(dict *instances) {
2833 dictIterator *di;
2834 dictEntry *de;
2835 sentinelRedisInstance *switch_to_promoted = NULL;
2836
2837 /* There are a number of things we need to perform against every master. */
2838 di = dictGetIterator(instances);
2839 while((de = dictNext(di)) != NULL) {
2840 sentinelRedisInstance *ri = dictGetVal(de);
2841
2842 sentinelHandleRedisInstance(ri);
2843 if (ri->flags & SRI_MASTER) {
2844 sentinelHandleDictOfRedisInstances(ri->slaves);
2845 sentinelHandleDictOfRedisInstances(ri->sentinels);
2846 if (ri->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG) {
2847 switch_to_promoted = ri;
2848 }
2849 }
2850 }
2851 if (switch_to_promoted)
2852 sentinelFailoverSwitchToPromotedSlave(switch_to_promoted);
2853 dictReleaseIterator(di);
2854 }
2855
2856 /* This function checks if we need to enter the TITL mode.
2857 *
2858 * The TILT mode is entered if we detect that between two invocations of the
2859 * timer interrupt, a negative amount of time, or too much time has passed.
2860 * Note that we expect that more or less just 100 milliseconds will pass
2861 * if everything is fine. However we'll see a negative number or a
2862 * difference bigger than SENTINEL_TILT_TRIGGER milliseconds if one of the
2863 * following conditions happen:
2864 *
2865 * 1) The Sentiel process for some time is blocked, for every kind of
2866 * random reason: the load is huge, the computer was freezed for some time
2867 * in I/O or alike, the process was stopped by a signal. Everything.
2868 * 2) The system clock was altered significantly.
2869 *
2870 * Under both this conditions we'll see everything as timed out and failing
2871 * without good reasons. Instead we enter the TILT mode and wait
2872 * for SENTIENL_TILT_PERIOD to elapse before starting to act again.
2873 *
2874 * During TILT time we still collect information, we just do not act. */
2875 void sentinelCheckTiltCondition(void) {
2876 mstime_t now = mstime();
2877 mstime_t delta = now - sentinel.previous_time;
2878
2879 if (delta < 0 || delta > SENTINEL_TILT_TRIGGER) {
2880 sentinel.tilt = 1;
2881 sentinel.tilt_start_time = mstime();
2882 sentinelEvent(REDIS_WARNING,"+tilt",NULL,"#tilt mode entered");
2883 }
2884 sentinel.previous_time = mstime();
2885 }
2886
2887 void sentinelTimer(void) {
2888 sentinelCheckTiltCondition();
2889 sentinelHandleDictOfRedisInstances(sentinel.masters);
2890 sentinelRunPendingScripts();
2891 sentinelCollectTerminatedScripts();
2892 sentinelKillTimedoutScripts();
2893 }
2894