1 /* Redis Sentinel implementation
2 * -----------------------------
4 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * * Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Redis nor the names of its contributors may be used
16 * to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
37 #include <arpa/inet.h>
38 #include <sys/socket.h>
40 #define REDIS_SENTINEL_PORT 26379
42 /* ======================== Sentinel global state =========================== */
/* Millisecond-resolution time value, used for every timestamp and period
 * stored by Sentinel. */
typedef long long mstime_t;
/* Address object, used to describe an ip:port pair.
 *
 * 'ip' holds the resolved address as a string (createSentinelAddr() stores
 * an sds string there), 'port' the TCP port. */
typedef struct sentinelAddr {
    char *ip;
    int port;
} sentinelAddr;
/* Flags stored in sentinelRedisInstance.flags, describing an instance that
 * this Sentinel is monitoring. */
#define SRI_MASTER  (1<<0)
#define SRI_SLAVE   (1<<1)
#define SRI_SENTINEL (1<<2)
#define SRI_DISCONNECTED (1<<3)
#define SRI_S_DOWN (1<<4)   /* Subjectively down (no quorum). */
#define SRI_O_DOWN (1<<5)   /* Objectively down (quorum reached). */
#define SRI_MASTER_DOWN (1<<6) /* A Sentinel with this flag set thinks that
                                   its master is down. */
/* SRI_CAN_FAILOVER when set in an SRI_MASTER instance means that we are
 * allowed to perform the failover for this master.
 * When set in a SRI_SENTINEL instance means that sentinel is allowed to
 * perform the failover on its master. */
#define SRI_CAN_FAILOVER (1<<7)
#define SRI_FAILOVER_IN_PROGRESS (1<<8) /* Failover is in progress for
                                           this master. */
#define SRI_I_AM_THE_LEADER (1<<9)     /* We are the leader for this master. */
#define SRI_PROMOTED (1<<10)            /* Slave selected for promotion. */
#define SRI_RECONF_SENT (1<<11)     /* SLAVEOF <newmaster> sent. */
#define SRI_RECONF_INPROG (1<<12)   /* Slave synchronization in progress. */
#define SRI_RECONF_DONE (1<<13)     /* Slave synchronized with new master. */

/* Periods, timeouts and other tunables. Time values are compared against
 * mstime_t timestamps. */
#define SENTINEL_INFO_PERIOD 10000
#define SENTINEL_PING_PERIOD 1000
#define SENTINEL_ASK_PERIOD 1000
#define SENTINEL_PUBLISH_PERIOD 5000
#define SENTINEL_DOWN_AFTER_PERIOD 30000
#define SENTINEL_HELLO_CHANNEL "__sentinel__:hello"
#define SENTINEL_TILT_TRIGGER 2000
#define SENTINEL_TILT_PERIOD (SENTINEL_PING_PERIOD*30)
#define SENTINEL_DEFAULT_SLAVE_PRIORITY 100
#define SENTINEL_PROMOTION_RETRY_PERIOD 30000
#define SENTINEL_SLAVE_RECONF_RETRY_PERIOD 10000
#define SENTINEL_DEFAULT_PARALLEL_SYNCS 1
#define SENTINEL_MIN_LINK_RECONNECT_PERIOD 15000
#define SENTINEL_DEFAULT_FAILOVER_TIMEOUT (60*15*1000)
#define SENTINEL_MAX_PENDING_COMMANDS 100
#define SENTINEL_EXTENDED_SDOWN_MULTIPLIER 10

/* How many milliseconds is an information valid? This applies for instance
 * to the reply to SENTINEL IS-MASTER-DOWN-BY-ADDR replies. */
#define SENTINEL_INFO_VALIDITY_TIME 5000
#define SENTINEL_FAILOVER_FIXED_DELAY 5000
#define SENTINEL_FAILOVER_MAX_RANDOM_DELAY 10000

/* Failover machine different states. */
#define SENTINEL_FAILOVER_STATE_NONE 0  /* No failover in progress. */
#define SENTINEL_FAILOVER_STATE_WAIT_START 1  /* Wait for failover_start_time*/
#define SENTINEL_FAILOVER_STATE_SELECT_SLAVE 2 /* Select slave to promote */
#define SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE 3 /* Slave -> Master */
#define SENTINEL_FAILOVER_STATE_WAIT_PROMOTION 4 /* Wait slave to change role */
#define SENTINEL_FAILOVER_STATE_RECONF_SLAVES 5 /* SLAVEOF newmaster */
#define SENTINEL_FAILOVER_STATE_WAIT_NEXT_SLAVE 6 /* wait replication */
#define SENTINEL_FAILOVER_STATE_ALERT_CLIENTS 7 /* Run user script. */
#define SENTINEL_FAILOVER_STATE_WAIT_ALERT_SCRIPT 8 /* Wait script exec. */
#define SENTINEL_FAILOVER_STATE_DETECT_END 9 /* Check for failover end. */
#define SENTINEL_FAILOVER_STATE_UPDATE_CONFIG 10 /* Monitor promoted slave. */

/* Values for sentinelRedisInstance.slave_master_link_status. */
#define SENTINEL_MASTER_LINK_STATUS_UP 0
#define SENTINEL_MASTER_LINK_STATUS_DOWN 1

/* Generic flags that can be used with different functions. */
#define SENTINEL_NO_FLAGS 0
#define SENTINEL_GENERATE_EVENT 1
117 typedef struct sentinelRedisInstance
{
118 int flags
; /* See SRI_... defines */
119 char *name
; /* Master name from the point of view of this sentinel. */
120 char *runid
; /* run ID of this instance. */
121 sentinelAddr
*addr
; /* Master host. */
122 redisAsyncContext
*cc
; /* Hiredis context for commands. */
123 redisAsyncContext
*pc
; /* Hiredis context for Pub / Sub. */
124 int pending_commands
; /* Number of commands sent waiting for a reply. */
125 mstime_t cc_conn_time
; /* cc connection time. */
126 mstime_t pc_conn_time
; /* pc connection time. */
127 mstime_t pc_last_activity
; /* Last time we received any message. */
128 mstime_t last_avail_time
; /* Last time the instance replied to ping with
129 a reply we consider valid. */
130 mstime_t last_pong_time
; /* Last time the instance replied to ping,
131 whatever the reply was. That's used to check
132 if the link is idle and must be reconnected. */
133 mstime_t last_pub_time
; /* Last time we sent hello via Pub/Sub. */
134 mstime_t last_hello_time
; /* Only used if SRI_SENTINEL is set. Last time
135 we received an hello from this Sentinel
137 mstime_t last_master_down_reply_time
; /* Time of last reply to
138 SENTINEL is-master-down command. */
139 mstime_t s_down_since_time
; /* Subjectively down since time. */
140 mstime_t o_down_since_time
; /* Objectively down since time. */
141 mstime_t down_after_period
; /* Consider it down after that period. */
142 mstime_t info_refresh
; /* Time at which we received INFO output from it. */
144 /* Master specific. */
145 dict
*sentinels
; /* Other sentinels monitoring the same master. */
146 dict
*slaves
; /* Slaves for this master instance. */
147 int quorum
; /* Number of sentinels that need to agree on failure. */
148 int parallel_syncs
; /* How many slaves to reconfigure at same time. */
150 /* Slave specific. */
151 mstime_t master_link_down_time
; /* Slave replication link down time. */
152 int slave_priority
; /* Slave priority according to its INFO output. */
153 mstime_t slave_reconf_sent_time
; /* Time at which we sent SLAVE OF <new> */
154 struct sentinelRedisInstance
*master
; /* Master instance if SRI_SLAVE is set. */
155 char *slave_master_host
; /* Master host as reported by INFO */
156 int slave_master_port
; /* Master port as reported by INFO */
157 int slave_master_link_status
; /* Master link status as reported by INFO */
159 char *leader
; /* If this is a master instance, this is the runid of
160 the Sentinel that should perform the failover. If
161 this is a Sentinel, this is the runid of the Sentinel
162 that this other Sentinel is voting as leader.
163 This field is valid only if SRI_MASTER_DOWN is
164 set on the Sentinel instance. */
165 int failover_state
; /* See SENTINEL_FAILOVER_STATE_* defines. */
166 mstime_t failover_state_change_time
;
167 mstime_t failover_start_time
; /* When to start to failover if leader. */
168 mstime_t failover_timeout
; /* Max time to refresh failover state. */
169 struct sentinelRedisInstance
*promoted_slave
; /* Promoted slave instance. */
170 /* Scripts executed to notify admin or reconfigure clients: when they
171 * are set to NULL no script is executed. */
173 char *client_reconfig_script
;
174 } sentinelRedisInstance
;
177 struct sentinelState
{
178 dict
*masters
; /* Dictionary of master sentinelRedisInstances.
179 Key is the instance name, value is the
180 sentinelRedisInstance structure pointer. */
181 int tilt
; /* Are we in TILT mode? */
182 mstime_t tilt_start_time
; /* When TITL started. */
183 mstime_t previous_time
; /* Time last time we ran the time handler. */
186 /* ======================= hiredis ae.c adapters =============================
187 * Note: this implementation is taken from hiredis/adapters/ae.h, however
188 * we have our modified copy for Sentinel in order to use our allocator
189 * and to have full control over how the adapter works. */
191 typedef struct redisAeEvents
{
192 redisAsyncContext
*context
;
195 int reading
, writing
;
198 static void redisAeReadEvent(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
199 ((void)el
); ((void)fd
); ((void)mask
);
201 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
202 redisAsyncHandleRead(e
->context
);
205 static void redisAeWriteEvent(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
206 ((void)el
); ((void)fd
); ((void)mask
);
208 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
209 redisAsyncHandleWrite(e
->context
);
212 static void redisAeAddRead(void *privdata
) {
213 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
214 aeEventLoop
*loop
= e
->loop
;
217 aeCreateFileEvent(loop
,e
->fd
,AE_READABLE
,redisAeReadEvent
,e
);
221 static void redisAeDelRead(void *privdata
) {
222 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
223 aeEventLoop
*loop
= e
->loop
;
226 aeDeleteFileEvent(loop
,e
->fd
,AE_READABLE
);
230 static void redisAeAddWrite(void *privdata
) {
231 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
232 aeEventLoop
*loop
= e
->loop
;
235 aeCreateFileEvent(loop
,e
->fd
,AE_WRITABLE
,redisAeWriteEvent
,e
);
239 static void redisAeDelWrite(void *privdata
) {
240 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
241 aeEventLoop
*loop
= e
->loop
;
244 aeDeleteFileEvent(loop
,e
->fd
,AE_WRITABLE
);
248 static void redisAeCleanup(void *privdata
) {
249 redisAeEvents
*e
= (redisAeEvents
*)privdata
;
250 redisAeDelRead(privdata
);
251 redisAeDelWrite(privdata
);
255 static int redisAeAttach(aeEventLoop
*loop
, redisAsyncContext
*ac
) {
256 redisContext
*c
= &(ac
->c
);
259 /* Nothing should be attached when something is already attached */
260 if (ac
->ev
.data
!= NULL
)
263 /* Create container for context and r/w events */
264 e
= (redisAeEvents
*)zmalloc(sizeof(*e
));
268 e
->reading
= e
->writing
= 0;
270 /* Register functions to start/stop listening for events */
271 ac
->ev
.addRead
= redisAeAddRead
;
272 ac
->ev
.delRead
= redisAeDelRead
;
273 ac
->ev
.addWrite
= redisAeAddWrite
;
274 ac
->ev
.delWrite
= redisAeDelWrite
;
275 ac
->ev
.cleanup
= redisAeCleanup
;
281 /* ============================= Prototypes ================================= */
283 void sentinelLinkEstablishedCallback(const redisAsyncContext
*c
, int status
);
284 void sentinelDisconnectCallback(const redisAsyncContext
*c
, int status
);
285 void sentinelReceiveHelloMessages(redisAsyncContext
*c
, void *reply
, void *privdata
);
286 sentinelRedisInstance
*sentinelGetMasterByName(char *name
);
287 char *sentinelGetSubjectiveLeader(sentinelRedisInstance
*master
);
288 char *sentinelGetObjectiveLeader(sentinelRedisInstance
*master
);
289 int yesnotoi(char *s
);
290 void sentinelDisconnectInstanceFromContext(const redisAsyncContext
*c
);
291 void sentinelKillLink(sentinelRedisInstance
*ri
, redisAsyncContext
*c
);
292 const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance
*ri
);
294 /* ========================= Dictionary types =============================== */
296 unsigned int dictSdsHash(const void *key
);
297 int dictSdsKeyCompare(void *privdata
, const void *key1
, const void *key2
);
298 void releaseSentinelRedisInstance(sentinelRedisInstance
*ri
);
/* Value destructor for instance dictionaries: releases the
 * sentinelRedisInstance stored as value. 'privdata' is unused. */
void dictInstancesValDestructor (void *privdata, void *obj) {
    (void)privdata;
    releaseSentinelRedisInstance(obj);
}
304 /* Instance name (sds) -> instance (sentinelRedisInstance pointer)
306 * also used for: sentinelRedisInstance->sentinels dictionary that maps
307 * sentinels ip:port to last seen time in Pub/Sub hello message. */
308 dictType instancesDictType
= {
309 dictSdsHash
, /* hash function */
312 dictSdsKeyCompare
, /* key compare */
313 NULL
, /* key destructor */
314 dictInstancesValDestructor
/* val destructor */
317 /* Instance runid (sds) -> votes (long casted to void*)
319 * This is useful into sentinelGetObjectiveLeader() function in order to
320 * count the votes and understand who is the leader. */
321 dictType leaderVotesDictType
= {
322 dictSdsHash
, /* hash function */
325 dictSdsKeyCompare
, /* key compare */
326 NULL
, /* key destructor */
327 NULL
/* val destructor */
330 /* =========================== Initialization =============================== */
332 void sentinelCommand(redisClient
*c
);
334 struct redisCommand sentinelcmds
[] = {
335 {"ping",pingCommand
,1,"",0,NULL
,0,0,0,0,0},
336 {"sentinel",sentinelCommand
,-2,"",0,NULL
,0,0,0,0,0},
337 {"subscribe",subscribeCommand
,-2,"",0,NULL
,0,0,0,0,0},
338 {"unsubscribe",unsubscribeCommand
,-1,"",0,NULL
,0,0,0,0,0},
339 {"psubscribe",psubscribeCommand
,-2,"",0,NULL
,0,0,0,0,0},
340 {"punsubscribe",punsubscribeCommand
,-1,"",0,NULL
,0,0,0,0,0}
343 /* This function overwrites a few normal Redis config default with Sentinel
344 * specific defaults. */
345 void initSentinelConfig(void) {
346 server
.port
= REDIS_SENTINEL_PORT
;
349 /* Perform the Sentinel mode initialization. */
350 void initSentinel(void) {
353 /* Remove usual Redis commands from the command table, then just add
354 * the SENTINEL command. */
355 dictEmpty(server
.commands
);
356 for (j
= 0; j
< sizeof(sentinelcmds
)/sizeof(sentinelcmds
[0]); j
++) {
358 struct redisCommand
*cmd
= sentinelcmds
+j
;
360 retval
= dictAdd(server
.commands
, sdsnew(cmd
->name
), cmd
);
361 redisAssert(retval
== DICT_OK
);
364 /* Initialize various data structures. */
365 sentinel
.masters
= dictCreate(&instancesDictType
,NULL
);
367 sentinel
.tilt_start_time
= mstime();
368 sentinel
.previous_time
= mstime();
371 /* ============================== sentinelAddr ============================== */
373 /* Create a sentinelAddr object and return it on success.
374 * On error NULL is returned and errno is set to:
375 * ENOENT: Can't resolve the hostname.
376 * EINVAL: Invalid port number.
378 sentinelAddr
*createSentinelAddr(char *hostname
, int port
) {
382 if (port
<= 0 || port
> 65535) {
386 if (anetResolve(NULL
,hostname
,buf
) == ANET_ERR
) {
390 sa
= zmalloc(sizeof(*sa
));
391 sa
->ip
= sdsnew(buf
);
396 /* Free a Sentinel address. Can't fail. */
397 void releaseSentinelAddr(sentinelAddr
*sa
) {
402 /* =========================== Events notification ========================== */
/* Placeholder for running the user notification script 'scriptpath' with
 * the event 'type' and message 'msg'. Currently a no-op. */
void sentinelCallNotificationScript(char *scriptpath, char *type, char *msg) {
    /* TODO: implement it. */
    (void)scriptpath;
    (void)type;
    (void)msg;
}
408 /* Send an event to log, pub/sub, user notification script.
410 * 'level' is the log level for logging. Only REDIS_WARNING events will trigger
411 * the execution of the user notification script.
413 * 'type' is the message type, also used as a pub/sub channel name.
415 * 'ri', is the redis instance target of this event if applicable, and is
416 * used to obtain the path of the notification script to execute.
418 * The remaining arguments are printf-alike.
419 * If the format specifier starts with the two characters "%@" then ri is
420 * not NULL, and the message is prefixed with an instance identifier in the
423 * <instance type> <instance name> <ip> <port>
425 * If the instance type is not master, than the additional string is
426 * added to specify the originating master:
428 * @ <master name> <master ip> <master port>
430 * Any other specifier after "%@" is processed by printf itself.
432 void sentinelEvent(int level
, char *type
, sentinelRedisInstance
*ri
,
433 const char *fmt
, ...) {
435 char msg
[REDIS_MAX_LOGMSG_LEN
];
436 robj
*channel
, *payload
;
439 if (fmt
[0] == '%' && fmt
[1] == '@') {
440 sentinelRedisInstance
*master
= (ri
->flags
& SRI_MASTER
) ?
444 snprintf(msg
, sizeof(msg
), "%s %s %s %d @ %s %s %d",
445 sentinelRedisInstanceTypeStr(ri
),
446 ri
->name
, ri
->addr
->ip
, ri
->addr
->port
,
447 master
->name
, master
->addr
->ip
, master
->addr
->port
);
449 snprintf(msg
, sizeof(msg
), "%s %s %s %d",
450 sentinelRedisInstanceTypeStr(ri
),
451 ri
->name
, ri
->addr
->ip
, ri
->addr
->port
);
458 /* Use vsprintf for the rest of the formatting if any. */
459 if (fmt
[0] != '\0') {
461 vsnprintf(msg
+strlen(msg
), sizeof(msg
)-strlen(msg
), fmt
, ap
);
465 /* Log the message if the log level allows it to be logged. */
466 if (level
>= server
.verbosity
)
467 redisLog(level
,"%s %s",type
,msg
);
469 /* Publish the message via Pub/Sub if it's not a debugging one. */
470 if (level
!= REDIS_DEBUG
) {
471 channel
= createStringObject(type
,strlen(type
));
472 payload
= createStringObject(msg
,strlen(msg
));
473 pubsubPublishMessage(channel
,payload
);
474 decrRefCount(channel
);
475 decrRefCount(payload
);
478 /* Call the notification script if applicable. */
479 if (level
== REDIS_WARNING
&& ri
!= NULL
) {
480 sentinelRedisInstance
*master
= (ri
->flags
& SRI_MASTER
) ?
482 if (master
->notify_script
) {
483 sentinelCallNotificationScript(master
->notify_script
,type
,msg
);
488 /* ========================== sentinelRedisInstance ========================= */
490 /* Create a redis instance, the following fields must be populated by the
492 * runid: set to NULL but will be populated once INFO output is received.
493 * info_refresh: is set to 0 to mean that we never received INFO so far.
495 * If SRI_MASTER is set into initial flags the instance is added to
496 * sentinel.masters table.
498 * if SRI_SLAVE or SRI_SENTINEL is set then 'master' must be not NULL and the
499 * instance is added into master->slaves or master->sentinels table.
501 * If the instance is a slave or sentinel, the name parameter is ignored and
502 * is created automatically as hostname:port.
504 * The function fails if hostname can't be resolved or port is out of range.
505 * When this happens NULL is returned and errno is set accordingly to the
506 * createSentinelAddr() function.
508 * The function may also fail and return NULL with errno set to EBUSY if
509 * a master or slave with the same name already exists. */
510 sentinelRedisInstance
*createSentinelRedisInstance(char *name
, int flags
, char *hostname
, int port
, int quorum
, sentinelRedisInstance
*master
) {
511 sentinelRedisInstance
*ri
;
514 char slavename
[128], *sdsname
;
516 redisAssert(flags
& (SRI_MASTER
|SRI_SLAVE
|SRI_SENTINEL
));
517 redisAssert((flags
& SRI_MASTER
) || master
!= NULL
);
519 /* Check address validity. */
520 addr
= createSentinelAddr(hostname
,port
);
521 if (addr
== NULL
) return NULL
;
523 /* For slaves and sentinel we use ip:port as name. */
524 if (flags
& (SRI_SLAVE
|SRI_SENTINEL
)) {
525 snprintf(slavename
,sizeof(slavename
),"%s:%d",hostname
,port
);
529 /* Make sure the entry is not duplicated. This may happen when the same
530 * name for a master is used multiple times inside the configuration or
531 * if we try to add multiple times a slave or sentinel with same ip/port
533 if (flags
& SRI_MASTER
) table
= sentinel
.masters
;
534 else if (flags
& SRI_SLAVE
) table
= master
->slaves
;
535 else if (flags
& SRI_SENTINEL
) table
= master
->sentinels
;
536 sdsname
= sdsnew(name
);
537 if (dictFind(table
,sdsname
)) {
543 /* Create the instance object. */
544 ri
= zmalloc(sizeof(*ri
));
545 /* Note that all the instances are started in the disconnected state,
546 * the event loop will take care of connecting them. */
547 ri
->flags
= flags
| SRI_DISCONNECTED
;
553 ri
->pending_commands
= 0;
554 ri
->cc_conn_time
= 0;
555 ri
->pc_conn_time
= 0;
556 ri
->pc_last_activity
= 0;
557 ri
->last_avail_time
= mstime();
558 ri
->last_pong_time
= mstime();
559 ri
->last_pub_time
= mstime();
560 ri
->last_hello_time
= mstime();
561 ri
->last_master_down_reply_time
= mstime();
562 ri
->s_down_since_time
= 0;
563 ri
->o_down_since_time
= 0;
564 ri
->down_after_period
= master
? master
->down_after_period
:
565 SENTINEL_DOWN_AFTER_PERIOD
;
566 ri
->master_link_down_time
= 0;
567 ri
->slave_priority
= SENTINEL_DEFAULT_SLAVE_PRIORITY
;
568 ri
->slave_reconf_sent_time
= 0;
569 ri
->slave_master_host
= NULL
;
570 ri
->slave_master_port
= 0;
571 ri
->slave_master_link_status
= SENTINEL_MASTER_LINK_STATUS_DOWN
;
572 ri
->sentinels
= dictCreate(&instancesDictType
,NULL
);
574 ri
->parallel_syncs
= SENTINEL_DEFAULT_PARALLEL_SYNCS
;
576 ri
->slaves
= dictCreate(&instancesDictType
,NULL
);
577 ri
->info_refresh
= 0;
579 /* Failover state. */
581 ri
->failover_state
= SENTINEL_FAILOVER_STATE_NONE
;
582 ri
->failover_state_change_time
= 0;
583 ri
->failover_start_time
= 0;
584 ri
->failover_timeout
= SENTINEL_DEFAULT_FAILOVER_TIMEOUT
;
585 ri
->promoted_slave
= NULL
;
586 ri
->notify_script
= NULL
;
587 ri
->client_reconfig_script
= NULL
;
589 /* Add into the right table. */
590 dictAdd(table
, ri
->name
, ri
);
594 /* Release this instance and all its slaves, sentinels, hiredis connections.
595 * This function also takes care of unlinking the instance from the main
596 * masters table (if it is a master) or from its master sentinels/slaves table
597 * if it is a slave or sentinel. */
598 void releaseSentinelRedisInstance(sentinelRedisInstance
*ri
) {
599 /* Release all its slaves or sentinels if any. */
600 dictRelease(ri
->sentinels
);
601 dictRelease(ri
->slaves
);
603 /* Release hiredis connections. */
604 if (ri
->cc
) sentinelKillLink(ri
,ri
->cc
);
605 if (ri
->pc
) sentinelKillLink(ri
,ri
->pc
);
607 /* Free other resources. */
610 sdsfree(ri
->notify_script
);
611 sdsfree(ri
->client_reconfig_script
);
612 sdsfree(ri
->slave_master_host
);
614 releaseSentinelAddr(ri
->addr
);
616 /* Clear state into the master if needed. */
617 if ((ri
->flags
& SRI_SLAVE
) && (ri
->flags
& SRI_PROMOTED
) && ri
->master
)
618 ri
->master
->promoted_slave
= NULL
;
623 /* Lookup a slave in a master Redis instance, by ip and port. */
624 sentinelRedisInstance
*sentinelRedisInstanceLookupSlave(
625 sentinelRedisInstance
*ri
, char *ip
, int port
)
628 sentinelRedisInstance
*slave
;
630 redisAssert(ri
->flags
& SRI_MASTER
);
631 key
= sdscatprintf(sdsempty(),"%s:%d",ip
,port
);
632 slave
= dictFetchValue(ri
->slaves
,key
);
637 /* Return the name of the type of the instance as a string. */
638 const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance
*ri
) {
639 if (ri
->flags
& SRI_MASTER
) return "master";
640 else if (ri
->flags
& SRI_SLAVE
) return "slave";
641 else if (ri
->flags
& SRI_SENTINEL
) return "sentinel";
642 else return "unknown";
645 /* This function removes all the instances found in the dictionary of instances
646 * 'd', having either:
648 * 1) The same ip/port as specified.
651 * "1" and "2" don't need to verify at the same time, just one is enough.
652 * If "runid" is NULL it is not checked.
653 * Similarly if "ip" is NULL it is not checked.
655 * This function is useful because every time we add a new Sentinel into
656 * a master's Sentinels dictionary, we want to be very sure about not
657 * having duplicated instances for any reason. This is so important because
658 * we use those other sentinels in order to run our quorum protocol to
659 * understand if it's time to proceeed with the fail over.
661 * Making sure no duplication is possible we greately improve the robustness
662 * of the quorum (otherwise we may end counting the same instance multiple
663 * times for some reason).
665 * The function returns the number of Sentinels removed. */
666 int removeMatchingSentinelsFromMaster(sentinelRedisInstance
*master
, char *ip
, int port
, char *runid
) {
671 di
= dictGetSafeIterator(master
->sentinels
);
672 while((de
= dictNext(di
)) != NULL
) {
673 sentinelRedisInstance
*ri
= dictGetVal(de
);
675 if ((ri
->runid
&& runid
&& strcmp(ri
->runid
,runid
) == 0) ||
676 (ip
&& strcmp(ri
->addr
->ip
,ip
) == 0 && port
== ri
->addr
->port
))
678 dictDelete(master
->sentinels
,ri
->name
);
682 dictReleaseIterator(di
);
686 /* Search an instance with the same runid, ip and port into a dictionary
687 * of instances. Return NULL if not found, otherwise return the instance
690 * runid or ip can be NULL. In such a case the search is performed only
691 * by the non-NULL field. */
692 sentinelRedisInstance
*getSentinelRedisInstanceByAddrAndRunID(dict
*instances
, char *ip
, int port
, char *runid
) {
695 sentinelRedisInstance
*instance
= NULL
;
697 redisAssert(ip
|| runid
); /* User must pass at least one search param. */
698 di
= dictGetIterator(instances
);
699 while((de
= dictNext(di
)) != NULL
) {
700 sentinelRedisInstance
*ri
= dictGetVal(de
);
702 if (runid
&& !ri
->runid
) continue;
703 if ((runid
== NULL
|| strcmp(ri
->runid
, runid
) == 0) &&
704 (ip
== NULL
|| (strcmp(ri
->addr
->ip
, ip
) == 0 &&
705 ri
->addr
->port
== port
)))
711 dictReleaseIterator(di
);
715 /* Simple master lookup by name */
716 sentinelRedisInstance
*sentinelGetMasterByName(char *name
) {
717 sentinelRedisInstance
*ri
;
718 sds sdsname
= sdsnew(name
);
720 ri
= dictFetchValue(sentinel
.masters
,sdsname
);
725 /* Add the specified flags to all the instances in the specified dictionary. */
726 void sentinelAddFlagsToDictOfRedisInstances(dict
*instances
, int flags
) {
730 di
= dictGetIterator(instances
);
731 while((de
= dictNext(di
)) != NULL
) {
732 sentinelRedisInstance
*ri
= dictGetVal(de
);
735 dictReleaseIterator(di
);
738 /* Remove the specified flags to all the instances in the specified
740 void sentinelDelFlagsToDictOfRedisInstances(dict
*instances
, int flags
) {
744 di
= dictGetIterator(instances
);
745 while((de
= dictNext(di
)) != NULL
) {
746 sentinelRedisInstance
*ri
= dictGetVal(de
);
749 dictReleaseIterator(di
);
752 /* Reset the state of a monitored master:
753 * 1) Remove all slaves.
754 * 2) Remove all sentinels.
755 * 3) Remove most of the flags resulting from runtime operations.
756 * 4) Reset timers to their default value.
757 * 5) In the process of doing this undo the failover if in progress.
758 * 6) Disconnect the connections with the master (will reconnect automatically).
760 void sentinelResetMaster(sentinelRedisInstance
*ri
, int flags
) {
761 redisAssert(ri
->flags
& SRI_MASTER
);
762 dictRelease(ri
->slaves
);
763 dictRelease(ri
->sentinels
);
764 ri
->slaves
= dictCreate(&instancesDictType
,NULL
);
765 ri
->sentinels
= dictCreate(&instancesDictType
,NULL
);
766 if (ri
->cc
) sentinelKillLink(ri
,ri
->cc
);
767 if (ri
->pc
) sentinelKillLink(ri
,ri
->pc
);
768 ri
->pending_commands
= 0;
769 ri
->flags
&= SRI_MASTER
|SRI_CAN_FAILOVER
|SRI_DISCONNECTED
;
774 ri
->failover_state
= SENTINEL_FAILOVER_STATE_NONE
;
775 ri
->failover_state_change_time
= 0;
776 ri
->failover_start_time
= 0;
777 ri
->promoted_slave
= NULL
;
779 sdsfree(ri
->slave_master_host
);
781 ri
->slave_master_host
= NULL
;
782 ri
->last_avail_time
= mstime();
783 ri
->last_pong_time
= mstime();
784 if (flags
& SENTINEL_GENERATE_EVENT
)
785 sentinelEvent(REDIS_WARNING
,"+reset-master",ri
,"%@");
788 /* Call sentinelResetMaster() on every master with a name matching the specified
790 int sentinelResetMastersByPattern(char *pattern
, int flags
) {
795 di
= dictGetIterator(sentinel
.masters
);
796 while((de
= dictNext(di
)) != NULL
) {
797 sentinelRedisInstance
*ri
= dictGetVal(de
);
800 if (stringmatch(pattern
,ri
->name
,0)) {
801 sentinelResetMaster(ri
,flags
);
806 dictReleaseIterator(di
);
810 /* Reset the specified master with sentinelResetMaster(), and also change
811 * the ip:port address, but take the name of the instance unmodified.
813 * This is used to handle the +switch-master and +redirect-to-master events.
815 * The function returns REDIS_ERR if the address can't be resolved for some
816 * reason. Otherwise REDIS_OK is returned.
818 * TODO: make this reset so that original sentinels are re-added with
819 * same ip / port / runid.
822 int sentinelResetMasterAndChangeAddress(sentinelRedisInstance
*master
, char *ip
, int port
) {
823 sentinelAddr
*oldaddr
, *newaddr
;
825 newaddr
= createSentinelAddr(ip
,port
);
826 if (newaddr
== NULL
) return REDIS_ERR
;
827 sentinelResetMaster(master
,SENTINEL_NO_FLAGS
);
828 oldaddr
= master
->addr
;
829 master
->addr
= newaddr
;
830 /* Release the old address at the end so we are safe even if the function
831 * gets the master->addr->ip and master->addr->port as arguments. */
832 releaseSentinelAddr(oldaddr
);
836 /* ============================ Config handling ============================= */
837 char *sentinelHandleConfiguration(char **argv
, int argc
) {
838 sentinelRedisInstance
*ri
;
840 if (!strcasecmp(argv
[0],"monitor") && argc
== 5) {
841 /* monitor <name> <host> <port> <quorum> */
842 int quorum
= atoi(argv
[4]);
844 if (quorum
<= 0) return "Quorum must be 1 or greater.";
845 if (createSentinelRedisInstance(argv
[1],SRI_MASTER
,argv
[2],
846 atoi(argv
[3]),quorum
,NULL
) == NULL
)
849 case EBUSY
: return "Duplicated master name.";
850 case ENOENT
: return "Can't resolve master instance hostname.";
851 case EINVAL
: return "Invalid port number";
854 } else if (!strcasecmp(argv
[0],"down-after-milliseconds") && argc
== 3) {
855 /* down-after-milliseconds <name> <milliseconds> */
856 ri
= sentinelGetMasterByName(argv
[1]);
857 if (!ri
) return "No such master with specified name.";
858 ri
->down_after_period
= atoi(argv
[2]);
859 if (ri
->down_after_period
<= 0)
860 return "negative or zero time parameter.";
861 } else if (!strcasecmp(argv
[0],"failover-timeout") && argc
== 3) {
862 /* failover-timeout <name> <milliseconds> */
863 ri
= sentinelGetMasterByName(argv
[1]);
864 if (!ri
) return "No such master with specified name.";
865 ri
->failover_timeout
= atoi(argv
[2]);
866 if (ri
->failover_timeout
<= 0)
867 return "negative or zero time parameter.";
868 } else if (!strcasecmp(argv
[0],"can-failover") && argc
== 3) {
869 /* can-failover <name> <yes/no> */
870 int yesno
= yesnotoi(argv
[2]);
872 ri
= sentinelGetMasterByName(argv
[1]);
873 if (!ri
) return "No such master with specified name.";
874 if (yesno
== -1) return "Argument must be either yes or no.";
876 ri
->flags
|= SRI_CAN_FAILOVER
;
878 ri
->flags
&= ~SRI_CAN_FAILOVER
;
879 } else if (!strcasecmp(argv
[0],"parallel-syncs") && argc
== 3) {
880 /* parallel-syncs <name> <milliseconds> */
881 ri
= sentinelGetMasterByName(argv
[1]);
882 if (!ri
) return "No such master with specified name.";
883 ri
->parallel_syncs
= atoi(argv
[2]);
885 return "Unrecognized sentinel configuration statement.";
890 /* ====================== hiredis connection handling ======================= */
892 /* Completely disconnect an hiredis link from an instance. */
893 void sentinelKillLink(sentinelRedisInstance
*ri
, redisAsyncContext
*c
) {
894 if (ri
->cc
== c
) ri
->cc
= NULL
;
895 if (ri
->pc
== c
) ri
->pc
= NULL
;
897 ri
->flags
|= SRI_DISCONNECTED
;
901 /* This function takes an hiredis context that is in an error condition
902 * and make sure to mark the instance as disconnected performing the
 * Note: we don't free the hiredis context as hiredis will do it for us
 * for async connections. */
907 void sentinelDisconnectInstanceFromContext(const redisAsyncContext
*c
) {
908 sentinelRedisInstance
*ri
= c
->data
;
911 if (ri
== NULL
) return; /* The instance no longer exists. */
913 pubsub
= (ri
->pc
== c
);
914 sentinelEvent(REDIS_DEBUG
, pubsub
? "-pubsub-link" : "-cmd-link", ri
,
915 "%@ #%s", c
->errstr
);
920 ri
->flags
|= SRI_DISCONNECTED
;
923 void sentinelLinkEstablishedCallback(const redisAsyncContext
*c
, int status
) {
924 if (status
!= REDIS_OK
) {
925 sentinelDisconnectInstanceFromContext(c
);
927 sentinelRedisInstance
*ri
= c
->data
;
928 int pubsub
= (ri
->pc
== c
);
930 sentinelEvent(REDIS_DEBUG
, pubsub
? "+pubsub-link" : "+cmd-link", ri
,
935 void sentinelDisconnectCallback(const redisAsyncContext
*c
, int status
) {
936 sentinelDisconnectInstanceFromContext(c
);
939 /* Create the async connections for the specified instance if the instance
940 * is disconnected. Note that the SRI_DISCONNECTED flag is set even if just
941 * one of the two links (commands and pub/sub) is missing. */
942 void sentinelReconnectInstance(sentinelRedisInstance
*ri
) {
943 if (!(ri
->flags
& SRI_DISCONNECTED
)) return;
945 /* Commands connection. */
946 if (ri
->cc
== NULL
) {
947 ri
->cc
= redisAsyncConnect(ri
->addr
->ip
,ri
->addr
->port
);
949 sentinelEvent(REDIS_DEBUG
,"-cmd-link-reconnection",ri
,"%@ #%s",
951 sentinelKillLink(ri
,ri
->cc
);
953 ri
->cc_conn_time
= mstime();
955 redisAeAttach(server
.el
,ri
->cc
);
956 redisAsyncSetConnectCallback(ri
->cc
,
957 sentinelLinkEstablishedCallback
);
958 redisAsyncSetDisconnectCallback(ri
->cc
,
959 sentinelDisconnectCallback
);
963 if ((ri
->flags
& SRI_MASTER
) && ri
->pc
== NULL
) {
964 ri
->pc
= redisAsyncConnect(ri
->addr
->ip
,ri
->addr
->port
);
966 sentinelEvent(REDIS_DEBUG
,"-pubsub-link-reconnection",ri
,"%@ #%s",
968 sentinelKillLink(ri
,ri
->pc
);
972 ri
->pc_conn_time
= mstime();
974 redisAeAttach(server
.el
,ri
->pc
);
975 redisAsyncSetConnectCallback(ri
->pc
,
976 sentinelLinkEstablishedCallback
);
977 redisAsyncSetDisconnectCallback(ri
->pc
,
978 sentinelDisconnectCallback
);
979 /* Now we subscribe to the Sentinels "Hello" channel. */
980 retval
= redisAsyncCommand(ri
->pc
,
981 sentinelReceiveHelloMessages
, NULL
, "SUBSCRIBE %s",
982 SENTINEL_HELLO_CHANNEL
);
983 if (retval
!= REDIS_OK
) {
984 /* If we can't subscribe, the Pub/Sub connection is useless
985 * and we can simply disconnect it and try again. */
986 sentinelKillLink(ri
,ri
->pc
);
991 /* Clear the DISCONNECTED flags only if we have both the connections
992 * (or just the commands connection if this is a slave or a
993 * sentinel instance). */
994 if (ri
->cc
&& (ri
->flags
& (SRI_SLAVE
|SRI_SENTINEL
) || ri
->pc
))
995 ri
->flags
&= ~SRI_DISCONNECTED
;
998 /* ======================== Redis instances pinging ======================== */
1000 /* Process the INFO output from masters. */
1001 void sentinelRefreshInstanceInfo(sentinelRedisInstance
*ri
, const char *info
) {
1005 int runid_changed
= 0; /* true if runid changed. */
1006 int first_runid
= 0; /* true if this is the first runid we receive. */
1008 /* The following fields must be reset to a given value in the case they
1009 * are not found at all in the INFO output. */
1010 ri
->master_link_down_time
= 0;
1012 /* Process line by line. */
1013 lines
= sdssplitlen(info
,strlen(info
),"\r\n",2,&numlines
);
1014 for (j
= 0; j
< numlines
; j
++) {
1015 sentinelRedisInstance
*slave
;
1018 /* run_id:<40 hex chars>*/
1019 if (sdslen(l
) >= 47 && !memcmp(l
,"run_id:",7)) {
1020 if (ri
->runid
== NULL
) {
1021 ri
->runid
= sdsnewlen(l
+7,40);
1024 if (strncmp(ri
->runid
,l
+7,40) != 0) {
1026 sentinelEvent(REDIS_NOTICE
,"+reboot",ri
,"%@");
1028 ri
->runid
= sdsnewlen(l
+7,40);
1033 /* slave0:<ip>,<port>,<state> */
1034 if ((ri
->flags
& SRI_MASTER
) &&
1036 !memcmp(l
,"slave",5) && isdigit(l
[5]))
1038 char *ip
, *port
, *end
;
1040 ip
= strchr(l
,':'); if (!ip
) continue;
1041 ip
++; /* Now ip points to start of ip address. */
1042 port
= strchr(ip
,','); if (!port
) continue;
1043 *port
= '\0'; /* nul term for easy access. */
1044 port
++; /* Now port points to start of port number. */
1045 end
= strchr(port
,','); if (!end
) continue;
1046 *end
= '\0'; /* nul term for easy access. */
1048 /* Check if we already have this slave into our table,
1049 * otherwise add it. */
1050 if (sentinelRedisInstanceLookupSlave(ri
,ip
,atoi(port
)) == NULL
) {
1051 if ((slave
= createSentinelRedisInstance(NULL
,SRI_SLAVE
,ip
,
1052 atoi(port
), ri
->quorum
,ri
)) != NULL
)
1054 sentinelEvent(REDIS_NOTICE
,"+slave",slave
,"%@");
1059 /* master_link_down_since_seconds:<seconds> */
1060 if (sdslen(l
) >= 32 &&
1061 !memcmp(l
,"master_link_down_since_seconds",30))
1063 ri
->master_link_down_time
= strtoll(l
+31,NULL
,10)*1000;
1067 if (!memcmp(l
,"role:master",11)) role
= SRI_MASTER
;
1068 else if (!memcmp(l
,"role:slave",10)) role
= SRI_SLAVE
;
1070 if (role
== SRI_SLAVE
) {
1071 /* master_host:<host> */
1072 if (sdslen(l
) >= 12 && !memcmp(l
,"master_host:",12)) {
1073 sdsfree(ri
->slave_master_host
);
1074 ri
->slave_master_host
= sdsnew(l
+12);
1077 /* master_port:<port> */
1078 if (sdslen(l
) >= 12 && !memcmp(l
,"master_port:",12))
1079 ri
->slave_master_port
= atoi(l
+12);
1081 /* master_link_status:<status> */
1082 if (sdslen(l
) >= 19 && !memcmp(l
,"master_link_status:",19)) {
1083 ri
->slave_master_link_status
=
1084 (strcasecmp(l
+19,"up") == 0) ?
1085 SENTINEL_MASTER_LINK_STATUS_UP
:
1086 SENTINEL_MASTER_LINK_STATUS_DOWN
;
1090 ri
->info_refresh
= mstime();
1091 sdsfreesplitres(lines
,numlines
);
1093 if (sentinel
.tilt
) return;
1095 /* Act if a master turned into a slave. */
1096 if ((ri
->flags
& SRI_MASTER
) && role
== SRI_SLAVE
) {
1097 if (first_runid
&& ri
->slave_master_host
) {
1098 /* If it is the first time we receive INFO from it, but it's
1099 * a slave while it was configured as a master, we want to monitor
1100 * its master instead. */
1101 sentinelEvent(REDIS_WARNING
,"+redirect-to-master",ri
,
1103 ri
->name
, ri
->addr
->ip
, ri
->addr
->port
,
1104 ri
->slave_master_host
, ri
->slave_master_port
);
1105 sentinelResetMasterAndChangeAddress(ri
,ri
->slave_master_host
,
1106 ri
->slave_master_port
);
1111 /* Act if a slave turned into a master. */
1112 if ((ri
->flags
& SRI_SLAVE
) && role
== SRI_MASTER
) {
1113 if (!(ri
->master
->flags
& SRI_FAILOVER_IN_PROGRESS
) &&
1114 (runid_changed
|| first_runid
))
1116 /* If a slave turned into a master, but at the same time the
1117 * runid has changed, or it is simply the first time we see and
1118 * INFO output from this instance, this is a reboot with a wrong
1121 * Log the event and remove the slave. */
1124 sentinelEvent(REDIS_WARNING
,"-slave-restart-as-master",ri
,"%@ #removing it from the attached slaves");
1125 retval
= dictDelete(ri
->master
->slaves
,ri
->name
);
1126 redisAssert(retval
== REDIS_OK
);
1128 } else if (ri
->flags
& SRI_PROMOTED
) {
1129 /* If this is a promoted slave we can change state to the
1130 * failover state machine. */
1132 (ri
->master
->flags
& SRI_FAILOVER_IN_PROGRESS
) &&
1133 (ri
->master
->flags
& SRI_I_AM_THE_LEADER
) &&
1134 (ri
->master
->failover_state
==
1135 SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
))
1137 ri
->master
->failover_state
= SENTINEL_FAILOVER_STATE_RECONF_SLAVES
;
1138 ri
->master
->failover_state_change_time
= mstime();
1139 sentinelEvent(REDIS_WARNING
,"+promoted-slave",ri
,"%@");
1140 sentinelEvent(REDIS_WARNING
,"+failover-state-reconf-slaves",
1144 /* Otherwise we interpret this as the start of the failover. */
1146 (ri
->master
->flags
& SRI_FAILOVER_IN_PROGRESS
) == 0)
1148 ri
->master
->flags
|= SRI_FAILOVER_IN_PROGRESS
;
1149 sentinelEvent(REDIS_WARNING
,"failover-detected",ri
->master
,"%@");
1150 ri
->master
->failover_state
= SENTINEL_FAILOVER_STATE_DETECT_END
;
1151 ri
->master
->failover_state_change_time
= mstime();
1152 ri
->master
->promoted_slave
= ri
;
1153 ri
->flags
|= SRI_PROMOTED
;
1154 /* We are an observer, so we can only assume that the leader
1155 * is reconfiguring the slave instances. For this reason we
1156 * set all the instances as RECONF_SENT waiting for progresses
1158 sentinelAddFlagsToDictOfRedisInstances(ri
->master
->slaves
,
1164 /* Detect if the slave that is in the process of being reconfigured
1166 if ((ri
->flags
& SRI_SLAVE
) && role
== SRI_SLAVE
&&
1167 (ri
->flags
& (SRI_RECONF_SENT
|SRI_RECONF_INPROG
)))
1169 /* SRI_RECONF_SENT -> SRI_RECONF_INPROG. */
1170 if ((ri
->flags
& SRI_RECONF_SENT
) &&
1171 ri
->slave_master_host
&&
1172 strcmp(ri
->slave_master_host
,
1173 ri
->master
->promoted_slave
->addr
->ip
) == 0 &&
1174 ri
->slave_master_port
== ri
->master
->promoted_slave
->addr
->port
)
1176 ri
->flags
&= ~SRI_RECONF_SENT
;
1177 ri
->flags
|= SRI_RECONF_INPROG
;
1178 sentinelEvent(REDIS_NOTICE
,"+slave-reconf-inprog",ri
,"%@");
1181 /* SRI_RECONF_INPROG -> SRI_RECONF_DONE */
1182 if ((ri
->flags
& SRI_RECONF_INPROG
) &&
1183 ri
->slave_master_link_status
== SENTINEL_MASTER_LINK_STATUS_UP
)
1185 ri
->flags
&= ~SRI_RECONF_INPROG
;
1186 ri
->flags
|= SRI_RECONF_DONE
;
1187 sentinelEvent(REDIS_NOTICE
,"+slave-reconf-done",ri
,"%@");
1188 /* If we are moving forward (a new slave is now configured)
1189 * we update the change_time as we are conceptually passing
1190 * to the next slave. */
1191 ri
->failover_state_change_time
= mstime();
1196 void sentinelInfoReplyCallback(redisAsyncContext
*c
, void *reply
, void *privdata
) {
1197 sentinelRedisInstance
*ri
= c
->data
;
1200 if (ri
) ri
->pending_commands
--;
1201 if (!reply
|| !ri
) return;
1204 if (r
->type
== REDIS_REPLY_STRING
) {
1205 sentinelRefreshInstanceInfo(ri
,r
->str
);
1209 /* Just discard the reply. We use this when we are not monitoring the return
1210 * value of the command but its effects directly. */
1211 void sentinelDiscardReplyCallback(redisAsyncContext
*c
, void *reply
, void *privdata
) {
1212 sentinelRedisInstance
*ri
= c
->data
;
1214 if (ri
) ri
->pending_commands
--;
1217 void sentinelPingReplyCallback(redisAsyncContext
*c
, void *reply
, void *privdata
) {
1218 sentinelRedisInstance
*ri
= c
->data
;
1221 if (ri
) ri
->pending_commands
--;
1222 if (!reply
|| !ri
) return;
1225 if (r
->type
== REDIS_REPLY_STATUS
||
1226 r
->type
== REDIS_REPLY_ERROR
) {
1227 /* Update the "instance available" field only if this is an
1228 * acceptable reply. */
1229 if (strncmp(r
->str
,"PONG",4) == 0 ||
1230 strncmp(r
->str
,"LOADING",7) == 0 ||
1231 strncmp(r
->str
,"MASTERDOWN",10) == 0)
1233 ri
->last_avail_time
= mstime();
1236 ri
->last_pong_time
= mstime();
1239 /* This is called when we get the reply about the PUBLISH command we send
1240 * to the master to advertise this sentinel. */
1241 void sentinelPublishReplyCallback(redisAsyncContext
*c
, void *reply
, void *privdata
) {
1242 sentinelRedisInstance
*ri
= c
->data
;
1245 if (ri
) ri
->pending_commands
--;
1246 if (!reply
|| !ri
) return;
1249 /* Only update pub_time if we actually published our message. Otherwise
1250 * we'll retry against in 100 milliseconds. */
1251 if (r
->type
!= REDIS_REPLY_ERROR
)
1252 ri
->last_pub_time
= mstime();
1255 /* This is our Pub/Sub callback for the Hello channel. It's useful in order
1256 * to discover other sentinels attached at the same master. */
1257 void sentinelReceiveHelloMessages(redisAsyncContext
*c
, void *reply
, void *privdata
) {
1258 sentinelRedisInstance
*ri
= c
->data
;
1261 if (!reply
|| !ri
) return;
1264 /* Update the last activity in the pubsub channel. Note that since we
1265 * receive our messages as well this timestamp can be used to detect
1266 * if the link is probably diconnected even if it seems otherwise. */
1267 ri
->pc_last_activity
= mstime();
1269 /* Sanity check in the reply we expect, so that the code that follows
1270 * can avoid to check for details. */
1271 if (r
->type
!= REDIS_REPLY_ARRAY
||
1273 r
->element
[0]->type
!= REDIS_REPLY_STRING
||
1274 r
->element
[1]->type
!= REDIS_REPLY_STRING
||
1275 r
->element
[2]->type
!= REDIS_REPLY_STRING
||
1276 strcmp(r
->element
[0]->str
,"message") != 0) return;
1278 /* We are not interested in meeting ourselves */
1279 if (strstr(r
->element
[2]->str
,server
.runid
) != NULL
) return;
1282 int numtokens
, port
, removed
, canfailover
;
1283 char **token
= sdssplitlen(r
->element
[2]->str
,
1286 sentinelRedisInstance
*sentinel
;
1288 if (numtokens
== 4) {
1289 /* First, try to see if we already have this sentinel. */
1290 port
= atoi(token
[1]);
1291 canfailover
= atoi(token
[3]);
1292 sentinel
= getSentinelRedisInstanceByAddrAndRunID(
1293 ri
->sentinels
,token
[0],port
,token
[2]);
1296 /* If not, remove all the sentinels that have the same runid
1297 * OR the same ip/port, because it's either a restart or a
1298 * network topology change. */
1299 removed
= removeMatchingSentinelsFromMaster(ri
,token
[0],port
,
1302 sentinelEvent(REDIS_NOTICE
,"-dup-sentinel",ri
,
1303 "%@ #duplicate of %s:%d or %s",
1304 token
[0],port
,token
[2]);
1307 /* Add the new sentinel. */
1308 sentinel
= createSentinelRedisInstance(NULL
,SRI_SENTINEL
,
1309 token
[0],port
,ri
->quorum
,ri
);
1311 sentinelEvent(REDIS_NOTICE
,"+sentinel",sentinel
,"%@");
1312 /* The runid is NULL after a new instance creation and
1313 * for Sentinels we don't have a later chance to fill it,
1315 sentinel
->runid
= sdsnew(token
[2]);
1319 /* Update the state of the Sentinel. */
1321 sentinel
->last_hello_time
= mstime();
1323 sentinel
->flags
|= SRI_CAN_FAILOVER
;
1325 sentinel
->flags
&= ~SRI_CAN_FAILOVER
;
1328 sdsfreesplitres(token
,numtokens
);
1332 void sentinelPingInstance(sentinelRedisInstance
*ri
) {
1333 mstime_t now
= mstime();
1334 mstime_t info_period
;
1337 /* Return ASAP if we have already a PING or INFO already pending, or
1338 * in the case the instance is not properly connected. */
1339 if (ri
->flags
& SRI_DISCONNECTED
) return;
1341 /* For INFO, PING, PUBLISH that are not critical commands to send we
1342 * also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
1343 * want to use a lot of memory just because a link is not working
1344 * properly (note that anyway there is a redundant protection about this,
1345 * that is, the link will be disconnected and reconnected if a long
1346 * timeout condition is detected. */
1347 if (ri
->pending_commands
>= SENTINEL_MAX_PENDING_COMMANDS
) return;
1349 /* If this is a slave of a master in O_DOWN condition we start sending
1350 * it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
1351 * period. In this state we want to closely monitor slaves in case they
1352 * are turned into masters by another Sentinel, or by the sysadmin. */
1353 if ((ri
->flags
& SRI_SLAVE
) &&
1354 (ri
->master
->flags
& (SRI_O_DOWN
|SRI_FAILOVER_IN_PROGRESS
))) {
1357 info_period
= SENTINEL_INFO_PERIOD
;
1360 if ((ri
->flags
& SRI_SENTINEL
) == 0 &&
1361 (ri
->info_refresh
== 0 ||
1362 (now
- ri
->info_refresh
) > info_period
))
1364 /* Send INFO to masters and slaves, not sentinels. */
1365 retval
= redisAsyncCommand(ri
->cc
,
1366 sentinelInfoReplyCallback
, NULL
, "INFO");
1367 if (retval
!= REDIS_OK
) return;
1368 ri
->pending_commands
++;
1369 } else if ((now
- ri
->last_pong_time
) > SENTINEL_PING_PERIOD
) {
1370 /* Send PING to all the three kinds of instances. */
1371 retval
= redisAsyncCommand(ri
->cc
,
1372 sentinelPingReplyCallback
, NULL
, "PING");
1373 if (retval
!= REDIS_OK
) return;
1374 ri
->pending_commands
++;
1375 } else if ((ri
->flags
& SRI_MASTER
) &&
1376 (now
- ri
->last_pub_time
) > SENTINEL_PUBLISH_PERIOD
)
1378 /* PUBLISH hello messages only to masters. */
1379 struct sockaddr_in sa
;
1380 socklen_t salen
= sizeof(sa
);
1382 if (getsockname(ri
->cc
->c
.fd
,(struct sockaddr
*)&sa
,&salen
) != -1) {
1385 snprintf(myaddr
,sizeof(myaddr
),"%s:%d:%s:%d",
1386 inet_ntoa(sa
.sin_addr
), server
.port
, server
.runid
,
1387 (ri
->flags
& SRI_CAN_FAILOVER
) != 0);
1388 retval
= redisAsyncCommand(ri
->cc
,
1389 sentinelPublishReplyCallback
, NULL
, "PUBLISH %s %s",
1390 SENTINEL_HELLO_CHANNEL
,myaddr
);
1391 if (retval
!= REDIS_OK
) return;
1392 ri
->pending_commands
++;
1397 /* =========================== SENTINEL command ============================= */
1399 const char *sentinelFailoverStateStr(int state
) {
1401 case SENTINEL_FAILOVER_STATE_NONE
: return "none";
1402 case SENTINEL_FAILOVER_STATE_WAIT_START
: return "wait_start";
1403 case SENTINEL_FAILOVER_STATE_SELECT_SLAVE
: return "select_slave";
1404 case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE
: return "send_slaveof_noone";
1405 case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
: return "wait_promotion";
1406 case SENTINEL_FAILOVER_STATE_RECONF_SLAVES
: return "reconf_slaves";
1407 case SENTINEL_FAILOVER_STATE_ALERT_CLIENTS
: return "alert_clients";
1408 case SENTINEL_FAILOVER_STATE_DETECT_END
: return "detect_end";
1409 case SENTINEL_FAILOVER_STATE_UPDATE_CONFIG
: return "update_config";
1410 default: return "unknown";
1414 /* Redis instance to Redis protocol representation. */
1415 void addReplySentinelRedisInstance(redisClient
*c
, sentinelRedisInstance
*ri
) {
1416 char *flags
= sdsempty();
1420 mbl
= addDeferredMultiBulkLength(c
);
1422 addReplyBulkCString(c
,"name");
1423 addReplyBulkCString(c
,ri
->name
);
1426 addReplyBulkCString(c
,"ip");
1427 addReplyBulkCString(c
,ri
->addr
->ip
);
1430 addReplyBulkCString(c
,"port");
1431 addReplyBulkLongLong(c
,ri
->addr
->port
);
1434 addReplyBulkCString(c
,"runid");
1435 addReplyBulkCString(c
,ri
->runid
? ri
->runid
: "");
1438 addReplyBulkCString(c
,"flags");
1439 if (ri
->flags
& SRI_S_DOWN
) flags
= sdscat(flags
,"s_down,");
1440 if (ri
->flags
& SRI_O_DOWN
) flags
= sdscat(flags
,"o_down,");
1441 if (ri
->flags
& SRI_MASTER
) flags
= sdscat(flags
,"master,");
1442 if (ri
->flags
& SRI_SLAVE
) flags
= sdscat(flags
,"slave,");
1443 if (ri
->flags
& SRI_SENTINEL
) flags
= sdscat(flags
,"sentinel,");
1444 if (ri
->flags
& SRI_DISCONNECTED
) flags
= sdscat(flags
,"disconnected,");
1445 if (ri
->flags
& SRI_MASTER_DOWN
) flags
= sdscat(flags
,"master_down,");
1446 if (ri
->flags
& SRI_FAILOVER_IN_PROGRESS
)
1447 flags
= sdscat(flags
,"failover_in_progress,");
1448 if (ri
->flags
& SRI_I_AM_THE_LEADER
)
1449 flags
= sdscat(flags
,"i_am_the_leader,");
1450 if (ri
->flags
& SRI_PROMOTED
) flags
= sdscat(flags
,"promoted,");
1451 if (ri
->flags
& SRI_RECONF_SENT
) flags
= sdscat(flags
,"reconf_sent,");
1452 if (ri
->flags
& SRI_RECONF_INPROG
) flags
= sdscat(flags
,"reconf_inprog,");
1453 if (ri
->flags
& SRI_RECONF_DONE
) flags
= sdscat(flags
,"reconf_done,");
1455 if (sdslen(flags
) != 0) flags
= sdsrange(flags
,0,-2); /* remove last "," */
1456 addReplyBulkCString(c
,flags
);
1460 addReplyBulkCString(c
,"pending-commands");
1461 addReplyBulkLongLong(c
,ri
->pending_commands
);
1464 if (ri
->flags
& SRI_FAILOVER_IN_PROGRESS
) {
1465 addReplyBulkCString(c
,"failover-state");
1466 addReplyBulkCString(c
,(char*)sentinelFailoverStateStr(ri
->failover_state
));
1470 addReplyBulkCString(c
,"last-ok-ping-reply");
1471 addReplyBulkLongLong(c
,mstime() - ri
->last_avail_time
);
1474 addReplyBulkCString(c
,"last-ping-reply");
1475 addReplyBulkLongLong(c
,mstime() - ri
->last_pong_time
);
1478 if (ri
->flags
& SRI_S_DOWN
) {
1479 addReplyBulkCString(c
,"s-down-time");
1480 addReplyBulkLongLong(c
,mstime()-ri
->s_down_since_time
);
1484 if (ri
->flags
& SRI_O_DOWN
) {
1485 addReplyBulkCString(c
,"o-down-time");
1486 addReplyBulkLongLong(c
,mstime()-ri
->o_down_since_time
);
1490 /* Masters and Slaves */
1491 if (ri
->flags
& (SRI_MASTER
|SRI_SLAVE
)) {
1492 addReplyBulkCString(c
,"info-refresh");
1493 addReplyBulkLongLong(c
,mstime() - ri
->info_refresh
);
1498 if (ri
->flags
& SRI_MASTER
) {
1499 addReplyBulkCString(c
,"num-slaves");
1500 addReplyBulkLongLong(c
,dictSize(ri
->slaves
));
1503 addReplyBulkCString(c
,"num-other-sentinels");
1504 addReplyBulkLongLong(c
,dictSize(ri
->sentinels
));
1507 addReplyBulkCString(c
,"quorum");
1508 addReplyBulkLongLong(c
,ri
->quorum
);
1513 if (ri
->flags
& SRI_SLAVE
) {
1514 addReplyBulkCString(c
,"master-link-down-time");
1515 addReplyBulkLongLong(c
,ri
->master_link_down_time
);
1518 addReplyBulkCString(c
,"master-link-status");
1519 addReplyBulkCString(c
,
1520 (ri
->slave_master_link_status
== SENTINEL_MASTER_LINK_STATUS_UP
) ?
1524 addReplyBulkCString(c
,"master-host");
1525 addReplyBulkCString(c
,
1526 ri
->slave_master_host
? ri
->slave_master_host
: "?");
1529 addReplyBulkCString(c
,"master-port");
1530 addReplyBulkLongLong(c
,ri
->slave_master_port
);
1534 /* Only sentinels */
1535 if (ri
->flags
& SRI_SENTINEL
) {
1536 addReplyBulkCString(c
,"last-hello-message");
1537 addReplyBulkLongLong(c
,mstime() - ri
->last_hello_time
);
1540 addReplyBulkCString(c
,"can-failover-its-master");
1541 addReplyBulkLongLong(c
,(ri
->flags
& SRI_CAN_FAILOVER
) != 0);
1544 if (ri
->flags
& SRI_MASTER_DOWN
) {
1545 addReplyBulkCString(c
,"subjective-leader");
1546 addReplyBulkCString(c
,ri
->leader
? ri
->leader
: "?");
1551 setDeferredMultiBulkLength(c
,mbl
,fields
*2);
1554 /* Output a number of instances contanined inside a dictionary as
1555 * Redis protocol. */
1556 void addReplyDictOfRedisInstances(redisClient
*c
, dict
*instances
) {
1560 di
= dictGetIterator(instances
);
1561 addReplyMultiBulkLen(c
,dictSize(instances
));
1562 while((de
= dictNext(di
)) != NULL
) {
1563 sentinelRedisInstance
*ri
= dictGetVal(de
);
1565 addReplySentinelRedisInstance(c
,ri
);
1567 dictReleaseIterator(di
);
1570 /* Lookup the named master into sentinel.masters.
1571 * If the master is not found reply to the client with an error and returns
1573 sentinelRedisInstance
*sentinelGetMasterByNameOrReplyError(redisClient
*c
,
1576 sentinelRedisInstance
*ri
;
1578 ri
= dictFetchValue(sentinel
.masters
,c
->argv
[2]->ptr
);
1580 addReplyError(c
,"No such master with that name");
1586 void sentinelCommand(redisClient
*c
) {
1587 if (!strcasecmp(c
->argv
[1]->ptr
,"masters")) {
1588 /* SENTINEL MASTERS */
1589 if (c
->argc
!= 2) goto numargserr
;
1591 addReplyDictOfRedisInstances(c
,sentinel
.masters
);
1592 } else if (!strcasecmp(c
->argv
[1]->ptr
,"slaves")) {
1593 /* SENTINEL SLAVES <master-name> */
1594 sentinelRedisInstance
*ri
;
1596 if (c
->argc
!= 3) goto numargserr
;
1597 if ((ri
= sentinelGetMasterByNameOrReplyError(c
,c
->argv
[2])) == NULL
)
1599 addReplyDictOfRedisInstances(c
,ri
->slaves
);
1600 } else if (!strcasecmp(c
->argv
[1]->ptr
,"sentinels")) {
1601 /* SENTINEL SENTINELS <master-name> */
1602 sentinelRedisInstance
*ri
;
1604 if (c
->argc
!= 3) goto numargserr
;
1605 if ((ri
= sentinelGetMasterByNameOrReplyError(c
,c
->argv
[2])) == NULL
)
1607 addReplyDictOfRedisInstances(c
,ri
->sentinels
);
1608 } else if (!strcasecmp(c
->argv
[1]->ptr
,"is-master-down-by-addr")) {
1609 /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> */
1610 sentinelRedisInstance
*ri
;
1611 char *leader
= NULL
;
1615 if (c
->argc
!= 4) goto numargserr
;
1616 if (getLongFromObjectOrReply(c
,c
->argv
[3],&port
,NULL
) != REDIS_OK
)
1618 ri
= getSentinelRedisInstanceByAddrAndRunID(sentinel
.masters
,
1619 c
->argv
[2]->ptr
,port
,NULL
);
1621 /* It exists? Is actually a master? Is subjectively down? It's down.
1622 * Note: if we are in tilt mode we always reply with "0". */
1623 if (!sentinel
.tilt
&& ri
&& (ri
->flags
& SRI_S_DOWN
) &&
1624 (ri
->flags
& SRI_MASTER
))
1626 if (ri
) leader
= sentinelGetSubjectiveLeader(ri
);
1628 /* Reply with a two-elements multi-bulk reply: down state, leader. */
1629 addReplyMultiBulkLen(c
,2);
1630 addReply(c
, isdown
? shared
.cone
: shared
.czero
);
1631 addReplyBulkCString(c
, leader
? leader
: "?");
1632 if (leader
) sdsfree(leader
);
1633 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reset")) {
1634 /* SENTINEL RESET <pattern> */
1635 if (c
->argc
!= 3) goto numargserr
;
1636 addReplyLongLong(c
,sentinelResetMastersByPattern(c
->argv
[2]->ptr
,SENTINEL_GENERATE_EVENT
));
1637 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get-master-addr-by-name")) {
1638 /* SENTINEL GET-MASTER-ADDR-BY-NAME <master-name> */
1639 sentinelRedisInstance
*ri
;
1641 if (c
->argc
!= 3) goto numargserr
;
1642 ri
= sentinelGetMasterByName(c
->argv
[2]->ptr
);
1644 addReply(c
,shared
.nullmultibulk
);
1646 sentinelAddr
*addr
= ri
->addr
;
1648 if ((ri
->flags
& SRI_FAILOVER_IN_PROGRESS
) && ri
->promoted_slave
)
1649 addr
= ri
->promoted_slave
->addr
;
1650 addReplyMultiBulkLen(c
,2);
1651 addReplyBulkCString(c
,addr
->ip
);
1652 addReplyBulkLongLong(c
,addr
->port
);
1655 addReplyErrorFormat(c
,"Unknown sentinel subcommand '%s'",
1656 (char*)c
->argv
[1]->ptr
);
1661 addReplyErrorFormat(c
,"Wrong number of commands for 'sentinel %s'",
1662 (char*)c
->argv
[1]->ptr
);
1665 /* ===================== SENTINEL availability checks ======================= */
1667 /* Is this instance down from our point of view? */
1668 void sentinelCheckSubjectivelyDown(sentinelRedisInstance
*ri
) {
1669 mstime_t elapsed
= mstime() - ri
->last_avail_time
;
1671 /* Check if we are in need for a reconnection of one of the
1672 * links, because we are detecting low activity.
1674 * 1) Check if the command link seems connected, was connected not less
1675 * than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have an
1676 * idle time that is greater than down_after_period / 2 seconds. */
1678 (mstime() - ri
->cc_conn_time
) > SENTINEL_MIN_LINK_RECONNECT_PERIOD
&&
1679 (mstime() - ri
->last_pong_time
) > (ri
->down_after_period
/2))
1681 sentinelKillLink(ri
,ri
->cc
);
1684 /* 2) Check if the pubsub link seems connected, was connected not less
1685 * than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have no
1686 * activity in the Pub/Sub channel for more than
1687 * SENTINEL_PUBLISH_PERIOD * 3.
1690 (mstime() - ri
->pc_conn_time
) > SENTINEL_MIN_LINK_RECONNECT_PERIOD
&&
1691 (mstime() - ri
->pc_last_activity
) > (SENTINEL_PUBLISH_PERIOD
*3))
1693 sentinelKillLink(ri
,ri
->pc
);
1696 /* Update the subjectively down flag. */
1697 if (elapsed
> ri
->down_after_period
) {
1698 /* Is subjectively down */
1699 if ((ri
->flags
& SRI_S_DOWN
) == 0) {
1700 sentinelEvent(REDIS_WARNING
,"+sdown",ri
,"%@");
1701 ri
->s_down_since_time
= mstime();
1702 ri
->flags
|= SRI_S_DOWN
;
1705 /* Is subjectively up */
1706 if (ri
->flags
& SRI_S_DOWN
) {
1707 sentinelEvent(REDIS_WARNING
,"-sdown",ri
,"%@");
1708 ri
->flags
&= ~SRI_S_DOWN
;
1713 /* Is this instance down accordingly to the configured quorum? */
1714 void sentinelCheckObjectivelyDown(sentinelRedisInstance
*master
) {
1717 int quorum
= 0, odown
= 0;
1719 if (master
->flags
& SRI_S_DOWN
) {
1720 /* Is down for enough sentinels? */
1721 quorum
= 1; /* the current sentinel. */
1722 /* Count all the other sentinels. */
1723 di
= dictGetIterator(master
->sentinels
);
1724 while((de
= dictNext(di
)) != NULL
) {
1725 sentinelRedisInstance
*ri
= dictGetVal(de
);
1727 if (ri
->flags
& SRI_MASTER_DOWN
) quorum
++;
1729 dictReleaseIterator(di
);
1730 if (quorum
>= master
->quorum
) odown
= 1;
1733 /* Set the flag accordingly to the outcome. */
1735 if ((master
->flags
& SRI_O_DOWN
) == 0) {
1736 sentinelEvent(REDIS_WARNING
,"+odown",master
,"%@ #quorum %d/%d",
1737 quorum
, master
->quorum
);
1738 master
->flags
|= SRI_O_DOWN
;
1739 master
->o_down_since_time
= mstime();
1742 if (master
->flags
& SRI_O_DOWN
) {
1743 sentinelEvent(REDIS_WARNING
,"-odown",master
,"%@");
1744 master
->flags
&= ~SRI_O_DOWN
;
1749 /* Receive the SENTINEL is-master-down-by-addr reply, see the
1750 * sentinelAskMasterStateToOtherSentinels() function for more information. */
1751 void sentinelReceiveIsMasterDownReply(redisAsyncContext
*c
, void *reply
, void *privdata
) {
1752 sentinelRedisInstance
*ri
= c
->data
;
1755 if (ri
) ri
->pending_commands
--;
1756 if (!reply
|| !ri
) return;
1759 /* Ignore every error or unexpected reply.
1760 * Note that if the command returns an error for any reason we'll
1761 * end clearing the SRI_MASTER_DOWN flag for timeout anyway. */
1762 if (r
->type
== REDIS_REPLY_ARRAY
&& r
->elements
== 2 &&
1763 r
->element
[0]->type
== REDIS_REPLY_INTEGER
&&
1764 r
->element
[1]->type
== REDIS_REPLY_STRING
)
1766 ri
->last_master_down_reply_time
= mstime();
1767 if (r
->element
[0]->integer
== 1) {
1768 ri
->flags
|= SRI_MASTER_DOWN
;
1770 ri
->flags
&= ~SRI_MASTER_DOWN
;
1772 sdsfree(ri
->leader
);
1773 ri
->leader
= sdsnew(r
->element
[1]->str
);
1777 /* If we think (subjectively) the master is down, we start sending
1778 * SENTINEL IS-MASTER-DOWN-BY-ADDR requests to other sentinels
1779 * in order to get the replies that allow to reach the quorum and
1780 * possibly also mark the master as objectively down. */
1781 void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance
*master
) {
1785 di
= dictGetIterator(master
->sentinels
);
1786 while((de
= dictNext(di
)) != NULL
) {
1787 sentinelRedisInstance
*ri
= dictGetVal(de
);
1788 mstime_t elapsed
= mstime() - ri
->last_master_down_reply_time
;
1792 /* If the master state from other sentinel is too old, we clear it. */
1793 if (elapsed
> SENTINEL_INFO_VALIDITY_TIME
) {
1794 ri
->flags
&= ~SRI_MASTER_DOWN
;
1795 sdsfree(ri
->leader
);
1799 /* Only ask if master is down to other sentinels if:
1801 * 1) We believe it is down, or there is a failover in progress.
1802 * 2) Sentinel is connected.
1803 * 3) We did not received the info within SENTINEL_ASK_PERIOD ms. */
1804 if ((master
->flags
& (SRI_S_DOWN
|SRI_FAILOVER_IN_PROGRESS
)) == 0)
1806 if (ri
->flags
& SRI_DISCONNECTED
) continue;
1807 if (mstime() - ri
->last_master_down_reply_time
< SENTINEL_ASK_PERIOD
)
1811 ll2string(port
,sizeof(port
),master
->addr
->port
);
1812 retval
= redisAsyncCommand(ri
->cc
,
1813 sentinelReceiveIsMasterDownReply
, NULL
,
1814 "SENTINEL is-master-down-by-addr %s %s",
1815 master
->addr
->ip
, port
);
1816 if (retval
== REDIS_OK
) ri
->pending_commands
++;
1818 dictReleaseIterator(di
);
1821 /* =============================== FAILOVER ================================= */
/* Given a master get the "subjective leader", that is, among all the sentinels
 * with given characteristics, the one with the lexicographically smallest
 * runid. The characteristics required are:
 *
 * 1) Has SRI_CAN_FAILOVER flag.
 * 2) Is not disconnected.
 * 3) Recently answered to our ping (no longer than
 *    SENTINEL_INFO_VALIDITY_TIME milliseconds ago).
 *
 * The function returns a pointer to an sds string representing the runid of the
 * leader sentinel instance (from our point of view). Otherwise NULL is
 * returned if there are no suitable sentinels.
 */
/* qsort() comparison function for arrays of C strings holding run IDs:
 * 'a' and 'b' point to the array elements (that is, to char pointers),
 * which are compared ignoring case. */
int compareRunID(const void *a, const void *b) {
    const char *lhs = *(char**)a;
    const char *rhs = *(char**)b;

    return strcasecmp(lhs, rhs);
}
1842 char *sentinelGetSubjectiveLeader(sentinelRedisInstance
*master
) {
1846 zmalloc(sizeof(char*)*(dictSize(master
->sentinels
)+1));
1848 char *leader
= NULL
;
1850 if (master
->flags
& SRI_CAN_FAILOVER
) {
1851 /* Add myself if I'm a Sentinel that can failover this master. */
1852 instance
[instances
++] = server
.runid
;
1855 di
= dictGetIterator(master
->sentinels
);
1856 while((de
= dictNext(di
)) != NULL
) {
1857 sentinelRedisInstance
*ri
= dictGetVal(de
);
1858 mstime_t lag
= mstime() - ri
->last_avail_time
;
1860 if (lag
> SENTINEL_INFO_VALIDITY_TIME
||
1861 !(ri
->flags
& SRI_CAN_FAILOVER
) ||
1862 (ri
->flags
& SRI_DISCONNECTED
) ||
1865 instance
[instances
++] = ri
->runid
;
1867 dictReleaseIterator(di
);
1869 /* If we have at least one instance passing our checks, order the array
1872 qsort(instance
,instances
,sizeof(char*),compareRunID
);
1873 leader
= sdsnew(instance
[0]);
/* A candidate leader together with the number of votes it received. */
struct sentinelLeader {
    char *runid;            /* Run ID of the candidate. */
    unsigned long votes;    /* Votes counted for this candidate. */
};
1884 /* Helper function for sentinelGetObjectiveLeader, increment the counter
1885 * relative to the specified runid. */
1886 void sentinelObjectiveLeaderIncr(dict
*counters
, char *runid
) {
1887 dictEntry
*de
= dictFind(counters
,runid
);
1891 oldval
= dictGetUnsignedIntegerVal(de
);
1892 dictSetUnsignedIntegerVal(de
,oldval
+1);
1894 de
= dictAddRaw(counters
,runid
);
1895 redisAssert(de
!= NULL
);
1896 dictSetUnsignedIntegerVal(de
,1);
1900 /* Scan all the Sentinels attached to this master to check what is the
1901 * most voted leader among Sentinels. */
1902 char *sentinelGetObjectiveLeader(sentinelRedisInstance
*master
) {
1906 unsigned int voters
= 0, voters_quorum
;
1908 char *winner
= NULL
;
1910 redisAssert(master
->flags
& (SRI_O_DOWN
|SRI_FAILOVER_IN_PROGRESS
));
1911 counters
= dictCreate(&leaderVotesDictType
,NULL
);
1913 /* Count my vote. */
1914 myvote
= sentinelGetSubjectiveLeader(master
);
1916 sentinelObjectiveLeaderIncr(counters
,myvote
);
1920 /* Count other sentinels votes */
1921 di
= dictGetIterator(master
->sentinels
);
1922 while((de
= dictNext(di
)) != NULL
) {
1923 sentinelRedisInstance
*ri
= dictGetVal(de
);
1924 if (ri
->leader
== NULL
) continue;
1925 /* If the failover is not already in progress we are only interested
1926 * in Sentinels that believe the master is down. Otherwise the leader
1927 * selection is useful for the "failover-takedown" when the original
1928 * leader fails. In that case we consider all the voters. */
1929 if (!(master
->flags
& SRI_FAILOVER_IN_PROGRESS
) &&
1930 !(ri
->flags
& SRI_MASTER_DOWN
)) continue;
1931 sentinelObjectiveLeaderIncr(counters
,ri
->leader
);
1934 dictReleaseIterator(di
);
1935 voters_quorum
= voters
/2+1;
1937 /* Check what's the winner. For the winner to win, it needs two conditions:
1938 * 1) Absolute majority between voters (50% + 1).
1939 * 2) And anyway at least master->quorum votes. */
1941 uint64_t max_votes
= 0; /* Max votes so far. */
1943 di
= dictGetIterator(counters
);
1944 while((de
= dictNext(di
)) != NULL
) {
1945 uint64_t votes
= dictGetUnsignedIntegerVal(de
);
1947 if (max_votes
< votes
) {
1949 winner
= dictGetKey(de
);
1952 dictReleaseIterator(di
);
1953 if (winner
&& (max_votes
< voters_quorum
|| max_votes
< master
->quorum
))
1956 winner
= winner
? sdsnew(winner
) : NULL
;
1958 dictRelease(counters
);
1962 /* This function checks if there are the conditions to start the failover,
1965 * 1) Enough time has passed since O_DOWN.
1966 * 2) The master is marked as SRI_CAN_FAILOVER, so we can failover it.
1967 * 3) We are the objectively leader for this master.
1969 * If the conditions are met we flag the master as SRI_FAILOVER_IN_PROGRESS
1970 * and SRI_I_AM_THE_LEADER.
1972 void sentinelStartFailover(sentinelRedisInstance
*master
) {
1976 /* We can't failover if the master is not in O_DOWN state or if
1977 * there is not already a failover in progress (to perform the
1978 * takedown if the leader died) or if this Sentinel is not allowed
1979 * to start a failover. */
1980 if (!(master
->flags
& SRI_CAN_FAILOVER
) ||
1981 !(master
->flags
& (SRI_O_DOWN
|SRI_FAILOVER_IN_PROGRESS
))) return;
1983 leader
= sentinelGetObjectiveLeader(master
);
1984 isleader
= leader
&& strcasecmp(leader
,server
.runid
) == 0;
1987 /* If I'm not the leader, I can't failover for sure. */
1988 if (!isleader
) return;
1990 /* If the failover is already in progress there are two options... */
1991 if (master
->flags
& SRI_FAILOVER_IN_PROGRESS
) {
1992 if (master
->flags
& SRI_I_AM_THE_LEADER
) {
1993 /* 1) I'm flagged as leader so I already started the failover.
1997 mstime_t elapsed
= mstime() - master
->failover_state_change_time
;
1999 /* 2) I'm the new leader, but I'm not flagged as leader in the
2000 * master: I did not started the failover, but the original
2001 * leader has no longer the leadership.
2003 * In this case if the failover appears to be lagging
2004 * for at least 25% of the configured failover timeout,
2005 * I can assume I can take control. Otherwise
2006 * it's better to return and wait more. */
2007 if (elapsed
< (master
->failover_timeout
/4)) return;
2008 sentinelEvent(REDIS_WARNING
,"+failover-takedown",master
,"%@");
2009 /* We have already an elected slave if we are in
2010 * FAILOVER_IN_PROGRESS state, that is, the slave that we
2011 * observed turning into a master. */
2012 master
->failover_state
= SENTINEL_FAILOVER_STATE_RECONF_SLAVES
;
2013 /* As an observer we flagged all the slaves as RECONF_SENT but
2014 * now we are in charge of actually sending the reconfiguration
2015 * command so let's clear this flag for all the instances. */
2016 sentinelDelFlagsToDictOfRedisInstances(master
->slaves
,
2020 /* Brand new failover as SRI_FAILOVER_IN_PROGRESS was not set. */
2021 master
->failover_state
= SENTINEL_FAILOVER_STATE_WAIT_START
;
2024 master
->flags
|= SRI_FAILOVER_IN_PROGRESS
|SRI_I_AM_THE_LEADER
;
2025 sentinelEvent(REDIS_WARNING
,"+failover-triggered",master
,"%@");
2027 /* Pick a random delay if it's a fresh failover (WAIT_START), and not
2028 * a recovery of a failover started by another sentinel. */
2029 if (master
->failover_state
== SENTINEL_FAILOVER_STATE_WAIT_START
) {
2030 master
->failover_start_time
= mstime() +
2031 SENTINEL_FAILOVER_FIXED_DELAY
+
2032 (rand() % SENTINEL_FAILOVER_MAX_RANDOM_DELAY
);
2033 sentinelEvent(REDIS_WARNING
,"+failover-state-wait-start",master
,
2034 "%@ #starting in %lld milliseconds",
2035 master
->failover_start_time
-mstime());
2037 master
->failover_state_change_time
= mstime();
/* Select a suitable slave to promote. The current algorithm only uses
 * the following parameters:
 *
 * 1) None of the following conditions: S_DOWN, O_DOWN, DISCONNECTED.
 * 2) last_avail_time more recent than SENTINEL_INFO_VALIDITY_TIME.
 * 3) info_refresh more recent than SENTINEL_INFO_VALIDITY_TIME.
 * 4) master_link_down_time no more than:
 *     (now - master->s_down_since_time) + (master->down_after_period * 10).
 *
 * Among all the slaves matching the above conditions we select the slave
 * with the lowest slave_priority. If the priority is the same we select the
 * slave with the lexicographically smallest runid.
 *
 * The function returns the pointer to the selected slave, otherwise
 * NULL if no suitable slave was found. */
2057 int compareSlavesForPromotion(const void *a
, const void *b
) {
2058 sentinelRedisInstance
**sa
= (sentinelRedisInstance
**)a
,
2059 **sb
= (sentinelRedisInstance
**)b
;
2060 if ((*sa
)->slave_priority
!= (*sb
)->slave_priority
)
2061 return (*sa
)->slave_priority
- (*sb
)->slave_priority
;
2062 return strcasecmp((*sa
)->runid
,(*sb
)->runid
);
2065 sentinelRedisInstance
*sentinelSelectSlave(sentinelRedisInstance
*master
) {
2066 sentinelRedisInstance
**instance
=
2067 zmalloc(sizeof(instance
[0])*dictSize(master
->slaves
));
2068 sentinelRedisInstance
*selected
= NULL
;
2072 mstime_t max_master_down_time
;
2074 max_master_down_time
= (mstime() - master
->s_down_since_time
) +
2075 (master
->down_after_period
* 10);
2077 di
= dictGetIterator(master
->slaves
);
2078 while((de
= dictNext(di
)) != NULL
) {
2079 sentinelRedisInstance
*slave
= dictGetVal(de
);
2080 mstime_t info_validity_time
= mstime()-SENTINEL_INFO_VALIDITY_TIME
;
2082 if (slave
->flags
& (SRI_S_DOWN
|SRI_O_DOWN
|SRI_DISCONNECTED
)) continue;
2083 if (slave
->last_avail_time
< info_validity_time
) continue;
2084 if (slave
->info_refresh
< info_validity_time
) continue;
2085 if (slave
->master_link_down_time
> max_master_down_time
) continue;
2086 instance
[instances
++] = slave
;
2088 dictReleaseIterator(di
);
2090 qsort(instance
,instances
,sizeof(sentinelRedisInstance
*),
2091 compareSlavesForPromotion
);
2092 selected
= instance
[0];
/* ---------------- Failover state machine implementation ------------------- */
2099 void sentinelFailoverWaitStart(sentinelRedisInstance
*ri
) {
2100 if (mstime() >= ri
->failover_start_time
) {
2101 ri
->failover_state
= SENTINEL_FAILOVER_STATE_SELECT_SLAVE
;
2102 ri
->failover_state_change_time
= mstime();
2103 sentinelEvent(REDIS_WARNING
,"+failover-state-select-slave",ri
,"%@");
2107 void sentinelFailoverSelectSlave(sentinelRedisInstance
*ri
) {
2108 sentinelRedisInstance
*slave
= sentinelSelectSlave(ri
);
2110 if (slave
== NULL
) {
2111 sentinelEvent(REDIS_WARNING
,"-no-good-slave",ri
,
2112 "%@ #retrying in %d seconds",
2113 (SENTINEL_FAILOVER_FIXED_DELAY
+
2114 SENTINEL_FAILOVER_MAX_RANDOM_DELAY
)/1000);
2115 ri
->failover_state
= SENTINEL_FAILOVER_STATE_WAIT_START
;
2116 ri
->failover_start_time
= mstime() + SENTINEL_FAILOVER_FIXED_DELAY
+
2117 SENTINEL_FAILOVER_MAX_RANDOM_DELAY
;
2119 sentinelEvent(REDIS_WARNING
,"+selected-slave",slave
,"%@");
2120 slave
->flags
|= SRI_PROMOTED
;
2121 ri
->promoted_slave
= slave
;
2122 ri
->failover_state
= SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE
;
2123 ri
->failover_state_change_time
= mstime();
2124 sentinelEvent(REDIS_NOTICE
,"+failover-state-send-slaveof-noone",
2129 void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance
*ri
) {
2132 if (ri
->promoted_slave
->flags
& SRI_DISCONNECTED
) return;
2134 /* Send SLAVEOF NO ONE command to turn the slave into a master.
2135 * We actually register a generic callback for this command as we don't
2136 * really care about the reply. We check if it worked indirectly observing
2137 * if INFO returns a different role (master instead of slave). */
2138 retval
= redisAsyncCommand(ri
->promoted_slave
->cc
,
2139 sentinelDiscardReplyCallback
, NULL
, "SLAVEOF NO ONE");
2140 if (retval
!= REDIS_OK
) return;
2141 ri
->promoted_slave
->pending_commands
++;
2142 sentinelEvent(REDIS_NOTICE
, "+failover-state-wait-promotion",
2143 ri
->promoted_slave
,"%@");
2144 ri
->failover_state
= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
;
2145 ri
->failover_state_change_time
= mstime();
2148 /* We actually wait for promotion indirectly checking with INFO when the
2149 * slave turns into a master. */
2150 void sentinelFailoverWaitPromotion(sentinelRedisInstance
*ri
) {
2151 mstime_t elapsed
= mstime() - ri
->failover_state_change_time
;
2153 if (elapsed
>= SENTINEL_PROMOTION_RETRY_PERIOD
) {
2154 sentinelEvent(REDIS_WARNING
,"-promotion-timeout",ri
->promoted_slave
,
2156 sentinelEvent(REDIS_WARNING
,"+failover-state-select-slave",ri
,"%@");
2157 ri
->failover_state
= SENTINEL_FAILOVER_STATE_SELECT_SLAVE
;
2158 ri
->failover_state_change_time
= mstime();
2159 ri
->promoted_slave
->flags
&= ~SRI_PROMOTED
;
2160 ri
->promoted_slave
= NULL
;
2164 void sentinelFailoverDetectEnd(sentinelRedisInstance
*master
) {
2165 int not_reconfigured
= 0, timeout
= 0;
2168 mstime_t elapsed
= mstime() - master
->failover_state_change_time
;
2170 /* We can't consider failover finished if the promoted slave is
2172 if (master
->promoted_slave
== NULL
||
2173 master
->promoted_slave
->flags
& SRI_S_DOWN
) return;
2175 /* The failover terminates once all the reachable slaves are properly
2177 di
= dictGetIterator(master
->slaves
);
2178 while((de
= dictNext(di
)) != NULL
) {
2179 sentinelRedisInstance
*slave
= dictGetVal(de
);
2181 if (slave
->flags
& (SRI_PROMOTED
|SRI_RECONF_DONE
)) continue;
2182 if (slave
->flags
& SRI_S_DOWN
) continue;
2185 dictReleaseIterator(di
);
2187 /* Force end of failover on timeout. */
2188 if (elapsed
> master
->failover_timeout
) {
2189 not_reconfigured
= 0;
2191 sentinelEvent(REDIS_WARNING
,"+failover-end-for-timeout",master
,"%@");
2194 if (not_reconfigured
== 0) {
2195 sentinelEvent(REDIS_WARNING
,"+failover-end",master
,"%@");
2196 master
->failover_state
= SENTINEL_FAILOVER_STATE_UPDATE_CONFIG
;
2197 master
->failover_state_change_time
= mstime();
2200 /* If I'm the leader it is a good idea to send a best effort SLAVEOF
2201 * command to all the slaves still not reconfigured to replicate with
2202 * the new master. */
2203 if (timeout
&& (master
->flags
& SRI_I_AM_THE_LEADER
)) {
2206 char master_port
[32];
2208 ll2string(master_port
,sizeof(master_port
),
2209 master
->promoted_slave
->addr
->port
);
2211 di
= dictGetIterator(master
->slaves
);
2212 while((de
= dictNext(di
)) != NULL
) {
2213 sentinelRedisInstance
*slave
= dictGetVal(de
);
2217 (SRI_RECONF_DONE
|SRI_RECONF_SENT
|SRI_DISCONNECTED
)) continue;
2219 retval
= redisAsyncCommand(slave
->cc
,
2220 sentinelDiscardReplyCallback
, NULL
, "SLAVEOF %s %s",
2221 master
->promoted_slave
->addr
->ip
,
2223 if (retval
== REDIS_OK
) {
2224 sentinelEvent(REDIS_NOTICE
,"+slave-reconf-sent-be",slave
,"%@");
2225 slave
->flags
|= SRI_RECONF_SENT
;
2228 dictReleaseIterator(di
);
2232 /* Send SLAVE OF <new master address> to all the remaining slaves that
2233 * still don't appear to have the configuration updated. */
2234 void sentinelFailoverReconfNextSlave(sentinelRedisInstance
*master
) {
2237 int in_progress
= 0;
2239 di
= dictGetIterator(master
->slaves
);
2240 while((de
= dictNext(di
)) != NULL
) {
2241 sentinelRedisInstance
*slave
= dictGetVal(de
);
2243 if (slave
->flags
& (SRI_RECONF_SENT
|SRI_RECONF_INPROG
))
2246 dictReleaseIterator(di
);
2248 di
= dictGetIterator(master
->slaves
);
2249 while(in_progress
< master
->parallel_syncs
&&
2250 (de
= dictNext(di
)) != NULL
)
2252 sentinelRedisInstance
*slave
= dictGetVal(de
);
2254 char master_port
[32];
2256 /* Skip the promoted slave, and already configured slaves. */
2257 if (slave
->flags
& (SRI_PROMOTED
|SRI_RECONF_DONE
)) continue;
2259 /* Clear the SRI_RECONF_SENT flag if too much time elapsed without
2260 * the slave moving forward to the next state. */
2261 if ((slave
->flags
& SRI_RECONF_SENT
) &&
2262 (mstime() - slave
->slave_reconf_sent_time
) >
2263 SENTINEL_SLAVE_RECONF_RETRY_PERIOD
)
2265 sentinelEvent(REDIS_NOTICE
,"-slave-reconf-sent-timeout",slave
,"%@");
2266 slave
->flags
&= ~SRI_RECONF_SENT
;
2269 /* Nothing to do for instances that are disconnected or already
2270 * in RECONF_SENT state. */
2271 if (slave
->flags
& (SRI_DISCONNECTED
|SRI_RECONF_SENT
|SRI_RECONF_INPROG
))
2274 /* Send SLAVEOF <new master>. */
2275 ll2string(master_port
,sizeof(master_port
),
2276 master
->promoted_slave
->addr
->port
);
2277 retval
= redisAsyncCommand(slave
->cc
,
2278 sentinelDiscardReplyCallback
, NULL
, "SLAVEOF %s %s",
2279 master
->promoted_slave
->addr
->ip
,
2281 if (retval
== REDIS_OK
) {
2282 slave
->flags
|= SRI_RECONF_SENT
;
2283 slave
->pending_commands
++;
2284 slave
->slave_reconf_sent_time
= mstime();
2285 sentinelEvent(REDIS_NOTICE
,"+slave-reconf-sent",slave
,"%@");
2289 dictReleaseIterator(di
);
2290 sentinelFailoverDetectEnd(master
);
2293 /* This function is called when the slave is in
2294 * SENTINEL_FAILOVER_STATE_UPDATE_CONFIG state. In this state we need
2295 * to remove it from the master table and add the promoted slave instead.
2297 * If there are no promoted slaves as this instance is unique, we remove
2298 * and re-add it with the same address to trigger a complete state
2300 void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance
*master
) {
2301 sentinelRedisInstance
*ref
= master
->promoted_slave
?
2302 master
->promoted_slave
: master
;
2304 sentinelEvent(REDIS_WARNING
,"+switch-master",master
,"%s %s %d %s %d",
2305 master
->name
, master
->addr
->ip
, master
->addr
->port
,
2306 ref
->addr
->ip
, ref
->addr
->port
);
2308 sentinelResetMasterAndChangeAddress(master
,ref
->addr
->ip
,ref
->addr
->port
);
2311 void sentinelFailoverStateMachine(sentinelRedisInstance
*ri
) {
2312 redisAssert(ri
->flags
& SRI_MASTER
);
2314 if (!(ri
->flags
& SRI_FAILOVER_IN_PROGRESS
)) return;
2316 switch(ri
->failover_state
) {
2317 case SENTINEL_FAILOVER_STATE_WAIT_START
:
2318 sentinelFailoverWaitStart(ri
);
2320 case SENTINEL_FAILOVER_STATE_SELECT_SLAVE
:
2321 sentinelFailoverSelectSlave(ri
);
2323 case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE
:
2324 sentinelFailoverSendSlaveOfNoOne(ri
);
2326 case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION
:
2327 sentinelFailoverWaitPromotion(ri
);
2329 case SENTINEL_FAILOVER_STATE_RECONF_SLAVES
:
2330 sentinelFailoverReconfNextSlave(ri
);
2332 case SENTINEL_FAILOVER_STATE_DETECT_END
:
2333 sentinelFailoverDetectEnd(ri
);
2338 /* The following is called only for master instances and will abort the
2339 * failover process if:
2341 * 1) The failover is in progress.
2342 * 2) We already promoted a slave.
2343 * 3) The promoted slave is in extended SDOWN condition.
2345 void sentinelAbortFailoverIfNeeded(sentinelRedisInstance
*ri
) {
2349 /* Failover is in progress? Do we have a promoted slave? */
2350 if (!(ri
->flags
& SRI_FAILOVER_IN_PROGRESS
) || !ri
->promoted_slave
) return;
2352 /* Is the promoted slave into an extended SDOWN state? */
2353 if (!(ri
->promoted_slave
->flags
& SRI_S_DOWN
) ||
2354 (mstime() - ri
->promoted_slave
->s_down_since_time
) <
2355 (ri
->down_after_period
* SENTINEL_EXTENDED_SDOWN_MULTIPLIER
)) return;
2357 sentinelEvent(REDIS_WARNING
,"-failover-abort-x-sdown",ri
->promoted_slave
,"%@");
2359 /* Clear failover related flags from slaves.
2360 * Also if we are the leader make sure to send SLAVEOF commands to all the
2361 * already reconfigured slaves in order to turn them back into slaves of
2362 * the original master. */
2364 di
= dictGetIterator(ri
->slaves
);
2365 while((de
= dictNext(di
)) != NULL
) {
2366 sentinelRedisInstance
*slave
= dictGetVal(de
);
2367 if (ri
->flags
& SRI_I_AM_THE_LEADER
) {
2368 char master_port
[32];
2371 ll2string(master_port
,sizeof(master_port
),ri
->addr
->port
);
2372 retval
= redisAsyncCommand(slave
->cc
,
2373 sentinelDiscardReplyCallback
, NULL
, "SLAVEOF %s %s",
2376 if (retval
== REDIS_OK
)
2377 sentinelEvent(REDIS_NOTICE
,"-slave-reconf-undo",slave
,"%@");
2379 slave
->flags
&= ~(SRI_RECONF_SENT
|SRI_RECONF_INPROG
|SRI_RECONF_DONE
);
2381 dictReleaseIterator(di
);
2383 ri
->flags
&= ~(SRI_FAILOVER_IN_PROGRESS
|SRI_I_AM_THE_LEADER
);
2384 ri
->failover_state
= SENTINEL_FAILOVER_STATE_NONE
;
2385 ri
->failover_state_change_time
= mstime();
2386 ri
->promoted_slave
->flags
&= ~SRI_PROMOTED
;
2387 ri
->promoted_slave
= NULL
;
/* ======================== SENTINEL timer handler ==========================
 * This is the "main" of our Sentinel, as Sentinel is completely non blocking
 * by design. The function is called every second.
 * -------------------------------------------------------------------------- */
2395 /* Perform scheduled operations for the specified Redis instance. */
2396 void sentinelHandleRedisInstance(sentinelRedisInstance
*ri
) {
2397 /* ========== MONITORING HALF ============ */
2398 /* Every kind of instance */
2399 sentinelReconnectInstance(ri
);
2400 sentinelPingInstance(ri
);
2402 /* Masters and slaves */
2403 if (ri
->flags
& (SRI_MASTER
|SRI_SLAVE
)) {
2404 /* Nothing so far. */
2408 if (ri
->flags
& SRI_MASTER
) {
2409 sentinelAskMasterStateToOtherSentinels(ri
);
2412 /* ============== ACTING HALF ============= */
2413 /* We don't proceed with the acting half if we are in TILT mode.
2414 * TILT happens when we find something odd with the time, like a
2415 * sudden change in the clock. */
2416 if (sentinel
.tilt
) {
2417 if (mstime()-sentinel
.tilt_start_time
< SENTINEL_TILT_PERIOD
) return;
2419 sentinelEvent(REDIS_WARNING
,"-tilt",NULL
,"#tilt mode exited");
2422 /* Every kind of instance */
2423 sentinelCheckSubjectivelyDown(ri
);
2425 /* Masters and slaves */
2426 if (ri
->flags
& (SRI_MASTER
|SRI_SLAVE
)) {
2427 /* Nothing so far. */
2431 if (ri
->flags
& SRI_MASTER
) {
2432 sentinelCheckObjectivelyDown(ri
);
2433 sentinelStartFailover(ri
);
2434 sentinelFailoverStateMachine(ri
);
2435 sentinelAbortFailoverIfNeeded(ri
);
2439 /* Perform scheduled operations for all the instances in the dictionary.
2440 * Recursively call the function against dictionaries of slaves. */
2441 void sentinelHandleDictOfRedisInstances(dict
*instances
) {
2444 sentinelRedisInstance
*switch_to_promoted
= NULL
;
2446 /* There are a number of things we need to perform against every master. */
2447 di
= dictGetIterator(instances
);
2448 while((de
= dictNext(di
)) != NULL
) {
2449 sentinelRedisInstance
*ri
= dictGetVal(de
);
2451 sentinelHandleRedisInstance(ri
);
2452 if (ri
->flags
& SRI_MASTER
) {
2453 sentinelHandleDictOfRedisInstances(ri
->slaves
);
2454 sentinelHandleDictOfRedisInstances(ri
->sentinels
);
2455 if (ri
->failover_state
== SENTINEL_FAILOVER_STATE_UPDATE_CONFIG
) {
2456 switch_to_promoted
= ri
;
2460 if (switch_to_promoted
)
2461 sentinelFailoverSwitchToPromotedSlave(switch_to_promoted
);
2462 dictReleaseIterator(di
);
2465 /* This function checks if we need to enter the TITL mode.
2467 * The TILT mode is entered if we detect that between two invocations of the
2468 * timer interrupt, a negative amount of time, or too much time has passed.
2469 * Note that we expect that more or less just 100 milliseconds will pass
2470 * if everything is fine. However we'll see a negative number or a
2471 * difference bigger than SENTINEL_TILT_TRIGGER milliseconds if one of the
2472 * following conditions happen:
2474 * 1) The Sentiel process for some time is blocked, for every kind of
2475 * random reason: the load is huge, the computer was freezed for some time
2476 * in I/O or alike, the process was stopped by a signal. Everything.
2477 * 2) The system clock was altered significantly.
2479 * Under both this conditions we'll see everything as timed out and failing
2480 * without good reasons. Instead we enter the TILT mode and wait
2481 * for SENTIENL_TILT_PERIOD to elapse before starting to act again.
2483 * During TILT time we still collect information, we just do not act. */
2484 void sentinelCheckTiltCondition(void) {
2485 mstime_t now
= mstime();
2486 mstime_t delta
= now
- sentinel
.previous_time
;
2488 if (delta
< 0 || delta
> SENTINEL_TILT_TRIGGER
) {
2490 sentinel
.tilt_start_time
= mstime();
2491 sentinelEvent(REDIS_WARNING
,"+tilt",NULL
,"#tilt mode entered");
2493 sentinel
.previous_time
= mstime();
2496 void sentinelTimer(void) {
2497 sentinelCheckTiltCondition();
2498 sentinelHandleDictOfRedisInstances(sentinel
.masters
);