From 999fe0d352618dddd9e51fbb27ebc1a86a2c583a Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 25 Jul 2012 16:33:37 +0200 Subject: [PATCH] Sentinel: ability to execute notification scripts. --- sentinel.conf | 34 ++++++++++++++++++++---- src/sentinel.c | 72 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/sentinel.conf b/sentinel.conf index ca291163..eb2de6a5 100644 --- a/sentinel.conf +++ b/sentinel.conf @@ -4,7 +4,8 @@ # The port that this sentinel instance will run on port 26379 -# sentinel monitor +# sentinel monitor +# # Tells Sentinel to monitor this slave, and to consider it in O_DOWN # (Objectively Down) state only if at least sentinels agree. # @@ -12,7 +13,8 @@ port 26379 # The valid charset is A-z 0-9 and the three characters ".-_". sentinel monitor mymaster 127.0.0.1 6379 2 -# sentinel down-after-milliseconds +# sentinel down-after-milliseconds +# # Number of milliseconds the master (or any attached slave or sentinel) should # be unreachable (as in, not acceptable reply to PING, continuously, for the # specified period) in order to consider it in S_DOWN state (Subjectively @@ -21,18 +23,21 @@ sentinel monitor mymaster 127.0.0.1 6379 2 # Default is 30 seconds. sentinel down-after-milliseconds mymaster 30000 -# sentinel can-failover +# sentinel can-failover +# # Specify if this Sentinel can start the failover for this master. sentinel can-failover mymaster yes -# sentinel parallel-syncs +# sentinel parallel-syncs +# # How many slaves we can reconfigure to point to the new slave simultaneously # during the failover. Use a low number if you use the slaves to serve query # to avoid that all the slaves will be unreachable at about the same # time while performing the synchronization with the master. sentinel parallel-syncs mymaster 1 -# sentinel failover-timeout +# sentinel failover-timeout +# # Specifies the failover timeout in milliseconds. 
When this time has elapsed # without any progress in the failover process, it is considered concluded by # the sentinel even if not all the attached slaves were correctly configured @@ -47,3 +52,22 @@ sentinel parallel-syncs mymaster 1 # Default is 15 minutes. sentinel failover-timeout mymaster 900000 +# sentinel notification-script +# +# Call the specified notification script for any sentinel event that is +# generated in the WARNING level (for instance -sdown, -odown, and so forth). +# This script should notify the system administrator via email, SMS, or any +# other messaging system, that there is something wrong with the monitored +# Redis systems. +# +# The script is called with just two arguments: the first is the event type +# and the second the event description. +# +# The script must exist and be executable in order for sentinel to start if +# this option is provided. +# +# Example: +# +# sentinel notification-script mymaster /var/redis/notify.sh + + diff --git a/src/sentinel.c b/src/sentinel.c index 54c675b5..227fb693 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -37,6 +37,8 @@ #include #include +extern char **environ; + #define REDIS_SENTINEL_PORT 26379 /* ======================== Sentinel global state =========================== */ @@ -169,7 +171,7 @@ typedef struct sentinelRedisInstance { struct sentinelRedisInstance *promoted_slave; /* Promoted slave instance. */ /* Scripts executed to notify admin or reconfigure clients: when they * are set to NULL no script is executed. 
*/ - char *notify_script; + char *notification_script; char *client_reconfig_script; } sentinelRedisInstance; @@ -291,6 +293,7 @@ void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c); void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c); const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri); void sentinelAbortFailover(sentinelRedisInstance *ri); +void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...); /* ========================= Dictionary types =============================== */ @@ -403,7 +406,29 @@ void releaseSentinelAddr(sentinelAddr *sa) { /* =========================== Events notification ========================== */ void sentinelCallNotificationScript(char *scriptpath, char *type, char *msg) { - /* TODO: implement it. */ + pid_t pid = fork(); + + if (pid == -1) { + /* fork(2) failed: still in the parent, report the error and give up. */ + sentinelEvent(REDIS_WARNING,"-notification-script-error",NULL, + "#can't fork: %s",strerror(errno)); + return; + } else if (pid == 0) { + /* Child: exec the script with type and msg as its two arguments. */ + char *argv[4]; + + argv[0] = scriptpath; + argv[1] = type; + argv[2] = msg; + argv[3] = NULL; + execve(scriptpath,argv,environ); + /* execve(2) returns only on failure: report errno and exit. */ + sentinelEvent(REDIS_WARNING,"-notification-script-error",NULL, + "#execve(2): %s",strerror(errno)); + _exit(1); + } else { + sentinelEvent(REDIS_DEBUG,"+child",NULL,"%ld",(long)pid); + } } /* Send an event to log, pub/sub, user notification script. @@ -480,8 +505,9 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, if (level == REDIS_WARNING && ri != NULL) { sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ? 
ri : ri->master; - if (master->notify_script) { - sentinelCallNotificationScript(master->notify_script,type,msg); + if (master->notification_script) { + sentinelCallNotificationScript(master->notification_script, + type,msg); } } } @@ -584,7 +610,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char * ri->failover_start_time = 0; ri->failover_timeout = SENTINEL_DEFAULT_FAILOVER_TIMEOUT; ri->promoted_slave = NULL; - ri->notify_script = NULL; + ri->notification_script = NULL; ri->client_reconfig_script = NULL; /* Add into the right table. */ @@ -608,7 +634,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) { /* Free other resources. */ sdsfree(ri->name); sdsfree(ri->runid); - sdsfree(ri->notify_script); + sdsfree(ri->notification_script); sdsfree(ri->client_reconfig_script); sdsfree(ri->slave_master_host); sdsfree(ri->leader); @@ -881,6 +907,21 @@ char *sentinelHandleConfiguration(char **argv, int argc) { ri = sentinelGetMasterByName(argv[1]); if (!ri) return "No such master with specified name."; ri->parallel_syncs = atoi(argv[2]); + } else if (!strcasecmp(argv[0],"notification-script") && argc == 3) { + /* notification-script */ + ri = sentinelGetMasterByName(argv[1]); + if (!ri) return "No such master with specified name."; + if (access(argv[2],X_OK) == -1) + return "Notification script seems non existing or non executable."; + ri->notification_script = sdsnew(argv[2]); + } else if (!strcasecmp(argv[0],"client-reconfig-script") && argc == 3) { + /* client-reconfig-script */ + ri = sentinelGetMasterByName(argv[1]); + if (!ri) return "No such master with specified name."; + if (access(argv[2],X_OK) == -1) + return "Client reconfiguration script seems non existing or " + "non executable."; + ri->client_reconfig_script = sdsnew(argv[2]); } else { return "Unrecognized sentinel configuration statement."; } @@ -2511,8 +2552,27 @@ void sentinelCheckTiltCondition(void) { sentinel.previous_time = mstime(); } +/* Handle terminated 
children resulting from calls to notification and client + * reconfiguration scripts. Only pid > 0 is a reaped child (0: none, -1: error). */ +void sentinelHandleChildren(void) { + int statloc; + pid_t pid; + + while ((pid = wait3(&statloc,WNOHANG,NULL)) > 0) { + int exitcode = WEXITSTATUS(statloc); + int bysignal = 0; + + if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc); + sentinelEvent(REDIS_DEBUG,"-child",NULL,"%ld %d %d", + (long)pid, exitcode, bysignal); + + /* TODO: remove client reconfiguration scripts from the queue. */ + } +} + void sentinelTimer(void) { sentinelCheckTiltCondition(); sentinelHandleDictOfRedisInstances(sentinel.masters); + sentinelHandleChildren(); } -- 2.45.2