From 39bd025c295974986331ca4cd89ce18cfa50204a Mon Sep 17 00:00:00 2001
From: antirez
Date: Tue, 27 Mar 2012 11:47:51 +0200
Subject: [PATCH] Redis software watchdog.

---
 src/config.c |  7 ++++++
 src/debug.c  | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/redis.c  |  7 +++++-
 src/redis.h  |  4 ++++
 4 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/src/config.c b/src/config.c
index ab49178a..8dffe288 100644
--- a/src/config.c
+++ b/src/config.c
@@ -627,6 +627,12 @@ void configSetCommand(redisClient *c) {
     } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) {
         if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
         server.repl_timeout = ll;
+    } else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) {
+        if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
+        if (ll)
+            enableWatchdog(ll);
+        else
+            disableWatchdog();
     } else {
         addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
             (char*)c->argv[2]->ptr);
@@ -715,6 +721,7 @@ void configGetCommand(redisClient *c) {
     config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period);
     config_get_numerical_field("repl-timeout",server.repl_timeout);
     config_get_numerical_field("maxclients",server.maxclients);
+    config_get_numerical_field("watchdog-period",server.watchdog_period);
 
     /* Bool (yes/no) values */
     config_get_bool_field("no-appendfsync-on-rewrite",
diff --git a/src/debug.c b/src/debug.c
index 9c8b6ef9..2df913b0 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -661,11 +661,72 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
     /* Make sure we exit with the right signal at the end. So for instance
      * the core will be dumped if enabled. */
     sigemptyset (&act.sa_mask);
-    /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
-     * is used. Otherwise, sa_handler is used */
     act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND;
     act.sa_handler = SIG_DFL;
     sigaction (sig, &act, NULL);
     kill(getpid(),sig);
 }
 #endif /* HAVE_BACKTRACE */
+
+/* =========================== Software Watchdog ============================ */
+#include <sys/time.h>
+
+void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) {
+    ucontext_t *uc = (ucontext_t*) secret;
+    REDIS_NOTUSED(info);
+    REDIS_NOTUSED(sig);
+
+    /* Log a banner and, when backtrace support is compiled in, the stack. */
+    redisLog(REDIS_WARNING, "--- WATCHDOG TIMER EXPIRED ---");
+#ifdef HAVE_BACKTRACE
+    logStackTrace(uc);
+    redisLog(REDIS_WARNING, "------");
+#endif
+}
+
+/* Schedule a SIGALRM delivery after the specified period in milliseconds.
+ * If a timer is already scheduled, this function will re-schedule it to the
+ * specified time. If period is 0 the current timer is disabled. */
+void watchdogScheduleSignal(int period) {
+    struct itimerval it;
+
+    /* Will stop the timer if period is 0. */
+    it.it_value.tv_sec = period/1000;
+    it.it_value.tv_usec = (period%1000)*1000; /* Leftover ms -> microseconds. */
+    /* Don't automatically restart. */
+    it.it_interval.tv_sec = 0;
+    it.it_interval.tv_usec = 0;
+    setitimer(ITIMER_REAL, &it, NULL);
+}
+
+/* Enable the software watchdog with the specified period in milliseconds. */
+void enableWatchdog(int period) {
+    if (server.watchdog_period == 0) {
+        struct sigaction act;
+
+        /* Watchdog was actually disabled, so we have to setup the signal
+         * handler. */
+        sigemptyset(&act.sa_mask);
+        act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_SIGINFO;
+        act.sa_sigaction = watchdogSignalHandler;
+        sigaction(SIGALRM, &act, NULL);
+    }
+    if (period < 200) period = 200; /* We don't accept periods < 200 ms. */
+    watchdogScheduleSignal(period); /* Adjust the current timer. */
+    server.watchdog_period = period;
+}
+
+/* Disable the software watchdog. */
+void disableWatchdog(void) {
+    struct sigaction act;
+    if (server.watchdog_period == 0) return; /* Already disabled. */
+    watchdogScheduleSignal(0); /* Stop the current timer. */
+
+    /* Set the signal handler to SIG_IGN, this will also remove pending
+     * signals from the queue. */
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    act.sa_handler = SIG_IGN;
+    sigaction(SIGALRM, &act, NULL);
+    server.watchdog_period = 0;
+}
diff --git a/src/redis.c b/src/redis.c
index e926fd9b..88fb2fd8 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -726,6 +726,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
     REDIS_NOTUSED(id);
     REDIS_NOTUSED(clientData);
 
+    /* Software watchdog: deliver the SIGALRM that will reach the signal
+     * handler if we don't return here fast enough. */
+    if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
+
     /* We take a cached value of the unix time in the global state because
      * with virtual memory and aging there is to store the current time
      * in objects at every object access, and accuracy is not needed.
@@ -1086,11 +1090,12 @@ void initServerConfig() {
     server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
     server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
 
-    /* Assert */
+    /* Debugging */
     server.assert_failed = "<no assertion failed>";
     server.assert_file = "<no file>";
     server.assert_line = 0;
     server.bug_report_start = 0;
+    server.watchdog_period = 0;
 }
 
 /* This function will try to raise the max number of open files accordingly to
diff --git a/src/redis.h b/src/redis.h
index e4fd47d3..20e0ea98 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -722,6 +722,7 @@ struct redisServer {
     char *assert_file;
     int assert_line;
     int bug_report_start; /* True if bug report header was already logged. */
+    int watchdog_period; /* Software watchdog period in ms. 0 = off */
 };
 
 typedef struct pubsubPattern {
@@ -1255,4 +1256,7 @@ void bugReportStart(void);
 void redisLogObjectDebugInfo(robj *o);
 void sigsegvHandler(int sig, siginfo_t *info, void *secret);
 sds genRedisInfoString(char *section);
+void enableWatchdog(int period);
+void disableWatchdog(void);
+void watchdogScheduleSignal(int period);
 #endif
-- 
2.45.2