From owner-svn-src-all@FreeBSD.ORG Wed Feb 27 19:03:33 2013 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.FreeBSD.org [8.8.178.115]) by hub.freebsd.org (Postfix) with ESMTP id 2A23F4C9; Wed, 27 Feb 2013 19:03:33 +0000 (UTC) (envelope-from alfred@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 0CF3B84B; Wed, 27 Feb 2013 19:03:33 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id r1RJ3XxL066654; Wed, 27 Feb 2013 19:03:33 GMT (envelope-from alfred@svn.freebsd.org) Received: (from alfred@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id r1RJ3WDR066647; Wed, 27 Feb 2013 19:03:32 GMT (envelope-from alfred@svn.freebsd.org) Message-Id: <201302271903.r1RJ3WDR066647@svn.freebsd.org> From: Alfred Perlstein Date: Wed, 27 Feb 2013 19:03:32 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r247405 - in head: sys/dev/watchdog sys/sys usr.sbin/watchdogd X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 27 Feb 2013 19:03:33 -0000 Author: alfred Date: Wed Feb 27 19:03:31 2013 New Revision: 247405 URL: http://svnweb.freebsd.org/changeset/base/247405 Log: watchdogd(8) and watchdog(4) enhancements. The following support was added to watchdog(4): - Support to query the outstanding timeout. 
- Support to set a software pre-timeout function watchdog with an 'action' - Support to set a software only watchdog with a configurable 'action' 'action' can be a mask specifying a single operation or a combination of: log(9), printf(9), panic(9) and/or kdb_enter(9). Support the following in watchdogd: - Support to utilize the new additions to watchdog(4). - Support to warn if a watchdog script runs for too long. - Support for "dry run" where we do not actually arm the watchdog, but only report on our timing. Sponsored by: iXsystems, Inc. MFC after: 1 month Modified: head/sys/dev/watchdog/watchdog.c head/sys/sys/watchdog.h head/usr.sbin/watchdogd/watchdogd.8 head/usr.sbin/watchdogd/watchdogd.c Modified: head/sys/dev/watchdog/watchdog.c ============================================================================== --- head/sys/dev/watchdog/watchdog.c Wed Feb 27 18:47:01 2013 (r247404) +++ head/sys/dev/watchdog/watchdog.c Wed Feb 27 19:03:31 2013 (r247405) @@ -1,5 +1,8 @@ /*- * Copyright (c) 2004 Poul-Henning Kamp + * Copyright (c) 2013 iXsystems.com, + * author: Alfred Perlstein + * * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,21 +32,40 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include +#include #include #include #include +#include /* kern_clock_gettime() */ + +static int wd_set_pretimeout(int newtimeout, int disableiftoolong); +static void wd_timeout_cb(void *arg); + +static struct callout wd_pretimeo_handle; +static int wd_pretimeout; +static int wd_pretimeout_act = WD_SOFT_LOG; + +static struct callout wd_softtimeo_handle; +static int wd_softtimer; /* true = use softtimer instead of hardware + watchdog */ +static int wd_softtimeout_act = WD_SOFT_LOG; /* action for the software timeout */ + static struct cdev *wd_dev; -static volatile u_int wd_last_u; +static volatile u_int wd_last_u; /* last timeout value set by kern_do_pat */ -static int -kern_do_pat(u_int utim) +static int wd_lastpat_valid = 0; +static time_t wd_lastpat = 0; /* when the watchdog was last patted */ + +int +wdog_kern_pat(u_int utim) { int error; @@ -51,11 +73,20 @@ kern_do_pat(u_int utim) return (EINVAL); if ((utim & WD_LASTVAL) != 0) { + /* + * if WD_LASTVAL is set, fill in the bits for timeout + * from the saved value in wd_last_u. + */ MPASS((wd_last_u & ~WD_INTERVAL) == 0); utim &= ~WD_LASTVAL; utim |= wd_last_u; - } else + } else { + /* + * Otherwise save the new interval. 
+ * This can be zero (to disable the watchdog) + */ wd_last_u = (utim & WD_INTERVAL); + } if ((utim & WD_INTERVAL) == WD_TO_NEVER) { utim = 0; @@ -65,18 +96,49 @@ kern_do_pat(u_int utim) /* Assume no watchdog available; watchdog flags success */ error = EOPNOTSUPP; } - EVENTHANDLER_INVOKE(watchdog_list, utim, &error); + if (wd_softtimer) { + if (utim == 0) { + callout_stop(&wd_softtimeo_handle); + } else { + (void) callout_reset(&wd_softtimeo_handle, + hz*utim, wd_timeout_cb, "soft"); + } + error = 0; + } else { + EVENTHANDLER_INVOKE(watchdog_list, utim, &error); + } + wd_set_pretimeout(wd_pretimeout, true); + /* + * If we were able to arm/strobe the watchdog, then + * update the last time it was strobed for WDIOC_GETTIMELEFT + */ + if (!error) { + struct timespec ts; + + error = kern_clock_gettime(curthread /* XXX */, + CLOCK_MONOTONIC_FAST, &ts); + if (!error) { + wd_lastpat = ts.tv_sec; + wd_lastpat_valid = 1; + } + } return (error); } static int -wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, - int flags __unused, struct thread *td) +wd_valid_act(int act) +{ + + if ((act & ~(WD_SOFT_MASK)) != 0) + return false; + return true; +} + +static int +wd_ioctl_patpat(caddr_t data) { u_int u; - if (cmd != WDIOCPATPAT) - return (ENOIOCTL); u = *(u_int *)data; if (u & ~(WD_ACTIVE | WD_PASSIVE | WD_LASTVAL | WD_INTERVAL)) return (EINVAL); @@ -89,24 +151,162 @@ wd_ioctl(struct cdev *dev __unused, u_lo return (ENOSYS); /* XXX Not implemented yet */ u &= ~(WD_ACTIVE | WD_PASSIVE); - return (kern_do_pat(u)); + return (wdog_kern_pat(u)); } -u_int -wdog_kern_last_timeout(void) +static int +wd_get_time_left(struct thread *td, time_t *remainp) { + struct timespec ts; + int error; - return (wd_last_u); + error = kern_clock_gettime(td, CLOCK_MONOTONIC_FAST, &ts); + if (error) + return (error); + if (!wd_lastpat_valid) + return (ENOENT); + *remainp = ts.tv_sec - wd_lastpat; + return (0); } -int -wdog_kern_pat(u_int utim) +static void +wd_timeout_cb(void *arg) { + const 
char *type = arg; - if (utim & ~(WD_LASTVAL | WD_INTERVAL)) - return (EINVAL); +#ifdef DDB + if ((wd_pretimeout_act & WD_SOFT_DDB)) { + char kdb_why[80]; + snprintf(kdb_why, sizeof(buf), "watchdog %s timeout", type); + kdb_backtrace(); + kdb_enter(KDB_WHY_WATCHDOG, kdb_why); + } +#endif + if ((wd_pretimeout_act & WD_SOFT_LOG)) + log(LOG_EMERG, "watchdog %s-timeout, WD_SOFT_LOG", type); + if ((wd_pretimeout_act & WD_SOFT_PRINTF)) + printf("watchdog %s-timeout, WD_SOFT_PRINTF\n", type); + if ((wd_pretimeout_act & WD_SOFT_PANIC)) + panic("watchdog %s-timeout, WD_SOFT_PANIC set", type); +} - return (kern_do_pat(utim)); +/* + * Called to manage timeouts. + * newtimeout needs to be in the range of 0 to actual watchdog timeout. + * if 0, we disable the pre-timeout. + * otherwise we set the pre-timeout provided it's not greater than the + * current actual watchdog timeout. + */ +static int +wd_set_pretimeout(int newtimeout, int disableiftoolong) +{ + u_int utime; + + utime = wdog_kern_last_timeout(); + /* do not permit a pre-timeout >= than the timeout. */ + if (newtimeout >= utime) { + /* + * If 'disableiftoolong' then just fall through + * so as to disable the pre-watchdog + */ + if (disableiftoolong) + newtimeout = 0; + else + return EINVAL; + } + + /* disable the pre-timeout */ + if (newtimeout == 0) { + wd_pretimeout = 0; + callout_stop(&wd_pretimeo_handle); + return 0; + } + + /* We determined the value is sane, so reset the callout */ + (void) callout_reset(&wd_pretimeo_handle, hz*(utime - newtimeout), + wd_timeout_cb, "pre-timeout"); + wd_pretimeout = newtimeout; + return 0; +} + +static int +wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, + int flags __unused, struct thread *td) +{ + u_int u; + time_t timeleft; + int error; + + error = 0; + + switch (cmd) { + case WDIOC_SETSOFT: + u = *(int *)data; + /* do nothing? 
*/ + if (u == wd_softtimer) + break; + /* If there is a pending timeout disallow this ioctl */ + if (wd_last_u != 0) { + error = EINVAL; + break; + } + wd_softtimer = u; + break; + case WDIOC_SETSOFTTIMEOUTACT: + u = *(int *)data; + if (wd_valid_act(u)) { + wd_softtimeout_act = u; + } else { + error = EINVAL; + } + break; + case WDIOC_SETPRETIMEOUTACT: + u = *(int *)data; + if (wd_valid_act(u)) { + wd_pretimeout_act = u; + } else { + error = EINVAL; + } + break; + case WDIOC_GETPRETIMEOUT: + *(int *)data = (int)wd_pretimeout; + break; + case WDIOC_SETPRETIMEOUT: + error = wd_set_pretimeout(*(int *)data, false); + break; + case WDIOC_GETTIMELEFT: + error = wd_get_time_left(td, &timeleft); + if (error) + break; + *(int *)data = (int)timeleft; + break; + case WDIOC_SETTIMEOUT: + u = *(u_int *)data; + error = wdog_kern_pat(u); + break; + case WDIOC_GETTIMEOUT: + u = wdog_kern_last_timeout(); + *(u_int *)data = u; + break; + case WDIOCPATPAT: + error = wd_ioctl_patpat(data); + break; + default: + error = ENOIOCTL; + break; + } + return (error); +} + +/* + * Return the last timeout set, this is NOT the seconds from NOW until timeout, + * rather it is the amount of seconds passed to WDIOCPATPAT/WDIOC_SETTIMEOUT. 
+ */ +u_int +wdog_kern_last_timeout(void) +{ + + return (wd_last_u); } static struct cdevsw wd_cdevsw = { @@ -120,10 +320,16 @@ watchdog_modevent(module_t mod __unused, { switch(type) { case MOD_LOAD: + callout_init(&wd_pretimeo_handle, true); + callout_init(&wd_softtimeo_handle, true); wd_dev = make_dev(&wd_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, _PATH_WATCHDOG); return 0; case MOD_UNLOAD: + callout_stop(&wd_pretimeo_handle); + callout_stop(&wd_softtimeo_handle); + callout_drain(&wd_pretimeo_handle); + callout_drain(&wd_softtimeo_handle); destroy_dev(wd_dev); return 0; case MOD_SHUTDOWN: Modified: head/sys/sys/watchdog.h ============================================================================== --- head/sys/sys/watchdog.h Wed Feb 27 18:47:01 2013 (r247404) +++ head/sys/sys/watchdog.h Wed Feb 27 19:03:31 2013 (r247405) @@ -1,5 +1,8 @@ /*- * Copyright (c) 2003 Poul-Henning Kamp + * Copyright (c) 2013 iXsystems.com, + * author: Alfred Perlstein + * * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +35,18 @@ #define _PATH_WATCHDOG "fido" -#define WDIOCPATPAT _IOW('W', 42, u_int) +#define WDIOCPATPAT _IOW('W', 42, u_int) /* pat the watchdog */ +#define WDIOC_SETTIMEOUT _IOW('W', 43, int) /* set/reset the timer */ +#define WDIOC_GETTIMEOUT _IOR('W', 44, int) /* get total timeout */ +#define WDIOC_GETTIMELEFT _IOR('W', 45, int) /* get time left */ +#define WDIOC_GETPRETIMEOUT _IOR('W', 46, int) /* get the pre-timeout */ +#define WDIOC_SETPRETIMEOUT _IOW('W', 47, int) /* set the pre-timeout */ +/* set the action when a pre-timeout occurs see: WD_SOFT_* */ +#define WDIOC_SETPRETIMEOUTACT _IOW('W', 48, int) + +/* use software watchdog instead of hardware */ +#define WDIOC_SETSOFT _IOW('W', 49, int) +#define WDIOC_SETSOFTTIMEOUTACT _IOW('W', 50, int) #define WD_ACTIVE 0x8000000 /* @@ -76,6 +90,15 @@ #define WD_TO_8SEC 33 #define WD_TO_16SEC 34 #define WD_TO_32SEC 35 +#define WD_TO_64SEC 36 +#define WD_TO_128SEC 37 + +/* 
action on pre-timeout trigger */ +#define WD_SOFT_PANIC 0x01 /* panic */ +#define WD_SOFT_DDB 0x02 /* enter debugger */ +#define WD_SOFT_LOG 0x04 /* log(9) */ +#define WD_SOFT_PRINTF 0x08 /* printf(9) */ +#define WD_SOFT_MASK 0x0f /* all of the above */ #ifdef _KERNEL Modified: head/usr.sbin/watchdogd/watchdogd.8 ============================================================================== --- head/usr.sbin/watchdogd/watchdogd.8 Wed Feb 27 18:47:01 2013 (r247404) +++ head/usr.sbin/watchdogd/watchdogd.8 Wed Feb 27 19:03:31 2013 (r247405) @@ -1,3 +1,5 @@ +.\" Copyright (c) 2013 iXsystems.com, +.\" author: Alfred Perlstein .\" Copyright (c) 2004 Poul-Henning Kamp .\" Copyright (c) 2003 Sean M. Kelly .\" All rights reserved. @@ -25,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 2, 2006 +.Dd September 2, 2013 .Dt WATCHDOGD 8 .Os .Sh NAME @@ -33,11 +35,17 @@ .Nd watchdog daemon .Sh SYNOPSIS .Nm -.Op Fl d +.Op Fl dnw +.Op Fl -debug +.Op Fl -softtimeout +.Op Fl -softtimeout-action Ar action +.Op Fl -pretimeout Ar timeout +.Op Fl -pretimeout-action Ar action .Op Fl e Ar cmd .Op Fl I Ar file .Op Fl s Ar sleep .Op Fl t Ar timeout +.Op Fl T Ar script_timeout .Sh DESCRIPTION The .Nm @@ -62,6 +70,13 @@ is not specified, the daemon will perfor check instead. .Pp The +.Fl n +argument 'dry-run' will cause watchdog not to arm the system watchdog and +instead only run the watchdog function and report on failures. +This is useful for developing new watchdogd scripts as the system will not +reboot if there are problems with the script. +.Pp +The .Fl s Ar sleep argument can be used to control the sleep period between each execution of the check and defaults to one second. @@ -78,6 +93,16 @@ If this occurs, will no longer execute and thus the kernel's watchdog routines will take action after a configurable timeout. .Pp +The +.Fl T Ar script_timeout +specifies the threshold (in seconds) at which the watchdogd will complain +that its script has run for too long. 
+If unset +.Ar script_timeout +defaults to the value specified by the +.Fl s Ar sleep +option. +.Pp Upon receiving the .Dv SIGTERM or @@ -90,17 +115,85 @@ will terminate. The .Nm utility recognizes the following runtime options: -.Bl -tag -width ".Fl I Ar file" +.Bl -tag -width ".Fl -softtimeout-action Ar action " .It Fl I Ar file Write the process ID of the .Nm utility in the specified file. -.It Fl d +.It Fl d Fl -debug Do not fork. When this option is specified, .Nm will not fork into the background at startup. +.Pp +.It Fl w +Complain when the watchdog script takes too long. +This flag will cause watchdogd to complain when the amount of time to +execute the watchdog script exceeds the threshold of 'sleep' option. +.Pp +.It Fl -pretimeout Ar timeout +Set a "pretimeout" watchdog. At "timeout" seconds before the watchdog +will fire attempt an action. The action is set by the --pretimeout-action +flag. The default is just to log a message (WD_SOFT_LOG) via +.Xr log 9 . +.Pp +.It Fl -pretimeout-action Ar action +Set the timeout action for the pretimeout. See the section +.Sx Timeout Actions . +.Pp +.It Fl -softtimeout +Instead of arming the various hardware watchdogs, only use a basic software +watchdog. The default action is just to +.Xr log 9 +a message (WD_SOFT_LOG). +.Pp +.It Fl -softtimeout-action Ar action +Set the timeout action for the softtimeout. See the section +.Sx Timeout Actions . +.Pp .El +.Sh Timeout Actions +The following timeout actions are available via the +.Fl -pretimeout-action +and +.Fl -softtimeout-action +flags: +.Bl -tag -width ".Ar printf " +.It Ar panic +Call +.Xr panic 9 +when the timeout is reached. +.Pp +.It Ar ddb +Enter the kernel debugger via +.Xr kdb_enter 9 +when the timeout is reached. +.Pp +.It Ar log +Log a message using +.Xr log 9 +when the timeout is reached. +.Pp +.It Ar printf +call the kernel +.Xr printf 9 +to display a message to the console and +.Xr dmesg 8 +buffer. 
+.Pp +.El +Actions can be combined in a comma separated list as so: +.Ar log,printf +which would both +.Xr printf 9 +and +.Xr log 9 +which will send messages both to +.Xr dmesg 8 +and the kernel +.Xr log 4 +device for +.Xr syslog 8 . .Sh FILES .Bl -tag -width ".Pa /var/run/watchdogd.pid" -compact .It Pa /var/run/watchdogd.pid @@ -125,3 +218,6 @@ and .Pp Some contributions made by .An Jeff Roberson Aq jeff@FreeBSD.org . +.Pp +The pretimeout and softtimeout action system was added by +.An Alfred Perlstein Aq alfred@freebsd.org . Modified: head/usr.sbin/watchdogd/watchdogd.c ============================================================================== --- head/usr.sbin/watchdogd/watchdogd.c Wed Feb 27 18:47:01 2013 (r247404) +++ head/usr.sbin/watchdogd/watchdogd.c Wed Feb 27 19:03:31 2013 (r247405) @@ -1,5 +1,8 @@ /*- * Copyright (c) 2003-2004 Sean M. Kelly + * Copyright (c) 2013 iXsystems.com, + * author: Alfred Perlstein + * * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,8 +53,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include + static void parseargs(int, char *[]); static void sighandler(int); static void watchdog_loop(void); @@ -63,13 +69,39 @@ static void usage(void); static int debugging = 0; static int end_program = 0; static const char *pidfile = _PATH_VARRUN "watchdogd.pid"; -static u_int timeout = WD_TO_16SEC; +static u_int timeout = WD_TO_128SEC; +static u_int pretimeout = 0; static u_int passive = 0; static int is_daemon = 0; +static int is_dry_run = 0; /* do not arm the watchdog, only + report on timing of the watch + program */ +static int do_timedog = 0; +static int do_syslog = 0; static int fd = -1; static int nap = 1; +static int carp_thresh_seconds = -1; static char *test_cmd = NULL; +static const char *getopt_shortopts; + +static int pretimeout_set; +static int pretimeout_act; +static int pretimeout_act_set; + +static int softtimeout_set; +static int 
softtimeout_act; +static int softtimeout_act_set; + +static struct option longopts[] = { + { "debug", no_argument, &debugging, 1 }, + { "pretimeout", required_argument, &pretimeout_set, 1 }, + { "pretimeout-action", required_argument, &pretimeout_act_set, 1 }, + { "softtimeout", no_argument, &softtimeout_set, 1 }, + { "softtimeout-action", required_argument, &softtimeout_act_set, 1 }, + { NULL, 0, NULL, 0} +}; + /* * Ask malloc() to map minimum-sized chunks of virtual address space at a time, * so that mlockall() won't needlessly wire megabytes of unused memory into the @@ -93,12 +125,18 @@ main(int argc, char *argv[]) parseargs(argc, argv); + if (do_syslog) { + openlog("watchdogd", LOG_CONS|LOG_NDELAY|LOG_PERROR, + LOG_DAEMON); + + } + rtp.type = RTP_PRIO_REALTIME; rtp.prio = 0; if (rtprio(RTP_SET, 0, &rtp) == -1) err(EX_OSERR, "rtprio"); - if (watchdog_init() == -1) + if (!is_dry_run && watchdog_init() == -1) errx(EX_SOFTWARE, "unable to initialize watchdog"); if (is_daemon) { @@ -108,6 +146,7 @@ main(int argc, char *argv[]) pfh = pidfile_open(pidfile, 0600, &otherpid); if (pfh == NULL) { if (errno == EEXIST) { + watchdog_onoff(0); errx(EX_SOFTWARE, "%s already running, pid: %d", getprogname(), otherpid); } @@ -164,6 +203,9 @@ static int watchdog_init(void) { + if (is_dry_run) + return 0; + fd = open("/dev/" _PATH_WATCHDOG, O_RDWR); if (fd >= 0) return (0); @@ -172,26 +214,98 @@ watchdog_init(void) } /* + * If we are doing timing, then get the time. 
+ */ +static int +watchdog_getuptime(struct timespec *tp) +{ + int error; + + if (!do_timedog) + return 0; + + error = clock_gettime(CLOCK_UPTIME_FAST, tp); + if (error) + warn("clock_gettime"); + return (error); +} + +static long +watchdog_check_dogfunction_time(struct timespec *tp_start, + struct timespec *tp_end) +{ + struct timeval tv_start, tv_end, tv; + const char *cmd_prefix, *cmd; + int sec; + + if (!do_timedog) + return (0); + + TIMESPEC_TO_TIMEVAL(&tv_start, tp_start); + TIMESPEC_TO_TIMEVAL(&tv_end, tp_end); + timersub(&tv_end, &tv_start, &tv); + sec = tv.tv_sec; + if (sec < carp_thresh_seconds) + return (sec); + + if (test_cmd) { + cmd_prefix = "Watchdog program"; + cmd = test_cmd; + } else { + cmd_prefix = "Watchdog operation"; + cmd = "stat(\"/etc\", &sb)"; + } + if (do_syslog) + syslog(LOG_CRIT, "%s: '%s' took too long: " + "%d.%06ld seconds >= %d seconds threshhold", + cmd_prefix, cmd, sec, (long)tv.tv_usec, + carp_thresh_seconds); + warnx("%s: '%s' took too long: " + "%d.%06ld seconds >= %d seconds threshhold", + cmd_prefix, cmd, sec, (long)tv.tv_usec, carp_thresh_seconds); + return (sec); +} + + +/* * Main program loop which is iterated every second. 
*/ static void watchdog_loop(void) { + struct timespec ts_start, ts_end; struct stat sb; - int failed; + long waited; + int error, failed; while (end_program != 2) { failed = 0; + error = watchdog_getuptime(&ts_start); + if (error) { + end_program = 1; + goto try_end; + } + if (test_cmd != NULL) failed = system(test_cmd); else failed = stat("/etc", &sb); + error = watchdog_getuptime(&ts_end); + if (error) { + end_program = 1; + goto try_end; + } + + waited = watchdog_check_dogfunction_time(&ts_start, &ts_end); + if (failed == 0) watchdog_patpat(timeout|WD_ACTIVE); - sleep(nap); + if (nap - waited > 0) + sleep(nap - waited); +try_end: if (end_program != 0) { if (watchdog_onoff(0) == 0) { end_program = 2; @@ -211,6 +325,9 @@ static int watchdog_patpat(u_int t) { + if (is_dry_run) + return 0; + return ioctl(fd, WDIOCPATPAT, &t); } @@ -221,11 +338,62 @@ watchdog_patpat(u_int t) static int watchdog_onoff(int onoff) { + int error; - if (onoff) + /* fake successful watchdog op if a dry run */ + if (is_dry_run) + return 0; + + if (onoff) { + /* + * Call the WDIOC_SETSOFT regardless of softtimeout_set + * because we'll need to turn it off if someone had turned + * it on. 
+ */ + error = ioctl(fd, WDIOC_SETSOFT, &softtimeout_set); + if (error) { + warn("setting WDIOC_SETSOFT %d", softtimeout_set); + return (error); + } + error = watchdog_patpat((timeout|WD_ACTIVE)); + if (error) { + warn("watchdog_patpat failed"); + goto failsafe; + } + if (softtimeout_act_set) { + error = ioctl(fd, WDIOC_SETSOFTTIMEOUTACT, + &softtimeout_act); + if (error) { + warn("setting WDIOC_SETSOFTTIMEOUTACT %d", + softtimeout_act); + goto failsafe; + } + } + if (pretimeout_set) { + error = ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout); + if (error) { + warn("setting WDIOC_SETPRETIMEOUT %d", + pretimeout); + goto failsafe; + } + } + if (pretimeout_act_set) { + error = ioctl(fd, WDIOC_SETPRETIMEOUTACT, + &pretimeout_act); + if (error) { + warn("setting WDIOC_SETPRETIMEOUTACT %d", + pretimeout_act); + goto failsafe; + } + } + /* pat one more time for good measure */ return watchdog_patpat((timeout|WD_ACTIVE)); - else + } else { return watchdog_patpat(0); + } +failsafe: + watchdog_patpat(0); + return (error); } /* @@ -235,27 +403,132 @@ static void usage(void) { if (is_daemon) - fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file] [-s sleep] [-t timeout]\n"); + fprintf(stderr, "usage:\n" +" watchdogd [-dnw] [-e cmd] [-I file] [-s sleep] [-t timeout]\n" +" [-T script_timeout]\n" +" [--debug]\n" +" [--pretimeout seconds] [-pretimeout-action action]\n" +" [--softtimeout] [-softtimeout-action action]\n" +); else fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n"); exit(EX_USAGE); } +static long +fetchtimeout(int opt, const char *longopt, const char *myoptarg) +{ + const char *errstr; + char *p; + long rv; + + errstr = NULL; + p = NULL; + errno = 0; + rv = strtol(myoptarg, &p, 0); + if ((p != NULL && *p != '\0') || errno != 0) + errstr = "is not a number"; + if (rv <= 0) + errstr = "must be greater than zero"; + if (errstr) { + if (longopt) + errx(EX_USAGE, "--%s argument %s", longopt, errstr); + else + errx(EX_USAGE, "-%c argument %s", opt, errstr); + } + 
return (rv); +} + +struct act_tbl { + const char *at_act; + int at_value; +}; + +struct act_tbl act_tbl[] = { + { "panic", WD_SOFT_PANIC }, + { "ddb", WD_SOFT_DDB }, + { "log", WD_SOFT_LOG }, + { "printf", WD_SOFT_PRINTF }, + { NULL, 0 } +}; + +static void +timeout_act_error(const char *lopt, const char *badact) +{ + char *opts, *oldopts; + int i; + + opts = NULL; + for (i = 0; act_tbl[i].at_act != NULL; i++) { + oldopts = opts; + if (asprintf(&opts, "%s%s%s", + oldopts == NULL ? "" : oldopts, + oldopts == NULL ? "" : ", ", + act_tbl[i].at_act) == -1) + err(EX_OSERR, "malloc"); + free(oldopts); + } + warnx("bad --%s argument '%s' must be one of (%s).", + lopt, badact, opts); + usage(); +} + +/* + * Take a comma separated list of actions and or the flags + * together for the ioctl. + */ +static int +timeout_act_str2int(const char *lopt, const char *acts) +{ + int i; + char *dupacts, *tofree; + char *o; + int rv = 0; + + tofree = dupacts = strdup(acts); + if (!tofree) + err(EX_OSERR, "malloc"); + while ((o = strsep(&dupacts, ",")) != NULL) { + for (i = 0; act_tbl[i].at_act != NULL; i++) { + if (!strcmp(o, act_tbl[i].at_act)) { + rv |= act_tbl[i].at_value; + break; + } + } + if (act_tbl[i].at_act == NULL) + timeout_act_error(lopt, o); + } + free(tofree); + return rv; +} + /* * Handle the few command line arguments supported. */ static void parseargs(int argc, char *argv[]) { + int longindex; int c; char *p; + const char *lopt; double a; + /* + * if we end with a 'd' aka 'watchdogd' then we are the daemon program, + * otherwise run as a command line utility. + */ c = strlen(argv[0]); if (argv[0][c - 1] == 'd') is_daemon = 1; - while ((c = getopt(argc, argv, - is_daemon ? "I:de:s:t:?" 
: "dt:?")) != -1) { + + if (is_daemon) + getopt_shortopts = "I:de:ns:t:ST:w?"; + else + getopt_shortopts = "dt:?"; + + while ((c = getopt_long(argc, argv, getopt_shortopts, longopts, + &longindex)) != -1) { switch (c) { case 'I': pidfile = optarg; @@ -266,17 +539,19 @@ parseargs(int argc, char *argv[]) case 'e': test_cmd = strdup(optarg); break; + case 'n': + is_dry_run = 1; + break; #ifdef notyet case 'p': passive = 1; break; #endif case 's': - p = NULL; - errno = 0; - nap = strtol(optarg, &p, 0); - if ((p != NULL && *p != '\0') || errno != 0) - errx(EX_USAGE, "-s argument is not a number"); + nap = fetchtimeout(c, NULL, optarg); + break; + case 'S': + do_syslog = 1; break; case 't': p = NULL; @@ -286,6 +561,7 @@ parseargs(int argc, char *argv[]) errx(EX_USAGE, "-t argument is not a number"); if (a < 0) errx(EX_USAGE, "-t argument must be positive"); + if (a == 0) timeout = WD_TO_NEVER; else @@ -294,12 +570,39 @@ parseargs(int argc, char *argv[]) printf("Timeout is 2^%d nanoseconds\n", timeout); break; + case 'T': + carp_thresh_seconds = fetchtimeout(c, "NULL", optarg); + break; + case 'w': + do_timedog = 1; + break; + case 0: + lopt = longopts[longindex].name; + if (!strcmp(lopt, "pretimeout")) { + pretimeout = fetchtimeout(0, lopt, optarg); + } else if (!strcmp(lopt, "pretimeout-action")) { + pretimeout_act = timeout_act_str2int(lopt, + optarg); + } else if (!strcmp(lopt, "softtimeout-action")) { + softtimeout_act = timeout_act_str2int(lopt, + optarg); + } else { + /* warnx("bad option at index %d: %s", optind, + argv[optind]); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***