Date: Sun, 17 Oct 2010 16:43:20 +0000 (UTC) From: Pawel Jakub Dawidek <pjd@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org Subject: svn commit: r213984 - stable/8/sbin/hastd Message-ID: <201010171643.o9HGhKkF038338@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: pjd Date: Sun Oct 17 16:43:20 2010 New Revision: 213984 URL: http://svn.freebsd.org/changeset/base/213984 Log: MFC r213183,r213428,r213429,r213430,r213529,r213530,r213531,r213533,r213579, r213580,r213938,r213939,r213981: r213183: Plug memory leak on fork(2) failure. Submitted by: Mikolaj Golub <to.my.trociny@gmail.com> r213428: We can't mask ignored signal, so install dummy signal hander for SIGCHLD before masking it. This fixes bogus reports about hooks running for too long and other problems related to garbage-collecting child processes. Reported by: Mikolaj Golub <to.my.trociny@gmail.com> r213429: hook_check() is now only used to report about long-running hooks, so the argument is redundant, remove it. r213430: Decrease report interval to 5 seconds, as this also means we will check for signals every 5 seconds and not every 10 seconds as before. r213529: Don't close local component on exit as we can hang waiting on g_waitidle. I'm unable to reproduce the race described in comment anymore and also the comment is incorrect - localfd represents local component from configuration file, eg. /dev/da0 and not HAST provider. Reported by: Mikolaj Golub <to.my.trociny@gmail.com> r213530: Start the guard thread first, so we can handle signals from the very begining. Reported by: Mikolaj Golub <to.my.trociny@gmail.com> r213531: Log error message when we fail to destroy ggate provider. r213533: Clear ggate structures before using them. We don't initialize all the field and there can be some garbage from the stack. r213579: We close the event socketpair early in the mainloop to prevent spaming with error messages, so when we clean up after child process, we have to check if the event socketpair is still there. Submitted by: Mikolaj Golub <to.my.trociny@gmail.com> r213580: We can't zero out ggio request, as we have some fields in there we initialize once during start-up. Reported by: Mikolaj Golub <to.my.trociny@gmail.com> r213938: Clear signal mask before executing a hook. Submitted by: Mikolaj Golub <to.my.trociny@gmail.com> r213939: Use one fprintf() instead of two. r213981: Log correct connection when canceling half-open connection. Submitted by: Mikolaj Golub <to.my.trociny@gmail.com> Modified: stable/8/sbin/hastd/control.c stable/8/sbin/hastd/hastd.c stable/8/sbin/hastd/hooks.c stable/8/sbin/hastd/hooks.h stable/8/sbin/hastd/pjdlog.c stable/8/sbin/hastd/primary.c Directory Properties: stable/8/sbin/hastd/ (props changed) Modified: stable/8/sbin/hastd/control.c ============================================================================== --- stable/8/sbin/hastd/control.c Sun Oct 17 16:30:33 2010 (r213983) +++ stable/8/sbin/hastd/control.c Sun Oct 17 16:43:20 2010 (r213984) @@ -58,8 +58,10 @@ child_cleanup(struct hast_resource *res) proto_close(res->hr_ctrl); res->hr_ctrl = NULL; - proto_close(res->hr_event); - res->hr_event = NULL; + if (res->hr_event != NULL) { + proto_close(res->hr_event); + res->hr_event = NULL; + } res->hr_workerpid = 0; } Modified: stable/8/sbin/hastd/hastd.c ============================================================================== --- stable/8/sbin/hastd/hastd.c Sun Oct 17 16:30:33 2010 (r213983) +++ stable/8/sbin/hastd/hastd.c Sun Oct 17 16:43:20 2010 (r213984) @@ -69,7 +69,7 @@ bool sigexit_received = false; struct pidfh *pfh; /* How often check for hooks running for too long. */ -#define REPORT_INTERVAL 10 +#define REPORT_INTERVAL 5 static void usage(void) @@ -527,7 +527,8 @@ listen_accept(void) } else if (res->hr_remotein != NULL) { char oaddr[256]; - proto_remote_address(conn, oaddr, sizeof(oaddr)); + proto_remote_address(res->hr_remotein, oaddr, + sizeof(oaddr)); pjdlog_debug(1, "Canceling half-open connection from %s on connection from %s.", oaddr, raddr); @@ -659,7 +660,7 @@ main_loop(void) assert(maxfd + 1 <= (int)FD_SETSIZE); ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); if (ret == 0) - hook_check(false); + hook_check(); else if (ret == -1) { if (errno == EINTR) continue; @@ -685,6 +686,12 @@ main_loop(void) } } +static void +dummy_sighandler(int sig __unused) +{ + /* Nothing to do. */ +} + int main(int argc, char *argv[]) { @@ -743,6 +750,11 @@ main(int argc, char *argv[]) cfg = yy_config_parse(cfgpath, true); assert(cfg != NULL); + /* + * Because SIGCHLD is ignored by default, setup dummy handler for it, + * so we can mask it. + */ + PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); PJDLOG_VERIFY(sigemptyset(&mask) == 0); PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); Modified: stable/8/sbin/hastd/hooks.c ============================================================================== --- stable/8/sbin/hastd/hooks.c Sun Oct 17 16:30:33 2010 (r213983) +++ stable/8/sbin/hastd/hooks.c Sun Oct 17 16:43:20 2010 (r213984) @@ -293,24 +293,14 @@ hook_check_one(pid_t pid, int status) } void -hook_check(bool sigchld) +hook_check(void) { struct hookproc *hp, *hp2; - int status; time_t now; - pid_t pid; assert(hooks_initialized); /* - * If SIGCHLD was received, garbage collect finished processes. - */ - if (sigchld) { - while ((pid = wait3(&status, WNOHANG, NULL)) > 0) - hook_check_one(pid, status); - } - - /* * Report about processes that are running for a long time. */ now = time(NULL); @@ -364,6 +354,7 @@ hook_execv(const char *path, va_list ap) struct hookproc *hp; char *args[64]; unsigned int ii; + sigset_t mask; pid_t pid; assert(hooks_initialized); @@ -388,9 +379,12 @@ hook_execv(const char *path, va_list ap) switch (pid) { case -1: /* Error. */ pjdlog_errno(LOG_ERR, "Unable to fork to execute %s", path); + hook_free(hp); return; case 0: /* Child. */ descriptors(); + PJDLOG_VERIFY(sigemptyset(&mask) == 0); + PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); execv(path, args); pjdlog_errno(LOG_ERR, "Unable to execute %s", path); exit(EX_SOFTWARE); Modified: stable/8/sbin/hastd/hooks.h ============================================================================== --- stable/8/sbin/hastd/hooks.h Sun Oct 17 16:30:33 2010 (r213983) +++ stable/8/sbin/hastd/hooks.h Sun Oct 17 16:43:20 2010 (r213984) @@ -41,7 +41,7 @@ void hook_init(void); void hook_fini(void); void hook_check_one(pid_t pid, int status); -void hook_check(bool sigchld); +void hook_check(void); void hook_exec(const char *path, ...); void hook_execv(const char *path, va_list ap); Modified: stable/8/sbin/hastd/pjdlog.c ============================================================================== --- stable/8/sbin/hastd/pjdlog.c Sun Oct 17 16:30:33 2010 (r213983) +++ stable/8/sbin/hastd/pjdlog.c Sun Oct 17 16:43:20 2010 (r213984) @@ -214,8 +214,7 @@ pjdlogv_common(int loglevel, int debugle /* Attach debuglevel if this is debug log. */ if (loglevel == LOG_DEBUG) fprintf(out, "[%d]", debuglevel); - fprintf(out, " "); - fprintf(out, "%s", pjdlog_prefix); + fprintf(out, " %s", pjdlog_prefix); vfprintf(out, fmt, ap); if (error != -1) fprintf(out, ": %s.", strerror(error)); Modified: stable/8/sbin/hastd/primary.c ============================================================================== --- stable/8/sbin/hastd/primary.c Sun Oct 17 16:30:33 2010 (r213983) +++ stable/8/sbin/hastd/primary.c Sun Oct 17 16:43:20 2010 (r213984) @@ -234,21 +234,17 @@ cleanup(struct hast_resource *res) /* Remember errno. */ rerrno = errno; - /* - * Close descriptor to /dev/hast/<name> - * to work-around race in the kernel. - */ - close(res->hr_localfd); - /* Destroy ggate provider if we created one. */ if (res->hr_ggateunit >= 0) { struct g_gate_ctl_destroy ggiod; + bzero(&ggiod, sizeof(ggiod)); ggiod.gctl_version = G_GATE_VERSION; ggiod.gctl_unit = res->hr_ggateunit; ggiod.gctl_force = 1; if (ioctl(res->hr_ggatefd, G_GATE_CMD_DESTROY, &ggiod) < 0) { - pjdlog_warning("Unable to destroy hast/%s device", + pjdlog_errno(LOG_WARNING, + "Unable to destroy hast/%s device", res->hr_provname); } res->hr_ggateunit = -1; @@ -705,6 +701,7 @@ init_ggate(struct hast_resource *res) * Create provider before trying to connect, as connection failure * is not critical, but may take some time. */ + bzero(&ggiocreate, sizeof(ggiocreate)); ggiocreate.gctl_version = G_GATE_VERSION; ggiocreate.gctl_mediasize = res->hr_datasize; ggiocreate.gctl_sectorsize = res->hr_local_sectorsize; @@ -714,7 +711,6 @@ init_ggate(struct hast_resource *res) ggiocreate.gctl_unit = G_GATE_NAME_GIVEN; snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s", res->hr_provname); - bzero(ggiocreate.gctl_info, sizeof(ggiocreate.gctl_info)); if (ioctl(res->hr_ggatefd, G_GATE_CMD_CREATE, &ggiocreate) == 0) { pjdlog_info("Device hast/%s created.", res->hr_provname); res->hr_ggateunit = ggiocreate.gctl_unit; @@ -732,6 +728,7 @@ init_ggate(struct hast_resource *res) * provider died and didn't clean up. In that case we will start from * where he left of. */ + bzero(&ggiocancel, sizeof(ggiocancel)); ggiocancel.gctl_version = G_GATE_VERSION; ggiocancel.gctl_unit = G_GATE_NAME_GIVEN; snprintf(ggiocancel.gctl_name, sizeof(ggiocancel.gctl_name), "hast/%s", @@ -797,6 +794,12 @@ hastd_primary(struct hast_resource *res) init_ggate(res); init_environment(res); /* + * Create the guard thread first, so we can handle signals from the + * very begining. + */ + error = pthread_create(&td, NULL, guard_thread, res); + assert(error == 0); + /* * Create the control thread before sending any event to the parent, * as we can deadlock when parent sends control request to worker, * but worker has no control thread started yet, so parent waits. @@ -818,9 +821,7 @@ hastd_primary(struct hast_resource *res) assert(error == 0); error = pthread_create(&td, NULL, ggate_send_thread, res); assert(error == 0); - error = pthread_create(&td, NULL, sync_thread, res); - assert(error == 0); - (void)guard_thread(res); + (void)sync_thread(res); } static void
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201010171643.o9HGhKkF038338>