Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 17 Oct 2010 16:43:20 +0000 (UTC)
From:      Pawel Jakub Dawidek <pjd@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r213984 - stable/8/sbin/hastd
Message-ID:  <201010171643.o9HGhKkF038338@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: pjd
Date: Sun Oct 17 16:43:20 2010
New Revision: 213984
URL: http://svn.freebsd.org/changeset/base/213984

Log:
  MFC r213183,r213428,r213429,r213430,r213529,r213530,r213531,r213533,r213579,
    r213580,r213938,r213939,r213981:
  
  r213183:
  
  Plug memory leak on fork(2) failure.
  
  Submitted by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213428:
  
  We can't mask ignored signal, so install dummy signal hander for SIGCHLD before
  masking it.
  
  This fixes bogus reports about hooks running for too long and other problems
  related to garbage-collecting child processes.
  
  Reported by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213429:
  
  hook_check() is now only used to report about long-running hooks, so the
  argument is redundant, remove it.
  
  r213430:
  
  Decrease report interval to 5 seconds, as this also means we will check for
  signals every 5 seconds and not every 10 seconds as before.
  
  r213529:
  
  Don't close local component on exit as we can hang waiting on g_waitidle.
  I'm unable to reproduce the race described in comment anymore and also the
  comment is incorrect - localfd represents local component from configuration
  file, eg. /dev/da0 and not HAST provider.
  
  Reported by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213530:
  
  Start the guard thread first, so we can handle signals from the very begining.
  
  Reported by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213531:
  
  Log error message when we fail to destroy ggate provider.
  
  r213533:
  
  Clear ggate structures before using them. We don't initialize all the field
  and there can be some garbage from the stack.
  
  r213579:
  
  We close the event socketpair early in the mainloop to prevent spaming with
  error messages, so when we clean up after child process, we have to check if
  the event socketpair is still there.
  
  Submitted by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213580:
  
  We can't zero out ggio request, as we have some fields in there we initialize
  once during start-up.
  
  Reported by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213938:
  
  Clear signal mask before executing a hook.
  
  Submitted by:	Mikolaj Golub <to.my.trociny@gmail.com>
  
  r213939:
  
  Use one fprintf() instead of two.
  
  r213981:
  
  Log correct connection when canceling half-open connection.
  
  Submitted by:	Mikolaj Golub <to.my.trociny@gmail.com>

Modified:
  stable/8/sbin/hastd/control.c
  stable/8/sbin/hastd/hastd.c
  stable/8/sbin/hastd/hooks.c
  stable/8/sbin/hastd/hooks.h
  stable/8/sbin/hastd/pjdlog.c
  stable/8/sbin/hastd/primary.c
Directory Properties:
  stable/8/sbin/hastd/   (props changed)

Modified: stable/8/sbin/hastd/control.c
==============================================================================
--- stable/8/sbin/hastd/control.c	Sun Oct 17 16:30:33 2010	(r213983)
+++ stable/8/sbin/hastd/control.c	Sun Oct 17 16:43:20 2010	(r213984)
@@ -58,8 +58,10 @@ child_cleanup(struct hast_resource *res)
 
 	proto_close(res->hr_ctrl);
 	res->hr_ctrl = NULL;
-	proto_close(res->hr_event);
-	res->hr_event = NULL;
+	if (res->hr_event != NULL) {
+		proto_close(res->hr_event);
+		res->hr_event = NULL;
+	}
 	res->hr_workerpid = 0;
 }
 

Modified: stable/8/sbin/hastd/hastd.c
==============================================================================
--- stable/8/sbin/hastd/hastd.c	Sun Oct 17 16:30:33 2010	(r213983)
+++ stable/8/sbin/hastd/hastd.c	Sun Oct 17 16:43:20 2010	(r213984)
@@ -69,7 +69,7 @@ bool sigexit_received = false;
 struct pidfh *pfh;
 
 /* How often check for hooks running for too long. */
-#define	REPORT_INTERVAL	10
+#define	REPORT_INTERVAL	5
 
 static void
 usage(void)
@@ -527,7 +527,8 @@ listen_accept(void)
 		} else if (res->hr_remotein != NULL) {
 			char oaddr[256];
 
-			proto_remote_address(conn, oaddr, sizeof(oaddr));
+			proto_remote_address(res->hr_remotein, oaddr,
+			    sizeof(oaddr));
 			pjdlog_debug(1,
 			    "Canceling half-open connection from %s on connection from %s.",
 			    oaddr, raddr);
@@ -659,7 +660,7 @@ main_loop(void)
 		assert(maxfd + 1 <= (int)FD_SETSIZE);
 		ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout);
 		if (ret == 0)
-			hook_check(false);
+			hook_check();
 		else if (ret == -1) {
 			if (errno == EINTR)
 				continue;
@@ -685,6 +686,12 @@ main_loop(void)
 	}
 }
 
+static void
+dummy_sighandler(int sig __unused)
+{
+	/* Nothing to do. */
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -743,6 +750,11 @@ main(int argc, char *argv[])
 	cfg = yy_config_parse(cfgpath, true);
 	assert(cfg != NULL);
 
+	/*
+	 * Because SIGCHLD is ignored by default, setup dummy handler for it,
+	 * so we can mask it.
+	 */
+	PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
 	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
 	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
 	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);

Modified: stable/8/sbin/hastd/hooks.c
==============================================================================
--- stable/8/sbin/hastd/hooks.c	Sun Oct 17 16:30:33 2010	(r213983)
+++ stable/8/sbin/hastd/hooks.c	Sun Oct 17 16:43:20 2010	(r213984)
@@ -293,24 +293,14 @@ hook_check_one(pid_t pid, int status)
 }
 
 void
-hook_check(bool sigchld)
+hook_check(void)
 {
 	struct hookproc *hp, *hp2;
-	int status;
 	time_t now;
-	pid_t pid;
 
 	assert(hooks_initialized);
 
 	/*
-	 * If SIGCHLD was received, garbage collect finished processes.
-	 */
-	if (sigchld) {
-		while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
-			hook_check_one(pid, status);
-	}
-
-	/*
 	 * Report about processes that are running for a long time.
 	 */
 	now = time(NULL);
@@ -364,6 +354,7 @@ hook_execv(const char *path, va_list ap)
 	struct hookproc *hp;
 	char *args[64];
 	unsigned int ii;
+	sigset_t mask;
 	pid_t pid;
 
 	assert(hooks_initialized);
@@ -388,9 +379,12 @@ hook_execv(const char *path, va_list ap)
 	switch (pid) {
 	case -1:	/* Error. */
 		pjdlog_errno(LOG_ERR, "Unable to fork to execute %s", path);
+		hook_free(hp);
 		return;
 	case 0:		/* Child. */
 		descriptors();
+		PJDLOG_VERIFY(sigemptyset(&mask) == 0);
+		PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
 		execv(path, args);
 		pjdlog_errno(LOG_ERR, "Unable to execute %s", path);
 		exit(EX_SOFTWARE);

Modified: stable/8/sbin/hastd/hooks.h
==============================================================================
--- stable/8/sbin/hastd/hooks.h	Sun Oct 17 16:30:33 2010	(r213983)
+++ stable/8/sbin/hastd/hooks.h	Sun Oct 17 16:43:20 2010	(r213984)
@@ -41,7 +41,7 @@
 void hook_init(void);
 void hook_fini(void);
 void hook_check_one(pid_t pid, int status);
-void hook_check(bool sigchld);
+void hook_check(void);
 void hook_exec(const char *path, ...);
 void hook_execv(const char *path, va_list ap);
 

Modified: stable/8/sbin/hastd/pjdlog.c
==============================================================================
--- stable/8/sbin/hastd/pjdlog.c	Sun Oct 17 16:30:33 2010	(r213983)
+++ stable/8/sbin/hastd/pjdlog.c	Sun Oct 17 16:43:20 2010	(r213984)
@@ -214,8 +214,7 @@ pjdlogv_common(int loglevel, int debugle
 		/* Attach debuglevel if this is debug log. */
 		if (loglevel == LOG_DEBUG)
 			fprintf(out, "[%d]", debuglevel);
-		fprintf(out, " ");
-		fprintf(out, "%s", pjdlog_prefix);
+		fprintf(out, " %s", pjdlog_prefix);
 		vfprintf(out, fmt, ap);
 		if (error != -1)
 			fprintf(out, ": %s.", strerror(error));

Modified: stable/8/sbin/hastd/primary.c
==============================================================================
--- stable/8/sbin/hastd/primary.c	Sun Oct 17 16:30:33 2010	(r213983)
+++ stable/8/sbin/hastd/primary.c	Sun Oct 17 16:43:20 2010	(r213984)
@@ -234,21 +234,17 @@ cleanup(struct hast_resource *res)
 	/* Remember errno. */
 	rerrno = errno;
 
-	/*
-	 * Close descriptor to /dev/hast/<name>
-	 * to work-around race in the kernel.
-	 */
-	close(res->hr_localfd);
-
 	/* Destroy ggate provider if we created one. */
 	if (res->hr_ggateunit >= 0) {
 		struct g_gate_ctl_destroy ggiod;
 
+		bzero(&ggiod, sizeof(ggiod));
 		ggiod.gctl_version = G_GATE_VERSION;
 		ggiod.gctl_unit = res->hr_ggateunit;
 		ggiod.gctl_force = 1;
 		if (ioctl(res->hr_ggatefd, G_GATE_CMD_DESTROY, &ggiod) < 0) {
-			pjdlog_warning("Unable to destroy hast/%s device",
+			pjdlog_errno(LOG_WARNING,
+			    "Unable to destroy hast/%s device",
 			    res->hr_provname);
 		}
 		res->hr_ggateunit = -1;
@@ -705,6 +701,7 @@ init_ggate(struct hast_resource *res)
 	 * Create provider before trying to connect, as connection failure
 	 * is not critical, but may take some time.
 	 */
+	bzero(&ggiocreate, sizeof(ggiocreate));
 	ggiocreate.gctl_version = G_GATE_VERSION;
 	ggiocreate.gctl_mediasize = res->hr_datasize;
 	ggiocreate.gctl_sectorsize = res->hr_local_sectorsize;
@@ -714,7 +711,6 @@ init_ggate(struct hast_resource *res)
 	ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
 	snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s",
 	    res->hr_provname);
-	bzero(ggiocreate.gctl_info, sizeof(ggiocreate.gctl_info));
 	if (ioctl(res->hr_ggatefd, G_GATE_CMD_CREATE, &ggiocreate) == 0) {
 		pjdlog_info("Device hast/%s created.", res->hr_provname);
 		res->hr_ggateunit = ggiocreate.gctl_unit;
@@ -732,6 +728,7 @@ init_ggate(struct hast_resource *res)
 	 * provider died and didn't clean up. In that case we will start from
 	 * where he left of.
 	 */
+	bzero(&ggiocancel, sizeof(ggiocancel));
 	ggiocancel.gctl_version = G_GATE_VERSION;
 	ggiocancel.gctl_unit = G_GATE_NAME_GIVEN;
 	snprintf(ggiocancel.gctl_name, sizeof(ggiocancel.gctl_name), "hast/%s",
@@ -797,6 +794,12 @@ hastd_primary(struct hast_resource *res)
 	init_ggate(res);
 	init_environment(res);
 	/*
+	 * Create the guard thread first, so we can handle signals from the
+	 * very begining.
+	 */
+	error = pthread_create(&td, NULL, guard_thread, res);
+	assert(error == 0);
+	/*
 	 * Create the control thread before sending any event to the parent,
 	 * as we can deadlock when parent sends control request to worker,
 	 * but worker has no control thread started yet, so parent waits.
@@ -818,9 +821,7 @@ hastd_primary(struct hast_resource *res)
 	assert(error == 0);
 	error = pthread_create(&td, NULL, ggate_send_thread, res);
 	assert(error == 0);
-	error = pthread_create(&td, NULL, sync_thread, res);
-	assert(error == 0);
-	(void)guard_thread(res);
+	(void)sync_thread(res);
 }
 
 static void



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201010171643.o9HGhKkF038338>