Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 10 Apr 2011 15:48:16 +0000 (UTC)
From:      Mikolaj Golub <trociny@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r220525 - stable/8/sbin/hastd
Message-ID:  <201104101548.p3AFmGAq052736@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: trociny
Date: Sun Apr 10 15:48:16 2011
New Revision: 220525
URL: http://svn.freebsd.org/changeset/base/220525

Log:
  MFC r220005, r220006, r220007, r220266, r220270, r220271, r220272,
    r220273, r220274:
  
  r220005 (pjd):
  
  Use role2str() when setting process title.
  
  r220006 (pjd):
  
  Use the timeout from the configuration file not only when sending and
  receiving, but also when establishing a connection.
  
  r220007 (pjd):
  
  Add mapsize to the header just before sending the packet.
  Previously it was added earlier and could change afterwards, so we were
  sending an invalid mapsize.
  Some time ago I added an optimization: when nodes are connected for the
  first time and there were no writes to them yet, no initial full
  synchronization is performed. This bug prevented it from working.
  
  r220266 (pjd):
  
  Handle the problem described in r220264 by using a GEOM GATE queue of
  unlimited length. This should fix deadlocks reported by HAST users.
  
  r220270 (pjd):
  
  Allow disabling sends or receives on a socket using shutdown(2) by
  interpreting a NULL 'data' argument passed to proto_common_send() or
  proto_common_recv() as a request to do so.
  
  r220271 (pjd):
  
  Declare directions for sockets between primary and secondary.
  In HAST we use two sockets - one for only sending the data and one for only
  receiving the data.
  
  r220272 (pjd):
  
  When we are operating on a blocking socket and get EAGAIN on send(2) or
  recv(2), this means that the request timed out. Translate the meaningless
  EAGAIN to ETIMEDOUT to give the administrator a hint that he might need to
  increase the timeout in the configuration file.
  
  r220273 (pjd):
  
  Handle ENOBUFS on send(2) by retrying for a while and logging the problem.
  
  r220274 (pjd):
  
  Increase the default timeout from 5 seconds to 20 seconds. 5 seconds is
  definitely too short under heavy load and I was experiencing those timeouts
  in my recent tests.
  
  Approved by:	pjd (mentor)

Modified:
  stable/8/sbin/hastd/hast.conf.5
  stable/8/sbin/hastd/hast.h
  stable/8/sbin/hastd/primary.c
  stable/8/sbin/hastd/proto_common.c
  stable/8/sbin/hastd/secondary.c
Directory Properties:
  stable/8/sbin/hastd/   (props changed)

Modified: stable/8/sbin/hastd/hast.conf.5
==============================================================================
--- stable/8/sbin/hastd/hast.conf.5	Sun Apr 10 15:40:57 2011	(r220524)
+++ stable/8/sbin/hastd/hast.conf.5	Sun Apr 10 15:48:16 2011	(r220525)
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 20, 2011
+.Dd April 2, 2011
 .Dt HAST.CONF 5
 .Os
 .Sh NAME
@@ -241,7 +241,7 @@ LZF is very fast, general purpose compre
 .Pp
 Connection timeout in seconds.
 The default value is
-.Va 5 .
+.Va 20 .
 .It Ic exec Aq path
 .Pp
 Execute the given program on various HAST events.

Modified: stable/8/sbin/hastd/hast.h
==============================================================================
--- stable/8/sbin/hastd/hast.h	Sun Apr 10 15:40:57 2011	(r220524)
+++ stable/8/sbin/hastd/hast.h	Sun Apr 10 15:48:16 2011	(r220525)
@@ -83,7 +83,7 @@
 #define	HIO_KEEPALIVE		5
 
 #define	HAST_USER	"hast"
-#define	HAST_TIMEOUT	5
+#define	HAST_TIMEOUT	20
 #define	HAST_CONFIG	"/etc/hast.conf"
 #define	HAST_CONTROL	"/var/run/hastctl"
 #define	HASTD_LISTEN	"tcp4://0.0.0.0:8457"

Modified: stable/8/sbin/hastd/primary.c
==============================================================================
--- stable/8/sbin/hastd/primary.c	Sun Apr 10 15:40:57 2011	(r220524)
+++ stable/8/sbin/hastd/primary.c	Sun Apr 10 15:48:16 2011	(r220525)
@@ -509,7 +509,7 @@ primary_connect(struct hast_resource *re
 		primary_exit(EX_TEMPFAIL,
 		    "Unable to receive connection from parent");
 	}
-	if (proto_connect_wait(conn, HAST_TIMEOUT) < 0) {
+	if (proto_connect_wait(conn, res->hr_timeout) < 0) {
 		pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
 		    res->hr_remoteaddr);
 		proto_close(conn);
@@ -701,6 +701,11 @@ init_remote(struct hast_resource *res, s
 		(void)hast_activemap_flush(res);
 	}
 	nv_free(nvin);
+	/* Setup directions. */
+	if (proto_send(out, NULL, 0) == -1)
+		pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
+	if (proto_recv(in, NULL, 0) == -1)
+		pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
 	pjdlog_info("Connected to %s.", res->hr_remoteaddr);
 	if (inp != NULL && outp != NULL) {
 		*inp = in;
@@ -761,7 +766,7 @@ init_ggate(struct hast_resource *res)
 	ggiocreate.gctl_mediasize = res->hr_datasize;
 	ggiocreate.gctl_sectorsize = res->hr_local_sectorsize;
 	ggiocreate.gctl_flags = 0;
-	ggiocreate.gctl_maxcount = G_GATE_MAX_QUEUE_SIZE;
+	ggiocreate.gctl_maxcount = 0;
 	ggiocreate.gctl_timeout = 0;
 	ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
 	snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s",
@@ -868,7 +873,7 @@ hastd_primary(struct hast_resource *res)
 	pjdlog_init(mode);
 	pjdlog_debug_set(debuglevel);
 	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
-	setproctitle("%s (primary)", res->hr_name);
+	setproctitle("%s (%s)", res->hr_name, role2str(res->hr_role));
 
 	init_local(res);
 	init_ggate(res);

Modified: stable/8/sbin/hastd/proto_common.c
==============================================================================
--- stable/8/sbin/hastd/proto_common.c	Sun Apr 10 15:40:57 2011	(r220524)
+++ stable/8/sbin/hastd/proto_common.c	Sun Apr 10 15:48:16 2011	(r220525)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009-2010 The FreeBSD Foundation
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -34,8 +35,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 
 #include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <strings.h>
+#include <unistd.h>
 
 #include "pjdlog.h"
 #include "proto_impl.h"
@@ -45,6 +49,16 @@ __FBSDID("$FreeBSD$");
 #define	MAX_SEND_SIZE	32768
 #endif
 
+static bool
+blocking_socket(int sock)
+{
+	int flags;
+
+	flags = fcntl(sock, F_GETFL);
+	PJDLOG_ASSERT(flags >= 0);
+	return ((flags & O_NONBLOCK) == 0);
+}
+
 static int
 proto_descriptor_send(int sock, int fd)
 {
@@ -80,24 +94,65 @@ proto_common_send(int sock, const unsign
 {
 	ssize_t done;
 	size_t sendsize;
+	int errcount = 0;
 
 	PJDLOG_ASSERT(sock >= 0);
+
+	if (data == NULL) {
+		/* The caller is just trying to decide about direction. */
+
+		PJDLOG_ASSERT(size == 0);
+
+		if (shutdown(sock, SHUT_RD) == -1)
+			return (errno);
+		return (0);
+	}
+
 	PJDLOG_ASSERT(data != NULL);
 	PJDLOG_ASSERT(size > 0);
 
 	do {
 		sendsize = size < MAX_SEND_SIZE ? size : MAX_SEND_SIZE;
 		done = send(sock, data, sendsize, MSG_NOSIGNAL);
-		if (done == 0)
+		if (done == 0) {
 			return (ENOTCONN);
-		else if (done < 0) {
+		} else if (done < 0) {
 			if (errno == EINTR)
 				continue;
+			if (errno == ENOBUFS) {
+				/*
+				 * If there are no buffers we retry.
+				 * After each try we increase delay before the
+				 * next one and we give up after fifteen times.
+				 * This gives 11s of total wait time.
+				 */
+				if (errcount == 15) {
+					pjdlog_warning("Getting ENOBUFS errors for 11s on send(), giving up.");
+				} else {
+					if (errcount == 0)
+						pjdlog_warning("Got ENOBUFS error on send(), retrying for a bit.");
+					errcount++;
+					usleep(100000 * errcount);
+					continue;
+				}
+			}
+			/*
+			 * If this is blocking socket and we got EAGAIN, this
+			 * means the request timed out. Translate errno to
+			 * ETIMEDOUT, to give administrator a hint to
+			 * eventually increase timeout.
+			 */
+			if (errno == EAGAIN && blocking_socket(sock))
+				errno = ETIMEDOUT;
 			return (errno);
 		}
 		data += done;
 		size -= done;
 	} while (size > 0);
+	if (errcount > 0) {
+		pjdlog_info("Data sent successfully after %d ENOBUFS error%s.",
+		    errcount, errcount == 1 ? "" : "s");
+	}
 
 	if (fd == -1)
 		return (0);
@@ -141,16 +196,36 @@ proto_common_recv(int sock, unsigned cha
 	ssize_t done;
 
 	PJDLOG_ASSERT(sock >= 0);
+
+	if (data == NULL) {
+		/* The caller is just trying to decide about direction. */
+
+		PJDLOG_ASSERT(size == 0);
+
+		if (shutdown(sock, SHUT_WR) == -1)
+			return (errno);
+		return (0);
+	}
+
 	PJDLOG_ASSERT(data != NULL);
 	PJDLOG_ASSERT(size > 0);
 
 	do {
 		done = recv(sock, data, size, MSG_WAITALL);
 	} while (done == -1 && errno == EINTR);
-	if (done == 0)
+	if (done == 0) {
 		return (ENOTCONN);
-	else if (done < 0)
+	} else if (done < 0) {
+		/*
+		 * If this is blocking socket and we got EAGAIN, this
+		 * means the request timed out. Translate errno to
+		 * ETIMEDOUT, to give administrator a hint to
+		 * eventually increase timeout.
+		 */
+		if (errno == EAGAIN && blocking_socket(sock))
+			errno = ETIMEDOUT;
 		return (errno);
+	}
 	if (fdp == NULL)
 		return (0);
 	return (proto_descriptor_recv(sock, fdp));

Modified: stable/8/sbin/hastd/secondary.c
==============================================================================
--- stable/8/sbin/hastd/secondary.c	Sun Apr 10 15:40:57 2011	(r220524)
+++ stable/8/sbin/hastd/secondary.c	Sun Apr 10 15:48:16 2011	(r220525)
@@ -183,6 +183,10 @@ init_remote(struct hast_resource *res, s
 	unsigned char *map;
 	size_t mapsize;
 
+	/* Setup direction. */
+	if (proto_send(res->hr_remoteout, NULL, 0) == -1)
+		pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
+
 	map = NULL;
 	mapsize = 0;
 	nvout = nv_alloc();
@@ -201,7 +205,6 @@ init_remote(struct hast_resource *res, s
 		    "Unable to allocate memory (%zu bytes) for activemap.",
 		    mapsize);
 	}
-	nv_add_uint32(nvout, (uint32_t)mapsize, "mapsize");
 	/*
 	 * When we work as primary and secondary is missing we will increase
 	 * localcnt in our metadata. When secondary is connected and synced
@@ -339,6 +342,7 @@ init_remote(struct hast_resource *res, s
 		    (uintmax_t)res->hr_secondary_localcnt,
 		    (uintmax_t)res->hr_secondary_remotecnt);
 	}
+	nv_add_uint32(nvout, (uint32_t)mapsize, "mapsize");
 	if (hast_proto_send(res, res->hr_remotein, nvout, map, mapsize) < 0) {
 		pjdlog_exit(EX_TEMPFAIL, "Unable to send activemap to %s",
 		    res->hr_remoteaddr);
@@ -346,6 +350,9 @@ init_remote(struct hast_resource *res, s
 	if (map != NULL)
 		free(map);
 	nv_free(nvout);
+	/* Setup direction. */
+	if (proto_recv(res->hr_remotein, NULL, 0) == -1)
+		pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
 	if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
 	     res->hr_primary_localcnt > res->hr_secondary_remotecnt) {
 		/* Exit on split-brain. */
@@ -414,7 +421,7 @@ hastd_secondary(struct hast_resource *re
 	pjdlog_init(mode);
 	pjdlog_debug_set(debuglevel);
 	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
-	setproctitle("%s (secondary)", res->hr_name);
+	setproctitle("%s (%s)", res->hr_name, role2str(res->hr_role));
 
 	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
 	PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201104101548.p3AFmGAq052736>