Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 30 Oct 2010 15:25:56 +0300
From:      Mikolaj Golub <to.my.trociny@gmail.com>
To:        Pawel Jakub Dawidek <pjd@FreeBSD.org>
Cc:        freebsd-stable@freebsd.org, Pete French <petefrench@ticketswitch.com>
Subject:   Re: hast vs ggate+gmirror sychrnoisation speed
Message-ID:  <86d3qr3m0b.fsf@kopusha.home.net>
In-Reply-To: <86lj5i3zjt.fsf@kopusha.home.net> (Mikolaj Golub's message of "Thu, 28 Oct 2010 22:08:54 +0300")
References:  <E1PAlxN-000H5x-Eh@dilbert.ticketswitch.com> <86wrp3wj67.fsf@kopusha.home.net> <20101028163036.GA2347@garage.freebsd.pl> <86lj5i3zjt.fsf@kopusha.home.net>

next in thread | previous in thread | raw e-mail | index | archive | help
--=-=-=


On Thu, 28 Oct 2010 22:08:54 +0300 Mikolaj Golub wrote to Pawel Jakub Dawidek:

 PJD>> I looked at the code and the keepalive packets are sent from another
 PJD>> thread. Could you try turning them off in primary.c and see if that
 PJD>> helps?

 MG> At first I set RETRY_SLEEP to 1 sec to have more keepalive packets. The errors
 MG> started to appear frequently:

 MG> Oct 28 21:35:53 bolek hastd[1709]: [storage] (secondary) Unable to receive request header: RPC version wrong.
 MG> Oct 28 21:35:54 bolek hastd[1632]: [storage] (secondary) Worker process exited ungracefully (pid=1709, exitcode=75).
 MG> Oct 28 21:36:12 bolek hastd[1722]: [storage] (secondary) Unable to receive request header: RPC version wrong.
 MG> Oct 28 21:36:12 bolek hastd[1632]: [storage] (secondary) Worker process exited ungracefully (pid=1722, exitcode=75).
 MG> ...

 MG> Now I have been running synchronization for more than half an hour with
 MG> keepalive_send disabled and have not seen any error.

So :-) What do you think about sending keepalives from remote_send_thread() to
avoid this problem, and sending them only when the connection is idle (it looks
like there is not much use in sending them all the time)? Something like the
patch below (it works for me).

-- 
Mikolaj Golub


--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=hastd.keepalive.patch

Index: sbin/hastd/primary.c
===================================================================
--- sbin/hastd/primary.c	(revision 214550)
+++ sbin/hastd/primary.c	(working copy)
@@ -190,6 +190,19 @@ static pthread_mutex_t metadata_lock;
 	    hio_next[(ncomp)]);						\
 	mtx_unlock(&hio_##name##_list_lock[(ncomp)]);			\
 } while (0)
+#define	QUEUE_TRY1(hio, name, ncomp)		do {			\
+	mtx_lock(&hio_##name##_list_lock[(ncomp)]);			\
+	(hio) = TAILQ_FIRST(&hio_##name##_list[(ncomp)]);		\
+	if (hio == NULL) {						\
+		cv_timedwait(&hio_##name##_list_cond[(ncomp)],		\
+		    &hio_##name##_list_lock[(ncomp)], RETRY_SLEEP);	\
+		hio = TAILQ_FIRST(&hio_##name##_list[(ncomp)]);		\
+	}								\
+	if (hio != NULL)						\
+		TAILQ_REMOVE(&hio_##name##_list[(ncomp)], hio,		\
+		    hio_next[(ncomp)]);					\
+	mtx_unlock(&hio_##name##_list_lock[(ncomp)]);			\
+} while (0)
 #define	QUEUE_TAKE2(hio, name)	do {					\
 	mtx_lock(&hio_##name##_list_lock);				\
 	while (((hio) = TAILQ_FIRST(&hio_##name##_list)) == NULL) {	\
@@ -1176,6 +1189,38 @@ local_send_thread(void *arg)
 	return (NULL);
 }
 
+static void
+keepalive_send(struct hast_resource *res, unsigned int ncomp)
+{
+	struct nv *nv;
+
+	if (!ISCONNECTED(res, ncomp))
+		return;
+	
+	assert(res->hr_remotein != NULL);
+	assert(res->hr_remoteout != NULL);
+
+	nv = nv_alloc();
+	nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
+	if (nv_error(nv) != 0) {
+		nv_free(nv);
+		pjdlog_debug(1,
+		    "keepalive_send: Unable to prepare header to send.");
+		return;
+	}
+	if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
+		pjdlog_common(LOG_DEBUG, 1, errno,
+		    "keepalive_send: Unable to send request");
+		nv_free(nv);
+		rw_unlock(&hio_remote_lock[ncomp]);
+		remote_close(res, ncomp);
+		rw_rlock(&hio_remote_lock[ncomp]);
+		return;
+	}
+	nv_free(nv);
+	pjdlog_debug(2, "keepalive_send: Request sent.");
+}
+
 /*
  * Thread sends request to secondary node.
  */
@@ -1184,6 +1229,7 @@ remote_send_thread(void *arg)
 {
 	struct hast_resource *res = arg;
 	struct g_gate_ctl_io *ggio;
+	time_t lastcheck, now;
 	struct hio *hio;
 	struct nv *nv;
 	unsigned int ncomp;
@@ -1194,10 +1240,19 @@ remote_send_thread(void *arg)
 
 	/* Remote component is 1 for now. */
 	ncomp = 1;
+	lastcheck = time(NULL);	
 
 	for (;;) {
 		pjdlog_debug(2, "remote_send: Taking request.");
-		QUEUE_TAKE1(hio, send, ncomp);
+		QUEUE_TRY1(hio, send, ncomp);
+		if (hio == NULL) {
+			now = time(NULL);
+			if (lastcheck + RETRY_SLEEP <= now) {
+				keepalive_send(res, ncomp);
+				lastcheck = now;
+			}
+			continue;
+		}
 		pjdlog_debug(2, "remote_send: (%p) Got request.", hio);
 		ggio = &hio->hio_ggio;
 		switch (ggio->gctl_cmd) {
@@ -1883,32 +1938,6 @@ failed:
 }
 
 static void
-keepalive_send(struct hast_resource *res, unsigned int ncomp)
-{
-	struct nv *nv;
-
-	nv = nv_alloc();
-	nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
-	if (nv_error(nv) != 0) {
-		nv_free(nv);
-		pjdlog_debug(1,
-		    "keepalive_send: Unable to prepare header to send.");
-		return;
-	}
-	if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
-		pjdlog_common(LOG_DEBUG, 1, errno,
-		    "keepalive_send: Unable to send request");
-		nv_free(nv);
-		rw_unlock(&hio_remote_lock[ncomp]);
-		remote_close(res, ncomp);
-		rw_rlock(&hio_remote_lock[ncomp]);
-		return;
-	}
-	nv_free(nv);
-	pjdlog_debug(2, "keepalive_send: Request sent.");
-}
-
-static void
 guard_one(struct hast_resource *res, unsigned int ncomp)
 {
 	struct proto_conn *in, *out;
@@ -1926,12 +1955,6 @@ guard_one(struct hast_resource *res, unsigned int
 	if (ISCONNECTED(res, ncomp)) {
 		assert(res->hr_remotein != NULL);
 		assert(res->hr_remoteout != NULL);
-		keepalive_send(res, ncomp);
-	}
-
-	if (ISCONNECTED(res, ncomp)) {
-		assert(res->hr_remotein != NULL);
-		assert(res->hr_remoteout != NULL);
 		rw_unlock(&hio_remote_lock[ncomp]);
 		pjdlog_debug(2, "remote_guard: Connection to %s is ok.",
 		    res->hr_remoteaddr);

--=-=-=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?86d3qr3m0b.fsf>