Date: Sat, 30 Oct 2010 15:25:56 +0300 From: Mikolaj Golub <to.my.trociny@gmail.com> To: Pawel Jakub Dawidek <pjd@FreeBSD.org> Cc: freebsd-stable@freebsd.org, Pete French <petefrench@ticketswitch.com> Subject: Re: hast vs ggate+gmirror sychrnoisation speed Message-ID: <86d3qr3m0b.fsf@kopusha.home.net> In-Reply-To: <86lj5i3zjt.fsf@kopusha.home.net> (Mikolaj Golub's message of "Thu, 28 Oct 2010 22:08:54 %2B0300") References: <E1PAlxN-000H5x-Eh@dilbert.ticketswitch.com> <86wrp3wj67.fsf@kopusha.home.net> <20101028163036.GA2347@garage.freebsd.pl> <86lj5i3zjt.fsf@kopusha.home.net>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
PJD>> I looked at the code and the keepalive packets are sent from another
PJD>> thread. Could you try turning them off in primary.c and see if that
PJD>> helps?
MG> At first I set RETRY_SLEEP to 1 sec to have more keepalive packets. The errors
MG> started to appear frequently:
MG> Oct 28 21:35:53 bolek hastd[1709]: [storage] (secondary) Unable to receive request header: RPC version wrong.
MG> Oct 28 21:35:54 bolek hastd[1632]: [storage] (secondary) Worker process exited ungracefully (pid=1709, exitcode=75).
MG> Oct 28 21:36:12 bolek hastd[1722]: [storage] (secondary) Unable to receive request header: RPC version wrong.
MG> Oct 28 21:36:12 bolek hastd[1632]: [storage] (secondary) Worker process exited ungracefully (pid=1722, exitcode=75).
MG> ...
MG> Now I have been running synchronization for more than half an hour with
MG> keepalive_send disabled and have not seen any error.
So :-) What do you think about sending keepalive in remote_send_thread() to
avoid this problem and sending them only when a connection is idle (it looks
like there is not much use in sending them all the time)? Something like in the
patch below (it works for me).
--
Mikolaj Golub
[-- Attachment #2 --]
Index: sbin/hastd/primary.c
===================================================================
--- sbin/hastd/primary.c (revision 214550)
+++ sbin/hastd/primary.c (working copy)
@@ -190,6 +190,19 @@ static pthread_mutex_t metadata_lock;
hio_next[(ncomp)]); \
mtx_unlock(&hio_##name##_list_lock[(ncomp)]); \
} while (0)
+#define QUEUE_TRY1(hio, name, ncomp) do { \
+ mtx_lock(&hio_##name##_list_lock[(ncomp)]); \
+ (hio) = TAILQ_FIRST(&hio_##name##_list[(ncomp)]); \
+ if (hio == NULL) { \
+ cv_timedwait(&hio_##name##_list_cond[(ncomp)], \
+ &hio_##name##_list_lock[(ncomp)], RETRY_SLEEP); \
+ hio = TAILQ_FIRST(&hio_##name##_list[(ncomp)]); \
+ } \
+ if (hio != NULL) \
+ TAILQ_REMOVE(&hio_##name##_list[(ncomp)], hio, \
+ hio_next[(ncomp)]); \
+ mtx_unlock(&hio_##name##_list_lock[(ncomp)]); \
+} while (0)
#define QUEUE_TAKE2(hio, name) do { \
mtx_lock(&hio_##name##_list_lock); \
while (((hio) = TAILQ_FIRST(&hio_##name##_list)) == NULL) { \
@@ -1176,6 +1189,38 @@ local_send_thread(void *arg)
return (NULL);
}
+static void
+keepalive_send(struct hast_resource *res, unsigned int ncomp)
+{
+ struct nv *nv;
+
+ if (!ISCONNECTED(res, ncomp))
+ return;
+
+ assert(res->hr_remotein != NULL);
+ assert(res->hr_remoteout != NULL);
+
+ nv = nv_alloc();
+ nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
+ if (nv_error(nv) != 0) {
+ nv_free(nv);
+ pjdlog_debug(1,
+ "keepalive_send: Unable to prepare header to send.");
+ return;
+ }
+ if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
+ pjdlog_common(LOG_DEBUG, 1, errno,
+ "keepalive_send: Unable to send request");
+ nv_free(nv);
+ rw_unlock(&hio_remote_lock[ncomp]);
+ remote_close(res, ncomp);
+ rw_rlock(&hio_remote_lock[ncomp]);
+ return;
+ }
+ nv_free(nv);
+ pjdlog_debug(2, "keepalive_send: Request sent.");
+}
+
/*
* Thread sends request to secondary node.
*/
@@ -1184,6 +1229,7 @@ remote_send_thread(void *arg)
{
struct hast_resource *res = arg;
struct g_gate_ctl_io *ggio;
+ time_t lastcheck, now;
struct hio *hio;
struct nv *nv;
unsigned int ncomp;
@@ -1194,10 +1240,19 @@ remote_send_thread(void *arg)
/* Remote component is 1 for now. */
ncomp = 1;
+ lastcheck = time(NULL);
for (;;) {
pjdlog_debug(2, "remote_send: Taking request.");
- QUEUE_TAKE1(hio, send, ncomp);
+ QUEUE_TRY1(hio, send, ncomp);
+ if (hio == NULL) {
+ now = time(NULL);
+ if (lastcheck + RETRY_SLEEP <= now) {
+ keepalive_send(res, ncomp);
+ lastcheck = now;
+ }
+ continue;
+ }
pjdlog_debug(2, "remote_send: (%p) Got request.", hio);
ggio = &hio->hio_ggio;
switch (ggio->gctl_cmd) {
@@ -1883,32 +1938,6 @@ failed:
}
static void
-keepalive_send(struct hast_resource *res, unsigned int ncomp)
-{
- struct nv *nv;
-
- nv = nv_alloc();
- nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
- if (nv_error(nv) != 0) {
- nv_free(nv);
- pjdlog_debug(1,
- "keepalive_send: Unable to prepare header to send.");
- return;
- }
- if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
- pjdlog_common(LOG_DEBUG, 1, errno,
- "keepalive_send: Unable to send request");
- nv_free(nv);
- rw_unlock(&hio_remote_lock[ncomp]);
- remote_close(res, ncomp);
- rw_rlock(&hio_remote_lock[ncomp]);
- return;
- }
- nv_free(nv);
- pjdlog_debug(2, "keepalive_send: Request sent.");
-}
-
-static void
guard_one(struct hast_resource *res, unsigned int ncomp)
{
struct proto_conn *in, *out;
@@ -1926,12 +1955,6 @@ guard_one(struct hast_resource *res, unsigned int
if (ISCONNECTED(res, ncomp)) {
assert(res->hr_remotein != NULL);
assert(res->hr_remoteout != NULL);
- keepalive_send(res, ncomp);
- }
-
- if (ISCONNECTED(res, ncomp)) {
- assert(res->hr_remotein != NULL);
- assert(res->hr_remoteout != NULL);
rw_unlock(&hio_remote_lock[ncomp]);
pjdlog_debug(2, "remote_guard: Connection to %s is ok.",
res->hr_remoteaddr);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?86d3qr3m0b.fsf>
