Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 12 Jun 2010 15:02:22 +0900
From:      "hiroshi@soupacific.com" <hiroshi@soupacific.com>
To:        Pawel Jakub Dawidek <pjd@FreeBSD.org>, freebsd-fs@freebsd.org
Subject:   Re: FreeBSD 8.1 and HAST
Message-ID:  <4C1322EE.8070704@soupacific.com>
In-Reply-To: <4C122435.4020409@soupacific.com>
References:  <x2u90ed88931004131130m272ab2e4p37c2bdee5353a495@mail.gmail.com><20100416065126.GG1705@garage.freebsd.pl><4BCD3979.8050107@soupacific.com> <4BCD5AD7.8070502@soupacific.com><4BCFA4C2.6000109@soupacific.com> <4BCFB1C5.5000908@soupacific.com><4BD01800.9040901@soupacific.com> <4BD0438B.5080308@soupacific.com><4BD0E432.1000108@soupacific.com><20100423061521.GC1670@garage.freebsd.pl><4BD17B0D.5080601@soupacific.com> <4C10B526.4040908@soupacific.com> <4C122435.4020409@soupacific.com>

next in thread | previous in thread | raw e-mail | index | archive | help

I put some log message to trace the trouble of HAST on 8.1.

Modified code is

/*
  * Thread receives requests from the primary node.
  */
static void *
recv_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct hio *hio;
	bool wakeup;
pjdlog_warning("recv_thread");
	for (;;) {
		pjdlog_debug(2, "recv: Taking free request.");
		mtx_lock(&hio_free_list_lock);
		while ((hio = TAILQ_FIRST(&hio_free_list)) == NULL) {
			pjdlog_debug(2, "recv: No free requests, waiting.");
			cv_wait(&hio_free_list_cond, &hio_free_list_lock);
		}
		TAILQ_REMOVE(&hio_free_list, hio, hio_next);
		mtx_unlock(&hio_free_list_lock);
		pjdlog_debug(2, "recv: (%p) Got request.", hio);

pjdlog_warning("wooooo");

		if (hast_proto_recv_hdr(res->hr_remotein, &hio->hio_nv) < 0) {
			pjdlog_exit(EX_TEMPFAIL,
			    "Unable to receive request header. ");
		}
		if (requnpack(res, hio) != 0)
{
pjdlog_warning("requnpack");
			goto send_queue;
}
		reqlog(LOG_DEBUG, 2, -1, hio,
		    "recv: (%p) Got request header: ", hio);
		if (hio->hio_cmd == HIO_WRITE) {
			if (hast_proto_recv_data(res, res->hr_remotein,
			    hio->hio_nv, hio->hio_data, MAXPHYS) < 0) {
				pjdlog_exit(EX_TEMPFAIL,
				    "Unable to receive reply data");
			}
pjdlog_warning("HIO_WRITE");
		}
		pjdlog_debug(2, "recv: (%p) Moving request to the disk queue.",
		    hio);
		mtx_lock(&hio_disk_list_lock);
		wakeup = TAILQ_EMPTY(&hio_disk_list);
		TAILQ_INSERT_TAIL(&hio_disk_list, hio, hio_next);
		mtx_unlock(&hio_disk_list_lock);
		if (wakeup)
{
pjdlog_warning("wakeup");
			cv_signal(&hio_disk_list_cond);
}
		continue;
send_queue:
		pjdlog_debug(2, "recv: (%p) Moving request to the send queue.",
		    hio);
		mtx_lock(&hio_send_list_lock);
		wakeup = TAILQ_EMPTY(&hio_send_list);
		TAILQ_INSERT_TAIL(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);
		if (wakeup)
			cv_signal(&hio_send_list_cond);
	}
	/* NOTREACHED */
	return (NULL);
}

/*
  * Thread sends requests back to primary node.
  */
static void *
send_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct nv *nvout;
	struct hio *hio;
	void *data;
	size_t length;
	bool wakeup;

	for (;;) {

pjdlog_warning("send_thread for loop");
		pjdlog_debug(2, "send: Taking request.");
		mtx_lock(&hio_send_list_lock);
		while ((hio = TAILQ_FIRST(&hio_send_list)) == NULL) {
			pjdlog_debug(2, "send: No requests, waiting.");
			cv_wait(&hio_send_list_cond, &hio_send_list_lock);
		}
		TAILQ_REMOVE(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);

9.0 logs shows

un 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) woooo
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) woooo
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup

repeated forever

8.1
Jun 12 14:07:18 sv01B hastd: [zfshast] (init) We act as init for the 
resource and

not as secondary as requested by tcp4://192.168.0.240:59254.
Jun 12 14:07:23 sv01B hastd: [zfshast] (init) We act as init for the 
resource and

not as secondary as requested by tcp4://192.168.0.240:56349.
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) recv_thread
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wakeup
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) Unable to receive 
request header.

: Socket is not connected.
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) Worker process exited

ungracefully (pid=757, exitcode=75).
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) recv_thread
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) HIO_WRITE
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wakeup
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wooooo
Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) send_thread for loop
Jun 12 14:07:38 sv01B hastd: [zfshast] (secondary) Unable to receive 
request header.

: Socket is not connected.


I hope this simple trace could help you some idea.

Thanks

Hiroshi



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4C1322EE.8070704>