From owner-freebsd-fs@FreeBSD.ORG Sat Jun 12 06:02:28 2010 Return-Path: Delivered-To: freebsd-fs@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 9992E106566B; Sat, 12 Jun 2010 06:02:28 +0000 (UTC) (envelope-from hiroshi@soupacific.com) Received: from mail.soupacific.com (mail.soupacific.com [211.19.53.201]) by mx1.freebsd.org (Postfix) with ESMTP id 2A73C8FC0A; Sat, 12 Jun 2010 06:02:27 +0000 (UTC) Received: from [127.0.0.1] (unknown [192.168.1.239]) by mail.soupacific.com (Postfix) with ESMTP id 0A98C6B638; Sat, 12 Jun 2010 05:53:50 +0000 (UTC) Message-ID: <4C1322EE.8070704@soupacific.com> Date: Sat, 12 Jun 2010 15:02:22 +0900 From: "hiroshi@soupacific.com" User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.9) Gecko/20100317 Thunderbird/3.0.4 MIME-Version: 1.0 To: Pawel Jakub Dawidek , freebsd-fs@freebsd.org References: <20100416065126.GG1705@garage.freebsd.pl><4BCD3979.8050107@soupacific.com> <4BCD5AD7.8070502@soupacific.com><4BCFA4C2.6000109@soupacific.com> <4BCFB1C5.5000908@soupacific.com><4BD01800.9040901@soupacific.com> <4BD0438B.5080308@soupacific.com><4BD0E432.1000108@soupacific.com><20100423061521.GC1670@garage.freebsd.pl><4BD17B0D.5080601@soupacific.com> <4C10B526.4040908@soupacific.com> <4C122435.4020409@soupacific.com> In-Reply-To: <4C122435.4020409@soupacific.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Cc: Subject: Re: FreeBSD 8.1 and HAST X-BeenThere: freebsd-fs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Filesystems List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 12 Jun 2010 06:02:28 -0000 I put some log message to trace the trouble of HAST on 8.1. Modified code is /* * Thread receives requests from the primary node. */ static void * recv_thread(void *arg) { struct hast_resource *res = arg; struct hio *hio; bool wakeup; pjdlog_warning("recv_thread"); for (;;) { pjdlog_debug(2, "recv: Taking free request."); mtx_lock(&hio_free_list_lock); while ((hio = TAILQ_FIRST(&hio_free_list)) == NULL) { pjdlog_debug(2, "recv: No free requests, waiting."); cv_wait(&hio_free_list_cond, &hio_free_list_lock); } TAILQ_REMOVE(&hio_free_list, hio, hio_next); mtx_unlock(&hio_free_list_lock); pjdlog_debug(2, "recv: (%p) Got request.", hio); pjdlog_warning("wooooo"); if (hast_proto_recv_hdr(res->hr_remotein, &hio->hio_nv) < 0) { pjdlog_exit(EX_TEMPFAIL, "Unable to receive request header. "); } if (requnpack(res, hio) != 0) { pjdlog_warning("requnpack"); goto send_queue; } reqlog(LOG_DEBUG, 2, -1, hio, "recv: (%p) Got request header: ", hio); if (hio->hio_cmd == HIO_WRITE) { if (hast_proto_recv_data(res, res->hr_remotein, hio->hio_nv, hio->hio_data, MAXPHYS) < 0) { pjdlog_exit(EX_TEMPFAIL, "Unable to receive reply data"); } pjdlog_warning("HIO_WRITE"); } pjdlog_debug(2, "recv: (%p) Moving request to the disk queue.", hio); mtx_lock(&hio_disk_list_lock); wakeup = TAILQ_EMPTY(&hio_disk_list); TAILQ_INSERT_TAIL(&hio_disk_list, hio, hio_next); mtx_unlock(&hio_disk_list_lock); if (wakeup) { pjdlog_warning("wakeup"); cv_signal(&hio_disk_list_cond); } continue; send_queue: pjdlog_debug(2, "recv: (%p) Moving request to the send queue.", hio); mtx_lock(&hio_send_list_lock); wakeup = TAILQ_EMPTY(&hio_send_list); TAILQ_INSERT_TAIL(&hio_send_list, hio, hio_next); mtx_unlock(&hio_send_list_lock); if (wakeup) cv_signal(&hio_send_list_cond); } /* NOTREACHED */ return (NULL); } /* * Thread sends requests back to primary node. */ static void * send_thread(void *arg) { struct hast_resource *res = arg; struct nv *nvout; struct hio *hio; void *data; size_t length; bool wakeup; for (;;) { pjdlog_warning("send_thread for loop"); pjdlog_debug(2, "send: Taking request."); mtx_lock(&hio_send_list_lock); while ((hio = TAILQ_FIRST(&hio_send_list)) == NULL) { pjdlog_debug(2, "send: No requests, waiting."); cv_wait(&hio_send_list_cond, &hio_send_list_lock); } TAILQ_REMOVE(&hio_send_list, hio, hio_next); mtx_unlock(&hio_send_list_lock); 9.0 logs shows un 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) woooo Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) woooo Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) HIO_WRITE Jun 12 12:49:33 fw01B hastd: [zfshast] (secondary) wakup repeated forever 8.1 Jun 12 14:07:18 sv01B hastd: [zfshast] (init) We act as init for the resource and not as secondary as requested by tcp4://192.168.0.240:59254. Jun 12 14:07:23 sv01B hastd: [zfshast] (init) We act as init for the resource and not as secondary as requested by tcp4://192.168.0.240:56349. Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) recv_thread Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wooooo Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) HIO_WRITE Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wakeup Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) wooooo Jun 12 14:07:28 sv01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) Unable to receive request header. : Socket is not connected. Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) Worker process exited ungracefully (pid=757, exitcode=75). Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) recv_thread Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wooooo Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) HIO_WRITE Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wakeup Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) wooooo Jun 12 14:07:33 sv01B hastd: [zfshast] (secondary) send_thread for loop Jun 12 14:07:38 sv01B hastd: [zfshast] (secondary) Unable to receive request header. : Socket is not connected. I hope this simple trace could help you some idea. Thanks Hiroshi