From owner-freebsd-stable@FreeBSD.ORG Tue Nov 26 11:21:14 2013 Return-Path: Delivered-To: freebsd-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id C71E9894; Tue, 26 Nov 2013 11:21:14 +0000 (UTC) Received: from constantine.ingresso.co.uk (constantine.ingresso.co.uk [IPv6:2a02:b90:3002:e550::3]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 8E63E25C1; Tue, 26 Nov 2013 11:21:14 +0000 (UTC) Received: from dilbert.london-internal.ingresso.co.uk ([10.64.50.6] helo=dilbert.ingresso.co.uk) by constantine.ingresso.co.uk with esmtps (TLSv1:DHE-RSA-AES256-SHA:256) (Exim 4.80.1 (FreeBSD)) (envelope-from ) id 1VlGhV-000NOk-6L; Tue, 26 Nov 2013 11:21:05 +0000 Received: from petefrench by dilbert.ingresso.co.uk with local (Exim 4.80.1 (FreeBSD)) (envelope-from ) id 1VlGhU-000PFN-VP; Tue, 26 Nov 2013 11:21:04 +0000 To: petefrench@ingresso.co.uk, trociny@FreeBSD.org Subject: Re: Hast locking up under 9.2 In-Reply-To: Message-Id: From: Pete French Date: Tue, 26 Nov 2013 11:21:04 +0000 Cc: freebsd-stable@freebsd.org, to.my.trociny@gmail.com X-BeenThere: freebsd-stable@freebsd.org X-Mailman-Version: 2.1.16 Precedence: list List-Id: Production branch of FreeBSD source code List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 26 Nov 2013 11:21:14 -0000 I just tried to apply the pathc, but it didnt apply cleanly _ I am patching against 9.2 STABLE latest version, the rejected bits of the patch are below... *************** *** 1322,1334 **** } pjdlog_debug(2, "ggate_recv: (%p) Moving request to the send queues.", hio); - if (hio->hio_replication == HAST_REPLICATION_MEMSYNC && - ggio->gctl_cmd == BIO_WRITE) { - /* Each remote request needs two responses in memsync. */ - refcnt_init(&hio->hio_countdown, ncomps + 1); - } else { - refcnt_init(&hio->hio_countdown, ncomps); - } for (ii = ncomp; ii < ncomps; ii++) QUEUE_INSERT1(hio, send, ii); } --- 1338,1344 ---- } pjdlog_debug(2, "ggate_recv: (%p) Moving request to the send queues.", hio); + refcnt_init(&hio->hio_countdown, ncomps); for (ii = ncomp; ii < ncomps; ii++) QUEUE_INSERT1(hio, send, ii); } *************** *** 1808,1889 **** hio->hio_errors[ncomp] = 0; nv_free(nv); done_queue: - if (hio->hio_replication != HAST_REPLICATION_MEMSYNC || - hio->hio_ggio.gctl_cmd != BIO_WRITE || ISSYNCREQ(hio)) { - if (refcnt_release(&hio->hio_countdown) > 0) - continue; - } else { - /* - * Depending on hio_countdown value, requests finished - * in the following order: - * - * 0: local write, remote memsync, remote final - * or - * 0: remote memsync, local write, remote final - * - * 1: local write, remote memsync, (remote final) - * or - * 1: remote memsync, remote final, (local write) - * - * 2: remote memsync, (local write), (remote final) - * or - * 2: remote memsync, (remote final), (local write) - */ - switch (refcnt_release(&hio->hio_countdown)) { - case 0: - /* - * Remote final reply arrived. - */ - PJDLOG_ASSERT(!memsyncack); - break; - case 1: - if (memsyncack) { - /* - * Local request already finished, so we - * can complete the write. - */ if (hio->hio_errors[0] == 0) write_complete(res, hio); - /* - * We still need to wait for final - * remote reply. - */ pjdlog_debug(2, - "remote_recv: (%p) Moving request back to the recv queue.", - hio); mtx_lock(&hio_recv_list_lock[ncomp]); TAILQ_INSERT_TAIL(&hio_recv_list[ncomp], hio, hio_next[ncomp]); hio_recv_list_size[ncomp]++; mtx_unlock(&hio_recv_list_lock[ncomp]); - } else { - /* - * Remote final reply arrived before - * local write finished. - * Nothing to do in such case. - */ } - continue; - case 2: - /* - * We received remote memsync reply even before - * local write finished. - */ - PJDLOG_ASSERT(memsyncack); - - pjdlog_debug(2, - "remote_recv: (%p) Moving request back to the recv queue.", - hio); - mtx_lock(&hio_recv_list_lock[ncomp]); - TAILQ_INSERT_TAIL(&hio_recv_list[ncomp], hio, - hio_next[ncomp]); - hio_recv_list_size[ncomp]++; - mtx_unlock(&hio_recv_list_lock[ncomp]); - continue; - default: - PJDLOG_ABORT("Invalid hio_countdown."); } } if (ISSYNCREQ(hio)) { mtx_lock(&sync_lock); SYNCREQDONE(hio); --- 1792,1825 ---- hio->hio_errors[ncomp] = 0; nv_free(nv); done_queue: + if (ISMEMSYNCWRITE(hio)) { + if (!hio->hio_memsyncacked) { + PJDLOG_ASSERT(memsyncack || + hio->hio_errors[ncomp] != 0); + /* Remote ack arrived. */ + if (refcnt_release(&hio->hio_writecount) == 0) { if (hio->hio_errors[0] == 0) write_complete(res, hio); + } + hio->hio_memsyncacked = true; + if (hio->hio_errors[ncomp] == 0) { pjdlog_debug(2, + "remote_recv: (%p) Moving request " + "back to the recv queue.", hio); mtx_lock(&hio_recv_list_lock[ncomp]); TAILQ_INSERT_TAIL(&hio_recv_list[ncomp], hio, hio_next[ncomp]); hio_recv_list_size[ncomp]++; mtx_unlock(&hio_recv_list_lock[ncomp]); + continue; } + } else { + PJDLOG_ASSERT(!memsyncack); + /* Remote final reply arrived. */ } } + if (refcnt_release(&hio->hio_countdown) > 0) + continue; if (ISSYNCREQ(hio)) { mtx_lock(&sync_lock); SYNCREQDONE(hio);