From owner-svn-src-stable@FreeBSD.ORG Wed Mar 6 06:57:19 2013 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.FreeBSD.org [8.8.178.115]) by hub.freebsd.org (Postfix) with ESMTP id 8C5AC157; Wed, 6 Mar 2013 06:57:19 +0000 (UTC) (envelope-from trociny@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 6466165C; Wed, 6 Mar 2013 06:57:19 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.6/8.14.6) with ESMTP id r266vJpm008749; Wed, 6 Mar 2013 06:57:19 GMT (envelope-from trociny@svn.freebsd.org) Received: (from trociny@localhost) by svn.freebsd.org (8.14.6/8.14.5/Submit) id r266vIdk008745; Wed, 6 Mar 2013 06:57:18 GMT (envelope-from trociny@svn.freebsd.org) Message-Id: <201303060657.r266vIdk008745@svn.freebsd.org> From: Mikolaj Golub Date: Wed, 6 Mar 2013 06:57:18 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r247866 - stable/9/sbin/hastd X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 06 Mar 2013 06:57:19 -0000 Author: trociny Date: Wed Mar 6 06:57:18 2013 New Revision: 247866 URL: http://svnweb.freebsd.org/changeset/base/247866 Log: MFC r247281: Add i/o error counters to hastd(8) and make hastctl(8) display them. This may be useful for detecting problems with HAST disks. Discussed with and reviewed by: pjd Modified: stable/9/sbin/hastd/control.c stable/9/sbin/hastd/hast.h stable/9/sbin/hastd/primary.c stable/9/sbin/hastd/secondary.c Directory Properties: stable/9/sbin/hastd/ (props changed) Modified: stable/9/sbin/hastd/control.c ============================================================================== --- stable/9/sbin/hastd/control.c Wed Mar 6 06:24:09 2013 (r247865) +++ stable/9/sbin/hastd/control.c Wed Mar 6 06:57:18 2013 (r247866) @@ -207,6 +207,14 @@ control_status_worker(struct hast_resour "stat_flush%u", no); nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"), "stat_activemap_update%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"), + "stat_read_error%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"), + "stat_write_error%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"), + "stat_delete_error%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"), + "stat_flush_error%u", no); end: if (cnvin != NULL) nv_free(cnvin); @@ -459,6 +467,16 @@ ctrl_thread(void *arg) nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush"); nv_add_uint64(nvout, res->hr_stat_activemap_update, "stat_activemap_update"); + nv_add_uint64(nvout, res->hr_stat_read_error, + "stat_read_error"); + nv_add_uint64(nvout, res->hr_stat_write_error + + res->hr_stat_activemap_write_error, + "stat_write_error"); + nv_add_uint64(nvout, res->hr_stat_delete_error, + "stat_delete_error"); + nv_add_uint64(nvout, res->hr_stat_flush_error + + res->hr_stat_activemap_flush_error, + "stat_flush_error"); nv_add_int16(nvout, 0, "error"); break; case CONTROL_RELOAD: Modified: stable/9/sbin/hastd/hast.h ============================================================================== --- stable/9/sbin/hastd/hast.h Wed Mar 6 06:24:09 2013 (r247865) +++ stable/9/sbin/hastd/hast.h Wed Mar 6 06:57:18 2013 (r247866) @@ -234,6 +234,18 @@ struct hast_resource { uint64_t hr_stat_flush; /* Number of activemap updates. */ uint64_t hr_stat_activemap_update; + /* Number of local read errors. */ + uint64_t hr_stat_read_error; + /* Number of local write errors. */ + uint64_t hr_stat_write_error; + /* Number of local delete errors. */ + uint64_t hr_stat_delete_error; + /* Number of flush errors. */ + uint64_t hr_stat_flush_error; + /* Number of activemap write errors. */ + uint64_t hr_stat_activemap_write_error; + /* Number of activemap flush errors. */ + uint64_t hr_stat_activemap_flush_error; /* Next resource. */ TAILQ_ENTRY(hast_resource) hr_next; Modified: stable/9/sbin/hastd/primary.c ============================================================================== --- stable/9/sbin/hastd/primary.c Wed Mar 6 06:24:09 2013 (r247865) +++ stable/9/sbin/hastd/primary.c Wed Mar 6 06:57:18 2013 (r247866) @@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resourc if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) != (ssize_t)size) { pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk"); + res->hr_stat_activemap_write_error++; return (-1); } if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) { @@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resourc } else { pjdlog_errno(LOG_ERR, "Unable to flush disk cache on activemap update"); + res->hr_stat_activemap_flush_error++; return (-1); } } @@ -1792,6 +1794,22 @@ ggate_send_thread(void *arg) "G_GATE_CMD_DONE failed"); } } + if (hio->hio_errors[0]) { + switch (ggio->gctl_cmd) { + case BIO_READ: + res->hr_stat_read_error++; + break; + case BIO_WRITE: + res->hr_stat_write_error++; + break; + case BIO_DELETE: + res->hr_stat_delete_error++; + break; + case BIO_FLUSH: + res->hr_stat_flush_error++; + break; + } + } pjdlog_debug(2, "ggate_send: (%p) Moving request to the free queue.", hio); QUEUE_INSERT2(hio, free); Modified: stable/9/sbin/hastd/secondary.c ============================================================================== --- stable/9/sbin/hastd/secondary.c Wed Mar 6 06:24:09 2013 (r247865) +++ stable/9/sbin/hastd/secondary.c Wed Mar 6 06:57:18 2013 (r247866) @@ -725,6 +725,7 @@ disk_thread(void *arg) pjdlog_errno(LOG_WARNING, "Unable to store cleared activemap"); free(map); + res->hr_stat_activemap_write_error++; break; } free(map); @@ -839,8 +840,23 @@ send_thread(void *arg) PJDLOG_ABORT("Unexpected command (cmd=%hhu).", hio->hio_cmd); } - if (hio->hio_error != 0) + if (hio->hio_error != 0) { + switch (hio->hio_cmd) { + case HIO_READ: + res->hr_stat_read_error++; + break; + case HIO_WRITE: + res->hr_stat_write_error++; + break; + case HIO_DELETE: + res->hr_stat_delete_error++; + break; + case HIO_FLUSH: + res->hr_stat_flush_error++; + break; + } nv_add_int16(nvout, hio->hio_error, "error"); + } if (hast_proto_send(res, res->hr_remoteout, nvout, data, length) == -1) { secondary_exit(EX_TEMPFAIL, "Unable to send reply");