From: Alexander Motin <mav@FreeBSD.org>
Date: Mon, 14 Feb 2011 11:56:46 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r218678 - projects/graid/head/sys/geom/raid

Author: mav
Date: Mon Feb 14 11:56:45 2011
New Revision: 218678
URL: http://svn.freebsd.org/changeset/base/218678

Log:
  Rework idle event scheduling.  Call the idle() transformation method for
  a volume with no active payload requests at a fixed period of
  idle_threshold, instead of measuring that time from the last (possibly
  internal) activity.  This change allows rebuild/resync to reach the full
  speed supported by the disk, and it ties the tunables more closely to
  real-world values.

  The maximum speed for an idle disk is now rebuild_slab_size *
  rebuild_cluster_idle per idle_threshold interval; with the present
  settings that is 100MB/s.  When the disk is busy, the rebuild issues one
  rebuild_slab_size-sized transaction every rebuild_fair_io payload
  requests; at present that is 1MB per 20 requests.

Modified:
  projects/graid/head/sys/geom/raid/g_raid.c
  projects/graid/head/sys/geom/raid/g_raid.h

Modified: projects/graid/head/sys/geom/raid/g_raid.c
==============================================================================
--- projects/graid/head/sys/geom/raid/g_raid.c	Mon Feb 14 09:58:47 2011	(r218677)
+++ projects/graid/head/sys/geom/raid/g_raid.c	Mon Feb 14 11:56:45 2011	(r218678)
@@ -1047,6 +1047,7 @@ g_raid_iodone(struct bio *bp, int error)
 	bioq_remove(&vol->v_inflight, bp);
 	if (vol->v_pending_lock && g_raid_is_in_locked_range(vol, bp))
 		g_raid_finish_with_locked_ranges(vol, bp);
+	getmicrouptime(&vol->v_last_done);
 	g_io_deliver(bp, error);
 }
 
@@ -1273,6 +1274,7 @@ g_raid_worker(void *arg)
 	struct g_raid_event *ep;
 	struct g_raid_volume *vol;
 	struct bio *bp;
+	struct timeval now, t;
 	int timeout, rv;
 
 	sc = arg;
@@ -1296,38 +1298,59 @@ g_raid_worker(void *arg)
 		else if ((bp = bioq_takefirst(&sc->sc_queue)) != NULL)
 			;
 		else {
-			/*
-			 * Two steps to avoid overflows at HZ=1000
-			 * and idle timeouts > 2.1s.  Some rounding errors
-			 * can occur, but they are < 1tick, which is deemed to
-			 * be close enough for this purpose.
-			 */
-			int micpertic = 1000000 / hz;
-			timeout = g_raid_idle_threshold / micpertic;
-			sx_xunlock(&sc->sc_lock);
-			MSLEEP(rv, sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "-",
-			    timeout);
-			sx_xlock(&sc->sc_lock);
-			goto process;
+			getmicrouptime(&now);
+			t = now;
+			TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
+				if (bioq_first(&vol->v_inflight) == NULL &&
+				    timevalcmp(&vol->v_last_done, &t, < ))
+					t = vol->v_last_done;
+			}
+			timevalsub(&t, &now);
+			timeout = g_raid_idle_threshold +
+			    t.tv_sec * 1000000 + t.tv_usec;
+			if (timeout > 0) {
+				/*
+				 * Two steps to avoid overflows at HZ=1000
+				 * and idle timeouts > 2.1s.  Some rounding
+				 * errors can occur, but they are < 1tick,
+				 * which is deemed to be close enough for
+				 * this purpose.
+				 */
+				int micpertic = 1000000 / hz;
+				timeout = (timeout + micpertic - 1) / micpertic;
+				sx_xunlock(&sc->sc_lock);
+				MSLEEP(rv, sc, &sc->sc_queue_mtx,
+				    PRIBIO | PDROP, "-", timeout);
+				sx_xlock(&sc->sc_lock);
+				goto process;
+			} else
+				rv = EWOULDBLOCK;
 		}
 		mtx_unlock(&sc->sc_queue_mtx);
 process:
-		if (ep != NULL)
+		if (ep != NULL) {
 			g_raid_handle_event(sc, ep);
-		if (bp != NULL) {
+		} else if (bp != NULL) {
 			if (bp->bio_to != NULL &&
 			    bp->bio_to->geom == sc->sc_geom)
 				g_raid_start_request(bp);
 			else
 				g_raid_disk_done_request(bp);
-		}
-		if (rv == EWOULDBLOCK) {
+		} else if (rv == EWOULDBLOCK) {
 			TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 				if (vol->v_writes == 0 && vol->v_dirty)
 					g_raid_clean(vol, -1);
 				if (bioq_first(&vol->v_inflight) == NULL &&
-				    vol->v_tr)
-					G_RAID_TR_IDLE(vol->v_tr);
+				    vol->v_tr) {
+					t.tv_sec = g_raid_idle_threshold / 1000000;
+					t.tv_usec = g_raid_idle_threshold % 1000000;
+					timevaladd(&t, &vol->v_last_done);
+					getmicrouptime(&now);
+					if (timevalcmp(&t, &now, <= )) {
+						G_RAID_TR_IDLE(vol->v_tr);
+						vol->v_last_done = now;
+					}
+				}
 			}
 		}
 		if (sc->sc_stopping == G_RAID_DESTROY_HARD)

Modified: projects/graid/head/sys/geom/raid/g_raid.h
==============================================================================
--- projects/graid/head/sys/geom/raid/g_raid.h	Mon Feb 14 09:58:47 2011	(r218677)
+++ projects/graid/head/sys/geom/raid/g_raid.h	Mon Feb 14 11:56:45 2011	(r218678)
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include <sys/time.h>
 
 #define	G_RAID_CLASS_NAME	"RAID"
 
@@ -250,6 +251,7 @@ struct g_raid_volume {
 	LIST_HEAD(, g_raid_lock) v_locks;	/* List of locked regions. */
 	int			 v_pending_lock; /* writes to locked region */
 	int			 v_dirty;	/* Volume is DIRTY. */
+	struct timeval		 v_last_done;	/* Time of the last I/O. */
 	time_t			 v_last_write;	/* Time of the last write. */
 	u_int			 v_writes;	/* Number of active writes. */
 	struct root_hold_token	*v_rootmount;	/* Root mount delay token. */
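
For readers following the new timeout arithmetic in g_raid_worker() above,
here is a small userland sketch of the same idea: find the oldest
completion time among volumes with no in-flight payload requests and sleep
only until that volume has been idle for idle_threshold.  This is an
illustration only, not the kernel code; the vol structure and
idle_timeout_us() are made-up names, and gettimeofday()/timersub()/
timercmp() stand in for the kernel's getmicrouptime() and timeval macros.

#include <stdio.h>
#include <sys/time.h>

/*
 * Illustrative stand-in for the per-volume state the kernel keeps in
 * struct g_raid_volume (v_last_done, v_inflight).  Hypothetical type.
 */
struct vol {
	struct timeval	 last_done;	/* last payload I/O completion time */
	int		 inflight;	/* non-zero while payload I/O pending */
	struct vol	*next;
};

/*
 * How many microseconds may the worker sleep before the oldest idle
 * volume reaches idle_threshold_us?  A result <= 0 means "run idle()
 * now", which corresponds to the rv = EWOULDBLOCK path in the diff.
 */
static long
idle_timeout_us(struct vol *vols, long idle_threshold_us)
{
	struct timeval now, t;
	struct vol *v;

	gettimeofday(&now, NULL);	/* the kernel uses getmicrouptime() */
	t = now;
	for (v = vols; v != NULL; v = v->next) {
		/* only volumes with no in-flight payload requests count */
		if (v->inflight == 0 && timercmp(&v->last_done, &t, <))
			t = v->last_done;
	}
	timersub(&t, &now, &t);		/* t = oldest last_done - now (<= 0) */
	return (idle_threshold_us + t.tv_sec * 1000000 + t.tv_usec);
}

int
main(void)
{
	struct vol v = { .inflight = 0, .next = NULL };

	gettimeofday(&v.last_done, NULL);
	/* a freshly idle volume: the worker could sleep the full threshold */
	printf("may sleep %ld us\n", idle_timeout_us(&v, 1000000));
	return (0);
}

When the computed value is zero or negative, the worker walks the volume
list and fires G_RAID_TR_IDLE() for each volume whose v_last_done is at
least idle_threshold old, stamping v_last_done afterwards so the next idle
event is scheduled a full interval later.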
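
As a rough check of the throughput figures in the log: the "1MB per 20
requests" case suggests a 1MB rebuild_slab_size, so reaching 100MB/s on an
idle disk would mean about 100 slab-sized transactions per idle_threshold
interval (i.e. a rebuild_cluster_idle on the order of 100 if the threshold
is one second).  Those default values are inferred here, not stated by
this commit; they are controlled by the corresponding tunables.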