Date: Thu, 8 Jan 2009 09:24:18 +0000 (UTC) From: Luigi Rizzo <luigi@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r186888 - in user/luigi/geom_sched/sys: conf dev/ata geom geom/sched modules/geom modules/geom/geom_sched modules/geom/geom_sched/geom_sched modules/geom/geom_sched/gs_as modules/geom/g... Message-ID: <200901080924.n089OIeF014222@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: luigi Date: Thu Jan 8 09:24:18 2009 New Revision: 186888 URL: http://svn.freebsd.org/changeset/base/186888 Log: initial snapshot of geom disk schedulers by Fabio Checconi. Added: user/luigi/geom_sched/sys/geom/sched/ user/luigi/geom_sched/sys/geom/sched/g_as.c user/luigi/geom_sched/sys/geom/sched/g_gsched.h user/luigi/geom_sched/sys/geom/sched/g_rr.c user/luigi/geom_sched/sys/geom/sched/g_sched.c user/luigi/geom_sched/sys/geom/sched/g_sched.h user/luigi/geom_sched/sys/geom/sched/gs_as.c user/luigi/geom_sched/sys/geom/sched/gs_rr.c user/luigi/geom_sched/sys/modules/geom/geom_sched/ user/luigi/geom_sched/sys/modules/geom/geom_sched/Makefile user/luigi/geom_sched/sys/modules/geom/geom_sched/geom_sched/ user/luigi/geom_sched/sys/modules/geom/geom_sched/geom_sched/Makefile user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_as/ user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_as/Makefile user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_rr/ user/luigi/geom_sched/sys/modules/geom/geom_sched/gs_rr/Makefile user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_as/ user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_as/Makefile user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_rr/ user/luigi/geom_sched/sys/modules/geom/geom_sched/gsched_rr/Makefile Modified: user/luigi/geom_sched/sys/conf/NOTES user/luigi/geom_sched/sys/conf/files user/luigi/geom_sched/sys/conf/options user/luigi/geom_sched/sys/dev/ata/ata-all.h user/luigi/geom_sched/sys/dev/ata/ata-disk.c user/luigi/geom_sched/sys/dev/ata/ata-queue.c user/luigi/geom_sched/sys/geom/geom_disk.c user/luigi/geom_sched/sys/geom/geom_disk.h user/luigi/geom_sched/sys/geom/geom_io.c user/luigi/geom_sched/sys/modules/geom/Makefile user/luigi/geom_sched/sys/sys/bio.h Modified: user/luigi/geom_sched/sys/conf/NOTES ============================================================================== --- user/luigi/geom_sched/sys/conf/NOTES Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/conf/NOTES Thu Jan 8 09:24:18 2009 (r186888) @@ -153,6 +153,10 @@ options GEOM_PART_MBR # MBR partitioni options GEOM_PART_VTOC8 # SMI VTOC8 disk label options GEOM_PC98 # NEC PC9800 partitioning options GEOM_RAID3 # RAID3 functionality. +options GEOM_SCHED # Disk scheduling in GEOM. +options GEOM_GSCHED_AS # Geom-based anticipatory. +options GEOM_GS_AS # Driver-based anticipatory. +options GEOM_GS_RR # Driver-based round-robin. options GEOM_SHSEC # Shared secret. options GEOM_STRIPE # Disk striping. options GEOM_SUNLABEL # Sun/Solaris partitioning Modified: user/luigi/geom_sched/sys/conf/files ============================================================================== --- user/luigi/geom_sched/sys/conf/files Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/conf/files Thu Jan 8 09:24:18 2009 (r186888) @@ -1283,6 +1283,7 @@ geom/geom_mbr_enc.c optional geom_mbr geom/geom_pc98.c optional geom_pc98 geom/geom_pc98_enc.c optional geom_pc98 geom/geom_slice.c standard +geom/geom_sched.c standard geom/geom_subr.c standard geom/geom_sunlabel.c optional geom_sunlabel geom/geom_sunlabel_enc.c optional geom_sunlabel @@ -1312,6 +1313,11 @@ geom/part/g_part_vtoc8.c optional geom_p geom/raid3/g_raid3.c optional geom_raid3 geom/raid3/g_raid3_ctl.c optional geom_raid3 geom/shsec/g_shsec.c optional geom_shsec +geom/sched/g_sched.c optional geom_sched +geom/sched/g_as.c optional geom_gsched_as +geom/sched/g_rr.c optional geom_gsched_rr +geom/sched/gs_as.c optional geom_gs_as +geom/sched/gs_rr.c optional geom_gs_rr geom/stripe/g_stripe.c optional geom_stripe geom/uzip/g_uzip.c optional geom_uzip geom/virstor/binstream.c optional geom_virstor Modified: user/luigi/geom_sched/sys/conf/options ============================================================================== --- user/luigi/geom_sched/sys/conf/options Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/conf/options Thu Jan 8 09:24:18 2009 (r186888) @@ -98,6 +98,10 @@ GEOM_PART_MBR opt_geom.h GEOM_PART_VTOC8 opt_geom.h GEOM_PC98 opt_geom.h GEOM_RAID3 opt_geom.h +GEOM_SCHED opt_geom.h +GEOM_GSCHED_AS opt_geom.h +GEOM_GS_AS opt_geom.h +GEOM_GS_RR opt_geom.h GEOM_SHSEC opt_geom.h GEOM_STRIPE opt_geom.h GEOM_SUNLABEL opt_geom.h Modified: user/luigi/geom_sched/sys/dev/ata/ata-all.h ============================================================================== --- user/luigi/geom_sched/sys/dev/ata/ata-all.h Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/dev/ata/ata-all.h Thu Jan 8 09:24:18 2009 (r186888) @@ -510,6 +510,7 @@ struct ata_channel { TAILQ_HEAD(, ata_request) ata_queue; /* head of ATA queue */ struct ata_request *freezepoint; /* composite freezepoint */ struct ata_request *running; /* currently running request */ + struct disk *disks[2]; /* disks, if any */ }; /* disk bay/enclosure related */ @@ -546,6 +547,9 @@ int ata_wmode(struct ata_params *ap); int ata_umode(struct ata_params *ap); int ata_limit_mode(device_t dev, int mode, int maxmode); +/* ata-disk.c */ +struct ata_request *ata_create_request(struct bio *bp, int full); + /* ata-queue.c: */ int ata_controlcmd(device_t dev, u_int8_t command, u_int16_t feature, u_int64_t lba, u_int16_t count); int ata_atapicmd(device_t dev, u_int8_t *ccb, caddr_t data, int count, int flags, int timeout); Modified: user/luigi/geom_sched/sys/dev/ata/ata-disk.c ============================================================================== --- user/luigi/geom_sched/sys/dev/ata/ata-disk.c Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/dev/ata/ata-disk.c Thu Jan 8 09:24:18 2009 (r186888) @@ -60,6 +60,7 @@ static void ad_describe(device_t dev); static int ad_version(u_int16_t); static disk_strategy_t ad_strategy; static disk_ioctl_t ad_ioctl; +static disk_kick_t ad_kick; static dumper_t ad_dump; /* @@ -148,6 +149,7 @@ ad_attach(device_t dev) adp->disk = disk_alloc(); adp->disk->d_strategy = ad_strategy; adp->disk->d_ioctl = ad_ioctl; + adp->disk->d_kick = ad_kick; adp->disk->d_dump = ad_dump; adp->disk->d_name = "ad"; adp->disk->d_drv1 = dev; @@ -168,6 +170,7 @@ ad_attach(device_t dev) snprintf(adp->disk->d_ident, sizeof(adp->disk->d_ident), "ad:%s", atadev->param.serial); disk_create(adp->disk, DISK_VERSION); + ch->disks[atadev->unit == ATA_SLAVE] = adp->disk; device_add_child(dev, "subdisk", device_get_unit(dev)); ad_firmware_geom_adjust(dev, adp->disk); bus_generic_attach(dev); @@ -179,6 +182,7 @@ ad_attach(device_t dev) static int ad_detach(device_t dev) { + struct ata_channel *ch = device_get_softc(device_get_parent(dev)); struct ad_softc *adp = device_get_ivars(dev); struct ata_device *atadev = device_get_softc(dev); device_t *children; @@ -199,6 +203,8 @@ ad_detach(device_t dev) free(children, M_TEMP); } + ch->disks[atadev->unit == ATA_SLAVE] = NULL; + /* detroy disk from the system so we dont get any further requests */ disk_destroy(adp->disk); @@ -266,13 +272,13 @@ ad_spindown(void *priv) ata_queue_request(request); } - -static void -ad_strategy(struct bio *bp) +struct ata_request * +ata_create_request(struct bio *bp, int full) { - device_t dev = bp->bio_disk->d_drv1; + device_t dev = bp->bio_disk->d_drv1; struct ata_device *atadev = device_get_softc(dev); struct ata_request *request; + struct ata_channel *ch; if (atadev->spindown != 0) callout_reset(&atadev->spindown_timer, hz * atadev->spindown, @@ -281,7 +287,7 @@ ad_strategy(struct bio *bp) if (!(request = ata_alloc_request())) { device_printf(dev, "FAILURE - out of memory in start\n"); biofinish(bp, NULL, ENOMEM); - return; + return NULL; } /* setup request */ @@ -344,10 +350,32 @@ ad_strategy(struct bio *bp) device_printf(dev, "FAILURE - unknown BIO operation\n"); ata_free_request(request); biofinish(bp, NULL, EIO); - return; + return NULL; } request->flags |= ATA_R_ORDERED; - ata_queue_request(request); + + if (full != 0) { + if ((request->parent = device_get_parent(dev)) == NULL) { + ata_free_request(request); + biofinish(bp, NULL, ENXIO); + return NULL; + } + + ch = device_get_softc(request->parent); + callout_init_mtx(&request->callout, &ch->state_mtx, + CALLOUT_RETURNUNLOCKED); + } + + return request; +} + +static void +ad_strategy(struct bio *bp) +{ + struct ata_request *request; + + if ((request = ata_create_request(bp, 0)) != NULL) + ata_queue_request(request); } static void @@ -369,6 +397,18 @@ ad_ioctl(struct disk *disk, u_long cmd, return ata_device_ioctl(disk->d_drv1, cmd, data); } +static void +ad_kick(struct disk *disk) +{ + device_t dev; + struct ata_channel *ch; + + dev = disk->d_drv1; + ch = device_get_softc(device_get_parent(dev)); + if (ch != NULL) + ata_start(ch->dev); +} + static int ad_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) Modified: user/luigi/geom_sched/sys/dev/ata/ata-queue.c ============================================================================== --- user/luigi/geom_sched/sys/dev/ata/ata-queue.c Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/dev/ata/ata-queue.c Thu Jan 8 09:24:18 2009 (r186888) @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rman.h> #include <dev/ata/ata-all.h> #include <ata_if.h> +#include <geom/geom_sched.h> /* prototypes */ static void ata_completed(void *, int); @@ -171,10 +172,25 @@ ata_start(device_t dev) struct ata_channel *ch = device_get_softc(dev); struct ata_request *request; struct ata_composite *cptr; - int dependencies = 0; + struct disk *dp; + struct bio *bp; + int dependencies = 0, i; - /* if we have a request on the queue try to get it running */ mtx_lock(&ch->queue_mtx); + if (TAILQ_FIRST(&ch->ata_queue) == NULL) { + for (i = 0; i < 2; i++) { + dp = ch->disks[i]; + while (dp != NULL && (bp = g_sched_next(dp)) != NULL) { + request = ata_create_request(bp, 1); + if (request != NULL) { + ata_sort_queue(ch, request); + break; + } + } + } + } + + /* if we have a request on the queue try to get it running */ if ((request = TAILQ_FIRST(&ch->ata_queue))) { /* we need the locking function to get the lock for this channel */ Modified: user/luigi/geom_sched/sys/geom/geom_disk.c ============================================================================== --- user/luigi/geom_sched/sys/geom/geom_disk.c Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/geom/geom_disk.c Thu Jan 8 09:24:18 2009 (r186888) @@ -55,9 +55,11 @@ __FBSDID("$FreeBSD$"); #include <geom/geom.h> #include <geom/geom_disk.h> #include <geom/geom_int.h> +#include <geom/geom_sched.h> static struct mtx g_disk_done_mtx; +static g_ctl_req_t g_disk_ctlreq; static g_access_t g_disk_access; static g_init_t g_disk_init; static g_fini_t g_disk_fini; @@ -68,6 +70,7 @@ static g_dumpconf_t g_disk_dumpconf; static struct g_class g_disk_class = { .name = "DISK", .version = G_VERSION, + .ctlreq = g_disk_ctlreq, .init = g_disk_init, .fini = g_disk_fini, .start = g_disk_start, @@ -81,16 +84,19 @@ g_disk_init(struct g_class *mp __unused) { mtx_init(&g_disk_done_mtx, "g_disk_done", NULL, MTX_DEF); + g_sched_init(); } static void g_disk_fini(struct g_class *mp __unused) { + g_sched_fini(); mtx_destroy(&g_disk_done_mtx); } DECLARE_GEOM_CLASS(g_disk_class, g_disk); +MODULE_VERSION(g_disk, 0); static void __inline g_disk_lock_giant(struct disk *dp) @@ -106,6 +112,83 @@ g_disk_unlock_giant(struct disk *dp) mtx_unlock(&Giant); } +static void +g_disk_configure(struct gctl_req *req, struct g_class *mp) +{ + struct disk *dp; + struct g_provider *pp; + const char *sched, *name; + char param[16]; + int i, *nargs; + + g_topology_assert(); + + nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); + if (nargs == NULL) { + gctl_error(req, "No '%s' argument.", "nargs"); + return; + } + + if (*nargs <= 0) { + gctl_error(req, "Missing devices."); + return; + } + + sched = gctl_get_asciiparam(req, "iosched"); + if (sched == NULL) { + gctl_error(req, "No '%s' argument.", "iosched"); + return; + } + + for (i = 0; i < *nargs; i++) { + snprintf(param, sizeof(param), "arg%d", i); + name = gctl_get_asciiparam(req, param); + if (name == NULL) { + gctl_error(req, "No '%s' argument.", param); + return; + } + + if (strncmp(name, "/dev/", strlen("/dev/")) == 0) + name += strlen("/dev/"); + + pp = g_provider_by_name(name); + if (pp == NULL || pp->geom->class != mp) { + gctl_error(req, "Provider %s is invalid.", name); + return; + } + + dp = pp->geom->softc; + if (g_sched_configure(dp, sched) != 0) { + gctl_error(req, "Could not set scheduler %s.", sched); + return; + } + } +} + +static void +g_disk_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) +{ + uint32_t *version; + + g_topology_assert(); + + version = gctl_get_paraml(req, "version", sizeof(*version)); + if (version == NULL) { + gctl_error(req, "No '%s' argument.", "version"); + return; + } + + if (*version != G_VERSION) { + gctl_error(req, "Userland and kernel parts are out of sync."); + return; + } + + if (strcmp(verb, "configure") == 0) + g_disk_configure(req, mp); + else + gctl_error(req, "Unknown verb."); +} + static int g_disk_access(struct g_provider *pp, int r, int w, int e) { @@ -198,6 +281,8 @@ g_disk_done(struct bio *bp) mtx_lock(&g_disk_done_mtx); bp->bio_completed = bp->bio_length - bp->bio_resid; + g_sched_done(bp); + bp2 = bp->bio_parent; if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; @@ -288,7 +373,7 @@ g_disk_start(struct bio *bp) bp2->bio_disk = dp; devstat_start_transaction_bio(dp->d_devstat, bp2); g_disk_lock_giant(dp); - dp->d_strategy(bp2); + g_sched_start(dp, bp2); g_disk_unlock_giant(dp); bp2 = bp3; bp3 = NULL; @@ -466,6 +551,7 @@ disk_create(struct disk *dp, int version dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); dp->d_geom = NULL; + g_sched_disk_init(dp); g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident)); g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); } @@ -476,6 +562,7 @@ disk_destroy(struct disk *dp) g_cancel_event(dp); dp->d_destroyed = 1; + g_sched_disk_fini(dp); if (dp->d_devstat != NULL) devstat_remove_entry(dp->d_devstat); g_post_event(g_disk_destroy, dp, M_WAITOK, NULL); @@ -487,6 +574,8 @@ disk_gone(struct disk *dp) struct g_geom *gp; struct g_provider *pp; + g_sched_disk_gone(dp); + gp = dp->d_geom; if (gp != NULL) LIST_FOREACH(pp, &gp->provider, provider) Modified: user/luigi/geom_sched/sys/geom/geom_disk.h ============================================================================== --- user/luigi/geom_sched/sys/geom/geom_disk.h Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/geom/geom_disk.h Thu Jan 8 09:24:18 2009 (r186888) @@ -53,7 +53,10 @@ typedef int disk_ioctl_t(struct disk *, int fflag, struct thread *td); /* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */ +typedef void disk_kick_t(struct disk *); + struct g_geom; +struct g_sched; struct devstat; struct disk { @@ -74,6 +77,7 @@ struct disk { disk_close_t *d_close; disk_strategy_t *d_strategy; disk_ioctl_t *d_ioctl; + disk_kick_t *d_kick; dumper_t *d_dump; /* Info fields from driver to geom_disk.c. Valid when open */ @@ -86,6 +90,13 @@ struct disk { u_int d_stripesize; char d_ident[DISK_IDENT_SIZE]; + /* Scheduler fields */ + struct mtx d_sched_lock; + u_int d_sched_flags; + u_int d_nr_sorted; + struct g_sched *d_sched; + void *d_sched_data; + /* Fields private to the driver */ void *d_drv1; }; Modified: user/luigi/geom_sched/sys/geom/geom_io.c ============================================================================== --- user/luigi/geom_sched/sys/geom/geom_io.c Thu Jan 8 06:38:06 2009 (r186887) +++ user/luigi/geom_sched/sys/geom/geom_io.c Thu Jan 8 09:24:18 2009 (r186888) @@ -172,6 +172,7 @@ g_clone_bio(struct bio *bp) bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_attribute = bp->bio_attribute; + bp2->bio_thread = bp->bio_thread; bp->bio_children++; } #ifdef KTR @@ -369,6 +370,10 @@ g_io_request(struct bio *bp, struct g_co bp->bio_error = 0; bp->bio_completed = 0; + /* Pass down the thread that issued the bio. */ + if (bp->bio_thread == NULL) + bp->bio_thread = curthread; + KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); bp->bio_flags |= BIO_ONQUEUE; Added: user/luigi/geom_sched/sys/geom/sched/g_as.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/luigi/geom_sched/sys/geom/sched/g_as.c Thu Jan 8 09:24:18 2009 (r186888) @@ -0,0 +1,204 @@ +/*- + * Copyright (c) 2007 Fabio Checconi <fabio@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/bio.h> +#include <sys/callout.h> +#include <sys/proc.h> +#include <sys/taskqueue.h> + +#include <geom/geom.h> +#include <geom/sched/g_gsched.h> + +/* + * Status values for AS. + */ +#define G_AS_NOWAIT 0 /* Not wating at all. */ +#define G_AS_WAITREQ 1 /* Waiting a request to complete. */ +#define G_AS_WAITING 2 /* Waiting a new request. */ + +struct g_as_softc { + struct g_geom *sc_geom; + struct thread *sc_curthread; + int sc_status; + long sc_batch; + + struct callout sc_wait; + struct bio_queue_head sc_bioq; +}; + +#define G_AS_WAIT_EXPIRE (hz/200 > 0 ? hz/200 : 2) +#define G_AS_MAX_BATCH 0x00800000 + +/* + * Dispatch the first queued request. Here we also update the status + * according to the dispatched request. + */ +static void +g_as_dispatch(struct g_as_softc *sc) +{ + struct bio *bio; + + /* + * Batching means just don't serve too many requests waiting + * for sequential ones, it is not really coupled with the + * threads being served. Its only purpose is to let not the + * scheduler starve other threads while an aggressive one + * is making continuously new requests. + */ + sc->sc_curthread = NULL; + + bio = bioq_takefirst(&sc->sc_bioq); + if (bio != NULL) { + sc->sc_batch += bio->bio_length; + if (sc->sc_batch > G_AS_MAX_BATCH) { + /* + * Too many requests served here, don't wait + * for the next. + */ + sc->sc_batch = 0; + sc->sc_status = G_AS_NOWAIT; + } else { + /* + * When this request will be served we'll wait + * for a new one from the same thread. + * Of course we are anticipating everything + * here, even writes or asynchronous requests, + * but this is only a prototype. + */ + sc->sc_status = G_AS_WAITREQ; + } + g_io_request(bio, LIST_FIRST(&sc->sc_geom->consumer)); + } else + sc->sc_status = G_AS_NOWAIT; +} + +static void +g_as_wait_timeout(void *data) +{ + struct g_as_softc *sc = data; + + g_sched_lock(sc->sc_geom); + /* + * We were waiting for a new request for curthread, it did + * not come, just dispatch the next one. + */ + if (sc->sc_status == G_AS_WAITING) + g_as_dispatch(sc); + g_sched_unlock(sc->sc_geom); +} + +static void +g_as_start(void *data, struct bio *bio) +{ + struct g_as_softc *sc = data; + + bioq_disksort(&sc->sc_bioq, bio); + + /* + * If the request being submitted is the one we were waiting for + * stop the timer and dispatch it, otherwise do nothing. + */ + if (sc->sc_status == G_AS_NOWAIT || + bio->bio_thread == sc->sc_curthread) { + callout_stop(&sc->sc_wait); + g_as_dispatch(sc); + } +} + +static void +g_as_done(void *data, struct bio *bio) +{ + struct g_as_softc *sc = data; + struct bio *bp2; + + bp2 = bio->bio_parent; + + /* Don't wait when fragments are completed. */ + if (bp2->bio_children != bp2->bio_inbed + 1) + return; + + if (sc->sc_status == G_AS_WAITREQ) { + /* + * Start waiting for a new request from curthread. + */ + sc->sc_curthread = bio->bio_thread; + sc->sc_status = G_AS_WAITING; + callout_reset(&sc->sc_wait, G_AS_WAIT_EXPIRE, + g_as_wait_timeout, sc); + } else { + /* + * Since we don't have to wait anything just dispatch + * the next request. + */ + g_as_dispatch(sc); + } +} + +static void * +g_as_init(struct g_geom *geom) +{ + struct g_as_softc *sc; + + sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); + sc->sc_geom = geom; + sc->sc_curthread = NULL; + sc->sc_status = G_AS_NOWAIT; + + callout_init(&sc->sc_wait, CALLOUT_MPSAFE); + bioq_init(&sc->sc_bioq); + + return sc; +} + +static void +g_as_fini(void *data) +{ + struct g_as_softc *sc = data; + + KASSERT(bioq_first(&sc->sc_bioq) == NULL, + ("Still requests pending.")); + callout_drain(&sc->sc_wait); + + g_free(sc); +} + +static struct g_gsched g_as = { + .gs_name = "as", + .gs_init = g_as_init, + .gs_fini = g_as_fini, + .gs_start = g_as_start, + .gs_done = g_as_done, +}; + +DECLARE_GSCHED_MODULE(as, &g_as); Added: user/luigi/geom_sched/sys/geom/sched/g_gsched.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/luigi/geom_sched/sys/geom/sched/g_gsched.h Thu Jan 8 09:24:18 2009 (r186888) @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2008 Fabio Checconi <fabio@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _G_GSCHED_H_ +#define _G_GSCHED_H_ + +#ifdef _KERNEL +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/queue.h> +#include <geom/sched/g_sched.h> + +/* + * This is the interface exported to scheduling modules. + */ +/* + * Geom I/O scheduler descriptor. + */ +struct g_geom; + +typedef void *gs_init_t (struct g_geom *geom); +typedef void gs_fini_t (void *data); +typedef void gs_start_t (void *data, struct bio *bio); +typedef void gs_done_t (void *data, struct bio *bio); + +struct g_gsched { + const char *gs_name; + int gs_refs; + + gs_init_t *gs_init; + gs_fini_t *gs_fini; + gs_start_t *gs_start; + gs_done_t *gs_done; + + LIST_ENTRY(g_gsched) glist; +}; + +/* + * Locking interface. When each operation registered with the + * scheduler is invoked, a per-instance lock is taken to protect + * the data associated with it. If the scheduler needs something + * else to access the same data (e.g., a callout) it must use + * these functions. + */ +void g_sched_lock(struct g_geom *gp); +void g_sched_unlock(struct g_geom *gp); + +/* + * Declaration of a scheduler module. + */ +int g_gsched_modevent(module_t mod, int cmd, void *arg); + +#define DECLARE_GSCHED_MODULE(name, gsched) \ + static moduledata_t name##_mod = { \ + #name, \ + g_gsched_modevent, \ + gsched, \ + }; \ + DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); \ + MODULE_DEPEND(name, g_sched, 0, 0, 0); + +#endif /* _KERNEL */ + +#endif /* _G_GSCHED_H_ */ Added: user/luigi/geom_sched/sys/geom/sched/g_rr.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/luigi/geom_sched/sys/geom/sched/g_rr.c Thu Jan 8 09:24:18 2009 (r186888) @@ -0,0 +1,355 @@ +/*- + * Copyright (c) 2008 Fabio Checconi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bio.h> +#include <sys/callout.h> +#include <sys/hash.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <geom/geom.h> +#include <geom/sched/g_gsched.h> + +/* + * Trivial round robin disk scheduler, with per-thread queues, always + * anticipating requests from the last served thread. + */ + +/* Timeout for anticipation. */ +#define G_RR_WAIT_EXPIRE (hz/200 > 0 ? hz/200 : 2) + +#define G_QUEUE_NOWAIT 0 /* Ready to dispatch. */ +#define G_QUEUE_WAITREQ 1 /* Waiting for a completion. */ +#define G_QUEUE_WAITING 2 /* Waiting for a new request. */ + +/* + * Per process (thread) queue structure. Each process (thread) in the + * system that accesses the disk managed by an instance of this scheduler + * has an associated queue. + */ +struct g_rr_queue { + int q_refs; + int q_status; + u_long q_key; + struct proc *q_proc; + + struct bio_queue_head q_bioq; + unsigned int q_service; + unsigned int q_budget; + + LIST_ENTRY(g_rr_queue) q_hash; + TAILQ_ENTRY(g_rr_queue) q_tailq; +}; + +/* List types. */ +TAILQ_HEAD(g_rr_tailq, g_rr_queue); +LIST_HEAD(g_hash, g_rr_queue); + +/* Size of the per-device hash table storing threads. */ +#define G_RR_HASH_SIZE 32 + +/* Default slice for RR between queues. */ +#define G_RR_DEFAULT_BUDGET 0x00800000 + +/* + * Per device descriptor. It holds the RR list of queues accessing + * the disk. + */ +struct g_rr_softc { + struct g_geom *sc_geom; + + struct g_rr_queue *sc_active; + struct g_rr_tailq sc_rr_tailq; + + struct g_hash *sc_hash; + u_long sc_hash_mask; + + struct callout sc_wait; +}; + +static inline u_long +g_rr_key(struct thread *tp) +{ + + return (tp != NULL ? tp->td_tid : 0); +} + +/* Return the hash chain for the given key. */ +static inline struct g_hash * +g_rr_hash(struct g_rr_softc *sc, u_long key) +{ + + return (&sc->sc_hash[key & sc->sc_hash_mask]); +} + +/* + * Get a reference to the queue that holds requests for tp, allocating + * it if necessary. + */ +static struct g_rr_queue * +g_rr_queue_get(struct g_rr_softc *sc, struct thread *tp) +{ + struct g_hash *bucket; + struct g_rr_queue *qp; + u_long key; + + key = g_rr_key(tp); + bucket = g_rr_hash(sc, key); + LIST_FOREACH(qp, bucket, q_hash) { + if (qp->q_key == key) { + qp->q_refs++; + return (qp); + } + } + + qp = g_malloc(sizeof *qp, M_NOWAIT | M_ZERO); + + if (qp != NULL) { + /* One for the hash table, one for the caller. */ + qp->q_refs = 2; + + qp->q_key = key; + qp->q_proc = tp->td_proc; + bioq_init(&qp->q_bioq); + qp->q_budget = G_RR_DEFAULT_BUDGET; + LIST_INSERT_HEAD(bucket, qp, q_hash); + } + + return (qp); +} + +/* + * Release a reference to the queue. + */ +static void +g_rr_queue_put(struct g_rr_queue *qp) +{ + + if (--qp->q_refs > 0) + return; + + LIST_REMOVE(qp, q_hash); + KASSERT(bioq_first(&qp->q_bioq) == NULL, ("released nonempty queue")); + + g_free(qp); +} + +static void * +g_rr_init(struct g_geom *geom) +{ + struct g_rr_softc *sc; + + sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO); + sc->sc_geom = geom; + TAILQ_INIT(&sc->sc_rr_tailq); + sc->sc_hash = hashinit(G_RR_HASH_SIZE, M_GEOM, &sc->sc_hash_mask); + callout_init(&sc->sc_wait, CALLOUT_MPSAFE); + + return (sc); +} + +static void +g_rr_fini(void *data) +{ + struct g_rr_softc *sc; + struct g_rr_queue *qp, *qp2; + int i; + + sc = data; + callout_drain(&sc->sc_wait); + KASSERT(sc->sc_active == NULL, ("still a queue under service")); + KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues")); + for (i = 0; i < G_RR_HASH_SIZE; i++) { + LIST_FOREACH_SAFE(qp, &sc->sc_hash[i], q_hash, qp2) { + LIST_REMOVE(qp, q_hash); + g_rr_queue_put(qp); + } + } + hashdestroy(sc->sc_hash, M_GEOM, sc->sc_hash_mask); + g_free(sc); +} + +/* + * Activate a queue, inserting it into the RR list and preparing it + * to be served. + */ +static inline void +g_rr_activate(struct g_rr_softc *sc, struct g_rr_queue *qp) +{ + + qp->q_service = 0; + TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq); +} + +static void +g_rr_dispatch(struct g_rr_softc *sc) +{ + struct g_rr_queue *qp; + struct bio *bp, *next; + + /* Try with the queue under service first. */ + qp = sc->sc_active; + if (qp == NULL) { + /* No queue under service, look for the first in RR order. */ + qp = TAILQ_FIRST(&sc->sc_rr_tailq); + if (qp == NULL) { + /* No queue at all, just return. */ + return; + } + /* Select the new queue for service. */ + TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq); + sc->sc_active = qp; + } else if (qp->q_status != G_QUEUE_NOWAIT) { + /* Queue is anticipating, stop dispatching. */ + return; + } + + bp = bioq_takefirst(&qp->q_bioq); + qp->q_service += bp->bio_length; + next = bioq_first(&qp->q_bioq); + if (qp->q_service > qp->q_budget) { + /* Queue exhausted its budget. */ + sc->sc_active = NULL; + if (next != NULL) { + /* If it has more requests requeue it. */ + qp->q_status = G_QUEUE_NOWAIT; + g_rr_activate(sc, qp); + } else { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200901080924.n089OIeF014222>