Date: Thu, 23 Jun 2022 18:46:56 GMT From: John Baldwin <jhb@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: cd9618bdb274 - main - bhyve: Snapshot impovements for 'blockif' backend Message-ID: <202206231846.25NIkujK055498@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=cd9618bdb274375139080ee4e33ccbdc980513f3 commit cd9618bdb274375139080ee4e33ccbdc980513f3 Author: Vitaliy Gusev <gusev.vitaliy@gmail.com> AuthorDate: 2022-06-23 18:46:06 +0000 Commit: John Baldwin <jhb@FreeBSD.org> CommitDate: 2022-06-23 18:46:06 +0000 bhyve: Snapshot impovements for 'blockif' backend When pausing a block I/O device model as part of suspending a VM, wait for all active block I/O requests to finish before saving snapshot data. This avoids having to save information about in-flight requests both in the block_if layer and in storage device models. For the AHCI device model, the queues are now guaranteed to be idle when taking a snapshot, so remove the code to save queue state and rely on the initial state in a resumed VM having all queues already idle. This will also simplify adding NVMe snapshot support in the future. Reviewed by: jhb Sponsored by: vStack Differential Revision: https://reviews.freebsd.org/D26267 --- usr.sbin/bhyve/block_if.c | 105 ++++------------------------------ usr.sbin/bhyve/block_if.h | 4 -- usr.sbin/bhyve/pci_ahci.c | 142 +--------------------------------------------- 3 files changed, 15 insertions(+), 236 deletions(-) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index 06c3cb50fb13..05c9d9731754 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -109,11 +109,9 @@ struct blockif_ctxt { int bc_psectoff; int bc_closing; int bc_paused; - int bc_work_count; pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; - pthread_cond_t bc_paused_cond; pthread_cond_t bc_work_done_cond; blockif_resize_cb *bc_resize_cb; void *bc_resize_cb_arg; @@ -362,6 +360,12 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) (*br->br_callback)(br, err); } +static inline bool +blockif_empty(const struct blockif_ctxt *bc) +{ + return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq)); +} + static void * blockif_thr(void *arg) { @@ -379,30 +383,21 @@ blockif_thr(void *arg) pthread_mutex_lock(&bc->bc_mtx); for (;;) { - bc->bc_work_count++; - - /* We cannot process work if the interface is paused */ - while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) { + while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } - bc->bc_work_count--; - - /* If none of the workers are busy, notify the main thread */ - if (bc->bc_work_count == 0) + /* If none to work, notify the main thread */ + if (blockif_empty(bc)) pthread_cond_broadcast(&bc->bc_work_done_cond); /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) break; - /* Make all worker threads wait here if the device is paused */ - while (bc->bc_paused) - pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx); - pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } pthread_mutex_unlock(&bc->bc_mtx); @@ -638,8 +633,6 @@ blockif_open(nvlist_t *nvl, const char *ident) pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); bc->bc_paused = 0; - bc->bc_work_count = 0; - pthread_cond_init(&bc->bc_paused_cond, NULL); pthread_cond_init(&bc->bc_work_done_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); @@ -737,6 +730,7 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, err = 0; pthread_mutex_lock(&bc->bc_mtx); + assert(!bc->bc_paused); if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread @@ -761,7 +755,6 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } @@ -769,7 +762,6 @@ blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } @@ -777,7 +769,6 @@ blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } @@ -785,7 +776,6 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) { - assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_DELETE)); } @@ -955,7 +945,6 @@ blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) off_t blockif_size(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } @@ -963,7 +952,6 @@ blockif_size(struct blockif_ctxt *bc) int blockif_sectsz(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); } @@ -971,7 +959,6 @@ blockif_sectsz(struct blockif_ctxt *bc) void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { - assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; @@ -980,7 +967,6 @@ blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) int blockif_queuesz(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } @@ -988,7 +974,6 @@ blockif_queuesz(struct blockif_ctxt *bc) int blockif_is_ro(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } @@ -996,7 +981,6 @@ blockif_is_ro(struct blockif_ctxt *bc) int blockif_candelete(struct blockif_ctxt *bc) { - assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } @@ -1012,7 +996,7 @@ blockif_pause(struct blockif_ctxt *bc) bc->bc_paused = 1; /* The interface is paused. Wait for workers to finish their work */ - while (bc->bc_work_count) + while (!blockif_empty(bc)) pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx); pthread_mutex_unlock(&bc->bc_mtx); @@ -1029,71 +1013,6 @@ blockif_resume(struct blockif_ctxt *bc) pthread_mutex_lock(&bc->bc_mtx); bc->bc_paused = 0; - /* resume the threads waiting for paused */ - pthread_cond_broadcast(&bc->bc_paused_cond); - /* kick the threads after restore */ - pthread_cond_broadcast(&bc->bc_cond); - pthread_mutex_unlock(&bc->bc_mtx); -} - -int -blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta) -{ - int i; - struct iovec *iov; - int ret; - - SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done); - - /* - * XXX: The callback and parameter must be filled by the virtualized - * device that uses the interface, during its init; we're not touching - * them here. - */ - - /* Snapshot the iovecs. */ - for (i = 0; i < br->br_iovcnt; i++) { - iov = &br->br_iov[i]; - - SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done); - - /* We assume the iov is a guest-mapped address. */ - SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len, - false, meta, ret, done); - } - -done: - return (ret); -} - -int -blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta) -{ - int ret; - - if (bc->bc_paused == 0) { - fprintf(stderr, "%s: Snapshot failed: " - "interface not paused.\r\n", __func__); - return (ENXIO); - } - - pthread_mutex_lock(&bc->bc_mtx); - - SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done); - -done: pthread_mutex_unlock(&bc->bc_mtx); - return (ret); } -#endif +#endif /* BHYVE_SNAPSHOT */ diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h index 0407ff43cf94..cf3acc05c98f 100644 --- a/usr.sbin/bhyve/block_if.h +++ b/usr.sbin/bhyve/block_if.h @@ -87,10 +87,6 @@ int blockif_close(struct blockif_ctxt *bc); #ifdef BHYVE_SNAPSHOT void blockif_pause(struct blockif_ctxt *bc); void blockif_resume(struct blockif_ctxt *bc); -int blockif_snapshot_req(struct blockif_req *br, - struct vm_snapshot_meta *meta); -int blockif_snapshot(struct blockif_ctxt *bc, - struct vm_snapshot_meta *meta); #endif #endif /* _BLOCK_IF_H_ */ diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index d07b1f085e3d..ba8921ea3b6f 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -2562,114 +2562,14 @@ open_fail: } #ifdef BHYVE_SNAPSHOT -static int -pci_ahci_snapshot_save_queues(struct ahci_port *port, - struct vm_snapshot_meta *meta) -{ - int ret; - int idx; - struct ahci_ioreq *ioreq; - - STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) { - idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq); - SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); - } - - idx = -1; - SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); - - TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) { - idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq); - SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); - - /* - * Snapshot only the busy requests; other requests are - * not valid. - */ - ret = blockif_snapshot_req(&ioreq->io_req, meta); - if (ret != 0) { - fprintf(stderr, "%s: failed to snapshot req\r\n", - __func__); - goto done; - } - } - - idx = -1; - SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); - -done: - return (ret); -} - -static int -pci_ahci_snapshot_restore_queues(struct ahci_port *port, - struct vm_snapshot_meta *meta) -{ - int ret; - int idx; - struct ahci_ioreq *ioreq; - - /* Empty the free queue before restoring. */ - while (!STAILQ_EMPTY(&port->iofhd)) - STAILQ_REMOVE_HEAD(&port->iofhd, io_flist); - - /* Restore the free queue. */ - while (1) { - SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); - if (idx == -1) - break; - - STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist); - } - - /* Restore the busy queue. */ - while (1) { - SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); - if (idx == -1) - break; - - ioreq = &port->ioreq[idx]; - TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist); - - /* - * Restore only the busy requests; other requests are - * not valid. - */ - ret = blockif_snapshot_req(&ioreq->io_req, meta); - if (ret != 0) { - fprintf(stderr, "%s: failed to restore request\r\n", - __func__); - goto done; - } - - /* Re-enqueue the requests in the block interface. */ - if (ioreq->readop) - ret = blockif_read(port->bctx, &ioreq->io_req); - else - ret = blockif_write(port->bctx, &ioreq->io_req); - - if (ret != 0) { - fprintf(stderr, - "%s: failed to re-enqueue request\r\n", - __func__); - goto done; - } - } - -done: - return (ret); -} - static int pci_ahci_snapshot(struct vm_snapshot_meta *meta) { - int i, j, ret; + int i, ret; void *bctx; struct pci_devinst *pi; struct pci_ahci_softc *sc; struct ahci_port *port; - struct ahci_cmd_hdr *hdr; - struct ahci_ioreq *ioreq; pi = meta->dev_data; sc = pi->pi_arg; @@ -2753,43 +2653,7 @@ pci_ahci_snapshot(struct vm_snapshot_meta *meta) SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done); - for (j = 0; j < port->ioqsz; j++) { - ioreq = &port->ioreq[j]; - - /* blockif_req snapshot done only for busy requests. */ - hdr = (struct ahci_cmd_hdr *)(port->cmd_lst + - ioreq->slot * AHCI_CL_SIZE); - SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis, - 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry), - false, meta, ret, done); - - SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done); - } - - /* Perform save / restore specific operations. */ - if (meta->op == VM_SNAPSHOT_SAVE) { - ret = pci_ahci_snapshot_save_queues(port, meta); - if (ret != 0) - goto done; - } else if (meta->op == VM_SNAPSHOT_RESTORE) { - ret = pci_ahci_snapshot_restore_queues(port, meta); - if (ret != 0) - goto done; - } else { - ret = EINVAL; - goto done; - } - - ret = blockif_snapshot(port->bctx, meta); - if (ret != 0) { - fprintf(stderr, "%s: failed to restore blockif\r\n", - __func__); - goto done; - } + assert(TAILQ_EMPTY(&port->iobhd)); } done: @@ -2835,7 +2699,7 @@ pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi) return (0); } -#endif +#endif /* BHYVE_SNAPSHOT */ /* * Use separate emulation names to distinguish drive and atapi devices
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202206231846.25NIkujK055498>