Date: Mon, 17 Oct 2016 21:44:41 +0000 (UTC) From: Kirk McKusick <mckusick@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r307533 - in stable/11: share/man/man3 sys/sys sys/ufs/ffs Message-ID: <201610172144.u9HLif26041731@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mckusick Date: Mon Oct 17 21:44:41 2016 New Revision: 307533 URL: https://svnweb.freebsd.org/changeset/base/307533 Log: MFC r304230: Add two new macros, SLIST_CONCAT and LIST_CONCAT. MFC r304239: Bug 211013 reports that a write error to a UFS filesystem running with softupdates panics the kernel. PR: 211013 Modified: stable/11/share/man/man3/queue.3 stable/11/sys/sys/queue.h stable/11/sys/ufs/ffs/ffs_softdep.c stable/11/sys/ufs/ffs/softdep.h Directory Properties: stable/11/ (props changed) Modified: stable/11/share/man/man3/queue.3 ============================================================================== --- stable/11/share/man/man3/queue.3 Mon Oct 17 21:35:13 2016 (r307532) +++ stable/11/share/man/man3/queue.3 Mon Oct 17 21:44:41 2016 (r307533) @@ -28,12 +28,13 @@ .\" @(#)queue.3 8.2 (Berkeley) 1/24/94 .\" $FreeBSD$ .\" -.Dd June 24, 2015 +.Dd August 15, 2016 .Dt QUEUE 3 .Os .Sh NAME .Nm SLIST_CLASS_ENTRY , .Nm SLIST_CLASS_HEAD , +.Nm SLIST_CONCAT , .Nm SLIST_EMPTY , .Nm SLIST_ENTRY , .Nm SLIST_FIRST , @@ -75,6 +76,7 @@ .Nm STAILQ_SWAP , .Nm LIST_CLASS_ENTRY , .Nm LIST_CLASS_HEAD , +.Nm LIST_CONCAT , .Nm LIST_EMPTY , .Nm LIST_ENTRY , .Nm LIST_FIRST , @@ -125,6 +127,7 @@ lists and tail queues .\" .Fn SLIST_CLASS_ENTRY "CLASSTYPE" .Fn SLIST_CLASS_HEAD "HEADNAME" "CLASSTYPE" +.Fn SLIST_CONCAT "SLIST_HEAD *head1" "SLIST_HEAD *head2" "TYPE" "SLIST_ENTRY NAME" .Fn SLIST_EMPTY "SLIST_HEAD *head" .Fn SLIST_ENTRY "TYPE" .Fn SLIST_FIRST "SLIST_HEAD *head" @@ -168,6 +171,7 @@ lists and tail queues .\" .Fn LIST_CLASS_ENTRY "CLASSTYPE" .Fn LIST_CLASS_HEAD "HEADNAME" "CLASSTYPE" +.Fn LIST_CONCAT "LIST_HEAD *head1" "LIST_HEAD *head2" "TYPE" "LIST_ENTRY NAME" .Fn LIST_EMPTY "LIST_HEAD *head" .Fn LIST_ENTRY "TYPE" .Fn LIST_FIRST "LIST_HEAD *head" @@ -249,6 +253,8 @@ Singly-linked lists add the following fu .Bl -enum -compact -offset indent .It O(n) removal of any entry in the list. +.It +O(n) concatenation of two lists. .El .Pp Singly-linked tail queues add the following functionality: @@ -296,6 +302,8 @@ Linked lists are the simplest of the dou They add the following functionality over the above: .Bl -enum -compact -offset indent .It +O(n) concatenation of two lists. +.It They may be traversed backwards. .El However: @@ -401,6 +409,19 @@ evaluates to an initializer for the list .Fa head . .Pp The macro +.Nm SLIST_CONCAT +concatenates the list headed by +.Fa head2 +onto the end of the one headed by +.Fa head1 +removing all entries from the former. +Use of this macro should be avoided as it traverses the entirety of the +.Fa head1 +list. +A singly-linked tail queue should be used if this macro is needed in +high-usage code paths or to operate on long lists. +.Pp +The macro .Nm SLIST_EMPTY evaluates to true if there are no elements in the list. .Pp @@ -508,6 +529,9 @@ The macro removes the element .Fa elm from the list. +Use of this macro should be avoided as it traverses the entire list. +A doubly-linked list should be used if this macro is needed in +high-usage code paths or to operate on long lists. .Pp The macro .Nm SLIST_SWAP @@ -724,6 +748,9 @@ The macro removes the element .Fa elm from the tail queue. +Use of this macro should be avoided as it traverses the entire list. +A doubly-linked tail queue should be used if this macro is needed in +high-usage code paths or to operate on long tail queues. .Pp The macro .Nm STAILQ_SWAP @@ -823,6 +850,19 @@ evaluates to an initializer for the list .Fa head . .Pp The macro +.Nm LIST_CONCAT +concatenates the list headed by +.Fa head2 +onto the end of the one headed by +.Fa head1 +removing all entries from the former. +Use of this macro should be avoided as it traverses the entirety of the +.Fa head1 +list. +A tail queue should be used if this macro is needed in +high-usage code paths or to operate on long lists. +.Pp +The macro .Nm LIST_EMPTY evaluates to true if there are no elements in the list. .Pp Modified: stable/11/sys/sys/queue.h ============================================================================== --- stable/11/sys/sys/queue.h Mon Oct 17 21:35:13 2016 (r307532) +++ stable/11/sys/sys/queue.h Mon Oct 17 21:44:41 2016 (r307533) @@ -76,6 +76,10 @@ * * For details on the use of these macros, see the queue(3) manual page. * + * Below is a summary of implemented functions where: + * + means the macro is available + * - means the macro is not available + * s means the macro is available but is slow (runs in O(n) time) * * SLIST LIST STAILQ TAILQ * _HEAD + + + + @@ -101,10 +105,10 @@ * _INSERT_BEFORE - + - + * _INSERT_AFTER + + + + * _INSERT_TAIL - - + + - * _CONCAT - - + + + * _CONCAT s s + + * _REMOVE_AFTER + - + - * _REMOVE_HEAD + - + - - * _REMOVE + + + + + * _REMOVE s + s + * _SWAP + + + + * */ @@ -183,6 +187,19 @@ struct { \ /* * Singly-linked List functions. */ +#define SLIST_CONCAT(head1, head2, type, field) do { \ + QUEUE_TYPEOF(type) *curelm = SLIST_FIRST(head1); \ + if (curelm == NULL) { \ + if ((SLIST_FIRST(head1) = SLIST_FIRST(head2)) != NULL) \ + SLIST_INIT(head2); \ + } else if (SLIST_FIRST(head2) != NULL) { \ + while (SLIST_NEXT(curelm, field) != NULL) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_NEXT(curelm, field) = SLIST_FIRST(head2); \ + SLIST_INIT(head2); \ + } \ +} while (0) + #define SLIST_EMPTY(head) ((head)->slh_first == NULL) #define SLIST_FIRST(head) ((head)->slh_first) @@ -447,6 +464,23 @@ struct { \ #define QMD_LIST_CHECK_PREV(elm, field) #endif /* (_KERNEL && INVARIANTS) */ +#define LIST_CONCAT(head1, head2, type, field) do { \ + QUEUE_TYPEOF(type) *curelm = LIST_FIRST(head1); \ + if (curelm == NULL) { \ + if ((LIST_FIRST(head1) = LIST_FIRST(head2)) != NULL) { \ + LIST_FIRST(head2)->field.le_prev = \ + &LIST_FIRST((head1)); \ + LIST_INIT(head2); \ + } \ + } else if (LIST_FIRST(head2) != NULL) { \ + while (LIST_NEXT(curelm, field) != NULL) \ + curelm = LIST_NEXT(curelm, field); \ + LIST_NEXT(curelm, field) = LIST_FIRST(head2); \ + LIST_FIRST(head2)->field.le_prev = &LIST_NEXT(curelm, field); \ + LIST_INIT(head2); \ + } \ +} while (0) + #define LIST_EMPTY(head) ((head)->lh_first == NULL) #define LIST_FIRST(head) ((head)->lh_first) Modified: stable/11/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- stable/11/sys/ufs/ffs/ffs_softdep.c Mon Oct 17 21:35:13 2016 (r307532) +++ stable/11/sys/ufs/ffs/ffs_softdep.c Mon Oct 17 21:44:41 2016 (r307533) @@ -752,16 +752,16 @@ static int flush_newblk_dep(struct vnode static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); static int sync_cgs(struct mount *, int); -static int handle_written_filepage(struct pagedep *, struct buf *); +static int handle_written_filepage(struct pagedep *, struct buf *, int); static int handle_written_sbdep(struct sbdep *, struct buf *); static void initiate_write_sbdep(struct sbdep *); static void diradd_inode_written(struct diradd *, struct inodedep *); static int handle_written_indirdep(struct indirdep *, struct buf *, - struct buf**); -static int handle_written_inodeblock(struct inodedep *, struct buf *); + struct buf**, int); +static int handle_written_inodeblock(struct inodedep *, struct buf *, int); static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, uint8_t *); -static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); +static int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int); static void handle_written_jaddref(struct jaddref *); static void handle_written_jremref(struct jremref *); static void handle_written_jseg(struct jseg *, struct buf *); @@ -10903,6 +10903,10 @@ initiate_write_bmsafemap(bmsafemap, bp) struct fs *fs; ino_t ino; + /* + * If this is a background write, we did this at the time that + * the copy was made, so do not need to do it again. + */ if (bmsafemap->sm_state & IOSTARTED) return; bmsafemap->sm_state |= IOSTARTED; @@ -10976,10 +10980,39 @@ softdep_disk_write_complete(bp) /* * If an error occurred while doing the write, then the data - * has not hit the disk and the dependencies cannot be unrolled. + * has not hit the disk and the dependencies cannot be processed. + * But we do have to go through and roll forward any dependencies + * that were rolled back before the disk write. */ - if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) + if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) { + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + switch (wk->wk_type) { + + case D_PAGEDEP: + handle_written_filepage(WK_PAGEDEP(wk), bp, 0); + continue; + + case D_INODEDEP: + handle_written_inodeblock(WK_INODEDEP(wk), + bp, 0); + continue; + + case D_BMSAFEMAP: + handle_written_bmsafemap(WK_BMSAFEMAP(wk), + bp, 0); + continue; + + case D_INDIRDEP: + handle_written_indirdep(WK_INDIRDEP(wk), + bp, &sbp, 0); + continue; + default: + /* nothing to roll forward */ + continue; + } + } return; + } if ((wk = LIST_FIRST(&bp->b_dep)) == NULL) return; ump = VFSTOUFS(wk->wk_mp); @@ -10999,17 +11032,20 @@ softdep_disk_write_complete(bp) switch (wk->wk_type) { case D_PAGEDEP: - if (handle_written_filepage(WK_PAGEDEP(wk), bp)) + if (handle_written_filepage(WK_PAGEDEP(wk), bp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_INODEDEP: - if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) + if (handle_written_inodeblock(WK_INODEDEP(wk), bp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; case D_BMSAFEMAP: - if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp)) + if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; @@ -11028,7 +11064,8 @@ softdep_disk_write_complete(bp) continue; case D_INDIRDEP: - if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp)) + if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp, + WRITESUCCEEDED)) WORKLIST_INSERT(&reattach, wk); continue; @@ -11328,12 +11365,17 @@ handle_bufwait(inodedep, refhd) * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note * that this routine is always called from interrupt level with further - * splbio interrupts blocked. + * interrupts from this device blocked. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. */ static int -handle_written_inodeblock(inodedep, bp) +handle_written_inodeblock(inodedep, bp, flags) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ + int flags; { struct freefile *freefile; struct allocdirect *adp, *nextadp; @@ -11363,7 +11405,8 @@ handle_written_inodeblock(inodedep, bp) /* * Leave this inodeblock dirty until it's in the list. */ - if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED) { + if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED && + (flags & WRITESUCCEEDED)) { struct inodedep *inon; inon = TAILQ_NEXT(inodedep, id_unlinked); @@ -11402,7 +11445,8 @@ handle_written_inodeblock(inodedep, bp) goto bufwait; return (1); } - inodedep->id_state |= COMPLETE; + if (flags & WRITESUCCEEDED) + inodedep->id_state |= COMPLETE; /* * Roll forward anything that had to be rolled back before * the inode could be updated. @@ -11517,6 +11561,13 @@ handle_written_inodeblock(inodedep, bp) bdirty(bp); bufwait: /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) + return (hadchanges); + /* * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) @@ -11573,11 +11624,20 @@ bufwait: return (hadchanges); } +/* + * Perform needed roll-forwards and kick off any dependencies that + * can now be processed. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. + */ static int -handle_written_indirdep(indirdep, bp, bpp) +handle_written_indirdep(indirdep, bp, bpp, flags) struct indirdep *indirdep; struct buf *bp; struct buf **bpp; + int flags; { struct allocindir *aip; struct buf *sbp; @@ -11602,6 +11662,16 @@ handle_written_indirdep(indirdep, bp, bp indirdep->ir_state &= ~(UNDONE | IOSTARTED); indirdep->ir_state |= ATTACHED; /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) { + stat_indir_blk_ptrs++; + bdirty(bp); + return (1); + } + /* * Move allocindirs with written pointers to the completehd if * the indirdep's pointer is not yet written. Otherwise * free them here. @@ -11755,11 +11825,16 @@ jnewblk_rollforward(jnewblk, fs, cgp, bl * Complete a write to a bmsafemap structure. Roll forward any bitmap * changes if it's not a background write. Set all written dependencies * to DEPCOMPLETE and free the structure if possible. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. */ static int -handle_written_bmsafemap(bmsafemap, bp) +handle_written_bmsafemap(bmsafemap, bp, flags) struct bmsafemap *bmsafemap; struct buf *bp; + int flags; { struct newblk *newblk; struct inodedep *inodedep; @@ -11775,15 +11850,20 @@ handle_written_bmsafemap(bmsafemap, bp) int chgs; if ((bmsafemap->sm_state & IOSTARTED) == 0) - panic("initiate_write_bmsafemap: Not started\n"); + panic("handle_written_bmsafemap: Not started\n"); ump = VFSTOUFS(bmsafemap->sm_list.wk_mp); chgs = 0; bmsafemap->sm_state &= ~IOSTARTED; foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0; /* - * Release journal work that was waiting on the write. + * If write was successful, release journal work that was waiting + * on the write. Otherwise move the work back. */ - handle_jwork(&bmsafemap->sm_freewr); + if (flags & WRITESUCCEEDED) + handle_jwork(&bmsafemap->sm_freewr); + else + LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, + worklist, wk_list); /* * Restore unwritten inode allocation pending jaddref writes. @@ -11833,6 +11913,20 @@ handle_written_bmsafemap(bmsafemap, bp) free_jnewblk(jnewblk); } } + /* + * If the write did not succeed, we have done all the roll-forward + * operations, but we cannot take the actions that will allow its + * dependencies to be processed. + */ + if ((flags & WRITESUCCEEDED) == 0) { + LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); + LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, + worklist, wk_list); + if (foreground) + bdirty(bp); + return (1); + } while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { newblk->nb_state |= DEPCOMPLETE; newblk->nb_state &= ~ONDEPLIST; @@ -11936,12 +12030,17 @@ free_pagedep(pagedep) * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. * Note that this routine is always called from interrupt level - * with further splbio interrupts blocked. + * with further interrupts from this device blocked. + * + * If the write did not succeed, we will do all the roll-forward + * operations, but we will not take the actions that will allow its + * dependencies to be processed. */ static int -handle_written_filepage(pagedep, bp) +handle_written_filepage(pagedep, bp, flags) struct pagedep *pagedep; struct buf *bp; /* buffer containing the written page */ + int flags; { struct dirrem *dirrem; struct diradd *dap, *nextdap; @@ -11951,6 +12050,8 @@ handle_written_filepage(pagedep, bp) if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; + if ((flags & WRITESUCCEEDED) == 0) + goto rollforward; /* * Process any directory removals that have been committed. */ @@ -11970,6 +12071,7 @@ handle_written_filepage(pagedep, bp) if ((pagedep->pd_state & NEWBLOCK) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap, NULL); +rollforward: /* * Uncommitted directory entries must be restored. */ @@ -12002,7 +12104,7 @@ handle_written_filepage(pagedep, bp) * marked dirty so that its will eventually get written back in * its correct form. */ - if (chgs) { + if (chgs || (flags & WRITESUCCEEDED) == 0) { if ((bp->b_flags & B_DELWRI) == 0) stat_dir_entry++; bdirty(bp); Modified: stable/11/sys/ufs/ffs/softdep.h ============================================================================== --- stable/11/sys/ufs/ffs/softdep.h Mon Oct 17 21:35:13 2016 (r307532) +++ stable/11/sys/ufs/ffs/softdep.h Mon Oct 17 21:44:41 2016 (r307533) @@ -140,6 +140,7 @@ #define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */ #define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */ #define UNLINKLINKS (UNLINKNEXT | UNLINKPREV) +#define WRITESUCCEEDED 0x400000 /* the disk write completed successfully */ #define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201610172144.u9HLif26041731>