Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 5 Sep 2014 12:46:31 -0700
From:      Adrian Chadd <adrian@freebsd.org>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        Alan Cox <alc@freebsd.org>, Pieter de Goeje <pieter@degoeje.nl>, "freebsd-hackers@freebsd.org" <hackers@freebsd.org>
Subject:   Re: mmap MAP_NOSYNC regression in 10.x
Message-ID:  <CAJ-Vmo=Dw02HsazH1n9wkXFDXM83bOnNLGndAt5=KZN9QHxyOw@mail.gmail.com>
In-Reply-To: <20140905123826.GP2737@kib.kiev.ua>
References:  <540903FF.6010602@degoeje.nl> <CAJUyCcNiLwLuL9crpQBjSdg4ED5kR53fPjyJG3HNmP5Roor8RQ@mail.gmail.com> <20140905080633.GM2737@kib.kiev.ua> <5409A4F8.6020204@degoeje.nl> <20140905123826.GP2737@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
Is this worth putting into the regression test suite?



-a

On 5 September 2014 05:38, Konstantin Belousov <kostikbel@gmail.com> wrote:
> On Fri, Sep 05, 2014 at 01:56:40PM +0200, Pieter de Goeje wrote:
>> Thanks, works for me!
>
> I realized that the patch contains yet another bug. The oflags page
> flags update is protected by the exclusive vm object lock, which is only
> held in shared mode on the fast path. Below is the fixed patch, where
> I take the page lock around setting VPO_NOSYNC (exclusive lock owners
> cannot race with fast path since we own shared lock, and two parallel
> fast path execution would be handled by the page lock).
>
> diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
> index 30b0456..f327ab0 100644
> --- a/sys/vm/vm_fault.c
> +++ b/sys/vm/vm_fault.c
> @@ -174,6 +174,54 @@ unlock_and_deallocate(struct faultstate *fs)
>         }
>  }
>
> +static void
> +vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
> +    vm_prot_t fault_type, int fault_flags, boolean_t set_wd)
> +{
> +
> +       if (((prot & VM_PROT_WRITE) != 0 ||
> +           (fault_flags & VM_FAULT_DIRTY) != 0) &&
> +           (m->oflags & VPO_UNMANAGED) == 0) {
> +               if (set_wd)
> +                       vm_object_set_writeable_dirty(m->object);
> +
> +               /*
> +                * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
> +                * if the page is already dirty to prevent data written with
> +                * the expectation of being synced from not being synced.
> +                * Likewise if this entry does not request NOSYNC then make
> +                * sure the page isn't marked NOSYNC.  Applications sharing
> +                * data should use the same flags to avoid ping ponging.
> +                */
> +               if (entry->eflags & MAP_ENTRY_NOSYNC) {
> +                       if (m->dirty == 0) {
> +                               if (!set_wd)
> +                                       vm_page_lock(m);
> +                               m->oflags |= VPO_NOSYNC;
> +                               if (!set_wd)
> +                                       vm_page_unlock(m);
> +                       }
> +               } else {
> +                       m->oflags &= ~VPO_NOSYNC;
> +               }
> +
> +               /*
> +                * If the fault is a write, we know that this page is being
> +                * written NOW so dirty it explicitly to save on
> +                * pmap_is_modified() calls later.
> +                *
> +                * Also tell the backing pager, if any, that it should remove
> +                * any swap backing since the page is now dirty.
> +                */
> +               if (((fault_type & VM_PROT_WRITE) != 0 &&
> +                   (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
> +                   (fault_flags & VM_FAULT_DIRTY) != 0) {
> +                       vm_page_dirty(m);
> +                       vm_pager_page_unswapped(m);
> +               }
> +       }
> +}
> +
>  /*
>   * TRYPAGER - used by vm_fault to calculate whether the pager for the
>   *           current object *might* contain the page.
> @@ -321,11 +369,8 @@ RetryFault:;
>                         vm_page_hold(m);
>                         vm_page_unlock(m);
>                 }
> -               if ((fault_type & VM_PROT_WRITE) != 0 &&
> -                   (m->oflags & VPO_UNMANAGED) == 0) {
> -                       vm_page_dirty(m);
> -                       vm_pager_page_unswapped(m);
> -               }
> +               vm_fault_dirty(fs.entry, m, prot, fault_type, fault_flags,
> +                   FALSE);
>                 VM_OBJECT_RUNLOCK(fs.first_object);
>                 if (!wired)
>                         vm_fault_prefault(&fs, vaddr, 0, 0);
> @@ -898,42 +943,7 @@ vnode_locked:
>         if (hardfault)
>                 fs.entry->next_read = fs.pindex + faultcount - reqpage;
>
> -       if (((prot & VM_PROT_WRITE) != 0 ||
> -           (fault_flags & VM_FAULT_DIRTY) != 0) &&
> -           (fs.m->oflags & VPO_UNMANAGED) == 0) {
> -               vm_object_set_writeable_dirty(fs.object);
> -
> -               /*
> -                * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
> -                * if the page is already dirty to prevent data written with
> -                * the expectation of being synced from not being synced.
> -                * Likewise if this entry does not request NOSYNC then make
> -                * sure the page isn't marked NOSYNC.  Applications sharing
> -                * data should use the same flags to avoid ping ponging.
> -                */
> -               if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
> -                       if (fs.m->dirty == 0)
> -                               fs.m->oflags |= VPO_NOSYNC;
> -               } else {
> -                       fs.m->oflags &= ~VPO_NOSYNC;
> -               }
> -
> -               /*
> -                * If the fault is a write, we know that this page is being
> -                * written NOW so dirty it explicitly to save on
> -                * pmap_is_modified() calls later.
> -                *
> -                * Also tell the backing pager, if any, that it should remove
> -                * any swap backing since the page is now dirty.
> -                */
> -               if (((fault_type & VM_PROT_WRITE) != 0 &&
> -                   (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
> -                   (fault_flags & VM_FAULT_DIRTY) != 0) {
> -                       vm_page_dirty(fs.m);
> -                       vm_pager_page_unswapped(fs.m);
> -               }
> -       }
> -
> +       vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE);
>         vm_page_assert_xbusied(fs.m);
>
>         /*
> diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
> index f12b76c..a45648d 100644
> --- a/sys/vm/vm_page.h
> +++ b/sys/vm/vm_page.h
> @@ -147,7 +147,7 @@ struct vm_page {
>         uint16_t hold_count;            /* page hold count (P) */
>         uint16_t flags;                 /* page PG_* flags (P) */
>         uint8_t aflags;                 /* access is atomic */
> -       uint8_t oflags;                 /* page VPO_* flags (O) */
> +       uint8_t oflags;                 /* page VPO_* flags (OM) */
>         uint8_t queue;                  /* page queue index (P,Q) */
>         int8_t psind;                   /* pagesizes[] index (O) */
>         int8_t segind;
> @@ -163,8 +163,9 @@ struct vm_page {
>  /*
>   * Page flags stored in oflags:
>   *
> - * Access to these page flags is synchronized by the lock on the object
> - * containing the page (O).
> + * Access to these page flags is synchronized by the exclusive lock on
> + * the object containing the page, or combination of shared object
> + * lock and the page lock (OM).
>   *
>   * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG)
>   *      indicates that the page is not under PV management but



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-Vmo=Dw02HsazH1n9wkXFDXM83bOnNLGndAt5=KZN9QHxyOw>