Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 8 Jul 2010 12:42:56 +0300
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Andriy Gapon <avg@icyb.net.ua>
Cc:        lstewart@freebsd.org, freebsd-net@freebsd.org, bz@freebsd.org, Andre Oppermann <andre@freebsd.org>, Ming Fu <Ming.Fu@watchguard.com>
Subject:   Re: kern/123095 kern/131602 sendfile
Message-ID:  <20100708094255.GA2408@deviant.kiev.zoral.com.ua>
In-Reply-To: <4C358EE5.8070003@icyb.net.ua>
References:  <7C3D15DD6E8F464998CA1470D8A322F302BB9F72@ES02CO.wgti.net> <20100707205041.GO13238@deviant.kiev.zoral.com.ua> <4C358843.5000001@freebsd.org> <4C358A01.8080206@icyb.net.ua> <20100708082943.GB2439@deviant.kiev.zoral.com.ua> <4C358EE5.8070003@icyb.net.ua>

next in thread | previous in thread | raw e-mail | index | archive | help

--UugvWAfsgieZRqgk
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Thu, Jul 08, 2010 at 11:40:05AM +0300, Andriy Gapon wrote:
> on 08/07/2010 11:29 Kostik Belousov said the following:
> > Right, the patch maps the page in sf buffer read-only (on i386 only).
> > But note the parallel posting with m_cat() change. It is still not enou=
gh,
> > and I am not set up for the real network testing ATM.
>=20
> Could you also try to experiment with mb_dupcl?
> Namely transfer M_RDONLY from source mbuf.

Right, it is it.

Below is my current patch including debugging facilities that seems to work.
Real changes that needed are in m_cat and mb_dupcl.

diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 6f86895..530f704 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -810,6 +810,12 @@ sf_buf_alloc(struct vm_page *m, int flags)
 				nsfbufsused++;
 				nsfbufspeak =3D imax(nsfbufspeak, nsfbufsused);
 			}
+			if ((flags & SFB_RONLY) =3D=3D 0) {
+				ptep =3D vtopte(sf->kva);
+				opte =3D *ptep;
+				if ((opte & PG_RW) =3D=3D 0)
+					*ptep |=3D PG_RW | PG_A;
+			}
 #ifdef SMP
 			goto shootdown;=09
 #else
@@ -854,8 +860,9 @@ sf_buf_alloc(struct vm_page *m, int flags)
        PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
 	   | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
 #else
-	*ptep =3D VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
-	    pmap_cache_bits(m->md.pat_mode, 0);
+       *ptep =3D VM_PAGE_TO_PHYS(m) | pgeflag |
+	   ((flags & SFB_RONLY) ? 0 : PG_RW) | PG_V |
+	   pmap_cache_bits(m->md.pat_mode, 0);
 #endif
=20
 	/*
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index f41eb03..1701ef2 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -301,7 +301,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
 	n->m_ext.ext_size =3D m->m_ext.ext_size;
 	n->m_ext.ref_cnt =3D m->m_ext.ref_cnt;
 	n->m_ext.ext_type =3D m->m_ext.ext_type;
-	n->m_flags |=3D M_EXT;
+	n->m_flags |=3D M_EXT | (M_RDONLY & m->m_flags);
 }
=20
 /*
@@ -911,7 +911,8 @@ m_cat(struct mbuf *m, struct mbuf *n)
 		m =3D m->m_next;
 	while (n) {
 		if (m->m_flags & M_EXT ||
-		    m->m_data + m->m_len + n->m_len >=3D &m->m_dat[MLEN]) {
+		    m->m_data + m->m_len + n->m_len >=3D &m->m_dat[MLEN] ||
+		    !M_WRITABLE(m)) {
 			/* just join the two chains */
 			m->m_next =3D n;
 			return;
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 2060a2e..95fae7e 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -776,6 +776,8 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct m=
buf *n)
 		    m->m_len <=3D MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <=3D M_TRAILINGSPACE(n) &&
 		    n->m_type =3D=3D m->m_type) {
+			KASSERT(n->m_ext.ext_type !=3D EXT_SFBUF,
+			    ("destroying file page %p", n));
 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
 			    (unsigned)m->m_len);
 			n->m_len +=3D m->m_len;
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 3165dab..df9cd69 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -2131,7 +2131,8 @@ retry_space:
 			 * as necessary, but this wait can be interrupted.
 			 */
 			if ((sf =3D sf_buf_alloc(pg,
-			    (mnw ? SFB_NOWAIT : SFB_CATCH))) =3D=3D NULL) {
+			    (mnw ? SFB_NOWAIT : SFB_CATCH) | SFB_RONLY))
+			    =3D=3D NULL) {
 				mbstat.sf_allocfail++;
 				vm_page_lock(pg);
 				vm_page_unwire(pg, 0);
@@ -2162,6 +2163,8 @@ retry_space:
 				m_cat(m, m0);
 			else
 				m =3D m0;
+			KASSERT((m0->m_flags & M_RDONLY) !=3D 0,
+			    ("lost M_RDONLY"));
=20
 			/* Keep track of bits processed. */
 			loopbytes +=3D xfsize;
diff --git a/sys/sys/sf_buf.h b/sys/sys/sf_buf.h
index af42065..fcb31f8 100644
--- a/sys/sys/sf_buf.h
+++ b/sys/sys/sf_buf.h
@@ -41,6 +41,7 @@
 #define	SFB_CPUPRIVATE	2		/* Create a CPU private mapping. */
 #define	SFB_DEFAULT	0
 #define	SFB_NOWAIT	4		/* Return NULL if all bufs are used. */
+#define	SFB_RONLY	8		/* Map page read-only, if possible. */
=20
 struct vm_page;
=20

--UugvWAfsgieZRqgk
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (FreeBSD)

iEYEARECAAYFAkw1nZ8ACgkQC3+MBN1Mb4iLtQCghSEGzaeG5KqxQ5bAOOOmoX4d
jhwAn294ck0E3Or4qCsE19dOtfRTLq3w
=U6Au
-----END PGP SIGNATURE-----

--UugvWAfsgieZRqgk--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100708094255.GA2408>