Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 3 Dec 1999 04:55:47 +0100 (CET)
From:      Tor Egge <tegge@not.fast.no>
To:        FreeBSD-gnats-submit@freebsd.org
Subject:   kern/15235: Race conditions in pipe_write causes kernel memory corruption
Message-ID:  <199912030355.EAA90300@not.fast.no>

next in thread | raw e-mail | index | archive | help

>Number:         15235
>Category:       kern
>Synopsis:       Race conditions in pipe_write causes kernel memory corruption
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Dec  2 20:30:01 PST 1999
>Closed-Date:
>Last-Modified:
>Originator:     Tor Egge
>Release:        FreeBSD 4.0-CURRENT i386
>Organization:
Fast Search & Transfer ASA
>Environment:

	3.3-STABLE and 4.0-CURRENT

>Description:

	(3.3-STABLE)

#0  cpu_dump (pcb=0xc0287310) at ../../i386/i386/vm_machdep.c:703
703     ../../i386/i386/vm_machdep.c: No such file or directory.
(kgdb) where
#0  cpu_dump (pcb=0xc0287310) at ../../i386/i386/vm_machdep.c:703
#1  0xc015ec71 in boot (howto=256) at ../../kern/kern_shutdown.c:284
#2  0xc015ef25 in panic (fmt=0xc0265793 "page fault")
    at ../../kern/kern_shutdown.c:443
#3  0xc022eb38 in trap_fatal (frame=0xda694dac, eva=791819679)
    at ../../i386/i386/trap.c:947
#4  0xc022e7b3 in trap_pfault (frame=0xda694dac, usermode=0, eva=791819679)
    at ../../i386/i386/trap.c:840
#5  0xc022e3fa in trap (frame={tf_es = -1071251440, tf_ds = 16, 
      tf_edi = -631040032, tf_esi = 791819679, tf_ebp = -630632952, 
      tf_isp = -630633004, tf_ebx = 791819567, tf_edx = 791819679, 
      tf_ecx = 16777217, tf_eax = 0, tf_trapno = 12, tf_err = 2, 
      tf_eip = -1071460992, tf_cs = 8, tf_eflags = 66050, 
      tf_esp = -1072153213, tf_ss = 791819679}) at ../../i386/i386/trap.c:441
#6  0xc022cd80 in setlock ()
#7  0xc0206305 in vm_object_vndeallocate (object=0xda6c5e0c)
    at ../../vm/vm_object.c:277
#8  0xc01f6185 in ffs_read (ap=0xda694eb8) at ../../ufs/ufs/ufs_readwrite.c:352
#9  0xc018a21b in vn_read (fp=0xc3fc2d80, uio=0xda694f00, cred=0xc3f3ab80, 
    flags=0) at vnode_if.h:303
#10 0xc0169e35 in dofileread (p=0xda6317e0, fp=0xc3fc2d80, fd=3, 
    buf=0x8084000, nbyte=8192, offset=-1, flags=0)
    at ../../kern/sys_generic.c:179
#11 0xc0169d3f in read (p=0xda6317e0, uap=0xda694f84)
    at ../../kern/sys_generic.c:111
#12 0xc022edb3 in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 134766545, 
      tf_esi = 1498366912, tf_ebp = -1077978384, tf_isp = -630632492, 
      tf_ebx = 1498310380, tf_edx = 3, tf_ecx = 0, tf_eax = 3, tf_trapno = 7, 
      tf_err = 2, tf_eip = 1498060656, tf_cs = 31, tf_eflags = 514, 
      tf_esp = -1077978420, tf_ss = 39}) at ../../i386/i386/trap.c:1105
#13 0xc021cb2c in Xint0x80_syscall ()
#14 0x594cc665 in ?? ()
#15 0x594c8c54 in ?? ()
#16 0x804ab1b in ?? ()
#17 0x804a8d5 in ?? ()
#18 0x804a68c in ?? ()
#19 0x8049883 in ?? ()
#20 0x80496c5 in ?? ()

Examination shows that the vm object is overwritten in kernel memory:

(kgdb) print *(struct vm_object *) 0xda6c5e0c
$4 = {object_list = {tqe_next = 0x656e7574, tqe_prev = 0x79746963}, 
  shadow_head = {tqh_first = 0x6d6f632e, tqh_last = 0x6e61762f}, 
  shadow_list = {tqe_next = 0x69687369, tqe_prev = 0x382f676e}, memq = {
    tqh_first = 0x762f3432, tqh_last = 0x6f697261}, generation = 1882157941, 
  type = 1919643253, size = 1768647535, ref_count = 1954051118, 
  shadow_count = 892678410, id = 825242168, flags = 12852, pg_color = 8247, 
  paging_in_progress = 12599, behavior = 13366, 
  resident_page_count = 842151476, paging_offset = 3400000498352547872, 
  backing_object = 0x706e6974, backing_object_offset = 8463515422517259873, 
  last_read = 1768121710, page_hint = 0x632e7974, pager_object_list = {
    tqe_next = 0x682f6d6f, tqe_prev = 0x6573756f}, handle = 0x2f32352f, 
  un_pager = {vnp = {vnp_size = 7813028919358284141}, devp = {devp_pglist = {
        tqh_first = 0x2e34616d, tqh_last = 0x6c6d7468}}, swp = {
      swp_nblocks = 775184749, swp_allocsize = 1819112552, swp_blocks = 0xa, 
      swp_poip = 0}}}
(kgdb) print (char *) 0xda6c5e0c
$5 = 0xda6c5e0c "tunecity.com/vanishing/824/various/punkrokki.txt\n1558201427 71644622 http://tinpan.fortunecity.com/house/52/ma4.html\n"
(kgdb)

After some more digging, a pipe structure is found to be invalid:

(kgdb) print *(struct pipe *) 0xda5ba020
$9 = {pipe_buffer = {cnt = 20097, in = 0, out = 0, size = 16384, 
    buffer = 0xda6c1000 "tm\n1578116684 40158689 http://info.med.yale.edu/phyassoc/stulife.html\n1530933342 50718132 http://info.med.yale.edu/labinvest/abstracts/9902feb/9902_151.html\n1720729792 67896937 http://info.med.yale.ed"..., 
    object = 0xda6b407c}, pipe_map = {kva = 0, cnt = 0, pos = 0, npages = 0, 
    ms = {0x0 <repeats 17 times>}}, pipe_sel = {si_pid = 0, si_flags = 0}, 
  pipe_atime = {tv_sec = 944072875, tv_nsec = 119056060}, pipe_mtime = {
    tv_sec = 944072875, tv_nsec = 119056060}, pipe_ctime = {
    tv_sec = 944072871, tv_nsec = 739076689}, pipe_sigio = 0x0, 
  pipe_peer = 0xda5b9f80, pipe_state = 2048, pipe_busy = 0}
(kgdb)

(kgdb) print/x 0xda6c1000 + 20097
$11 = 0xda6c5e81

This indicates that pipe_write wrote beyond the 16KB pipe buffer and into
the next page that contained vm_object structures.

Examination of pipe_write shows why:

	- pipelock() might block.  When this happens, the values for
	  space and size are no longer valid, since the process 
	  holding the pipe lock might have modified the pipe structure.

There seems to be one more race:

	- A write of size <= PIPE_BUF might be split into two internal
	  write operations.  If the pipe size is larger than PIPE_SIZE
	  (e.g. BIG_PIPE_SIZE) the process might block after the first
	  internal write operation due to the pipe containing 
	  PIPE_SIZE or more bytes.
		
>How-To-Repeat:

Let multiple processes write to the same pipe, and do one of the following:

	- Write to the pipe from memory that has to be faulted in, 
	  causing uiomove to block while the pipe lock is held.

	- read from the other end of the pipe to memory that has to
	  be faulted in, causing uiomove to block while the pipe
	  lock is held.

	- Use a version of uiomove that releases the giant kernel lock
	  during transfers larger than 1024 bytes on SMP systems.

>Fix:

	(4.0-CURRENT)

	- Check that space is still valid after obtaining the pipe lock.

	- Defer the calculation of transfer size until the pipe
	  lock has been obtained.

	- Allow an internal write to span two contiguous segments,
	  so writes of size <= PIPE_BUF can be kept atomic 
	  when wrapping around from the end to the start of the
	  pipe buffer.

	- Update the pipe buffer pointers while holding
	  the pipe lock.


Index: sys/kern/sys_pipe.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.58
diff -u -r1.58 sys_pipe.c
--- sys_pipe.c	1999/11/08 03:28:48	1.58
+++ sys_pipe.c	1999/12/02 18:18:21
@@ -799,27 +799,13 @@
 		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
 
 		/* Writes of size <= PIPE_BUF must be atomic. */
-		/* XXX perhaps they need to be contiguous to be atomic? */
 		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
 			space = 0;
 
 		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
-			/*
-			 * This set the maximum transfer as a segment of
-			 * the buffer.
-			 */
-			int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in;
-			/*
-			 * space is the size left in the buffer
-			 */
-			if (size > space)
-				size = space;
-			/*
-			 * now limit it to the size of the uio transfer
-			 */
-			if (size > uio->uio_resid)
-				size = uio->uio_resid;
 			if ((error = pipelock(wpipe,1)) == 0) {
+				int size;	/* Transfer size */
+				int segsize;	/* first segment to transfer */
 				/*
 				 * It is possible for a direct write to
 				 * slip in on us... handle it here...
@@ -828,18 +814,73 @@
 					pipeunlock(wpipe);
 					goto retrywrite;
 				}
-				error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 
-					size, uio);
+				/* 
+				 * If a process blocked in uiomove, our
+				 * value for space might be bad.
+				 */
+				if (space > wpipe->pipe_buffer.size - 
+				    wpipe->pipe_buffer.cnt) {
+					pipeunlock(wpipe);
+					goto retrywrite;
+				}
+
+				/*
+				 * Transfer size is minimum of uio transfer
+				 * and free space in pipe buffer.
+				 */
+				if (space > uio->uio_resid)
+					size = uio->uio_resid;
+				else
+					size = space;
+				/*
+				 * First segment to transfer is minimum of 
+				 * transfer size and contiguous space in
+				 * pipe buffer.  If first segment to transfer
+				 * is less than the transfer size, we've got
+				 * a wraparound in the buffer.
+				 */
+				segsize = wpipe->pipe_buffer.size - 
+					wpipe->pipe_buffer.in;
+				if (segsize > size)
+					segsize = size;
+				
+				/* Transfer first segment */
+
+				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 
+						segsize, uio);
+				
+				if (error == 0 && segsize < size) {
+					/* 
+					 * Transfer remaining part now, to
+					 * support atomic writes.  Wraparound
+					 * happened.
+					 */
+					if (wpipe->pipe_buffer.in + segsize != 
+					    wpipe->pipe_buffer.size)
+						panic("Expected pipe buffer wraparound disappeared");
+						
+					error = uiomove(&wpipe->pipe_buffer.buffer[0],
+							size - segsize, uio);
+				}
+				if (error == 0) {
+					wpipe->pipe_buffer.in += size;
+					if (wpipe->pipe_buffer.in >=
+					    wpipe->pipe_buffer.size) {
+						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
+							panic("Expected wraparound bad");
+						wpipe->pipe_buffer.in = size - segsize;
+					}
+				
+					wpipe->pipe_buffer.cnt += size;
+					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
+						panic("Pipe buffer overflow");
+				
+				}
 				pipeunlock(wpipe);
 			}
 			if (error)
 				break;
 
-			wpipe->pipe_buffer.in += size;
-			if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size)
-				wpipe->pipe_buffer.in = 0;
-
-			wpipe->pipe_buffer.cnt += size;
 		} else {
 			/*
 			 * If the "read-side" has been blocked, wake it up now.



>Release-Note:
>Audit-Trail:
>Unformatted:


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199912030355.EAA90300>