Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 23 Feb 2008 23:43:13 +0200
From:      Kostik Belousov <kostikbel@gmail.com>
To:        "Arun Balakrishnan (WT01 - Computing, Storage & Software Products)" <arun.balakrishnan@wipro.com>
Cc:        kan@freebsd.org, freebsd-stable@freebsd.org
Subject:   Re: Memory Leak under FreeBSD 6.0 RELEASE
Message-ID:  <20080223214313.GF57756@deviant.kiev.zoral.com.ua>
In-Reply-To: <47C00A1B.5030708@wipro.com>
References:  <47C00A1B.5030708@wipro.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--gDKptnH9ef5/6wqy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Sat, Feb 23, 2008 at 05:27:15PM +0530, Arun Balakrishnan (WT01 - Computing, Storage & Software Products) wrote:
>=20
>    Hi,
>    We are currently working on a project wherein we are porting a library
>    from GNU/Linux to FreeBSD 6.0 - RELEASE 32-bit and 64-bit. As part of
>    the standard memory leak tests, we noticed that the ported library is
>    leaking memory. After lots of analysis we found something very
>    strange. Just repeatedly loading and unloading our library was itself
>    throwing up a leak. We are able to reproduce a similar leak using the
>    following steps:
>    1. SimpleLib.cpp - Simple dummy library
>    2. LibLoader.cpp - Utility to repeatedly load the library
>    3. Compile as mentioned
>    4. Run under Valgrind multiple times (31 times in our example,
>    hard-coded for simplicity)
>    =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3DSimpleLib.cpp=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>    #include <stdio.h>
>    #include <stdlib.h>
>    class CLeaker
>    {
>    public:
>      CLeaker() { };
>      virtual ~CLeaker() { };
>    };
>    CLeaker obj;
>    =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3DLibLoader.cpp=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>    #include "stdio.h"
>    #include "dlfcn.h"
>    #include <stdlib.h>
>    #include <unistd.h>
>    #include <sys/time.h>
>    int main()
>    {
>      int i =3D 0;
>      int loop =3D 31;
>      while (i<loop)
>        {
>          i++;
>          void *handle =3D dlopen(argv[1], RTLD_LAZY);
>          if ( !handle )
>            exit(1);
>          dlclose(handle);
>        }
>      return 0;
>    }
>    =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>    =3D=3D
>    Compilation:
>    g++ -shared -Wl,-soname,SimpleLib.so -o SimpleLib.so SimpleLib.cpp -g
>    g++ -o LibLoader_FreeBSD LibLoader.cpp -g
>    =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>    =3D=3D=3D
>    Execution:
>    valgrind --trace-pthread=3Dall --show-below-main=3Dyes
>    --show-reachable=3Dyes --leak-check=3Dyes ./LibLoader_FreeBSD
>    ./SimpleLib.so
>    =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>    =3D=3D=3D
>    Output: (snipped off irrelevant portions)
>    =3D=3D1155=3D=3D ERROR SUMMARY: 0 errors from 0 contexts (suppressed: =
0 from
>    0)
>    =3D=3D1155=3D=3D malloc/free: in use at exit: 520 bytes in 1 blocks.
>    =3D=3D1155=3D=3D malloc/free: 1 allocs, 0 frees, 520 bytes allocated.
>    =3D=3D1155=3D=3D For counts of detected errors, rerun with: -v
>    =3D=3D1155=3D=3D searching for pointers to 1 not-freed blocks.
>    =3D=3D1155=3D=3D checked 2140912 bytes.
>    =3D=3D1155=3D=3D
>    =3D=3D1155=3D=3D 520 bytes in 1 blocks are still reachable in loss rec=
ord 1 of
>    1
>    =3D=3D1155=3D=3D    at 0x3C032183: malloc (in
>    /usr/local/lib/valgrind/vgpreload_memcheck.so)
>    =3D=3D1155=3D=3D    by 0x3C1CB018: (within /lib/libc.so.6)
>    =3D=3D1155=3D=3D    by 0x3C1CB206: __cxa_atexit (in /lib/libc.so.6)
>    =3D=3D1155=3D=3D    by 0x3C1F0898: ???
>    =3D=3D1155=3D=3D
>    =3D=3D1155=3D=3D LEAK SUMMARY:
>    =3D=3D1155=3D=3D    definitely lost: 0 bytes in 0 blocks.
>    =3D=3D1155=3D=3D    possibly lost:   0 bytes in 0 blocks.
>    =3D=3D1155=3D=3D    still reachable: 520 bytes in 1 blocks.
>    =3D=3D1155=3D=3D         suppressed: 0 bytes in 0 blocks.
>    =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
>    =3D=3D=3D
>    Queries:
>    1. As seen in the Valgrind output, there is a 520bytes leak. This
>    happens only after around 31 loops and keeps increasing. By 100 loops,
>    the leak goes up to 1560 bytes. In our situation with our library, the
>    520bytes leak starts by the third iteration itself and by around 23
>    iterations it reaches 5KB. We are really stumped as to what could be
>    the possible reason for this leak? Where is the malloc called from?
>    Why only after executing 31 times? Executing the same code under
>    GNU/Linux does not show any leak even for over 1000 iterations.
>    2. While executing this without Valgrind, in another terminal we did a
>    "ps -Aopid,rss | grep LibLoader_" continuously in a loop and saw that
>    the RSS (resident set size) field value keeps increasing by 4KB every
>    now and then. The same experiment on GNU/Linux shows that RSS remains
>    at the same value. What could be the cause for the ever rising RSS
>    value?
>    Any help in this regard would be really helpful. Thanks in advance.
>    Rgds,
>    ~Arun

The valgrind report points to the memory that atexit_register() uses to
keep track of the functions registered through atexit(3) and
__cxa_atexit(); see lib/libc/stdlib/atexit.c. In your example (which does
not compile as posted: main() uses argv without declaring it), shared
objects use __cxa_atexit() to register the destructors of their global
objects, so that they are called when the DSO is unloaded.

Handling this memory is complicated because the atexit() specification
states that:
- functions shall be called in the reverse order of their registration;
- at least 32 functions can be registered with atexit().

The current implementation never frees a struct atexit, in an attempt to
conform to the ordering requirement.

The static __atexit0, intended to guarantee that the first 32 atexit()
calls succeed, may not actually guarantee this, because its space can be
consumed by interleaved __cxa_atexit() registrations instead.

The patch below may help with the libc leak.

diff --git a/lib/libc/stdlib/atexit.c b/lib/libc/stdlib/atexit.c
index 05dad84..8389637 100644
--- a/lib/libc/stdlib/atexit.c
+++ b/lib/libc/stdlib/atexit.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD: src/lib/libc/stdlib/atexit.c,v 1.8 20=
07/01/09 00:28:09 imp E
 #include <stdlib.h>
 #include <unistd.h>
 #include <pthread.h>
+#include <sys/queue.h>
 #include "atexit.h"
 #include "un-namespace.h"
=20
@@ -56,7 +57,7 @@ static pthread_mutex_t atexit_mutex =3D PTHREAD_MUTEX_INI=
TIALIZER;
 #define _MUTEX_UNLOCK(x)	if (__isthreaded) _pthread_mutex_unlock(x)
=20
 struct atexit {
-	struct atexit *next;			/* next in list */
+	LIST_ENTRY(atexit) link;
 	int ind;				/* next index in this table */
 	struct atexit_fn {
 		int fn_type;			/* ATEXIT_? from above */
@@ -69,7 +70,10 @@ struct atexit {
 	} fns[ATEXIT_SIZE];			/* the table itself */
 };
=20
-static struct atexit *__atexit;		/* points to head of LIFO stack */
+/* Head of LIFO stack */
+LIST_HEAD(, atexit) __atexit =3D LIST_HEAD_INITIALIZER(__atexit);
+static struct atexit __atexit0;			/* one guaranteed table */
+static unsigned long __atexit_gen;
=20
 /*
  * Register the function described by 'fptr' to be called at application
@@ -79,30 +83,33 @@ static struct atexit *__atexit;		/* points to head of L=
IFO stack */
 static int
 atexit_register(struct atexit_fn *fptr)
 {
-	static struct atexit __atexit0;	/* one guaranteed table */
 	struct atexit *p;
+	unsigned long old__atexit_gen;
=20
 	_MUTEX_LOCK(&atexit_mutex);
-	if ((p =3D __atexit) =3D=3D NULL)
-		__atexit =3D p =3D &__atexit0;
-	else while (p->ind >=3D ATEXIT_SIZE) {
-		struct atexit *old__atexit;
-		old__atexit =3D __atexit;
-	        _MUTEX_UNLOCK(&atexit_mutex);
-		if ((p =3D (struct atexit *)malloc(sizeof(*p))) =3D=3D NULL)
-			return (-1);
-		_MUTEX_LOCK(&atexit_mutex);
-		if (old__atexit !=3D __atexit) {
-			/* Lost race, retry operation */
+	if (LIST_EMPTY(&__atexit)) {
+		p =3D &__atexit0;
+		LIST_INSERT_HEAD(&__atexit, p, link);
+	} else {
+	retry:
+		p =3D LIST_FIRST(&__atexit);
+		if (p->ind >=3D ATEXIT_SIZE) {
+			old__atexit_gen =3D __atexit_gen;
 			_MUTEX_UNLOCK(&atexit_mutex);
-			free(p);
+			if ((p =3D (struct atexit *)malloc(sizeof(*p))) =3D=3D NULL)
+				return (-1);
 			_MUTEX_LOCK(&atexit_mutex);
-			p =3D __atexit;
-			continue;
+			if (old__atexit_gen !=3D __atexit_gen) {
+				/* Lost race, retry operation */
+				_MUTEX_UNLOCK(&atexit_mutex);
+				free(p);
+				_MUTEX_LOCK(&atexit_mutex);
+				goto retry;
+			}
+			p->ind =3D 0;
+			LIST_INSERT_HEAD(&__atexit, p, link);
+			__atexit_gen++;
 		}
-		p->ind =3D 0;
-		p->next =3D __atexit;
-		__atexit =3D p;
 	}
 	p->fns[p->ind++] =3D *fptr;
 	_MUTEX_UNLOCK(&atexit_mutex);
@@ -119,7 +126,7 @@ atexit(void (*func)(void))
 	int error;
=20
 	fn.fn_type =3D ATEXIT_FN_STD;
-	fn.fn_ptr.std_func =3D func;;
+	fn.fn_ptr.std_func =3D func;
 	fn.fn_arg =3D NULL;
 	fn.fn_dso =3D NULL;
=20
@@ -138,7 +145,7 @@ __cxa_atexit(void (*func)(void *), void *arg, void *dso)
 	int error;
=20
 	fn.fn_type =3D ATEXIT_FN_CXA;
-	fn.fn_ptr.cxa_func =3D func;;
+	fn.fn_ptr.cxa_func =3D func;
 	fn.fn_arg =3D arg;
 	fn.fn_dso =3D dso;
=20
@@ -154,32 +161,55 @@ __cxa_atexit(void (*func)(void *), void *arg, void *d=
so)
 void
 __cxa_finalize(void *dso)
 {
-	struct atexit *p;
-	struct atexit_fn fn;
-	int n;
+	struct atexit *p, *p1, cp;
+	struct atexit_fn *fn;
+	int i, n, inuse;
+	unsigned long orig__atexit_gen;
=20
 	_MUTEX_LOCK(&atexit_mutex);
-	for (p =3D __atexit; p; p =3D p->next) {
+ restart:
+	inuse =3D 0;
+	LIST_FOREACH_SAFE(p, &__atexit, link, p1) {
+		cp.ind =3D 0;
 		for (n =3D p->ind; --n >=3D 0;) {
 			if (p->fns[n].fn_type =3D=3D ATEXIT_FN_EMPTY)
 				continue; /* already been called */
-			if (dso !=3D NULL && dso !=3D p->fns[n].fn_dso)
+			if (dso !=3D NULL && dso !=3D p->fns[n].fn_dso) {
+				inuse =3D 1;
 				continue; /* wrong DSO */
-			fn =3D p->fns[n];
+			}
+			cp.fns[cp.ind++] =3D p->fns[n];
 			/*
 			  Mark entry to indicate that this particular handler
 			  has already been called.
 			*/
 			p->fns[n].fn_type =3D ATEXIT_FN_EMPTY;
-		        _MUTEX_UNLOCK(&atexit_mutex);
-	=09
+		}
+		if (!inuse && p !=3D &__atexit0) {
+			LIST_REMOVE(p, link);
+			__atexit_gen++;
+		} else {
+			/*
+			 * The current entry cannot be removed, and so
+			 * any consequent entries.
+			 */
+			inuse =3D 1;
+			p =3D NULL;
+		}
+		orig__atexit_gen =3D __atexit_gen;
+		_MUTEX_UNLOCK(&atexit_mutex);
+		free(p);
+		for (i =3D 0; i < cp.ind; i++) {
+			fn =3D &cp.fns[i];
 			/* Call the function of correct type. */
-			if (fn.fn_type =3D=3D ATEXIT_FN_CXA)
-				fn.fn_ptr.cxa_func(fn.fn_arg);
-			else if (fn.fn_type =3D=3D ATEXIT_FN_STD)
-				fn.fn_ptr.std_func();
-			_MUTEX_LOCK(&atexit_mutex);
+			if (fn->fn_type =3D=3D ATEXIT_FN_CXA)
+				fn->fn_ptr.cxa_func(fn->fn_arg);
+			else if (fn->fn_type =3D=3D ATEXIT_FN_STD)
+				fn->fn_ptr.std_func();
 		}
+		_MUTEX_LOCK(&atexit_mutex);
+		if (orig__atexit_gen !=3D __atexit_gen)
+			goto restart;
 	}
 	_MUTEX_UNLOCK(&atexit_mutex);
 }

--gDKptnH9ef5/6wqy
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.8 (FreeBSD)

iEYEARECAAYFAkfAk3EACgkQC3+MBN1Mb4iYzgCgnIjlPDHsOllc5U33+sV5hceS
L9MAnjXVjleHRdMhZhSytwoF6tr6Uk+2
=Dz5Y
-----END PGP SIGNATURE-----

--gDKptnH9ef5/6wqy--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20080223214313.GF57756>