Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 03 Feb 2010 01:58:07 +0100
From:      =?ISO-8859-1?Q?G=E1bor_K=F6vesd=E1n?= <gabor@FreeBSD.org>
To:        Jilles Tjoelker <jilles@stack.nl>
Cc:        Xin LI <delphij@delphij.net>, current@freebsd.org
Subject:   Re: NLS/strerror efficiency
Message-ID:  <4B68CA1F.5060105@FreeBSD.org>
In-Reply-To: <20100129215038.GA95021@stack.nl>
References:  <20100119212019.GL59590@deviant.kiev.zoral.com.ua> <4B56CACF.50508@FreeBSD.org> <yge1vhgvdd3.wl%ume@mahoroba.org> <4B5B4F4B.3030201@FreeBSD.org> <20100124091911.GI31243@server.vk2pj.dyndns.org> <4B5C27B9.1080805@FreeBSD.org> <20100124113718.GC3877@deviant.kiev.zoral.com.ua> <4B5CD916.1060003@FreeBSD.org> <20100126222338.GA40281@stack.nl> <4B633F2D.6010804@FreeBSD.org> <20100129215038.GA95021@stack.nl>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------010405030806090204040601
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 8bit

Jilles Tjoelker wrote:
> On Fri, Jan 29, 2010 at 09:03:57PM +0100, Gábor Kövesdán wrote:
>   
>>>> +static pthread_rwlock_t		 rwlock;
>>>>         
>
>   
>>> Use PTHREAD_RWLOCK_INITIALIZER here to avoid problems with initializing
>>> the lock.
>>>       
>
>   
>> I talked to delphij@ about this. Shouldn't pthread_rwlock_rdlock() and 
>> pthread_rwlock_wrlock() automatically initialize the lock if it is NULL? 
>> We even removed the pthread_rwlock_init() call and it just works.
>>     
>
> If you look in <pthread.h> you will notice that
> PTHREAD_RWLOCK_INITIALIZER is simply NULL. Also, pthread_rwlock_t is
> just a pointer. However, this may well change later on to allow rwlocks
> in shared memory, making pthread_rwlock_t a struct and
> PTHREAD_RWLOCK_INITIALIZER something more complicated. It already works
> that way in various other implementations, and sem_t has already been
> changed similarly in 9-CURRENT.
>
>   
>>> Hmm, so an error for one language makes it give up for all other
>>> languages too? It is possible that a catalog is only available in a few
>>> languages.
>>>       
>
>   
>> Fixed.
>>     
>
>   
>>>> +				UNLOCK(NLERR);
>>>> +				NLRETERR(np->caterrno);
>>>> +			} else if (strcmp(np->lang, lang) == 0) {
>>>>         
>
>   
>>> Some code below can apparently set np->lang = NULL, how does that work?
>>>       
>
>   
>> NULL means locale-independent open, i.e. catopen() is given an absolute 
>> path. We could add more if's to separate those cases more but that would 
>> result in more code, while this just works. If name is set to an 
>> absolute path, lang will be NULL and strcmp(NULL, NULL) will return 0, 
>> so it will match.
>>     
>
> strcmp(3) and the POSIX spec do not specifically allow passing NULL to
> strcmp(), so it is not valid to do so. It seems that gcc treats a
> literal strcmp(NULL, NULL) specially, replacing it with 0, but any real
> strcmp call involving NULL segfaults.
>
> This probably needs to become something more complicated like
> (np->lang == NULL || lang == NULL ? np->lang == lang :
> strcmp(np->lang, lang) == 0)
>
>   
>> @@ -374,8 +376,8 @@
>>  	}
>>  
>>  	if (_fstat(fd, &st) != 0) {
>> +		_close(fd);
>>  		SAVEFAIL(name, errno);
>> -		_close(fd);
>>  		return (NLERR);
>>  	}
>>     
>
> Be careful that cleanup actions like these might overwrite errno.
> munmap() and _close() are system calls and they should not fail in this
> case (read-only file), so it should be safe. It is cleaner to save errno
> immediately after the failing call, though.
>
>   
>> @@ -390,8 +392,8 @@
>>  
>>  	if (ntohl((u_int32_t)((struct _nls_cat_hdr *)data)->__magic) !=
>>  	    _NLS_MAGIC) {
>> +		munmap(data, (size_t)st.st_size);
>>  		SAVEFAIL(name, errno);
>> -		munmap(data, (size_t)st.st_size);
>>  		NLRETERR(EINVAL);
>>  	}
>>     
>
> The errno value seems garbage. SAVEFAIL with EINVAL, or perhaps use
> EFTYPE (in both places).
>
> The cast to size_t reminds me to ask what happens in the pathological
> case of a catalog file bigger than a size_t can describe, which may
> happen on 32-bit systems. I think this should fail rather than mapping
> the initial size%(1<<32) part.
>   
Hi Jilles,

thanks for pointing out these special cases. I've found some more 
myself, as well and I've made a patch. For this one I've tested all of 
the major cases I could think of and it seems to work for those ones. 
Patch is attached.

Gabor


--------------010405030806090204040601
Content-Type: text/x-patch;
 name="msgcat.c.diff"
Content-Transfer-Encoding: 8bit
Content-Disposition: inline;
 filename="msgcat.c.diff"

Index: nls/msgcat.c
===================================================================
--- nls/msgcat.c	(revisión: 203407)
+++ nls/msgcat.c	(copia de trabajo)
@@ -77,19 +77,22 @@
 
 #define	NLERR		((nl_catd) -1)
 #define NLRETERR(errc)  { errno = errc; return (NLERR); }
-#define SAVEFAIL(n, e)	{ WLOCK(NLERR); \
-			  np = malloc(sizeof(struct catentry)); \
-			  if (np != NULL) { \
-			  	np->name = strdup(n); \
-				np->caterrno = e; \
-			  	SLIST_INSERT_HEAD(&cache, np, list); \
-			  } \
-			  UNLOCK; \
-			}
+#define SAVEFAIL(n, l, e)	{ WLOCK(NLERR); \
+				  np = malloc(sizeof(struct catentry)); \
+				  if (np != NULL) { \
+				  	np->name = strdup(n); \
+					np->path = NULL; \
+					np->lang = (l == NULL) ? NULL : strdup(l); \
+					np->caterrno = e; \
+				  	SLIST_INSERT_HEAD(&cache, np, list); \
+				  } \
+				  UNLOCK; \
+				  errno = e; \
+				}
 
 static nl_catd load_msgcat(const char *, const char *, const char *);
 
-static pthread_rwlock_t		 rwlock;
+static pthread_rwlock_t		 rwlock = PTHREAD_RWLOCK_INITIALIZER;
 
 struct catentry {
 	SLIST_ENTRY(catentry)	 list;
@@ -114,10 +117,12 @@
 	int saverr, spcleft;
 	char path[PATH_MAX];
 
+	/* sanity checking */
 	if (name == NULL || *name == '\0')
 		NLRETERR(EINVAL);
 
 	if (strchr(name, '/') != NULL)
+		/* have a pathname */
 		lang = NULL;
 	else {
 		if (type == NL_CAT_LOCALE)
@@ -135,12 +140,13 @@
 	/* Try to get it from the cache first */
 	RLOCK(NLERR);
 	SLIST_FOREACH(np, &cache, list) {
-		if (strcmp(np->name, name) == 0) {
+		if ((strcmp(np->name, name) == 0) && ((lang == NULL || np->lang == NULL) ? (np->lang == lang) :
+		    (strcmp(np->lang, lang) == 0))) {
 			if (np->caterrno != 0) {
 				/* Found cached failing entry */
 				UNLOCK;
 				NLRETERR(np->caterrno);
-			} else if (strcmp(np->lang, lang) == 0) {
+			} else {
 				/* Found cached successful entry */
 				np->refcount++;
 				UNLOCK;
@@ -154,6 +160,7 @@
 	if (strchr(name, '/') != NULL)
 		return (load_msgcat(name, name, lang));
 
+	/* sanity checking */
 	if ((plang = cptr1 = strdup(lang)) == NULL)
 		return (NLERR);
 	if ((cptr = strchr(cptr1, '@')) != NULL)
@@ -218,6 +225,7 @@
 			too_long:
 						free(plang);
 						free(base);
+						SAVEFAIL(name, lang, ENAMETOOLONG);
 						NLRETERR(ENAMETOOLONG);
 					}
 					pathP += strlen(tmpptr);
@@ -241,6 +249,7 @@
 	}
 	free(plang);
 	free(base);
+	SAVEFAIL(name, lang, ENOENT);
 	NLRETERR(ENOENT);
 }
 
@@ -317,6 +326,7 @@
 {
 	struct catentry *np;
 
+	/* sanity checking */
 	if (catd == NULL || catd == NLERR) {
 		errno = EBADF;
 		return (-1);
@@ -325,13 +335,15 @@
 	/* Remove from cache if not referenced any more */
 	WLOCK(-1);
 	SLIST_FOREACH(np, &cache, list) {
-		if ((np->catd->__size == catd->__size) &&
-		    memcmp((const void *)np->catd, (const void *)catd, np->catd->__size) == 0) {
+		if (catd == np->catd) {
 			np->refcount--;
 			if (np->refcount == 0) {
 				munmap(catd->__data, (size_t)catd->__size);
 				free(catd);
 				SLIST_REMOVE(&cache, np, catentry, list);
+				free(np->name);
+				free(np->path);
+				free(np->lang);
 				free(np);
 			}
 			break;
@@ -360,7 +372,7 @@
 	   it might still be found by absolute path. */
 	RLOCK(NLERR);
 	SLIST_FOREACH(np, &cache, list) {
-		if (strcmp(np->path, path) == 0) {
+		if ((np->path != NULL) && (strcmp(np->path, path) == 0)) {
 			np->refcount++;
 			UNLOCK;
 			return (np->catd);
@@ -369,36 +381,45 @@
 	UNLOCK;
 
 	if ((fd = _open(path, O_RDONLY)) == -1) {
-		SAVEFAIL(name, errno);
-		return (NLERR);
+		SAVEFAIL(name, lang, errno);
+		NLRETERR(errno);
 	}
 
 	if (_fstat(fd, &st) != 0) {
-		SAVEFAIL(name, errno);
 		_close(fd);
-		return (NLERR);
+		SAVEFAIL(name, lang, EFTYPE);
+		NLRETERR(EFTYPE);
 	}
 
-	data = mmap(0, (size_t)st.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd,
-	    (off_t)0);
-	_close(fd);
+	/* If the file size cannot be held in size_t we cannot mmap()
+	   it to the memory. Probably, this will not be a problem given
+	   that catalog files are usually small.*/
+	if (st.st_size > SIZE_T_MAX) {
+		_close(fd);
+		SAVEFAIL(name, lang, EFBIG);
+		NLRETERR(EFBIG);
+	}
 
-	if (data == MAP_FAILED) {
-		SAVEFAIL(name, errno);
-		return (NLERR);
+	if ((data = mmap(0, (size_t)st.st_size, PROT_READ,
+	    MAP_FILE|MAP_SHARED, fd, (off_t)0)) == MAP_FAILED) {
+		int saved_errno = errno;
+		_close(fd);
+		SAVEFAIL(name, lang, saved_errno);
+		NLRETERR(saved_errno);
 	}
+	_close(fd);
 
 	if (ntohl((u_int32_t)((struct _nls_cat_hdr *)data)->__magic) !=
 	    _NLS_MAGIC) {
-		SAVEFAIL(name, errno);
 		munmap(data, (size_t)st.st_size);
-		NLRETERR(EINVAL);
+		SAVEFAIL(name, lang, EFTYPE);
+		NLRETERR(EFTYPE);
 	}
 
 	if ((catd = malloc(sizeof (*catd))) == NULL) {
-		SAVEFAIL(name, errno);
 		munmap(data, (size_t)st.st_size);
-		return (NLERR);
+		SAVEFAIL(name, lang, ENOMEM);
+		NLRETERR(ENOMEM);
 	}
 
 	catd->__data = data;

--------------010405030806090204040601--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4B68CA1F.5060105>