Date: Sun, 17 Aug 2014 23:06:17 GMT
From: ghostmansd@FreeBSD.org
To: svn-soc-all@FreeBSD.org
Subject: socsvn commit: r272578 - in soc2014/ghostmansd/head/lib/libc: locale string unicode
Message-ID: <201408172306.s7HN6HMP043803@socsvn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: ghostmansd Date: Sun Aug 17 23:06:16 2014 New Revision: 272578 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=272578 Log: various fixes and improvements wcscoll() and wcsxfrm() shall now work as expected Modified: soc2014/ghostmansd/head/lib/libc/locale/xlocale.c soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h soc2014/ghostmansd/head/lib/libc/string/wcscoll.c soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c soc2014/ghostmansd/head/lib/libc/unicode/coll.h soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c soc2014/ghostmansd/head/lib/libc/unicode/ucsnorm.c soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c Modified: soc2014/ghostmansd/head/lib/libc/locale/xlocale.c ============================================================================== --- soc2014/ghostmansd/head/lib/libc/locale/xlocale.c Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/locale/xlocale.c Sun Aug 17 23:06:16 2014 (r272578) @@ -37,6 +37,8 @@ #include <db.h> #include <fcntl.h> #include <limits.h> +#include <errno.h> +#include <arpa/inet.h> #include "libc_private.h" #include "xlocale_private.h" @@ -54,6 +56,10 @@ void *__colldb_root_handle(void) { _once(&colldb_root_once, &colldb_root_init); + if (colldb_root_handle == NULL) { + errno = ENOSYS; + return (NULL); + } return (colldb_root_handle); } @@ -276,6 +282,7 @@ value->weights[i].level3 = ntohl(weights[i].level3); value->weights[i].level4 = ntohl(weights[i].level4); } + free(dbvalue.data); free(keybuf); return (0); } Modified: soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h ============================================================================== --- soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/locale/xlocale_private.h Sun Aug 17 23:06:16 2014 (r272578) @@ -53,7 +53,7 @@ size_t __ucsnorm(uint32_t*, const uint32_t*, size_t, int); #define __COLLDB_VERSION 0x00000001 -#define __COLLDB_WEIGHTS_MAX 10 
+#define __COLLDB_WEIGHTS_MAX 16 struct __colldb_weight { uint8_t alternate; uint32_t level1; Modified: soc2014/ghostmansd/head/lib/libc/string/wcscoll.c ============================================================================== --- soc2014/ghostmansd/head/lib/libc/string/wcscoll.c Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/string/wcscoll.c Sun Aug 17 23:06:16 2014 (r272578) @@ -78,9 +78,11 @@ struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; + errno = 0; diff = __ucscoll(ucs1, ucs2, locale->colldb); - if (errno == init_error) - return diff; + if (errno != ENOSYS) + return (diff); + errno = init_error; if (table->__collate_load_error || MB_CUR_MAX > 1) /* Modified: soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c ============================================================================== --- soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/string/wcsxfrm.c Sun Aug 17 23:06:16 2014 (r272578) @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> +#include <errno.h> #include <string.h> #include <wchar.h> #include "collate.h" @@ -58,9 +59,11 @@ struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; + errno = 0; ulen = __ucsxfrm(udst, usrc, len, locale->colldb); - if (errno == init_errno) + if (errno != ENOSYS) return (ulen); + errno = init_error; if (*src == L'\0') { if (len != 0) Modified: soc2014/ghostmansd/head/lib/libc/unicode/coll.h ============================================================================== --- soc2014/ghostmansd/head/lib/libc/unicode/coll.h Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/unicode/coll.h Sun Aug 17 23:06:16 2014 (r272578) @@ -28,6 +28,8 @@ #include "xlocale_private.h" +#include <stdio.h> + static size_t __coll_iter(const uint32_t *iter, void *colldb, struct __colldb_value *val, struct __colldb_weight 
default_weights[2]) @@ -36,10 +38,9 @@ size_t shift = 0; struct __colldb_key key; - for (shift = 1; shift != 18; ++shift) - { + for (shift = 1; shift < 18; ++shift) { if (*(iter + shift - 1) == 0) - break; + continue; key.count = shift; key.chars = iter; state = __colldb_get(colldb, &key, val); @@ -49,12 +50,10 @@ break; } - if ((state != 0) && (colldb != __colldb_root)) - { - for (shift = 1; shift != 18; ++shift) - { + if ((state != 0) && (colldb != __colldb_root)) { + for (shift = 1; shift < 18; ++shift) { if (*(iter + shift - 1) == 0) - break; + continue; key.count = shift; key.chars = iter; state = __colldb_get(__colldb_root, &key, val); @@ -65,8 +64,7 @@ } } - if (state != 0) - { + if (state != 0) { shift = 1; default_weights[0].level1 = 0xFBC0; if (((0x4E00 <= *iter) && (*iter <= 0x9FCC)) || Modified: soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c ============================================================================== --- soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/unicode/ucscoll.c Sun Aug 17 23:06:16 2014 (r272578) @@ -36,28 +36,29 @@ int __ucscoll(const uint32_t *lstr, const uint32_t *rstr, void *colldb) { - int cmp = 0; size_t i = 0; int state = 0; int error = 0; + size_t count = 0; size_t size[2] = {0, 0}; size_t shift[2] = {0, 0}; struct __colldb_value val[2]; - uint32_t *str[2] = {NULL, NULL}; uint32_t *norm[2] = {NULL, NULL}; uint32_t *iter[2] = {NULL, NULL}; + const uint32_t *str[2] = {lstr, rstr}; struct __colldb_weight default_weights[2][2]; struct __colldb_weight weights[2][__COLLDB_WEIGHTS_MAX]; const int init_error = errno; - fprintf(stderr, "ucscoll\n"); if ((lstr == NULL) || (rstr == NULL)) { errno = EINVAL; return (0); } - if (colldb == NULL) - colldb = __colldb_root; - fprintf(stderr, "root=%p\n", colldb); + if (colldb == NULL) { + colldb = __colldb_open("/usr/share/locale/UTF-8/LC_COLLATE"); + if (colldb == NULL) + return (0); + } for (i = 0; i < 2; 
++i) { size[i] = __ucsnorm(NULL, str[i], 0, __UC_NFD); norm[i] = malloc(size[i] * sizeof(uint32_t)); @@ -68,22 +69,26 @@ errno = error; return (0); } + __ucscanon(norm[i]); + memset(weights[i], 0, (__COLLDB_WEIGHTS_MAX * sizeof(struct __colldb_weight))); + memset(default_weights[i], 0, (2 * sizeof(struct __colldb_weight))); iter[i] = norm[i]; } if (colldb == NULL) colldb = __colldb_root; - while (*iter[0] != 0) { - if (*iter[1] == 0) { + while (*(iter[0]) != 0) { + if (*(iter[1]) == 0) { free(norm[0]); free(norm[1]); + errno = init_error; return (+1); } for (i = 0; i < 2; ++i) { val[i].weights = weights[i]; val[i].count = __COLLDB_WEIGHTS_MAX; - shift[i] = __coll_iter(iter[0], colldb, &val[i], - default_weights[i]); + shift[i] = __coll_iter(iter[i], colldb, &val[i], + default_weights[i]); if (shift[i] == 0) { error = errno; free(norm[0]); @@ -91,28 +96,38 @@ errno = error; return (0); } + iter[i] += shift[i]; } - iter[i] += shift[i]; if (val[0].count < val[1].count) - cmp = val[0].count; + count = val[0].count; else - cmp = val[1].count; - for (i = 0; i < cmp; ++i) { - state = memcmp(&val[0].weights[i], &val[1].weights[i], - sizeof(struct __colldb_weight)); - if (state != 0) { + count = val[1].count; + + for (i = 0; i < count; ++i) { + state = memcmp((val[0].weights + i), + (val[1].weights + i), + sizeof(struct __colldb_weight)); + if ((state != 0) || (val[0].count != val[1].count)) { + if (val[0].count < val[1].count) + state = -1; + else if (val[0].count > val[1].count) + state = +1; free(norm[0]); free(norm[1]); errno = init_error; return (state); } } - if (val[0].count < val[1].count) - state = -1; - else if (val[0].count > val[1].count) - state = +1; } - if (*iter[1] != 0) + if (*iter[1] != 0) { + free(norm[0]); + free(norm[1]); + errno = init_error; return (-1); + } + free(norm[0]); + free(norm[1]); + __colldb_close(colldb); + errno = init_error; return (0); } Modified: soc2014/ghostmansd/head/lib/libc/unicode/ucsnorm.c 
============================================================================== --- soc2014/ghostmansd/head/lib/libc/unicode/ucsnorm.c Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/unicode/ucsnorm.c Sun Aug 17 23:06:16 2014 (r272578) @@ -82,56 +82,58 @@ static size_t decompose(uint32_t *buffer, const uint32_t *str, size_t size, int compat) { + size_t lsize = 0; + size_t rsize = 0; uint32_t code = 0; - uint32_t tmpbuf[20]; - uint32_t segbuf[20]; size_t reqsize = 0; - size_t tmpsize = 0; - size_t segsize = 0; + uint32_t *ptr = NULL; + uint32_t lbuf[20] = {0}; + uint32_t rbuf[20] = {0}; + uint32_t *lptr = lbuf; + uint32_t *rptr = rbuf; const uint32_t *iter = str; - const uint32_t *segptr = segbuf; + const uint32_t *xptr = NULL; for (; *iter != 0; ++iter) { code = *iter; - segsize = 1; - segptr = iter; + lsize = 1; + xptr = iter; if ((HANGUL_SBASE <= code) && (code < (HANGUL_SBASE + HANGUL_SCOUNT))) { code -= HANGUL_SBASE; if (code < HANGUL_SCOUNT) { - segbuf[0] = (HANGUL_LBASE + (code / HANGUL_NCOUNT)); - segbuf[1] = (HANGUL_VBASE + ((code % HANGUL_NCOUNT) / HANGUL_TCOUNT)); - segbuf[2] = (HANGUL_TBASE + (code % HANGUL_TCOUNT)); - segsize = ((segptr[2] == HANGUL_TBASE) ? 2 : 3); + lbuf[0] = (HANGUL_LBASE + (code / HANGUL_NCOUNT)); + lbuf[1] = (HANGUL_VBASE + ((code % HANGUL_NCOUNT) / HANGUL_TCOUNT)); + lbuf[2] = (HANGUL_TBASE + (code % HANGUL_TCOUNT)); + lsize = ((xptr[2] == HANGUL_TBASE) ? 
2 : 3); } else - segbuf[0] = *iter; + lbuf[0] = *iter; } else if ((DECOMPOSITION_MIN <= code) && (code <= DECOMPOSITION_MAX)) { - segptr = decompose_base(&code, &segsize, compat); - if (segptr != &code) { - segsize *= sizeof(uint32_t); - memcpy(segbuf, segptr, segsize); - segsize /= sizeof(uint32_t); - segptr = segbuf; - while (tmpsize != segsize) { - tmpsize = segsize; - tmpbuf[segsize] = 0; - segsize *= sizeof(uint32_t); - memcpy(tmpbuf, segbuf, segsize); - segsize /= sizeof(uint32_t); - segsize = decompose(tmpbuf, segbuf, 20, compat); - } + xptr = decompose_base(&code, &lsize, compat); + if (xptr != &code) { + memcpy(lbuf, xptr, (lsize * sizeof(uint32_t))); + do { + rsize = decompose(rptr, lptr, 20, compat); + if (lsize == rsize) { + xptr = rptr; + break; + } + ptr = lptr; + lptr = rptr; + rptr = ptr; + } while (1); } } - if (((reqsize + segsize + 1) > size) && (buffer != NULL)) { + if (((reqsize + lsize + 1) > size) && (buffer != NULL)) { size = (size - reqsize - 1); - memcpy(buffer, segptr, (size * sizeof(uint32_t))); + memcpy(buffer, xptr, (size * sizeof(uint32_t))); *(buffer + size) = 0; return decompose(NULL, str, 0, compat); } if (buffer != NULL) { - memcpy(buffer, segptr, (segsize * sizeof(uint32_t))); - buffer += segsize; + memcpy(buffer, xptr, (lsize * sizeof(uint32_t))); + buffer += lsize; } - reqsize += segsize; + reqsize += lsize; } if (buffer == NULL) ++reqsize; Modified: soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c ============================================================================== --- soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c Sun Aug 17 21:36:37 2014 (r272577) +++ soc2014/ghostmansd/head/lib/libc/unicode/ucsxfrm.c Sun Aug 17 23:06:16 2014 (r272578) @@ -51,8 +51,11 @@ errno = EINVAL; return (SIZE_MAX); } - if (colldb == NULL) - colldb = __colldb_root; + if (colldb == NULL) { + colldb = __colldb_open("/usr/share/locale/UTF-8/LC_COLLATE"); + if (colldb == NULL) + return (0); + } normsize = __ucsnorm(NULL, str, 0, __UC_NFD); 
norm = malloc(normsize * sizeof(uint32_t)); if (norm == NULL) { @@ -74,26 +77,24 @@ shift = __coll_iter(iter, colldb, &value, default_weights); if (shift == 0) { error = errno; - free(elements); free(norm); errno = error; return (SIZE_MAX); } - free(elements); - count = (value.count * 4); - elements = malloc(count * sizeof(uint32_t)); + count = value.count; + elements = malloc(count * 4 * sizeof(uint32_t)); if (elements == NULL) { - free(elements); free(norm); errno = ENOMEM; return (SIZE_MAX); } - for (i = 0; i < value.count; ++i) { + for (i = 0; i < count; ++i) { elements[(count * 0) + i] = value.weights[i].level1; elements[(count * 1) + i] = value.weights[i].level2; elements[(count * 2) + i] = value.weights[i].level3; elements[(count * 3) + i] = value.weights[i].level4; } + count *= 4; for (i = 0; i < count; ++i) elements[i] = (elements[i] ? elements[i] : 1); @@ -109,6 +110,7 @@ memcpy(buffer, elements, (count * sizeof(uint32_t))); buffer += count; } + free(elements); reqsize += count; iter += shift; } @@ -118,6 +120,7 @@ else *buffer = 0; free(norm); + __colldb_close(colldb); errno = init_error; return (reqsize); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201408172306.s7HN6HMP043803>