Date: Fri, 6 Jun 2014 01:11:39 GMT From: ghostmansd@FreeBSD.org To: svn-soc-all@FreeBSD.org Subject: socsvn commit: r269159 - in soc2014/ghostmansd/src/lib/libc: locale string Message-ID: <201406060111.s561Bd7G038466@socsvn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: ghostmansd Date: Fri Jun 6 01:11:38 2014 New Revision: 269159 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=269159 Log: continue working on collation support Modified: soc2014/ghostmansd/src/lib/libc/locale/collate.c soc2014/ghostmansd/src/lib/libc/string/strcoll.c soc2014/ghostmansd/src/lib/libc/string/strxfrm.c Modified: soc2014/ghostmansd/src/lib/libc/locale/collate.c ============================================================================== --- soc2014/ghostmansd/src/lib/libc/locale/collate.c Fri Jun 6 00:24:04 2014 (r269158) +++ soc2014/ghostmansd/src/lib/libc/locale/collate.c Fri Jun 6 01:11:38 2014 (r269159) @@ -80,23 +80,26 @@ destruct_collate(void *t) { struct xlocale_collate *table = t; - if (__collate_chain_pri_table) { + + if (__collate_chain_pri_table) free(__collate_chain_pri_table); - } free(t); } void * -__collate_load(const char *encoding, locale_t unused) +__collate_load(const char *encoding, locale_t locale) { - if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { + struct xlocale_collate *table = NULL; + + (void) locale; + if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) return &__xlocale_C_collate; - } - struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); + table = calloc(sizeof(struct xlocale_collate), 1); table->header.header.destructor = destruct_collate; // FIXME: Make sure that _LDP_CACHE is never returned. 
We should be doing // the caching outside of this section - if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { + if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) + { xlocale_release(table); return NULL; } @@ -117,11 +120,16 @@ int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) { - FILE *fp; - int i, saverr, chains; - uint32_t u32; - char strbuf[STR_LEN], buf[PATH_MAX]; - void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table; + int i = 0; + int error = 0; + int chains = 0; + FILE *fp = NULL; + uint32_t u32 = 0; + char buf[PATH_MAX] = {0}; + char strbuf[STR_LEN] = {0}; + void *TMP_substitute_table = NULL; + void *TMP_char_pri_table = NULL; + void *TMP_chain_pri_table = NULL; /* 'encoding' must be already checked. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { @@ -131,17 +139,18 @@ /* 'PathLocale' must be already set & checked. */ /* Range checking not needed, encoding has fixed size */ - (void)strcpy(buf, _PathLocale); - (void)strcat(buf, "/"); - (void)strcat(buf, encoding); - (void)strcat(buf, "/LC_COLLATE"); + (void) strcpy(buf, _PathLocale); + (void) strcat(buf, "/"); + (void) strcat(buf, encoding); + (void) strcat(buf, "/LC_COLLATE"); if ((fp = fopen(buf, "re")) == NULL) return (_LDP_ERROR); - if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) { - saverr = errno; + if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) + { + error = errno; (void)fclose(fp); - errno = saverr; + errno = error; return (_LDP_ERROR); } chains = -1; @@ -149,60 +158,56 @@ chains = 0; else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0) chains = 1; - if (chains < 0) { + if (chains < 0) + { (void)fclose(fp); errno = EFTYPE; return (_LDP_ERROR); } - if (chains) { - if (fread(&u32, sizeof(u32), 1, fp) != 1) { - saverr = errno; + if (chains) + { + if (fread(&u32, sizeof(u32), 1, fp) != 1) + { + error = errno; (void)fclose(fp); - errno = saverr; + errno = error; return (_LDP_ERROR); } - if ((chains = 
(int)ntohl(u32)) < 1) { + if ((chains = (int)ntohl(u32)) < 1) + { (void)fclose(fp); errno = EFTYPE; return (_LDP_ERROR); } - } else + } + else chains = TABLE_SIZE; - if ((TMP_substitute_table = - malloc(sizeof(__collate_substitute_table))) == NULL) { - saverr = errno; + TMP_substitute_table = malloc(sizeof(__collate_substitute_table)); + TMP_char_pri_table = malloc(sizeof(__collate_char_pri_table)); + TMP_chain_pri_table = malloc(sizeof(*__collate_chain_pri_table) * chains); + if ((TMP_substitute_table == NULL) + || (TMP_char_pri_table == NULL) + || (TMP_chain_pri_table == NULL)) + { + error = errno; (void)fclose(fp); - errno = saverr; - return (_LDP_ERROR); - } - if ((TMP_char_pri_table = - malloc(sizeof(__collate_char_pri_table))) == NULL) { - saverr = errno; - free(TMP_substitute_table); - (void)fclose(fp); - errno = saverr; - return (_LDP_ERROR); - } - if ((TMP_chain_pri_table = - malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) { - saverr = errno; free(TMP_substitute_table); free(TMP_char_pri_table); - (void)fclose(fp); - errno = saverr; + free(TMP_chain_pri_table); + errno = error; return (_LDP_ERROR); } #define FREAD(a, b, c, d) \ { \ if (fread(a, b, c, d) != c) { \ - saverr = errno; \ + error = errno; \ free(TMP_substitute_table); \ free(TMP_char_pri_table); \ free(TMP_chain_pri_table); \ (void)fclose(d); \ - errno = saverr; \ + errno = error; \ return (_LDP_ERROR); \ } \ } @@ -243,55 +248,179 @@ } } table->__collate_load_error = 0; - return (_LDP_LOADED); } -u_char * -__collate_substitute(struct xlocale_collate *table, const u_char *s) +static int +__collate_wcsnlen(const wchar_t *s, int len) +{ + int n = 0; + while (*s && n < len) { + s++; + n++; + } + return n; +} + +wchar_t * +__collate_substitute(const wchar_t *s, int which, locale_t locale) { - int dest_len, len, nlen; - int delta = strlen(s); - u_char *dest_str = NULL; + int n = 0; + int len = 0; + int nlen = 0; + int delta = 0; + int nsubst = 0; + int dest_len = 0; + const wchar_t *fp = 
NULL; + wchar_t *dest_str = NULL; + struct __collate_st_subst *subst = NULL; + struct __collate_st_subst *match = NULL; + (void) locale; if (s == NULL || *s == '\0') - return (__collate_strdup("")); - delta += delta / 8; - dest_str = malloc(dest_len = delta); + return __collate_wcsdup(L""); + dest_len = wcslen(s); + nsubst = __collate_info->subst_count[which]; + if (nsubst <= 0) + return __collate_wcsdup(s); + subst = __collate_substitute_table[which]; + delta = (dest_len / 4); + if (delta < 2) + delta = 2; + dest_str = (wchar_t *) malloc((dest_len += delta) * sizeof(wchar_t)); if (dest_str == NULL) __collate_err(EX_OSERR, __func__); len = 0; - while (*s) { - nlen = len + strlen(__collate_substitute_table[*s]); - if (dest_len <= nlen) { - dest_str = reallocf(dest_str, dest_len = nlen + delta); + while (*s) + { + if ((match = substsearch(*s, subst, nsubst)) != NULL) + { + fp = match->str; + n = __collate_wcsnlen(fp, STR_LEN); + } + else + { + fp = s; + n = 1; + } + nlen = len + n; + if (dest_len <= nlen) + { + dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t)); if (dest_str == NULL) __collate_err(EX_OSERR, __func__); } - (void)strcpy(dest_str + len, __collate_substitute_table[*s++]); - len = nlen; + wcsncpy(dest_str + len, fp, n); + len += n; + s++; } - return (dest_str); + dest_str[len] = 0; + return dest_str; +} + +static struct __collate_st_chain_pri * +chainsearch(const wchar_t *key, int *len, locale_t locale) +{ + int low = 0; + int high = __collate_info->chain_count - 1; + int next, compar, l; + struct __collate_st_chain_pri *p; + struct __collate_st_chain_pri *tab = __collate_chain_pri_table; + + while (low <= high) + { + next = (low + high) / 2; + p = tab + next; + compar = *key - *p->str; + if (compar == 0) + { + l = __collate_wcsnlen(p->str, STR_LEN); + compar = wcsncmp(key, p->str, l); + if (compar == 0) + { + *len = l; + return p; + } + } + if (compar > 0) + low = next + 1; + else + high = next - 1; + } + return NULL; +} + +static 
struct __collate_st_large_char_pri * +largesearch(const wchar_t key, locale_t locale) +{ + int low = 0; + int high = __collate_info->large_pri_count - 1; + int next, compar; + struct __collate_st_large_char_pri *p; + struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table; + + while (low <= high) { + next = (low + high) / 2; + p = tab + next; + compar = key - p->val; + if (compar == 0) + return p; + if (compar > 0) + low = next + 1; + else + high = next - 1; + } + return NULL; } void -__collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec) +__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t locale) { struct __collate_st_chain_pri *p2; + int l; *len = 1; *prim = *sec = 0; - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) { - if (*t == p2->str[0] && - strncmp(t, p2->str, strlen(p2->str)) == 0) { - *len = strlen(p2->str); - *prim = p2->prim; - *sec = p2->sec; + FIX_LOCALE(locale); + p2 = chainsearch(t, &l, locale); + /* use the chain if prim >= 0 */ + if (p2 && p2->pri[0] >= 0) + { + *len = l; + *prim = p2->pri[0]; + *sec = p2->pri[1]; + return; + } + if (*t <= UCHAR_MAX) + { + *prim = __collate_char_pri_table[*t].pri[0]; + *sec = __collate_char_pri_table[*t].pri[1]; + return; + } + if (__collate_info->large_pri_count > 0) + { + struct __collate_st_large_char_pri *match; + match = largesearch(*t, locale); + if (match) + { + *prim = match->pri.pri[0]; + *sec = match->pri.pri[1]; return; } } - *prim = __collate_char_pri_table[*t].prim; - *sec = __collate_char_pri_table[*t].sec; + *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; + *sec = (l = __collate_info->undef_pri[1]) >= 0 ? 
l : *t - l; +} + +void +__collate_lookup(const wchar_t *t, int *len, int *prim, int *sec) +{ + int error = 0; + locale_t locale = __get_locale(); + wchar_t *wcs = __collate_mbstowcs((const char *)t, locale); + + __collate_lookup_l(wcs, len, prim, sec, locale); + free(wcs); } wchar_t * @@ -316,6 +445,119 @@ return wcs; } +wchar_t * +__collate_wcsdup(wchar_t *s) +{ + wchar_t *t = wcsdup(s); + + if (t == NULL) + __collate_err(EX_OSERR, __func__); + return t; +} + +void +__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t locale) +{ + int pri = 0; + int len = 0; + int pass = 0; + int direc = 0; + int error = 0; + size_t slen = 0; + wchar_t *tt = NULL, + wchar_t *tr = NULL; + wchar_t *xfp = NULL; + const wchar_t *t = NULL; + struct __collate_st_info *info = __collate_info; + + for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) + xf[pass] = NULL; + for(pass = 0; pass < info->directive_count; pass++) + { + direc = info->directive[pass]; + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) + { + error = errno; + free(tt); + errno = error; + tt = __collate_substitute(src, pass, locale); + } + if (direc & DIRECTIVE_BACKWARD) + { + wchar_t *bp, *fp, c; + error = errno; + free(tr); + errno = error; + tr = __collate_wcsdup(tt ? 
tt : src); + bp = tr; + fp = tr + wcslen(tr) - 1; + while(bp < fp) + { + c = *bp; + *bp++ = *fp; + *fp-- = c; + } + t = tr; + } + else if (tt) + t = tt; + else + t = src; + error = errno; + xf[pass] = (wchar_t *) malloc(sizeof(wchar_t) * (wcslen(t) + 1)); + if (xf[pass] == NULL) + { + errno = error; + slen = 0; + goto end; + } + errno = error; + xfp = xf[pass]; + if (direc & DIRECTIVE_POSITION) + { + while(*t) + { + __collate_lookup_which(t, &len, &pri, pass, locale); + t += len; + if (pri <= 0) + { + if (pri < 0) + { + errno = EINVAL; + slen = 0; + goto end; + } + pri = COLLATE_MAX_PRIORITY; + } + *xfp++ = pri; + } + } + else + { + while(*t) + { + __collate_lookup_which(t, &len, &pri, pass, locale); + t += len; + if (pri <= 0) + { + if (pri < 0) + { + errno = EINVAL; + slen = 0; + goto end; + } + continue; + } + *xfp++ = pri; + } + } + *xfp = 0; + } + end: + free(tt); + free(tr); +} + u_char * __collate_strdup(u_char *s) { @@ -330,7 +572,7 @@ __collate_err(int ex, const char *f) { const char *s; - int serrno = errno; + int error = errno; s = _getprogname(); _write(STDERR_FILENO, s, strlen(s)); @@ -338,7 +580,7 @@ s = f; _write(STDERR_FILENO, s, strlen(s)); _write(STDERR_FILENO, ": ", 2); - s = strerror(serrno); + s = strerror(error); _write(STDERR_FILENO, s, strlen(s)); _write(STDERR_FILENO, "\n", 1); exit(ex); Modified: soc2014/ghostmansd/src/lib/libc/string/strcoll.c ============================================================================== --- soc2014/ghostmansd/src/lib/libc/string/strcoll.c Fri Jun 6 00:24:04 2014 (r269158) +++ soc2014/ghostmansd/src/lib/libc/string/strcoll.c Fri Jun 6 01:11:38 2014 (r269159) @@ -45,11 +45,10 @@ int ret = 0; wchar_t *wcs1 = NULL; wchar_t *wcs2 = NULL; + struct xlocale_collate *table = NULL; FIX_LOCALE(locale); - struct xlocale_collate *table = - (struct xlocale_collate*)locale->components[XLC_COLLATE]; - + *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if ((table->__collate_load_error) || (wcs1 = 
__collate_mbstowcs(mbs1, locale) == NULL) || (wcs2 = __collate_mbstowcs(mbs2, locale) == NULL)) Modified: soc2014/ghostmansd/src/lib/libc/string/strxfrm.c ============================================================================== --- soc2014/ghostmansd/src/lib/libc/string/strxfrm.c Fri Jun 6 00:24:04 2014 (r269158) +++ soc2014/ghostmansd/src/lib/libc/string/strxfrm.c Fri Jun 6 01:11:38 2014 (r269159) @@ -37,12 +37,56 @@ #include <string.h> #include "collate.h" +/* + * In the non-POSIX case, we transform each character into a string of + * characters representing the character's priority. Since char is usually + * signed, we are limited by 7 bits per byte. To avoid zero, we need to add + * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 + * bits per byte. We choose 4 bytes per character as a good compromise + * between maximum coverage and minimum size. This gives 24 bits, or 16M + * priorities. So we choose COLLATE_MAX_PRIORITY to be (2^24 - 1). This + * can be increased if more is needed. 
+ */ + +#define XFRM_BYTES 4 +#define XFRM_OFFSET ('0') /* make all printable characters */ +#define XFRM_SHIFT 6 +#define XFRM_MASK ((1 << XFRM_SHIFT) - 1) + +static void +xfrm(unsigned char *p, int pri) +{ + + p[3] = (pri & XFRM_MASK) + XFRM_OFFSET; + pri >>= XFRM_SHIFT; + p[2] = (pri & XFRM_MASK) + XFRM_OFFSET; + pri >>= XFRM_SHIFT; + p[1] = (pri & XFRM_MASK) + XFRM_OFFSET; + pri >>= XFRM_SHIFT; + p[0] = (pri & XFRM_MASK) + XFRM_OFFSET; +} + size_t -strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t loc); -size_t -strxfrm(char * __restrict dest, const char * __restrict src, size_t len) +strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale) { - return strxfrm_l(dest, src, len, __get_locale()); + int error = 0; + size_t slen = 0; + wchar_t *wcs = NULL; + wchar_t *xf[2] = {NULL, NULL}; + struct xlocale_collate *table = NULL; + + if (!*src && dest) + { + if (len > 0) + *dest = '\0'; + return 0; + } + FIX_LOCALE(locale); + *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; + if (table->__collate_load_error + || (wcs = __collate_mbstowcs(mbs1, locale) == NULL)) + return strlcpy(dest, src, len); + __collate_xfrm(wcs, xf, loc); } size_t @@ -87,3 +131,10 @@ return slen; } + +size_t +strxfrm(char * __restrict dest, const char * __restrict src, size_t len) +{ + return strxfrm_l(dest, src, len, __get_locale()); +} +
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201406060111.s561Bd7G038466>