Date: Mon, 16 Jan 2012 05:15:13 +0000 (UTC) From: Kevin Lo <kevlo@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r230196 - in stable/9: lib/libkiconv sys/conf sys/fs/msdosfs sys/fs/smbfs sys/kern sys/libkern sys/modules/libiconv sys/modules/libmchain sys/netsmb sys/sys Message-ID: <201201160515.q0G5FDPx017016@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kevlo Date: Mon Jan 16 05:15:13 2012 New Revision: 230196 URL: http://svn.freebsd.org/changeset/base/230196 Log: MFC r228796: Discarding local array based on return values. MFC r227650: Add unicode support to msdosfs and smbfs; original pathes from imura, bug fixes by Kuan-Chung Chiu <buganini at gmail dot com>. Added: stable/9/sys/libkern/iconv_ucs.c (contents, props changed) Modified: stable/9/lib/libkiconv/xlat16_iconv.c stable/9/sys/conf/files stable/9/sys/fs/msdosfs/msdosfs_conv.c stable/9/sys/fs/smbfs/smbfs_smb.c stable/9/sys/fs/smbfs/smbfs_subr.c stable/9/sys/kern/subr_mchain.c stable/9/sys/libkern/iconv.c stable/9/sys/modules/libiconv/Makefile stable/9/sys/modules/libmchain/Makefile stable/9/sys/netsmb/smb_conn.c stable/9/sys/netsmb/smb_conn.h stable/9/sys/netsmb/smb_smb.c stable/9/sys/netsmb/smb_subr.c stable/9/sys/sys/iconv.h stable/9/sys/sys/mchain.h Modified: stable/9/lib/libkiconv/xlat16_iconv.c ============================================================================== --- stable/9/lib/libkiconv/xlat16_iconv.c Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/lib/libkiconv/xlat16_iconv.c Mon Jan 16 05:15:13 2012 (r230196) @@ -74,6 +74,18 @@ kiconv_add_xlat16_cspair(const char *toc struct xlat16_table xt; void *data; char *p; + const char unicode[] = ENCODING_UNICODE; + + if ((flag & KICONV_WCTYPE) == 0 && + strcmp(unicode, tocode) != 0 && + strcmp(unicode, fromcode) != 0 && + kiconv_lookupconv(unicode) == 0) { + error = kiconv_add_xlat16_cspair(unicode, fromcode, flag); + if (error) + return (-1); + error = kiconv_add_xlat16_cspair(tocode, unicode, flag); + return (error); + } if (kiconv_lookupcs(tocode, fromcode) == 0) return (0); Modified: stable/9/sys/conf/files ============================================================================== --- stable/9/sys/conf/files Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/sys/conf/files Mon Jan 16 05:15:13 2012 (r230196) @@ -2540,6 +2540,7 @@ libkern/fnmatch.c standard libkern/gets.c standard libkern/iconv.c optional libiconv libkern/iconv_converter_if.m optional libiconv +libkern/iconv_ucs.c optional libiconv libkern/iconv_xlat.c optional libiconv libkern/iconv_xlat16.c optional libiconv libkern/index.c standard Modified: stable/9/sys/fs/msdosfs/msdosfs_conv.c ============================================================================== --- stable/9/sys/fs/msdosfs/msdosfs_conv.c Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/sys/fs/msdosfs/msdosfs_conv.c Mon Jan 16 05:15:13 2012 (r230196) @@ -61,9 +61,9 @@ extern struct iconv_functions *msdosfs_iconv; static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle); -static u_int16_t dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *); +static u_char * dos2unixchr(u_char *, const u_char **, size_t *, int, struct msdosfsmount *); static u_int16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *); -static u_int16_t win2unixchr(u_int16_t, struct msdosfsmount *); +static u_char * win2unixchr(u_char *, u_int16_t, struct msdosfsmount *); static u_int16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *); /* @@ -242,7 +242,7 @@ dos2unixfn(dn, un, lower, pmp) { size_t i; int thislong = 0; - u_int16_t c; + u_char *c, tmpbuf[5]; /* * If first char of the filename is SLOT_E5 (0x05), then the real @@ -257,14 +257,12 @@ dos2unixfn(dn, un, lower, pmp) * Copy the name portion into the unix filename string. */ for (i = 8; i > 0 && *dn != ' ';) { - c = dos2unixchr((const u_char **)&dn, &i, lower & LCASE_BASE, - pmp); - if (c & 0xff00) { - *un++ = c >> 8; + c = dos2unixchr(tmpbuf, (const u_char **)&dn, &i, + lower & LCASE_BASE, pmp); + while (*c != '\0') { + *un++ = *c++; thislong++; } - *un++ = c; - thislong++; } dn += i; @@ -276,14 +274,12 @@ dos2unixfn(dn, un, lower, pmp) *un++ = '.'; thislong++; for (i = 3; i > 0 && *dn != ' ';) { - c = dos2unixchr((const u_char **)&dn, &i, + c = dos2unixchr(tmpbuf, (const u_char **)&dn, &i, lower & LCASE_EXT, pmp); - if (c & 0xff00) { - *un++ = c >> 8; + while (*c != '\0') { + *un++ = *c++; thislong++; } - *un++ = c; - thislong++; } } *un++ = 0; @@ -652,8 +648,9 @@ win2unixfn(nbp, wep, chksum, pmp) int chksum; struct msdosfsmount *pmp; { + u_char *c, tmpbuf[5]; u_int8_t *cp; - u_int8_t *np, name[WIN_CHARS * 2 + 1]; + u_int8_t *np, name[WIN_CHARS * 3 + 1]; u_int16_t code; int i; @@ -686,10 +683,9 @@ win2unixfn(nbp, wep, chksum, pmp) *np = '\0'; return -1; default: - code = win2unixchr(code, pmp); - if (code & 0xff00) - *np++ = code >> 8; - *np++ = code; + c = win2unixchr(tmpbuf, code, pmp); + while (*c != '\0') + *np++ = *c++; break; } cp += 2; @@ -705,10 +701,9 @@ win2unixfn(nbp, wep, chksum, pmp) *np = '\0'; return -1; default: - code = win2unixchr(code, pmp); - if (code & 0xff00) - *np++ = code >> 8; - *np++ = code; + c = win2unixchr(tmpbuf, code, pmp); + while (*c != '\0') + *np++ = *c++; break; } cp += 2; @@ -724,10 +719,9 @@ win2unixfn(nbp, wep, chksum, pmp) *np = '\0'; return -1; default: - code = win2unixchr(code, pmp); - if (code & 0xff00) - *np++ = code >> 8; - *np++ = code; + c = win2unixchr(tmpbuf, code, pmp); + while (*c != '\0') + *np++ = *c++; break; } cp += 2; @@ -817,24 +811,22 @@ mbsadjpos(const char **instr, size_t inl /* * Convert DOS char to Local char */ -static u_int16_t -dos2unixchr(const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp) +static u_char * +dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp) { - u_char c; - char *outp, outbuf[3]; - u_int16_t wc; + u_char c, *outp; size_t len, olen; + outp = outbuf; if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { - olen = len = 2; - outp = outbuf; + olen = len = 4; if (lower & (LCASE_BASE | LCASE_EXT)) msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr, - ilen, &outp, &olen, KICONV_LOWER); + ilen, (char **)&outp, &olen, KICONV_LOWER); else msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr, - ilen, &outp, &olen); + ilen, (char **)&outp, &olen); len -= olen; /* @@ -843,21 +835,21 @@ dos2unixchr(const u_char **instr, size_t if (len == 0) { (*ilen)--; (*instr)++; - return ('?'); + *outp++ = '?'; } - - wc = 0; - while(len--) - wc |= (*(outp - len - 1) & 0xff) << (len << 3); - return (wc); + } else { + (*ilen)--; + c = *(*instr)++; + c = dos2unix[c]; + if (lower & (LCASE_BASE | LCASE_EXT)) + c = u2l[c]; + *outp++ = c; + outbuf[1] = '\0'; } - (*ilen)--; - c = *(*instr)++; - c = dos2unix[c]; - if (lower & (LCASE_BASE | LCASE_EXT)) - c = u2l[c]; - return ((u_int16_t)c); + *outp = '\0'; + outp = outbuf; + return (outp); } /* @@ -940,23 +932,21 @@ unix2doschr(const u_char **instr, size_t /* * Convert Windows char to Local char */ -static u_int16_t -win2unixchr(u_int16_t wc, struct msdosfsmount *pmp) +static u_char * +win2unixchr(u_char *outbuf, u_int16_t wc, struct msdosfsmount *pmp) { - u_char *inp, *outp, inbuf[3], outbuf[3]; + u_char *inp, *outp, inbuf[3]; size_t ilen, olen, len; - if (wc == 0) - return (0); - + outp = outbuf; if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { inbuf[0] = (u_char)(wc>>8); inbuf[1] = (u_char)wc; inbuf[2] = '\0'; - ilen = olen = len = 2; + ilen = 2; + olen = len = 4; inp = inbuf; - outp = outbuf; msdosfs_iconv->convchr(pmp->pm_w2u, (const char **)&inp, &ilen, (char **)&outp, &olen); len -= olen; @@ -964,21 +954,15 @@ win2unixchr(u_int16_t wc, struct msdosfs /* * return '?' if failed to convert */ - if (len == 0) { - wc = '?'; - return (wc); - } - - wc = 0; - while(len--) - wc |= (*(outp - len - 1) & 0xff) << (len << 3); - return (wc); + if (len == 0) + *outp++ = '?'; + } else { + *outp++ = (wc & 0xff00) ? '?' : (u_char)(wc & 0xff); } - if (wc & 0xff00) - wc = '?'; - - return (wc); + *outp = '\0'; + outp = outbuf; + return (outp); } /* Modified: stable/9/sys/fs/smbfs/smbfs_smb.c ============================================================================== --- stable/9/sys/fs/smbfs/smbfs_smb.c Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/sys/fs/smbfs/smbfs_smb.c Mon Jan 16 05:15:13 2012 (r230196) @@ -34,6 +34,7 @@ #include <sys/vnode.h> #include <sys/mbuf.h> #include <sys/mount.h> +#include <sys/endian.h> #ifdef USE_MD5_HASH #include <sys/md5.h> @@ -393,6 +394,10 @@ smbfs_smb_setpattr(struct smbnode *np, u if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); + if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) { + mb_put_padbyte(mbp); + mb_put_uint8(mbp, 0); /* 1st byte of NULL Unicode char */ + } mb_put_uint8(mbp, 0); smb_rq_bend(rqp); error = smb_rq_simple(rqp); @@ -909,6 +914,10 @@ smbfs_smb_search(struct smbfs_fctx *ctx) mb_put_uint16le(mbp, 0); /* context length */ ctx->f_flags &= ~SMBFS_RDD_FINDFIRST; } else { + if (SMB_UNICODE_STRINGS(vcp)) { + mb_put_padbyte(mbp); + mb_put_uint8(mbp, 0); + } mb_put_uint8(mbp, 0); /* file name length */ mb_put_uint8(mbp, SMB_DT_VARIABLE); mb_put_uint16le(mbp, SMB_SKEYLEN); @@ -1069,7 +1078,7 @@ smbfs_smb_trans2find2(struct smbfs_fctx mb_put_uint32le(mbp, 0); /* resume key */ mb_put_uint16le(mbp, flags); if (ctx->f_rname) - mb_put_mem(mbp, ctx->f_rname, strlen(ctx->f_rname) + 1, MB_MSYSTEM); + mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM); else mb_put_uint8(mbp, 0); /* resume file name */ #if 0 @@ -1152,7 +1161,10 @@ static int smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred) { - ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK); + if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { + ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, M_SMBFSDATA, M_WAITOK); + } else + ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK); if (ctx->f_name == NULL) return ENOMEM; ctx->f_infolevel = SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_NTLM0_12 ? @@ -1231,7 +1243,10 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx SMBERROR("unexpected info level %d\n", ctx->f_infolevel); return EINVAL; } - nmlen = min(size, SMB_MAXFNAMELEN); + if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { + nmlen = min(size, SMB_MAXFNAMELEN * 2); + } else + nmlen = min(size, SMB_MAXFNAMELEN); cp = ctx->f_name; error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM); if (error) @@ -1245,8 +1260,12 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx return EBADRPC; } } - if (nmlen && cp[nmlen - 1] == 0) - nmlen--; + if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { + if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0) + nmlen -= 2; + } else + if (nmlen && cp[nmlen - 1] == 0) + nmlen--; if (nmlen == 0) return EBADRPC; @@ -1330,10 +1349,17 @@ smbfs_findnext(struct smbfs_fctx *ctx, i error = smbfs_findnextLM2(ctx, limit); if (error) return error; - if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') || - (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' && - ctx->f_name[1] == '.')) - continue; + if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { + if ((ctx->f_nmlen == 2 && + *(u_int16_t *)ctx->f_name == htole16(0x002e)) || + (ctx->f_nmlen == 4 && + *(u_int32_t *)ctx->f_name == htole32(0x002e002e))) + continue; + } else + if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') || + (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' && + ctx->f_name[1] == '.')) + continue; break; } smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen, Modified: stable/9/sys/fs/smbfs/smbfs_subr.c ============================================================================== --- stable/9/sys/fs/smbfs/smbfs_subr.c Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/sys/fs/smbfs/smbfs_subr.c Mon Jan 16 05:15:13 2012 (r230196) @@ -130,7 +130,10 @@ smb_fphelp(struct mbchain *mbp, struct s return smb_put_dmem(mbp, vcp, "\\", 2, caseopt);*/ while (i--) { np = *--npp; - error = mb_put_uint8(mbp, '\\'); + if (SMB_UNICODE_STRINGS(vcp)) + error = mb_put_uint16le(mbp, '\\'); + else + error = mb_put_uint8(mbp, '\\'); if (error) break; error = smb_put_dmem(mbp, vcp, np->n_name, np->n_nmlen, caseopt); @@ -148,6 +151,11 @@ smbfs_fullpath(struct mbchain *mbp, stru int caseopt = SMB_CS_NONE; int error; + if (SMB_UNICODE_STRINGS(vcp)) { + error = mb_put_padbyte(mbp); + if (error) + return error; + } if (SMB_DIALECT(vcp) < SMB_DIALECT_LANMAN1_0) caseopt |= SMB_CS_UPPER; if (dnp != NULL) { @@ -156,7 +164,10 @@ smbfs_fullpath(struct mbchain *mbp, stru return error; } if (name) { - error = mb_put_uint8(mbp, '\\'); + if (SMB_UNICODE_STRINGS(vcp)) + error = mb_put_uint16le(mbp, '\\'); + else + error = mb_put_uint8(mbp, '\\'); if (error) return error; error = smb_put_dmem(mbp, vcp, name, nmlen, caseopt); @@ -164,6 +175,8 @@ smbfs_fullpath(struct mbchain *mbp, stru return error; } error = mb_put_uint8(mbp, 0); + if (SMB_UNICODE_STRINGS(vcp) && error == 0) + error = mb_put_uint8(mbp, 0); return error; } @@ -191,6 +204,17 @@ smbfs_fname_tolocal(struct smb_vc *vcp, error = iconv_conv_case (vcp->vc_tolocal, (const char **)&ibuf, &ilen, &obuf, &olen, copt); + if (error && SMB_UNICODE_STRINGS(vcp)) { + /* + * If using unicode, leaving a file name as it was when + * convert fails will cause a problem because the file name + * will contain NULL. + * Here, put '?' and give converted file name. + */ + *obuf = '?'; + olen--; + error = 0; + } if (!error) { *nmlen = sizeof(outbuf) - olen; memcpy(name, outbuf, *nmlen); Modified: stable/9/sys/kern/subr_mchain.c ============================================================================== --- stable/9/sys/kern/subr_mchain.c Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/sys/kern/subr_mchain.c Mon Jan 16 05:15:13 2012 (r230196) @@ -128,6 +128,36 @@ mb_reserve(struct mbchain *mbp, int size } int +mb_put_padbyte(struct mbchain *mbp) +{ + caddr_t dst; + char x = 0; + + dst = mtod(mbp->mb_cur, caddr_t) + mbp->mb_cur->m_len; + + /* only add padding if address is odd */ + if ((unsigned long)dst & 1) + return mb_put_mem(mbp, (caddr_t)&x, 1, MB_MSYSTEM); + else + return 0; +} + +int +mb_put_padbyte(struct mbchain *mbp) +{ + caddr_t dst; + char x = 0; + + dst = mtod(mbp->mb_cur, caddr_t) + mbp->mb_cur->m_len; + + /* only add padding if address is odd */ + if ((unsigned long)dst & 1) + return mb_put_mem(mbp, (caddr_t)&x, 1, MB_MSYSTEM); + else + return 0; +} + +int mb_put_uint8(struct mbchain *mbp, uint8_t x) { return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM); Modified: stable/9/sys/libkern/iconv.c ============================================================================== --- stable/9/sys/libkern/iconv.c Mon Jan 16 05:07:32 2012 (r230195) +++ stable/9/sys/libkern/iconv.c Mon Jan 16 05:15:13 2012 (r230196) @@ -377,6 +377,18 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern_iconv, OID_AUTO, cslist, CTLFLAG_RD | CTLTYPE_OPAQUE, NULL, 0, iconv_sysctl_cslist, "S,xlat", "registered charset pairs"); +int +iconv_add(const char *converter, const char *to, const char *from) +{ + struct iconv_converter_class *dcp; + struct iconv_cspair *csp; + + if (iconv_lookupconv(converter, &dcp) != 0) + return EINVAL; + + return iconv_register_cspair(to, from, dcp, NULL, &csp); +} + /* * Add new charset pair */ Added: stable/9/sys/libkern/iconv_ucs.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ stable/9/sys/libkern/iconv_ucs.c Mon Jan 16 05:15:13 2012 (r230196) @@ -0,0 +1,540 @@ +/*- + * Copyright (c) 2003, 2005 Ryuichiro Imura + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/iconv.h> + +#include "iconv_converter_if.h" + +/* + * "UCS" converter + */ + +#define KICONV_UCS_COMBINE 0x1 +#define KICONV_UCS_FROM_UTF8 0x2 +#define KICONV_UCS_TO_UTF8 0x4 +#define KICONV_UCS_FROM_LE 0x8 +#define KICONV_UCS_TO_LE 0x10 +#define KICONV_UCS_FROM_UTF16 0x20 +#define KICONV_UCS_TO_UTF16 0x40 +#define KICONV_UCS_UCS4 0x80 + +#define ENCODING_UTF16 "UTF-16BE" +#define ENCODING_UTF8 "UTF-8" + +static struct { + const char *name; + int from_flag, to_flag; +} unicode_family[] = { + { "UTF-8", KICONV_UCS_FROM_UTF8, KICONV_UCS_TO_UTF8 }, + { "UCS-2LE", KICONV_UCS_FROM_LE, KICONV_UCS_TO_LE }, + { "UTF-16BE", KICONV_UCS_FROM_UTF16, KICONV_UCS_TO_UTF16 }, + { "UTF-16LE", KICONV_UCS_FROM_UTF16|KICONV_UCS_FROM_LE, + KICONV_UCS_TO_UTF16|KICONV_UCS_TO_LE }, + { NULL, 0, 0 } +}; + +static uint32_t utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen); +static u_char *ucs4_to_utf8(uint32_t ucs4, char * dst, size_t *utf8width, size_t dstlen); +static uint32_t encode_surrogate(uint32_t code); +static uint32_t decode_surrogate(const u_char *ucs); + +#ifdef MODULE_DEPEND +MODULE_DEPEND(iconv_ucs, libiconv, 2, 2, 2); +#endif + +/* + * UCS converter instance + */ +struct iconv_ucs { + KOBJ_FIELDS; + int convtype; + struct iconv_cspair * d_csp; + struct iconv_cspair * d_cspf; + void * f_ctp; + void * t_ctp; + void * ctype; +}; + +static int +iconv_ucs_open(struct iconv_converter_class *dcp, + struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp) +{ + struct iconv_ucs *dp; + int i; + const char *from, *to; + + dp = (struct iconv_ucs *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK); + to = csp->cp_to; + from = cspf ? cspf->cp_from : csp->cp_from; + + dp->convtype = 0; + + if (cspf) + dp->convtype |= KICONV_UCS_COMBINE; + for (i = 0; unicode_family[i].name; i++) { + if (strcmp(from, unicode_family[i].name) == 0) + dp->convtype |= unicode_family[i].from_flag; + if (strcmp(to, unicode_family[i].name) == 0) + dp->convtype |= unicode_family[i].to_flag; + } + if (strcmp(ENCODING_UNICODE, ENCODING_UTF16) == 0) + dp->convtype |= KICONV_UCS_UCS4; + else + dp->convtype &= ~KICONV_UCS_UCS4; + + dp->f_ctp = dp->t_ctp = NULL; + if (dp->convtype & KICONV_UCS_COMBINE) { + if ((dp->convtype & KICONV_UCS_FROM_UTF8) == 0 && + (dp->convtype & KICONV_UCS_FROM_LE) == 0) { + iconv_open(ENCODING_UNICODE, from, &dp->f_ctp); + } + if ((dp->convtype & KICONV_UCS_TO_UTF8) == 0 && + (dp->convtype & KICONV_UCS_TO_LE) == 0) { + iconv_open(to, ENCODING_UNICODE, &dp->t_ctp); + } + } + + dp->ctype = NULL; + if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_TO_UTF8)) + iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8, &dp->ctype); + + dp->d_csp = csp; + if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) { + if (cspf) { + dp->d_cspf = cspf; + cspf->cp_refcount++; + } else + csp->cp_refcount++; + } + if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE)) + csp->cp_refcount++; + *dpp = (void*)dp; + return 0; +} + +static int +iconv_ucs_close(void *data) +{ + struct iconv_ucs *dp = data; + + if (dp->f_ctp) + iconv_close(dp->f_ctp); + if (dp->t_ctp) + iconv_close(dp->t_ctp); + if (dp->ctype) + iconv_close(dp->ctype); + if (dp->d_cspf) + dp->d_cspf->cp_refcount--; + else if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) + dp->d_csp->cp_refcount--; + if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE)) + dp->d_csp->cp_refcount--; + kobj_delete((struct kobj*)data, M_ICONV); + return 0; +} + +static int +iconv_ucs_conv(void *d2p, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, + int convchar, int casetype) +{ + struct iconv_ucs *dp = (struct iconv_ucs*)d2p; + int ret = 0, i; + size_t in, on, ir, or, inlen, outlen, ucslen; + const char *src, *p; + char *dst; + u_char ucs[4], *q; + uint32_t code; + + if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL) + return 0; + ir = in = *inbytesleft; + or = on = *outbytesleft; + src = *inbuf; + dst = *outbuf; + + while (ir > 0 && or > 0) { + + /* + * The first half of conversion. + * (convert any code into ENCODING_UNICODE) + */ + code = 0; + p = src; + if (dp->convtype & KICONV_UCS_FROM_UTF8) { + /* convert UTF-8 to ENCODING_UNICODE */ + inlen = 0; + code = utf8_to_ucs4(p, &inlen, ir); + if (code == 0) { + ret = -1; + break; + } + + if (casetype == KICONV_FROM_LOWER && dp->ctype) { + code = towlower(code, dp->ctype); + } else if (casetype == KICONV_FROM_UPPER && dp->ctype) { + code = towupper(code, dp->ctype); + } + + if ((code >= 0xd800 && code < 0xe000) || code >= 0x110000 ) { + /* reserved for utf-16 surrogate pair */ + /* invalid unicode */ + ret = -1; + break; + } + + if (inlen == 4) { + if (dp->convtype & KICONV_UCS_UCS4) { + ucslen = 4; + code = encode_surrogate(code); + } else { + /* can't handle with ucs-2 */ + ret = -1; + break; + } + } else { + ucslen = 2; + } + + /* save UCS-4 into ucs[] */ + for (q = ucs, i = ucslen - 1 ; i >= 0 ; i--) + *q++ = (code >> (i << 3)) & 0xff; + + } else if (dp->convtype & KICONV_UCS_COMBINE && dp->f_ctp) { + /* convert local code to ENCODING_UNICODE */ + ucslen = 4; + inlen = ir; + q = ucs; + ret = iconv_convchr_case(dp->f_ctp, &p, &inlen, (char **)&q, + &ucslen, casetype & (KICONV_FROM_LOWER | KICONV_FROM_UPPER)); + if (ret) + break; + inlen = ir - inlen; + ucslen = 4 - ucslen; + + } else { + /* src code is a proper subset of ENCODING_UNICODE */ + q = ucs; + if (dp->convtype & KICONV_UCS_FROM_LE) { + *q = *(p + 1); + *(q + 1) = *p; + p += 2; + } else { + *q = *p++; + *(q + 1) = *p++; + } + if ((*q & 0xfc) == 0xd8) { + if (dp->convtype & KICONV_UCS_UCS4 && + dp->convtype & KICONV_UCS_FROM_UTF16) { + inlen = ucslen = 4; + } else { + /* invalid unicode */ + ret = -1; + break; + } + } else { + inlen = ucslen = 2; + } + if (ir < inlen) { + ret = -1; + break; + } + if (ucslen == 4) { + q += 2; + if (dp->convtype & KICONV_UCS_FROM_LE) { + *q = *(p + 1); + *(q + 1) = *p; + } else { + *q = *p++; + *(q + 1) = *p; + } + if ((*q & 0xfc) != 0xdc) { + /* invalid unicode */ + ret = -1; + break; + } + } + } + + /* + * The second half of conversion. + * (convert ENCODING_UNICODE into any code) + */ + p = ucs; + if (dp->convtype & KICONV_UCS_TO_UTF8) { + q = (u_char *)dst; + if (ucslen == 4 && dp->convtype & KICONV_UCS_UCS4) { + /* decode surrogate pair */ + code = decode_surrogate(p); + } else { + code = (ucs[0] << 8) | ucs[1]; + } + + if (casetype == KICONV_LOWER && dp->ctype) { + code = towlower(code, dp->ctype); + } else if (casetype == KICONV_UPPER && dp->ctype) { + code = towupper(code, dp->ctype); + } + + outlen = 0; + if (ucs4_to_utf8(code, q, &outlen, or) == NULL) { + ret = -1; + break; + } + + src += inlen; + ir -= inlen; + dst += outlen; + or -= outlen; + + } else if (dp->convtype & KICONV_UCS_COMBINE && dp->t_ctp) { + ret = iconv_convchr_case(dp->t_ctp, &p, &ucslen, &dst, + &or, casetype & (KICONV_LOWER | KICONV_UPPER)); + if (ret) + break; + + src += inlen; + ir -= inlen; + + } else { + /* dst code is a proper subset of ENCODING_UNICODE */ + if (or < ucslen) { + ret = -1; + break; + } + src += inlen; + ir -= inlen; + or -= ucslen; + if (dp->convtype & KICONV_UCS_TO_LE) { + *dst++ = *(p + 1); + *dst++ = *p; + p += 2; + } else { + *dst++ = *p++; + *dst++ = *p++; + } + if (ucslen == 4) { + if ((dp->convtype & KICONV_UCS_UCS4) == 0 || + (dp->convtype & KICONV_UCS_TO_UTF16) == 0) { + ret = -1; + break; + } + if (dp->convtype & KICONV_UCS_TO_LE) { + *dst++ = *(p + 1); + *dst++ = *p; + } else { + *dst++ = *p++; + *dst++ = *p; + } + } + } + + if (convchar == 1) + break; + } + + *inbuf += in - ir; + *outbuf += on - or; + *inbytesleft -= in - ir; + *outbytesleft -= on - or; + return (ret); +} + +static int +iconv_ucs_init(struct iconv_converter_class *dcp) +{ + int error; + + error = iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, ENCODING_UTF8); + if (error) + return (error); + error = iconv_add(ENCODING_UNICODE, ENCODING_UTF8, ENCODING_UNICODE); + if (error) + return (error); + return (0); +} + +static int +iconv_ucs_done(struct iconv_converter_class *dcp) +{ + return (0); +} + +static const char * +iconv_ucs_name(struct iconv_converter_class *dcp) +{ + return (ENCODING_UNICODE); +} + +static kobj_method_t iconv_ucs_methods[] = { + KOBJMETHOD(iconv_converter_open, iconv_ucs_open), + KOBJMETHOD(iconv_converter_close, iconv_ucs_close), + KOBJMETHOD(iconv_converter_conv, iconv_ucs_conv), + KOBJMETHOD(iconv_converter_init, iconv_ucs_init), + KOBJMETHOD(iconv_converter_done, iconv_ucs_done), + KOBJMETHOD(iconv_converter_name, iconv_ucs_name), + {0, 0} +}; + +KICONV_CONVERTER(ucs, sizeof(struct iconv_ucs)); + +static uint32_t +utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen) +{ + size_t i, w = 0; + uint32_t ucs4 = 0; + + /* + * get leading 1 byte from utf-8 + */ + if ((*src & 0x80) == 0) { + /* + * leading 1 bit is "0" + * utf-8: 0xxxxxxx + * ucs-4: 00000000 00000000 00000000 0xxxxxxx + */ + w = 1; + /* get trailing 7 bits */ + ucs4 = *src & 0x7f; + } else if ((*src & 0xe0) == 0xc0) { + /* + * leading 3 bits are "110" + * utf-8: 110xxxxx 10yyyyyy + * ucs-4: 00000000 00000000 00000xxx xxyyyyyy + */ + w = 2; + /* get trailing 5 bits */ + ucs4 = *src & 0x1f; + } else if ((*src & 0xf0) == 0xe0) { + /* + * leading 4 bits are "1110" + * utf-8: 1110xxxx 10yyyyyy 10zzzzzz + * ucs-4: 00000000 00000000 xxxxyyyy yyzzzzzz + */ + w = 3; + /* get trailing 4 bits */ + ucs4 = *src & 0x0f; + } else if ((*src & 0xf8) == 0xf0) { + /* + * leading 5 bits are "11110" + * utf-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz + * ucs-4: 00000000 000wwwxx xxxxyyyy yyzzzzzz + */ + w = 4; + /* get trailing 3 bits */ + ucs4 = *src & 0x07; + } else { + /* out of utf-16 range or having illegal bits */ + return (0); + } + if (w == 0) + return (0); + + if (srclen < w) + return (0); + + /* + * get left parts from utf-8 + */ + for (i = 1 ; i < w ; i++) { + if ((*(src + i) & 0xc0) != 0x80) { + /* invalid: leading 2 bits are not "10" */ + return (0); + } + /* concatenate trailing 6 bits into ucs4 */ + ucs4 <<= 6; + ucs4 |= *(src + i) & 0x3f; + } + + *utf8width = w; + return (ucs4); +} + +static u_char * +ucs4_to_utf8(uint32_t ucs4, char *dst, size_t *utf8width, size_t dstlen) +{ + u_char lead, *p; + size_t i, w; + + /* + * determine utf-8 width and leading bits + */ + if (ucs4 < 0x80) { + w = 1; + lead = 0; /* "0" */ + } else if (ucs4 < 0x800) { + w = 2; + lead = 0xc0; /* "11" */ + } else if (ucs4 < 0x10000) { + w = 3; + lead = 0xe0; /* "111" */ + } else if (ucs4 < 0x200000) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201201160515.q0G5FDPx017016>