Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 16 Jan 2012 05:15:13 +0000 (UTC)
From:      Kevin Lo <kevlo@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r230196 - in stable/9: lib/libkiconv sys/conf sys/fs/msdosfs sys/fs/smbfs sys/kern sys/libkern sys/modules/libiconv sys/modules/libmchain sys/netsmb sys/sys
Message-ID:  <201201160515.q0G5FDPx017016@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kevlo
Date: Mon Jan 16 05:15:13 2012
New Revision: 230196
URL: http://svn.freebsd.org/changeset/base/230196

Log:
  MFC r228796:
  Discarding local array based on return values.
  
  MFC r227650:
  Add unicode support to msdosfs and smbfs; original patches from imura,
  bug fixes by Kuan-Chung Chiu <buganini at gmail dot com>.

Added:
  stable/9/sys/libkern/iconv_ucs.c   (contents, props changed)
Modified:
  stable/9/lib/libkiconv/xlat16_iconv.c
  stable/9/sys/conf/files
  stable/9/sys/fs/msdosfs/msdosfs_conv.c
  stable/9/sys/fs/smbfs/smbfs_smb.c
  stable/9/sys/fs/smbfs/smbfs_subr.c
  stable/9/sys/kern/subr_mchain.c
  stable/9/sys/libkern/iconv.c
  stable/9/sys/modules/libiconv/Makefile
  stable/9/sys/modules/libmchain/Makefile
  stable/9/sys/netsmb/smb_conn.c
  stable/9/sys/netsmb/smb_conn.h
  stable/9/sys/netsmb/smb_smb.c
  stable/9/sys/netsmb/smb_subr.c
  stable/9/sys/sys/iconv.h
  stable/9/sys/sys/mchain.h

Modified: stable/9/lib/libkiconv/xlat16_iconv.c
==============================================================================
--- stable/9/lib/libkiconv/xlat16_iconv.c	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/lib/libkiconv/xlat16_iconv.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -74,6 +74,18 @@ kiconv_add_xlat16_cspair(const char *toc
 	struct xlat16_table xt;
 	void *data;
 	char *p;
+	const char unicode[] = ENCODING_UNICODE;
+
+	if ((flag & KICONV_WCTYPE) == 0 &&
+	    strcmp(unicode, tocode) != 0 &&
+	    strcmp(unicode, fromcode) != 0 &&
+	    kiconv_lookupconv(unicode) == 0) {
+		error = kiconv_add_xlat16_cspair(unicode, fromcode, flag);
+		if (error)
+			return (-1);
+		error = kiconv_add_xlat16_cspair(tocode, unicode, flag);
+		return (error);
+	}
 
 	if (kiconv_lookupcs(tocode, fromcode) == 0)
 		return (0);

Modified: stable/9/sys/conf/files
==============================================================================
--- stable/9/sys/conf/files	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/sys/conf/files	Mon Jan 16 05:15:13 2012	(r230196)
@@ -2540,6 +2540,7 @@ libkern/fnmatch.c		standard
 libkern/gets.c			standard
 libkern/iconv.c			optional libiconv
 libkern/iconv_converter_if.m	optional libiconv
+libkern/iconv_ucs.c		optional libiconv
 libkern/iconv_xlat.c		optional libiconv
 libkern/iconv_xlat16.c		optional libiconv
 libkern/index.c			standard

Modified: stable/9/sys/fs/msdosfs/msdosfs_conv.c
==============================================================================
--- stable/9/sys/fs/msdosfs/msdosfs_conv.c	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/sys/fs/msdosfs/msdosfs_conv.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -61,9 +61,9 @@
 extern struct iconv_functions *msdosfs_iconv;
 
 static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle);
-static u_int16_t dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *);
+static u_char * dos2unixchr(u_char *, const u_char **, size_t *, int, struct msdosfsmount *);
 static u_int16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *);
-static u_int16_t win2unixchr(u_int16_t, struct msdosfsmount *);
+static u_char * win2unixchr(u_char *, u_int16_t, struct msdosfsmount *);
 static u_int16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *);
 
 /*
@@ -242,7 +242,7 @@ dos2unixfn(dn, un, lower, pmp)
 {
 	size_t i;
 	int thislong = 0;
-	u_int16_t c;
+	u_char *c, tmpbuf[5];
 
 	/*
 	 * If first char of the filename is SLOT_E5 (0x05), then the real
@@ -257,14 +257,12 @@ dos2unixfn(dn, un, lower, pmp)
 	 * Copy the name portion into the unix filename string.
 	 */
 	for (i = 8; i > 0 && *dn != ' ';) {
-		c = dos2unixchr((const u_char **)&dn, &i, lower & LCASE_BASE,
-		    pmp);
-		if (c & 0xff00) {
-			*un++ = c >> 8;
+		c = dos2unixchr(tmpbuf, (const u_char **)&dn, &i,
+		    lower & LCASE_BASE, pmp);
+		while (*c != '\0') {
+			*un++ = *c++;
 			thislong++;
 		}
-		*un++ = c;
-		thislong++;
 	}
 	dn += i;
 
@@ -276,14 +274,12 @@ dos2unixfn(dn, un, lower, pmp)
 		*un++ = '.';
 		thislong++;
 		for (i = 3; i > 0 && *dn != ' ';) {
-			c = dos2unixchr((const u_char **)&dn, &i,
+			c = dos2unixchr(tmpbuf, (const u_char **)&dn, &i,
 			    lower & LCASE_EXT, pmp);
-			if (c & 0xff00) {
-				*un++ = c >> 8;
+			while (*c != '\0') {
+				*un++ = *c++;
 				thislong++;
 			}
-			*un++ = c;
-			thislong++;
 		}
 	}
 	*un++ = 0;
@@ -652,8 +648,9 @@ win2unixfn(nbp, wep, chksum, pmp)
 	int chksum;
 	struct msdosfsmount *pmp;
 {
+	u_char *c, tmpbuf[5];
 	u_int8_t *cp;
-	u_int8_t *np, name[WIN_CHARS * 2 + 1];
+	u_int8_t *np, name[WIN_CHARS * 3 + 1];
 	u_int16_t code;
 	int i;
 
@@ -686,10 +683,9 @@ win2unixfn(nbp, wep, chksum, pmp)
 			*np = '\0';
 			return -1;
 		default:
-			code = win2unixchr(code, pmp);
-			if (code & 0xff00)
-				*np++ = code >> 8;
-			*np++ = code;
+			c = win2unixchr(tmpbuf, code, pmp);
+			while (*c != '\0')
+				*np++ = *c++;
 			break;
 		}
 		cp += 2;
@@ -705,10 +701,9 @@ win2unixfn(nbp, wep, chksum, pmp)
 			*np = '\0';
 			return -1;
 		default:
-			code = win2unixchr(code, pmp);
-			if (code & 0xff00)
-				*np++ = code >> 8;
-			*np++ = code;
+			c = win2unixchr(tmpbuf, code, pmp);
+			while (*c != '\0')
+				*np++ = *c++;
 			break;
 		}
 		cp += 2;
@@ -724,10 +719,9 @@ win2unixfn(nbp, wep, chksum, pmp)
 			*np = '\0';
 			return -1;
 		default:
-			code = win2unixchr(code, pmp);
-			if (code & 0xff00)
-				*np++ = code >> 8;
-			*np++ = code;
+			c = win2unixchr(tmpbuf, code, pmp);
+			while (*c != '\0')
+				*np++ = *c++;
 			break;
 		}
 		cp += 2;
@@ -817,24 +811,22 @@ mbsadjpos(const char **instr, size_t inl
 /*
  * Convert DOS char to Local char
  */
-static u_int16_t
-dos2unixchr(const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
+static u_char *
+dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
 {
-	u_char c;
-	char *outp, outbuf[3];
-	u_int16_t wc;
+	u_char c, *outp;
 	size_t len, olen;
 
+	outp = outbuf;
 	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
-		olen = len = 2;
-		outp = outbuf;
+		olen = len = 4;
 
 		if (lower & (LCASE_BASE | LCASE_EXT))
 			msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr,
-						  ilen, &outp, &olen, KICONV_LOWER);
+						  ilen, (char **)&outp, &olen, KICONV_LOWER);
 		else
 			msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr,
-					     ilen, &outp, &olen);
+					     ilen, (char **)&outp, &olen);
 		len -= olen;
 
 		/*
@@ -843,21 +835,21 @@ dos2unixchr(const u_char **instr, size_t
 		if (len == 0) {
 			(*ilen)--;
 			(*instr)++;
-			return ('?');
+			*outp++ = '?';
 		}
-
-		wc = 0;
-		while(len--)
-			wc |= (*(outp - len - 1) & 0xff) << (len << 3);
-		return (wc);
+	} else {
+		(*ilen)--;
+		c = *(*instr)++;
+		c = dos2unix[c];
+		if (lower & (LCASE_BASE | LCASE_EXT))
+			c = u2l[c];
+		*outp++ = c;
+		outbuf[1] = '\0';
 	}
 
-	(*ilen)--;
-	c = *(*instr)++;
-	c = dos2unix[c];
-	if (lower & (LCASE_BASE | LCASE_EXT))
-		c = u2l[c];
-	return ((u_int16_t)c);
+	*outp = '\0';
+	outp = outbuf;
+	return (outp);
 }
 
 /*
@@ -940,23 +932,21 @@ unix2doschr(const u_char **instr, size_t
 /*
  * Convert Windows char to Local char
  */
-static u_int16_t
-win2unixchr(u_int16_t wc, struct msdosfsmount *pmp)
+static u_char *
+win2unixchr(u_char *outbuf, u_int16_t wc, struct msdosfsmount *pmp)
 {
-	u_char *inp, *outp, inbuf[3], outbuf[3];
+	u_char *inp, *outp, inbuf[3];
 	size_t ilen, olen, len;
 
-	if (wc == 0)
-		return (0);
-
+	outp = outbuf;
 	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
 		inbuf[0] = (u_char)(wc>>8);
 		inbuf[1] = (u_char)wc;
 		inbuf[2] = '\0';
 
-		ilen = olen = len = 2;
+		ilen = 2;
+		olen = len = 4;
 		inp = inbuf;
-		outp = outbuf;
 		msdosfs_iconv->convchr(pmp->pm_w2u, (const char **)&inp, &ilen,
 				     (char **)&outp, &olen);
 		len -= olen;
@@ -964,21 +954,15 @@ win2unixchr(u_int16_t wc, struct msdosfs
 		/*
 		 * return '?' if failed to convert
 		 */
-		if (len == 0) {
-			wc = '?';
-			return (wc);
-		}
-
-		wc = 0;
-		while(len--)
-			wc |= (*(outp - len - 1) & 0xff) << (len << 3);
-		return (wc);
+		if (len == 0)
+			*outp++ = '?';
+	} else {
+		*outp++ = (wc & 0xff00) ? '?' : (u_char)(wc & 0xff);
 	}
 
-	if (wc & 0xff00)
-		wc = '?';
-
-	return (wc);
+	*outp = '\0';
+	outp = outbuf;
+	return (outp);
 }
 
 /*

Modified: stable/9/sys/fs/smbfs/smbfs_smb.c
==============================================================================
--- stable/9/sys/fs/smbfs/smbfs_smb.c	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/sys/fs/smbfs/smbfs_smb.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -34,6 +34,7 @@
 #include <sys/vnode.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
+#include <sys/endian.h>
 
 #ifdef USE_MD5_HASH
 #include <sys/md5.h>
@@ -393,6 +394,10 @@ smbfs_smb_setpattr(struct smbnode *np, u
 		if (error)
 			break;
 		mb_put_uint8(mbp, SMB_DT_ASCII);
+		if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) {
+			mb_put_padbyte(mbp);
+			mb_put_uint8(mbp, 0);	/* 1st byte of NULL Unicode char */
+		}
 		mb_put_uint8(mbp, 0);
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
@@ -909,6 +914,10 @@ smbfs_smb_search(struct smbfs_fctx *ctx)
 		mb_put_uint16le(mbp, 0);	/* context length */
 		ctx->f_flags &= ~SMBFS_RDD_FINDFIRST;
 	} else {
+		if (SMB_UNICODE_STRINGS(vcp)) {
+			mb_put_padbyte(mbp);
+			mb_put_uint8(mbp, 0);
+		}
 		mb_put_uint8(mbp, 0);	/* file name length */
 		mb_put_uint8(mbp, SMB_DT_VARIABLE);
 		mb_put_uint16le(mbp, SMB_SKEYLEN);
@@ -1069,7 +1078,7 @@ smbfs_smb_trans2find2(struct smbfs_fctx 
 		mb_put_uint32le(mbp, 0);		/* resume key */
 		mb_put_uint16le(mbp, flags);
 		if (ctx->f_rname)
-			mb_put_mem(mbp, ctx->f_rname, strlen(ctx->f_rname) + 1, MB_MSYSTEM);
+			mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM);
 		else
 			mb_put_uint8(mbp, 0);	/* resume file name */
 #if 0
@@ -1152,7 +1161,10 @@ static int
 smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp,
 	const char *wildcard, int wclen, int attr, struct smb_cred *scred)
 {
-	ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
+	if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+		ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, M_SMBFSDATA, M_WAITOK);
+	} else
+		ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
 	if (ctx->f_name == NULL)
 		return ENOMEM;
 	ctx->f_infolevel = SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_NTLM0_12 ?
@@ -1231,7 +1243,10 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx
 		SMBERROR("unexpected info level %d\n", ctx->f_infolevel);
 		return EINVAL;
 	}
-	nmlen = min(size, SMB_MAXFNAMELEN);
+	if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+		nmlen = min(size, SMB_MAXFNAMELEN * 2);
+	} else
+		nmlen = min(size, SMB_MAXFNAMELEN);
 	cp = ctx->f_name;
 	error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM);
 	if (error)
@@ -1245,8 +1260,12 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx
 			return EBADRPC;
 		}
 	}
-	if (nmlen && cp[nmlen - 1] == 0)
-		nmlen--;
+	if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+		if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0)
+			nmlen -= 2;
+	} else
+		if (nmlen && cp[nmlen - 1] == 0)
+			nmlen--;
 	if (nmlen == 0)
 		return EBADRPC;
 
@@ -1330,10 +1349,17 @@ smbfs_findnext(struct smbfs_fctx *ctx, i
 			error = smbfs_findnextLM2(ctx, limit);
 		if (error)
 			return error;
-		if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
-		    (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
-		     ctx->f_name[1] == '.'))
-			continue;
+		if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
+			if ((ctx->f_nmlen == 2 &&
+			     *(u_int16_t *)ctx->f_name == htole16(0x002e)) ||
+			    (ctx->f_nmlen == 4 &&
+			     *(u_int32_t *)ctx->f_name == htole32(0x002e002e)))
+				continue;
+		} else
+			if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
+			    (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
+			     ctx->f_name[1] == '.'))
+				continue;
 		break;
 	}
 	smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen,

Modified: stable/9/sys/fs/smbfs/smbfs_subr.c
==============================================================================
--- stable/9/sys/fs/smbfs/smbfs_subr.c	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/sys/fs/smbfs/smbfs_subr.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -130,7 +130,10 @@ smb_fphelp(struct mbchain *mbp, struct s
 		return smb_put_dmem(mbp, vcp, "\\", 2, caseopt);*/
 	while (i--) {
 		np = *--npp;
-		error = mb_put_uint8(mbp, '\\');
+		if (SMB_UNICODE_STRINGS(vcp))
+			error = mb_put_uint16le(mbp, '\\');
+		else
+			error = mb_put_uint8(mbp, '\\');
 		if (error)
 			break;
 		error = smb_put_dmem(mbp, vcp, np->n_name, np->n_nmlen, caseopt);
@@ -148,6 +151,11 @@ smbfs_fullpath(struct mbchain *mbp, stru
 	int caseopt = SMB_CS_NONE;
 	int error;
 
+	if (SMB_UNICODE_STRINGS(vcp)) {
+		error = mb_put_padbyte(mbp);
+		if (error)
+			return error;
+	}
 	if (SMB_DIALECT(vcp) < SMB_DIALECT_LANMAN1_0)
 		caseopt |= SMB_CS_UPPER;
 	if (dnp != NULL) {
@@ -156,7 +164,10 @@ smbfs_fullpath(struct mbchain *mbp, stru
 			return error;
 	}
 	if (name) {
-		error = mb_put_uint8(mbp, '\\');
+		if (SMB_UNICODE_STRINGS(vcp))
+			error = mb_put_uint16le(mbp, '\\');
+		else
+			error = mb_put_uint8(mbp, '\\');
 		if (error)
 			return error;
 		error = smb_put_dmem(mbp, vcp, name, nmlen, caseopt);
@@ -164,6 +175,8 @@ smbfs_fullpath(struct mbchain *mbp, stru
 			return error;
 	}
 	error = mb_put_uint8(mbp, 0);
+	if (SMB_UNICODE_STRINGS(vcp) && error == 0)
+		error = mb_put_uint8(mbp, 0);
 	return error;
 }
 
@@ -191,6 +204,17 @@ smbfs_fname_tolocal(struct smb_vc *vcp, 
 
 		error = iconv_conv_case
 			(vcp->vc_tolocal, (const char **)&ibuf, &ilen, &obuf, &olen, copt);
+		if (error && SMB_UNICODE_STRINGS(vcp)) {
+			/*
+			 * If using unicode, leaving a file name as it was when
+			 * convert fails will cause a problem because the file name
+			 * will contain NULL.
+			 * Here, put '?' and give converted file name.
+			 */
+			*obuf = '?';
+			olen--;
+			error = 0;
+		}
 		if (!error) {
 			*nmlen = sizeof(outbuf) - olen;
 			memcpy(name, outbuf, *nmlen);

Modified: stable/9/sys/kern/subr_mchain.c
==============================================================================
--- stable/9/sys/kern/subr_mchain.c	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/sys/kern/subr_mchain.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -128,6 +128,21 @@ mb_reserve(struct mbchain *mbp, int size
 }
 
 int
+mb_put_padbyte(struct mbchain *mbp)
+{
+	caddr_t dst;
+	char x = 0;
+
+	dst = mtod(mbp->mb_cur, caddr_t) + mbp->mb_cur->m_len;
+
+	/* only add padding if address is odd */
+	if ((unsigned long)dst & 1)
+		return mb_put_mem(mbp, (caddr_t)&x, 1, MB_MSYSTEM);
+	else
+	return 0;
+}
+
+int
 mb_put_uint8(struct mbchain *mbp, uint8_t x)
 {
 	return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);

Modified: stable/9/sys/libkern/iconv.c
==============================================================================
--- stable/9/sys/libkern/iconv.c	Mon Jan 16 05:07:32 2012	(r230195)
+++ stable/9/sys/libkern/iconv.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -377,6 +377,18 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_kern_iconv, OID_AUTO, cslist, CTLFLAG_RD | CTLTYPE_OPAQUE,
 	    NULL, 0, iconv_sysctl_cslist, "S,xlat", "registered charset pairs");
 
+int
+iconv_add(const char *converter, const char *to, const char *from)
+{
+	struct iconv_converter_class *dcp;
+	struct iconv_cspair *csp;
+
+	if (iconv_lookupconv(converter, &dcp) != 0)
+		return EINVAL;
+
+	return iconv_register_cspair(to, from, dcp, NULL, &csp);
+}
+
 /*
  * Add new charset pair
  */

Added: stable/9/sys/libkern/iconv_ucs.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/9/sys/libkern/iconv_ucs.c	Mon Jan 16 05:15:13 2012	(r230196)
@@ -0,0 +1,540 @@
+/*-
+ * Copyright (c) 2003, 2005 Ryuichiro Imura
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/iconv.h>
+
+#include "iconv_converter_if.h"
+
+/*
+ * "UCS" converter
+ */
+
+#define	KICONV_UCS_COMBINE	0x1
+#define	KICONV_UCS_FROM_UTF8	0x2
+#define	KICONV_UCS_TO_UTF8	0x4
+#define	KICONV_UCS_FROM_LE	0x8
+#define	KICONV_UCS_TO_LE	0x10
+#define	KICONV_UCS_FROM_UTF16	0x20
+#define	KICONV_UCS_TO_UTF16	0x40
+#define	KICONV_UCS_UCS4		0x80
+
+#define	ENCODING_UTF16	"UTF-16BE"
+#define	ENCODING_UTF8	"UTF-8"
+
+static struct {
+	const char *name;
+	int from_flag, to_flag;
+} unicode_family[] = {
+	{ "UTF-8",	KICONV_UCS_FROM_UTF8,	KICONV_UCS_TO_UTF8 },
+	{ "UCS-2LE",	KICONV_UCS_FROM_LE,	KICONV_UCS_TO_LE },
+	{ "UTF-16BE",	KICONV_UCS_FROM_UTF16,	KICONV_UCS_TO_UTF16 },
+	{ "UTF-16LE",	KICONV_UCS_FROM_UTF16|KICONV_UCS_FROM_LE,
+	    KICONV_UCS_TO_UTF16|KICONV_UCS_TO_LE },
+	{ NULL,		0,	0 }
+};
+
+static uint32_t utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen);
+static u_char *ucs4_to_utf8(uint32_t ucs4, char * dst, size_t *utf8width, size_t dstlen);
+static uint32_t encode_surrogate(uint32_t code);
+static uint32_t decode_surrogate(const u_char *ucs);
+
+#ifdef MODULE_DEPEND
+MODULE_DEPEND(iconv_ucs, libiconv, 2, 2, 2);
+#endif
+
+/*
+ * UCS converter instance
+ */
+struct iconv_ucs {
+	KOBJ_FIELDS;
+	int			convtype;
+	struct iconv_cspair *	d_csp;
+	struct iconv_cspair *	d_cspf;
+	void *			f_ctp;
+	void *			t_ctp;
+	void *			ctype;
+};
+
+static int
+iconv_ucs_open(struct iconv_converter_class *dcp,
+	struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
+{
+	struct iconv_ucs *dp;
+	int i;
+	const char *from, *to;
+
+	dp = (struct iconv_ucs *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
+	to = csp->cp_to;
+	from = cspf ? cspf->cp_from : csp->cp_from;
+
+	dp->convtype = 0;
+
+	if (cspf)
+		dp->convtype |= KICONV_UCS_COMBINE;
+	for (i = 0; unicode_family[i].name; i++) {
+		if (strcmp(from, unicode_family[i].name) == 0)
+			dp->convtype |= unicode_family[i].from_flag;
+		if (strcmp(to, unicode_family[i].name) == 0)
+			dp->convtype |= unicode_family[i].to_flag;
+	}
+	if (strcmp(ENCODING_UNICODE, ENCODING_UTF16) == 0)
+		dp->convtype |= KICONV_UCS_UCS4;
+	else
+		dp->convtype &= ~KICONV_UCS_UCS4;
+
+	dp->f_ctp = dp->t_ctp = NULL;
+	if (dp->convtype & KICONV_UCS_COMBINE) {
+		if ((dp->convtype & KICONV_UCS_FROM_UTF8) == 0 &&
+		    (dp->convtype & KICONV_UCS_FROM_LE) == 0) {
+			iconv_open(ENCODING_UNICODE, from, &dp->f_ctp);
+		}
+		if ((dp->convtype & KICONV_UCS_TO_UTF8) == 0 &&
+		    (dp->convtype & KICONV_UCS_TO_LE) == 0) {
+			iconv_open(to, ENCODING_UNICODE, &dp->t_ctp);
+		}
+	}
+
+	dp->ctype = NULL;
+	if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_TO_UTF8))
+		iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8, &dp->ctype);
+
+	dp->d_csp = csp;
+	if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) {
+		if (cspf) {
+			dp->d_cspf = cspf;
+			cspf->cp_refcount++;
+		} else
+			csp->cp_refcount++;
+	}
+	if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE))
+		csp->cp_refcount++;
+	*dpp = (void*)dp;
+	return 0;
+}
+
+static int
+iconv_ucs_close(void *data)
+{
+	struct iconv_ucs *dp = data;
+
+	if (dp->f_ctp)
+		iconv_close(dp->f_ctp);
+	if (dp->t_ctp)
+		iconv_close(dp->t_ctp);
+	if (dp->ctype)
+		iconv_close(dp->ctype);
+	if (dp->d_cspf)
+		dp->d_cspf->cp_refcount--;
+	else if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE))
+		dp->d_csp->cp_refcount--;
+	if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE))
+		dp->d_csp->cp_refcount--;
+	kobj_delete((struct kobj*)data, M_ICONV);
+	return 0;
+}
+
+static int
+iconv_ucs_conv(void *d2p, const char **inbuf,
+	size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
+	int convchar, int casetype)
+{
+	struct iconv_ucs *dp = (struct iconv_ucs*)d2p;
+	int ret = 0, i;
+	size_t in, on, ir, or, inlen, outlen, ucslen;
+	const char *src, *p;
+	char *dst;
+	u_char ucs[4], *q;
+	uint32_t code;
+
+	if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
+		return 0;
+	ir = in = *inbytesleft;
+	or = on = *outbytesleft;
+	src = *inbuf;
+	dst = *outbuf;
+
+	while (ir > 0 && or > 0) {
+
+		/*
+		 * The first half of conversion.
+		 * (convert any code into ENCODING_UNICODE)
+		 */
+		code = 0;
+		p = src;
+		if (dp->convtype & KICONV_UCS_FROM_UTF8) {
+			/* convert UTF-8 to ENCODING_UNICODE */
+			inlen = 0;
+			code = utf8_to_ucs4(p, &inlen, ir);
+			if (code == 0) {
+				ret = -1;
+				break;
+			}
+
+			if (casetype == KICONV_FROM_LOWER && dp->ctype) {
+				code = towlower(code, dp->ctype);
+			} else if (casetype == KICONV_FROM_UPPER && dp->ctype) {
+				code = towupper(code, dp->ctype);
+			}
+
+			if ((code >= 0xd800 && code < 0xe000) || code >= 0x110000 ) {
+				/* reserved for utf-16 surrogate pair */
+				/* invalid unicode */
+				ret = -1;
+				break;
+			}
+
+			if (inlen == 4) {
+				if (dp->convtype & KICONV_UCS_UCS4) {
+					ucslen = 4;
+					code = encode_surrogate(code);
+				} else {
+					/* can't handle with ucs-2 */
+					ret = -1;
+					break;
+				}
+			} else {
+				ucslen = 2;
+			}
+
+			/* save UCS-4 into ucs[] */
+			for (q = ucs, i = ucslen - 1 ; i >= 0 ; i--)
+				*q++ = (code >> (i << 3)) & 0xff;
+
+		} else if (dp->convtype & KICONV_UCS_COMBINE && dp->f_ctp) {
+			/* convert local code to ENCODING_UNICODE */
+			ucslen = 4;
+			inlen = ir;
+			q = ucs;
+			ret = iconv_convchr_case(dp->f_ctp, &p, &inlen, (char **)&q,
+			    &ucslen, casetype & (KICONV_FROM_LOWER | KICONV_FROM_UPPER));
+			if (ret)
+				break;
+			inlen = ir - inlen;
+			ucslen = 4 - ucslen;
+
+		} else {
+			/* src code is a proper subset of ENCODING_UNICODE */
+			q = ucs;
+			if (dp->convtype & KICONV_UCS_FROM_LE) {
+				*q = *(p + 1);
+				*(q + 1) = *p;
+				p += 2;
+			} else {
+				*q = *p++;
+				*(q + 1) = *p++;
+			}
+			if ((*q & 0xfc) == 0xd8) {
+				if (dp->convtype & KICONV_UCS_UCS4 &&
+				    dp->convtype & KICONV_UCS_FROM_UTF16) {
+					inlen = ucslen = 4;
+				} else {
+					/* invalid unicode */
+					ret = -1;
+					break;
+				}
+			} else {
+				inlen = ucslen = 2;
+			}
+			if (ir < inlen) {
+				ret = -1;
+				break;
+			}
+			if (ucslen == 4) {
+				q += 2;
+				if (dp->convtype & KICONV_UCS_FROM_LE) {
+					*q = *(p + 1);
+					*(q + 1) = *p;
+				} else {
+					*q = *p++;
+					*(q + 1) = *p;
+				}
+				if ((*q & 0xfc) != 0xdc) {
+					/* invalid unicode */
+					ret = -1;
+					break;
+				}
+			}
+		}
+
+		/*
+		 * The second half of conversion.
+		 * (convert ENCODING_UNICODE into any code)
+		 */
+		p = ucs;
+		if (dp->convtype & KICONV_UCS_TO_UTF8) {
+			q = (u_char *)dst;
+			if (ucslen == 4 && dp->convtype & KICONV_UCS_UCS4) {
+				/* decode surrogate pair */
+				code = decode_surrogate(p);
+			} else {
+				code = (ucs[0] << 8) | ucs[1];
+			}
+
+			if (casetype == KICONV_LOWER && dp->ctype) {
+				code = towlower(code, dp->ctype);
+			} else if (casetype == KICONV_UPPER && dp->ctype) {
+				code = towupper(code, dp->ctype);
+			}
+
+			outlen = 0;
+			if (ucs4_to_utf8(code, q, &outlen, or) == NULL) {
+				ret = -1;
+				break;
+			}
+
+			src += inlen;
+			ir -= inlen;
+			dst += outlen;
+			or -= outlen;
+
+		} else if (dp->convtype & KICONV_UCS_COMBINE && dp->t_ctp) {
+			ret = iconv_convchr_case(dp->t_ctp, &p, &ucslen, &dst,
+			    &or, casetype & (KICONV_LOWER | KICONV_UPPER));
+			if (ret)
+				break;
+
+			src += inlen;
+			ir -= inlen;
+
+		} else {
+			/* dst code is a proper subset of ENCODING_UNICODE */
+			if (or < ucslen) {
+				ret = -1;
+				break;
+			}
+			src += inlen;
+			ir -= inlen;
+			or -= ucslen;
+			if (dp->convtype & KICONV_UCS_TO_LE) {
+				*dst++ = *(p + 1);
+				*dst++ = *p;
+				p += 2;
+			} else {
+				*dst++ = *p++;
+				*dst++ = *p++;
+			}
+			if (ucslen == 4) {
+				if ((dp->convtype & KICONV_UCS_UCS4) == 0 ||
+				    (dp->convtype & KICONV_UCS_TO_UTF16) == 0) {
+					ret = -1;
+					break;
+				}
+				if (dp->convtype & KICONV_UCS_TO_LE) {
+					*dst++ = *(p + 1);
+					*dst++ = *p;
+				} else {
+					*dst++ = *p++;
+					*dst++ = *p;
+				}
+			}
+		}
+
+		if (convchar == 1)
+			break;
+	}
+
+	*inbuf += in - ir;
+	*outbuf += on - or;
+	*inbytesleft -= in - ir;
+	*outbytesleft -= on - or;
+	return (ret);
+}
+
+static int
+iconv_ucs_init(struct iconv_converter_class *dcp)
+{
+	int error;
+
+	error = iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, ENCODING_UTF8);
+	if (error)
+		return (error);
+	error = iconv_add(ENCODING_UNICODE, ENCODING_UTF8, ENCODING_UNICODE);
+	if (error)
+		return (error);
+	return (0);
+}
+
+static int
+iconv_ucs_done(struct iconv_converter_class *dcp)
+{
+	return (0);
+}
+
+static const char *
+iconv_ucs_name(struct iconv_converter_class *dcp)
+{
+	return (ENCODING_UNICODE);
+}
+
+static kobj_method_t iconv_ucs_methods[] = {
+	KOBJMETHOD(iconv_converter_open,	iconv_ucs_open),
+	KOBJMETHOD(iconv_converter_close,	iconv_ucs_close),
+	KOBJMETHOD(iconv_converter_conv,	iconv_ucs_conv),
+	KOBJMETHOD(iconv_converter_init,	iconv_ucs_init),
+	KOBJMETHOD(iconv_converter_done,	iconv_ucs_done),
+	KOBJMETHOD(iconv_converter_name,	iconv_ucs_name),
+	{0, 0}
+};
+
+KICONV_CONVERTER(ucs, sizeof(struct iconv_ucs));
+
+static uint32_t
+utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen)
+{
+	size_t i, w = 0;
+	uint32_t ucs4 = 0;
+
+	/*
+	 * get leading 1 byte from utf-8
+	 */
+	if ((*src & 0x80) == 0) {
+		/*
+		 * leading 1 bit is "0"
+		 *  utf-8: 0xxxxxxx
+		 *  ucs-4: 00000000 00000000 00000000 0xxxxxxx
+		 */
+		w = 1;
+		/* get trailing 7 bits */
+		ucs4 = *src & 0x7f;
+	} else if ((*src & 0xe0) == 0xc0) {
+		/*
+		 * leading 3 bits are "110"
+		 *  utf-8: 110xxxxx 10yyyyyy
+		 *  ucs-4: 00000000 00000000 00000xxx xxyyyyyy
+		 */
+		w = 2;
+		/* get trailing 5 bits */
+		ucs4 = *src & 0x1f;
+	} else if ((*src & 0xf0) == 0xe0) {
+		/*
+		 * leading 4 bits are "1110"
+		 *  utf-8: 1110xxxx 10yyyyyy 10zzzzzz
+		 *  ucs-4: 00000000 00000000 xxxxyyyy yyzzzzzz
+		 */
+		w = 3;
+		/* get trailing 4 bits */
+		ucs4 = *src & 0x0f;
+	} else if ((*src & 0xf8) == 0xf0) {
+		/*
+		 * leading 5 bits are "11110"
+		 *  utf-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+		 *  ucs-4: 00000000 000wwwxx xxxxyyyy yyzzzzzz
+		 */
+		w = 4;
+		/* get trailing 3 bits */
+		ucs4 = *src & 0x07;
+	} else {
+		/* out of utf-16 range or having illegal bits */
+		return (0);
+	}
+	if (w == 0)
+		return (0);
+
+	if (srclen < w)
+		return (0);
+
+	/*
+	 * get left parts from utf-8
+	 */
+	for (i = 1 ; i < w ; i++) {
+		if ((*(src + i) & 0xc0) != 0x80) {
+			/* invalid: leading 2 bits are not "10" */
+			return (0);
+		}
+		/* concatenate trailing 6 bits into ucs4 */
+		ucs4 <<= 6;
+		ucs4 |= *(src + i) & 0x3f;
+	}
+
+	*utf8width = w;
+	return (ucs4);
+}
+
+static u_char *
+ucs4_to_utf8(uint32_t ucs4, char *dst, size_t *utf8width, size_t dstlen)
+{
+	u_char lead, *p;
+	size_t i, w;
+
+	/*
+	 * determine utf-8 width and leading bits
+	 */
+	if (ucs4 < 0x80) {
+		w = 1;
+		lead = 0;	/* "0" */
+	} else if (ucs4 < 0x800) {
+		w = 2;
+		lead = 0xc0;	/* "11" */
+	} else if (ucs4 < 0x10000) {
+		w = 3;
+		lead = 0xe0;	/* "111" */
+	} else if (ucs4 < 0x200000) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201201160515.q0G5FDPx017016>