Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 7 May 2011 14:32:16 +0000 (UTC)
From:      Jilles Tjoelker <jilles@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r221602 - in head: bin/sh tools/regression/bin/sh/expansion
Message-ID:  <201105071432.p47EWGi9002296@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jilles
Date: Sat May  7 14:32:16 2011
New Revision: 221602
URL: http://svn.freebsd.org/changeset/base/221602

Log:
  sh: Add UTF-8 support to ${#var}.
  
  If the current locale uses UTF-8, ${#var} counts codepoints (more precisely,
  bytes b with (b & 0xc0) != 0x80).

Added:
  head/tools/regression/bin/sh/expansion/length7.0   (contents, props changed)
  head/tools/regression/bin/sh/expansion/length8.0   (contents, props changed)
Modified:
  head/bin/sh/expand.c

Modified: head/bin/sh/expand.c
==============================================================================
--- head/bin/sh/expand.c	Sat May  7 13:57:30 2011	(r221601)
+++ head/bin/sh/expand.c	Sat May  7 14:32:16 2011	(r221602)
@@ -665,6 +665,7 @@ evalvar(char *p, int flag)
 	int special;
 	int startloc;
 	int varlen;
+	int varlenb;
 	int easy;
 	int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 
@@ -712,8 +713,15 @@ again: /* jump here after setting a vari
 		if (special) {
 			varvalue(var, varflags & VSQUOTE, subtype, flag);
 			if (subtype == VSLENGTH) {
-				varlen = expdest - stackblock() - startloc;
-				STADJUST(-varlen, expdest);
+				varlenb = expdest - stackblock() - startloc;
+				varlen = varlenb;
+				if (localeisutf8) {
+					val = stackblock() + startloc;
+					for (;val != expdest; val++)
+						if ((*val & 0xC0) == 0x80)
+							varlen--;
+				}
+				STADJUST(-varlenb, expdest);
 			}
 		} else {
 			char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
@@ -721,7 +729,9 @@ again: /* jump here after setting a vari
 
 			if (subtype == VSLENGTH) {
 				for (;*val; val++)
-					varlen++;
+					if (!localeisutf8 ||
+					    (*val & 0xC0) != 0x80)
+						varlen++;
 			}
 			else {
 				if (quotes)

Added: head/tools/regression/bin/sh/expansion/length7.0
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/bin/sh/expansion/length7.0	Sat May  7 14:32:16 2011	(r221602)
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+# a umlaut
+s=$(printf '\303\244')
+# euro sign
+s=$s$(printf '\342\202\254')
+# some sort of 't' outside BMP
+s=$s$(printf '\360\235\225\245')
+set -- "$s"
+[ ${#s} = 3 ] && [ ${#1} = 3 ]

Added: head/tools/regression/bin/sh/expansion/length8.0
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/tools/regression/bin/sh/expansion/length8.0	Sat May  7 14:32:16 2011	(r221602)
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.ISO8859-1
+export LC_CTYPE
+
+# a umlaut
+s=$(printf '\303\244')
+# euro sign
+s=$s$(printf '\342\202\254')
+# some sort of 't' outside BMP
+s=$s$(printf '\360\235\225\245')
+set -- "$s"
+[ ${#s} = 9 ] && [ ${#1} = 9 ]



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105071432.p47EWGi9002296>