Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 26 Nov 2018 08:16:33 +0000 (UTC)
From:      Baptiste Daroussin <bapt@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r340933 - head/lib/libedit
Message-ID:  <201811260816.wAQ8GXpk027081@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bapt
Date: Mon Nov 26 08:16:33 2018
New Revision: 340933
URL: https://svnweb.freebsd.org/changeset/base/340933

Log:
  libedit: improve multibyte support
  
  Until this commit, libedit only supported UTF-8 as a multibyte charset.
  Improve it to support other multibyte charsets.
  
  Tested with eucJP and SJIS charsets.
  Note that this change has been reviewed and committed in upstream libedit
  as well, via christos@NetBSD.
  
  Submitted by:	naito.yuichiro _at_ gmail.com
  Reviewed by:	bapt, pfg, yuripv, 0mp
  MFC after:	1 month
  Differential Revision:	https://reviews.freebsd.org/D17903

Modified:
  head/lib/libedit/chartype.c
  head/lib/libedit/chartype.h
  head/lib/libedit/el.c
  head/lib/libedit/el.h
  head/lib/libedit/read.c

Modified: head/lib/libedit/chartype.c
==============================================================================
--- head/lib/libedit/chartype.c	Mon Nov 26 07:42:52 2018	(r340932)
+++ head/lib/libedit/chartype.c	Mon Nov 26 08:16:33 2018	(r340933)
@@ -37,6 +37,7 @@ __RCSID("$NetBSD: chartype.c,v 1.23 2016/02/28 23:02:2
 __FBSDID("$FreeBSD$");
 
 #include <ctype.h>
+#include <limits.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -182,17 +183,13 @@ ct_decode_argv(int argc, const char *argv[], ct_buffer
 protected size_t
 ct_enc_width(Char c)
 {
-	/* UTF-8 encoding specific values */
-	if (c < 0x80)
-		return 1;
-	else if (c < 0x0800)
-		return 2;
-	else if (c < 0x10000)
-		return 3;
-	else if (c < 0x110000)
-		return 4;
-	else
-		return 0; /* not a valid codepoint */
+	mbstate_t ps = (mbstate_t){{0}};
+	size_t len;
+	char cbuf[MB_LEN_MAX];
+	len = ct_wcrtomb(cbuf, c, &ps);
+	if (len == (size_t)-1)
+		return (0);
+	return (len);
 }
 
 protected ssize_t

Modified: head/lib/libedit/chartype.h
==============================================================================
--- head/lib/libedit/chartype.h	Mon Nov 26 07:42:52 2018	(r340932)
+++ head/lib/libedit/chartype.h	Mon Nov 26 08:16:33 2018	(r340933)
@@ -56,6 +56,7 @@
 
 #define ct_wctob             wctob
 #define ct_wctomb            wctomb
+#define ct_wcrtomb           wcrtomb
 #define ct_wctomb_reset      wctomb(0,0)
 #define ct_wcstombs          wcstombs
 #define ct_mbstowcs          mbstowcs
@@ -109,6 +110,7 @@ Width(wchar_t c)
 
 #define ct_wctob(w)          ((int)(w))
 #define ct_wctomb            error
+#define ct_wcrtomb           error
 #define ct_wctomb_reset
 #define ct_wcstombs(a, b, c)    (strncpy(a, b, c), strlen(a))
 #define ct_mbstowcs(a, b, c)    (strncpy(a, b, c), strlen(a))

Modified: head/lib/libedit/el.c
==============================================================================
--- head/lib/libedit/el.c	Mon Nov 26 07:42:52 2018	(r340932)
+++ head/lib/libedit/el.c	Mon Nov 26 08:16:33 2018	(r340933)
@@ -99,10 +99,6 @@ el_init_fd(const char *prog, FILE *fin, FILE *fout, FI
          * Initialize all the modules. Order is important!!!
          */
 	el->el_flags = 0;
-	if (setlocale(LC_CTYPE, NULL) != NULL){
-		if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0)
-			el->el_flags |= CHARSET_IS_UTF8;
-	}
 
 	if (terminal_init(el) == -1) {
 		el_free(el->el_prog);
@@ -293,7 +289,7 @@ FUN(el,set)(EditLine *el, int op, ...)
 		void *ptr = va_arg(ap, void *);
 
 		rv = hist_set(el, func, ptr);
-		if (!(el->el_flags & CHARSET_IS_UTF8))
+		if (MB_CUR_MAX == 1)
 			el->el_flags &= ~NARROW_HISTORY;
 		break;
 	}

Modified: head/lib/libedit/el.h
==============================================================================
--- head/lib/libedit/el.h	Mon Nov 26 07:42:52 2018	(r340932)
+++ head/lib/libedit/el.h	Mon Nov 26 08:16:33 2018	(r340933)
@@ -56,7 +56,6 @@
 #define	NO_TTY		0x02
 #define	EDIT_DISABLED	0x04
 #define	UNBUFFERED	0x08
-#define	CHARSET_IS_UTF8 0x10
 #define	NARROW_HISTORY	0x40
 
 typedef unsigned char el_action_t;	/* Index to command array	*/

Modified: head/lib/libedit/read.c
==============================================================================
--- head/lib/libedit/read.c	Mon Nov 26 07:42:52 2018	(r340932)
+++ head/lib/libedit/read.c	Mon Nov 26 08:16:33 2018	(r340933)
@@ -363,13 +363,7 @@ read_char(EditLine *el, wchar_t *cp)
 				goto again;
 			}
 		case (size_t)-2:
-			/*
-			 * We don't support other multibyte charsets.
-			 * The second condition shouldn't happen
-			 * and is here merely for additional safety.
-			 */
-			if ((el->el_flags & CHARSET_IS_UTF8) == 0 ||
-			    cbp >= MB_LEN_MAX) {
+			if (cbp >= MB_LEN_MAX) {
 				errno = EILSEQ;
 				*cp = L'\0';
 				return -1;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811260816.wAQ8GXpk027081>