Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 05 Jul 2018 01:37:15 +0900 (JST)
From:      Hiroki Sato <hrs@FreeBSD.org>
To:        daichigoto@icloud.com, daichi@freebsd.org
Cc:        lists@eitanadler.com, gnn@FreeBSD.org, cem@freebsd.org, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r335836 - head/usr.bin/top
Message-ID:  <20180705.013715.810854236306970666.hrs@allbsd.org>
In-Reply-To: <20180703.020956.859981414196673670.hrs@allbsd.org>
References:  <20180702.155529.1102410939281120947.hrs@allbsd.org> <459BD898-8072-426E-A968-96C1382AC616@icloud.com> <20180703.020956.859981414196673670.hrs@allbsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
----Security_Multipart0(Thu_Jul__5_01_37_15_2018_534)--
Content-Type: Multipart/Mixed;
 boundary="--Next_Part(Thu_Jul__5_01_37_15_2018_361)--"
Content-Transfer-Encoding: 7bit

----Next_Part(Thu_Jul__5_01_37_15_2018_361)--
Content-Type: Text/Plain; charset=iso-2022-jp
Content-Transfer-Encoding: 7bit

Hiroki Sato <hrs@FreeBSD.org> wrote
  in <20180703.020956.859981414196673670.hrs@allbsd.org>:

hr> $B8eF#BgCO(B <daichigoto@icloud.com> wrote
hr>   in <459BD898-8072-426E-A968-96C1382AC616@icloud.com>:
hr>
hr> da>
hr> da>
hr> da> > 2018/07/02 15:55$B!"(BHiroki Sato <hrs@FreeBSD.org>$B$N%a!<%k(B:
hr> da> >
hr> da> > Eitan Adler <lists@eitanadler.com> wrote
hr> da> >  in <CAF6rxg=Zjkf6EbSgt1fBQBUDHGKWwLf=n9ZJweJH+Di800kJ3w@mail.gmail.com>:
hr> da> >
hr> da> > li> On 1 July 2018 at 10:08, Conrad Meyer <cem@freebsd.org> wrote:
hr> da> > li> > Hi Daichi,
hr> da> > li> >
hr> da> > li> >
hr> da> > li> >
hr> da> > li> > I don't think code to decode UTF-8 belongs in top(1).  I don't know
hr> da> > li> > what the goal of this routine is, but I doubt this is the right way to
hr> da> > li> > accomplish it.
hr> da> > li>
hr> da> > li> For the record, I agree. This is why I didn't click "accept" on the
hr> da> > li> revision. I don't fully oppose leaving it in top(1) for now as we work
hr> da> > li> out the API, but long term its the wrong place.
hr> da> > li>
hr> da> > li> https://reviews.freebsd.org/D16058 is the review.
hr> da> >
hr> da> > I strongly object this kind of encoding-specific routine.  Please
hr> da> > back out it.  The problem is that top(1) does not support multibyte
hr> da> > encoding in functions for printing, and using C99 wide/multibyte
hr> da> > character manipulation API such as iswprint(3) is the way to solve
hr> da> > it.  Doing getenv("LANG") and assuming an encoding based on it is a
hr> da> > very bad practice to internationalize software.
hr> da> >
hr> da> > -- Hiroki
hr> da>
hr> da> I respect what you mean.
hr> da>
hr> da> Once I back out, I will begin implementing it in a different way.
hr> da> Please advise which function should be used for implementation
hr> da> (iswprint (3) and what other functions should be used?)
hr>
hr>  Roughly speaking, POSIX/XPG/C99 I18N model requires the following
hr>  steps:
(snip)

 Are you going to back out r335836, or disagree about it?

 If there is no objection in the next 24 hours, I will commit the
 attached patch.  This should be a minimal change to support multibyte
 characters in ARGV array depending on LC_CTYPE, not limited to UTF-8.

-- Hiroki

----Next_Part(Thu_Jul__5_01_37_15_2018_361)--
Content-Type: Text/X-Patch; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="top_mbsupport.20180705-1.diff"

Index: usr.bin/top/display.c
===================================================================
--- usr.bin/top/display.c	(revision 335957)
+++ usr.bin/top/display.c	(working copy)
@@ -1248,55 +1248,6 @@
     }
 }

-/*
- *  printable(str) - make the string pointed to by "str" into one that is
- *	printable (i.e.: all ascii), by converting all non-printable
- *	characters into '?'.  Replacements are done in place and a pointer
- *	to the original buffer is returned.
- */
-
-char *
-printable(char str[])
-{
-	char *ptr;
-	char ch;
-
-	ptr = str;
-	if (utf8flag) {
-		while ((ch = *ptr) != '\0') {
-			if (0x00 == (0x80 & ch)) {
-				if (!isprint(ch)) {
-					*ptr = '?';
-				}
-				++ptr;
-			} else if (0xC0 == (0xE0 & ch)) {
-				++ptr;
-				if ('\0' != *ptr) ++ptr;
-			} else if (0xE0 == (0xF0 & ch)) {
-				++ptr;
-				if ('\0' != *ptr) ++ptr;
-				if ('\0' != *ptr) ++ptr;
-			} else if (0xF0 == (0xF8 & ch)) {
-				++ptr;
-				if ('\0' != *ptr) ++ptr;
-				if ('\0' != *ptr) ++ptr;
-				if ('\0' != *ptr) ++ptr;
-			} else {
-				*ptr = '?';
-				++ptr;
-			}
-		}
-	} else {
-		while ((ch = *ptr) != '\0') {
-			if (!isprint(ch)) {
-				*ptr = '?';
-			}
-			ptr++;
-		}
-	}
-	return(str);
-}
-
 void
 i_uptime(struct timeval *bt, time_t *tod)
 {
Index: usr.bin/top/display.h
===================================================================
--- usr.bin/top/display.h	(revision 335957)
+++ usr.bin/top/display.h	(working copy)
@@ -11,7 +11,6 @@
 void	 clear_message(void);
 int		 display_resize(void);
 void	 i_header(const char *text);
-char	*printable(char *string);
 void	 display_header(int t);
 int		 display_init(struct statics *statics);
 void	 i_arc(int *stats);
Index: usr.bin/top/machine.c
===================================================================
--- usr.bin/top/machine.c	(revision 335957)
+++ usr.bin/top/machine.c	(working copy)
@@ -986,13 +986,8 @@
 				if (*src == '\0')
 					continue;
 				len = (argbuflen - (dst - argbuf) - 1) / 4;
-				if (utf8flag) {
-					utf8strvisx(dst, src, MIN(strlen(src), len));
-				} else {
-					strvisx(dst, src,
-					    MIN(strlen(src), len),
-					    VIS_NL | VIS_CSTYLE);
-				}
+				strvisx(dst, src, MIN(strlen(src), len),
+				    VIS_NL | VIS_CSTYLE | VIS_OCTAL);
 				while (*dst != '\0')
 					dst++;
 				if ((argbuflen - (dst - argbuf) - 1) / 4 > 0)
@@ -1089,7 +1084,7 @@
 		sbuf_printf(procbuf, "%6s ", format_time(cputime));
 		sbuf_printf(procbuf, "%6.2f%% ", ps.wcpu ? 100.0 * weighted_cpu(PCTCPU(pp), pp) : 100.0 * PCTCPU(pp));
 	}
-	sbuf_printf(procbuf, "%s", printable(cmdbuf));
+	sbuf_printf(procbuf, "%s", cmdbuf);
 	free(cmdbuf);
 	return (sbuf_data(procbuf));
 }
Index: usr.bin/top/top.c
===================================================================
--- usr.bin/top/top.c	(revision 335957)
+++ usr.bin/top/top.c	(working copy)
@@ -24,6 +24,7 @@
 #include <errno.h>
 #include <getopt.h>
 #include <jail.h>
+#include <locale.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -69,7 +70,6 @@
 struct process_select ps;
 const char * myname = "top";
 pid_t mypid;
-bool utf8flag = false;

 /* pointers to display routines */
 static void (*d_loadave)(int mpid, double *avenrun) = i_loadave;
@@ -276,6 +276,11 @@
 	}
     }

+    if (setlocale(LC_ALL, "") == NULL) {
+	fprintf(stderr, "%s: invalid locale.\n", myname);
+	exit(1);
+    }
+
     mypid = getpid();

     /* get our name */
@@ -607,14 +612,6 @@
 	fputc('\n', stderr);
     }

-	/* check if you are using UTF-8 */
-	char *env_lang;
-	if (NULL != (env_lang = getenv("LANG")) &&
-		0 != strcmp(env_lang, "") &&
-		NULL != strstr(env_lang, "UTF-8")) {
-		utf8flag = true;
-	}
-
 restart:

     /*
Index: usr.bin/top/top.h
===================================================================
--- usr.bin/top/top.h	(revision 335957)
+++ usr.bin/top/top.h	(working copy)
@@ -8,7 +8,6 @@
 #define TOP_H

 #include <unistd.h>
-#include <stdbool.h>

 /* Number of lines of header information on the standard screen */
 extern int Header_lines;
@@ -38,7 +37,6 @@
 extern int pcpu_stats;
 extern int overstrike;
 extern pid_t mypid;
-extern bool utf8flag;

 extern const char * myname;

Index: usr.bin/top/utils.c
===================================================================
--- usr.bin/top/utils.c	(revision 335957)
+++ usr.bin/top/utils.c	(working copy)
@@ -2,7 +2,6 @@
  *  This program may be freely redistributed,
  *  but this entire comment MUST remain intact.
  *
- *  Copyright (c) 2018, Daichi Goto
  *  Copyright (c) 2018, Eitan Adler
  *  Copyright (c) 1984, 1989, William LeFebvre, Rice University
  *  Copyright (c) 1989, 1990, 1992, William LeFebvre, Northwestern University
@@ -329,61 +328,3 @@
 	kvm_close(kd);
 	return ret;
 }
-
-/*
- * utf8strvisx(dst,src,src_len)
- *	strvisx(dst,src,src_len,VIS_NL|VIS_CSTYLE) coresponding to UTF-8.
- */
-static const char *vis_encodes[] = {
-	"\\0", "\\^A", "\\^B", "\\^C", "\\^D", "\\^E", "\\^F", "\\a",
-	"\\b", "\t", "\\n", "\\v", "\\f", "\\r", "\\^N", "\\^O", "\\^P",
-	"\\^Q", "\\^R", "\\^S", "\\^T", "\\^U", "\\^V", "\\^W", "\\^X",
-	"\\^Y", "\\^Z", "\\^[", "\\^\\", "\\^]", "\\^^", "\\^_"
-};
-
-int
-utf8strvisx(char *dst, const char *src, size_t src_len)
-{
-	const signed char *src_p;
-	char *dst_p;
-	int i, j, olen, len;
-
-	src_p = src;
-	dst_p = dst;
-	i = olen = 0;
-	len = (int)src_len;
-	while (i < len) {
-		if (0x00 == (0x80 & *src_p)) {
-			if (0 <= *src_p && *src_p <= 31) {
-				j = strlen(vis_encodes[(int)*src_p]);
-				strcpy(dst_p, vis_encodes[(int)*src_p]);
-				dst_p += j;
-				olen += j;
-			} else if (127 == *src_p) {
-				strcpy(dst_p, "\\^?");
-				olen += 3;
-			} else {
-				*dst_p++ = *src_p;
-				++olen;
-			}
-			++i;
-			++src_p;
-		} else if (0xC0 == (0xE0 & *src_p)) {
-			*dst_p++ = *src_p++; ++i; ++olen;
-			if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
-		} else if (0xE0 == (0xF0 & *src_p)) {
-			*dst_p++ = *src_p++; ++i; ++olen;
-			if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
-			if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
-		} else if (0xF0 == (0xF8 & *src_p)) {
-			*dst_p++ = *src_p++; ++i; ++olen;
-			if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
-			if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
-			if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
-		} else {
-			*dst_p++ = '?'; ++i; ++olen;
-		}
-	}
-
-	return olen;
-}
Index: usr.bin/top/utils.h
===================================================================
--- usr.bin/top/utils.h	(revision 335957)
+++ usr.bin/top/utils.h	(working copy)
@@ -22,5 +22,4 @@
 char *format_k(int64_t);
 int string_index(const char *string, const char * const *array);
 int find_pid(pid_t pid);
-int utf8strvisx(char *, const char *, size_t);


----Next_Part(Thu_Jul__5_01_37_15_2018_361)----

----Security_Multipart0(Thu_Jul__5_01_37_15_2018_534)--
Content-Type: application/pgp-signature
Content-Transfer-Encoding: 7bit

-----BEGIN PGP SIGNATURE-----

iEYEABECAAYFAls897sACgkQTyzT2CeTzy0hWgCdGFlfYH9+dCrZ53Nm0hm1qqL3
XLcAnROqVtAT85GtvSdg/spBq1kqH3Kq
=jnqj
-----END PGP SIGNATURE-----

----Security_Multipart0(Thu_Jul__5_01_37_15_2018_534)----



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20180705.013715.810854236306970666.hrs>