Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 10 Jul 2018 10:07:22 +0900
From:      =?utf-8?B?5b6M6Jek5aSn5Zyw?= <daichigoto@icloud.com>
To:        Hiroki Sato <hrs@FreeBSD.org>
Cc:        Daichi GOTO <daichi@freebsd.org>, Eitan Adler <lists@eitanadler.com>, gnn@FreeBSD.org, cem@freebsd.org, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r335836 - head/usr.bin/top
Message-ID:  <FFD28241-ADD2-4926-8E2E-BBB33616C30A@icloud.com>
In-Reply-To: <20180705.013715.810854236306970666.hrs@allbsd.org>
References:  <20180702.155529.1102410939281120947.hrs@allbsd.org> <459BD898-8072-426E-A968-96C1382AC616@icloud.com> <20180703.020956.859981414196673670.hrs@allbsd.org> <20180705.013715.810854236306970666.hrs@allbsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
I summarized in the Phabricator. Check it out please.

    https://reviews.freebsd.org/D16203

> 2018/07/05 1:37=E3=80=81Hiroki Sato <hrs@FreeBSD.org>=E3=81=AE=E3=83=A1=E3=
=83=BC=E3=83=AB:
>=20
> Hiroki Sato <hrs@FreeBSD.org> wrote
>  in <20180703.020956.859981414196673670.hrs@allbsd.org>:
>=20
> hr> =E5=BE=8C=E8=97=A4=E5=A4=A7=E5=9C=B0 <daichigoto@icloud.com> wrote
> hr>   in <459BD898-8072-426E-A968-96C1382AC616@icloud.com>:
> hr>
> hr> da>
> hr> da>
> hr> da> > 2018/07/02 15:55=E3=80=81Hiroki Sato =
<hrs@FreeBSD.org>=E3=81=AE=E3=83=A1=E3=83=BC=E3=83=AB:
> hr> da> >
> hr> da> > Eitan Adler <lists@eitanadler.com> wrote
> hr> da> >  in =
<CAF6rxg=3DZjkf6EbSgt1fBQBUDHGKWwLf=3Dn9ZJweJH+Di800kJ3w@mail.gmail.com>:
> hr> da> >
> hr> da> > li> On 1 July 2018 at 10:08, Conrad Meyer <cem@freebsd.org> =
wrote:
> hr> da> > li> > Hi Daichi,
> hr> da> > li> >
> hr> da> > li> >
> hr> da> > li> >
> hr> da> > li> > I don't think code to decode UTF-8 belongs in top(1).  =
I don't know
> hr> da> > li> > what the goal of this routine is, but I doubt this is =
the right way to
> hr> da> > li> > accomplish it.
> hr> da> > li>
> hr> da> > li> For the record, I agree. This is why I didn't click =
"accept" on the
> hr> da> > li> revision. I don't fully oppose leaving it in top(1) for =
now as we work
> hr> da> > li> out the API, but long term its the wrong place.
> hr> da> > li>
> hr> da> > li> https://reviews.freebsd.org/D16058 is the review.
> hr> da> >
> hr> da> > I strongly object this kind of encoding-specific routine.  =
Please
> hr> da> > back out it.  The problem is that top(1) does not support =
multibyte
> hr> da> > encoding in functions for printing, and using C99 =
wide/multibyte
> hr> da> > character manipulation API such as iswprint(3) is the way to =
solve
> hr> da> > it.  Doing getenv("LANG") and assuming an encoding based on =
it is a
> hr> da> > very bad practice to internationalize software.
> hr> da> >
> hr> da> > -- Hiroki
> hr> da>
> hr> da> I respect what you mean.
> hr> da>
> hr> da> Once I back out, I will begin implementing it in a different =
way.
> hr> da> Please advise which function should be used for implementation
> hr> da> (iswprint (3) and what other functions should be used?)
> hr>
> hr>  Roughly speaking, POSIX/XPG/C99 I18N model requires the following
> hr>  steps:
> (snip)
>=20
> Are you going to back out r335836, or disagree about it?
>=20
> If there is no objection in the next 24 hours, I will commit the
> attached patch.  This should be a minimal change to support multibyte
> characters in ARGV array depending on LC_CTYPE, not limited to UTF-8.
>=20
> -- Hiroki
> Index: usr.bin/top/display.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/display.c	(revision 335957)
> +++ usr.bin/top/display.c	(working copy)
> @@ -1248,55 +1248,6 @@
>     }
> }
>=20
> -/*
> - *  printable(str) - make the string pointed to by "str" into one =
that is
> - *	printable (i.e.: all ascii), by converting all non-printable
> - *	characters into '?'.  Replacements are done in place and a =
pointer
> - *	to the original buffer is returned.
> - */
> -
> -char *
> -printable(char str[])
> -{
> -	char *ptr;
> -	char ch;
> -
> -	ptr =3D str;
> -	if (utf8flag) {
> -		while ((ch =3D *ptr) !=3D '\0') {
> -			if (0x00 =3D=3D (0x80 & ch)) {
> -				if (!isprint(ch)) {
> -					*ptr =3D '?';
> -				}
> -				++ptr;
> -			} else if (0xC0 =3D=3D (0xE0 & ch)) {
> -				++ptr;
> -				if ('\0' !=3D *ptr) ++ptr;
> -			} else if (0xE0 =3D=3D (0xF0 & ch)) {
> -				++ptr;
> -				if ('\0' !=3D *ptr) ++ptr;
> -				if ('\0' !=3D *ptr) ++ptr;
> -			} else if (0xF0 =3D=3D (0xF8 & ch)) {
> -				++ptr;
> -				if ('\0' !=3D *ptr) ++ptr;
> -				if ('\0' !=3D *ptr) ++ptr;
> -				if ('\0' !=3D *ptr) ++ptr;
> -			} else {
> -				*ptr =3D '?';
> -				++ptr;
> -			}
> -		}
> -	} else {
> -		while ((ch =3D *ptr) !=3D '\0') {
> -			if (!isprint(ch)) {
> -				*ptr =3D '?';
> -			}
> -			ptr++;
> -		}
> -	}
> -	return(str);
> -}
> -
> void
> i_uptime(struct timeval *bt, time_t *tod)
> {
> Index: usr.bin/top/display.h
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/display.h	(revision 335957)
> +++ usr.bin/top/display.h	(working copy)
> @@ -11,7 +11,6 @@
> void	 clear_message(void);
> int		 display_resize(void);
> void	 i_header(const char *text);
> -char	*printable(char *string);
> void	 display_header(int t);
> int		 display_init(struct statics *statics);
> void	 i_arc(int *stats);
> Index: usr.bin/top/machine.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/machine.c	(revision 335957)
> +++ usr.bin/top/machine.c	(working copy)
> @@ -986,13 +986,8 @@
> 				if (*src =3D=3D '\0')
> 					continue;
> 				len =3D (argbuflen - (dst - argbuf) - 1) =
/ 4;
> -				if (utf8flag) {
> -					utf8strvisx(dst, src, =
MIN(strlen(src), len));
> -				} else {
> -					strvisx(dst, src,
> -					    MIN(strlen(src), len),
> -					    VIS_NL | VIS_CSTYLE);
> -				}
> +				strvisx(dst, src, MIN(strlen(src), len),
> +				    VIS_NL | VIS_CSTYLE | VIS_OCTAL);
> 				while (*dst !=3D '\0')
> 					dst++;
> 				if ((argbuflen - (dst - argbuf) - 1) / 4 =
> 0)
> @@ -1089,7 +1084,7 @@
> 		sbuf_printf(procbuf, "%6s ", format_time(cputime));
> 		sbuf_printf(procbuf, "%6.2f%% ", ps.wcpu ? 100.0 * =
weighted_cpu(PCTCPU(pp), pp) : 100.0 * PCTCPU(pp));
> 	}
> -	sbuf_printf(procbuf, "%s", printable(cmdbuf));
> +	sbuf_printf(procbuf, "%s", cmdbuf);
> 	free(cmdbuf);
> 	return (sbuf_data(procbuf));
> }
> Index: usr.bin/top/top.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/top.c	(revision 335957)
> +++ usr.bin/top/top.c	(working copy)
> @@ -24,6 +24,7 @@
> #include <errno.h>
> #include <getopt.h>
> #include <jail.h>
> +#include <locale.h>
> #include <stdbool.h>
> #include <stdio.h>
> #include <stdlib.h>
> @@ -69,7 +70,6 @@
> struct process_select ps;
> const char * myname =3D "top";
> pid_t mypid;
> -bool utf8flag =3D false;
>=20
> /* pointers to display routines */
> static void (*d_loadave)(int mpid, double *avenrun) =3D i_loadave;
> @@ -276,6 +276,11 @@
> 	}
>     }
>=20
> +    if (setlocale(LC_ALL, "") =3D=3D NULL) {
> +	fprintf(stderr, "%s: invalid locale.\n", myname);
> +	exit(1);
> +    }
> +
>     mypid =3D getpid();
>=20
>     /* get our name */
> @@ -607,14 +612,6 @@
> 	fputc('\n', stderr);
>     }
>=20
> -	/* check if you are using UTF-8 */
> -	char *env_lang;
> -	if (NULL !=3D (env_lang =3D getenv("LANG")) &&
> -		0 !=3D strcmp(env_lang, "") &&
> -		NULL !=3D strstr(env_lang, "UTF-8")) {
> -		utf8flag =3D true;
> -	}
> -
> restart:
>=20
>     /*
> Index: usr.bin/top/top.h
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/top.h	(revision 335957)
> +++ usr.bin/top/top.h	(working copy)
> @@ -8,7 +8,6 @@
> #define TOP_H
>=20
> #include <unistd.h>
> -#include <stdbool.h>
>=20
> /* Number of lines of header information on the standard screen */
> extern int Header_lines;
> @@ -38,7 +37,6 @@
> extern int pcpu_stats;
> extern int overstrike;
> extern pid_t mypid;
> -extern bool utf8flag;
>=20
> extern const char * myname;
>=20
> Index: usr.bin/top/utils.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/utils.c	(revision 335957)
> +++ usr.bin/top/utils.c	(working copy)
> @@ -2,7 +2,6 @@
>  *  This program may be freely redistributed,
>  *  but this entire comment MUST remain intact.
>  *
> - *  Copyright (c) 2018, Daichi Goto
>  *  Copyright (c) 2018, Eitan Adler
>  *  Copyright (c) 1984, 1989, William LeFebvre, Rice University
>  *  Copyright (c) 1989, 1990, 1992, William LeFebvre, Northwestern =
University
> @@ -329,61 +328,3 @@
> 	kvm_close(kd);
> 	return ret;
> }
> -
> -/*
> - * utf8strvisx(dst,src,src_len)
> - *	strvisx(dst,src,src_len,VIS_NL|VIS_CSTYLE) coresponding to =
UTF-8.
> - */
> -static const char *vis_encodes[] =3D {
> -	"\\0", "\\^A", "\\^B", "\\^C", "\\^D", "\\^E", "\\^F", "\\a",
> -	"\\b", "\t", "\\n", "\\v", "\\f", "\\r", "\\^N", "\\^O", "\\^P",
> -	"\\^Q", "\\^R", "\\^S", "\\^T", "\\^U", "\\^V", "\\^W", "\\^X",
> -	"\\^Y", "\\^Z", "\\^[", "\\^\\", "\\^]", "\\^^", "\\^_"
> -};
> -
> -int
> -utf8strvisx(char *dst, const char *src, size_t src_len)
> -{
> -	const signed char *src_p;
> -	char *dst_p;
> -	int i, j, olen, len;
> -
> -	src_p =3D src;
> -	dst_p =3D dst;
> -	i =3D olen =3D 0;
> -	len =3D (int)src_len;
> -	while (i < len) {
> -		if (0x00 =3D=3D (0x80 & *src_p)) {
> -			if (0 <=3D *src_p && *src_p <=3D 31) {
> -				j =3D strlen(vis_encodes[(int)*src_p]);
> -				strcpy(dst_p, vis_encodes[(int)*src_p]);
> -				dst_p +=3D j;
> -				olen +=3D j;
> -			} else if (127 =3D=3D *src_p) {
> -				strcpy(dst_p, "\\^?");
> -				olen +=3D 3;
> -			} else {
> -				*dst_p++ =3D *src_p;
> -				++olen;
> -			}
> -			++i;
> -			++src_p;
> -		} else if (0xC0 =3D=3D (0xE0 & *src_p)) {
> -			*dst_p++ =3D *src_p++; ++i; ++olen;
> -			if (i < len) { *dst_p++ =3D *src_p++; ++i; =
++olen; }
> -		} else if (0xE0 =3D=3D (0xF0 & *src_p)) {
> -			*dst_p++ =3D *src_p++; ++i; ++olen;
> -			if (i < len) { *dst_p++ =3D *src_p++; ++i; =
++olen; }
> -			if (i < len) { *dst_p++ =3D *src_p++; ++i; =
++olen; }
> -		} else if (0xF0 =3D=3D (0xF8 & *src_p)) {
> -			*dst_p++ =3D *src_p++; ++i; ++olen;
> -			if (i < len) { *dst_p++ =3D *src_p++; ++i; =
++olen; }
> -			if (i < len) { *dst_p++ =3D *src_p++; ++i; =
++olen; }
> -			if (i < len) { *dst_p++ =3D *src_p++; ++i; =
++olen; }
> -		} else {
> -			*dst_p++ =3D '?'; ++i; ++olen;
> -		}
> -	}
> -
> -	return olen;
> -}
> Index: usr.bin/top/utils.h
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- usr.bin/top/utils.h	(revision 335957)
> +++ usr.bin/top/utils.h	(working copy)
> @@ -22,5 +22,4 @@
> char *format_k(int64_t);
> int string_index(const char *string, const char * const *array);
> int find_pid(pid_t pid);
> -int utf8strvisx(char *, const char *, size_t);
>=20




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?FFD28241-ADD2-4926-8E2E-BBB33616C30A>