Date: Thu, 05 Jul 2018 01:37:15 +0900 (JST) From: Hiroki Sato <hrs@FreeBSD.org> To: daichigoto@icloud.com, daichi@freebsd.org Cc: lists@eitanadler.com, gnn@FreeBSD.org, cem@freebsd.org, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: Re: svn commit: r335836 - head/usr.bin/top Message-ID: <20180705.013715.810854236306970666.hrs@allbsd.org> In-Reply-To: <20180703.020956.859981414196673670.hrs@allbsd.org> References: <20180702.155529.1102410939281120947.hrs@allbsd.org> <459BD898-8072-426E-A968-96C1382AC616@icloud.com> <20180703.020956.859981414196673670.hrs@allbsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
----Security_Multipart0(Thu_Jul__5_01_37_15_2018_534)-- Content-Type: Multipart/Mixed; boundary="--Next_Part(Thu_Jul__5_01_37_15_2018_361)--" Content-Transfer-Encoding: 7bit ----Next_Part(Thu_Jul__5_01_37_15_2018_361)-- Content-Type: Text/Plain; charset=iso-2022-jp Content-Transfer-Encoding: 7bit Hiroki Sato <hrs@FreeBSD.org> wrote in <20180703.020956.859981414196673670.hrs@allbsd.org>: hr> $B8eF#BgCO(B <daichigoto@icloud.com> wrote hr> in <459BD898-8072-426E-A968-96C1382AC616@icloud.com>: hr> hr> da> hr> da> hr> da> > 2018/07/02 15:55$B!"(BHiroki Sato <hrs@FreeBSD.org>$B$N%a!<%k(B: hr> da> > hr> da> > Eitan Adler <lists@eitanadler.com> wrote hr> da> > in <CAF6rxg=Zjkf6EbSgt1fBQBUDHGKWwLf=n9ZJweJH+Di800kJ3w@mail.gmail.com>: hr> da> > hr> da> > li> On 1 July 2018 at 10:08, Conrad Meyer <cem@freebsd.org> wrote: hr> da> > li> > Hi Daichi, hr> da> > li> > hr> da> > li> > hr> da> > li> > hr> da> > li> > I don't think code to decode UTF-8 belongs in top(1). I don't know hr> da> > li> > what the goal of this routine is, but I doubt this is the right way to hr> da> > li> > accomplish it. hr> da> > li> hr> da> > li> For the record, I agree. This is why I didn't click "accept" on the hr> da> > li> revision. I don't fully oppose leaving it in top(1) for now as we work hr> da> > li> out the API, but long term its the wrong place. hr> da> > li> hr> da> > li> https://reviews.freebsd.org/D16058 is the review. hr> da> > hr> da> > I strongly object this kind of encoding-specific routine. Please hr> da> > back out it. The problem is that top(1) does not support multibyte hr> da> > encoding in functions for printing, and using C99 wide/multibyte hr> da> > character manipulation API such as iswprint(3) is the way to solve hr> da> > it. Doing getenv("LANG") and assuming an encoding based on it is a hr> da> > very bad practice to internationalize software. hr> da> > hr> da> > -- Hiroki hr> da> hr> da> I respect what you mean. hr> da> hr> da> Once I back out, I will begin implementing it in a different way. hr> da> Please advise which function should be used for implementation hr> da> (iswprint (3) and what other functions should be used?) hr> hr> Roughly speaking, POSIX/XPG/C99 I18N model requires the following hr> steps: (snip) Are you going to back out r335836, or disagree about it? If there is no objection in the next 24 hours, I will commit the attached patch. This should be a minimal change to support multibyte characters in ARGV array depending on LC_CTYPE, not limited to UTF-8. -- Hiroki ----Next_Part(Thu_Jul__5_01_37_15_2018_361)-- Content-Type: Text/X-Patch; charset=us-ascii Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="top_mbsupport.20180705-1.diff" Index: usr.bin/top/display.c =================================================================== --- usr.bin/top/display.c (revision 335957) +++ usr.bin/top/display.c (working copy) @@ -1248,55 +1248,6 @@ } } -/* - * printable(str) - make the string pointed to by "str" into one that is - * printable (i.e.: all ascii), by converting all non-printable - * characters into '?'. Replacements are done in place and a pointer - * to the original buffer is returned. - */ - -char * -printable(char str[]) -{ - char *ptr; - char ch; - - ptr = str; - if (utf8flag) { - while ((ch = *ptr) != '\0') { - if (0x00 == (0x80 & ch)) { - if (!isprint(ch)) { - *ptr = '?'; - } - ++ptr; - } else if (0xC0 == (0xE0 & ch)) { - ++ptr; - if ('\0' != *ptr) ++ptr; - } else if (0xE0 == (0xF0 & ch)) { - ++ptr; - if ('\0' != *ptr) ++ptr; - if ('\0' != *ptr) ++ptr; - } else if (0xF0 == (0xF8 & ch)) { - ++ptr; - if ('\0' != *ptr) ++ptr; - if ('\0' != *ptr) ++ptr; - if ('\0' != *ptr) ++ptr; - } else { - *ptr = '?'; - ++ptr; - } - } - } else { - while ((ch = *ptr) != '\0') { - if (!isprint(ch)) { - *ptr = '?'; - } - ptr++; - } - } - return(str); -} - void i_uptime(struct timeval *bt, time_t *tod) { Index: usr.bin/top/display.h =================================================================== --- usr.bin/top/display.h (revision 335957) +++ usr.bin/top/display.h (working copy) @@ -11,7 +11,6 @@ void clear_message(void); int display_resize(void); void i_header(const char *text); -char *printable(char *string); void display_header(int t); int display_init(struct statics *statics); void i_arc(int *stats); Index: usr.bin/top/machine.c =================================================================== --- usr.bin/top/machine.c (revision 335957) +++ usr.bin/top/machine.c (working copy) @@ -986,13 +986,8 @@ if (*src == '\0') continue; len = (argbuflen - (dst - argbuf) - 1) / 4; - if (utf8flag) { - utf8strvisx(dst, src, MIN(strlen(src), len)); - } else { - strvisx(dst, src, - MIN(strlen(src), len), - VIS_NL | VIS_CSTYLE); - } + strvisx(dst, src, MIN(strlen(src), len), + VIS_NL | VIS_CSTYLE | VIS_OCTAL); while (*dst != '\0') dst++; if ((argbuflen - (dst - argbuf) - 1) / 4 > 0) @@ -1089,7 +1084,7 @@ sbuf_printf(procbuf, "%6s ", format_time(cputime)); sbuf_printf(procbuf, "%6.2f%% ", ps.wcpu ? 100.0 * weighted_cpu(PCTCPU(pp), pp) : 100.0 * PCTCPU(pp)); } - sbuf_printf(procbuf, "%s", printable(cmdbuf)); + sbuf_printf(procbuf, "%s", cmdbuf); free(cmdbuf); return (sbuf_data(procbuf)); } Index: usr.bin/top/top.c =================================================================== --- usr.bin/top/top.c (revision 335957) +++ usr.bin/top/top.c (working copy) @@ -24,6 +24,7 @@ #include <errno.h> #include <getopt.h> #include <jail.h> +#include <locale.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -69,7 +70,6 @@ struct process_select ps; const char * myname = "top"; pid_t mypid; -bool utf8flag = false; /* pointers to display routines */ static void (*d_loadave)(int mpid, double *avenrun) = i_loadave; @@ -276,6 +276,11 @@ } } + if (setlocale(LC_ALL, "") == NULL) { + fprintf(stderr, "%s: invalid locale.\n", myname); + exit(1); + } + mypid = getpid(); /* get our name */ @@ -607,14 +612,6 @@ fputc('\n', stderr); } - /* check if you are using UTF-8 */ - char *env_lang; - if (NULL != (env_lang = getenv("LANG")) && - 0 != strcmp(env_lang, "") && - NULL != strstr(env_lang, "UTF-8")) { - utf8flag = true; - } - restart: /* Index: usr.bin/top/top.h =================================================================== --- usr.bin/top/top.h (revision 335957) +++ usr.bin/top/top.h (working copy) @@ -8,7 +8,6 @@ #define TOP_H #include <unistd.h> -#include <stdbool.h> /* Number of lines of header information on the standard screen */ extern int Header_lines; @@ -38,7 +37,6 @@ extern int pcpu_stats; extern int overstrike; extern pid_t mypid; -extern bool utf8flag; extern const char * myname; Index: usr.bin/top/utils.c =================================================================== --- usr.bin/top/utils.c (revision 335957) +++ usr.bin/top/utils.c (working copy) @@ -2,7 +2,6 @@ * This program may be freely redistributed, * but this entire comment MUST remain intact. * - * Copyright (c) 2018, Daichi Goto * Copyright (c) 2018, Eitan Adler * Copyright (c) 1984, 1989, William LeFebvre, Rice University * Copyright (c) 1989, 1990, 1992, William LeFebvre, Northwestern University @@ -329,61 +328,3 @@ kvm_close(kd); return ret; } - -/* - * utf8strvisx(dst,src,src_len) - * strvisx(dst,src,src_len,VIS_NL|VIS_CSTYLE) coresponding to UTF-8. - */ -static const char *vis_encodes[] = { - "\\0", "\\^A", "\\^B", "\\^C", "\\^D", "\\^E", "\\^F", "\\a", - "\\b", "\t", "\\n", "\\v", "\\f", "\\r", "\\^N", "\\^O", "\\^P", - "\\^Q", "\\^R", "\\^S", "\\^T", "\\^U", "\\^V", "\\^W", "\\^X", - "\\^Y", "\\^Z", "\\^[", "\\^\\", "\\^]", "\\^^", "\\^_" -}; - -int -utf8strvisx(char *dst, const char *src, size_t src_len) -{ - const signed char *src_p; - char *dst_p; - int i, j, olen, len; - - src_p = src; - dst_p = dst; - i = olen = 0; - len = (int)src_len; - while (i < len) { - if (0x00 == (0x80 & *src_p)) { - if (0 <= *src_p && *src_p <= 31) { - j = strlen(vis_encodes[(int)*src_p]); - strcpy(dst_p, vis_encodes[(int)*src_p]); - dst_p += j; - olen += j; - } else if (127 == *src_p) { - strcpy(dst_p, "\\^?"); - olen += 3; - } else { - *dst_p++ = *src_p; - ++olen; - } - ++i; - ++src_p; - } else if (0xC0 == (0xE0 & *src_p)) { - *dst_p++ = *src_p++; ++i; ++olen; - if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; } - } else if (0xE0 == (0xF0 & *src_p)) { - *dst_p++ = *src_p++; ++i; ++olen; - if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; } - if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; } - } else if (0xF0 == (0xF8 & *src_p)) { - *dst_p++ = *src_p++; ++i; ++olen; - if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; } - if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; } - if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; } - } else { - *dst_p++ = '?'; ++i; ++olen; - } - } - - return olen; -} Index: usr.bin/top/utils.h =================================================================== --- usr.bin/top/utils.h (revision 335957) +++ usr.bin/top/utils.h (working copy) @@ -22,5 +22,4 @@ char *format_k(int64_t); int string_index(const char *string, const char * const *array); int find_pid(pid_t pid); -int utf8strvisx(char *, const char *, size_t); ----Next_Part(Thu_Jul__5_01_37_15_2018_361)---- ----Security_Multipart0(Thu_Jul__5_01_37_15_2018_534)-- Content-Type: application/pgp-signature Content-Transfer-Encoding: 7bit -----BEGIN PGP SIGNATURE----- iEYEABECAAYFAls897sACgkQTyzT2CeTzy0hWgCdGFlfYH9+dCrZ53Nm0hm1qqL3 XLcAnROqVtAT85GtvSdg/spBq1kqH3Kq =jnqj -----END PGP SIGNATURE----- ----Security_Multipart0(Thu_Jul__5_01_37_15_2018_534)----
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20180705.013715.810854236306970666.hrs>