From owner-freebsd-bugs@FreeBSD.ORG Mon Mar 7 21:20:03 2005 Return-Path: Delivered-To: freebsd-bugs@hub.freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id B601616A4CE for ; Mon, 7 Mar 2005 21:20:03 +0000 (GMT) Received: from freefall.freebsd.org (freefall.freebsd.org [216.136.204.21]) by mx1.FreeBSD.org (Postfix) with ESMTP id 348EB43D60 for ; Mon, 7 Mar 2005 21:20:03 +0000 (GMT) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (gnats@localhost [127.0.0.1]) by freefall.freebsd.org (8.13.3/8.13.3) with ESMTP id j27LK3Uv021261 for ; Mon, 7 Mar 2005 21:20:03 GMT (envelope-from gnats@freefall.freebsd.org) Received: (from gnats@localhost) by freefall.freebsd.org (8.13.3/8.13.1/Submit) id j27LK3EB021260; Mon, 7 Mar 2005 21:20:03 GMT (envelope-from gnats) Resent-Date: Mon, 7 Mar 2005 21:20:03 GMT Resent-Message-Id: <200503072120.j27LK3EB021260@freefall.freebsd.org> Resent-From: FreeBSD-gnats-submit@FreeBSD.org (GNATS Filer) Resent-To: freebsd-bugs@FreeBSD.org Resent-Reply-To: FreeBSD-gnats-submit@FreeBSD.org, Dmitrij Tejblum Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id D458516A4CE for ; Mon, 7 Mar 2005 21:11:21 +0000 (GMT) Received: from developer.yandex.ru (developer.yandex.ru [213.180.193.15]) by mx1.FreeBSD.org (Postfix) with ESMTP id B952943D68 for ; Mon, 7 Mar 2005 21:11:20 +0000 (GMT) (envelope-from tejblum@developer.yandex.ru) Received: from developer.yandex.ru (localhost [127.0.0.1]) by developer.yandex.ru (8.13.3/8.13.1) with ESMTP id j27LBIjH015914 for ; Tue, 8 Mar 2005 00:11:18 +0300 (MSK) (envelope-from tejblum@developer.yandex.ru) Received: (from tejblum@localhost) by developer.yandex.ru (8.13.3/8.13.1/Submit) id j27LBIxC015913; Tue, 8 Mar 2005 00:11:18 +0300 (MSK) (envelope-from tejblum) Message-Id: <200503072111.j27LBIxC015913@developer.yandex.ru> Date: Tue, 8 Mar 2005 00:11:18 +0300 (MSK) From: Dmitrij 
Tejblum To: FreeBSD-gnats-submit@FreeBSD.org X-Send-Pr-Version: 3.113 Subject: bin/78562: Add numerical sorting option to join(1) X-BeenThere: freebsd-bugs@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list Reply-To: Dmitrij Tejblum List-Id: Bug reports List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 07 Mar 2005 21:20:03 -0000 >Number: 78562 >Category: bin >Synopsis: Add numerical sorting option to join(1) >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: change-request >Submitter-Id: current-users >Arrival-Date: Mon Mar 07 21:20:02 GMT 2005 >Closed-Date: >Last-Modified: >Originator: Dmitrij Tejblum >Release: FreeBSD 5.4-PRERELEASE i386 >Organization: >Environment: >Description: join(1) requires input files to be lexicographically sorted. Thus, if someone has numerically sorted files, they must re-sort them in lexicographic order, which is inconvenient. Linux (GNU) join(1) has an -n option to handle numerically sorted files. 
>How-To-Repeat: >Fix: --- join.c Mon Mar 7 23:14:49 2005 +++ join.c Mon Mar 7 23:52:19 2005 @@ -105,7 +105,8 @@ static wchar_t default_tabchar[] = L" \t"; wchar_t *tabchar = default_tabchar;/* delimiter characters (-t) */ -int cmp(LINE *, u_long, LINE *, u_long); +int cmp(LINE *, u_long, LINE *, u_long, int); +int cmpnum(long long, long long); void fieldarg(char *); void joinlines(INPUT *, INPUT *); int mbscoll(const char *, const char *); @@ -114,7 +115,7 @@ void outfield(LINE *, u_long, int); void outoneline(INPUT *, LINE *); void outtwoline(INPUT *, LINE *, INPUT *, LINE *); -void slurp(INPUT *); +void slurp(INPUT *, int); wchar_t *towcs(const char *); void usage(void); @@ -122,7 +123,7 @@ main(int argc, char *argv[]) { INPUT *F1, *F2; - int aflag, ch, cval, vflag; + int aflag, ch, cval, nflag, vflag; char *end; setlocale(LC_ALL, ""); @@ -130,9 +131,9 @@ F1 = &input1; F2 = &input2; - aflag = vflag = 0; + aflag = nflag = vflag = 0; obsolete(argv); - while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) { + while ((ch = getopt(argc, argv, "\01na:e:j:1:2:o:t:v:")) != -1) { switch (ch) { case '\01': /* See comment in obsolete(). */ aflag = 1; @@ -180,6 +181,9 @@ --F1->joinf; --F2->joinf; break; + case 'n': + nflag = 1; + break; case 'o': fieldarg(optarg); break; @@ -234,26 +238,26 @@ if (F1->fp == stdin && F2->fp == stdin) errx(1, "only one input file may be stdin"); - slurp(F1); - slurp(F2); + slurp(F1, nflag); + slurp(F2, nflag); while (F1->setcnt && F2->setcnt) { - cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf); + cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf, nflag); if (cval == 0) { /* Oh joy, oh rapture, oh beauty divine! */ if (joinout) joinlines(F1, F2); - slurp(F1); - slurp(F2); + slurp(F1, nflag); + slurp(F2, nflag); } else if (cval < 0) { /* File 1 takes the lead... */ if (F1->unpair) joinlines(F1, NULL); - slurp(F1); + slurp(F1, nflag); } else { /* File 2 takes the lead... 
*/ if (F2->unpair) joinlines(F2, NULL); - slurp(F2); + slurp(F2, nflag); } } @@ -264,18 +268,18 @@ if (F1->unpair) while (F1->setcnt) { joinlines(F1, NULL); - slurp(F1); + slurp(F1, nflag); } if (F2->unpair) while (F2->setcnt) { joinlines(F2, NULL); - slurp(F2); + slurp(F2, nflag); } exit(0); } void -slurp(INPUT *F) +slurp(INPUT *F, int nflag) { LINE *lp, *lastlp, tmp; size_t len; @@ -355,7 +359,7 @@ } /* See if the join field value has changed. */ - if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) { + if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf, nflag)) { F->pushbool = 1; F->pushback = F->setcnt; break; @@ -393,13 +397,25 @@ } int -cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2) +cmpnum(long long a, long long b) +{ + if (a < b) + return (-1); + else if (a == b) + return 0; + else + return 1; +} + +int +cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2, int nflag) { if (lp1->fieldcnt <= fieldno1) return (lp2->fieldcnt <= fieldno2 ? 0 : 1); if (lp2->fieldcnt <= fieldno2) return (-1); - return (mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2])); + return (nflag ? cmpnum(atoll(lp1->fields[fieldno1]), atoll(lp2->fields[fieldno2])): + mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2])); } int @@ -664,6 +680,6 @@ (void)fprintf(stderr, "%s %s\n%s\n", "usage: join [-a fileno | -v fileno ] [-e string] [-1 field]", "[-2 field]", - " [-o list] [-t char] file1 file2"); + " [-o list] [-n] [-t char] file1 file2"); exit(1); } --- join.1 Mon Mar 7 23:39:17 2005 +++ join.1 Tue Mar 8 00:00:27 2005 @@ -50,6 +50,7 @@ .Op Fl o Ar list .Bk -words .Ek +.Op Fl n .Op Fl t Ar char .Op Fl \&1 Ar field .Op Fl \&2 Ar field @@ -93,6 +94,8 @@ .It Fl e Ar string Replace empty output fields with .Ar string . +.It Fl n +Assume numerically sorted input files. .It Fl o Ar list The .Fl o @@ -158,6 +161,13 @@ without the .Fl b option. +When the option +.Fl n +is used, the files to be joined should be ordered as with +.Xr sort 1 +with +.Fl n +option. 
.Pp If one of the arguments .Ar file1 @@ -211,6 +221,11 @@ .Nm command conforms to .St -p1003.1-2001 . +The +.Fl n +option is a non-standard +.Fx +extension. .Sh SEE ALSO .Xr awk 1 , .Xr comm 1 , >Release-Note: >Audit-Trail: >Unformatted: