Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 9 Jan 1999 17:53:00 +0100 (CET)
From:      Philippe Charnier <charnier@xp11.frmug.org>
To:        FreeBSD-gnats-submit@FreeBSD.ORG
Subject:   bin/9405: split(1) when line matches a regex
Message-ID:  <199901091653.RAA03486@xp11.frmug.org>

next in thread | raw e-mail | index | archive | help


>Number:         9405
>Category:       bin
>Synopsis:       split(1) when line matches a regex
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sat Jan  9 09:10:00 PST 1999
>Closed-Date:
>Last-Modified:
>Originator:     Philippe Charnier
>Release:        FreeBSD 3.0-CURRENT i386
>Organization:
>Environment:

	

>Description:

New flag (-p pattern) that makes split(1) capable of splitting a file whenever
the line being read matches a given pattern. I first needed this change when
breaking a big diff -r output into one file per patch:

%split -p '^Index: ' big-patch-file patch-

>How-To-Repeat:

>Fix:
	
Here is the patch to review; please also correct my English in the man
page where needed.


Index: split.1
===================================================================
RCS file: /home0h/FreeBSD.cvsroot/src/usr.bin/split/split.1,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 split.1
--- split.1	1994/05/27 12:32:42	1.1.1.1
+++ split.1	1999/01/09 16:29:52
@@ -30,6 +30,7 @@
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)split.1	8.3 (Berkeley) 4/16/94
+.\"	$Id$
 .\"
 .Dd April 16, 1994
 .Dt SPLIT 1
@@ -41,6 +42,7 @@
 .Nm split
 .Op Fl b Ar byte_count[k|m]
 .Op Fl l Ar line_count
+.Op Fl p Ar pattern
 .Op Ar file Op Ar name
 .Sh DESCRIPTION
 The
@@ -70,6 +72,13 @@
 Create smaller files
 .Ar n
 lines in length.
+.It Fl p Ar pattern
+The file is split when the analyzed line contains a match to the given
+.Ar pattern .
+Interpret
+.Ar pattern
+as an extended regular expression.  See
+.Xr regex 3 .
 .El
 .Pp
 If additional arguments are specified, the first is used as the name
Index: split.c
===================================================================
RCS file: /home0h/FreeBSD.cvsroot/src/usr.bin/split/split.c,v
retrieving revision 1.4
diff -u -r1.4 split.c
--- split.c	1997/08/11 07:30:22	1.4
+++ split.c	1997/08/17 10:45:30
@@ -44,6 +44,7 @@
 #endif /* not lint */
 
 #include <sys/param.h>
+#include <sys/types.h>
 
 #include <ctype.h>
 #include <err.h>
@@ -52,6 +53,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <regex.h>
 
 #define DEFLINE	1000			/* Default num lines per file. */
 
@@ -61,6 +63,8 @@
 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
 char	 bfr[MAXBSIZE];			/* I/O buffer. */
 char	 fname[MAXPATHLEN];		/* File name prefix. */
+regex_t	 rgx;
+int		 pflag;
 
 void newfile __P((void));
 void split1 __P((void));
@@ -75,7 +79,8 @@
 	int ch;
 	char *ep, *p;
 
-	while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
+	pflag = 0;
+	while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
@@ -108,6 +113,11 @@
 			else if (*ep == 'm')
 				bytecnt *= 1048576;
 			break;
+		case 'p' :      /* pattern matching. */
+			pflag = 1;
+			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)    
+				errx(1, "%s: illegal regexp", optarg);
+            break;
 		case 'l':		/* Line count. */
 			if (numlines != 0)
 				usage();
@@ -144,6 +154,8 @@
 		exit (0);
 	}
 	split2();
+	if (pflag)
+		regfree(&rgx);
 	exit(0);
 }
 
@@ -220,18 +232,25 @@
 				newfile();
 				file_open = 1;
 			}
-			for (Cs = Ce = bfr; len--; Ce++)
-				if (*Ce == '\n' && ++lcnt == numlines) {
-					bcnt = Ce - Cs + 1;
-					if (write(ofd, Cs, bcnt) != bcnt)
-						err(1, "write");
-					lcnt = 0;
-					Cs = Ce + 1;
-					if (len)
-						newfile();
-					else
-						file_open = 0;
+			for (Cs = Ce = bfr; len--; Ce++) {
+				if (*Ce != '\n') continue;
+				if (pflag) {
+				  if (0 != regexec(&rgx, Ce + 1, 0, NULL, 0))
+					continue;
+				} else {
+				  if (++lcnt != numlines)
+					continue;
 				}
+				bcnt = Ce - Cs + 1;
+				if (write(ofd, Cs, bcnt) != bcnt)
+					err(1, "write");
+				lcnt = 0;
+				Cs = Ce + 1;
+				if (len)
+					newfile();
+				else
+					file_open = 0;
+			}
 			if (Cs < Ce) {
 				bcnt = Ce - Cs;
 				if (write(ofd, Cs, bcnt) != bcnt)
@@ -284,6 +303,6 @@
 usage()
 {
 	(void)fprintf(stderr,
-"usage: split [-b byte_count] [-l line_count] [file [prefix]]\n");
+"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n");
 	exit(1);
 }
>Release-Note:
>Audit-Trail:
>Unformatted:

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199901091653.RAA03486>