Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 25 Jul 2025 16:35:11 GMT
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 7b9c912c41f4 - main - find: Add GNU find's -printf
Message-ID:  <202507251635.56PGZB73098553@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=7b9c912c41f484b0fe75b30fbac465cc984e56b2

commit 7b9c912c41f484b0fe75b30fbac465cc984e56b2
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2025-07-25 16:30:06 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2025-07-25 16:34:37 +0000

    find: Add GNU find's -printf
    
    Implements most of gnu find's -printf predicate. However, the '#', '-',
    '.' and size format modifiers are unimplemented, as are %P, %H, %F, %y,
    and %Y formats. Follows what I think it should do based on the info
    page, I've not looked at the gnu find code.
    
    Sponsored by:           Netflix
    Discussed with:         des, jilles
    Reviewed by:            pauamma@gundo.com (man)
    Differential Revision: https://reviews.freebsd.org/D38138
---
 usr.bin/find/Makefile   |   2 +-
 usr.bin/find/extern.h   |   4 +
 usr.bin/find/find.1     |  84 +++++++++++++
 usr.bin/find/find.h     |   2 +
 usr.bin/find/function.c |  27 +++++
 usr.bin/find/option.c   |   2 +-
 usr.bin/find/printf.c   | 307 ++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 426 insertions(+), 2 deletions(-)

diff --git a/usr.bin/find/Makefile b/usr.bin/find/Makefile
index 904c08620833..48b164133bb0 100644
--- a/usr.bin/find/Makefile
+++ b/usr.bin/find/Makefile
@@ -3,7 +3,7 @@
 PACKAGE=	runtime
 
 PROG=	find
-SRCS=	find.c function.c ls.c main.c misc.c operator.c option.c \
+SRCS=	find.c function.c ls.c main.c misc.c operator.c option.c printf.c \
 	getdate.y
 YFLAGS=
 CFLAGS.clang+=	-Werror=undef
diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h
index feb2e0202056..6760ac24fb04 100644
--- a/usr.bin/find/extern.h
+++ b/usr.bin/find/extern.h
@@ -44,6 +44,8 @@ void	 printlong(char *, char *, struct stat *);
 int	 queryuser(char **);
 OPTION	*lookup_option(const char *);
 void	 finish_execplus(void);
+void	 do_printf(PLAN *plan, FTSENT *entry, FILE *fout);
+
 
 creat_f	c_Xmin;
 creat_f	c_Xtime;
@@ -68,6 +70,7 @@ creat_f	c_nogroup;
 creat_f	c_nouser;
 creat_f	c_perm;
 creat_f	c_print;
+creat_f	c_printf;
 creat_f	c_regex;
 creat_f	c_samefile;
 creat_f	c_simple;
@@ -106,6 +109,7 @@ exec_f	f_path;
 exec_f	f_perm;
 exec_f	f_print;
 exec_f	f_print0;
+exec_f	f_printf;
 exec_f	f_prune;
 exec_f	f_quit;
 exec_f	f_readable;
diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1
index 8c2d8624a82a..1217d9151168 100644
--- a/usr.bin/find/find.1
+++ b/usr.bin/find/find.1
@@ -821,6 +821,17 @@ It prints the pathname of the current file to standard output, followed by an
 ASCII
 .Dv NUL
 character (character code 0).
+.It Ic -printf Ar fmt
+This primary always evaluates to true.
+It prints information about the file, interpreting
+.Sq \e
+and
+.Sq %
+escape sequences as described in the PRINTF FORMATS section.
+Unlike
+.Ic -print ,
+.Ic -printf
+does not add a newline automatically.
 .It Ic -prune
 This primary always evaluates to true.
 It causes
@@ -993,6 +1004,79 @@ All operands and primaries must be separate arguments to
 Primaries which themselves take arguments expect each argument
 to be a separate argument to
 .Nm .
+.Sh PRINTF FORMATS
+The following
+.Sq \e
+escapes are recognized:
+.Bl -tag -width Ds -offset indent -compact
+.It Cm \ea
+Write a <bell> character.
+.It Cm \eb
+Write a <backspace> character.
+.It Cm \ec
+Writes no characters, but terminates the string and flushes the output so far
+after each match.
+.It Cm \ef
+Write a <form-feed> character.
+.It Cm \en
+Write a <new-line> character.
+.It Cm \er
+Write a <carriage return> character.
+.It Cm \et
+Write a <tab> character.
+.It Cm \ev
+Write a <vertical tab> character.
+.It Cm \e\'
+Write a <single quote> character.
+.It Cm \e\e
+Write a backslash character.
+.It Cm \e Ns Ar num
+Write a byte whose
+value is the 1-, 2-, or 3-digit
+octal number
+.Ar num .
+Multibyte characters can be constructed using multiple
+.Cm \e Ns Ar num
+sequences.
+.El
+.Pp
+Each format specification is introduced by the percent character
+(``%'').
+The remainder of the format specification includes,
+in the following order:
+.Bl -tag -width Ds
+.It "Zero or more of the following flags:"
+.Bl -tag -width Ds
+.It Cm #
+A `#' character, which has no effect on almost all formats.
+It is not yet implemented.
+.It Cm \&\-
+A minus sign `\-' which specifies
+.Em left adjustment
+of the output in the indicated field.
+It is not yet implemented.
+.It "Field Width:"
+An optional digit string specifying a
+.Em field width ;
+if the output string has fewer bytes than the field width it will
+be blank-padded on the left (or right, if the left-adjustment indicator
+has been given) to make up the field width (note that a leading zero
+is a flag, but an embedded zero is part of a field width).
+It is not yet implemented.
+.It Precision:
+An optional period,
+.Sq Cm \&.\& ,
+followed by an optional digit string giving a
+.Em precision
+which specifies the maximum number of bytes to be printed
+from a string; if the digit string is missing, the precision is treated
+as zero.
+It is not yet implemented.
+.It Format:
+One or two characters, described below, which indicate the information to display.
+XXX need to write this.
+.El
+.El
 .Sh ENVIRONMENT
 The
 .Ev LANG , LC_ALL , LC_COLLATE , LC_CTYPE , LC_MESSAGES
diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h
index 1664eeb9a93f..2ddb70fd7bcc 100644
--- a/usr.bin/find/find.h
+++ b/usr.bin/find/find.h
@@ -97,6 +97,8 @@ typedef	struct _plandata *creat_f(struct _option *, char ***);
 #define	F_TIME2_B	0x00080000	/* one of -newer?B */
 #endif
 #define F_LINK		0x00100000	/* lname or ilname */
+/* Notes about execution */
+#define F_HAS_WARNED	0x10000000	/* Has issued a warning for maybe bad input */
 
 /* node definition */
 typedef struct _plandata {
diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c
index ef610903cc00..21dfab8fe408 100644
--- a/usr.bin/find/function.c
+++ b/usr.bin/find/function.c
@@ -1388,6 +1388,33 @@ f_print0(PLAN *plan __unused, FTSENT *entry)
 
 /* c_print0 is the same as c_print */
 
+/*
+ * -printf functions --
+ *
+ *	Always true. Causes information as specified in the
+ *	argument to be written to standard output.
+ *
+ *	f_printf passes the plan node and the current entry to do_printf()
+ *	with stdout as the destination and unconditionally returns 1.
+ *	c_printf allocates the plan node and stores the argument that follows
+ *	the primary in c_data, where do_printf() reads it as the format.
+ */
+int
+f_printf(PLAN *plan, FTSENT *entry)
+{
+	do_printf(plan, entry, stdout);
+	return 1;
+}
+
+PLAN *
+c_printf(OPTION *option, char ***argvp)
+{
+	PLAN *new;
+
+	isoutput = 1;	/* presumably marks that an output primary is present -- confirm */
+	ftsoptions &= ~FTS_NOSTAT;	/* do_printf() reads entry->fts_statp */
+
+	new = palloc(option);
+	new->c_data = nextarg(option, argvp);	/* the format string */
+
+	return (new);
+}
+
 /*
  * -prune functions --
  *
diff --git a/usr.bin/find/option.c b/usr.bin/find/option.c
index 268803343a8d..79fa581e79f5 100644
--- a/usr.bin/find/option.c
+++ b/usr.bin/find/option.c
@@ -148,7 +148,7 @@ static OPTION const options[] = {
 	{ "-perm",	c_perm,		f_perm,		0 },
 	{ "-print",	c_print,	f_print,	0 },
 	{ "-print0",	c_print,	f_print0,	0 },
-// -printf
+	{ "-printf",	c_printf,	f_printf,	0 },
 	{ "-prune",	c_simple,	f_prune,	0 },
 	{ "-quit",	c_simple,	f_quit,		0 },
 	{ "-readable",	c_simple,	f_readable,	0 },
diff --git a/usr.bin/find/printf.c b/usr.bin/find/printf.c
new file mode 100644
index 000000000000..643f04b9cef5
--- /dev/null
+++ b/usr.bin/find/printf.c
@@ -0,0 +1,307 @@
+/*-
+ * Copyright (c) 2023, Netflix, Inc
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fts.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "find.h"
+
+/*
+ * Translation table for \X escapes, indexed by (X - 'a') for 'a' through 'v':
+ * each entry is the character \X expands to, or X itself if X has no special
+ * meaning.
+ */
+static const char *esc = "\a\bcde\fghijklm\nopq\rs\tu\v";
+
+static inline bool
+isoct(char c)
+{
+	return (c >= '0' && c <= '7');	/* true only for the octal digits 0-7 */
+}
+
+static inline bool
+isesc(char c)
+{
+	return (c >= 'a' && c <= 'v' && esc[c - 'a'] != c);	/* in table range and not self-mapped */
+}
+
+/*
+ * Expand the backslash escapes in str and return the result as a newly
+ * allocated NUL-terminated string that the caller must free().
+ *
+ * *flush is set to true when a \c escape ended the string early and to false
+ * otherwise.  *warned is an in/out latch: the diagnostic for an unrecognized
+ * escape is only printed while it is false, and it is set to true afterwards.
+ *
+ * NOTE(review): the result is returned as a C string, so a \0 octal escape
+ * ends the returned format at that point instead of yielding a NUL byte.
+ */
+static const char *
+escape(const char *str, bool *flush, bool *warned)
+{
+	char c;
+	int value;
+	char *tmpstr;
+	size_t tmplen;
+	FILE *fp;
+
+	fp = open_memstream(&tmpstr, &tmplen);
+	if (fp == NULL)
+		err(1, "open_memstream");
+
+	/*
+	 * Copy str into the memory stream, expanding the different ANSI
+	 * escape sequences along the way.
+	 */
+	*flush = false;
+	for (c = *str++; c; c = *str++) {
+		if (c != '\\') {
+			putc(c, fp);
+			continue;
+		}
+		c = *str++;
+
+		/*
+		 * User error: \ at end of string.  Emit it literally and stop,
+		 * since str now points past the terminating NUL.
+		 */
+		if (c == '\0') {
+			putc('\\', fp);
+			break;
+		}
+
+		/*
+		 * \c terminates output now and is supposed to flush the output
+		 * too...
+		 */
+		if (c == 'c') {
+			*flush = true;
+			break;
+		}
+
+		/*
+		 * Is it octal? If so, decode up to 3 octal characters.  The
+		 * loop reads one character too many, hence the str-- below.
+		 */
+		if (isoct(c)) {
+			value = 0;
+			for (int i = 3; i-- > 0 && isoct(c);
+			     c = *str++) {
+				value <<= 3;
+				value += c - '0';
+			}
+			str--;
+			putc((char)value, fp);
+			continue;
+		}
+
+		/*
+		 * It's an ANSI X3.159-1989 escape, use the mini-escape lookup
+		 * table to translate.
+		 */
+		if (isesc(c)) {
+			putc(esc[c - 'a'], fp);
+			continue;
+		}
+
+		/*
+		 * \\ and \' are documented escapes that stand for themselves,
+		 * so they must not trigger the warning below.
+		 */
+		if (c == '\\' || c == '\'') {
+			putc(c, fp);
+			continue;
+		}
+
+		/*
+		 * Otherwise, it's self inserting. gnu find specifically says
+		 * not to rely on this behavior though. gnu find will issue
+		 * a warning here, while printf(1) won't.  Use warnx(): errno
+		 * has nothing to do with a bad format string.
+		 */
+		if (!*warned) {
+			warnx("Unknown character %c after \\.", c);
+			*warned = true;
+		}
+		putc(c, fp);
+	}
+	/* Closing the stream is what finalizes tmpstr. */
+	if (fclose(fp) != 0)
+		err(1, "open_memstream");
+
+	return (tmpstr);
+}
+
+/*
+ * Write t to fp in ctime(3) format, without the trailing newline.  Nothing is
+ * written when the time cannot be converted.
+ */
+static void
+fp_ctime(FILE *fp, time_t t)
+{
+	char s[26];
+
+	/* On failure the contents of s are unspecified, so don't print them. */
+	if (ctime_r(&t, s) == NULL)
+		return;
+	s[24] = '\0';	/* kill newline, though gnu find info silent on issue */
+	fputs(s, fp);
+}
+
+/*
+ * Write t to fp, formatted by the single strftime(3) conversion character mod,
+ * or as seconds since the epoch when mod is '@'.
+ *
+ * Assumes all times are displayed in UTC rather than local time, gnu find info
+ * page silent on the issue.
+ *
+ * Also assumes that gnu find doesn't support multiple character escape sequences,
+ * which its info page is also silent on.
+ */
+static void
+fp_strftime(FILE *fp, time_t t, char mod)
+{
+	struct tm tm;
+	char buffer[128];
+	char fmt[3] = "% ";
+
+	/*
+	 * '@' is handled here rather than by strftime(3) -- seconds since epoch.
+	 * Used in Linux kernel build, so we kinda have to support it here.
+	 * Printed as a signed value so that pre-epoch times stay negative.
+	 */
+	if (mod == '@')	{
+		fprintf(fp, "%jd", (intmax_t)t);
+		return;
+	}
+
+	/* Leave the field empty if t cannot be broken down into a struct tm. */
+	if (gmtime_r(&t, &tm) == NULL)
+		return;
+	fmt[1] = mod;
+	if (strftime(buffer, sizeof(buffer), fmt, &tm) == 0)
+		errx(1, "Format bad or data too long for buffer"); /* Can't really happen ??? */
+	fputs(buffer, fp);
+}
+
+/*
+ * Expand the -printf format stored in plan->c_data for entry and write the
+ * result to fout.
+ *
+ * escape() first expands the backslash sequences; the '%' directives are then
+ * replaced using entry and its stat data.  The text for one entry is built in
+ * a memory stream and written to fout in one piece, and fout is flushed when
+ * the format contained \c.  Unimplemented or unknown directives, and format
+ * modifiers, end the program through errx().
+ */
+void
+do_printf(PLAN *plan, FTSENT *entry, FILE *fout)
+{
+	const char *fmt, *path, *pend, *all;
+	char c;
+	FILE *fp;
+	bool flush, warned;
+	struct stat *sb;
+	char *tmp;
+	size_t tmplen;
+
+	fp = open_memstream(&tmp, &tmplen);
+	if (fp == NULL)
+		err(1, "open_memstream");
+	/* The "already warned" latch lives in the plan flags across calls. */
+	warned = (plan->flags & F_HAS_WARNED) != 0;
+	all = fmt = escape(plan->c_data, &flush, &warned);
+	if (warned)
+		plan->flags |= F_HAS_WARNED;
+	sb = entry->fts_statp;
+	for (c = *fmt++; c; c = *fmt++) {
+		if (c != '%') {
+			putc(c, fp);
+			continue;
+		}
+		c = *fmt++;
+		/*
+		 * %A, %B, %C and %T consume one more character.  Insist that it
+		 * exists so fmt is never advanced beyond the terminating NUL.
+		 */
+		if (c != '\0' && strchr("ABCT", c) != NULL && *fmt == '\0')
+			errx(1, "Missing time format after %%%c: '%s'", c, all);
+		/* Style(9) deviation: case order same as gnu find info doc */
+		switch (c) {
+		case '%':
+			putc(c, fp);
+			break;
+		case 'p': /* Path to file */
+			fputs(entry->fts_path, fp);
+			break;
+		case 'f': /* filename w/o dirs */
+			fputs(entry->fts_name, fp);
+			break;
+		case 'h':
+			/*
+			 * path, relative to the starting point, of the file, or
+			 * '.' if that's empty for some reason.
+			 */
+			path = entry->fts_path;
+			pend = strrchr(path, '/');
+			if (pend == NULL)
+				putc('.', fp);
+			else
+				fwrite(path, 1, (size_t)(pend - path), fp);
+			break;
+		case 'P': /* file with command line arg rm'd -- HOW? fts_parent? */
+			errx(1, "%%%c is unimplemented", c);
+		case 'H': /* Command line arg -- HOW? */
+			errx(1, "%%%c is unimplemented", c);
+		case 'g': /* gid human readable */
+			fputs(group_from_gid(sb->st_gid, 0), fp);
+			break;
+		case 'G': /* gid numeric */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_gid);
+			break;
+		case 'u': /* uid human readable */
+			fputs(user_from_uid(sb->st_uid, 0), fp);
+			break;
+		case 'U': /* uid numeric */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_uid);
+			break;
+		case 'm': /* mode in octal */
+			fprintf(fp, "%o", (unsigned int)(sb->st_mode & 07777));
+			break;
+		case 'M': { /* Mode in ls-standard form */
+			char mode[12];
+			strmode(sb->st_mode, mode);
+			fputs(mode, fp);
+			break;
+		}
+		/*
+		 * The stat fields below have typedef'd integer types whose
+		 * width differs between platforms, so widen them explicitly
+		 * instead of guessing a length modifier.
+		 */
+		case 'k': /* kbytes used by file */
+			fprintf(fp, "%jd", (intmax_t)(sb->st_blocks / 2));
+			break;
+		case 'b': /* blocks used by file */
+			fprintf(fp, "%jd", (intmax_t)sb->st_blocks);
+			break;
+		case 's': /* size in bytes of file */
+			fprintf(fp, "%jd", (intmax_t)sb->st_size);
+			break;
+		case 'S': /* sparseness of file */
+			/* Empty file with no blocks: report 1.0, don't do 0/0. */
+			if (sb->st_size == 0 && sb->st_blocks == 0)
+				fputs("1.0", fp);
+			else
+				fprintf(fp, "%3.1f",
+				    (float)sb->st_blocks * 512 /
+				    (float)sb->st_size);
+			break;
+		case 'd': /* Depth in tree */
+			fprintf(fp, "%jd", (intmax_t)entry->fts_level);
+			break;
+		case 'D': /* device number */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_dev);
+			break;
+		case 'F': /* Filesystem type */
+			errx(1, "%%%c is unimplemented", c);
+		case 'l': { /* object of symbolic link, empty if not a link */
+			char target[PATH_MAX];
+			ssize_t len;
+
+			if (!S_ISLNK(sb->st_mode))
+				break;
+			/* readlink() does not NUL-terminate; write by length. */
+			len = readlink(entry->fts_accpath, target,
+			    sizeof(target));
+			if (len > 0)
+				fwrite(target, 1, (size_t)len, fp);
+			break;
+		}
+		case 'i': /* inode # */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_ino);
+			break;
+		case 'n': /* number of hard links */
+			fprintf(fp, "%ju", (uintmax_t)sb->st_nlink);
+			break;
+		case 'y': /* -type of file, incl 'l' */
+			errx(1, "%%%c is unimplemented", c);
+		case 'Y': /* -type of file, following 'l' types L loop ? error */
+			errx(1, "%%%c is unimplemented", c);
+		case 'a': /* access time ctime */
+			fp_ctime(fp, sb->st_atime);
+			break;
+		case 'A': /* access time with next char strftime format */
+			fp_strftime(fp, sb->st_atime, *fmt++);
+			break;
+		case 'B': /* birth time with next char strftime format */
+			/*
+			 * NOTE(review): if find.h defines this macro on every
+			 * platform (to 0 or 1), this test must be #if rather
+			 * than #ifdef -- confirm against find.h.
+			 */
+#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
+			if (sb->st_birthtime != 0)
+				fp_strftime(fp, sb->st_birthtime, *fmt);
+#endif
+			fmt++;
+			break;	/* blank on systems that don't support it */
+		case 'c': /* status change time ctime */
+			fp_ctime(fp, sb->st_ctime);
+			break;
+		case 'C': /* status change time with next char strftime format */
+			fp_strftime(fp, sb->st_ctime, *fmt++);
+			break;
+		case 't': /* modification change time ctime */
+			fp_ctime(fp, sb->st_mtime);
+			break;
+		case 'T': /* modification time with next char strftime format */
+			fp_strftime(fp, sb->st_mtime, *fmt++);
+			break;
+		case 'Z': /* empty string for compat SELinux context string */
+			break;
+		/* Modifier parsing here, but also need to modify above somehow */
+		case '#': case '-': case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9': case '.':
+			errx(1, "Format modifier %c not yet supported: '%s'", c, all);
+		/* Any FreeBSD-specific modifications here -- none yet */
+		default:
+			errx(1, "Unknown format %c '%s'", c, all);
+		}
+	}
+	/*
+	 * The memory stream's buffer and length are only guaranteed to be
+	 * current after fclose() (or fflush()), so close it before using tmp.
+	 * This also releases the stream, which would otherwise leak per entry.
+	 */
+	if (fclose(fp) != 0)
+		err(1, "open_memstream");
+	fwrite(tmp, 1, tmplen, fout);
+	if (flush)
+		fflush(fout);
+	/* fmt has walked past the end; all still points at the allocation. */
+	free(__DECONST(char *, all));
+	free(tmp);
+}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202507251635.56PGZB73098553>