Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 13 Nov 2021 19:10:54 +0000
From:      Jessica Clarke <jrtc27@freebsd.org>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        "src-committers@freebsd.org" <src-committers@freebsd.org>, "dev-commits-src-all@freebsd.org" <dev-commits-src-all@freebsd.org>, "dev-commits-src-main@freebsd.org" <dev-commits-src-main@freebsd.org>
Subject:   Re: git: 64ba1f4cf3a6 - main - rtld: Implement LD_SHOW_AUXV
Message-ID:  <BF2534B3-7A3A-4006-9105-5AACDD728AA4@freebsd.org>
In-Reply-To: <A76167C8-7A1D-41C2-A9F2-A3A2D990A4BA@freebsd.org>
References:  <202111131733.1ADHXekX049248@gitrepo.freebsd.org> <37FC39AA-925D-4D75-8E0A-EA14E846E3A6@freebsd.org> <110784F6-3A7A-4F27-AAEB-E9B5A8F7CF0E@freebsd.org> <2450270B-CB98-43D0-B3BE-3C6D02F9B6FD@freebsd.org> <YZAK7b8PzqlaS85b@kib.kiev.ua> <YZAMvZdyEsxXodm0@kib.kiev.ua> <A76167C8-7A1D-41C2-A9F2-A3A2D990A4BA@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On 13 Nov 2021, at 19:09, Jessica Clarke <jrtc27@freebsd.org> wrote:
>=20
> On 13 Nov 2021, at 19:06, Konstantin Belousov <kostikbel@gmail.com> =
wrote:
>> On Sat, Nov 13, 2021 at 08:59:00PM +0200, Konstantin Belousov wrote:
>>> On Sat, Nov 13, 2021 at 06:29:24PM +0000, Jessica Clarke wrote:
>>>> On 13 Nov 2021, at 17:57, Jessica Clarke <jrtc27@FreeBSD.org> =
wrote:
>>>>>=20
>>>>> On 13 Nov 2021, at 17:54, Jessica Clarke <jrtc27@FreeBSD.org> =
wrote:
>>>>>>=20
>>>>>> On 13 Nov 2021, at 17:33, Konstantin Belousov <kib@FreeBSD.org> =
wrote:
>>>>>>>=20
>>>>>>> The branch main has been updated by kib:
>>>>>>>=20
>>>>>>> URL: =
https://cgit.FreeBSD.org/src/commit/?id=3D64ba1f4cf3a6847a1dacf4bab0409d94=
898fa168
>>>>>>>=20
>>>>>>> commit 64ba1f4cf3a6847a1dacf4bab0409d94898fa168
>>>>>>> Author:     Konstantin Belousov <kib@FreeBSD.org>
>>>>>>> AuthorDate: 2021-11-13 01:18:13 +0000
>>>>>>> Commit:     Konstantin Belousov <kib@FreeBSD.org>
>>>>>>> CommitDate: 2021-11-13 17:33:13 +0000
>>>>>>>=20
>>>>>>> rtld: Implement LD_SHOW_AUXV
>>>>>>>=20
>>>>>>> It dumps auxv as seen by interpreter, right before starting any =
user
>>>>>>> code.
>>>>>>>=20
>>>>>>> Copied from:    glibc
>>>>>>> Sponsored by:   The FreeBSD Foundation
>>>>>>> MFC after:      1 week
>>>>>>> ---
>>>>>>> libexec/rtld-elf/rtld.1 |  7 +++++-
>>>>>>> libexec/rtld-elf/rtld.c | 67 =
+++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>> 2 files changed, 73 insertions(+), 1 deletion(-)
>>>>>>>=20
>>>>>>> diff --git a/libexec/rtld-elf/rtld.1 b/libexec/rtld-elf/rtld.1
>>>>>>> index 187dc105667a..66aa2bdabd17 100644
>>>>>>> --- a/libexec/rtld-elf/rtld.1
>>>>>>> +++ b/libexec/rtld-elf/rtld.1
>>>>>>> @@ -28,7 +28,7 @@
>>>>>>> .\"
>>>>>>> .\" $FreeBSD$
>>>>>>> .\"
>>>>>>> -.Dd August 15, 2021
>>>>>>> +.Dd November 13, 2021
>>>>>>> .Dt RTLD 1
>>>>>>> .Os
>>>>>>> .Sh NAME
>>>>>>> @@ -309,6 +309,11 @@ will process the filtee dependencies of the =
loaded objects immediately,
>>>>>>> instead of postponing it until required.
>>>>>>> Normally, the filtees are opened at the time of the first symbol =
resolution
>>>>>>> from the filter object.
>>>>>>> +.It Ev LD_SHOW_AUXV
>>>>>>> +If set, causes
>>>>>>> +.Nm
>>>>>>> +to dump content of the aux vector to standard output, before =
passing
>>>>>>> +control to any user code.
>>>>>>> .El
>>>>>>> .Sh DIRECT EXECUTION MODE
>>>>>>> .Nm
>>>>>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
>>>>>>> index c173c5a6e22e..0475134b0d96 100644
>>>>>>> --- a/libexec/rtld-elf/rtld.c
>>>>>>> +++ b/libexec/rtld-elf/rtld.c
>>>>>>> @@ -104,6 +104,7 @@ static Obj_Entry *dlopen_object(const char =
*name, int fd, Obj_Entry *refobj,
>>>>>>> static Obj_Entry *do_load_object(int, const char *, char *, =
struct stat *, int);
>>>>>>> static int do_search_info(const Obj_Entry *obj, int, struct =
dl_serinfo *);
>>>>>>> static bool donelist_check(DoneList *, const Obj_Entry *);
>>>>>>> +static void dump_auxv(Elf_Auxinfo **aux_info);
>>>>>>> static void errmsg_restore(struct dlerror_save *);
>>>>>>> static struct dlerror_save *errmsg_save(void);
>>>>>>> static void *fill_search_info(const char *, size_t, void *);
>>>>>>> @@ -364,6 +365,7 @@ enum {
>>>>>>> 	LD_TRACE_LOADED_OBJECTS_FMT1,
>>>>>>> 	LD_TRACE_LOADED_OBJECTS_FMT2,
>>>>>>> 	LD_TRACE_LOADED_OBJECTS_ALL,
>>>>>>> +	LD_SHOW_AUXV,
>>>>>>> };
>>>>>>>=20
>>>>>>> struct ld_env_var_desc {
>>>>>>> @@ -396,6 +398,7 @@ static struct ld_env_var_desc ld_env_vars[] =
=3D {
>>>>>>> 	LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false),
>>>>>>> 	LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false),
>>>>>>> 	LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false),
>>>>>>> +	LD_ENV_DESC(SHOW_AUXV, false),
>>>>>>> };
>>>>>>>=20
>>>>>>> static const char *
>>>>>>> @@ -857,6 +860,9 @@ _rtld(Elf_Addr *sp, func_ptr_type =
*exit_proc, Obj_Entry **objp)
>>>>>>> if (rtld_verify_versions(&list_main) =3D=3D -1 && !ld_tracing)
>>>>>>> 	rtld_die();
>>>>>>>=20
>>>>>>> +    if (ld_get_env_var(LD_SHOW_AUXV) !=3D NULL)
>>>>>>> +       dump_auxv(aux_info);
>>>>>>> +
>>>>>>> if (ld_tracing) {		/* We're done */
>>>>>>> 	trace_loaded_objects(obj_main);
>>>>>>> 	exit(0);
>>>>>>> @@ -6058,6 +6064,67 @@ print_usage(const char *argv0)
>>>>>>> 	    "  <args>    Arguments to the executed process\n", =
argv0);
>>>>>>> }
>>>>>>>=20
>>>>>>> +#define	AUXFMT(at, xfmt) [at] =3D { .name =3D #at, .fmt =
=3D xfmt }
>>>>>>> +static const struct auxfmt {
>>>>>>> +	const char *name;
>>>>>>> +	const char *fmt;
>>>>>>> +} auxfmts[] =3D {
>>>>>>> +	AUXFMT(AT_NULL, NULL),
>>>>>>> +	AUXFMT(AT_IGNORE, NULL),
>>>>>>> +	AUXFMT(AT_EXECFD, "%d"),
>>>>>>> +	AUXFMT(AT_PHDR, "%p"),
>>>>>>> +	AUXFMT(AT_PHENT, "%u"),
>>>>>>> +	AUXFMT(AT_PHNUM, "%u"),
>>>>>>> +	AUXFMT(AT_PAGESZ, "%u"),
>>>>>>> +	AUXFMT(AT_BASE, "%#lx"),
>>>>>>> +	AUXFMT(AT_FLAGS, "%#lx"),
>>>>>>> +	AUXFMT(AT_ENTRY, "%p"),
>>>>>>> +	AUXFMT(AT_NOTELF, NULL),
>>>>>>> +	AUXFMT(AT_UID, "%d"),
>>>>>>> +	AUXFMT(AT_EUID, "%d"),
>>>>>>> +	AUXFMT(AT_GID, "%d"),
>>>>>>> +	AUXFMT(AT_EGID, "%d"),
>>>>>>> +	AUXFMT(AT_EXECPATH, "%s"),
>>>>>>> +	AUXFMT(AT_CANARY, "%p"),
>>>>>>> +	AUXFMT(AT_CANARYLEN, "%u"),
>>>>>>> +	AUXFMT(AT_OSRELDATE, "%u"),
>>>>>>> +	AUXFMT(AT_NCPUS, "%u"),
>>>>>>> +	AUXFMT(AT_PAGESIZES, "%p"),
>>>>>>> +	AUXFMT(AT_PAGESIZESLEN, "%u"),
>>>>>>> +	AUXFMT(AT_TIMEKEEP, "%p"),
>>>>>>> +	AUXFMT(AT_STACKPROT, "%#x"),
>>>>>>> +	AUXFMT(AT_EHDRFLAGS, "%#lx"),
>>>>>>> +	AUXFMT(AT_HWCAP, "%#lx"),
>>>>>>> +	AUXFMT(AT_HWCAP2, "%#lx"),
>>>>>>> +	AUXFMT(AT_BSDFLAGS, "%#lx"),
>>>>>>> +	AUXFMT(AT_ARGC, "%u"),
>>>>>>> +	AUXFMT(AT_ARGV, "%p"),
>>>>>>> +	AUXFMT(AT_ENVC, "%p"),
>>>>>>> +	AUXFMT(AT_ENVV, "%p"),
>>>>>>> +	AUXFMT(AT_PS_STRINGS, "%p"),
>>>>>>> +	AUXFMT(AT_FXRNG, "%p"),
>>>>>>> +};
>>>>>>> +
>>>>>>> +static void
>>>>>>> +dump_auxv(Elf_Auxinfo **aux_info)
>>>>>>> +{
>>>>>>> +	Elf_Auxinfo *auxp;
>>>>>>> +	const struct auxfmt *fmt;
>>>>>>> +	int i;
>>>>>>> +
>>>>>>> +	for (i =3D 0; i < AT_COUNT; i++) {
>>>>>>> +		auxp =3D aux_info[i];
>>>>>>> +		if (auxp =3D=3D NULL)
>>>>>>> +			continue;
>>>>>>> +		fmt =3D &auxfmts[i];
>>>>>>> +		if (fmt->fmt =3D=3D NULL)
>>>>>>> +			continue;
>>>>>>> +		rtld_fdprintf(STDOUT_FILENO, "%s:\t", =
fmt->name);
>>>>>>> +		rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, =
auxp->a_un.a_ptr);
>>>>>>> +		rtld_fdprintf(STDOUT_FILENO, "\n");
>>>>>>=20
>>>>>> This is undefined behaviour, breaks CHERI, and totally unnecessary. You
>>>>>> have a handful of cases here, just make an enum and have separate
>>>>>> rtld_fdprintf calls.
>>>>=20
>>>> In particular, ignoring CHERI, unsigned ints are sign-extended to 64
>>>> bits on MIPS and RISC-V. Thus by passing a 64-bit value but using a %u,
>>>> you are violating the calling convention. I can’t currently get GCC or
>>>> Clang to exploit the fact that varargs arguments are sign-extended, but
>>>> on MIPS, and RISC-V GCC (Clang is currently stupid and round-trips via
>>>> memory even when the va_arg calls have no branching surrounding them,
>>>> rather than just grabbing from the register) there is a redundant
>>>> sext.w that can legally be optimised out, but would be broken by this
>>>> calling convention violation.
>>> I might understand the argument that all non-pointer formats for auxv
>>> should be longs, i.e. %lu/%ld/%lx, but this is the only problem I see
>>> there. We do rely on having specific representations for addresses and
>>> longs, and a low-level component as rtld has full rights to exercise
>>> this fact, same as VM subsystem or memory allocators.
>>>=20
>>> In fact ELF spec exercises this as well.
>>> Our arches are either ILP32 or LP64.
>>>=20
>>>>=20
>>>> Then CHERI makes it worse because a_ptr and a_val do not have the same
>>>> representation, although in practice I think passing a_ptr and nothing
>>>> further does end up working on CHERI-RISC-V and Morello, just not
>>>> CHERI-MIPS due to being big-endian.
>>=20
>> Ok, the following should be enough for CHERI, right?
>>=20
>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
>> index 0475134b0d96..cf467ae7aacd 100644
>> --- a/libexec/rtld-elf/rtld.c
>> +++ b/libexec/rtld-elf/rtld.c
>> @@ -6071,33 +6071,33 @@ static const struct auxfmt {
>> } auxfmts[] =3D {
>> 	AUXFMT(AT_NULL, NULL),
>> 	AUXFMT(AT_IGNORE, NULL),
>> -	AUXFMT(AT_EXECFD, "%d"),
>> +	AUXFMT(AT_EXECFD, "%ld"),
>> 	AUXFMT(AT_PHDR, "%p"),
>> -	AUXFMT(AT_PHENT, "%u"),
>> -	AUXFMT(AT_PHNUM, "%u"),
>> -	AUXFMT(AT_PAGESZ, "%u"),
>> +	AUXFMT(AT_PHENT, "%lu"),
>> +	AUXFMT(AT_PHNUM, "%lu"),
>> +	AUXFMT(AT_PAGESZ, "%lu"),
>> 	AUXFMT(AT_BASE, "%#lx"),
>> 	AUXFMT(AT_FLAGS, "%#lx"),
>> 	AUXFMT(AT_ENTRY, "%p"),
>> 	AUXFMT(AT_NOTELF, NULL),
>> -	AUXFMT(AT_UID, "%d"),
>> -	AUXFMT(AT_EUID, "%d"),
>> -	AUXFMT(AT_GID, "%d"),
>> -	AUXFMT(AT_EGID, "%d"),
>> +	AUXFMT(AT_UID, "%ld"),
>> +	AUXFMT(AT_EUID, "%ld"),
>> +	AUXFMT(AT_GID, "%ld"),
>> +	AUXFMT(AT_EGID, "%ld"),
>> 	AUXFMT(AT_EXECPATH, "%s"),
>> 	AUXFMT(AT_CANARY, "%p"),
>> -	AUXFMT(AT_CANARYLEN, "%u"),
>> -	AUXFMT(AT_OSRELDATE, "%u"),
>> -	AUXFMT(AT_NCPUS, "%u"),
>> +	AUXFMT(AT_CANARYLEN, "%lu"),
>> +	AUXFMT(AT_OSRELDATE, "%lu"),
>> +	AUXFMT(AT_NCPUS, "%lu"),
>> 	AUXFMT(AT_PAGESIZES, "%p"),
>> -	AUXFMT(AT_PAGESIZESLEN, "%u"),
>> +	AUXFMT(AT_PAGESIZESLEN, "%lu"),
>> 	AUXFMT(AT_TIMEKEEP, "%p"),
>> -	AUXFMT(AT_STACKPROT, "%#x"),
>> +	AUXFMT(AT_STACKPROT, "%#lx"),
>> 	AUXFMT(AT_EHDRFLAGS, "%#lx"),
>> 	AUXFMT(AT_HWCAP, "%#lx"),
>> 	AUXFMT(AT_HWCAP2, "%#lx"),
>> 	AUXFMT(AT_BSDFLAGS, "%#lx"),
>> -	AUXFMT(AT_ARGC, "%u"),
>> +	AUXFMT(AT_ARGC, "%lu"),
>> 	AUXFMT(AT_ARGV, "%p"),
>> 	AUXFMT(AT_ENVC, "%p"),
>> 	AUXFMT(AT_ENVV, "%p"),
>> @@ -6105,6 +6105,15 @@ static const struct auxfmt {
>> 	AUXFMT(AT_FXRNG, "%p"),
>> };
>>=20
>> +static bool
>> +is_ptr_fmt(const char *fmt)
>> +{
>> +	char last;
>> +
>> +	last = fmt[strlen(fmt) - 1];
>> +	return (last == 'p' || last == 's');
>> +}
>> +
>> static void
>> dump_auxv(Elf_Auxinfo **aux_info)
>> {
>> @@ -6120,7 +6129,8 @@ dump_auxv(Elf_Auxinfo **aux_info)
>> 		if (fmt->fmt == NULL)
>> 			continue;
>> 		rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
>> -		rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
>> +		rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, is_ptr_fmt(fmt->fmt) ?
>> +		    auxp->a_un.a_ptr : auxp->a_un.a_val);
>> 		rtld_fdprintf(STDOUT_FILENO, "\n");
>> 	}
>> }
>=20
> That should indeed work, though I’d argue it’s still not as nice as
> avoiding rtld_fdprintfx entirely.

Wait, no, it doesn’t, the ternary means both operands need to have the
same type, so you end up implicitly casting the long to a pointer. You
need a real if and two different rtld_fdprintfx calls (or just do it as
I’ve suggested).

Jess




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?BF2534B3-7A3A-4006-9105-5AACDD728AA4>