Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 30 Aug 2017 20:43:51 -0700
From:      Mark Millard <markmi@dsl-only.net>
To:        Don Lewis <truckman@FreeBSD.org>
Cc:        linimon@lonesome.com, freebsd-toolchain@freebsd.org, freebsd-ports@freebsd.org, freebsd-ppc@freebsd.org
Subject:   Re: FYI: qemu-ppc64-static and qemu-ppc-static "live-hang" when I attempt use with poudriere; qemu-arm-static and qemu-aarch64-static work
Message-ID:  <9B916738-394B-48B7-AA2E-6193F54760B3@dsl-only.net>
In-Reply-To: <201708302332.v7UNWSVY073465@gw.catspoiler.org>
References:  <201708302332.v7UNWSVY073465@gw.catspoiler.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On 2017-Aug-30, at 4:32 PM, Don Lewis <truckman at FreeBSD.org> wrote:

> On 30 Aug, Mark Millard wrote:
>> On 2017-Aug-30, at 4:00 AM, Mark Linimon <linimon at lonesome.com> =
wrote:
>>=20
>>> On Wed, Aug 30, 2017 at 03:09:40AM -0700, Mark Millard wrote:
>>>> It appears that qemu-ppc64-static and qemu-ppc-static from
>>>> emulators/qemu-user-static are broken.
>>>=20
>>> Correct, and known for some time.  (fwiw sparc64 hangs as well.)
>>=20
>> Looks like qemu-ppc64-static is stuck in a loop, calling
>> repeatedly:
>>=20
>> do_freebsd_syscall (cpu_env=3D0x860ea3ac0, num=3D58, arg1=3D14, =
arg2=3D35995509911, arg3=3D1024, arg4=3D268435904, arg5=3D281494784, =
arg6=3D35985701568, arg7=3D515, arg8=3D35985668288)
>>    at =
/wrkdirs/usr/ports/emulators/qemu-user-static/work/qemu-bsd-user-17977d0/b=
sd-user/syscall.c:210
>> 210	=
/wrkdirs/usr/ports/emulators/qemu-user-static/work/qemu-bsd-user-17977d0/b=
sd-user/syscall.c: No such file or directory.
>>=20
>> Which is for:
>>=20
>> 58      AUE_READLINK    STD     { ssize_t readlink(char *path, char =
*buf, \
>>                                    size_t count); }
>>=20
>> As confirmed by (note the "callq  0x60207360 <readlink>" ):
>>=20
>> (gdb)=20
>> lock_user_string (guest_addr=3D14) at =
/wrkdirs/usr/ports/emulators/qemu-user-static/work/qemu-bsd-user-17977d0/b=
sd-user/qemu.h:508
>> 508	=
/wrkdirs/usr/ports/emulators/qemu-user-static/work/qemu-bsd-user-17977d0/b=
sd-user/qemu.h: No such file or directory.
>>=20
>> (gdb) x/64i 0x0000000060045d3e
>> =3D> 0x60045d3e <do_freebsd_syscall+3246>:	callq  0x6004fd20 =
<target_strlen>
>>   0x60045d43 <do_freebsd_syscall+3251>:	test   %rax,%rax
>>   0x60045d46 <do_freebsd_syscall+3254>:	js     0x6004b99c =
<do_freebsd_syscall+26892>
>>   0x60045d4c <do_freebsd_syscall+3260>:	inc    %rax
>>   0x60045d4f <do_freebsd_syscall+3263>:	mov    $0x1,%edx
>>   0x60045d54 <do_freebsd_syscall+3268>:	mov    %rbx,%rdi
>>   0x60045d57 <do_freebsd_syscall+3271>:	mov    %rax,%rsi
>>   0x60045d5a <do_freebsd_syscall+3274>:	callq  0x6003c430 =
<page_check_range>
>>   0x60045d5f <do_freebsd_syscall+3279>:	test   %eax,%eax
>>   0x60045d61 <do_freebsd_syscall+3281>:	jne    0x6004bce4 =
<do_freebsd_syscall+27732>
>>   0x60045d67 <do_freebsd_syscall+3287>:	add    =
0x26d91b2(%rip),%rbx        # 0x6271ef20 <guest_base>
>>   0x60045d6e <do_freebsd_syscall+3294>:	je     0x6004bce4 =
<do_freebsd_syscall+27732>
>>   0x60045d74 <do_freebsd_syscall+3300>:	mov    $0x3,%edx
>>   0x60045d79 <do_freebsd_syscall+3305>:	mov    -0x2a8(%rbp),%r14
>>   0x60045d80 <do_freebsd_syscall+3312>:	mov    %r14,%rdi
>>   0x60045d83 <do_freebsd_syscall+3315>:	mov    %r12,%rsi
>>   0x60045d86 <do_freebsd_syscall+3318>:	callq  0x6003c430 =
<page_check_range>
>>   0x60045d8b <do_freebsd_syscall+3323>:	test   %eax,%eax
>>   0x60045d8d <do_freebsd_syscall+3325>:	jne    0x6004bce4 =
<do_freebsd_syscall+27732>
>>   0x60045d93 <do_freebsd_syscall+3331>:	add    =
0x26d9186(%rip),%r14        # 0x6271ef20 <guest_base>
>>   0x60045d9a <do_freebsd_syscall+3338>:	mov    =
-0x294(%rbp),%r10d
>>   0x60045da1 <do_freebsd_syscall+3345>:	mov    =
$0xfffffffffffffff2,%r13
>>   0x60045da8 <do_freebsd_syscall+3352>:	je     0x6004bcf2 =
<do_freebsd_syscall+27746>
>>   0x60045dae <do_freebsd_syscall+3358>:	mov    $0x602b93da,%esi
>>   0x60045db3 <do_freebsd_syscall+3363>:	mov    %rbx,%rdi
>>   0x60045db6 <do_freebsd_syscall+3366>:	callq  0x60230af0 =
<strcmp>
>>   0x60045dbb <do_freebsd_syscall+3371>:	test   %eax,%eax
>>   0x60045dbd <do_freebsd_syscall+3373>:	je     0x6004c566 =
<do_freebsd_syscall+29910>
>>   0x60045dc3 <do_freebsd_syscall+3379>:	mov    %rbx,%rdi
>>   0x60045dc6 <do_freebsd_syscall+3382>:	callq  0x60158660 <path>
>>   0x60045dcb <do_freebsd_syscall+3387>:	mov    %rax,%rdi
>>   0x60045dce <do_freebsd_syscall+3390>:	mov    %r14,%rsi
>>   0x60045dd1 <do_freebsd_syscall+3393>:	mov    %r12,%rdx
>>   0x60045dd4 <do_freebsd_syscall+3396>:	callq  0x60207360 =
<readlink>
>>=20
>> But note that the "lock_user_string (guest_addr=3D14)" and
>> "do_freebsd_syscall (cpu_env=3D0x860ea3ac0, num=3D58, arg1=3D14,"
>> indicate that the "readlink(char *path," is using a really
>> small address for the path string.
>>=20
>>=20
>> I've not figured a way for poudriere bulk builds to leave
>> behind the source code automatically. So far I've not
>> looked at the qemu-bsd-user source code. I do build with
>> both debug and optimization turned on via bsd.port.mk
>> having:
>=20
> The -w option will create a tarball of the work directory if the
> package build fails.  I also often use the testport -i option I want =
to
> poke around in the WRKDIR after a build.

I've been using -w right along. But I'd not used testport at all.

It looks to me like the syscall errno handling is messed
up. The details that I've observed follow. They are
presented in a simplified version of the order in which
I discovered them.

The looping code is:

static inline void target_cpu_loop(CPUPPCState *env)
{
    CPUState *cs =3D CPU(ppc_env_get_cpu(env));
    target_siginfo_t info;
    int trapnr;
    target_ulong ret;
           =20
    for(;;) {
        cpu_exec_start(cs);
        trapnr =3D cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);
            =20
        switch(trapnr) {
. . .
        case POWERPC_EXCP_SYSCALL_USER:
            /* system call in user-mode emulation */
            /* WARNING:
             * PPC ABI uses overflow flag in cr0 to signal an error
             * in syscalls.
             */
            env->crf[0] &=3D ~0x1;
            ret =3D do_freebsd_syscall(env, env->gpr[0], env->gpr[3], =
env->gpr[4],
                             env->gpr[5], env->gpr[6], env->gpr[7],
                             env->gpr[8], env->gpr[9], env->gpr[10]);
            if (ret =3D=3D (target_ulong)(-TARGET_QEMU_ESIGRETURN)) {
                /* Returning from a successful sigreturn syscall.
                   Avoid corrupting register state.  */
                break;
            }
            if (ret > (target_ulong)(-515)) {
                env->crf[0] |=3D 0x1;
                ret =3D -ret;
            }
            env->gpr[3] =3D ret;
            break;
. . .
        }
        process_pending_signals(env);
    }
}

The observed env->gpr[3] == 14 is from a prior loop
iteration having ret == 14 in the:

            env->gpr[3] =3D ret;

Prior to this were the values (as seen via
lock_user_string):

guest_addr=3D278408977
guest_addr=3D2

That 2 also came from the prior ret == 2 in the:

            env->gpr[3] =3D ret;

from when the readlink of 278408977 was being attempted.

Both the ret == 2 and the ret == 14 values were from:

                ret =3D -ret;

so the return values from do_freebsd_syscall were
-2 and -14 (interpreted as signed).

The return values trace back to the following code,
where TARGET_EFAULT == 14:

static inline abi_long do_bsd_readlink(CPUArchState *env, abi_long arg1,
        abi_long arg2, abi_long arg3)
{
    abi_long ret;
    void *p1, *p2;
       =20
    LOCK_PATH(p1, arg1);
    p2 =3D lock_user(VERIFY_WRITE, arg2, arg3, 0);
    if (p2 =3D=3D NULL) {
        UNLOCK_PATH(p1, arg1);
        return -TARGET_EFAULT;
    }
#ifdef __FreeBSD__
    if (strcmp(p1, "/proc/curproc/file") =3D=3D 0) {
        CPUState *cpu =3D ENV_GET_CPU(env);
        TaskState *ts =3D (TaskState *)cpu->opaque;
        strncpy(p2, ts->bprm->fullpath, arg3);
        ret =3D MIN((abi_long)strlen(ts->bprm->fullpath), arg3);
    } else
#endif
    ret =3D get_errno(readlink(path(p1), p2, arg3));
    unlock_user(p2, arg2, ret);
    UNLOCK_PATH(p1, arg1);

    return ret;
}

The 2 is from:

    ret =3D get_errno(readlink(path(p1), p2, arg3));

At the time the p1 points to "/etc/malloc.conf":

(gdb) step
path (name=0x10982f11 "/etc/malloc.conf") at util/path.c:173

169	const char *path(const char *name)
170	{
171	    /* Only do absolute paths: quick and dirty, but should =
mostly be OK.
172	       Could do relative by tracking cwd. */
(gdb)=20
173	    if (!base || !name || name[0] !=3D '/')
174	        return name;
175=09
176	    return follow_path(base, name) ?: name;
177	}

(gdb) print base
$8 = (struct pathelem *) 0x0

So name is returned unchanged.


The 2 is in turn from:

#define	__ENOENT	2		/* No such file or directory */


Overall one oddity is that this code structure
seems to use -ret from:

            ret =3D do_freebsd_syscall(env, env->gpr[0], env->gpr[3], =
env->gpr[4],
                             env->gpr[5], env->gpr[6], env->gpr[7],
                             env->gpr[8], env->gpr[9], env->gpr[10]);

to retry the same operation again the next iteration,
but with env->gpr[3] == -ret (where ret is the value
returned by do_freebsd_syscall).

Once abs(ret) == 14 it is fully stuck repeating itself.

I've no clue if:

            env->gpr[3] =3D ret;

even makes sense here.

I've not tried to track down the memory leak activity
that is associated.

Nor have I checked anything for the:

        cpu_exec_start(cs);
        trapnr =3D cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);

activity. It likely contributes to why the loop
retries the readlink again (with a junk address
for the path).

===
Mark Millard
markmi at dsl-only.net




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?9B916738-394B-48B7-AA2E-6193F54760B3>