Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 01 Nov 2017 01:09:41 +0100
From:      Andreas Longwitz <longwitz@incore.de>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        freebsd-hackers@freebsd.org
Subject:   Re: double fault on 10.3-Stable i386 during installworld
Message-ID:  <59F910C5.8020709@incore.de>
In-Reply-To: <20171001180943.GO95911@kib.kiev.ua>
References:  <59D11664.1060206@incore.de> <20171001180943.GO95911@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
Hello hackers,

I still try to find the reason for the "double fault" described in my
previous post and can give some more information:

On CPU3 runs the thread with tid 100099:

(kgdb) tid 100099
Switching to thread 85 (Thread 100099)]#0  0xc0bb6825 in cpustop_handler
() at /usr/src/sys/i386/i386/mp_machdep.c:1506
1506            savectx(&stoppcbs[cpu]);
(kgdb) f 4
#4  0xc0bb6177 in smp_tlb_shootdown (vector=<value optimized out>,
addr1=<value optimized out>) at /usr/src/sys/i386/i386/mp_machdep.c:1242
1242            while (smp_tlb_wait < ncpu)
(kgdb) list
1237            mtx_lock_spin(&smp_ipi_mtx);
1238            smp_tlb_addr1 = addr1;
1239            smp_tlb_addr2 = addr2;
1240            atomic_store_rel_int(&smp_tlb_wait, 0);
1241            ipi_all_but_self(vector);
1242            while (smp_tlb_wait < ncpu)
1243                    ia32_pause();
1244            mtx_unlock_spin(&smp_ipi_mtx);
1245    }
1246
(kgdb) p smp_tlb_wait
$1 = 2
(kgdb) p mp_ncpus
$2 = 4
(kgdb) p smp_ipi_mtx
$3 = {lock_object = {lo_name = 0xc0cc4ba0 "smp rendezvous", lo_flags =
196608, lo_data = 0, lo_witness = 0x0}, mtx_lock = 3372713664}
(kgdb) p/x smp_ipi_mtx
$4 = {lock_object = {lo_name = 0xc0cc4ba0, lo_flags = 0x30000, lo_data =
0x0, lo_witness = 0x0}, mtx_lock = 0xc90786c0}
(kgdb) f 5
#5  0xc0bb6214 in smp_invlpg (addr=<value optimized out>) at
/usr/src/sys/i386/i386/mp_machdep.c:1311
1311                    smp_tlb_shootdown(IPI_INVLPG, addr, 0);
(kgdb) list
1306    void
1307    smp_invlpg(vm_offset_t addr)
1308    {
1309
1310            if (smp_started) {
1311                    smp_tlb_shootdown(IPI_INVLPG, addr, 0);
1312    #ifdef COUNT_XINVLTLB_HITS
1313                    ipi_page++;
1314    #endif
1315

So tid 100099 holds the "smp rendezvous" spin lock, has send the ipi
message IPI_INVLPG to the other CPU's and waits for answer of the last CPU.

On CPU0 runs the thread with tid 100014:

(kgdb) tid 100014
[Switching to thread 37 (Thread 100014)]#0  0xc0bb6825 in
cpustop_handler () at /usr/src/sys/i386/i386/mp_machdep.c:1506
1506            savectx(&stoppcbs[cpu]);
(kgdb) f 5
#5  0xc0bb613d in smp_tlb_shootdown (vector=246, addr1=3891986432) at
/usr/src/sys/i386/i386/mp_machdep.c:1237
1237            mtx_lock_spin(&smp_ipi_mtx);
(kgdb) list
1232            ncpu = mp_ncpus - 1;    /* does not shootdown self */
1233            if (ncpu < 1)
1234                    return;         /* no other cpus */
1235            if (!(read_eflags() & PSL_I))
1236                    panic("%s: interrupts disabled", __func__);
1237            mtx_lock_spin(&smp_ipi_mtx);
1238            smp_tlb_addr1 = addr1;
1239            smp_tlb_addr2 = addr2;
1240            atomic_store_rel_int(&smp_tlb_wait, 0);
1241            ipi_all_but_self(vector);
(kgdb) f 6
#6  0xc0bb6243 in smp_invlpg_range (addr1=<value optimized out>,
addr2=<value optimized out>) at /usr/src/sys/i386/i386/mp_machdep.c:1323
1323                    smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
(kgdb) list
1318    void
1319    smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
1320    {
1321
1322            if (smp_started) {
1323                    smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
1324    #ifdef COUNT_XINVLTLB_HITS
1325                    ipi_range++;
1326                    ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
1327    #endif

So tid 100014 wants to send the ipi messages IPI_INVLRNG and waits on
the "smp rendezvous" spin lock held by tid 100099.

CPU1 idles in thread 100004, nothing special.

The fault happens on CPU2 running tid 100005:

(kgdb) p/x __pcpu[2]
$20 = {pc_curthread = 0xc7ebd000, pc_idlethread = 0xc7ebd000,
pc_fpcurthread = 0x0, pc_deadthread = 0x0, pc_curpcb = 0xe437fd40,
pc_switchtime = 0x5bbaecb51c,
  pc_switchticks = 0xd3cf, pc_cpuid = 0x2, pc_allcpu = {stqe_next =
0xc0eade80}, pc_spinlocks = 0x0, pc_cnt = {v_swtch = 0xdfe6, v_trap =
0x2dafc,
    v_syscall = 0x3c0c0, v_intr = 0xe, v_soft = 0x0, v_vm_faults =
0x2ce48, v_io_faults = 0x64, v_cow_faults = 0x7104, v_cow_optim = 0x5,
v_zfod = 0x255b3,
    v_ozfod = 0x2574, v_swapin = 0x0, v_swapout = 0x0, v_swappgsin =
0x0, v_swappgsout = 0x0, v_vnodein = 0x40a, v_vnodeout = 0x0,
v_vnodepgsin = 0xc4b,
    v_vnodepgsout = 0x0, v_intrans = 0x1, v_reactivated = 0x0,
v_pdwakeups = 0x0, v_pdpages = 0x6a, v_tcached = 0x250, v_dfree = 0x0,
v_pfree = 0x17689,
    v_tfree = 0x27abc, v_page_size = 0x0, v_page_count = 0x0,
v_free_reserved = 0x0, v_free_target = 0x0, v_free_min = 0x0,
v_free_count = 0x0,
    v_wire_count = 0x0, v_active_count = 0x0, v_inactive_target = 0x0,
v_inactive_count = 0x0, v_cache_count = 0x0, v_cache_min = 0x0,
v_cache_max = 0x0,
    v_pageout_free_min = 0x0, v_interrupt_free_min = 0x0, v_free_severe
= 0x0, v_forks = 0x2bb, v_vforks = 0xc3c, v_rforks = 0x0, v_kthreads = 0x0,
    v_forkpages = 0x5cd4, v_vforkpages = 0x230f8, v_rforkpages = 0x0,
v_kthreadpages = 0x0}, pc_cp_time = {0x15d, 0x0, 0x260, 0xb, 0x1649},
  pc_device = 0xc7fea400, pc_netisr = 0x0, pc_unused1 = 0x0, pc_domain =
0x0, pc_rm_queue = {rmq_next = 0xc0eadb9c, rmq_prev = 0xc0eadb9c},
  pc_dynamic = 0x235f1500, pc_monitorbuf = {0x2, 0x0 <repeats 127
times>}, pc_prvspace = 0xc0eada80, pc_curpmap = 0xc0ea3cc0,
pc_common_tss = {tss_link = 0x0,
    tss_esp0 = 0xe437fd30, tss_ss0 = 0x28, tss_esp1 = 0x0, tss_ss1 =
0x0, tss_esp2 = 0x0, tss_ss2 = 0x0, tss_cr3 = 0x0, tss_eip = 0xc0bacac8,
    tss_eflags = 0x10007, tss_eax = 0xc08f492f, tss_ecx = 0xc8029a20,
tss_edx = 0xf0a3ad40, tss_ebx = 0xd3cf, tss_esp = 0xe437f000, tss_ebp =
0xe437fafc,
    tss_esi = 0xc0e43400, tss_edi = 0xc7ebd000, tss_es = 0x28, tss_cs =
0x20, tss_ss = 0x28, tss_ds = 0x28, tss_fs = 0x8, tss_gs = 0x3b, tss_ldt
= 0x0,
    tss_ioopt = 0x680000}, pc_common_tssd = {sd_lolimit = 0x67,
sd_lobase = 0xeadc88, sd_type = 0x9, sd_dpl = 0x0, sd_p = 0x1,
sd_hilimit = 0x0, sd_xx = 0x0,
    sd_def32 = 0x0, sd_gran = 0x0, sd_hibase = 0xc0}, pc_tss_gdt =
0xc0eac01c, pc_fsgs_gdt = 0xc0eabfe4, pc_currentldt = 0x50, pc_acpi_id =
0x1,
  pc_apic_id = 0x6, pc_private_tss = 0x0, pc_cmci_mask = 0x0, pc_vcpu_id
= 0x0, __pad = {0x0 <repeats 233 times>}}

(kgdb) p *(struct thread *)0xc7ebd000
$22 = {td_lock = 0xc0e43400, td_proc = 0xc7eba000, td_plist = {tqe_next
= 0xc7ebd360, tqe_prev = 0xc7ebca28}, td_runq = {tqe_next = 0x0,
tqe_prev = 0x0},
  td_slpq = {tqe_next = 0x0, tqe_prev = 0x0}, td_lockq = {tqe_next =
0x0, tqe_prev = 0x0}, td_hash = {le_next = 0x0, le_prev = 0xc7ea3a94},
  td_cpuset = 0xc7ea6708, td_sel = 0x0, td_sleepqueue = 0xc7d74d80,
td_turnstile = 0xc7d96b00, td_rlqe = 0x0, td_umtxq = 0xc7d6c280, td_tid
= 100005,
  padding1 = {0, 0, 0, 0}, padding2 = {0x0, 0x0, 0x0, 0x0},
td_lend_user_pri = 255 'y"', td_flags = 262180, td_inhibitors = 0,
td_pflags = 2097152, td_dupfd = 0,
  td_sqqueue = 0, td_wchan = 0x0, td_wmesg = 0x0, td_lastcpu = 2 '\002',
td_oncpu = 255 'y"', td_owepreempt = 0 '\0', td_tsqueue = 0 '\0',
td_locks = 0,
  td_rw_rlocks = 0, td_lk_slocks = 0, td_stopsched = 1, td_blocked =
0x0, td_lockname = 0x0, td_contested = {lh_first = 0x0}, td_sleeplocks =
0x0,
  td_intr_nesting_level = 0, td_pinned = 0, td_ucred = 0xc7d6fb00,
td_estcpu = 0, td_slptick = 0, td_blktick = 0, td_swvoltick = 54223,
td_cow = 0, td_ru = {
    ru_utime = {tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0,
tv_usec = 0}, ru_maxrss = 0, ru_ixrss = 0, ru_idrss = 0, ru_isrss = 0,
ru_minflt = 0,
    ru_majflt = 0, ru_nswap = 0, ru_inblock = 0, ru_oublock = 0,
ru_msgsnd = 0, ru_msgrcv = 0, ru_nsignals = 0, ru_nvcsw = 3516,
ru_nivcsw = 24669}, td_rux = {
    rux_runtime = 9370350888, rux_uticks = 0, rux_sticks = 423,
rux_iticks = 0, rux_uu = 0, rux_su = 0, rux_tu = 0}, td_incruntime =
116052890114,
  td_runtime = 125423241002, td_pticks = 5705, td_sticks = 5282,
td_iticks = 0, td_uticks = 0, td_intrval = 0, td_oldsigmask = {__bits =
{0, 0, 0, 0}},
  td_generation = 28185, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags
= 0}, td_xsig = 0, td_profil_addr = 0, td_profil_ticks = 0,
  td_name = "idle: cpu2\000\000\000\000\000\000\000\000\000", td_fpop =
0x0, td_dbgflags = 0, td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev
= 0x0},
    ksi_info = {si_signo = 0, si_errno = 0, si_code = 0, si_pid = 0,
si_uid = 0, si_status = 0, si_addr = 0x0, si_value = {sival_int = 0,
sival_ptr = 0x0,
        sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno
= 0}, _timer = {_timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll
= {_band = 0},
        __spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0,
0}}}}, ksi_flags = 0, ksi_sigq = 0x0}, td_ng_outbound = 0, td_osd =
{osd_nslots = 0,
    osd_slots = 0x0, osd_next = {le_next = 0x0, le_prev = 0x0}},
td_map_def_user = 0x0, td_dbg_forked = 0, td_vp_reserv = 0,
td_no_sleeping = 1,
  td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}}, td_rqindex =
0 '\0', td_base_pri = 255 'y"', td_priority = 255 'y"', td_pri_class = 4
'\004',
  td_user_pri = 120 'x', td_base_user_pri = 120 'x', td_pcb =
0xe437fd40, td_state = TDS_CAN_RUN, td_retval = {0, 0}, td_slpcallout =
{c_links = {le = {
        le_next = 0x0, le_prev = 0x0}, sle = {sle_next = 0x0}, tqe =
{tqe_next = 0x0, tqe_prev = 0x0}}, c_time = 0, c_precision = 0, c_arg =
0x0, c_func = 0,
    c_lock = 0x0, c_flags = 0, c_iflags = 16, c_cpu = 0}, td_frame =
0xe437fce8, td_kstack_obj = 0xc17a43a8, td_kstack = 3828867072,
td_kstack_pages = 2,
  td_critnest = 2, td_md = {md_spinlock_count = 2, md_saved_flags = 70,
md_spurflt_addr = 0}, td_sched = 0xc7ebd338, td_ar = 0x0, td_lprof =
{{lh_first = 0x0}, {
      lh_first = 0x0}}, td_dtrace = 0xc7d9a600, td_errno = 0, td_vnet =
0x0, td_vnet_lpush = 0x0, td_intr_frame = 0x0, td_rfppwait_p = 0x0,
td_ma = 0x0,
  td_ma_cnt = 0, td_su = 0x0, td_dbg_sc_code = 0, td_dbg_sc_narg = 0,
td_emuldata = 0x0, td_sleeptimo = 0, td_sigqueue = {sq_signals = {__bits
= {0, 0, 0, 0}},
    sq_kill = {__bits = {0, 0, 0, 0}}, sq_ptrace = {__bits = {0, 0, 0,
0}}, sq_list = {tqh_first = 0x0, tqh_last = 0xc7ebd328}, sq_proc =
0xc7eba000,
    sq_flags = 1}}

(kgdb) p/x *(struct pcb*)0xe437fd40
$25 = {pcb_edi = 0xd3cf, pcb_esi = 0xc7ebd000, pcb_ebp = 0xe437fafc,
pcb_esp = 0xe437fab0, pcb_ebx = 0xd3cf, pcb_eip = 0xc08f492f, pcb_fsd =
{sd_lolimit = 0x0,
    sd_lobase = 0x0, sd_type = 0x0, sd_dpl = 0x0, sd_p = 0x0, sd_hilimit
= 0x0, sd_xx = 0x0, sd_def32 = 0x0, sd_gran = 0x0, sd_hibase = 0x0},
pcb_gsd = {
    sd_lolimit = 0x0, sd_lobase = 0x0, sd_type = 0x0, sd_dpl = 0x0, sd_p
= 0x0, sd_hilimit = 0x0, sd_xx = 0x0, sd_def32 = 0x0, sd_gran = 0x0,
sd_hibase = 0x0},
  pcb_ds = 0x0, pcb_es = 0x0, pcb_fs = 0x0, pcb_gs = 0x3b, pcb_ss = 0x0,
pcb_cr0 = 0x0, pcb_cr2 = 0x0, pcb_cr3 = 0x1424000, pcb_cr4 = 0x0,
pcb_dr0 = 0x0,
  pcb_dr1 = 0x0, pcb_dr2 = 0x0, pcb_dr3 = 0x0, pcb_dr6 = 0x0, pcb_dr7 =
0x0, pcb_gdt = {rd_limit = 0x0, rd_base = 0x0}, pcb_idt = {rd_limit = 0x0,
    rd_base = 0x0}, pcb_ldt = 0x0, pcb_tr = 0x0, pcb_flags = 0x0,
pcb_initial_npxcw = 0x0, pcb_onfault = 0x0, pcb_ext = 0x0, pcb_psl =
0x46, pcb_vm86 = {0x0,
    0x0}, pcb_save = 0xe437fe00, pcb_pad = {0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0}}

(kgdb) p/x __pcpu[2]->pc_common_tss
$16 = {tss_link = 0x0, tss_esp0 = 0xe437fd30, tss_ss0 = 0x28, tss_esp1 =
0x0, tss_ss1 = 0x0, tss_esp2 = 0x0, tss_ss2 = 0x0, tss_cr3 = 0x0,
tss_eip = 0xc0bacac8, tss_e
flags = 0x10007, tss_eax = 0xc08f492f, tss_ecx = 0xc8029a20, tss_edx =
0xf0a3ad40, tss_ebx = 0xd3cf, tss_esp = 0xe437f000, tss_ebp =
0xe437fafc, tss_esi = 0xc0e43400,
 tss_edi = 0xc7ebd000, tss_es = 0x28, tss_cs = 0x20, tss_ss = 0x28,
tss_ds = 0x28, tss_fs = 0x8, tss_gs = 0x3b, tss_ldt = 0x0, tss_ioopt =
0x680000}

The ddb output on console shows CPU2 has idled in thread 100005, when an
IPI_BITMAP_VECTOR message comes in.

--- trap 0x17, eip = 0xc0bacac8, esp = 0xe437f000, ebp = 0xe437fafc ---
Xpage(c7ebd000,0,608,c08d12a5,c7ebd000,...) at Xpage/frame 0xe437fafc
mi_switch(608,0,c0cc08f4,e2,c7ebd000,...) at mi_switch+0x145/frame
0xe437fb34
critical_exit(c7ebd000,0,2) at critical_exit+0x89/frame 0xe437fb50
ipi_bitmap_handler(8,28,28,c7f83200,0,...) at
ipi_bitmap_handler+0x6b/frame 0xe437fb70
Xipi_intr_bitmap_handler() at Xipi_intr_bitmap_handler+0x3d/frame 0xe437fb70
--- interrupt, eip = 0xc0ba87e5, esp = 0xe437fbb8, ebp = 0xe437fbb8 ---
acpi_cpu_c1(16600000,16,a65c2a4c,0,c90786c0,...) at
acpi_cpu_c1+0x5/frame 0xe437fbb8

db>  show thread
Thread 100005 at 0xc7ebd000:
 proc (pid 11): 0xc7eba000
 name: idle: cpu2
 stack: 0xe437e000-0xe437ffff
 flags: 0x40024  pflags: 0x200000
 state: CAN RUN
 priority: 255
 container lock: sched lock 2 (0xc0e43400)

>From kernel stack the above console output is given in more detail:

0xe437f000:    0x00000000     0xc0bacac8     0x00000020     0x00010007
0xe437f010:    0x00000000     0xc0bacac8     0x00000020     0x00010007
.... repeated:                tss_eip        tss_cs         tss_eflags
0xe437fa80:    0x00000000     0xc0bacac8     0x00000020     0x00010007
0xe437fa90:    0x00000000  R8:0xc0bacac8     0x00000020     0x00010007
0xe437faa0:    0x00000000  R7:0xc0bc051c     0x00000020     0x00010007
0xe437fab0: R6:0xc08f492f     0xc7ebd000     0xc8029a20     0xc0e43400
0xe437fac0:    0xe437fb28     0xc08aded5     0xc0e43400     0xc7ebd000
0xe437fad0:    0xc7ebd338     0xc0e43400     0x0000ac69     0xc8029a20
0xe437fae0:    0x00002710     0xc0e43400     0xc7ebd000     0x00000002
0xe437faf0:    0xc7ebd000     0x33cec72a     0x00000608     0xe437fb34
0xe437fb00: R5:0xc08d12a5     0xc7ebd000     0x00000000     0x00000608
0xe437fb10:    0xc08d12a5     0xc7ebd000     0xc7ebd000     0xe437fb28
0xe437fb20:    0xbaecb51c     0xc7ebd000     0xc7ebd000     0x00000002
0xe437fb30:    0x00000002     0xe437fb50  R4:0xc08ced89     0x00000608
0xe437fb40:    0x00000000     0xc0cc08f4     0x000000e2     0xc7ebd000
0xe437fb50:    0xe437fb70  R3:0xc0bb659b     0xc7ebd000     0x00000000
0xe437fb60:    0x00000002     0x00000000     0xc7f83200     0xc7f8321c
0xe437fb70:    0xe437fbb8  R2:0xc0bad42d     0x00000008     0x00000028
0xe437fb80:    0x00000028     0xc7f83200     0x00000000     0xe437fbb8
0xe437fb90:    0xe437fba4     0xc7f8321c     0x0000005b     0x00000000
0xe437fba0:    0xbaebe810     0x00000000     0x00000000  R1:0xc0ba87e5
0xe437fbb0:    0x00000020     0x00000246     0xe437fbf8  R0:0xc0549d3a
0xe437fbc0:    0x16600000     0x00000016     0xa65c2a4c     0x00000000
0xe437fbd0:    0xc90786c0     0xbaebe810     0x0000005b     0x00000002
0xe437fbe0:    0x00000001     0x00000021     0x00000008     0xc0eadc00
0xe437fbf0:    0xffffffff     0x00000001     0xe437fc0c     0xc0bb0d8f

R0: return address frame 0xe437fbf8
(kgdb) list *0xc0549d3a
0xc0549d3a is in acpi_cpu_idle (/usr/src/sys/dev/acpica/acpi_cpu.c:1027).
1022         * we are called inside critical section, delaying context
switch.
1023         */
1024        if (cx_next->type == ACPI_STATE_C1) {
1025            cputicks = cpu_ticks();
1026            acpi_cpu_c1();
1027            end_time = ((cpu_ticks() - cputicks) << 20) /
cpu_tickrate();
1028            if (curthread->td_critnest == 0)
1029                    end_time = min(end_time, 500000 / hz);
1030            sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 + end_time)
/ 4;
1031            return;

R1: address after hlt
(kgdb) list *0xc0ba87e5
0xc0ba87e5 is in acpi_cpu_c1 (/usr/src/sys/i386/acpica/acpi_machdep.c:115).
110     void
111     acpi_cpu_c1()
112     {
113
114             __asm __volatile("sti; hlt");
115     }
116
117     /*
118      * Support for mapping ACPI tables during early boot.  This
abuses the
119      * crashdump map because the kernel cannot allocate KVA in

R2: return address frame 0xe437fbb8
(kgdb) list *0xc0bad42d
0xc0bad42d is at apic_vector.s:242.
237
238             FAKE_MCOUNT(TF_EIP(%esp))
239
240             call    ipi_bitmap_handler
241             MEXITCOUNT
242             jmp     doreti
243     #endif
244     /*
245      * Executed by a CPU when it receives an IPI_STOP from another CPU.
246      */

R3: return address frame 0xe437fb70
(kgdb) list *0xc0bb659b
0xc0bb659b is in ipi_bitmap_handler
(/usr/src/sys/i386/i386/mp_machdep.c:1403).
1398                    hardclockintr();
1399            }
1400            td->td_intr_frame = oldframe;
1401            td->td_intr_nesting_level--;
1402            critical_exit();
1403    }
1404
1405    /*
1406     * send an IPI to a set of cpus.
1407     */

R4: return address frame 0xe437fb50
(kgdb) list *0xc08ced89
0xc08ced89 is in critical_exit (/usr/src/sys/kern/kern_switch.c:234).
229                             if (TD_IS_IDLETHREAD(td))
230                                     flags |= SWT_IDLE;
231                             else
232                                     flags |= SWT_OWEPREEMPT;
233                             mi_switch(flags, NULL);
234                             thread_unlock(td);
235                     }
236             } else
237                     td->td_critnest--;

R5: return address frame 0xe437fb34:
Call "sched_switch(td, newtd, mtx)" with td=0xc7ebd000,
newtd=0x00000000, mtx=0x00000608
(kgdb) list *0xc08d12a5
0xc08d12a5 is in mi_switch (/usr/src/sys/kern/kern_synch.c:480).
473             sched_switch(td, newtd, flags);
474             CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p,
pid %ld, %s)",
475                 td->td_tid, td->td_sched, td->td_proc->p_pid,
td->td_name);
476
477             /*
478              * If the last thread was exiting, finish cleaning it up.
479              */
480             if ((td = PCPU_GET(deadthread))) {
481                     PCPU_SET(deadthread, NULL);
482                     thread_stash(td);
483             }
484     }

R6: return address frame unknown (overwritten on stack)
Call "cpu_switch(td, newtd, mtx)" with td=0xc7ebd000, newtd=0xc8029a20,
mtx=0xc0e43400 (=td_lock)
(kgdb) list *0xc08f492f
0xc08f492f is in sched_switch (/usr/src/sys/kern/sched_ule.c:1956).
1950                    cpu_switch(td, newtd, mtx);
1951                    /*
1952                     * We may return from cpu_switch on a different
cpu.  However,
1953                     * we always return with td_lock pointing to the
current cpu's
1954                     * run queue lock.
1955                     */
1956                    cpuid = PCPU_GET(cpuid);
1957                    tdq = TDQ_CPU(cpuid);
1958                    lock_profile_obtain_lock_success(
1959                        &TDQ_LOCKPTR(tdq)->lock_object, 0, 0,
__FILE__, __LINE__);
1960

(kgdb) p (*(struct thread*)0xc8029a20)->td_name
$5 = "irq16: uhci0\000\000\000\000\000\000\000"
(kgdb) p (*(struct thread*)0xc8029a20)->td_tid
$3 = 100030

R7:
(kgdb)  list *0xc0bc051c
0xc0bc051c is at /usr/src/sys/i386/i386/swtch.s:176.
171             jz      badsw3                          /* no, panic */
172     #endif
173             movl    TD_PCB(%ecx),%edx
174
175             /* switch address space */
176             movl    PCB_CR3(%edx),%eax
177     #if defined(PAE) || defined(PAE_TABLES)
178             cmpl    %eax,IdlePDPT                   /* Kernel
address space? */
179     #else
180             cmpl    %eax,IdlePTD                    /* Kernel
address space? */

R8:
(kgdb)  list *0xc0bacac8
0xc0bacac8 is at /usr/src/sys/i386/i386/exception.s:135.
130     IDTVEC(stk)
131             TRAP(T_STKFLT)
132     IDTVEC(prot)
133             TRAP(T_PROTFLT)
134     IDTVEC(page)
135             TRAP(T_PAGEFLT)
136     IDTVEC(mchk)
137             pushl $0; TRAP(T_MCHK)
138     IDTVEC(rsvd)
139             pushl $0; TRAP(T_RESERVED)

On the stack we have

0xe437faa0:    0x00000000  R7:0xc0bc051c     0x00000020     0x00010007

so there is an exception on the instruction "movl  PCB_CR3(%edx),%eax"
in function cpu_switch(). The next stack entries indicates a lot of page
faults, but the "double fault" happens not until the page boundary at
0xe437f000 is reached. I do not really understand this, but it seems to
me that the thread

  "panic: double fault with 11.0-CURRENT r258504" on freebsd-stable

describes a similar problem.  However I have an Intel CPU:

(kgdb) p/x cpu_vendor_id
$3 = 0x8086
(kgdb) p/x cpu_id
$4 = 0xf29
(kgdb) p/x cpu_feature
$5 = 0xbfebfbff
(kgdb) p/x cpu_feature2
$6 = 0x4400

kernel: Copyright (c) 1992-2017 The FreeBSD Project.
 kernel: Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992,
1993, 1994
 kernel: The Regents of the University of California. All rights reserved.
 kernel: FreeBSD is a registered trademark of The FreeBSD Foundation.
 kernel: FreeBSD 10.3-STABLE #2 r317936: Wed Sep 13 10:24:12 CEST 2017
 kernel: root@dssresv3.incore:/usr/obj/usr/src/sys/SERVER32 i386
 kernel: FreeBSD clang version 3.4.1 (tags/RELEASE_34/dot1-final 208032)
20140512
 kernel: CPU: Intel(R) Xeon(TM) CPU 3.20GHz (3189.77-MHz 686-class CPU)
 kernel: Origin="GenuineIntel"  Id=0xf25  Family=0xf  Model=0x2  Stepping=5
 kernel:
Features=0xbfebfbff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CLFLUSH,DTS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE>
 kernel: Features2=0x4400<CNXT-ID,xTPR>
 kernel: real memory  = 4294967296 (4096 MB)
 kernel: avail memory = 3936845824 (3754 MB)
 kernel: Event timer "LAPIC" quality 400
 kernel: ACPI APIC Table: <INTEL  SWV25   >
 kernel: FreeBSD/SMP: Multiprocessor System Detected: 4 CPUs
 kernel: FreeBSD/SMP: 2 package(s) x 1 core(s) x 2 HTT threads
 kernel: cpu0 (BSP): APIC ID:  0
 kernel: cpu1 (AP/HT): APIC ID:  1
 kernel: cpu2 (AP): APIC ID:  6
 kernel: cpu3 (AP/HT): APIC ID:  7
 kernel: random: <Software, Yarrow> initialized
 kernel: ioapic0 <Version 2.0> irqs 0-23 on motherboard
 kernel: ioapic1 <Version 2.0> irqs 24-47 on motherboard
 kernel: ioapic2 <Version 2.0> irqs 48-71 on motherboard
 kernel: lapic0: Forcing LINT1 to edge trigger

I can not reproduce the problem, last double fault I saw was some years
ago on identical hardware (Bug 194225  double fault after page fault on
8.4 Stable).

Andreas Longwitz



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?59F910C5.8020709>