Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 Jul 2006 22:25:09 +0200
From:      Pawel Worach <pawel.worach@gmail.com>
To:        Randall Stewart <rrs@cisco.com>
Cc:        freebsd-net@freebsd.org
Subject:   Re: SCTP
Message-ID:  <44C67E25.7050706@gmail.com>
In-Reply-To: <44C6549B.8080407@cisco.com>
References:  <44BB7A92.9080008@cisco.com>	<d227e09e0607181323q18e53947p942c944602c43cfe@mail.gmail.com>	<44BE34E2.7070603@cisco.com> <44C6549B.8080407@cisco.com>

next in thread | previous in thread | raw e-mail | index | archive | help
Another interesting thing, I did see this before as well.

System freezes in the middle of a NPsctp run. This does not happen when 
I run over loopback but occasionally when run over crossover network.

I enabled WITNESS, INVARIANTS and INVARIANTS_SCTP, no complains from any 
of them.

I can break to ddb and here is some random debugging. I also have a 
manual dump of this.

db> ps
  pid   uid  ppid  pgrp  state   wmesg      wchan    cmd
  1077    0  1002  1077  R+                          NPsctp
  1008    0   835   835  S       pause    0xc2757034 ntpd
  1002    0   998  1002  S+      pause    0xc2703904 csh
  1001    0     1  1001  Ss+     ttyin    0xc25cd410 getty
  1000    0     1  1000  Ss+     ttyin    0xc25cd010 getty
   999    0     1   999  Ss+     ttyin    0xc25ce010 getty
   998    0     1   998  Ss+     wait     0xc2574d38 login
   917    0     1   917  Ss      nanslp   0xc07c1784 cron
   906    0     1   906  Ss      select   0xc080b04c sshd
   842    0     1   842  Ss      select   0xc080b04c powerd
   835    0     1   835  Ss      select   0xc080b04c ntpd
   761    0     0     0  SL      mdwait   0xc282b800 [md0]
   718    0     1   718  Ss      select   0xc080b04c syslogd
   617    0     1   617  Ss      select   0xc080b04c devd
   174    0     1   174  Ss      pause    0xc2703d6c adjkerntz
    46    0     0     0  SL      geli:w   0xc26f7c00 [g_eli[0] ad0s2d]
    45    0     0     0  SL      -        0xd480dcf8 [schedcpu]
    44    0     0     0  SL      sdflush  0xc08164ec [softdepflush]
    43    0     0     0  SL      syncer   0xc07c154c [syncer]
--More--           42    0     0     0  SL      vlruwt   0xc27018d0 [vnlru]
    41    0     0     0  SL      psleep   0xc080b4d4 [bufdaemon]
    40    0     0     0  SL      pollid   0xc07c0c18 [idlepoll]
    39    0     0     0  SL      pgzero   0xc081ee30 [pagezero]
    38    0     0     0  SL      psleep   0xc0816d60 [vmdaemon]
    37    0     0     0  SL      psleep   0xc0816d20 [pagedaemon]
    36    0     0     0  WL                          [swi0: sio]
    35    0     0     0  WL                          [irq12: psm0]
    34    0     0     0  RL      CPU 0               [irq1: atkbd0]
    33    0     0     0  SL      cooling  0xc257acd4 [acpi_cooling0]
    32    0     0     0  SL      tzpoll   0xc09d6520 [acpi_thermal]
    31    0     0     0  WL                          [irq15: ata1]
    30    0     0     0  WL                          [irq14: ata0]
    29    0     0     0  SL      -        0xc2578080 [ath0 taskq]
    28    0     0     0  SL      usbevt   0xc24e9a10 [usb3]
    27    0     0     0  SL      usbevt   0xc2567210 [usb2]
    26    0     0     0  SL      usbevt   0xc2552210 [usb1]
    25    0     0     0  SL      usbtsk   0xc07bece4 [usbtask]
    24    0     0     0  SL      usbevt   0xc24fa210 [usb0]
    23    0     0     0  WL                          [irq11: pcm0 em0++*]
--More--           22    0     0     0  WL 
[irq9: acpi0]
    21    0     0     0  WL                          [swi2: cambio]
    20    0     0     0  SL      -        0xc24f6400 [acpi_task_2]
    19    0     0     0  SL      -        0xc24f6400 [acpi_task_1]
     9    0     0     0  SL      -        0xc24f6400 [acpi_task_0]
     8    0     0     0  SL      -        0xc24f6500 [kqueue taskq]
    18    0     0     0  WL                          [swi5: +]
     7    0     0     0  SL      -        0xc2463480 [thread taskq]
    17    0     0     0  WL                          [swi6: Giant taskq]
    16    0     0     0  WL                          [swi6: task queue]
    15    0     0     0  SL      seqstate 0xc245fc54 [sequencer 00]
    14    0     0     0  SL      -        0xc07be580 [yarrow]
     6    0     0     0  SL      -        0xc07bf03c [g_down]
     5    0     0     0  SL      -        0xc07bf038 [g_up]
     4    0     0     0  SL      -        0xc07bf030 [g_event]
     3    0     0     0  SL      crypto_r 0xc0816254 [crypto returns]
     2    0     0     0  SL      crypto_w 0xc081622c [crypto]
    13    0     0     0  WL                          [swi3: vm]
    12    0     0     0  RL                          [swi4: clock sio]
    11    0     0     0  LL     *so_rcv   0xc2466140 [swi1: net]
--More--           10    0     0     0  RL                          [idle]
     1    0     0     1  SLs     wait     0xc246a000 [init]
     0    0     0     0  WLs                         [swapper]
db> show alllocks
Process 1077 (NPsctp) thread 0xc25706c0 (100038)
exclusive sleep mutex so_rcv r = 0 (0xc2780458) locked @ 
/usr/src/sys/netinet/sctputil.c:4042
Process 34 (irq1: atkbd0) thread 0xc246b6c0 (100031)
exclusive sleep mutex Giant r = 0 (0xc07c0b08) locked @ 
/usr/src/sys/kern/kern_intr.c:661
Process 11 (swi1: net) thread 0xc24656c0 (100005)
exclusive sleep mutex sctp-tcb (tcb) r = 0 (0xc29a5498) locked @ 
/usr/src/sys/netinet/sctp_pcb.c:191
db> tr 1077
Tracing pid 1077 tid 100038 td 0xc25706c0
sched_switch(c25706c0,c246b6c0,6,158,417a95,...) at sched_switch+0x1a0
mi_switch(6,c246b6c0,c07563d7,290,c246b850,...) at mi_switch+0x306
maybe_preempt(c246b6c0,1,c075626f,4ab,c2468360,...) at maybe_preempt+0x1f0
sched_add(c246b6c0,4,c07563d7,1f4,c2462c80,...) at sched_add+0x97
setrunqueue(c246b6c0,4,c0751b05,211,d488092c,...) at setrunqueue+0xd9
intr_event_schedule_thread(c2462c80,0,d4880958,1,c25706c0,...) at 
intr_event_schedule_thread+0x116
intr_execute_handlers(c07b614c,d4880984,38,c0758c54,c29ab5d8,...) at 
intr_execute_handlers+0x14a
atpic_handle_intr(1) at atpic_handle_intr+0xcf
Xatpic_intr1() at Xatpic_intr1+0x20
--- interrupt, eip = 0xc05fae33, esp = 0xd48809c4, ebp = 0xd4880a34 ---
sctp_sorecvmsg(c27803e4,d4880c5c,0,0,0,...) at sctp_sorecvmsg+0x183
sctp_soreceive(c27803e4,0,d4880c5c,0,0,...) at sctp_soreceive+0x9b
soreceive(c27803e4,0,d4880c5c,0,0,...) at soreceive+0x53
soo_read(c272ec60,d4880c5c,c28d9400,0,c25706c0,...) at soo_read+0x6f
dofileread(c25706c0,3,c272ec60,d4880c5c,ffffffff,...) at dofileread+0xa7
kern_readv(c25706c0,3,d4880c5c,82945ac,2a51,...) at kern_readv+0x60
read(c25706c0,d4880d04,c,444,c257469c,...) at read+0x4f
syscall(bfbf003b,829003b,bfbc003b,bfbfe8d0,82945ac,...) at syscall+0x313
Xint0x80_syscall() at Xint0x80_syscall+0x1f
--More--        --- syscall (3, FreeBSD ELF32, read), eip = 0x28168887, 
esp = 0xbfbbfffc, ebp = 0xbfbc0028 ---
db> tr 11
Tracing pid 11 tid 100005 td 0xc24656c0
sched_switch(c24656c0,0,1,158,c25706c0,...) at sched_switch+0x1a0
mi_switch(1,0,c075815d,283,c07c41d0,...) at mi_switch+0x306
turnstile_wait(c2780458,c25706c0,0,223,c25706c2,...) at turnstile_wait+0x4c5
_mtx_lock_sleep(c2780458,c24656c0,0,c07637e7,e65,...) at 
_mtx_lock_sleep+0x130
_mtx_lock_flags(c2780458,0,c07637e7,e65,c294fe00,...) at 
_mtx_lock_flags+0xc7
sctp_append_to_readq(c29a3000,c29a5000,c29ab5d8,c27ff000,0,...) at 
sctp_append_to_readq+0x116
sctp_service_reassembly(c29a5000,c29a502c,69215acb,5bc,c2982030,...) at 
sctp_service_reassembly+0x2be
sctp_service_queues(c29a5000,c29a502c,d35e2c40,20,c2982030,...) at 
sctp_service_queues+0x2a
sctp_process_data(d35e2c40,14,d35e2bc8,5dc,c2982024,...) at 
sctp_process_data+0x65f
sctp_common_input_processing(d35e2c40,14,5dc,5dc,c2982024,...) at 
sctp_common_input_processing+0x424
sctp_input(c294e100,14,c255a800,1,0,...) at sctp_input+0x420
ip_input(c294e100,0,c0760336,e9,c080c318,...) at ip_input+0x6c1
netisr_processqueue(c080c318,c07c0af0,2,c07563d7,c245ea00,...) at 
netisr_processqueue+0x8e
swi_net(0,d35e2cd8,c052758c,c07c0af0,1,...) at swi_net+0xf9
ithread_execute_handlers(c2464b04,c2462580,c0751b05,2f9,c24656c0,...) at 
ithread_execute_handlers+0x168
ithread_loop(c24333b0,d35e2d38,c07518ff,32e,0,...) at ithread_loop+0x83
fork_exit(c05183d0,c24333b0,d35e2d38) at fork_exit+0xc3
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xd35e2d6c, ebp = 0 ---
--More--        db> call doadump
Physical memory: 502 MB
Dumping 33 MB: 18 2

(kgdb) l /usr/src/sys/netinet/sctputil.c:4042
4037            inp = (struct sctp_inpcb *)so->so_pcb;
4038            if (inp == NULL) {
4039                    return (EFAULT);
4040            }
4041            s = splnet();
4042            SOCKBUF_LOCK(&so->so_rcv);
4043
4044    restart:
4045            if((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
4046               (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
(kgdb) l /usr/src/sys/netinet/sctp_pcb.c:191
186                     }
187             if (stcb->sctp_socket)
188                     if 
(mtx_owned(&(stcb->sctp_socket->so_snd.sb_mtx))) {
189                             panic("own snd socket mtx at lock of tcb");
190                     }
191             mtx_lock(&(stcb)->tcb_mtx);
192     }
193
194
195     void
(kgdb) l *sctp_append_to_readq+0x116
0xc05fa396 is in sctp_append_to_readq 
(/usr/src/sys/netinet/sctputil.c:3687).
3682                    mm = mm->m_next;
3683            }
3684            if (sb) {
3685                    SOCKBUF_LOCK(sb);
3686            }
3687            if (control->tail_mbuf) {
3688                    /* append */
3689                    control->tail_mbuf->m_next = m;
3690                    control->tail_mbuf = tail;
3691            } else {

-- 
Pawel



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?44C67E25.7050706>