Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 23 Feb 2011 12:16:37 +0300
From:      Lev Serebryakov <lev@serebryakov.spb.ru>
To:        freebsd-net@freebsd.org, freebsd-stable@freebsd.org
Cc:        =?utf-8?Q?Michael_T=C3=BCxen?= <Michael.Tuexen@lurchi.franken.de>, Jack Vogel <jack.vogel@intel.com>
Subject:   em0 with latest driver hangs again and again (without "Watchdog timeout" message!)
Message-ID:  <1975926365.20110223121637@serebryakov.spb.ru>

next in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
Hello, Freebsd-net.

  It's me again, as the problem is not solved and no "clear" answer was
received.

  The em0 NIC on my storage server hangs every few (2-3) days.
Symptoms are simple: no packets can be sent, mbufs are overfilled, and
every program gets the "No buf space to send" error.

    Configuration now is VERY BASIC: no polling, no sysctls or
loader.conf tunables AT ALL. No jumbo frames.

  The NIC doesn't show any "Watchdog timeout" / "resetting" messages.

  The driver from the "em driver, 82574L chip, and possibly ASPM" thread
 doesn't really help: it seems that it decreases the frequency of hangs
 but doesn't eliminate them. I cannot say for sure, though; maybe the
 frequency change is only an illusion, as it is a random process.

 I've added the diagnostic patch from Michael Tüxen.

 The system was cvsupped and built on Mon Feb 21; it is FreeBSD 8-STABLE
(RELENG_8).

  Hardware is:

em0@pci0:0:25:0:        class=0x020000 card=0x82681043 chip=0x10bd8086 rev=0x02 hdr=0x00
    vendor     = 'Intel Corporation'
    device     = 'Intel 82566DM Gigabit Ethernet Adapter (82566DM)'
    class      = network
    subclass   = ethernet
    bar   [10] = type Memory, range 32, base 0xfeb40000, size 131072, enabled
    bar   [14] = type Memory, range 32, base 0xfeb7a000, size 4096, enabled
    bar   [18] = type I/O Port, range 32, base 0xe880, size 32, enabled
    cap 01[c8] = powerspec 2  supports D0 D3  current D0
    cap 05[d0] = MSI supports 1 message, 64 bit
    cap 09[e0] = vendor (length 6) Intel cap 2 version 0


  The output of ifconfig, vmstat -m, netstat -m, top -Snd 1, and sysctl
dev.em.0 is attached.

  Interesting part of diagnostic sysctls:

dev.em.0.queue0.rxd_head: 896
dev.em.0.queue0.rxd_tail: 895
dev.em.0.queue0.rx_irq: 0
dev.em.0.queue0.rx_nxt_refresh: 896
dev.em.0.queue0.rx_nxt_check: 896

-- 
// Black Lion AKA Lev Serebryakov <lev@serebryakov.spb.ru>
[-- Attachment #2 --]
>>> ifconfig em0
em0: flags=8c43<UP,BROADCAST,RUNNING,OACTIVE,SIMPLEX,MULTICAST> metric 0 mtu 1500
	options=219b<RXCSUM,TXCSUM,VLAN_MTU,VLAN_HWTAGGING,VLAN_HWCSUM,TSO4,WOL_MAGIC>
	ether 00:1e:8c:75:03:0d
	inet 0.0.0.0 netmask 0xff000000 broadcast 255.255.255.255
	media: Ethernet 1000baseT <full-duplex>
	status: active
<<< ifconfig em0
>>> vmstat -m
         Type InUse MemUse HighUse Requests  Size(s)
       module   152    19K       -      152  128
          USB    76    66K       -       80  16,32,64,128,256,1024,2048,4096
     mtx_pool     2    16K       -        2  
   CAM periph    22     6K       -       44  16,32,64,256
     pci_link    16     2K       -       16  64,128
      acpisem    19     3K       -       19  128
      subproc   374   355K       -    53847  512,4096
         proc     2    16K       -        2  
      session    23     3K       -      906  128
         pgrp    25     4K       -      948  128
         cred    58    10K       -   589406  64,256
      uidinfo     8     3K       -    65562  128,2048
       plimit    10     3K       -    11992  256
      CAM XPT   283   405K       -      404  16,32,64,128,256,1024,2048
       DEVFS1   142    71K       -      151  512
    sysctltmp     0     0K       -     3398  16,32,64,128
    sysctloid  3256   161K       -     3390  16,32,64,128
       sysctl     0     0K       -    42659  16,32,64
      callout     1   512K       -        1  
         umtx   402    51K       -      402  128
     p1003.1b     1     1K       -        1  16
         SWAP     2   549K       -        2  64
       DEVFS3   168    42K       -      178  256
       bus-sc    63   373K       -     1192  16,32,64,128,256,512,2048,4096
          bus   609    62K       -     3480  16,32,64,128,256,512,1024
      devstat    14    29K       -       14  32,4096
 eventhandler    67     6K       -       67  64,128
         kobj    93   372K       -      115  4096
      Per-cpu     1     1K       -        1  32
        DEVFS    20     1K       -       21  16,128
         rman   177    22K       -      601  16,32,128
       DEVFSP     1     1K       -        1  64
         sbuf     0     0K       -     1238  16,32,64,128,256,512,1024,2048,4096
    pfs_nodes    21     6K       -       21  256
        stack     0     0K       -        2  256
    taskqueue    15     2K       -       15  16,32,128
       Unitno    10     1K       -       70  32,64
          iov     0     0K       -    33426  16,32,64,128,256,512
       select    75    10K       - 2813743742  128,512,1024
     ioctlops     0     0K       - 51631113  16,32,64,128,256,512,1024,2048,4096
          msg     4    30K       -        4  2048,4096
          sem     4    11K       -        4  512,1024
          shm     1    20K       -        1  
          tty    20    20K       -       25  1024,2048
          pts     0     0K       -        3  256
     mbuf_tag     0     0K       -    36538  32
        shmfd     1     8K       -        1  
         GEOM   175    38K       -      741  16,32,64,128,256,512,1024
          pcb    31    13K       -  1301072  16,32,1024,2048,4096
       soname     6     1K       -  4871796  16,32,128
          acl     0     0K       -     2823  4096
       biobuf     0     0K       -      687  2048
     vfscache     1  1024K       -        1  
   cl_savebuf     0     0K       -    10463  64,128
  export_host     2     1K       -        2  256
     vfs_hash     1   512K       -        1  
       vnodes     2     1K       -        2  256
  vnodemarker     0     0K       -    55956  512
        mount   104     6K       -      304  16,32,64,128,256,512
          BPF     7     9K       -        7  128,512,4096
  ether_multi    12     1K       -       26  16,64
       ifaddr    14     7K       -       16  32,512,4096
        ifnet     3     5K       -        3  128,2048
        clone     2     8K       -        2  4096
       arpcom     1     1K       -        1  16
      lltable     2     1K       -        9  256,512
      scsi_da     0     0K       -       16  16
       kbdmux     6    10K       -        6  16,512,1024,2048,4096
          LED     1     1K       -        1  128
       isadev     6     1K       -        6  128
     routetbl    12     4K       -    36189  32,64,128,256,512
         igmp     2     1K       -        2  256
CAM dev queue     8     1K       -        8  128
    CAM queue    43     3K       -      148  16,32,64,256
      CAM SIM     8     2K       -        8  256
  ip_moptions     4     1K       -        4  64,256
     in_multi     3     1K       -        5  256
   in_mfilter     2     2K       -        2  1024
    hostcache     1    28K       -        1  
     syncache     1    96K       -        1  
      NFS FHA     1     2K       -      143  64,2048
          rpc   152    81K       -      462  32,64,128,256,512,2048
audit_evclass   172     6K       -      211  32
     savedino     0     0K       -    11637  256
    newdirblk     0     0K       -      153  64
       dirrem     0     0K       -    65186  64
        mkdir     0     0K       -     1366  64
       diradd     3     1K       -    57136  64
     freefile     1     1K       -    44390  64
     freeblks     1     1K       -    43828  256
     freefrag     0     0K       -    40153  64
   allocindir     0     0K       -    51662  128
     indirdep     1     1K       -      709  64
  allocdirect     3     1K       -   108309  256
    bmsafemap     2     1K       -    19232  128
       newblk     1     1K       -   159972  64,512
     inodedep     6   514K       -    79194  256
      pagedep     4   129K       -     8085  128
  ufs_dirhash  1652   546K       -    92988  16,32,64,128,256,512,1024
    ufs_mount    15   127K       -       15  512,2048,4096
      UMAHash     3     7K       -        9  512,1024,2048,4096
  ddb_capture     1    48K       -        1  
       acpica  3770   386K       -    82037  16,32,64,128,256,512,1024,2048
         cdev     8     2K       -        8  256
    vm_pgdata     2   129K       -        2  128
      acpidev    78     5K       -       78  64
        sigio     1     1K       -        1  64
     filedesc    60    77K       -    61157  16,32,64,128,256,512,1024,2048,4096
         kenv    76    11K       -       80  16,32,64,128
      io_apic     1     2K       -        1  2048
       kqueue     2    13K       -    57882  256,2048,4096
      memdesc     1     4K       -        1  4096
     acpitask     1     2K       -        1  2048
    proc-args    27     2K       -    71620  16,32,64,128,256
     atkbddev     2     1K       -        2  64
      ithread    72    12K       -       72  32,128,256
      entropy  1024    64K       -     1024  64
         UART     3     2K       -        3  16,512,1024
       KTRACE   100    13K       -      100  128
       linker    57     6K       -       63  16,32,64,128,512
        lockf    53     6K       -  1123948  64,128
         temp    20   401K       -   137072  16,32,64,128,256,512,1024,2048,4096
       devbuf 19999 34877K       -    20097  16,32,64,128,256,512,1024,2048,4096
       USBdev    47    12K       -       47  64,128,1024
     nexusdev     3     1K       -        3  16
   raid5_data    19  7545K       - 41120455  16,64,512,4096
<<< vmstat -m
>>> netstat -m
3404/6571/9975 mbufs in use (current/cache/total)
1211/2209/3420/204800 mbuf clusters in use (current/cache/total/max)
1200/449 mbuf+clusters out of packet secondary zone in use (current/cache)
0/217/217/192000 4k (page size) jumbo clusters in use (current/cache/total/max)
0/0/0/6400 9k jumbo clusters in use (current/cache/total/max)
0/0/0/3200 16k jumbo clusters in use (current/cache/total/max)
3273K/6928K/10201K bytes allocated to network (current/cache/total)
0/0/0 requests for mbufs denied (mbufs/clusters/mbuf+clusters)
0/0/0 requests for jumbo clusters denied (4k/9k/16k)
0/0/0 sfbufs in use (current/peak/max)
0 requests for sfbufs denied
0 requests for sfbufs delayed
0 requests for I/O initiated by sendfile
0 calls to protocol drain routines
<<< netstat -m
>>> top -Snd 1
last pid: 53521;  load averages:  0.00,  0.00,  0.00  up 1+20:09:02    11:53:47
111 processes: 3 running, 91 sleeping, 17 waiting

Mem: 81M Active, 1413M Inact, 292M Wired, 56M Cache, 213M Buf, 133M Free
Swap: 4096M Total, 116K Used, 4096M Free


  PID USERNAME      THR PRI NICE   SIZE    RES STATE   C   TIME   WCPU COMMAND
   11 root            2 171 ki31     0K    32K CPU0    0  61.6H 200.00% idle
   12 root           17 -60    -     0K   272K WAIT    0  44:23  0.10% intr
 5911 rtorrent        3  44    0 85752K 63444K select  1  25.2H  0.00% transmission-daemon
    0 root            9 -68    0     0K   128K -       0  12:21  0.00% kernel
   14 root           33 -40    -     0K   528K -       1   5:11  0.00% usb
   19 root            2  -8    -     0K    32K gr5do   1   4:31  0.00% g_raid5
    4 root            1  -8    -     0K    16K -       0   3:57  0.00% g_down
   17 root            1  20    -     0K    16K syncer  1   3:15  0.00% syncer
    3 root            1  -8    -     0K    16K -       0   3:01  0.00% g_up
   13 root            1 -16    -     0K    16K -       0   1:52  0.00% yarrow
    6 root            1 -16    -     0K    16K psleep  1   1:02  0.00% pagedaemon
  913 uucp            1  44    0  6916K  1356K select  1   0:12  0.00% megatec
  788 root            1  44    0 21052K  2808K select  1   0:06  0.00% nmbd
  915 uucp            1  44    0 10928K  2188K select  0   0:05  0.00% upsd
  860 root            1  44    0 11788K  2028K select  1   0:05  0.00% ntpd
    2 root            1  -8    -     0K    16K -       1   0:05  0.00% g_event
   16 root            1  44    -     0K    16K vlruwt  1   0:04  0.00% vnlru
   18 root            1  44    -     0K    16K sdflus  1   0:02  0.00% softdepflush

<<< top -Snd 1
>>> sysctl dev.em.0
dev.em.0.%desc: Intel(R) PRO/1000 Network Connection 7.1.9
dev.em.0.%driver: em
dev.em.0.%location: slot=25 function=0 handle=\_SB_.PCI0.GBEC
dev.em.0.%pnpinfo: vendor=0x8086 device=0x10bd subvendor=0x1043 subdevice=0x8268 class=0x020000
dev.em.0.%parent: pci0
dev.em.0.nvm: -1
dev.em.0.debug: -1
dev.em.0.rx_int_delay: 0
dev.em.0.tx_int_delay: 66
dev.em.0.rx_abs_int_delay: 66
dev.em.0.tx_abs_int_delay: 66
dev.em.0.rx_processing_limit: 100
dev.em.0.flow_control: 3
dev.em.0.link_irq: 0
dev.em.0.mbuf_alloc_fail: 0
dev.em.0.cluster_alloc_fail: 0
dev.em.0.dropped: 0
dev.em.0.tx_dma_fail: 0
dev.em.0.rx_overruns: 1
dev.em.0.watchdog_timeouts: 0
dev.em.0.device_control: 1477444160
dev.em.0.rx_control: 67141634
dev.em.0.fc_high_water: 8192
dev.em.0.fc_low_water: 6692
dev.em.0.queue0.txd_head: 144
dev.em.0.queue0.txd_tail: 108
dev.em.0.queue0.tx_irq: 0
dev.em.0.queue0.no_desc_avail: 0
dev.em.0.queue0.rxd_head: 896
dev.em.0.queue0.rxd_tail: 895
dev.em.0.queue0.rx_irq: 0
dev.em.0.queue0.rx_nxt_refresh: 896
dev.em.0.queue0.rx_nxt_check: 896
dev.em.0.mac_stats.excess_coll: 0
dev.em.0.mac_stats.single_coll: 0
dev.em.0.mac_stats.multiple_coll: 0
dev.em.0.mac_stats.late_coll: 0
dev.em.0.mac_stats.collision_count: 0
dev.em.0.mac_stats.symbol_errors: 0
dev.em.0.mac_stats.sequence_errors: 0
dev.em.0.mac_stats.defer_count: 683
dev.em.0.mac_stats.missed_packets: 11166
dev.em.0.mac_stats.recv_no_buff: 0
dev.em.0.mac_stats.recv_undersize: 0
dev.em.0.mac_stats.recv_fragmented: 9
dev.em.0.mac_stats.recv_oversize: 0
dev.em.0.mac_stats.recv_jabber: 0
dev.em.0.mac_stats.recv_errs: 619
dev.em.0.mac_stats.crc_errs: 645
dev.em.0.mac_stats.alignment_errs: 0
dev.em.0.mac_stats.coll_ext_errs: 0
dev.em.0.mac_stats.xon_recvd: 694
dev.em.0.mac_stats.xon_txd: 1745
dev.em.0.mac_stats.xoff_recvd: 1998
dev.em.0.mac_stats.xoff_txd: 1744
dev.em.0.mac_stats.total_pkts_recvd: 191037978
dev.em.0.mac_stats.good_pkts_recvd: 191023462
dev.em.0.mac_stats.bcast_pkts_recvd: 3118
dev.em.0.mac_stats.mcast_pkts_recvd: 0
dev.em.0.mac_stats.rx_frames_64: 0
dev.em.0.mac_stats.rx_frames_65_127: 0
dev.em.0.mac_stats.rx_frames_128_255: 0
dev.em.0.mac_stats.rx_frames_256_511: 0
dev.em.0.mac_stats.rx_frames_512_1023: 0
dev.em.0.mac_stats.rx_frames_1024_1522: 0
dev.em.0.mac_stats.good_octets_recvd: 147756951052
dev.em.0.mac_stats.good_octets_txd: 258037511297
dev.em.0.mac_stats.total_pkts_txd: 242746090
dev.em.0.mac_stats.good_pkts_txd: 242742601
dev.em.0.mac_stats.bcast_pkts_txd: 682
dev.em.0.mac_stats.mcast_pkts_txd: 3805
dev.em.0.mac_stats.tx_frames_64: 0
dev.em.0.mac_stats.tx_frames_65_127: 0
dev.em.0.mac_stats.tx_frames_128_255: 0
dev.em.0.mac_stats.tx_frames_256_511: 0
dev.em.0.mac_stats.tx_frames_512_1023: 0
dev.em.0.mac_stats.tx_frames_1024_1522: 0
dev.em.0.mac_stats.tso_txd: 61895030
dev.em.0.mac_stats.tso_ctx_fail: 0
dev.em.0.interrupts.asserts: 147048246
dev.em.0.interrupts.rx_pkt_timer: 0
dev.em.0.interrupts.rx_abs_timer: 0
dev.em.0.interrupts.tx_pkt_timer: 0
dev.em.0.interrupts.tx_abs_timer: 0
dev.em.0.interrupts.tx_queue_empty: 0
dev.em.0.interrupts.tx_queue_min_thresh: 0
dev.em.0.interrupts.rx_desc_min_thresh: 0
dev.em.0.interrupts.rx_overrun: 0
dev.em.0.wake: 0
<<< sysctl dev.em.0

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1975926365.20110223121637>