Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 30 Dec 2016 22:55:19 +0100
From:      Ben RUBSON <ben.rubson@gmail.com>
To:        FreeBSD Net <freebsd-net@freebsd.org>
Subject:   iSCSI failing, MLX rx_ring errors ?
Message-ID:  <486A6DA0-54C8-40DF-8437-F6E382DA01A8@gmail.com>

next in thread | raw e-mail | index | archive | help
Hello,

2 FreeBSD 11.0-p3 servers, one iSCSI initiator, one target.
Both with Mellanox ConnectX-3 40G.

For the past few days, sometimes, under undetermined circumstances, as soon as there is some (very low) iSCSI traffic, some of the disks get disconnected :
kernel: WARNING: 192.168.2.2 (iqn......): no ping reply (NOP-Out) after 5 seconds; dropping connection

At the same moment, sysctl counters hw.mlxen1.stat.rx_ring*.error grow on initiator side.

I then tried to reproduce these network errors by burning the link at 40G full-duplex using iPerf.
But I did not manage to increase these error counters.

It's strange because it's a sporadic issue: I can have traffic on iSCSI disks without any issue, and then sometimes they get disconnected with the error counters growing.

What should I look at ?
What do these rx_ring*.error counters mean ? Hardware errors ?

Below are some numbers to help with the investigation.
(strangely enough for MLX guys, all hw.mlxen*.stat.tx_*_bytes_packets counters are 0)

Thank you very much for your help & support !

Best regards,

Ben



# uname -r
11.0-RELEASE-p3

# ifconfig mlxen1
mlxen1: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu 9020
options=ed07bb<RXCSUM,TXCSUM,VLAN_MTU,VLAN_HWTAGGING,JUMBO_MTU,VLAN_HWCSUM,TSO4,TSO6,LRO,VLAN_HWFILTER,VLAN_HWTSO,LINKSTATE,RXCSUM_IPV6,TXCSUM_IPV6>
ether XX:XX:XX:XX:XX:XX
inet 192.168.2.1 netmask 0xffff0000 broadcast 192.168.255.255
nd6 options=29<PERFORMNUD,IFDISABLED,AUTO_LINKLOCAL>
media: Ethernet autoselect (40Gbase-CR4 <full-duplex,rxpause,txpause>)
status: active

# mst status
MST devices:
------------
pci0:133:0:0 - MT27500 Family [ConnectX-3]

# flint -d pci0:133:0:0 q
Image type:          FS2
FW Version:          2.36.5000
FW Release Date:     26.1.2016
Product Version:     02.36.50.00
Rom Info:            type=PXE version=3.4.718 devid=4099
PSID:                MT_1090110023

[initiator]# netstat -I mlxen1
Name    Mtu Network       Address                Ipkts Ierrs Idrop      Opkts Oerrs  Coll
mlxen  9020 <Link#3>      XX:XX:XX:XX:XX:XX 4095609916     0     0 3321316930     0     0
mlxen     - 192.168.0.0/1 initiator         2020732710     -     - 3242031277     -     -

[target]# netstat -I mlxen1
Name    Mtu Network       Address                Ipkts Ierrs Idrop      Opkts Oerrs  Coll
mlxen  9020 <Link#3>      XX:XX:XX:XX:XX:XX 3798170324     0     0 5098312540     0     0
mlxen     - 192.168.0.0/1 target            2462248779     -     - 5057776404     -     -

[initiator]# sysctl hw.mlxen1
hw.mlxen1.stat.rx_ring15.error: 52
hw.mlxen1.stat.rx_ring15.bytes: 3477976760
hw.mlxen1.stat.rx_ring15.packets: 7360524
hw.mlxen1.stat.rx_ring14.error: 77
hw.mlxen1.stat.rx_ring14.bytes: 791762343420
hw.mlxen1.stat.rx_ring14.packets: 142943349
hw.mlxen1.stat.rx_ring13.error: 33
hw.mlxen1.stat.rx_ring13.bytes: 2284826126
hw.mlxen1.stat.rx_ring13.packets: 7781479
hw.mlxen1.stat.rx_ring12.error: 20
hw.mlxen1.stat.rx_ring12.bytes: 730312221216
hw.mlxen1.stat.rx_ring12.packets: 155950019
hw.mlxen1.stat.rx_ring11.error: 57
hw.mlxen1.stat.rx_ring11.bytes: 114233581104
hw.mlxen1.stat.rx_ring11.packets: 69633934
hw.mlxen1.stat.rx_ring10.error: 49
hw.mlxen1.stat.rx_ring10.bytes: 10775291086886
hw.mlxen1.stat.rx_ring10.packets: 1389173314
hw.mlxen1.stat.rx_ring9.error: 68
hw.mlxen1.stat.rx_ring9.bytes: 35171979154
hw.mlxen1.stat.rx_ring9.packets: 86633073
hw.mlxen1.stat.rx_ring8.error: 81
hw.mlxen1.stat.rx_ring8.bytes: 23210482350
hw.mlxen1.stat.rx_ring8.packets: 68058961
hw.mlxen1.stat.rx_ring7.error: 49
hw.mlxen1.stat.rx_ring7.bytes: 5093871869318
hw.mlxen1.stat.rx_ring7.packets: 744833265
hw.mlxen1.stat.rx_ring6.error: 37
hw.mlxen1.stat.rx_ring6.bytes: 90764137790
hw.mlxen1.stat.rx_ring6.packets: 130626431
hw.mlxen1.stat.rx_ring5.error: 7
hw.mlxen1.stat.rx_ring5.bytes: 641902292152
hw.mlxen1.stat.rx_ring5.packets: 76754874
hw.mlxen1.stat.rx_ring4.error: 59
hw.mlxen1.stat.rx_ring4.bytes: 28894253498
hw.mlxen1.stat.rx_ring4.packets: 12545685
hw.mlxen1.stat.rx_ring3.error: 87
hw.mlxen1.stat.rx_ring3.bytes: 1581250152646
hw.mlxen1.stat.rx_ring3.packets: 255027061
hw.mlxen1.stat.rx_ring2.error: 19
hw.mlxen1.stat.rx_ring2.bytes: 47056101376
hw.mlxen1.stat.rx_ring2.packets: 11670049
hw.mlxen1.stat.rx_ring1.error: 76
hw.mlxen1.stat.rx_ring1.bytes: 6673057117060
hw.mlxen1.stat.rx_ring1.packets: 917070315
hw.mlxen1.stat.rx_ring0.error: 21
hw.mlxen1.stat.rx_ring0.bytes: 65223989496
hw.mlxen1.stat.rx_ring0.packets: 19600719
hw.mlxen1.stat.tx_ring23.bytes: 829090401622
hw.mlxen1.stat.tx_ring23.packets: 336892256
hw.mlxen1.stat.tx_ring22.bytes: 793782461006
hw.mlxen1.stat.tx_ring22.packets: 110951927
hw.mlxen1.stat.tx_ring21.bytes: 133908896030
hw.mlxen1.stat.tx_ring21.packets: 15515669
hw.mlxen1.stat.tx_ring20.bytes: 64369317656
hw.mlxen1.stat.tx_ring20.packets: 9666791
hw.mlxen1.stat.tx_ring19.bytes: 50388926288
hw.mlxen1.stat.tx_ring19.packets: 46840752
hw.mlxen1.stat.tx_ring18.bytes: 828058240322
hw.mlxen1.stat.tx_ring18.packets: 332214476
hw.mlxen1.stat.tx_ring17.bytes: 2405121596404
hw.mlxen1.stat.tx_ring17.packets: 514331797
hw.mlxen1.stat.tx_ring16.bytes: 74310025468
hw.mlxen1.stat.tx_ring16.packets: 9385759
hw.mlxen1.stat.tx_ring15.bytes: 113497610894
hw.mlxen1.stat.tx_ring15.packets: 13184789
hw.mlxen1.stat.tx_ring14.bytes: 2277138343902
hw.mlxen1.stat.tx_ring14.packets: 307498601
hw.mlxen1.stat.tx_ring13.bytes: 52566528570
hw.mlxen1.stat.tx_ring13.packets: 16261361
hw.mlxen1.stat.tx_ring12.bytes: 490379207044
hw.mlxen1.stat.tx_ring12.packets: 58488779
hw.mlxen1.stat.tx_ring11.bytes: 71659340140
hw.mlxen1.stat.tx_ring11.packets: 12239602
hw.mlxen1.stat.tx_ring10.bytes: 912427288872
hw.mlxen1.stat.tx_ring10.packets: 349927049
hw.mlxen1.stat.tx_ring9.bytes: 72551256540
hw.mlxen1.stat.tx_ring9.packets: 51930045
hw.mlxen1.stat.tx_ring8.bytes: 111389160186
hw.mlxen1.stat.tx_ring8.packets: 12694233
hw.mlxen1.stat.tx_ring7.bytes: 1706882082390
hw.mlxen1.stat.tx_ring7.packets: 203421211
hw.mlxen1.stat.tx_ring6.bytes: 184536049610
hw.mlxen1.stat.tx_ring6.packets: 24060941
hw.mlxen1.stat.tx_ring5.bytes: 118658630412
hw.mlxen1.stat.tx_ring5.packets: 14390742
hw.mlxen1.stat.tx_ring4.bytes: 679672951824
hw.mlxen1.stat.tx_ring4.packets: 137562479
hw.mlxen1.stat.tx_ring3.bytes: 2390933455110
hw.mlxen1.stat.tx_ring3.packets: 295791371
hw.mlxen1.stat.tx_ring2.bytes: 178631018408
hw.mlxen1.stat.tx_ring2.packets: 30935420
hw.mlxen1.stat.tx_ring1.bytes: 1773565506936
hw.mlxen1.stat.tx_ring1.packets: 206723306
hw.mlxen1.stat.tx_ring0.bytes: 1818793974890
hw.mlxen1.stat.tx_ring0.packets: 210483166
hw.mlxen1.stat.tx_gt_1548_bytes_packets: 0
hw.mlxen1.stat.tx_1548_bytes_packets: 0
hw.mlxen1.stat.tx_1522_bytes_packets: 0
hw.mlxen1.stat.tx_1518_bytes_packets: 0
hw.mlxen1.stat.tx_1023_bytes_packets: 0
hw.mlxen1.stat.tx_511_bytes_packets: 0
hw.mlxen1.stat.tx_255_bytes_packets: 0
hw.mlxen1.stat.tx_127_bytes_packets: 0
hw.mlxen1.stat.tx_lt_64_bytes_packets: 0
hw.mlxen1.stat.tx_dropped: 0
hw.mlxen1.stat.tx_errors: 0
hw.mlxen1.stat.tx_broadcast_packets: 4518
hw.mlxen1.stat.tx_multicast_packets: 0
hw.mlxen1.stat.tx_bytes: 18145597836008
hw.mlxen1.stat.tx_packets: 3321392471
hw.mlxen1.stat.rx_gt_1548_bytes_packets: 3049781443
hw.mlxen1.stat.rx_1548_bytes_packets: 5
hw.mlxen1.stat.rx_1522_bytes_packets: 0
hw.mlxen1.stat.rx_1518_bytes_packets: 1498044
hw.mlxen1.stat.rx_1023_bytes_packets: 683359
hw.mlxen1.stat.rx_511_bytes_packets: 1593761
hw.mlxen1.stat.rx_255_bytes_packets: 2074222
hw.mlxen1.stat.rx_127_bytes_packets: 1039982287
hw.mlxen1.stat.rx_lt_64_bytes_packets: 50631
hw.mlxen1.stat.rx_out_range_length_error: 0
hw.mlxen1.stat.rx_in_range_length_error: 0
hw.mlxen1.stat.rx_jabbers: 0
hw.mlxen1.stat.rx_crc_errors: 0
hw.mlxen1.stat.rx_over_errors: 0
hw.mlxen1.stat.rx_length_errors: 0
hw.mlxen1.stat.rx_dropped: 0
hw.mlxen1.stat.rx_errors: 0
hw.mlxen1.stat.rx_broadcast_packets: 1047
hw.mlxen1.stat.rx_multicast_packets: 26
hw.mlxen1.stat.rx_packets: 4095663752
hw.mlxen1.stat.rx_bytes: 26714147524337
hw.mlxen1.stat.tx_chksum_offload: 1798163742
hw.mlxen1.stat.rx_chksum_none: 0
hw.mlxen1.stat.rx_chksum_good: 118
hw.mlxen1.stat.rx_alloc_failed: 0
hw.mlxen1.stat.tx_oversized_packets: 0
hw.mlxen1.stat.tx_timeout: 0
hw.mlxen1.stat.wake_queue: 599
hw.mlxen1.stat.queue_stopped: 599
hw.mlxen1.stat.tso_packets: 373869842
hw.mlxen1.conf.eeprom_info: 0
hw.mlxen1.conf.coalesce.adaptive_rx_coal: 1
hw.mlxen1.conf.coalesce.sample_interval: 0
hw.mlxen1.conf.coalesce.rx_usecs_high: 128
hw.mlxen1.conf.coalesce.pkt_rate_high: 450000
hw.mlxen1.conf.coalesce.rx_usecs_low: 0
hw.mlxen1.conf.coalesce.pkt_rate_low: 400000
hw.mlxen1.conf.device_name: mlx4_core0
hw.mlxen1.conf.port_num: 2
hw.mlxen1.conf.rx_ppp: 0
hw.mlxen1.conf.tx_ppp: 0
hw.mlxen1.conf.tx_size: 1024
hw.mlxen1.conf.rx_size: 1024
hw.mlxen1.conf.tx_rings: 24
hw.mlxen1.conf.rx_rings: 16
hw.mlxen1.conf.msg_enable: 20

[target]# sysctl hw.mlxen1
hw.mlxen1.stat.rx_ring15.error: 0
hw.mlxen1.stat.rx_ring15.bytes: 245362991998
hw.mlxen1.stat.rx_ring15.packets: 125007210
hw.mlxen1.stat.rx_ring14.error: 0
hw.mlxen1.stat.rx_ring14.bytes: 69938348448
hw.mlxen1.stat.rx_ring14.packets: 12006397
hw.mlxen1.stat.rx_ring13.error: 0
hw.mlxen1.stat.rx_ring13.bytes: 801100594458
hw.mlxen1.stat.rx_ring13.packets: 324484029
hw.mlxen1.stat.rx_ring12.error: 0
hw.mlxen1.stat.rx_ring12.bytes: 500584314630
hw.mlxen1.stat.rx_ring12.packets: 63722864
hw.mlxen1.stat.rx_ring11.error: 0
hw.mlxen1.stat.rx_ring11.bytes: 133319524538
hw.mlxen1.stat.rx_ring11.packets: 15553073
hw.mlxen1.stat.rx_ring10.error: 0
hw.mlxen1.stat.rx_ring10.bytes: 1204971095886
hw.mlxen1.stat.rx_ring10.packets: 473964848
hw.mlxen1.stat.rx_ring9.error: 0
hw.mlxen1.stat.rx_ring9.bytes: 3587005798210
hw.mlxen1.stat.rx_ring9.packets: 423516917
hw.mlxen1.stat.rx_ring8.error: 0
hw.mlxen1.stat.rx_ring8.bytes: 1603198498178
hw.mlxen1.stat.rx_ring8.packets: 517866399
hw.mlxen1.stat.rx_ring7.error: 0
hw.mlxen1.stat.rx_ring7.bytes: 769432176604
hw.mlxen1.stat.rx_ring7.packets: 125552308
hw.mlxen1.stat.rx_ring6.error: 0
hw.mlxen1.stat.rx_ring6.bytes: 3616464682348
hw.mlxen1.stat.rx_ring6.packets: 431320453
hw.mlxen1.stat.rx_ring5.error: 0
hw.mlxen1.stat.rx_ring5.bytes: 147803881652
hw.mlxen1.stat.rx_ring5.packets: 30514582
hw.mlxen1.stat.rx_ring4.error: 0
hw.mlxen1.stat.rx_ring4.bytes: 59936047126
hw.mlxen1.stat.rx_ring4.packets: 16529062
hw.mlxen1.stat.rx_ring3.error: 0
hw.mlxen1.stat.rx_ring3.bytes: 305461527316
hw.mlxen1.stat.rx_ring3.packets: 120780446
hw.mlxen1.stat.rx_ring2.error: 0
hw.mlxen1.stat.rx_ring2.bytes: 1663980980828
hw.mlxen1.stat.rx_ring2.packets: 576915628
hw.mlxen1.stat.rx_ring1.error: 0
hw.mlxen1.stat.rx_ring1.bytes: 2341832227428
hw.mlxen1.stat.rx_ring1.packets: 332370776
hw.mlxen1.stat.rx_ring0.error: 0
hw.mlxen1.stat.rx_ring0.bytes: 1718316360628
hw.mlxen1.stat.rx_ring0.packets: 208016985
hw.mlxen1.stat.tx_ring23.bytes: 761640234844
hw.mlxen1.stat.tx_ring23.packets: 140570741
hw.mlxen1.stat.tx_ring22.bytes: 235825746846
hw.mlxen1.stat.tx_ring22.packets: 47582760
hw.mlxen1.stat.tx_ring21.bytes: 6307679416
hw.mlxen1.stat.tx_ring21.packets: 13587887
hw.mlxen1.stat.tx_ring20.bytes: 489519672798
hw.mlxen1.stat.tx_ring20.packets: 56191717
hw.mlxen1.stat.tx_ring19.bytes: 10121686810
hw.mlxen1.stat.tx_ring19.packets: 4241850
hw.mlxen1.stat.tx_ring18.bytes: 4558715397262
hw.mlxen1.stat.tx_ring18.packets: 639846327
hw.mlxen1.stat.tx_ring17.bytes: 32286951370
hw.mlxen1.stat.tx_ring17.packets: 22959085
hw.mlxen1.stat.tx_ring16.bytes: 1899512819522
hw.mlxen1.stat.tx_ring16.packets: 282998424
hw.mlxen1.stat.tx_ring15.bytes: 117297176370
hw.mlxen1.stat.tx_ring15.packets: 27096254
hw.mlxen1.stat.tx_ring14.bytes: 11272324536
hw.mlxen1.stat.tx_ring14.packets: 136151702
hw.mlxen1.stat.tx_ring13.bytes: 5085846009768
hw.mlxen1.stat.tx_ring13.packets: 653473793
hw.mlxen1.stat.tx_ring12.bytes: 3172283876
hw.mlxen1.stat.tx_ring12.packets: 12387380
hw.mlxen1.stat.tx_ring11.bytes: 1815249849862
hw.mlxen1.stat.tx_ring11.packets: 227269892
hw.mlxen1.stat.tx_ring10.bytes: 5099264496730
hw.mlxen1.stat.tx_ring10.packets: 655527719
hw.mlxen1.stat.tx_ring9.bytes: 945986137300
hw.mlxen1.stat.tx_ring9.packets: 303182670
hw.mlxen1.stat.tx_ring8.bytes: 942441980
hw.mlxen1.stat.tx_ring8.packets: 2898885
hw.mlxen1.stat.tx_ring7.bytes: 3123343187824
hw.mlxen1.stat.tx_ring7.packets: 357250514
hw.mlxen1.stat.tx_ring6.bytes: 51356715444
hw.mlxen1.stat.tx_ring6.packets: 11868226
hw.mlxen1.stat.tx_ring5.bytes: 991389854
hw.mlxen1.stat.tx_ring5.packets: 7233820
hw.mlxen1.stat.tx_ring4.bytes: 72483778448
hw.mlxen1.stat.tx_ring4.packets: 13382952
hw.mlxen1.stat.tx_ring3.bytes: 26534426276
hw.mlxen1.stat.tx_ring3.packets: 7776173
hw.mlxen1.stat.tx_ring2.bytes: 5089185758862
hw.mlxen1.stat.tx_ring2.packets: 658036115
hw.mlxen1.stat.tx_ring1.bytes: 10793041110
hw.mlxen1.stat.tx_ring1.packets: 80825537
hw.mlxen1.stat.tx_ring0.bytes: 5109262741542
hw.mlxen1.stat.tx_ring0.packets: 735938500
hw.mlxen1.stat.tx_gt_1548_bytes_packets: 0
hw.mlxen1.stat.tx_1548_bytes_packets: 0
hw.mlxen1.stat.tx_1522_bytes_packets: 0
hw.mlxen1.stat.tx_1518_bytes_packets: 0
hw.mlxen1.stat.tx_1023_bytes_packets: 0
hw.mlxen1.stat.tx_511_bytes_packets: 0
hw.mlxen1.stat.tx_255_bytes_packets: 0
hw.mlxen1.stat.tx_127_bytes_packets: 0
hw.mlxen1.stat.tx_lt_64_bytes_packets: 0
hw.mlxen1.stat.tx_dropped: 0
hw.mlxen1.stat.tx_errors: 0
hw.mlxen1.stat.tx_broadcast_packets: 1769
hw.mlxen1.stat.tx_multicast_packets: 0
hw.mlxen1.stat.tx_bytes: 34577305063732
hw.mlxen1.stat.tx_packets: 5098278918
hw.mlxen1.stat.rx_gt_1548_bytes_packets: 2205482837
hw.mlxen1.stat.rx_1548_bytes_packets: 2738
hw.mlxen1.stat.rx_1522_bytes_packets: 1
hw.mlxen1.stat.rx_1518_bytes_packets: 4075320
hw.mlxen1.stat.rx_1023_bytes_packets: 492236
hw.mlxen1.stat.rx_511_bytes_packets: 3810259
hw.mlxen1.stat.rx_255_bytes_packets: 3183982
hw.mlxen1.stat.rx_127_bytes_packets: 1581068219
hw.mlxen1.stat.rx_lt_64_bytes_packets: 6402
hw.mlxen1.stat.rx_out_range_length_error: 0
hw.mlxen1.stat.rx_in_range_length_error: 0
hw.mlxen1.stat.rx_jabbers: 0
hw.mlxen1.stat.rx_crc_errors: 0
hw.mlxen1.stat.rx_over_errors: 0
hw.mlxen1.stat.rx_length_errors: 0
hw.mlxen1.stat.rx_dropped: 0
hw.mlxen1.stat.rx_errors: 0
hw.mlxen1.stat.rx_broadcast_packets: 3355
hw.mlxen1.stat.rx_multicast_packets: 27
hw.mlxen1.stat.rx_packets: 3798121994
hw.mlxen1.stat.rx_bytes: 18783901543019
hw.mlxen1.stat.tx_chksum_offload: 2003355285
hw.mlxen1.stat.rx_chksum_none: 0
hw.mlxen1.stat.rx_chksum_good: 10
hw.mlxen1.stat.rx_alloc_failed: 0
hw.mlxen1.stat.tx_oversized_packets: 0
hw.mlxen1.stat.tx_timeout: 0
hw.mlxen1.stat.wake_queue: 26759
hw.mlxen1.stat.queue_stopped: 26759
hw.mlxen1.stat.tso_packets: 549442205
hw.mlxen1.conf.eeprom_info: 0
hw.mlxen1.conf.coalesce.adaptive_rx_coal: 1
hw.mlxen1.conf.coalesce.sample_interval: 0
hw.mlxen1.conf.coalesce.rx_usecs_high: 128
hw.mlxen1.conf.coalesce.pkt_rate_high: 450000
hw.mlxen1.conf.coalesce.rx_usecs_low: 0
hw.mlxen1.conf.coalesce.pkt_rate_low: 400000
hw.mlxen1.conf.device_name: mlx4_core0
hw.mlxen1.conf.port_num: 2
hw.mlxen1.conf.rx_ppp: 0
hw.mlxen1.conf.tx_ppp: 0
hw.mlxen1.conf.tx_size: 1024
hw.mlxen1.conf.rx_size: 1024
hw.mlxen1.conf.tx_rings: 24
hw.mlxen1.conf.rx_rings: 16
hw.mlxen1.conf.msg_enable: 20




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?486A6DA0-54C8-40DF-8437-F6E382DA01A8>