Date: Fri, 10 Oct 2008 08:29:54 -0700
From: Randy Bush <randy@psg.com>
To: FreeBSD Current <current@freebsd.org>
Subject: bad net state and not rebootable
Message-ID: <48EF74F2.3090703@psg.com>
next in thread | raw e-mail | index | archive | help
in the last few days, i have had strange hangs on two servers, where they
were pingable, i could log in to console port, but no response to ssh
and other net services. they happily ran arbitrary programs. but, they
hung in reboot and required a power-bar whack.
now it is a third server and one where i have no power-bar.
FreeBSD srv0.iad.rg.net 8.0-CURRENT FreeBSD 8.0-CURRENT #19: Mon Sep 15
01:23:26 UTC 2008 root@srv0.iad.rg.net:/usr/obj/usr/src/sys/SRV0 i386
rmac.psg.com:/Users/randy/config> ping srv0
PING srv0.iad.rg.net (198.180.150.1): 56 data bytes
64 bytes from 198.180.150.1: icmp_seq=0 ttl=51 time=136.989 ms
64 bytes from 198.180.150.1: icmp_seq=1 ttl=51 time=145.055 ms
64 bytes from 198.180.150.1: icmp_seq=2 ttl=51 time=144.968 ms
64 bytes from 198.180.150.1: icmp_seq=3 ttl=51 time=144.957 ms
ifconfig looks ok
srv0.iad.rg.net:/root# ifconfig bge0
bge0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu 1500
options=9b<RXCSUM,TXCSUM,VLAN_MTU,VLAN_HWTAGGING,VLAN_HWCSUM>
ether 00:30:48:82:11:a2
inet 198.180.150.1 netmask 0xffffff80 broadcast 198.180.150.127
inet6 fe80::230:48ff:fe82:11a2%bge0 prefixlen 64 scopeid 0x1
inet 174.128.32.42 netmask 0xfffff000 broadcast 174.128.47.255
inet6 2001:418:8006::42 prefixlen 64
media: Ethernet autoselect (1000baseT <full-duplex>)
status: active
srv0.iad.rg.net:/root# netstat -in
Name Mtu Network Address Ipkts Ierrs Opkts
Oerrs Coll
bge0 1500 <Link#1> 00:30:48:82:11:a2 22158026 0 23134038
0 0
bge0 1500 198.180.150.0 198.180.150.1 9420649 - 9338420
- -
bge0 1500 fe80:1::230:4 fe80:1::230:48ff: 0 - 1
- -
bge0 1500 174.128.32.0/ 174.128.32.42 116 - 0
- -
bge0 1500 2001:418:8006 2001:418:8006::42 10460449 - 14334303
- -
bge1* 1500 <Link#2> 00:30:48:82:11:a3 0 0 0
0 0
lo0 16384 <Link#3> 1492574 0 1492574
0 0
lo0 16384 127.0.0.0/8 127.0.0.1 915886 - 915886
- -
lo0 16384 ::1/128 ::1 2556 - 2567
- -
lo0 16384 fe80:3::1/64 fe80:3::1 0 - 0
- -
ping looks normal
srv0.iad.rg.net:/root# ping 147.28.0.39
PING 147.28.0.39 (147.28.0.39): 56 data bytes
64 bytes from 147.28.0.39: icmp_seq=0 ttl=56 time=76.845 ms
^C
--- 147.28.0.39 ping statistics ---
1 packets transmitted, 1 packets received, 0.0% packet loss
round-trip min/avg/max/stddev = 76.845/76.845/76.845/0.000 ms
until you realize that i waited two minutes before hitting ^C
srv0.iad.rg.net:/root# df
Filesystem 1024-blocks Used Avail Capacity Mounted on
/dev/mirror/gm0s1a 507630 258478 208542 55% /
devfs 1 1 0 100% /dev
/dev/mirror/gm0s1d 63214 152 58006 0% /root
/dev/mirror/gm0s1g 71055144 5565276 59805458 9% /usr
/dev/mirror/gm0s1e 1012974 497442 434496 53% /var
/dev/mirror/gm0s1f 1012974 18596 913342 2% /var/spool
procfs 4 4 0 100% /proc
/dev/md0 126702 34 116532 0% /tmp
srv0.iad.rg.net:/root# gmirror status
Name Status Components
mirror/gm0 COMPLETE ad4
ad6
srv0.iad.rg.net:/root# /bin/ps -auxww
USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND
nagios 78154 0.3 0.4 6816 3932 ?? I 12:04AM 0:00.00
/usr/local/bin/nagios -d /usr/local/etc/nagios/nagios.cfg
nagios 78155 0.3 0.2 3552 1616 ?? I 12:04AM 0:00.00 sh -c
/usr/local/libexec/nagios/check_ssh 210.138.216.50
nagios 78156 0.3 0.1 4228 1324 ?? I 12:04AM 0:00.00
/usr/local/libexec/nagios/check_ssh 210.138.216.50
smokeping 64574 0.1 1.3 15944 13264 ?? I 15Sep08 3:57.48
/usr/local/bin/smokeping [FPing] (perl)
root 0 0.0 0.0 0 48 ?? DLs 15Sep08 0:02.80 [kernel]
root 1 0.0 0.0 1888 328 ?? ILs 15Sep08 1:09.48
/sbin/init --
root 2 0.0 0.0 0 8 ?? DL 15Sep08 0:49.11 [g_event]
root 3 0.0 0.0 0 8 ?? DL 15Sep08 6:13.69 [g_up]
root 4 0.0 0.0 0 8 ?? DL 15Sep08 6:03.08 [g_down]
root 5 0.0 0.0 0 8 ?? DL 15Sep08 0:01.78
[pagedaemon]
root 6 0.0 0.0 0 8 ?? DL 15Sep08 0:00.00 [vmdaemon]
root 7 0.0 0.0 0 8 ?? DL 15Sep08 0:00.02 [pagezero]
root 8 0.0 0.0 0 8 ?? DL 15Sep08 0:08.11
[bufdaemon]
root 9 0.0 0.0 0 8 ?? DL 15Sep08 38:55.53 [syncer]
root 10 0.0 0.0 0 8 ?? DL 15Sep08 0:00.00 [audit]
root 11 0.0 0.0 0 8 ?? RL 15Sep08 34612:02.65 [idle]
root 12 0.0 0.0 0 112 ?? WL 15Sep08 31:11.27 [intr]
root 13 0.0 0.0 0 8 ?? DL 15Sep08 1:25.55 [yarrow]
root 14 0.0 0.0 0 8 ?? DL 15Sep08 0:08.97 [vnlru]
root 15 0.0 0.0 0 8 ?? DL 15Sep08 1:29.57
[softdepflush]
root 16 0.0 0.0 0 8 ?? DL 15Sep08 1:49.61
[g_mirror gm0]
root 553 0.0 0.0 1888 396 ?? Is 15Sep08 0:00.13 /sbin/devd
root 712 0.0 0.0 0 8 ?? DL 15Sep08 0:01.01 [md0]
root 887 0.0 0.2 4432 2044 ?? I 15Sep08 0:00.55
/usr/local/sbin/smartd -p /var/run/smartd.pid -c /usr/local/etc/smartd.conf
www 941 0.0 0.6 9984 6180 ?? I 25Sep08 0:25.62
/usr/local/sbin/httpd -DNOHTTPACCEPT
root 945 0.0 0.3 4796 2608 ?? Is 15Sep08 1:12.80
/usr/sbin/ntpd -c /etc/ntp.conf -g -p /var/run/ntpd.pid -f
/var/db/ntpd.drift
mailnull 1022 0.0 0.3 8244 3204 ?? Is 15Sep08 0:19.31
/usr/local/sbin/exim -bd -q1m (exim-4.69-0)
root 1047 0.0 0.3 4236 2804 ?? Is 15Sep08 1:33.09
/usr/sbin/dhcpd -cf /etc/dhcpd.conf
root 1085 0.0 0.3 5232 2956 ?? Is 15Sep08 0:05.31
/usr/sbin/sshd
root 1095 0.0 0.1 3300 1404 ?? Is 15Sep08 0:03.33
/usr/sbin/cron -s
root 1163 0.0 0.1 3272 1288 ?? I 15Sep08 0:00.00
/usr/libexec/getty std.9600 ttyu1
www 2139 0.0 0.6 9984 6240 ?? I 25Sep08 0:26.15
/usr/local/sbin/httpd -DNOHTTPACCEPT
unbound 27209 0.0 0.8 12888 8612 ?? Is Sat12AM 0:20.46
/usr/local/sbin/unbound
www 40079 0.0 0.6 9984 6208 ?? I 30Sep08 0:14.57
/usr/local/sbin/httpd -DNOHTTPACCEPT
www 40296 0.0 0.6 9984 6200 ?? I 30Sep08 0:13.84
/usr/local/sbin/httpd -DNOHTTPACCEPT
root 41167 0.0 0.5 9980 5520 ?? Is 15Sep08 0:45.77
/usr/local/sbin/httpd -DNOHTTPACCEPT
smokeping 43819 0.0 1.6 20388 16216 ?? Is Wed11PM 0:00.01
/usr/local/bin/smokeping (perl)
smokeping 43820 0.0 1.6 20416 16240 ?? I Wed11PM 0:11.41
/usr/local/bin/smokeping [FPing] (perl)
root 55477 0.0 0.1 3272 1404 ?? Is 18Sep08 0:08.62
/usr/sbin/syslogd -s
smokeping 64573 0.0 1.3 15912 13188 ?? Is 15Sep08 0:00.01
/usr/local/bin/smokeping (perl)
root 77052 0.0 0.1 3428 1504 ?? Is 12:00AM 0:00.01
/usr/local/sbin/sshguard
smokeping 77944 0.0 0.1 3212 1296 ?? I 12:04AM 0:00.03
/usr/local/sbin/fping -C 20 -q -B1 -r1 -i10 r0 iphone1 afnog.org
work0.psg.com soek0 srv0 psg1 iphone0 rip psg0 raid1 drinx.linx.net r1
psg bbgp psg2 ver0 ran rip1.psg.com
root 78307 0.0 0.4 8244 3732 ?? Is 12:04AM 0:00.02
/usr/local/sbin/exim -Mc 1Ko5UP-000KMv-OH (exim-4.69-0)
mailnull 78309 0.0 0.4 8244 3740 ?? I 12:04AM 0:00.00
/usr/local/sbin/exim -Mc 1Ko5UP-000KMv-OH (exim-4.69-0)
root 78313 0.0 0.4 8244 3732 ?? Is 12:04AM 0:00.02
/usr/local/sbin/exim -Mc 1Ko5UP-000KN0-Sj (exim-4.69-0)
mailnull 78314 0.0 0.4 8244 3744 ?? I 12:04AM 0:00.00
/usr/local/sbin/exim -Mc 1Ko5UP-000KN0-Sj (exim-4.69-0)
root 78333 0.0 0.4 8244 3732 ?? Is 12:04AM 0:00.02
/usr/local/sbin/exim -Mc 1Ko5UR-000KNP-5u (exim-4.69-0)
mailnull 78334 0.0 0.4 8244 3740 ?? I 12:04AM 0:00.00
/usr/local/sbin/exim -Mc 1Ko5UR-000KNP-5u (exim-4.69-0)
root 78347 0.0 0.4 8280 3752 ?? Is 12:04AM 0:00.02
/usr/local/sbin/exim -Mc 1Ko5UU-000KNe-0s (exim-4.69-0)
mailnull 78348 0.0 0.4 8280 3764 ?? I 12:04AM 0:00.00
/usr/local/sbin/exim -Mc 1Ko5UU-000KNe-0s (exim-4.69-0)
root 78371 0.0 0.3 6576 3148 ?? Is 8:37AM 0:00.05 sshd:
[accepted] (sshd)
sshd 78372 0.0 0.3 6576 3168 ?? I 8:38AM 0:00.01 sshd:
[net] (sshd)
root 78391 0.0 0.3 6576 3148 ?? Is 3:02PM 0:00.05 sshd:
[accepted] (sshd)
sshd 78392 0.0 0.3 6576 3160 ?? I 3:02PM 0:00.01 sshd:
[net] (sshd)
nagios 79837 0.0 0.4 6816 3920 ?? Is 23Sep08 1023:38.11
/usr/local/bin/nagios -d /usr/local/etc/nagios/nagios.cfg
www 86137 0.0 1.3 18464 13524 ?? Is Thu10AM 0:00.44
/usr/local/bin/speedy_backend /usr/local/smokeping/htdocs/smokeping.cgi
target=Westin
www 86138 0.0 3.8 43960 39536 ?? I Thu10AM 2:14.61
/usr/local/bin/speedy_backend /usr/local/smokeping/htdocs/smokeping.cgi
target=Westin
www 87153 0.0 0.6 9984 6196 ?? I 25Sep08 0:27.22
/usr/local/sbin/httpd -DNOHTTPACCEPT
www 90538 0.0 0.6 9984 6188 ?? I 25Sep08 0:25.02
/usr/local/sbin/httpd -DNOHTTPACCEPT
www 92815 0.0 0.6 9984 6200 ?? I 25Sep08 0:25.94
/usr/local/sbin/httpd -DNOHTTPACCEPT
www 95051 0.0 0.6 9984 6200 ?? I 25Sep08 0:25.50
/usr/local/sbin/httpd -DNOHTTPACCEPT
www 96166 0.0 0.6 9984 6188 ?? I 25Sep08 0:24.96
/usr/local/sbin/httpd -DNOHTTPACCEPT
www 97331 0.0 0.6 9984 6212 ?? I 25Sep08 0:24.87
/usr/local/sbin/httpd -DNOHTTPACCEPT
root 1154 0.0 0.1 3272 1292 v0 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv0
root 1155 0.0 0.1 3272 1292 v1 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv1
root 1156 0.0 0.1 3272 1292 v2 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv2
root 1157 0.0 0.1 3272 1292 v3 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv3
root 1158 0.0 0.1 3272 1292 v4 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv4
root 1159 0.0 0.1 3272 1292 v5 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv5
root 1160 0.0 0.1 3272 1292 v6 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv6
root 1161 0.0 0.1 3272 1292 v7 Is+ 15Sep08 0:00.00
/usr/libexec/getty Pc ttyv7
tacacs 852 0.0 0.1 3472 1460 u0- I 15Sep08 0:19.88
/usr/local/bin/tac_plus -C /usr/local/etc/tac_plus.conf
root 1162 0.0 0.2 3720 1704 u0 Is 15Sep08 0:00.16 login
[pam] (login)
root 78396 0.0 0.2 4484 2304 u0 S 3:03PM 0:00.04 -bash
(bash)
root 78433 0.0 0.1 3348 1436 u0 R+ 3:15PM 0:00.00
/bin/ps -auxww
srv0.iad.rg.net:/root# netstat -nf inet
Active Internet connections
Proto Recv-Q Send-Q Local Address Foreign Address (state)
tcp4 63 0 198.180.150.1.443 209.20.186.192.59658
ESTABLISHED
tcp4 178 0 198.180.150.1.443 209.20.186.192.59655 CLOSE_WAIT
tcp4 549 0 198.180.150.1.443 209.20.186.192.59654
ESTABLISHED
tcp4 0 0 198.180.150.1.49 206.223.132.86.12837 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12835 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12834 TIME_WAIT
tcp4 0 0 198.180.150.1.22 209.20.186.192.59606 CLOSE_WAIT
tcp4 0 0 198.180.150.1.* 209.20.186.192.59602 CLOSED
tcp4 469 0 198.180.150.1.* 209.20.186.192.59601 CLOSED
tcp4 549 0 198.180.150.1.* 209.20.186.192.59600 CLOSED
tcp4 458 0 198.180.150.1.* 209.20.186.192.59599 CLOSED
tcp4 0 0 198.180.150.1.49 206.223.132.86.12833 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12832 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12831 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12830 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12829 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12828 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12827 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12826 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12825 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12824 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12823 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12822 TIME_WAIT
tcp4 0 0 198.180.150.1.49 198.180.150.121.40309 TIME_WAIT
tcp4 0 0 198.180.150.1.49 198.180.150.121.40306 TIME_WAIT
tcp4 0 0 198.180.150.1.49 198.180.150.121.40226 TIME_WAIT
tcp4 0 0 198.180.150.1.49 198.180.150.121.40016 TIME_WAIT
tcp4 0 0 198.180.150.1.49 198.180.150.121.39811 TIME_WAIT
tcp4 0 0 198.180.150.1.49 198.180.150.121.39767 TIME_WAIT
tcp4 0 0 198.180.150.1.22 218.25.54.152.51578 CLOSE_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12821 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12820 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12819 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12818 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12817 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12816 TIME_WAIT
tcp4 0 0 198.180.150.1.* 221.192.199.36.2106 CLOSED
tcp4 0 0 198.180.150.1.49 206.223.132.86.12815 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12814 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12813 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12812 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12811 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12810 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12809 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12808 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12807 TIME_WAIT
tcp4 0 0 198.180.150.1.49 206.223.132.86.12806 TIME_WAIT
tcp4 0 0 198.180.150.1.* 209.20.186.192.56192 CLOSED
tcp4 0 0 198.180.150.1.49 206.223.132.86.12805 TIME_WAIT
tcp4 37 0 198.180.150.1.* 209.20.186.192.56179 CLOSED
tcp4 0 0 198.180.150.1.49 206.223.132.86.12804 TIME_WAIT
tcp4 0 0 198.180.150.1.* 209.20.186.192.56084 CLOSED
tcp4 517 0 198.180.150.1.* 209.20.186.192.56039 CLOSED
tcp4 0 0 198.180.150.1.45052 147.28.0.62.25 SYN_SENT
tcp4 0 0 198.180.150.1.60087 147.28.0.62.25 SYN_SENT
tcp4 0 0 198.180.150.1.37996 147.28.0.62.25 SYN_SENT
tcp4 0 0 198.180.150.1.25993 147.28.0.62.25 SYN_SENT
tcp4 0 0 198.180.150.1.58187 210.138.216.50.22 SYN_SENT
udp4 0 0 127.0.0.1.11458 127.0.0.1.53
udp4 0 0 127.0.0.1.60611 127.0.0.1.53
udp4 0 0 127.0.0.1.26925 127.0.0.1.53
udp4 0 0 127.0.0.1.52800 127.0.0.1.53
udp4 0 0 127.0.0.1.57827 127.0.0.1.53
udp4 0 0 127.0.0.1.29704 127.0.0.1.53
udp4 0 0 127.0.0.1.49628 127.0.0.1.53
udp4 0 0 127.0.0.1.46057 127.0.0.1.53
udp4 0 0 127.0.0.1.56978 127.0.0.1.53
udp4 0 0 127.0.0.1.58174 127.0.0.1.53
udp4 0 0 127.0.0.1.30269 127.0.0.1.53
udp4 0 0 127.0.0.1.45296 127.0.0.1.53
udp4 0 0 127.0.0.1.123 *.*
udp4 0 0 174.128.32.42.123 *.*
udp4 0 0 198.180.150.1.123 *.*
206.223.132.86 is the local oob cisco, and 49 is tacacs. so was someone
trying an ssh attack on the local oob server (the one i am
using to get to the console port)?
no lsof installed and can't fetch it now.
any ideas before i try to reboot?
randy
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?48EF74F2.3090703>
