Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 24 Aug 2012 17:17:27 +0530
From:      Sriram Gorti <gsriram@gmail.com>
To:        freebsd-questions@freebsd.org
Subject:   Lockups/panics with arbitrary 7.2 boxes
Message-ID:  <CAF_V1q7sCNoG7Yj9wcvMge7=4A3T4tZnSM-kq3w_Ui9hQTfvBA@mail.gmail.com>

next in thread | raw e-mail | index | archive | help
Hello,

We have been observing the following on quite a few of our i386
FreeBSD 7.2 systems in the course of the last few months:
1. Boxes locking up - to clarify, manual reboot necessary to bring it
back into operation.
2. Boxes panicking somewhere in virtual memory code, e.g., vdrop, page_fault.

In both cases, did not find anything specifically unusual in their
usage profile, when compared with other similar machines. For example,
SWAPMETA was well within limits, available vm.kmem_map_free was quite
normal. One observation was that the re-start of a periodically
re-starting memory-intensive process coincided with the lockup/panic.

Suspecting that we did not have the fix for some kernel bug that was
perhaps fixed later, looked thru the known FreeBSD 7.2 issues that had
similar back-traces but did not find any. I know this is somewhat of
an old release but any history/pointers for known bugs or any other
suggestions would be most helpful.

Unfortunately, it is not quite an option to debug these boxes when
they hit the issue.

Data for config, selected sysctls and sample backtraces follow.

[1] Key-portions of config

machine          i386
cpu                 I686_CPU
maxusers        256
makeoptions    DEBUG=-g

options     SCHED_4BSD
options     PREEMPTION              # Enable kernel thread preemption
options     COMPAT_43                 #Compatible with BSD 4.3 [KEEP THIS!]
options     INET                             #InterNETworking
options     INET6                           #IPv6
options     FFS                             #Berkeley Fast Filesystem
options     SOFTUPDATES            #Enable FFS soft updates support
options     MD_ROOT                    #MD is a potential root device
options     PROCFS                      #Process filesystem
options     PSEUDOFS
options     SCSI_DELAY=2000      #Delay (in ms) before probing SCSI
options     UFS_DIRHASH
options     VFS_AIO
options     _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions

options     SYSVSHM
options     SYSVSEM
options     SYSVMSG
options     SHMMAXPGS=65536
options     SEMMNI=40
options     SEMMNS=240
options     SEMUME=40
options     SEMMNU=120

options     SMP         # Symmetric MultiProcessor Kernel
device      apic            # Symmetric (APIC) I/O

device      pci
device      ata
device      atadisk         # ATA disk drives
device      atapicd         # ATAPI CDROM drives
device      ataraid         # ATA RAID drives
options     ATA_STATIC_ID       #Static device numbering

device      ahc            # AHA2940 and onboard AIC7xxx devices
device      scbus        # SCSI bus (required)
device      da             # Direct Access (disks)
device      pass         #CAM passthrough driver
device      aac           # Adaptec FSA RAID, Dell PERC2/PERC3
device      amr           # AMI MegaRAID

device      atkbdc
device      atkbd
device      vga
device      sc
device      sio

device      miibus      # MII bus support
device      fxp     # Intel EtherExpress PRO/100B (82557, 82558)
device      bge     # Broadcom BCM570x (``Tigon III'')
device      bce
device      em
device      le

device  loop        # Network loopback
device  ether       # Ethernet support
device  vlan        # Virtual LAN support
device  pty     # Pseudo-ttys (telnet etc)
device  md      # Memory "disks"
device  bpf     #Berkeley packet filter

options         KDB
options     ALT_BREAK_TO_DEBUGGER   # ~<CR><^B>
options     KDB_UNATTENDED      # reboot on default
options     DDB         # reboot on default
options     KDB_TRACE       # traceback
options     GDB

#HW Crypto
device      crypto        # core crypto support
device      cryptodev   # /dev/crypto for access to h/w
device      ubsec        # Broadcom 5501, 5601, 58xx
device      hifn            # Hifn 7951, 7781, etc.
device      safe           # SafeNet 1141

device      smb           # /dev/smb*
device      smbus       # bus
device      ichsmb      # Intel ICH SMBus controller chips

device      acpi
device      random

options     COMPAT_FREEBSD4
options     COMPAT_FREEBSD6
options     COMPAT_LINUX
options     LINPROCFS
options     LINSYSFS
device      ichwd
device      mpt

device      ed

# add in IPv6 support
options     INET6       # IPv6 communications protocols
device      gif             # IPv6 and IPv4 tunneling
device      faith          # IPv6-to-IPv4 relaying (translation)

#buffer kernel logging so messages don't get split
options     PRINTF_BUFR_SIZE=256

device      sg              # Linux SCSI passthrough
device      mfip            # LSI MegaRAID SAS passthrough, requires CAM
#Bump up kernel message buffer size to capture tty logs.
options    MSGBUF_SIZE=98304

device ichgpio

# 2 fibs.
options     ROUTETABLES=2

[2] Sample back-traces

page fault
db_trace_self_wrapper+38
kdb_backtrace+41
panic+615
trap_fatal+819
trap_pfault+592
trap+946
calltrap+6
vm_page_cache_remove+14
vm_page_alloc+789
vm_fault+2215
trap_pfault+315
trap+560
calltrap+6
sched_switch+406
mi_switch+326
sleepq_switch+203
sleepq_timedwait+55
_sleep+676
mfi_abort+258
mfi_shutdown+209
boot+1848
panic+696
trap_fatal+819
trap_pfault+592
trap+946
calltrap+6
vm_page_cache_remove+14
vm_page_alloc+789
vm_fault+2215
trap_pfault+315
trap+560
calltrap+6

One more:
vdrop: holdcnt 0
db_trace_self_wrapper+38
kdb_backtrace+41
panic+615
vdropl+36
vdrop+87
vm_page_alloc+1064
vm_fault+2215
trap_pfault+315
trap+560
calltrap+6


[3] Related sysctls

vm.vmtotal:
System wide totals computed every five seconds: (values in kilobytes)
===============================================
Processes:              (RUNQ: 1 Disk Wait: 2 Page Wait: 0 Sleep: 74)
Virtual Memory:         (Total: 4664532K, Active 2532332K)
Real Memory:            (Total: 858816K Active 814316K)
Shared Virtual Memory:  (Total: 55668K Active: 46684K)
Shared Real Memory:     (Total: 29452K Active: 26344K)
Free Memory Pages:      2139120K

vm.loadavg: { 0.08 0.04 0.01 }
vm.v_free_min: 5293
vm.v_free_target: 22305
vm.v_free_reserved: 1133
vm.v_inactive_target: 33457
vm.v_cache_min: 22305
vm.v_cache_max: 44610
vm.v_pageout_free_min: 34
vm.pageout_algorithm: 0
vm.swap_enabled: 1
vm.kmem_size_scale: 3
vm.kmem_size_max: 335544320
vm.kmem_size_min: 0
vm.kmem_size: 335544320
vm.nswapdev: 1
vm.dmmax: 32
vm.swap_async_max: 4
vm.zone_count: 79
vm.swap_idle_threshold2: 10
vm.swap_idle_threshold1: 2
vm.exec_map_entries: 16
vm.stats.misc.zero_page_count: 25
vm.stats.misc.cnt_prezero: 0
vm.stats.vm.v_kthreadpages: 0
vm.stats.vm.v_rforkpages: 0
vm.stats.vm.v_vforkpages: 3656983
vm.stats.vm.v_forkpages: 12444038
vm.stats.vm.v_kthreads: 57
vm.stats.vm.v_rforks: 0
vm.stats.vm.v_vforks: 13231
vm.stats.vm.v_forks: 54094
vm.stats.vm.v_interrupt_free_min: 2
vm.stats.vm.v_pageout_free_min: 34
vm.stats.vm.v_cache_max: 44610
vm.stats.vm.v_cache_min: 22305
vm.stats.vm.v_cache_count: 53
vm.stats.vm.v_inactive_count: 102005
vm.stats.vm.v_inactive_target: 33457
vm.stats.vm.v_active_count: 178156
vm.stats.vm.v_wire_count: 17322
vm.stats.vm.v_free_count: 534705
vm.stats.vm.v_free_min: 5293
vm.stats.vm.v_free_target: 22305
vm.stats.vm.v_free_reserved: 1133
vm.stats.vm.v_page_count: 832337
vm.stats.vm.v_page_size: 4096
vm.stats.vm.v_tfree: 24208690
vm.stats.vm.v_pfree: 19927734
vm.stats.vm.v_dfree: 0
vm.stats.vm.v_tcached: 245
vm.stats.vm.v_pdpages: 0
vm.stats.vm.v_pdwakeups: 0
vm.stats.vm.v_reactivated: 177
vm.stats.vm.v_intrans: 2741
vm.stats.vm.v_vnodepgsout: 5568
vm.stats.vm.v_vnodepgsin: 10073
vm.stats.vm.v_vnodeout: 5568
vm.stats.vm.v_vnodein: 1108
vm.stats.vm.v_swappgsout: 0
vm.stats.vm.v_swappgsin: 0
vm.stats.vm.v_swapout: 0
vm.stats.vm.v_swapin: 0
vm.stats.vm.v_ozfod: 231215
vm.stats.vm.v_zfod: 18990182
vm.stats.vm.v_cow_optim: 11096
vm.stats.vm.v_cow_faults: 3886053
vm.stats.vm.v_vm_faults: 26827859
vm.stats.sys.v_soft: 10975714
vm.stats.sys.v_intr: 1527093
vm.stats.sys.v_syscall: 262664224
vm.stats.sys.v_trap: 27352281
vm.stats.sys.v_swtch: 67880786
vm.stats.object.bypasses: 29995
vm.stats.object.collapses: 309828
vm.v_free_severe: 3213
vm.max_proc_mmap: 49344
vm.old_msync: 0
vm.msync_flush_flags: 3
vm.boot_pages: 48
vm.max_wired: 276913
vm.pageout_lock_miss: 0
vm.disable_swapspace_pageouts: 0
vm.defer_swapspace_pageouts: 0
vm.swap_idle_enabled: 0
vm.pageout_stats_interval: 5
vm.pageout_full_stats_interval: 20
vm.pageout_stats_max: 22305
vm.max_launder: 32
vm.reserv.freed: 72740
vm.reserv.broken: 98
vm.idlezero_enable: 0
vm.kvm_free: 343928832
vm.kvm_size: 1073737728
vm.pmap.pmap_collect_active: 0
vm.pmap.pmap_collect_inactive: 0
vm.pmap.pv_entry_spare: 8168
vm.pmap.pv_entry_allocs: 119151728
vm.pmap.pv_entry_frees: 118896472
vm.pmap.pc_chunk_tryfail: 0
vm.pmap.pc_chunk_frees: 410869
vm.pmap.pc_chunk_allocs: 411653
vm.pmap.pc_chunk_count: 784
vm.pmap.pv_entry_count: 255256
vm.pmap.pde.promotions: 0
vm.pmap.pde.p_failures: 0
vm.pmap.pde.mappings: 0
vm.pmap.pde.demotions: 0
vm.pmap.shpgperproc: 200
vm.pmap.pv_entry_max: 1500240
vm.pmap.pg_ps_enabled: 0


thanks in advance,
Sriram



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAF_V1q7sCNoG7Yj9wcvMge7=4A3T4tZnSM-kq3w_Ui9hQTfvBA>