DEC Crash

From: Rakesh Agarwal <rakesha%gg2mail_at_delhi.tcs.co.in>
Date: Sat, 28 Nov 1998 10:24:59 +0530

Hi,
 I got your address from the Digital FAQs. I would like to know about the following:
 a) My server (DEC 1000, ver 4.0) crashes occasionally. I am attaching a
 sample crash_data file. Can you please tell me what information we can
 extract from this data?
 b) How can it crash in vi/xterm/dtterm?
 c) What does the Warning imply?
 d) How can we use vmunix and vmcore to get crash information, and what can we get
 from them?

 Waiting for your reply.
 -Rakesh



#
# Crash Data Collection (Version 1.4)
#
_crash_data_collection_time: Wed Dec 2 19:10:02 HKT 1998
_current_directory: /
_crash_kernel: /var/adm/crash/vmunix.7
_crash_core: /var/adm/crash/vmcore.7
_crash_arch: alpha
_crash_os: Digital UNIX
_host_version: Digital UNIX V4.0 (Rev. 386); Tue Nov 10 04:52:50 HKT 1998
_crash_version: Digital UNIX V4.0 (Rev. 386); Tue Nov 10 04:52:50 HKT 1998

_crashtime: struct {
    tv_sec = 912596234
    tv_usec = 25376
}
_boottime: struct {
    tv_sec = 912486910
    tv_usec = 110288
}
_config: struct {
    sysname = "OSF1"
    nodename = "dec1000"
    release = "V4.0"
    version = "386"
    machine = "alpha"
}
_cpu: 43
_system_string: 0xffffffffff800a58 = "AlphaServer 1000 4/266"
_ncpus: 1
_avail_cpus: 1
_partial_dump: 1
_physmem(MBytes): 383
_panic_string: 0xfffffc00004c38c0 = "Machine check - Hardware error"
_paniccpu: 0
_panic_thread: 0xfffffc000c947b80
_preserved_message_buffer_begin:
struct {
    msg_magic = 0x63061
    msg_bufx = 0x77
    msg_bufr = 0x57c
    msg_bufc = "6 0 0 0 0 0, block 992559
device string for dump = SCSI 0 6 0 0 0 0 0.
DUMP.prom: dev SCSI 0 6 0 0 0 0 0, block 992559
ysical memory = 384.00 megabytes.
available memory = 372.53 megabytes.
using 1467 buffers containing 11.46 megabytes of memory
AlphaServer 1000 4/266
Firmware revision: 4.5
PALcode: OSF version 1.45
pci0 at nexus
psiop0 at pci0 slot 6
Loading SIOP: script 800100, reg 82000000, data 405040c8
scsi0 at psiop0 slot 0
rz0 at scsi0 target 0 lun 0 (LID=0) (DEC RZ28 (C) DEC 442D)
rz1 at scsi0 target 1 lun 0 (LID=1) (SEAGATE ST19171N 0024)
rz2 at scsi0 target 2 lun 0 (LID=2) (DEC RZ28M (C) DEC 0568)
rz3 at scsi0 target 3 lun 0 (LID=3) (DEC RZ28M (C) DEC 0568)
rz4 at scsi0 target 4 lun 0 (LID=4) (DEC RRD45 (C) DEC 1645)
tz5 at scsi0 target 5 lun 0 (LID=5) (DEC TLZ07 (C)DEC 553A)
rz6 at scsi0 target 6 lun 0 (LID=6) (SEAGATE ST19171N 0024)
eisa0 at pci0
ace0 at eisa0
ace1 at eisa0
lp0 at eisa0
fdi0 at eisa0
cirrus0 at eisa0
cirrus0: Cirrus Logic CL-GD5424 (SVGA) 512 Kbytes
tu0: DECchip 21040-AA: Revision: 2.4
tu0 at pci0 slot 11
tu0: DEC TULIP Ethernet Interface, hardware address: 00-00-F8-21-91-1E
tu0: console mode: selecting UTP (10BaseT) port
gpc0 at eisa0
kernel console: cirrus0
dli: configured
WARNING: too many Processor corrected errors detected on cpu 0. Reporting suspended.
Machine Check error corrected by processor
AlphaServer 1000 4/266 machine check type 0x670.
  retry = 0x0
  mchk_code = 0x88
  paltemp[1] = 0x0
  paltemp[2] = 0x4
  paltemp[3] = 0x0
  paltemp[4] = 0x40087c00
  paltemp[5] = 0xffffff00
  paltemp[6] = 0x23
  paltemp[7] = 0x4200
  paltemp[8] = 0x400
  paltemp[9] = 0x0
  paltemp[10] = 0x3b3310
  paltemp[11] = 0x0
  paltemp[12] = 0x3b36b0
  paltemp[13] = 0x3b36e0
  paltemp[14] = 0x3b3740
  paltemp[15] = 0x3b34b0
  paltemp[16] = 0x3b3190
  paltemp[17] = 0x45
  paltemp[18] = 0x1fffc980
  paltemp[19] = 0x9895fa38
  paltemp[20] = 0x4dcf70
  paltemp[21] = 0x0
  paltemp[22] = 0x626e6e6e
  paltemp[23] = 0x0
  paltemp[24] = 0x0
  paltemp[25] = 0x90000
  paltemp[26] = 0x23
  paltemp[27] = 0x0
  paltemp[28] = 0x13d14000
  paltemp[29] = 0x0
  paltemp[30] = 0x1
  paltemp[31] = 0x118ba38
  exc_addr = 0x18a51
  exc_sum = 0x0
  msk = 0x0
  iccsr = 0x4
  pal_base = 0x14000
  hier = 0x1cd0
  hirr = 0x0
  mm_csr = 0x5a01
  dc_stat = 0x3
  dc_addr = 0xffffffff
  abox_ctl = 0x942a
  biu_stat = 0x4340
  biu_addr = 0x155d22c0
  biu_ctl = 0x10002227
  fill_syndrome = 0x29
  fill_adr = 0x135c84e0
  va = 0x6170
  bc_tag = 0x13415

  coma_gcr = 0x7fb20034
  coma_edsr = 0xc3e02070
  coma_ter = 0x7fb27fe0
  coma_elar = 0x7fb2ffe4
  coma_ehar = 0x7fb21fff
  coma_ldlr = 0x7fb2f450
  coma_ldhr = 0x6fb1007d
  coma_base0 = 0x6fb10400
  coma_base1 = 0x6fb10500
  coma_base2 = 0x22310000
  coma_base3 = 0xc3e00000
  coma_cnfg0 = 0x22310049
  coma_cnfg1 = 0x22310049
  coma_cnfg2 = 0x22310005
  coma_cnfg3 = 0x7fb20000

  epic_dcsr = 0x801e0019
  epic_pear = 0x804100
  epic_sear = 0x5f8f570
  epic_tbr1 = 0x2ac000
  epic_tbr2 = 0x0
  epic_pbr1 = 0x8c0000
  epic_pbr2 = 0x40080000
  epic_pmr1 = 0x700000
  epic_pmr2 = 0x3ff00000
  epic_harx1 = 0x80000000
  epic_harx2 = 0x0
  epic_pmlt = 0xff
  epic_tag0 = 0x824000
  epic_tag1 = 0x826000
  epic_tag2 = 0x801000
  epic_tag3 = 0x807000
  epic_tag4 = 0x803000
  epic_tag5 = 0x805000
  epic_tag6 = 0x804000
  epic_tag7 = 0x822000
  epic_data0 = 0x10600
  epic_data1 = 0x3c02
  epic_data2 = 0x4fc
  epic_data3 = 0x502
  epic_data4 = 0x4fe
  epic_data5 = 0x500
  epic_data6 = 0x500
  epic_data7 = 0x13ffe

  pceb_vid = 0x8086
  pceb_did = 0x482
  pceb_revision = 0x5
  pceb_command = 0x7
  pceb_status = 0x200
  pceb_latency = 0xf8
  pceb_control = 0x60
  pceb_arbcon = 0x9d
  pceb_arbpri = 0x4

  esc_id = 0xf
  esc_revision = 0x3
  esc_int0 = 0xa1
  esc_int1 = 0xef
  esc_elcr0 = 0x0
  esc_elcr1 = 0x0
  esc_last_eisa = 0xff
  esc_nmi_stat = 0x20

  pci_ir = 0xff
  pci_imr = 0x1
  svr_mgr = 0xd4
panic (cpu 0): Machine check - Hardware error
syncing disks... device string for dump = SCSI 0 6 0 0 0 0 0.
DUMP.prom: dev SCSI 0 "
}
_preserved_message_buffer_end:
_kernel_process_status_begin:
  PID COMM
00000 kernel idle
00001 init
00003 kloadsrv
13332 vi
00024 update
01067 ksh
01068 xterm
18483 vi
01078 ksh
01080 xterm
18489 ProxyServer
15420 ksh
01088 xterm
01092 ksh
01101 xterm
01102 ksh
15451 xterm
00097 syslogd
00099 binlogd
18540 sqlplus
01134 tnslsnr
17527 xterm
09353 vi
18602 udpt
13487 ksh
18614 dtscreen
18638 ksh
08405 vi
00241 portmap
00243 nfsiod
17654 dtexec
13582 dtlogin
17686 xterm
00303 sendmail
17725 .su
01357 oracle
01365 oracle
01366 oracle
01368 oracle
01371 oracle
00348 inetd
01373 oracle
00350 os_mibs
01376 oracle
00354 snmpd
01380 oracle
00358 cron
01385 oracle
01386 oracle
01388 oracle
01389 oracle
01394 oracle
00375 lpd
13695 xterm
01422 radiusd
15767 PaysServer
00412 dtlogin
13730 dtsession
13734 xterm
00423 getty
00432 Xdec
17864 ksh
14801 exe
14822 udpsvrTemp
17895 ksh
13819 xterm
08702 ksh
00523 rpc.ttdbserverd
01560 ksh
01567 telnetd
13897 dtwm
13903 ttsession
18006 ttsession
00609 telnetd
00610 ksh
00614 xterm
00615 ksh
00620 telnetd
00621 ksh
00625 xterm
00626 ksh
12917 xterm
12921 xterm
00640 xterm
00641 ksh
12940 ksh
12941 ksh
12943 telnetd
12946 ksh
12952 telnetd
18076 dtlogin
00672 xterm
00673 ksh
18098 sh
18101 dtsession
14011 ksh
14017 telnetd
14025 ksh
13002 xterm
16075 ksh
18134 ksh
02778 rpc.cmsd
16122 vi
13053 dtwm
13063 xterm
18192 dtterm
06943 ksh
09004 sh
18255 ksh
18277 .su
18278 tail
09071 xterm
18316 ksh
17301 ksh
17318 ksh
18361 exe
18387 sqlplus
13280 ksh
09185 ksh
18416 dtexec
16370 sqlplus
_kernel_process_status_end:
_current_pid: 9353
_current_tid: 0xfffffc000c947b80
_proc_thread_list_begin:
thread 0xfffffc000c947b80 stopped at [boot:2361 ,0xfffffc00003b6f08] Source not available
_proc_thread_list_end:
_dump_begin:
> 0 boot(0x400000000, 0xfffffc0017df5b80, 0xfffffc000027b654, 0xfffffc00004a6860, 0xfffffc00004a6860) ["../../../../src/kernel/arch/alpha/machdep.c":2361, 0xfffffc00003b6f08]

   1 panic(s = 0xfffffc00004a8a50 = "thread_block: interrupt level call") ["../../../../src/kernel/bsd/subr_prf.c":707, 0xfffffc0000279d1c]
pcpu = 0xfffffc00004dcf70
i = 4885072
bootopt = 2792136
mycpu = 0
spl = 5
prevcc = 18446739675665832648
nextcc = 18446739675665832648
timer = -4398043718968
limit = -4398043718968

   2 thread_block() ["../../../../src/kernel/kern/sched_prim.c":1853, 0xfffffc00002a7070]
thread = 0xfffffc000c947b80
new_thread = 0x2c
mycpu = 0
myprocessor = 0xfffffc0000156100
s = 5
pset = (nil)
prev = 0xfffffc00003b6df4

   3 thread_preempt(thread = 0x26, processor = 0xfffffc0000156100) ["../../../../src/kernel/kern/sched_prim.c":3712, 0xfffffc00002a9ac4]
s = 2
pri = 2596544
pset = 0x1

   4 boot(0x0, 0xfffffc000c947b80, 0x2c0000002c, 0x2f, 0x1) ["../../../../src/kernel/arch/alpha/machdep.c":2305, 0xfffffc00003b6df0]

   5 panic(s = 0xfffffc00004c38c0 = "Machine check - Hardware error") ["../../../../src/kernel/bsd/subr_prf.c":791, 0xfffffc0000279ebc]
pcpu = 0xfffffc00004dcf70
i = 5099376
bootopt = 3959324
mycpu = 0
spl = 7
prevcc = 18446739675666999836
nextcc = 18446739675666999836
timer = -4398042551780
limit = -4398042551780

   6 machcheck(0x1, 0x140007020, 0x1, 0x0, 0x200000014) ["../../../../src/kernel/arch/alpha/hal/kn22a.c":2737, 0xfffffc00003e1b48]

   7 mach_error(0x1, 0x0, 0x200000014, 0xffffffff9895f930, 0xfffffc00003b3420) ["../../../../src/kernel/arch/alpha/hal/cpusw.c":716, 0xfffffc00003c6a18]

   8 _XentInt(0x8, 0x12002e2c0, 0x14000d0d0, 0x140087d40, 0x0) ["../../../../src/kernel/arch/alpha/locore.s":1076, 0xfffffc00003b341c]

_dump_end:

warning: Files compiled -g3: parameter values probably wrong
_kernel_thread_list_begin:
thread 0xfffffc0017df42c0 stopped at [thread_run:2397 ,0xfffffc00002a7b78] Source not available
thread 0xfffffc0017df4580 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df4840 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df4b00 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df4dc0 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df5080 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df5340 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df5600 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df58c0 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017df5b80 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf82c0 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf8580 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf8840 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf8b00 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf8dc0 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf9080 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf9340 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0016cf9600 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017cd1080 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017cd1340 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017cd1600 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017cd18c0 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0017cd1b80 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc0002398000 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
thread 0xfffffc00023982c0 stopped at [thread_block:2025 ,0xfffffc00002a73a0] Source not available
_kernel_thread_list_end:
_savedefp: (nil)
_kernel_memory_fault_data_begin:
struct {
    fault_va = 0x0
    fault_pc = 0x0
    fault_ra = 0x0
    fault_sp = 0x0
    access = 0x0
    status = 0x0
    cpunum = 0x0
    count = 0x0
    pcb = (nil)
    thread = (nil)
    task = (nil)
    proc = (nil)
}
_kernel_memory_fault_data_end:
_uptime: 30.36 hours

paniccpu: 0x0
machine_slot[paniccpu]: struct {
    is_cpu = 0x1
    cpu_type = 0xf
    cpu_subtype = 0x11
    running = 0x1
    cpu_ticks = {
        [0] 0x1832d0d
        [1] 0xad8115
        [2] 0x4d6a32
        [3] 0x40cb92d
        [4] 0x217299
    }
    clock_freq = 0x400
    error_restart = 0x0
    cpu_panicstr = 0xfffffc00004c38c0 = "Machine check - Hardware error"
    cpu_panic_thread = 0xfffffc000c947b80
}
tset machine_slot[paniccpu].cpu_panic_thread:
Begin Trace for machine_slot[paniccpu].cpu_panic_thread:
> 0 boot(0x400000000, 0xfffffc0017df5b80, 0xfffffc000027b654, 0xfffffc00004a6860, 0xfffffc00004a6860) ["../../../../src/kernel/arch/alpha/machdep.c":2361, 0xfffffc00003b6f08]
   1 panic(s = 0xfffffc00004a8a50 = "thread_block: interrupt level call") ["../../../../src/kernel/bsd/subr_prf.c":707, 0xfffffc0000279d1c]
   2 thread_block() ["../../../../src/kernel/kern/sched_prim.c":1853, 0xfffffc00002a7070]
   3 thread_preempt(thread = 0x26, processor = 0xfffffc0000156100) ["../../../../src/kernel/kern/sched_prim.c":3712, 0xfffffc00002a9ac4]
   4 boot(0x0, 0xfffffc000c947b80, 0x2c0000002c, 0x2f, 0x1) ["../../../../src/kernel/arch/alpha/machdep.c":2305, 0xfffffc00003b6df0]
   5 panic(s = 0xfffffc00004c38c0 = "Machine check - Hardware error") ["../../../../src/kernel/bsd/subr_prf.c":791, 0xfffffc0000279ebc]
   6 machcheck(0x1, 0x140007020, 0x1, 0x0, 0x200000014) ["../../../../src/kernel/arch/alpha/hal/kn22a.c":2737, 0xfffffc00003e1b48]
   7 mach_error(0x1, 0x0, 0x200000014, 0xffffffff9895f930, 0xfffffc00003b3420) ["../../../../src/kernel/arch/alpha/hal/cpusw.c":716, 0xfffffc00003c6a18]
   8 _XentInt(0x8, 0x12002e2c0, 0x14000d0d0, 0x140087d40, 0x0) ["../../../../src/kernel/arch/alpha/locore.s":1076, 0xfffffc00003b341c]
End Trace for machine_slot[paniccpu].cpu_panic_thread:

"cpu_data" is not an array
_stack_trace[0]_begin:
> 0 boot(0x400000000, 0xfffffc0017df5b80, 0xfffffc000027b654, 0xfffffc00004a6860, 0xfffffc00004a6860) ["../../../../src/kernel/arch/alpha/machdep.c":2361, 0xfffffc00003b6f08]
   1 panic(s = 0xfffffc00004a8a50 = "thread_block: interrupt level call") ["../../../../src/kernel/bsd/subr_prf.c":707, 0xfffffc0000279d1c]
   2 thread_block() ["../../../../src/kernel/kern/sched_prim.c":1853, 0xfffffc00002a7070]
   3 thread_preempt(thread = 0x26, processor = 0xfffffc0000156100) ["../../../../src/kernel/kern/sched_prim.c":3712, 0xfffffc00002a9ac4]
   4 boot(0x0, 0xfffffc000c947b80, 0x2c0000002c, 0x2f, 0x1) ["../../../../src/kernel/arch/alpha/machdep.c":2305, 0xfffffc00003b6df0]
   5 panic(s = 0xfffffc00004c38c0 = "Machine check - Hardware error") ["../../../../src/kernel/bsd/subr_prf.c":791, 0xfffffc0000279ebc]
   6 machcheck(0x1, 0x140007020, 0x1, 0x0, 0x200000014) ["../../../../src/kernel/arch/alpha/hal/kn22a.c":2737, 0xfffffc00003e1b48]
   7 mach_error(0x1, 0x0, 0x200000014, 0xffffffff9895f930, 0xfffffc00003b3420) ["../../../../src/kernel/arch/alpha/hal/cpusw.c":716, 0xfffffc00003c6a18]
   8 _XentInt(0x8, 0x12002e2c0, 0x14000d0d0, 0x140087d40, 0x0) ["../../../../src/kernel/arch/alpha/locore.s":1076, 0xfffffc00003b341c]
_stack_trace[0]_end:
/usr/bin/crashdc: /bin/kdbx: not found
#
_crash_data_collection_finished:
Received on Thu Dec 10 1998 - 04:55:30 NZDT

This archive was generated by hypermail 2.4.0 : Wed Nov 08 2023 - 11:53:38 NZDT