#!/bin/sh
# Positional arguments:
#   $1  pid of the dying process
#   $2  signal it is dying on
#   $3  name of its executable
pid=$1
signal=$2
exe=$3

# TODO(kedong) need to specify different size based on platform.
#
# Where the compressed core is written:
#  - On boxes with a disk, cores go to a "cores" folder under /var/media (the
#    only usable partition of decent size) and are named per pid/executable,
#    so older cores are not overwritten.
#  - Otherwise the core goes to flash under a single fixed name, so each new
#    core overwrites the previous one (flash is too small to keep several).
hdd_media_dir="/var/media"
hdd_cores_dir="${hdd_media_dir}/cores"
flash_cores_dir="/user"
flash_cores_last_core_link="${flash_cores_dir}/last-core"

# Start from the flash location and switch to the disk only when this is a
# disk-equipped platform and the media partition is actually writable.
outdir="$flash_cores_dir"
out="${flash_cores_dir}/core.gz"
if is-storage-box || is-spacecast; then
  if [ -w "$hdd_media_dir" ]; then
    mkdir -p "$hdd_cores_dir"
    outdir="$hdd_cores_dir"
    out="${hdd_cores_dir}/core-${pid}-${exe}.gz"
  fi
fi
# Send all output to kernel log. We don't want to use the 'logger' program
# here in case there's something wrong with syslogd (eg. it's the one core
# dumping).
#
# The open is probed in a subshell first: in a POSIX sh, a redirection error
# on the `exec` special builtin terminates a non-interactive shell, which
# would abort this script before any core is saved. If /dev/kmsg cannot be
# opened we simply keep the inherited stdout/stderr and carry on.
if ( : >/dev/kmsg ) 2>/dev/null; then
  exec >/dev/kmsg 2>&1
fi
# log MESSAGE...
#
# Writes one line, "coredump: MESSAGE", to stderr. Multiple arguments are
# joined with single spaces. The text is emitted verbatim: it is neither
# word-split nor glob-expanded, so process names containing '*', '?' or runs
# of spaces are logged exactly as given.
log() {
  printf 'coredump: %s\n' "$*" >&2
}
# Tag unique to this invocation (it embeds this shell's pid). It is searched
# for in process command lines by pgrep/pkill -f below.
uniqid="coredump-$$-coredump"

# alarm
#
# Watchdog action. If a process whose command line contains $uniqid is still
# running, log that the dump took too long, set the kernel hung-task timeout
# to 120 seconds and kill that process (the core is left truncated).
# Returns pkill's status in that case; otherwise returns pgrep's non-zero
# status without doing anything.
alarm() {
  if pgrep -f "$uniqid" >/dev/null; then
    log core took too long to save, quitting to avoid panic, core will be truncated
    sysctl -w kernel.hung_task_timeout_secs=120
    pkill -f "$uniqid"
  else
    # Nothing left to kill: hand pgrep's exit status back unchanged.
    return $?
  fi
}
# Main flow: announce the crash, then decide whether to save the core that is
# arriving on stdin.
log "pid $pid ($exe) dying on signal $signal"

if [ ! -e "/tmp/DEBUG" ]; then
  # Cores are only saved when the /tmp/DEBUG marker exists.
  log "skipping coredump for process '$exe'"
elif [ ! -w "$outdir" ] || { [ -e "$out" ] && [ ! -w "$out" ]; }; then
  # Either the directory is not writable, or the target file already exists
  # and cannot be overwritten.
  log "can't dump core: $out is not writable."
elif pgrep -x coredump-delay >/dev/null; then
  # A coredump-delay process started by an earlier dump is still running.
  # NOTE(review): presumably coredump-delay just stays alive for the given
  # number of seconds to rate-limit dumps -- confirm against its source.
  log "coredump-delay not yet reached; not saving."
else
  # read and compress the core dump from stdin, limiting max output size
  log "dumping core in '$out'"

  # siege coredumps can take a long time and we need them, increase the hung
  # task timeout in the kernel and wait for longer to accommodate this
  if [ "$exe" = "siege" ]; then
    coredump-delay 900 &
    sysctl -w kernel.hung_task_timeout_secs=900
    (sleep 840; alarm) &
  else
    coredump-delay 600 &
    # abort after 100 seconds. bruno kernel will panic with hung_task at 120
    # seconds
    (sleep 100; alarm) &
  fi

  # --suffix is a trick to be able to pkill exactly this gzip.
  # dd caps the compressed output at 163840 blocks of 1024 bytes (160 MiB).
  # The pipeline's status is dd's, so a failure to open or write "$out" is
  # reported instead of being silently ignored.
  gzip --fast -c --suffix "$uniqid" |
    dd bs=1024 count=163840 2>/dev/null >"$out" ||
    log "warning: writing '$out' failed, core may be missing or incomplete"

  upload-logs-now
  sysctl -w kernel.hung_task_timeout_secs=120

  # if we dumped to HDD, create a symbolic link at the alternate place in flash
  if [ "$outdir" = "$hdd_cores_dir" ]; then
    ln -s -f "$out" "$flash_cores_last_core_link"
  fi
  log "finished dumping core for pid $pid ($exe)"
fi