blob: 709b67a82123af66b0732c131488ba07dcd167c5 [file] [log] [blame]
#!/bin/sh
# Usage: $0 <retry_timeout> <program> [args...]
#
# Start-up section: captures the retry timeout, validates the command line,
# and adjusts this shell's own OOM-killer setting.
#
# Sets:
#   RETRY_TIMEOUT - first command-line argument (seconds between retries).
#   OOM_FILE      - name of the per-process OOM tunable found under
#                   /proc/<pid>/ ("oom_score_adj" if present, else "oom_adj").
# Afterwards "$@" holds only <program> [args...].
RETRY_TIMEOUT=$1

# Drop the timeout from the positional parameters, but only if there is
# something to drop: 'shift' with nothing to shift is an error that some
# shells treat as fatal, which would kill the script before the usage
# message below could be printed.
if [ $# -gt 0 ]; then
  shift
fi

# Both a timeout and a program to run are required.
if [ -z "$RETRY_TIMEOUT" ] || [ -z "$*" ]; then
  echo "Usage: $0 <retry_timeout> <program> [args...]" >&2
  exit 1
fi

# Write a negative OOM adjustment for this shell ($$) using whichever
# interface this kernel exposes. OOM_FILE is remembered so the same tunable
# can be written for child processes later. A failed write (for example
# because of missing privileges) is reported by the shell but is not fatal.
if [ -e "/proc/$$/oom_score_adj" ]; then
  OOM_FILE=oom_score_adj
  echo -500 >"/proc/$$/$OOM_FILE"
else
  OOM_FILE=oom_adj
  echo -17 >"/proc/$$/$OOM_FILE"
fi
# progname <command-line words...>
#
# Prints the name of the "real" program in a command line that may be
# prefixed by known wrapper commands. Each recognised wrapper is skipped
# together with its fixed number of leading arguments:
#   alivemonitor                   - wrapper name + 4 arguments (5 words)
#   babysit, startpid, deathrattle - wrapper name + 1 argument  (2 words)
# The first word that is not a recognised wrapper is printed. If the words
# run out before such a word is found (including a wrapper given fewer
# arguments than it normally takes), the very first word is printed instead.
#
# Outputs: the chosen name followed by a newline on stdout.
progname() {
  local first="$1"
  local skip
  while [ $# -gt 0 ]; do
    case "$1" in
      alivemonitor) skip=5 ;;
      babysit|startpid|deathrattle) skip=2 ;;
      *) printf '%s\n' "$1"; return ;;
    esac
    # Never shift by more than what is left: an over-long shift either
    # fails without shifting anything (looping forever here) or aborts the
    # shell, depending on the implementation.
    [ "$skip" -le $# ] || skip=$#
    shift "$skip"
  done
  printf '%s\n' "$first"
}
# run <program> [args...]
#
# Launches the given command in the background, writes 0 to the child's
# /proc/<pid>/$OOM_FILE, then blocks until the child finishes.
#
# Globals: OOM_FILE (read) - name of the OOM tunable file under /proc/<pid>/.
# Returns: the status reported by 'wait' for the child.
run() {
  local child
  "$@" &
  child=$!
  # Write 0 for the child; this shell wrote a negative value to the same
  # tunable for itself at start-up.
  echo 0 >"/proc/${child}/${OOM_FILE}"
  wait "${child}"
}
# error
#
# Prints a one-line failure report to stdout; callers redirect it as needed.
#
# Globals (read): name          - program name shown in the message
#                 RV            - exit status of the failed run
#                 RETRY_TIMEOUT - seconds until the next attempt
error() {
  local report="Error: '$name' exited with code $RV. Retry in $RETRY_TIMEOUT secs."
  echo "$report"
}
# Run until the program manages to exit successfully.
#
# 'name' is the program name used in messages; TMP_LOG receives a copy of the
# most recent error report (overwritten on every failure).
name=$(progname "$@")
# NOTE(review): fixed, predictable path under /tmp. Kept unchanged because
# other tooling may read it; confirm before switching to mktemp.
TMP_LOG=/tmp/babysit_errors_$(basename "$name")
until run "$@"; do
  RV=$?
  # 143 = 128 + 15, i.e. the child was terminated by SIGTERM.
  if [ "$RV" = 143 ]; then
    # SIGTERM means someone asked for it to die.
    # We can't treat SIGKILL the same way because it might have been
    # caused by the kernel OOM killer, in which case we definitely want to
    # restart the task.
    echo "SIGTERM: '$name' was killed explicitly. Not retrying." >&2
    exit "$RV"
  fi
  # Report the failure on stderr and keep a copy in the log file. The
  # redirection target is quoted so a program name containing whitespace
  # or glob characters cannot break the redirection.
  error >&2
  error > "$TMP_LOG"
  # Quoted so the timeout is passed to sleep as one operand; if sleep
  # rejects it, fall back to a one-second pause.
  sleep "$RETRY_TIMEOUT" || sleep 1 # just in case $RETRY_TIMEOUT is invalid
done
echo "'$name' exited successfully. Not restarting." >&2
exit 0