#!/usr/bin/perl
2#
3# fixproc [-min n] [-max n] [-check | -kill | -restart | -exist | -fix] proc ...
4#
5# fixproc exit code:
6# 0 ok
7# 1 check failed
8# 2 cannot restart
9# 3 cannot kill
#	4	fix failed	if fix is defined as kill or restart, then
#				cannot kill or cannot restart is returned instead
12# 10 fixproc error
13#
14#
15# Fixes a process named "proc" by performing the specified action. The
16# actions can be check, kill, restart, exist, or fix. The action is specified
17# on the command line or is read from a default database, which describes
18# the default action to take for each process. The database format and
19# the meaning of each action are described below.
20#
21# database format
22# ---------------
23#
24# name foo required
25# cmd /a/b/name args required
26# min number optional, defaults to 1
27# max number optional, defaults to 1
28#
29# check {null, exist, shell} optional, defaults to exist if not defined
30# [shell command shell commands needed only if check=shell
31# ...
32# shell command
33# end_shell] keyword end_shell marks end of shell commands
34# fix {kill, restart, shell} required
35# [shell command shell commands needed only if fix=shell
36# ...
37# shell command
38# end_shell] keyword end_shell marks end of shell commands
39#
40# Blank lines and lines beginning with "#" are ignored.
41#
42#
43# Example:
44#
45# name test1
46# cmd nice /home/kong/z/test1 > /dev/null &
47# max 2
48# fix shell
49# xterm&
50# nice /home/kong/z/test1 > /dev/null &
51# end_shell
52#
53#
54# actions
55# -------
56# There are 5 possible actions: kill, restart, fix, exist, check. Fix is
57# defined to be the kill action, the restart action, or a series of shell
58# commands. Check is optionally defined in the database. If check is not
59# defined, it defaults to exist.
60#
61# If the action is specified on the cmd line, it is executed regardless of
# check.  The commands executed for each action type are as follows:
63#
64# switch action:
65# kill:
#	    kill process, wait 2 seconds, kill -9 if it still exists
67# if still exist
68# return "cannot kill"
69# else
70# return "ok"
71#
72# restart:
73# execute kill
74# if kill returned "cannot kill"
75# return "cannot kill"
76# restart by issuing cmd to shell
77# if check defined
78# execute check
79# if check succeeds
80# return "ok"
81# else
82# return "cannot restart"
83#
84# fix:
85# if fix=kill
86# execute kill
87# else if fix=restart
88# execute restart
89# else
90# execute shell commands
91# execute check
92#
93# check:
94# if check defined as null
95# return "fixproc error"
96# else
97# execute check
98# if check succeeds
99# return (execute exist)
100# return "check failed"
101#
102# exist:
103# if proc exists in ps && (min <= num. of processes <= max)
104# return "ok"
105# else
106# return "check failed"
107#
108#
109# If the action is not specified on the cmd line, the default action is the
110# fix action defined in the database. Fix is only executed if check fails:
111#
112# if fix defined
113# if check is not defined as null
114# execute check
115# if check succeeds
116# return "ok"
117# execute action defined for fix
118# else
119# return "fixproc error"
120#
121#
122# If proc is not specified on the command line, return "fixproc error."
123# Multiple proc's can be defined on the cmd line. When an error occurs
124# when multiple proc's are specified, the first error encountered halts the
125# script.
126#
127# For check shell scripts, any non-zero exit code means the check has failed.
128#
129#
130# Timothy Kong 3/1995
131
132use File::Temp qw(tempfile);
133
# Path of the process database; its format is described in the header comment.
$database_file = '/local/etc/fixproc.conf';

$debug = 0;			# specify debug level using -dN
				# currently defined: -d1

# Return / exit codes (see "fixproc exit code" in the header comment).
$no_error = 0;
$check_failed_error = 1;
$cannot_restart_error = 2;
$cannot_kill_error = 3;
$cannot_fix_error = 4;
$fixproc_error = 10;

# Command-line overrides, filled in by read_args.
$min = 1;
$max = 1;
$cmd_line_action = '';

# Per-process settings keyed by process name, filled in by read_database.
%min = ();
%max = ();
%cmd = ();
%check = ();			# 'null', 'exist', or an index into @shell_lines
%fix = ();			# 'kill', 'restart', or an index into @shell_lines

# All shell blocks from the database, stored back to back.  Each block starts
# with $shell_header and is terminated by $shell_end_marker.
# (This used to read "$shell_lines = ()", which initialised an unrelated
# scalar instead of the array the rest of the script uses.)
@shell_lines = ();
@proc_list = ();		# process names given on the command line

$shell_header = "#!/bin/sh\n";
$shell_end_marker = 'shell_end_marker';
159
# Parse the command line, then load the process database.
&read_args();
&read_database();
# &dump_database();		# debug only

# A -min / -max given on the command line replaces the value of every
# database entry.  A command-line value of 1 is indistinguishable from
# "option not given" here, so it cannot override a database value.
if ($min != 1) {
    for $name (keys(%min)) {
        $min{$name} = $min;
    }
}
if ($max != 1) {
    for $name (keys(%max)) {
        $max{$name} = $max;
    }
}

# Work on one process at a time; the first failure halts the script.
for $proc (@proc_list) {
    $error_code = &work_on_proc($proc);

    if ($error_code) {
        # Keep the diagnostic on stderr, but leave with the documented exit
        # code.  The previous "die" produced an exit status derived from $!,
        # not from $error_code.
        print STDERR "error_code = $error_code\n";
        exit $error_code;
    }
}
190
191
# Create an executable shell script file from one stored shell block.
#
# Arguments (in call order):
#   $i    - index into @shell_lines of the block's first line
#   $fh   - open, writable file handle of the script file
#   $file - path name of the script file
#
# Lines are copied up to, but not including, $shell_end_marker.  The handle
# is closed and the file is made executable (mode 0755).  Dies if the file
# cannot be written or its mode cannot be changed.
sub create_sh_script {
    my $file = pop(@_);
    my $fh   = pop(@_);
    my $i    = pop(@_);

    printf(STDERR "create_sh_script\n") if ($debug > 0);

    # Stop at the end of the array as well as at the marker: without the
    # bound, a block with no end marker would loop forever.
    while ($i <= $#shell_lines && $shell_lines[$i] ne $shell_end_marker) {
        printf($fh "%s", $shell_lines[$i]);
        $i++;
    }

    # Buffered write errors only surface at close time.
    unless (close($fh)) {
        $! = $fixproc_error;
        die "$0: cannot write shell script $file\n";
    }
    unless (chmod(0755, $file)) {
        $! = $fixproc_error;
        die "$0: cannot make shell script $file executable\n";
    }
}
210
211
# Perform the "fix" action configured for a process.
#
# Argument: $proc - process name (key into %fix).
# Returns:  the result of do_kill or do_restart when fix is 'kill' or
#           'restart'; otherwise the fix shell script is run and the result
#           is $cannot_fix_error if it exits non-zero, else the result of
#           do_exist.
# Dies if no fix is defined for $proc.
sub do_fix {
    my $proc = pop(@_);

    printf(STDERR "do_fix\n") if ($debug > 0);

    if ($fix{$proc} eq '') {
        $! = $fixproc_error;
        die "$0: internal error 4\n";
    }

    return &do_kill($proc)    if ($fix{$proc} eq 'kill');
    return &do_restart($proc) if ($fix{$proc} eq 'restart');

    # It must be "shell": $fix{$proc} is the index of the script's first
    # line in @shell_lines, so write the script out and execute it.
    my ($tmpfh, $tmpfile) = tempfile("fix_XXXXXXXX", DIR => "/tmp");

    &create_sh_script($fix{$proc}, $tmpfh, $tmpfile);

    # Any non-zero wait status (exit code or signal) counts as a failure.
    my $status = system($tmpfile);
    unlink($tmpfile);

    # This used to return the undefined $fix_failed_error, which callers
    # read as "no error"; the defined code is $cannot_fix_error.
    return $cannot_fix_error if ($status != 0);

    # sleep needed here?
    return &do_exist($proc);
}
246
247
# Perform the "check" action configured for a process.
#
# Argument: $proc - process name (key into %check).
# Returns:  $check_failed_error if a check shell script is defined and exits
#           non-zero; otherwise the result of do_exist.
# Dies if no check is defined for $proc.
sub do_check {
    my $proc = pop(@_);

    printf(STDERR "do_check\n") if ($debug > 0);

    if ($check{$proc} eq '') {
        $! = $fixproc_error;
        die "$0: internal error 2\n";
    }

    if ($check{$proc} ne 'exist') {
        # If not "exist", it must be "shell": $check{$proc} is the index of
        # the script's first line in @shell_lines.
        my ($tmpfh, $tmpfile) = tempfile("check_XXXXXXXX", DIR => "/tmp");

        # This used to pass $fix{$proc}, which ran the fix script (or an
        # unrelated block) instead of the check script.
        &create_sh_script($check{$proc}, $tmpfh, $tmpfile);

        # Any non-zero wait status (exit code or signal) fails the check.
        my $status = system($tmpfile);
        unlink($tmpfile);
        return $check_failed_error if ($status != 0);

        # check passed, continue
    }
    return &do_exist($proc);
}
278
279
# Check that the number of running processes matching a name is in range.
#
# Argument: $proc - process name; it is used as the grep pattern and as the
#                   key into %min and %max.
# Returns:  $no_error if min <= count <= max, else $check_failed_error.
# Dies if the ps pipeline cannot be started.
sub do_exist {
    my $proc = pop(@_);

    printf(STDERR "do_exist\n") if ($debug > 0);

    # do ps, check to see if min <= no. of processes <= max
    my $ps;
    unless (open($ps, '-|', "/bin/ps -e | /bin/grep $proc | /bin/wc -l")) {
        $! = $fixproc_error;
        die "$0: can't run ps-grep-wc command\n";
    }
    my $proc_count = <$ps>;
    # Close explicitly so the pipeline's processes are reaped now rather
    # than whenever the handle happens to be reused.
    close($ps);

    # No output at all is treated as zero matching processes.
    $proc_count = 0 unless defined($proc_count);

    if (($proc_count < $min{$proc}) || ($proc_count > $max{$proc})) {
        return $check_failed_error;
    }
    return $no_error;
}
297
298
# Kill every process whose "ps -e" line matches the given name.
#
# Argument: process name, used as the grep pattern.
# Sequence: send a plain kill to every match, wait 2 seconds, send kill -9
#           to whatever is left, wait 2 more seconds and look again.
# Returns:  $no_error when nothing matching remains,
#           $cannot_kill_error when a match survives kill -9.
# Dies if the ps pipeline cannot be run or a ps line has no leading pid.
sub do_kill {
    my $victim = pop(@_);
    my $second_kill_needed = 0;

    printf(STDERR "do_kill\n") if ($debug > 0);

    # round 1: polite kill
    $! = $fixproc_error;
    open(COMMAND, "/bin/ps -e | /bin/grep $victim |")
        || die "$0: can't run ps-grep-awk command\n";
    while (defined(my $ps_line = <COMMAND>)) {
        # the pid is the first field of ps -e
        $! = $fixproc_error;
        die "$0: can't match ps -e output\n"
            unless ($ps_line =~ /^\s*(\d+)\s/);
        system "kill $1";
    }

    # round 2: anything still listed gets kill -9
    sleep 2;
    $! = $fixproc_error;
    open(COMMAND, "/bin/ps -e | /bin/grep $victim |")
        || die "$0: can't run ps-grep-awk command\n";
    while (defined(my $ps_line = <COMMAND>)) {
        # the pid is the first field of ps -e
        $! = $fixproc_error;
        die "$0: can't match ps -e output\n"
            unless ($ps_line =~ /^\s*(\d+)\s/);
        system "kill -9 $1";
        $second_kill_needed = 1;
    }
    return ($no_error) unless ($second_kill_needed);

    # round 3: a single remaining line means kill -9 did not work
    sleep 2;
    $! = $fixproc_error;
    open(COMMAND, "/bin/ps -e | /bin/grep $victim |")
        || die "$0: can't run ps-grep-awk command\n";
    my $survivor = <COMMAND>;
    return defined($survivor) ? $cannot_kill_error : $no_error;
}
345
346
# Kill a process and start it again with its configured command.
#
# Argument: $proc - process name (key into %cmd and %check).
# Returns:  the do_kill error if the kill failed; $no_error if no check is
#           wanted (check is 'null') or the check succeeds after the
#           restart; $cannot_restart_error if the check fails.
# Dies if no cmd is defined for $proc.
sub do_restart {
    my $proc = pop(@_);

    printf(STDERR "do_restart\n") if ($debug > 0);

    my $error_code = &do_kill($proc);
    return $error_code if ($error_code != $no_error);

    if ($cmd{$proc} eq '') {
        # set $! like every other fatal error so die exits with 10
        $! = $fixproc_error;
        die "$0: internal error 3\n";
    }
    system "$cmd{$proc}";
    # sleep needed here?

    # With check disabled there is nothing to verify.  This used to fall
    # off the end of the sub and return '' only by accident.
    return $no_error if ($check{$proc} eq 'null');

    return $no_error if (&do_check($proc) == $no_error);
    return $cannot_restart_error;
}
365
366
# Carry out the requested (or default) action for one process.
#
# Argument: $proc - process name.
# Returns:  one of the error codes ($no_error on success).
#
# With no action on the command line, the database decides: unless check is
# 'null', run the check and return its result if it is anything other than
# "check failed"; otherwise run the fix.  With an action on the command
# line, that action is executed regardless of check.
# Exits with $fixproc_error if -check is requested for a process whose check
# is 'null'; dies on an unknown action.
sub work_on_proc {
    my $proc = pop(@_);
    my $error_code = $no_error;

    printf(STDERR "work_on_proc\n") if ($debug > 0);

    if ($cmd_line_action eq '') {
        # perform action from database
        if ($check{$proc} ne 'null') {
            $error_code = &do_check($proc);
            return $error_code if ($error_code != $check_failed_error);
        }
        return &do_fix($proc);
    }

    # perform action from command line
    if ($cmd_line_action eq 'kill') {
        $error_code = &do_kill($proc);
    }
    elsif ($cmd_line_action eq 'restart') {
        $error_code = &do_restart($proc);
    }
    elsif ($cmd_line_action eq 'fix') {
        $error_code = &do_fix($proc);
    }
    elsif ($cmd_line_action eq 'check') {
        if ($check{$proc} eq 'null') {
            exit $fixproc_error;
        }
        $error_code = &do_check($proc);
    }
    elsif ($cmd_line_action eq 'exist') {
        $error_code = &do_exist($proc);
    }
    else {
        $! = $fixproc_error;
        die "$0: internal error 1\n";
    }

    # Return the result explicitly; the sub used to rely on the value of the
    # last statement evaluated inside the if/elsif chain.
    return $error_code;
}
424
425
# Print the in-memory database to stdout in database-file layout
# (debug aid only).  Shell blocks are printed without the closing
# "end_shell" line.
sub dump_database {
    for my $name (keys(%cmd)) {
        printf("name\t%s\n", $name);
        printf("cmd\t%s\n", $cmd{$name});
        printf("min\t%s\n", $min{$name});
        printf("max\t%s\n", $max{$name});
        &_dump_database_action('check', $check{$name});
        &_dump_database_action('fix', $fix{$name});
        printf("\n");
    }
}

# Print one check/fix setting.  A purely numeric value is an index into
# @shell_lines and is printed as "shell" followed by the stored lines; any
# other value is printed as is.
sub _dump_database_action {
    my ($keyword, $value) = @_;

    # Anchored: only a whole number is a pointer.  The former unanchored
    # /[0-9]+/ also matched words that merely contain a digit.
    if ($value =~ /^[0-9]+$/) {
        printf("%s\tshell\n", $keyword);
        # bounded so a block without an end marker cannot loop forever
        for (my $i = $value;
             $i <= $#shell_lines && $shell_lines[$i] ne $shell_end_marker;
             $i++) {
            printf("%s", $shell_lines[$i]);
        }
    }
    else {
        printf("%s\t%s\n", $keyword, $value);
    }
}
467
468
# Read $database_file into %cmd, %min, %max, %check, %fix and @shell_lines.
#
# Each entry starts with a "name" line; the keywords that follow apply to
# that name.  For "check shell" / "fix shell" the following lines, up to
# "end_shell", are appended to @shell_lines (preceded by $shell_header and
# followed by $shell_end_marker) and the hash value becomes the index of the
# block's first line.  Blank lines and lines beginning with "#" are ignored
# everywhere, including inside shell blocks.  Unknown keywords are ignored.
#
# Dies on: unreadable file, a line without "keyword value" shape, a keyword
# before the first name, a repeated keyword, a non-numeric min/max, an
# unknown check/fix value, a shell block without end_shell, or an entry
# rejected by finish_db_entry.
sub read_database {
    my $in_shell_lines = 0;	# true while collecting a check/fix shell block
    my $name = '';		# entry currently being read
    my $db;

    # Double-quoted so the file name is really part of the message (the
    # former single-quoted string printed a literal "$database_file\n").
    open($db, '<', $database_file)
        || &_read_database_fail("cannot open database file $database_file\n");

    while (my $line = <$db>) {
        # ignore blank lines or lines beginning with "#"
        next if ($line !~ /\S/ || $line =~ /^[ \t]*#/);

        if ($in_shell_lines) {
            if ($line =~ /^\s*end_shell\s*$/) {
                $in_shell_lines = 0;
                push(@shell_lines, $shell_end_marker);
            }
            else {
                push(@shell_lines, $line);
            }
            next;
        }

        # Non-greedy value so trailing blanks are not part of it; otherwise
        # "fix shell " would not be recognised as "shell".
        &_read_database_fail("$0: syntax error in database\n$line")
            unless ($line =~ /^\s*(\S+)\s+(\S.*?)\s*$/);
        my ($keyword, $value) = ($1, $2);

        if ($keyword eq 'name') {
            &finish_db_entry($name);
            $name = $value;
        }
        elsif ($keyword eq 'cmd') {
            &_read_database_fail("$0: cmd specified before name in database\n$line\n")
                if ($name eq '');
            &_read_database_fail("$0: cmd specified multiple times for $name in database\n")
                if ($cmd{$name} ne '');
            $cmd{$name} = $value;
        }
        elsif ($keyword eq 'min' || $keyword eq 'max') {
            my $limit = ($keyword eq 'min') ? \%min : \%max;

            &_read_database_fail("$0: $keyword specified before name in database\n$line\n")
                if ($name eq '');
            &_read_database_fail("$0: $keyword specified multiple times in database\n$line\n")
                if ($limit->{$name} ne '');
            # anchored: the whole value must be digits ("1a" used to pass)
            &_read_database_fail("$0: non-numeric $keyword value in database\n$line\n")
                if ($value !~ /^[0-9]+$/);
            $limit->{$name} = $value;
        }
        elsif ($keyword eq 'check' || $keyword eq 'fix') {
            my $action  = ($keyword eq 'check') ? \%check : \%fix;
            my $allowed = ($keyword eq 'check') ? 'null|exist' : 'kill|restart';

            &_read_database_fail("$0: $keyword specified before name in database\n$line\n")
                if ($name eq '');
            &_read_database_fail("$0: $keyword specified multiple times in database\n$line\n")
                if ($action->{$name} ne '');

            if ($value eq 'shell') {
                # a numeric value is a pointer into @shell_lines where the
                # shell commands are kept
                push(@shell_lines, $shell_header);
                $action->{$name} = $#shell_lines;
                $in_shell_lines = 1;
            }
            else {
                # Anything else would later be mistaken for a shell pointer
                # and run some unrelated block, so reject it here.
                &_read_database_fail("$0: unknown $keyword value in database\n$line\n")
                    if ($value !~ /^(?:$allowed)$/);
                $action->{$name} = $value;
            }
        }
    }
    close($db);

    # A block without end_shell has no end marker in @shell_lines.
    &_read_database_fail("$0: missing end_shell at end of database\n")
        if ($in_shell_lines);

    &finish_db_entry($name);
}

# Abort with the given message, making die exit with the fixproc error code.
sub _read_database_fail {
    my ($message) = @_;

    $! = $fixproc_error;
    die $message;
}
598
599
# Validate a completed database entry and fill in its defaults.
#
# Argument: entry name; the empty string (no entry read yet) is a no-op.
# Dies if the entry has no fix or no cmd.  Missing check defaults to
# 'exist', missing max and min default to 1.
sub finish_db_entry {
    my $entry = pop(@_);

    # nothing to finish before the first "name" line
    return if ($entry eq '');

    $! = $fixproc_error;
    if ($fix{$entry} eq '') {
        die "$0: fix not defined for $entry in database\n";
    }
    if ($cmd{$entry} eq '') {
        die "$0: cmd not defined for $entry in database\n";
    }

    $check{$entry} = 'exist' unless ($check{$entry} ne '');
    foreach my $limit (\%max, \%min) {
        $limit->{$entry} = 1 if ($limit->{$entry} eq '');
    }
}
616
617
# Parse @ARGV.
#
#   -min N / -max N    set $min / $max (N must be all digits)
#   -kill -check -restart -exist -fix
#                      set $cmd_line_action (at most one may be given)
#   -dN                set $debug to the single digit N
#   anything else not starting with "-" is appended to @proc_list
#
# Dies on an unknown switch, a missing or non-numeric -min/-max value, no
# process name, or more than one action.
sub read_args {
    my $i = 0;
    my $action_arg_count = 0;
    my %action_for = map { ("-$_" => $_) } qw(kill check restart exist fix);

    while ($i <= $#ARGV) {
        my $arg = $ARGV[$i];

        if (($arg eq '-min') || ($arg eq '-max')) {
            # The value is missing when the switch is the LAST argument.
            # The former test ($i == $#ARGV - 1) rejected a valid
            # "proc -min 2" and let a trailing "-min" through.
            if (($i == $#ARGV) || ($ARGV[$i + 1] !~ /\A\d+\z/)) {
                $! = $fixproc_error;
                die "$0: numeric arg missing after -min or -max\n";
            }
            if ($arg eq '-min') {
                $min = $ARGV[$i + 1];
            }
            else {
                $max = $ARGV[$i + 1];
            }
            $i += 2;
        }
        elsif (exists $action_for{$arg}) {
            $cmd_line_action = $action_for{$arg};
            $action_arg_count++;
            $i++;
        }
        elsif ($arg =~ /^-d(\d)$/) {
            # anchored at the start so a process name that merely ends in
            # "-d<digit>" is not taken for the debug switch
            $debug = $1;
            $i++;
        }
        elsif ($arg =~ /^-/) {
            $! = $fixproc_error;
            die "$0: unknown switch $arg\n";
        }
        else {
            push(@proc_list, $arg);
            $i++;
        }
    }

    $! = $fixproc_error;
    die "$0: no process specified\n" if ($#proc_list == -1);
    die "$0: more than one action specified\n" if ($action_arg_count > 1);
}
694