#!/hr5/dominik/local/bin/perl
#!/usr/local/bin/perl
#------------------------------------------------------------------------------
# This script can be used to run a chain of jobs.  It executes the same
# code for a number of different parameters and saves the output.  The
# parameters are supplied in a simple list, and chain has sophisticated
# methods to make these parameters available to the code.
#
# "perldoc chain" will print the full documentation.
#
# (c) 1995 Carsten Dominik   dominik@strw.leidenuniv.nl
#
# This is free software.  It may be distributed under the terms of the
# GNU Public License.
#------------------------------------------------------------------------------
# RCS: $Id: chain,v 1.18 1998/03/04 10:30:51 dominik Exp $
#------------------------------------------------------------------------------

require 5.002;                   # minimum perl version
use strict;
use Getopt::Long;                # command line option parsing
$Getopt::Long::ignorecase = 0;   # options are case sensitive (-O vs. -o)

# Program version (shown in the banner and in every logbook header)
my $version = "3.2";

# Defaults; several can be overridden from the parameter file
my $mailer   = "mail";           # program used to send notification mail
my $parfile  = "chain.dat";      # parameter file name
my $logbook  = "chain.log";      # logbook file name
my $sprefix  = "";               # prepended to every storage name
my $spostfix = "";               # appended to every storage name
my $epostfix = "_ERROR";         # appended to storage names of failed runs
my $mysignal = "USR1";           # signal used to probe a recorded PID
my $address  = $ENV{"USER"} || $ENV{"LOGNAME"} || (getpwuid($<))[0];
                                 # default mail recipient: the current user

# Safety limits
my $maxrun              = 1000000;  # maximum number of runs in total
my $maxseq              = 1000;     # maximum number of values in one sequence
my $max_diamond_replace = 1000;     # maximum number of diamond replacements

# Option variables
my(%opt,$testing,$starting,$restarting,$force,$mail,$verbose,$extra_steps);

# Other global variables
my(@todolist,@linenr);           # the TODO list and the chain.dat line numbers
my($todofmt,$todofmttop);              # Formats to print TODO list entries
my($todofmtlength);                    # The length of the formatted lines
my($i,$index);                         # loop variables
my($abort);                            # error counter
my(%failed,%exists,%seen,%nitems);     # statistics and errors in TODO list
my($whitespace_in_sn);                 # Flag for whitespace in storage names
my($lines_with_star);                  # Flag for printing the "*" annotation
my(%macro,%testexp);                   # the macro and test definitions
my($nfiles);                           # Number of output files per run
my(@infiles,@fmtfiles,@outfiles);      # Filenames from chain.dat
my($exec_script);                      # the script.
my(%outfileindex,@parnames,%parindex); # Lookup arrays
my($E_item,$E_linelist);               # Variables for error format STDERR
my($completed,$active_pid,$nskip);     # return from report_status();
my($script_is_perl_code);              # Flag
my($repair_error_message) = 1;         # Should we try to fix perl syntax error
                                       # messages in user supplied perl script?

# We need to know what our working directory is
require "pwd.pl";
&initpwd;

# Ignore $mysignal: report_status() sends it to a recorded PID only to find
# out whether that process still exists.
$SIG{$mysignal} = 'IGNORE';

# Startup message
print STDERR <<"EOF";
CHAIN version $version. (c) 1995 Carsten Dominik dominik\@strw.leidenuniv.nl
EOF

# Command line arguments and switches
if (! @ARGV) {                         # at least one option, please
  &print_usage;
  exit;
}
&GetOptions( \%opt,
             qw ( -help|h ),
             qw ( -term -kill -stop -cont -quit -pod),
             qw ( -create|c -list|l -lastlog|ll ),
             '-testing|t', \$testing,
             '-start|s', \$starting,
             '-restart|r', \$restarting,
             '-export|e',
             '-debug|d',
             '-mail|m', \$mail,
             '-force|f', \$force,
             '-verbose|v', \$verbose,
             '-n=i', \$maxrun,
             '-i:i', \$extra_steps,
             '-O:i',
             '-E:i'
           ) or ( &print_usage, exit );

# Change directory if one was given on the command line
if ($ARGV[0] && -d $ARGV[0]) {
  print "chdir to $ARGV[0]\n" ;
  &chdir(shift);
}

# Reject any further arguments
if (@ARGV) {
  &print_usage;
  die "ABORT: Unrecognized command line argument: $ARGV[0].\n";
}

# Options that perform a single action and exit
if ($opt{help})    { &print_help; exit}
if ($opt{pod})     { &print_manual; exit}
if ($opt{term})    { &chain_group_signal('TERM'); exit}
if ($opt{kill})    { &chain_group_signal('KILL'); exit}
if ($opt{stop})    { &chain_group_signal('STOP'); exit}
if ($opt{cont})    { &chain_group_signal('CONT'); exit}
if ($opt{create})  { &create_data_file; exit}
if ($opt{list})    { &report_status(\*STDOUT); exit}
if ($opt{lastlog}) { &show_last_log(\*STDOUT); exit}

# Exactly one of -t, -s, -r must remain
if ( $testing + $starting + $restarting == 0) {
  &print_usage;
  die "ABORT: You must specify one of the options -d -t -s -r -l -c.\n";
} elsif ($testing + $starting + $restarting > 1) {
  die" ABORT: Only one of the options -s -r -t is allowed.\n";
}

# Let's see what the logbook says
($completed,$active_pid,$nskip) = &report_status;
if ($completed && $restarting) {
  die "ABORT: Chain was already completed.\n",
      " Use \"chain -s\" if you want to start from scratch.\n";
}
if ($active_pid && ! $testing) {
  die "ABORT: Chain is active in this directory (PID $active_pid).\n",
      " Wait for completion or kill it.\n";
}

# Find out what to do with the output from the individual runs.
# divert_stdout_stderr() maps 0 to /dev/null, 1 to overwriting and 2 to
# appending to chain.out / chain.err.
if (defined $opt{O}) {
  unless ($opt{O} == 0 || $opt{O} == 1 || $opt{O} == 2 ) {
    die "ERROR: Illegal value for option -O (must be 0 or 1 or 2)\n";
  }
}
if (defined $opt{E}) {
  unless ($opt{E} == 0 || $opt{E} == 1 || $opt{E} == 2 ) {
    die "ERROR: Illegal value for option -E (must be 0 or 1 or 2)\n";
  }
}
unlink "chain.out";
unlink "chain.err";

# Open the parameter file.  The read_* routines below all read from the
# PARFILE handle and must be called in this order.
open(PARFILE,"$parfile") || die "ABORT: Can't open $parfile.\n";

# Read options
&read_options;

# Read the commands to execute
&read_script;

# Read the names of input and output file(s)
&read_file_names;

# Read the TODO list
&read_todo_list;

# Close the parameter file
close(PARFILE);

# Abort if any errors were found while reading chain.dat
die "\nABORT: $abort error", ($abort==1?"":"s"),
    " found while reading $parfile\n" if ($abort && !$force);
$abort = 0;

# Expand the TODO list into a full list
&expand_todo_list;

# Print TODO list, when testing.
if ($testing) {
  # Show the expanded run table.  A dashed rule is printed after the
  # $nskip-th entry.
  print "\nTEST: The following ", scalar(@todolist),
        " run specifications were created:\n\n";
  print $todofmttop;
  $i = 0;
  foreach (@todolist) {
    printf "$todofmt\n", &recover_escaped_whitespace(split(/\s+/));
    $i++;
    print "- " x int($todofmtlength/2), "\n" if $i == $nskip;
  }
  print "\n";
}

# Storage names containing whitespace only deserve a warning
if ($whitespace_in_sn) {
  print STDERR "WARNING: $whitespace_in_sn storage names contain whitespace characters\n\n";
}

# Consistency checks on the expanded list.  Existing files only produce a
# warning; each of the other three checks counts as one error when it
# reports anything.
$abort++ if &complain_about_nitem_inconsistencies;   # items per line
&complain_about_existing_files;                      # names of existing files
$abort++ if &complain_about_multiple_names;          # names used twice
$abort++ if &complain_about_parameter_tests;         # failed #test lines

# Explain the "*" annotation if any of the reports used it
if ($lines_with_star) {
  print STDERR " * Line would be skipped with \"chain -r\".\n\n";
}

die "ABORT: because of errors found during expansion.\n"
  if ($abort && !$force);
$abort = 0;

# Check for logbook and restart
if ($starting && -e $logbook) {
  # starting from scratch: the old logbook has to go
  warn "WARNING: Removing old logbook file.\n";
  unlink("$logbook") || die "ABORT: Can't remove file $logbook.\n";
}
elsif (!$starting && $restarting && !-e $logbook) {
  # nothing to restart from: turn the restart into a fresh start
  warn "WARNING: Logbook $logbook does not exist. Starting from scratch.\n";
  $starting = 1;
  $restarting = 0;
}

if ($starting || $restarting) {
  # Make a note about start/restart time.
  &log(($starting ? "chain_started" : "chain_restarted"),
       " " . $#todolist+1 . " runs, PID=$$");

  # Save the process id into a file
  open PID,">chain.pid" or die "Cannot write to file chain.pid";
  print PID "$$\n";
  close PID;
}

# From now on, trap a few signals so that we may put some
# last words into the logbook file before we die.
$SIG{'INT'}  = 'signal_handler';
$SIG{'QUIT'} = 'signal_handler';
$SIG{'TERM'} = 'signal_handler';
$SIG{'HUP'}  = 'signal_handler';

# Now we may free some memory
undef %macro;
foreach (keys %failed) { undef %{$failed{$_}} }
undef %failed;
undef %testexp;
undef %exists;
undef %seen;
undef %nitems;

# Loop through the parameter table
# The variables needed in the loop:
my($nruntot,$runnr) = $#todolist+1;
my($todoline,$therun);
my(@pars,@storage_names);
my($exitval,$errsum);
my($start_utime,$end_utime);
my($storage_name,$outfile);
my($pid);
my($subject);

$nskip = 0 unless $restarting;
for ($i=$nskip; $i<=$#todolist; $i++) {

  if ($i-$nskip+1 > $maxrun) {die "Exit after maxrun=$maxrun runs.\n"};
  $runnr = $i+1;
  $todoline = $todolist[$i];
  @pars = split(/\s+/,$todoline);
  # get hidden whitespace back
  &recover_escaped_whitespace(@pars);
  $therun = sprintf($todofmt,@pars);
  # The storage names are replaced by a single dummy element ("$i"), so
  # that parameter number N sits at $pars[N].
  @storage_names = splice(@pars,0,$nfiles,"$i");

  # Create the input files for the job
  foreach $index (0..$#infiles) {
    open(FROM,"$fmtfiles[$index]")
      || die "ABORT: Can't open $fmtfiles[$index].\n";
    open(TO,">$infiles[$index]")
      || die "ABORT: Can't open $infiles[$index].\n";
    while (<FROM>) {
      print TO &replace_diamonds($_,\@pars);
    }
    close(FROM);
    close(TO);
  }

  # Exit after creating input file if testing
  if ($testing) {
    # Abort if any errors were found
    die "\nABORT: $abort error", ($abort==1?"":"s"), " found\n"
      if ($abort && !$force);
    print "\nTEST: Sample input file(s) \"@infiles\" created.\n" if @infiles;
    print " No apparent errors in your setup.\n",
          " Use \"chain -s &\" to start the chain.\n";
    exit;
  }

  # Logbook: here we go
  &log("job_started",$therun);
  $errsum = 0;

  # Divert STDOUT and STDERR according to command line options
  &divert_stdout_stderr;

  $start_utime = (times)[2];
  if ($script_is_perl_code) {
    # Run the perl program
    # Export variables into user package
    @user::pars = @pars[1..$#pars];
    @user::files = @storage_names;
    $user::irun = $i;
    if ($opt{export}) {
      no strict qw(refs);
      foreach (@parnames[1..$#parnames]) {
        ${"user::$_"} = $pars[$parindex{$_}];
      }
    }
    # eval to catch exceptions
    $exitval = eval "&user::RUN();";
    if ($@) {
      my($msg) = $@;
      $msg =~ s#\n#\\n#g;              # keep the logbook entry on one line
      &log("error","\&run failed: $msg");
      $errsum++;
    }
  } else {
    # Run the script
    my $cmd = $exec_script;
    # replace diamonds
    $cmd = &replace_diamonds($cmd,\@pars);
    # pipe the script through a shell
    if ($opt{debug}) {
      $pid = open SHELL,"|/bin/sh -evx";
    } else {
      $pid = open SHELL,"|/bin/sh -e";
    }
    if (defined $pid) {
      if ($opt{export}) {
        # export parameters as variables
        foreach (@parnames[1..$#parnames]) {
          print SHELL "$_=\'$pars[$parindex{$_}]\'\n";
        }
      }
      print SHELL $cmd;
      close SHELL;
      $exitval = ($? >> 8);
      if ($exitval != 0) {
        $errsum += $exitval;
        &log("error","Script returned with exit value $exitval");
      }
    } else {
      &log("error","Can't open pipe to /bin/sh\n");
      # Undo the diversion before skipping this run; otherwise the next
      # run would save the already diverted handles as the "originals".
      &recover_stdout_stderr;
      next;
    }
  }
  $end_utime = (times)[2];

  # recover STDOUT and STDERR
  &recover_stdout_stderr;

  # Rename output to unique storage name unless @outfiles is empty
  if (@outfiles) {
    foreach (@outfiles) {
      unless (-e $_) {
        $errsum++;
        &log("error","output file not found: $_");
      }
    }
    foreach $index (0 .. $#outfiles) {
      $outfile = $outfiles[$index];
      $storage_name = $storage_names[$index];
      if (-e $storage_name) {
        &log("warning","have to remove $storage_name");
        unlink($storage_name)
          || die "ABORT: Could'nt unlink $storage_name.\n";
      }
      if (-e $outfile) {
        # Add error postfix if something went wrong
        if ($errsum > 0) {
          $storage_name = $storage_name . $epostfix;
          &log("warning","Error postfix added to storage name");
          if (-e $storage_name) {
            &log("warning","have to remove $storage_name");
            unlink($storage_name)
              || die "ABORT: Could'nt unlink $storage_name.\n";
          }
        }
        rename($outfile,$storage_name)
          || &log("warning","rename was not successful");
      }
    }
  }

  # End of job note into logbook
  &log("job_completed","user time: " . &HMS($end_utime-$start_utime) );

  # Mail notification if requested
  if ($mail && $address) {
    if ($errsum == 0) {
      $subject = "CHAIN: success in run $runnr/$nruntot";
    } else {
      $subject = "CHAIN: *error* in run $runnr/$nruntot";
    }
    open(MAIL,"|$mailer -s '$subject' $address");
    print MAIL "The logbook entries for the completed run are:\n\n";
    &show_last_log(\*MAIL);
    print MAIL "\n\nAnd here is the general status report:\n\n";
    &report_status(\*MAIL);
    close(MAIL);
  }
}

# Make a note that chain is complete (this will disable 'chain -r' restarts)
&log("chain_completed");

# Mail notification
if ($address) {
  open(MAIL,"|$mailer -s 'CHAIN: completed' $address");
  &report_status(\*MAIL);
  close(MAIL);
}

# End of main program
#
# Subroutine definitions start below
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

sub nfields {
  # compute the number of whitespace-separated fields:
  # the number of whitespace runs in the argument, plus one
  my ($string) = @_;
  ($string =~ s/\s+//g) + 1 ;
}

sub HMS {
  # return HH:MM:SS.ss calculated from $_[0] in seconds
  my ($time) = @_;
  my ($hour,$min,$sec);
  $hour = int($time/3600);
  $time -= $hour*3600;
  $min = int($time/60);
  $time -= $min*60;
  $sec = $time;
  $time = sprintf("%d:%02d:%05.2f",$hour,$min,$sec);
}

sub sec {
  # return seconds calculated from HH:MM:SS.ss (the inverse of HMS)
  my ($h,$m,$s) = split(/:/,$_[0]);
  return $s+60*$m+3600*$h;
}

sub skip_comment {
  # move to the next line of PARFILE that is not a comment.
  # The line is left in $_ for the caller; $_ is undefined at end of file.
  while (<PARFILE>) {last if (/^[^#]/) }
}

sub trim {
  # kill leading and trailing white space from a string.
  # Modifies the caller's variable in place and also returns the result.
  $_[0] =~ s/^[ \t\n]*//;
  $_[0] =~ s/[ \t\n]*$//;
  return $_[0];
}

sub isnum {
  # is this a number? Allows for leading and trailing spaces.
  $_[0] =~ /^\s*(\+|-)?(\d+\.?\d*|\.\d+)([eE](\+|-)?\d+)?\s*$/;
}

sub hide_escaped_whitespace {
  # hide whitespace in arglist. In list context, return changed arglist.
  # in scalar context, return number of replacements.
  # "\<ws>" becomes "\374<character code>\375"; the arguments are modified
  # in place.
  my $count;
  foreach (@_) { $count += s/\\(\s)/"\374".ord($1)."\375"/eg }
  return wantarray ? @_ : $count;
}

sub recover_escaped_whitespace {
  # recover whitespace in arglist. In list context, return changed arglist.
  # in scalar context, return number of replacements.
  my $count;
  foreach (@_) { $count += s/\374(\d+)\375/chr($1)/eg }
  return wantarray ? @_ : $count;
}

sub hide_escaped_characters {
  # hide \char in arglist. In list context, return changed arglist.
  # in scalar context, return number of replacements.
  # "\c" becomes "\376<character code>\377"; the arguments are modified
  # in place.
  my $count;
  foreach (@_) { $count += s/\\(.)/"\376".ord($1)."\377"/eg }
  return wantarray ? @_ : $count;
}

sub recover_escaped_characters {
  # recover \char in arglist. In list context, return changed arglist.
  # in scalar context, return number of replacements.
  my $count;
  foreach (@_) { $count += s/\376(\d+)\377/chr($1)/eg }
  return wantarray ? @_ : $count;
}

sub nowstring {
  # return a string containing date and time, e.g. " 4 Mar 1998, 10:30:51".
  # report_status() parses this layout back out of the logbook.
  my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
  my ($thisday) = ("Sun","Mon","Tue","Wed","Thu","Fri","Sat")[$wday];
  my ($thismon) = ("Jan","Feb","Mar","Apr","May","Jun","Jul",
                   "Aug","Sep","Oct","Nov","Dec")[$mon];
  $year += 1900;                 # localtime() counts years from 1900
  $mday = sprintf("%2d",$mday);
  $hour = sprintf("%2d",$hour);
  $min = sprintf("%02d",$min);
  $sec = sprintf("%02d",$sec);
  return "$mday $thismon $year, $hour:$min:$sec";
}

sub log {
  # Put an entry into the logbook file.
  #   $key   kind of entry: chain_started, chain_restarted, chain_completed,
  #          job_started, job_completed, warning, error or signal
  #   $text  free text for the entry (not used for chain_completed)
  # The entry is echoed to STDOUT when -v is given.  Always call this
  # as &log(...): the name clashes with the builtin log().
  my ($key,$text) = @_;
  my $entry;
  if ($key eq "chain_started") {
    $entry = "\nCHAIN $version: Started " . &nowstring . " :: $text\n";
  } elsif ($key eq "chain_restarted") {
    $entry = "\nCHAIN $version: Restarted " . &nowstring . " :: $text\n";
  } elsif ($key eq "chain_completed") {
    $entry = "\nCHAIN $version: Completed " . &nowstring . "\n";
  } elsif ($key eq "job_started") {
    $entry = "\n$text\n" .
             "-" x (length($text)>79 ? length($text) : 79) . "\n";
    $entry .= "started " . &nowstring . "\n";
  } elsif ($key eq "job_completed") {
    $entry = "finished " . &nowstring . " :: $text\n";
  } elsif ($key eq "warning") {
    $entry = "WARNING " . &nowstring . " :: $text\n";
  } elsif ($key eq "error") {
    $entry = "ERROR " . &nowstring . " :: $text\n";
  } elsif ($key eq "signal") {
    $entry = "\nCHAIN $version: aborted " . &nowstring .
             " :: signal SIG$text\n";
  } else {
    $entry = "\nunrecognized logbook key $key\n";
  }
  print $entry if $verbose;
  open(LOGBOOK,">>$logbook") || die "ABORT: Can't open $logbook.\n";
  print LOGBOOK $entry;
  close(LOGBOOK);                # make sure the buffer is flushed
}

sub chain_group_signal {
  # send signal $_[0] (a name such as 'TERM') to the process group of the
  # chain whose PID is recorded in ./chain.pid
  my ($signal) = @_;
  my ($pid) = 0+`cat chain.pid`;
  my $i = 0;
  my(%signals,$signr);
  # get the number of the signal (we need it in order to kill -groups)
  # NOTE(review): this assumes that `kill -l` prints the bare signal names
  # in numerical order, starting with signal 1 -- confirm for the shell
  # in use.
  %signals = map {$_ => ++$i} split /\s+/,`kill -l`;
  $signr = $signals{$signal};
  # kill it
  if ($pid) {
    if (kill -$signr,$pid) {
      print "Signal $signal successfully sent to process group $pid.\n";
    } else {
      print "Could not send signal $signal to process group $pid.\n";
    }
  } else {
    print "No active chain process associated with this directory.\n";
  }
}

sub signal_handler {
  # put a note into the logbook file and die
  my ($sig) = @_;
  &log("signal",$sig);
  die "Caught a signal $sig. Exiting...\n";
}

sub divert_stdout_stderr {
  # divert STDOUT and STDERR to files, when the options require it.
  # -O / -E value: 0 = /dev/null, 1 = overwrite, 2 = append to
  # chain.out / chain.err.  The original handles are kept in SAVEOUT and
  # SAVEERR for recover_stdout_stderr().
  if (defined $opt{O}) {
    open SAVEOUT, ">&STDOUT";
    if ($opt{O} == 0) {
      open(STDOUT, ">/dev/null") || die "Can't redirect STDOUT\n";
    } elsif ($opt{O} == 1) {
      open(STDOUT, ">chain.out") || die "Can't redirect STDOUT\n";
    } elsif ($opt{O} == 2) {
      open(STDOUT, ">>chain.out") || die "Can't redirect STDOUT\n";
    }
    select STDOUT; $| = 1;
  }
  if (defined $opt{E}) {
    open SAVEERR, ">&STDERR";
    if ($opt{E} == 0) {
      open(STDERR, ">/dev/null") || die "Can't redirect STDERR\n";
    } elsif ($opt{E} == 1) {
      open(STDERR, ">chain.err") || die "Can't redirect STDERR\n";
    } elsif ($opt{E} == 2) {
      open(STDERR, ">>chain.err") || die "Can't redirect STDERR\n";
    }
    select STDERR; $| = 1;
  }
  select STDOUT;
}

sub recover_stdout_stderr {
  # recover the original function of STDOUT and STDERR
  if (defined $opt{O}) {
    close STDOUT;
    open STDOUT, ">&SAVEOUT";
  }
  if (defined $opt{E}) {
    close STDERR;
    open STDERR, ">&SAVEERR";
  }
}

sub read_file_names {
  # look for the file name definitions in PARFILE and read them.
  # Sets @infiles, @fmtfiles, @outfiles, @parnames, %parindex,
  # %outfileindex and $nfiles, and seeds %nitems with the expected number
  # of items per TODO line.
  my ($infiles,$fmtfiles,$outfiles,$parnames);
  my ($linenr_o,$linenr_p,$nitems);
  &skip_comment;
  while (/^[^\#]/) {
    $infiles  = $1 if (/^\s*INPUT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
    $fmtfiles = $1 if (/^\s*FORMAT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
    $outfiles = $1 if (/^\s*OUTPUT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
    $linenr_o = $. if (/^\s*OUTPUT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
    $parnames = $1 if (/^\s*PARAMETER\s+NAMES\s*:\s*(.*?)\s*$/i);
    $linenr_p = $. if (/^\s*PARAMETER\s+NAMES\s*:\s*(.*?)\s*$/i);
    $_ = <PARFILE> || die "ABORT: Unexpected end of file in $parfile.\n";
  }
  @infiles = split /\s+/, &horizontally_expand($infiles);
  @fmtfiles = split /\s+/, &horizontally_expand($fmtfiles,@infiles);
  @outfiles = split /\s+/, &horizontally_expand($outfiles);
  # define the parameter name arrays; the dummy first element makes the
  # name of parameter N live at $parnames[N]
  @parnames = ('#parnames',split /\s+/, $parnames);
  # make a hash for easy lookup
  @parindex{@parnames} = (0..$#parnames);
  delete $parindex{'#parnames'};
  # Initialize the nitems array
  $nitems = scalar(@parnames) - 1 + scalar(@outfiles);
  $nitems{$nitems} = "($linenr_o/$linenr_p) ";
  # number of input and format files equal?
  if (@infiles != @fmtfiles) {
    print STDERR "ERROR: Number if input files (",$#infiles+1,
                 ") differs from number of format files (",$#fmtfiles+1,").\n";
    $abort++;
  }
  # do the format files really exist?
  foreach (@fmtfiles) {
    if (!-e) {
      warn "ERROR: Format file $_ does not exist.\n";
      $abort++;
    }
  }
  # how many output files are there?
  $nfiles = $#outfiles+1;
  # make a hash to lookup indices of outfile names (by name and by number)
  @outfileindex{@outfiles} = (1..@outfiles);
  foreach (1..$nfiles) {$outfileindex{"$_"} = $_};
}

sub horizontally_expand {
  # expand {...} like csh. Use @default if {} is empty.
  #   $_[0]      whitespace-separated words
  #   $_[1..]    replacement list for an empty pair of braces
  # Returns the expanded words joined with single blanks.
  local($_) = shift;
  my(@default) = @_;
  my(@fields,$head,$core,$tail,@core);
  my($i);                        # private index: leave the global $i alone
  @fields = split(/\s+/,$_);
  LOOP: for ($i=0; $i<=$#fields; $i++) {
    if ( ($head,$core,$tail) = ($fields[$i] =~ /(.*?)\{(.*?)\}(.*)/ ) ) {
      if ($core) {
        @core = split(/,/,$core);
      } else {
        @core = @default;
      }
      foreach (@core) {$_ = $head . $_ . $tail;}
      splice(@fields,$i,1,@core);
      # look at the same position again: the replacement may itself
      # contain another pair of braces
      redo LOOP;
    } else {
      next LOOP;
    }
  }
  return join(' ',@fields);
}

sub read_options {
  # look for prefix and postfix definitions and options in PARFILE.
  # Minor options are parsed like command line options, after the real
  # command line has been processed.
  my $minoropt;
  &skip_comment;
  while (/^[^\#]/) {
    $minoropt = $1 if (/^\s*MINOR\s*OPTIONS\s*:\s*(.*?)\s*$/i);
    $sprefix  = $1 if (/^\s*STORAGE\s*NAME\s*PREFIX\s*:\s*(\S*)\s*$/i);
    $spostfix = $1 if (/^\s*STORAGE\s*NAME\s*POSTFIX\s*:\s*(\S*)\s*$/i);
    $epostfix = $1 if (/^\s*ERROR\s*CASE\s*POSTFIX\s*:\s*(\S*)\s*$/i);
    $address  = $1 if (/^\s*MAIL\s*ADDRESS\s*:\s*(\S*)\s*$/i);
    $_ = <PARFILE> || die "ABORT: Unexpected end of file in $parfile.\n";
  }
  if ($minoropt) {
    @ARGV = split(/\s+/,$minoropt);
    &GetOptions(\%opt,
                '-export|e',
                '-debug|d',
                '-mail|m', \$mail,
                '-force|f', \$force,
                '-verbose|v',\$verbose,
                '-n=i', \$maxrun,
                '-i:i', \$extra_steps,
                '-O:i',
                '-E:i'
               ) or
      die "ABORT: Illegal options in DEFAULT MINOR OPTIONS (file $parfile).\n";
  }
}

sub read_script {
  # read the script between the "Start of Shell script" and "End of Shell
  # script" marker lines of PARFILE into $exec_script.  If the text
  # contains a "sub NAME {" line it is treated as perl code and compiled
  # into package "user"; otherwise it is kept as a shell script.
  my $sline;
  until (/^# ---- Start of Shell script -/) {
    # stop at end of file instead of spinning on undef forever
    defined($_ = <PARFILE>)
      || die "ABORT: Unexpected end of file in $parfile.\n";
  }
  $sline = $.;
  $exec_script = '';
  while (<PARFILE>) {
    last if /^# ---- End of Shell script -/;
    $exec_script .= $_;
  }
  if ($exec_script =~ /^ \s* sub \s+ \w+ \s+ \{ /mx) {
    # it is a perl program
    $script_is_perl_code = 1;
    # Evaluate subroutine definitions and check for errors
    eval "package user;\n$exec_script";
    if ($@) {
      warn "ERROR: Perl program in $parfile contains errors:\n";
      if ($repair_error_message) {
        # try to insert correct file name and line numbers
        $@ =~ s/(\(eval \d+\))/$parfile/g;
        $@ =~ s/(line\s+)(\d+)/"$1".($2+$sline-1)/eg;
      } else {
        # just tell them where to find those errors
        warn " (Line numbers are in file $parfile, relative to line ",$sline-1,")\n";
      }
      print STDERR $@,"\n";
      exit;
    }
    unless (defined &user::RUN) {
      warn "ERROR: Perl program in $parfile needs to define subroutine RUN\n";
      $abort++;
    }
  } else {
    # just an ordinary shell script
    $script_is_perl_code = 0;
    die "ABORT: No script defined in $parfile.\n" unless $exec_script;
  }
}

sub read_todo_list {
  # read parameter list and expand macros.
  # Reads the rest of PARFILE.  Every run specification and every #test
  # line is pushed onto @todolist, its chain.dat line number onto @linenr.
  # #define lines are stored in %macro and are not put on the list.
  my($key,$index,$value);
  my($line,$line0,@tmp,@pars,@files);
  my($head,$start,$core,$end,$tail,@core);
  my($p,$c);
  my($np,$np0);
  READLOOP: while (<PARFILE>) {
    $line = $.;
    # add any continuation lines
    $_ .= <PARFILE> while (/\\\s*$/);
    s/\\\s*\n//g;                    # delete newlines
    next READLOOP if /^\s*$/;        # skip empty lines
    s/^\s+//;                        # remove leading spaces
    s/\s+$//;                        # remove trailing spaces
    # Interpolate macros
    s/ (\b\w+\b) / defined($macro{$1}) ? $macro{$1} : $1 /xge;
    # hide backslash escape characters
    &hide_escaped_whitespace($_);
    &hide_escaped_characters($_);
    # Macro definition
    if (/^\#define\s+/) {
      if ( ($key,$value) = (/^ \#define \s+ (\w+) \s+ (\S.*) $/x)) {
        $macro{$key} = $value;
        next READLOOP;
      }
    }
    # Test definition
    if ( ($index,$value) = (/^\#test\s+(\S+)\s+(\S.*)$/)) {
      # keep test definition in TODO list for now.
      push(@todolist,$_);
      push(@linenr,$line);
      next READLOOP;
    }
    next READLOOP if /^\#/;          # skip comment lines
    chop if /\n/;                    # kill trailing newline
    # Horizontal expansion: {...} (restricted to storage names)
    $_ = &horizontally_expand($_,@outfiles);
    # Sort parameters
    $_ = &sort_parameters($_,$line);
    # put on todo list
    push(@todolist,$_);
    push(@linenr,$line);
  }
}

sub expand_todo_list {
  # Expand @todolist in place into one entry per run: evaluate and remove
  # #test lines, expand lists (...) and sequences [...], put parameters
  # into the storage names and collect the statistics (%nitems, %seen,
  # %exists, %failed) reported by the complain_about_* routines.
  # Finally build the print formats $todofmt and $todofmttop.
  my(@tmp,@width,@list,@list1,@files,@pars);
  my(%testdefline);
  my($head,$tail,$paren,$spec,$close,$i,$j,$iter,$npars,$name);
  my($number,$format);
  my($index,$index1,$key,$key1,$value);
  my %have_warned;
  my ($would_be_skipped);

  # How many parameters?  Look at the first line that is not a #test line.
  foreach (@todolist) {
    next if /^\#/;
    @tmp = split /\s+/;
    last;
  }
  $npars = $#tmp-$#outfiles;
  foreach (1..$npars) {$parindex{$_} = $_}

  # Prepare @width array
  foreach $i (0 .. $#outfiles) {$width[$i] = length($outfiles[$i]);}
  foreach $i (($#outfiles+1) .. $#tmp) {
    if ($_ = $parnames[$i-$#outfiles]) {
      $width[$i] = length($_);
    } else {
      $width[$i] = 1;
    }
  }

  # Expand parameter list
  $i = 0;
  CHECKLOOP: while ($i<=$#todolist) {
    $would_be_skipped = ($i < $nskip);

    # Test definition?
    if ( ($index,$value) =
         ($todolist[$i] =~ /^\#test\s+(\S+)\s+(\S.*)$/)) {
      # save test expression
      if (defined $parindex{$index} || defined ($outfileindex{$index})) {
        $value =~ s/\s*$//;
        if ($value eq 'reset') {
          # "#test NAME reset" switches the test off again
          delete $testexp{$index};
          delete $testdefline{$index};
        } else {
          $testexp{$index} = eval "sub { $value }";
          if ($@) {
            die "ABORT: Perl syntax error in test definition ",
                "(chain.dat line $linenr[$i])\n",
                " #test $index ...\n",
                " Error message follows:\n",$@;
          }
          $testdefline{$index} = $linenr[$i];
        }
      } else {
        warn "ERROR: don't know how to test \"$index\" ($parfile line $linenr[$i])\n";
        $abort++;
      }
      splice(@todolist,$i,1);
      splice(@linenr,$i,1);
      # redo does not re-evaluate the loop condition, so check it here
      last CHECKLOOP if ($i > $#todolist);
      redo CHECKLOOP;
    }

    # Macro interpolation and vertical expansion
    # NOTE(review): $iter is decremented twice per pass, so "-i 1" adds a
    # macro interpolation but no further list expansion -- confirm that
    # this is intended.
    $iter = $extra_steps + 1;
    while ($iter > 0) {
      # Expand lists and sequences
      while ( (($head,$paren,$number,$spec,$format,$close,$tail) =
               ($todolist[$i] =~
                /^(.*?) (\[|\() (\#\d+[:,])? (.*?) ([:,]%.*?)? (\]|\)) (.*)$/x
               ) ) ) {
        chop($number) if $number;
        substr($format,0,1) = "" if $format;
        $format = $format || "%s";
        @list = map {sprintf($format,$_)} &expand_me($paren,$spec,$close);
        # attach head and tail to each value
        foreach (@list) { $_ = $head . $_ . $tail; }
        if ($number) {
          # look for more lists/sequences with the same number
          # and expand them parallel
          while ( (($paren,$spec,$format,$close) =
                   ($list[0] =~
                    /(\[|\() \Q$number\E [:,] (.*?) ([,:]%.*?)? (\]|\))/x
                   ) ) ) {
            substr($format,0,1) = "" if $format;
            $format = $format || "%s";
            @list1 = map {sprintf($format,$_)} &expand_me($paren,$spec,$close);
            # Interpolate
            if (@list != @list1) {
              print STDERR "ERROR: In $parfile line $linenr[$i]:\n",
                           " Parallel sequences/lists $number have different lengths.\n"
                unless $have_warned{$linenr[$i],$number}++;
              $abort++;
            }
            @list = map {s/(\[|\() \Q$number\E (.*?) (%.*?)? (\]|\))/shift(@list1)/xe;$_} @list;
          }
        }
        # Replace old line with new lines. Keep track of line numbers.
        splice(@todolist,$i,1,@list);
        splice(@linenr,$i,1,($linenr[$i]) x @list)
      }
      $iter--;
      if ($iter > 0) {
        # Expand macros
        $todolist[$i] =~
          s/ (\b\w+\b) / defined($macro{$1}) ? $macro{$1} : $1 /xge;
      }
      $iter--;
    }

    # Separate storage names and parameters
    @pars = split(/\s+/,$todolist[$i]);
    $nitems{$#pars+1} .= "$linenr[$i] "
      unless $nitems{$#pars+1} =~ /\b$linenr[$i] $/;
    @files = splice(@pars,0,$nfiles,"$i");
    # get the escaped characters back in parameters (names have to wait)
    &recover_escaped_characters(@pars);
    if ($#pars > $npars) {
      # Ooops. No. of parameters increased. Fix the parindex hash.
      foreach ($npars+1..$#pars) {$parindex{$_} = $_}
      $npars = $#pars;
    }

    # Put parameters into the storage name and count occurrences of names
    foreach (@files) {
      $_ = &replace_diamonds($_,\@pars);
      # remove spaces to get useful names
      tr/ //d;
      # NOW recover escaped characters in storage names
      &recover_escaped_characters($_);
      # Add prefix and postfix if defined
      $_ = $sprefix . $_ . $spostfix;
      # count the number of uses of every name
      $seen{$_} .= $linenr[$i].($would_be_skipped?"*":"")." ";
      # check if the file name does already exist
      if (-e $_) {
        $exists{$_} = &add_linenr($exists{$_},$linenr[$i],$would_be_skipped);
      }
    }

    # Check parameters and names with #test.  The test code sees the value
    # in $_ and may modify it; the result is written back.
    foreach $name (sort keys %testexp) {
      $key1 = "#test $name $testdefline{$name}";
      if (defined $parindex{$name}) {
        $j = $parindex{$name};
        $_ = $pars[$j];
        unless (&{$testexp{$name}}) {
          $failed{$key1}{$pars[$j]} =
            &add_linenr($failed{$key1}{$pars[$j]},$linenr[$i],
                        $would_be_skipped);
        }
        $pars[$j] = $_;
      } elsif (defined $outfileindex{$name}) {
        $j = $outfileindex{$name}-1;
        $_ = $files[$j];
        unless (&{$testexp{$name}}) {
          $failed{$key1}{$files[$j]} =
            &add_linenr($failed{$key1}{$files[$j]},$linenr[$i],
                        $would_be_skipped);
        }
        $files[$j] = $_;
      }
    }

    # put everything together again
    shift(@pars);                    # kill the dummy
    @list = (@files,@pars);
    $todolist[$i] = join(" ",@list);

    # update width array
    &recover_escaped_whitespace(@list);
    foreach (@list[0..$#files]) {$whitespace_in_sn++ if /\s/}
    foreach $j (0..$#list) {
      if (length($list[$j]) > $width[$j]) {$width[$j] = length($list[$j])}
    }

    # Move on to next line
    $i++;
  }

  # create parameter format
  $todofmt = "%-" . join("s %-",@width) . "s";

  # create headline for parameter format
  my($len,$len1,$len2,$todofmttop1,$todofmttop2);
  $len = 0;
  foreach $i (0 .. ($nfiles-1)) {$len += $width[$i]+2};
  $len -= 2;
  if (@files) {
    $todofmttop1 = sprintf("%-${len}s %-s","FILES","PARAMETERS");
  } else {
    $todofmttop1 = sprintf("%-${len}s","PARAMETERS");
  }
  @tmp = @outfiles;
  foreach $i (1 .. $npars) {
    ($tmp[$nfiles-1+$i]=($parnames[$i]||"$i"));
  }
  $todofmttop2 = sprintf("$todofmt",@tmp);
  $len1 = length($todofmttop1);
  $len2 = length($todofmttop2);
  $todofmtlength = $len1>$len2 ? $len1 : $len2;
  $todofmttop = "$todofmttop1\n" .
                "-" x ($len1>$len2 ? $len1 : $len2) . "\n" .
                "$todofmttop2\n" .
                "=" x ($len1>$len2 ? $len1 : $len2) . "\n";
}

sub sort_parameters {
  # sort the line when there are parameters specified with NAME=...
  #   $line    one TODO line (storage names first, then parameters)
  #   $linenr  its line number in chain.dat, for error messages
  # Returns the line with every NAME=value moved to the position of NAME;
  # missing values are reported and replaced by '????'.
  my($line,$linenr) = @_;
  my(@old,@new,$i,$name,$rest);
  my($foundeq) = 0;
  unless ($line =~ /=/) {
    return $line;
  }
  @old = split ' ',$line;
  @new = @old[0..$nfiles-1];
  for $i ($nfiles..$#old) {
    ($name,$rest) = ($old[$i] =~ /(\w+)=(.*)/);
    if ($name && $parindex{$name} >= $nfiles) {
      $new[$parindex{$name}] = $rest;
      $foundeq++;                    # disallow positional binding from now on
    } elsif ($foundeq == 0) {
      $new[$i] = $old[$i];
    }
  }
  for $i (0..$#old) {
    unless (defined $new[$i]) {
      warn "ERROR: No value for parameter ",$i-$nfiles+1,
           defined $parnames[$i-$nfiles+1]?" (".$parnames[$i-$nfiles+1].")":"",
           " in $parfile line ",$linenr,".\n";
      $abort++;
      $new[$i] = '????';
    }
  }
  return join ' ',@new;
}

sub add_linenr {
  # append line number $nr (with a "*" when $star is true) to the
  # blank-separated list $string, unless it is already the last entry.
  # Returns the new list.
  my($string,$nr,$star) = @_;
  $nr .= "*" if $star;
  $string .= "$nr " unless $string =~ /\b\Q$nr\E $/;
  return $string;
}

# One report line for the complain_about_* routines: the item $E_item and the
# chain.dat line numbers from $E_linelist, continued on extra lines if needed.
format STDERR =
 - ^<<<<<<<<<<<<<<<<<<<<<<<< in chain.dat line@ ^<<<<<<<<<<<<<<<<<<<<<<
length($E_item)<26?$E_item:substr($E_item,0,17)." ... ".substr($E_item,-3), ($E_linelist=~/ ./?"s":" "), $E_linelist
~~                                              ^<<<<<<<<<<<<<<<<<<<<<<
$E_linelist
.
sub complain_about_nitem_inconsistencies {
  # complain about TODO lines with different numbers of items in them.
  # Returns the number of different item counts reported (0 if consistent).
  my $abort = 0;
  if ( (keys %nitems) > 1 ) {
    warn "ERROR: Number of items is different in different lines:\n";
    foreach (sort { $a <=> $b } keys %nitems) {   # keys are item counts
      $E_item = sprintf("%3d items",$_);
      $E_linelist = $nitems{$_};
      $lines_with_star++ if $E_linelist =~ /\*/;
      write STDERR;
      $abort++;
    }
    print STDERR "\n";
  }
  return $abort;
}

sub complain_about_parameter_tests {
  # complain about parameter tests failures.
  # Returns the number of #test definitions with at least one failure.
  my $abort = 0;
  my($key,$item);
  if (%failed) {
    warn "ERROR: Storage names or parameters failed tests:\n" ;
    foreach $key (sort keys %failed) {
      # $key is "#test NAME LINE"
      printf STDERR " %-5s %-12s (defined in $parfile line %d)\n",
        split(/\s+/,$key);
      foreach $item (sort keys %{ $failed{$key} } ) {
        $E_item = $item;
        $E_linelist = $failed{$key}{$E_item};
        $lines_with_star++ if $E_linelist =~ /\*/;
        write STDERR;
      }
      $abort++;
    }
    print STDERR "\n";
  }
  return $abort;
}

sub complain_about_existing_files {
  # complain about existing files (a warning only)
  my($key);
  if (%exists) {
    warn "WARNING: The following storage names correspond to existing files:\n";
    foreach $key (sort keys %exists) {
      $E_item = $key;
      $E_linelist = $exists{$key};
      # drop adjacent duplicates; whole entries only, so that "12 123"
      # is not mistaken for a repetition of "12"
      1 while $E_linelist =~ s/(^| )(\S+) \2(?= |$)/$1$2/g;
      $lines_with_star++ if $E_linelist =~ /\*/;
      write STDERR;
    }
    print STDERR "\n";
  }
}

sub complain_about_multiple_names {
  # Complain about multiple names.
  # Returns the number of storage names that are used more than once.
  my ($abort,$seentop) = (0,0);
  my($key);
  foreach $key (sort keys %seen) {
    if ($seen{$key} =~ / ./) {       # more than one entry in the list
      do {
        warn "ERROR: Storage names being used more than once:\n";
        $seentop++;
      } unless $seentop;
      $E_item = $key;
      $E_linelist = $seen{$key};
      # drop adjacent duplicates (whole entries only)
      1 while $E_linelist =~ s/(^| )(\S+) \2(?= |$)/$1$2/g;
      $lines_with_star++ if $E_linelist =~ /\*/;
      write STDERR;
      $abort++;
    }
  }
  print STDERR "\n" if $seentop;
  return $abort;
}

sub expand_me {
  # Expand one list or sequence specification into its values.
  #   $paren  opening bracket: "(" for a list, "[" for a sequence
  #   $spec   the text between the brackets
  #   $close  closing bracket, must match $paren
  # A list is "a,b,c" and may contain integer ranges "m..n".  A sequence is
  # "start[,start...]:limit[:step]", where step is a number (added) or an
  # operator with operand such as "*2"; the default step is +1.
  # Returns the list of values; dies if a sequence exceeds $maxseq values.
  my ($paren,$spec,$close) = @_;
  my ($start,$limit,$step);
  my (@slist,@list,$firstcmp);
  my($v);

  # check if paren and close match
  if ( ( $paren eq '(' && $close ne ')' ) ||
       ( $paren eq '[' && $close ne ']' ) ) {
    print "ERROR: In $parfile line $linenr[$i]\n",
          " parenthesis on sequence \"$spec\" don't match\n";
    $abort++;
    return ($spec);
  }

  # it's just a list
  if ($paren eq "(") {
    @list = split /\s*,\s*/,$spec,-1;
    # lets see if there are any ranges in the list
    my($i) = 0;
    while ($i <= $#list) {
      if ($list[$i] =~ /^(\d+)\.\.(\d+)$/ && $1 <= $2) {
        splice @list,$i,1,$1..$2;
      }
      $i++;
    }
    return @list;
  }

  # get fields for sequences
  ($start,$limit,$step) = split(/:/,$spec);
  # break up the start list
  @slist = split(/,/,$start);
  # a limit of 0 is perfectly valid, so test for presence, not for truth
  unless (defined($limit) && $limit ne "") {
    $limit = $slist[0];
    warn "ERROR: No limit specified in sequence \"$spec\"\n";
    $abort++;
  }

  # transform step into a statement
  # NOTE(review): the step text comes straight from the parameter file and
  # is passed to eval below; only use parameter files you trust.
  $step = "+1" unless ($step);                        # default increment
  $step = "+$step" unless ($step =~ /^[\-\+\*\/]/);   # default operator
  $step = "\$_ = \$_ $step";

  $firstcmp = ($slist[0] <=> $limit);
  # catch sequences that are only 1 element long
  if ($firstcmp == 0) {
    push(@list,$slist[0]);
    return @list;
  }

  LOOP: for (;;) {
    foreach $v (@slist) {
      # keep going while the value is still on the starting side of the limit
      if (($v <=> $limit) * $firstcmp >= 0) {
        push(@list,$v);
        die "PANIC: Sequence \"$spec\" creates >$maxseq values.\n"
          if (@list > $maxseq);
      } else {
        last LOOP;
      }
    }
    # In the following line, the sprintf rounds the number to 10 digits.
    # This is necessary since perl adds random stuff beyond machine acc.
    foreach (@slist) {eval($step);$_ = sprintf("%.10g",$_)}
  }
  return @list;
}

sub replace_diamonds {
  # replace all diamonds <NAME> or <NAME%fmt> in the first argument with
  # elements of the array referenced by the second argument.
  #   NAME  a parameter name (looked up in %parindex) or a parameter number
  #   %fmt  an optional sprintf format for the value
  # A diamond without a value in the list is kept, in hidden (escaped)
  # form.  An unknown name is reported, counted in $abort and replaced by
  # "??".  Returns the resulting string.
  my ($template,$list) = @_;
  my ($string,$rstring,$name,$num,$format,$count);

  while ( ($name,$format) = ($template =~ /<(\w+)(%[^>]+)?>/g) ) {
    die "PANIC: More than ",$max_diamond_replace,
        " named diamonds replaced in one command.\n",
        " Probably an infinite loop (diamond replaces diamond).\n"
      if (++$count > $max_diamond_replace);
    # The pattern for the diamond just found.  It is built for every
    # diamond, so that an undefined name is really replaced by "??"
    # instead of being found again and again.
    $string = "<$name" . "$format>";
    $string =~ s/(\W)/\\$1/g;        # quote special characters
    $num = $parindex{$name} || $name;
    if ($num>0) {
      if (! defined($list->[$num])) {
        $rstring = "\\<$name$format\\>";
        &hide_escaped_characters($rstring);
      } elsif ($format) {
        if ($format =~ /%-?\d+\.?\d*[def]$/ && !&isnum($list->[$num])) {
          warn "WARNING: Using numerical format <$num$format> for string \"$list->[$num]\"\n";
        }
        $rstring = sprintf($format,$list->[$num]);
      } else {
        $rstring = $list->[$num];
      }
    } else {
      warn "ERROR: parameter name <$name> not defined.\n";
      $abort++;
      $rstring = "??";
    }
    $template =~ s/$string/$rstring/;
  }
  return $template;
}

sub show_last_log {
  # get the logbook entries about the current run, i.e. everything from
  # the line before the last line of dashes to the end of the logbook,
  # and print them to filehandle $_[0]
  my($HANDLE) = @_;
  my(@lastlog);
  if (!-e $logbook) {
    print $HANDLE "No logbook file ($logbook).\n" if $HANDLE;
    return (0,0,0);
  }
  open(LOGBOOK,"$logbook") || die "ABORT: Can't open $logbook.\n";
  while (<LOGBOOK>) {
    push(@lastlog,$_);
    if (/^-+$/) {
      # a new run starts here: keep only its title line and the dashes
      @lastlog = splice(@lastlog,$#lastlog-1);
    }
  }
  close(LOGBOOK);
  print $HANDLE @lastlog;
}

sub report_status {
  # look at logbook file and create a report
  # return ($completed,$active_pid,$nfinished)
  # print report to filehandle $_[0]
  my($HANDLE) = @_;
  my($nstarted,$nerrors,$nfinished) = (0,0,0);
  my($started,$restarted,$completed) = ("not yet","","not yet");
  my($summary,$pwd,$lastlog);
  my($time,$time_total,$time_max)=(0,0,0);
  my($pid,$nruns,$counterror);
  if (!-e $logbook) {
    print $HANDLE "No logbook file ($logbook).\n" if $HANDLE;
    return (0,0,0);
  }
  open(LOGBOOK,"$logbook") || die "ABORT: Can't open $logbook\n";
  while (<LOGBOOK>) {
    ($lastlog) = /([\s\d]\d\s+[A-Z][a-z]{2}\s+\d+,\s+\d+:\d+:\d+)/;
    if (/^CHAIN.*(Res|S)tarted\s*([\s\d]\S.*\S)\s+::\s+(\d+)\s+runs,\s+PID=(.*\S)\s*$/) {
      $started = $2 if ($1 eq "S");
      $restarted = $2 if ($1 eq "Res");
      $nruns = $3;
      $pid = $4;
    } elsif (/^CHAIN.*Completed\s*([\s\d]\d.*)$/) {
      $completed = $1;
    } elsif (/^finished.*user time:\s+(\S.*\S)\s*$/) {
      $nfinished++;
      $time = &sec($1);
      $time_total += $time;
      $time_max = $time if ($time > $time_max);
    } elsif (/^-+$/) {$counterror=1}
elsif (/^ERROR/) { if ($counterror) {$nerrors++;$counterror=0} } } # Find out if $pid is an active chain process $pid = 0 unless kill $mysignal,$pid; $pid = 0 unless ($completed eq "not yet"); # will be dead in a second ($pwd = $ENV{"PWD"}) =~ s/^.*(.{48})$/...$1/; $summary = "chain has been completed." unless ($completed eq "not yet"); $summary = "chain is still running (PID $pid)." if $pid; $summary = "chain uncomplete, but no active job." if ($completed eq "not yet" && !$pid); $summary = "chain has not been started." if ($started eq "not yet"); # Print report if ($HANDLE) { my($oldhandle,$format_top,$format) = (select,$^,$~); select($HANDLE); $~ = "main::REPORT"; write $HANDLE; $^ = $format_top;$~ = $format; select($oldhandle); }; $completed = 0 if ($completed eq "not yet"); return ($completed,$pid,$nfinished); format main::REPORT = |-----------------------------------------------------| | CHAIN STATUS REPORT: @<<<<<<<<<<<<<<<<<<<<< | &nowstring | - - - - - - - - - - - - - - - - - - - - - - - - - - | | @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | $pwd |-----------------------------------------------------| | chain: started @<<<<<<<<<<<<<<<<<<<<< | $started | last restarted @<<<<<<<<<<<<<<<<<<<<< | $restarted | last log entry @<<<<<<<<<<<<<<<<<<<<< | $lastlog | completed @<<<<<<<<<<<<<<<<<<<<< | $completed | still active @<<<<<<<<<<<<<<<<<<<<< | ($pid ? "yes (PID $pid)" : "no") | - - - - - - - - - - - - - - - - - - - - - - - - - - | | runs: total @<<<<<< | $nruns | finished @<<<<<< @<<<<<<<<<<<<< | $nfinished, ($nerrors ? ($nerrors>1 ? 
"($nerrors errors)" : "(1 error)") : "") | remaining @<<<<<< | ($nruns-$nfinished) | - - - - - - - - - - - - - - - - - - - - - - - - - - | | utime: accumulated @<<<<<<<<<<<<<<<<<<<<< | &HMS($time_total) | max per run @<<<<<<<<<<<<<<<<<<<<< | &HMS($time_max) | average @<<<<<<<<<<<<<<<<<<<<< | &HMS($time_total/($nfinished || 1)) | - - - - - - - - - - - - - - - - - - - - - - - - - - | | SUMMARY: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | $summary |-----------------------------------------------------| . } #------------------------------------------------------------------------------ #------------------------------------------------------------------------------ sub create_data_file { # create the data file if (-e $parfile) { print "Overwrite existing file $parfile (y/n)? [n]"; die "New datafile $parfile was NOT created.\n" unless ( =~ /^y/i); } open(PR,">$parfile"); print PR <<"EOF"; # # This is a parameter file for CHAIN (created by version $version). # # The contents of this file describe # - the commands to run one job # - the names of input and output files and of parameters # - the storage names that are used to save output from different runs # - the parameter combinations to be explored # # The file contains example text (to help your memory). # This text has to be replaced. # # USE "perldoc chain" TO GET A FULL DESCRIPTION # #=============================================================================== # # 1. Define some options, prefixes, and postfixes. # a) The minor options for CHAIN you want to be in effect always # b) A mail address for email notifications # c) An optional prefix to be added to all storage names # d) An optional postfix to be added to all storage names # e) A storage name postfix indicating an error # MINOR OPTIONS : MAIL ADDRESS : $address STORAGE NAME PREFIX : STORAGE NAME POSTFIX: ERROR CASE POSTFIX: $epostfix #=============================================================================== # # 2. 
Define the shell script that runs your code. Parameters will be # interpolated into that script # # ---- Start of Shell script -------------------------------------------------- prg < in.dat cat err.dat >> out.dat # ---- End of Shell script ---------------------------------------------------- # #=============================================================================== # # 3. Define some names: # a) The names of the input files your code is expecting. # b) The names of the format files corresponding to the input files. # c) The names of the output files/directories the code produces. # d) The names of the parameters (should be alpha_numeric) # INPUT FILE NAMES: in.dat in2.dat FORMAT FILE NAMES: {}.fmt OUTPUT FILE NAMES: out.dat PARAMETER NAMES : NAME1 NAME2 NAME3 #=============================================================================== # # 4. Define the TODO list for the job-chain. Each line contains storage names # (as many as OUTPUT FILE NAMES defined above) and a list of parameters. # '\\' at line end makes the following line a continuation line. # Embedded macros and much more is possible. See documentation. 
# #--STORAGE-NAMES----------INPUT-PARAMETERS--------------------------------------- testrun.out a b 2 4 series1_<3>_<4> a b (2,3,4) 4 series2_<3>_<4> a b 3 [2:6:+2] EOF close(PR); print "New datafile $parfile was created\n"; print "Edit this file before using it.\n"; exit; } sub print_usage { &print_data_section(\*STDERR,'USAGE'); } sub print_help { &print_data_section('USAGE'); print "\n"; &print_data_section('HELP'); } sub print_manual { &print_data_section('MANUAL'); } sub print_data_section { my($handle,$key) = @_; unless ($key) {$key=$handle; $handle = \*STDOUT} seek(DATA,0,0); $_ = until /^BOS: $key\s*$/; no strict 'refs'; while () { last if /^EOS:/; print $handle $_; } } __END__ BOS: USAGE Usage: chain [DIR] -ctsrll [-n NN] [-e] [-d] [-O N] [-E N] [-m] [-f] [-v] [-i NN] chain [DIR] -SIGNAL chain -h perldoc chain EOS: USAGE BOS: HELP MAJOR OPTIONS (give exactly one of these) -c Create a template version of the data file "chain.dat". -t Test specifications in chain.dat. -s Start the chain. -r Restart a chain that has been aborted before completion. -l Create a report based on the logbook file. -ll Show logbook entries for current run. -help Print help message to STDOUT. -SIGNAL Send a signal to the chain (-term,-kill,-stop,-cont). MINOR OPTIONS (several may be specified, defaults can be defined in chain.dat) DIR Run CHAIN in directory DIR instead of the current directory. -n NN Restrict chain to the next NN runs. For test purposes. -e Export parameters as scalar variables into shell script -d Debug shell script or perl program. -O N Divert STDOUT (N: 0=throw away 1/2 = save 1/all runs to chain.out) -E N Divert STDERR (N: 0=throw away 1/2 = save 1/all runs to chain.err) -m Send mail after each run. -f Force: ignore errors in chain.dat. -v Verbose: copy all logbook entries to STDOUT. -i NN Do NN extra steps of macro substitution/list expansion (experts only). 
Full manual: perldoc chain EOS: HELP BOS: MANUAL =head1 NAME chain - run a chain of jobs =head1 SYNOPSIS C C C C =head1 DESCRIPTION =head2 PURPOSE CHAIN is a tool to make many runs of a program with different sets of parameters. CHAIN provides a simple way to =over 4 =item o specify the different parameter combinations =item o automatically create the input files for each run =item o name output files uniquely =back =head2 CHECKLIST If you want to use CHAIN with a particular code, here is what you have to do: =over 4 =item 1. Make your code read its parameters from files. The input files may have any format you want. =item 2. Create I<format files> for the input files which you want to control with CHAIN. To do so, copy these files to unique names (e.g. simply add a ".fmt" to the input file name). Edit these I<format files>. Replace all parameters that you want controlled by CHAIN with named diamonds (, , , ...). See below in the documentation for an example. =item 3. Use C<chain -c> to create a template of the data file F<chain.dat>. Edit F<chain.dat>. Fill in file names, options, commands, storage names and parameter combinations. If you are not sure how to do this, see below in the documentation. =item 4. Use C<chain -t> to check if CHAIN expands your list of parameter combinations and storage names correctly. Also, check the sample input files. =item 5. Use C<chain -s -n 2> to run the first two jobs of the chain. Usually, problems (if any) show up during the first two runs. =item 6. Use C<chain -s> to start the chain. =item 7. Check the progress of the chain with C<chain -l>. If you need to kill it, use C<chain -term> or, if necessary, C<chain -kill>. This sends the corresponding signal to the whole group of processes related to CHAIN, including the current run of your code. =item 8. If someone manages to kill your chain or to crash your computer, C<chain -r> will restart the chain. =back =head2 Format Files CHAIN uses templates of the job's input files to create new input files for each individual run. To create these templates, make a copy of any input files. 
The suggested name for these templates is the input file name with F<.fmt> appended (FORMAT file). In the format files, replace all parameters that should be controlled by CHAIN with named diamonds (e.g , ...). CHAIN will produce different input files by replacing these diamonds with current values before each run. If the values need to be especially formatted, the name in the diamond may be followed by a printf format specifier, e.g. . > EXAMPLE: > A format file might look like this: > > # Parameter file for job xyz > # this is the file name of some table > density > particle numbers > start time =head2 Logbook File F Each run is logged in a LOGBOOK FILE F. This file may be checked at any time to watch progress. It is also used for restarts (C). C produces a pretty report based on this file. =head2 Data File F CHAIN uses a DATA FILE F to determine =over 4 =item - the names of input and format files and of parameters =item - the commands to execute for every run =item - the combinations of parameters to explore and =item - how to name the output uniquely. =back A template data file F can be created with C. This file has to be edited: =over 4 =item 1. File and parameter names S Enter the names of the input files that your code uses. S Specify the names of the corresponding format files, one for each input file. The default can be written as C<{}.fmt> (input file names with C<.fmt> appended). S Enter the names of all output files that have to be saved. If the output has been collected in a directory, the name of the directory can be given here. CHAIN will rename all the files and directories mentioned here to unique names. S Enter the names of parameters as you use them in the format files. The parameters can be referenced under these names in format files, command lines, the TODO list and in Perl code. 
> EXAMPLE: > This example defines two input and three output file names > > INPUT FILE NAMES: in.dat in1.dat > FORMAT FILE NAMES: in.fmt in1.fmt > OUTPUT FILE NAMES: out1.dat out2.dat err.dat > PARAMETER NAMES: RHO NP1 NP2 TIME0 OPAC =item 1. Options S You may specify a list of minor options that you want to be always in effect. This may be useful if a special chain always needs some switches (in particular B<-i> and B<-e>). Note, that no major option will be accepted here. S CHAIN will inform you via Email when the chain is completed. You may change the default mailing address here. See also B<-m> option. S A prefix for all storage names - usually only used for specification of a directory. S A postfix for all storage names. Usually empty, but handy if you decide to compress (.z .Z .gz) or tar (.tar) the output. S A special postfix that will be added to the storage names only if your code or any of the system commands in the script returned a non-zero exit status. =item 2. Shell script to run your code Enter here the commands that run your code. These lines are a shell script which will be executed by /bin/sh for each individual run. If you have specified the B<-e> option, the parameter for the run will be available to the script not only in the input files, but also as shell variables. Thus things like C<$RHO> and C<${NP1}> will be interpreted correctly (and you should not use parameter names that conflict with important shell variables). Prior to execution, named diamonds with (optional) print formats (e.g. ) will also be interpolated into the script. Apart from running your program, the script may do other things, e.g. compressing the output, collecting several files into a tar-file or a directory, making a quick plot and sending it to the printer, etc. 
> EXAMPLE: > Here is a script that collects output into a directory "resdir" > > prg $TIME0 out.dat > rm -rf resdir > mkdir resdir > mv out.dat out1.dat out2.dat resdir CHAIN will notice (and say so in the logbook file) when errors occur during the execution of the script. Instead of using a shell script, you may actually provide a perl program here. See under "ADVANCED FEATURES" futher down in this man page. =item 4. The TODO list Each line in the TODO list specifies the parameters for one or several runs of your code. The first fields of each line are the storage names that will be used to save the output files. CHAIN will rename the files mentioned under OUTPUT FILE NAMES to these names. The number of storage names must be consistent with the number of output file names. > EXAMPLE: > A TODO list entry appropriate for the output file names given > in the example above (out1.dat out2.dat err.dat) might be: > > run1_out1 run1_out2 run1_err ... The actual values of parameters can be inserted into storage names. Any occurrences of named diamonds (e.g. ) will be replaced by the parameter with that name. As with the diamonds in the format file, you may include format specifiers. In order to avoid white space in file names, CHAIN will discard any whitespace characters introduced by the formatting process. To get fixed-length fields, use zero-padded formats. > EXAMPLE: > storage name template: xx_r_n.dat > parameters of the run: 3 42 > storage name used: xx_r3_n0042.dat The rest of each line specifies the parameters to be used in the run in the same sequence as given in the PARAMETER NAMES statement. CHAIN uses these parameters and the format files to put together the input files for a run of your code. Several lines may be used for parameters. When a line ends with a backslash, the next line is treated as continuation line. > EXAMPLE of a TODO list: > fn__ par1 par2 par3 par4 ... > fn1__ par1 par2 \ > par3 par4 ... > fn2__ par1 par2 par3 par4 ... 
It is not necessary to create an extra line for each combination of parameters. Instead, several values for any parameter may be specified and CHAIN will go through all possible combinations. Parameters can be specified in three ways: a) Plain value or string: e.g. "42" or "test.dat" b) List of values: (val1,val2,val3,val4,...) c) Sequence of values: [start:limit:step] Plain values will create only one run. Lists will create a run for each element of the list. Sequences will create a run for each value from I to I in steps of I. I consists of an operator (+-*/) and a number. The default for I is I<+1>. List and sequence specifiers must not contain spaces. When a list or a sequence specifier is used, the storage name MUST have a diamond for this particular parameter (or the storage name will not be unique). Expansion takes also place when the list/sequence is only a part of the parameter. > EXAMPLE: > The following TODO list: > > simple a b c d > > test1____ a (b,c) 5 [2:4:*2] > > test a b 1 file(2,3).dat > > will create the following storage names and parameter combinations: > > simple a b c d > > test1_a_b_1_2 a b 1 2 > test1_a_b_1_4 a b 1 4 > test1_a_c_5_2 a c 5 2 > test1_a_c_5_4 a c 5 4 > > test a b 1 file2.dat > test a b 1 file3.dat =head1 OPTIONS =head2 MAJOR OPTIONS CHAIN will do nothing without a command line option. Exactly one of the following options must be present. =over 4 =item B<-c> Create a template version of the data file "chain.dat". =item B<-t> Test. CHAIN will check out the specification in "chain.dat", expand sequences and lists, report errors. =item B<-s> Start the chain. An existing logbook file "chain.log" is removed. =item B<-r> Restart a chain that has been aborted before completion. With this option, CHAIN will examine the file "chain.log" and skip as many parameter combinations as denoted "finished" in this file. =item B<-l> Create a report based on the logbook file. =item B<-ll> Show logbook entries for current run. 
=item B<-help> Print a help message to STDOUT. =item B<-SIGNAL> Send a signal to the chain and its children. SIGNAL can be one of (term,kill,stop,cont). =back =head2 MINOR OPTIONS These options modify the way CHAIN works. Several may be specified. (Some minor options may be defined as defaults in the MINOR OPTIONS entry of the parameter file chain.dat.) =over 4 =item DIR Run CHAIN in directory DIR instead of the current directory. (e.g. C). =item B<-n> NN Restrict chain to the next NN runs. For test purposes. =item B<-e> Export the parameters as scalar variables into the shell script (or perl program). When this is active, a parameter RHO will be available as $RHO etc. Watch out for conflicts between parameter names and environment variables. =item B<-d> Debug. For a shell script, this will run the shell with B<-vx> in order to give you a better understanding of what is going on. Currently, this option has no effect on perl code supplied instead of the shell script. =item B<-O> N Divert the output from the runs of your code. By default, all the output will just go to STDOUT (normally your screen). B<-O 0> will discard all output. B<-O 1> will save the output of the current run into the file F<chain.out>, but overwrite it for every new run. B<-O 2> will put all output from all runs into F<chain.out>. Note that this operation will only divert the output from your script and code, not the output from CHAIN itself. =item B<-E> N Like B<-O>, but for STDERR and the file F<chain.err>. =item B<-m> Send mail as notification after *each* run. The default is to mail only after the final run. =item B<-f> Force: ignore errors in chain.dat. =item B<-v> Verbose: copy all logbook entries to STDOUT. =item B<-i> NN Make NN extra steps of macro interpolation and list/sequence expansion. By default, one macro interpolation followed by one expansion step is done. B<-i> 1 provides for an additional macro interpolation round, B<-i> 2 adds another expansion step etc. Don't use this unless you know what you are doing. 
=back =head1 ADVANCED FEATURES This section covers some extra features for advanced use of CHAIN. They are not required for basic uses. For some of them, knowledge of the Perl language is required. =head2 Special Characters In the interpretation of TODO lists, the following characters have special meanings: {}[](),:\= and SPACE and TAB. If any of these characters has to be part of a storage name or parameter, it can be escaped with a backslash. =head2 Other Properties of Sequences and Lists =over 4 =item Ranges in Lists Perl-like ranges can be part of lists: > (1..5,7,10,12..14) =item A List as Sequence Starter The I<start> specifier in sequences may be a list of values. This can be used to merge several sequences into each other: > The sequence [10,20,50:10000:*10] > > is equivalent to the following list: > > (10,20,50,100,200,500,1000,2000,5000,10000) =item Parallel Sequences/Lists By default, CHAIN considers all sequence and list specifiers to be independent. Thus, CHAIN explores all possible combinations when expanding several lists/sequences in the same line (orthogonal expansion). It is also possible to expand two or more lists in parallel. In this case, CHAIN does not explore all combinations, but puts together only all first elements, then all second elements etc. In order to use this feature, the lists to be expanded in parallel must be identified and have the same length. You do this by inserting a numbered hash mark (e.g. #2) as the first element after the opening parenthesis. Sequences/lists carrying the same number will be expanded in parallel. By choosing several numbers, you define several groups for parallel expansion. Here is an example. > EXAMPLES: The following two examples show the > difference between the two methods. > The sequences/lists in the first line > are expanded to what follows below. 
> > (a,b) [1:2:+1] | (#1,a,b) [#1:1:2:+1] > ---------------|---------------------- > a 1 | a 1 > a 2 | b 2 > b 1 | > b 2 | =back =head2 File Name Expansion Whenever a list of file names is given in the parameter file (i.e. input files, format files, output files and storage names), CHAIN looks for the C-Shell {..,..,..} construct and expands it. > The storage name run__{out1,out2,err}.dat > > is an abbreviation for the following names: > > run__out1.dat run__out2.dat run__err.dat An empty pair of braces "{}" has a special meaning. When used in storage names in the TODO list, "{}" expands to the list of output file names. Thus > OUTPUT FILE NAMES: out1.dat out2.dat err.dat > ... > #--STORAGE-NAME----------INPUT-PARAMETERS----------- > run__{} produces the same list of storage names as above. Similarily, in the FORMAT FILE NAMES list, "{}" expands to the list of input files. Thus > INPUT FILE NAMES: in1.dat in2.dat > FORMAT FILE NAMES: {}.fmt > > is equivalent to > > FORMAT FILE NAMES: in1.dat.fmt in2.dat.fmt =head2 Macros C-Preprocessor-like #define macros may be used in the TODO list. Macros are restricted to string replacement (no control structures, no arguments). > EXAMPLE: > #define LIST1 (10,20,30,40,50,60,70,80,90,100) > #define LIST2 (1,2,3,10,20,30,\ > 100,200,300) > > #define SEQ1 [100:1000:+100] > #define PAIR (1,2,3) (6,7,8) > > test1__ xx.dat LIST1 LIST2 > test2__ xx.dat LIST1 SEQ1 > test3__ xx.dat PAIR =head2 Parameter and Storage Name Tests The TODO list may contain embedded tests for specific parameters or storage names. This can help to avoid typing errors or to check the existence of files before starting the chain. It can also be misused to change the values themselves. The test definition lines start with "#test NAME". NAME is either an output file name or a parameter name, as defined in chain.dat. The rest of the line may be any Perl expression testing $_. 
CHAIN will assign the value of the corresponding parameter or storage name to $_ and evaluate your expression. Assigning to $_ will change the parameter. The test will be applied to all lines following its definition. To cancel a test, say "#test NAME reset". > EXAMPLE: > This example defines tests for parameters RHO and OPAC. > It checks if RHO is a positive number and if OPAC is > an existing file in the subdirectory "src/". > > #test RHO $_>0; > #test OPAC -e "src/$_"; =head2 Perlcode instead of a Shell Script Instead of a shell script to run your code, you may also use a full fledged Perl program. CHAIN will automatically detect which is being used. The Perl program must define a subroutine RUN, which will be called for each set of parameters. The Perl program will automatically be defined in a special package called I. The following variables will always contain current values: @files the storage names for the current run. Note that the first filename is $files[0] @pars the parameters for the current run. Note that the first parameter is $pars[0]. $irun index of the TODO list entry (0,1,...) Attention: This starts with 0 only when "chain -s" is used. "chain -r" may start with higher values. $NAME1... When running CHAIN with the `-e' option (export), the values of all parameters are directly available as scalar variables of the names given in the PARAMETER NAMES statement. > EXAMPLE: > # Initialization, will be done only once > $my_count = 0; > > # This sub MUST always be defined > sub RUN { > &print_parameters; > } > sub print_parameters { # subroutine definition > $my_count++; > print $my_count,$irun,@files,@pars; > } =head1 AUTHOR (c) 1996 Carsten Dominik dominik@strw.leidenuniv.nl CHAIN is free software. It may be distributed under the same terms as Perl. 
=head1 FILES ./chain.dat ./chain.log ./chain.pid ./chain.out ./chain.err =head1 BUGS When doing several macro interpolation steps (see option B<-i>), only the first remembers the original order of macro definitions and other lines. Thus, for the second iteration, all macros will be globally active, not just below their definition line. In particular, macros that are defined several times will have the value of their last definition. Debug is not implemented for Perl scripts. =cut EOS: MANUAL