#!/hr5/dominik/local/bin/perl 
#!/usr/local/bin/perl
#------------------------------------------------------------------------------
# This script can be used to run a chain of jobs. It executes the same
# code for a number of different parameters and saves the output. The
# parameters are supplied in a simple list, and chain has sophisticated
# methods to make these parameters available to the code.
#
# "perldoc chain" will print the full documentation.
#
# (c) 1995 Carsten Dominik     dominik@strw.leidenuniv.nl
#
# This is free software. It may be distributed under the terms of the
# GNU Public License.
#------------------------------------------------------------------------------
# RCS: $Id: chain,v 1.18 1998/03/04 10:30:51 dominik Exp $
#------------------------------------------------------------------------------

require 5.002;                  # Required perl version

use strict;

use Getopt::Long;               # This is used for option parsing
$Getopt::Long::ignorecase = 0;  # We want to distinguish case

# Version
my($version)    = "3.2";	# Current version of program

# Defaults
my($mailer)    = "mail";	# The program used to send mail
my($parfile)   = "chain.dat";	# Default for parfile name
my($logbook)   = "chain.log";   # Default logbook file
my($sprefix)   = "";		# Prefix for storage names
my($spostfix)  = "";		# Postfix for storage names
my($epostfix)  = "_ERROR";	# Postfix for storage names in case of an error
my($mysignal)  = "USR1";	# Signal to be used for $PID checking
# Default mail address: the first of $USER, $LOGNAME or the passwd entry
# of the real user id that yields a true value.
my($address)   = ($ENV{"USER"} || $ENV{"LOGNAME"} || (getpwuid($<))[0]);
                                # email address default
# Variable presets
my($maxrun)    = 1000000;       # Maximum number of runs total
my($maxseq)    = 1000;          # Maximum number of runs from a single sequence
my($max_diamond_replace) = 1000;# Maximum number of diamond replacements

# Option variables
# %opt collects the options that have no dedicated variable; the scalars
# are bound directly to their command line switches in GetOptions.
my(%opt,$testing,$starting,$restarting,$force,$mail,$verbose,$extra_steps);

# Other global variables
my(@todolist,@linenr);             # Array holding the TODO list and line nbrs.
my($todofmt,$todofmttop);          # Formats to print TODO list entries
my($todofmtlength);                # The length of the formatted lines 
my($i,$index);                     # loop variables
my($abort);                        # error counter
my(%failed,%exists,%seen,%nitems); # statistic and errors in TODO list
my($whitespace_in_sn);             # Flag for whitespace in storage names
my($lines_with_star);              # Flag for printing annotation
my(%macro,%testexp);               # the macro and test definitions
my($nfiles);                       # Number of output files per run
my(@infiles,@fmtfiles,@outfiles);  # Filenames from chain.dat
my($exec_script);                  # the script.
my(%outfileindex,@parnames,%parindex); # Lookup arrays
my($E_item,$E_linelist);           # Variables for error format STDERR
my($completed,$active_pid,$nskip); # return from report_status();
my($script_is_perl_code);          # Flag
my($repair_error_message) = 1;     # Should we try to fix perl syntax error
                                   #    messages in user supplied perl script?

# We need to know what our working directory is
# (&initpwd is not defined in this file; presumably it is supplied by
# pwd.pl -- confirm against the installed library.)
require "pwd.pl"; 
&initpwd;

# Ignore $mysignal
$SIG{$mysignal} = 'IGNORE';

# Startup message
print STDERR <<"EOF";

CHAIN version $version.
(c) 1995 Carsten Dominik   dominik\@strw.leidenuniv.nl

EOF

# Command line arguments and switches
if (! @ARGV) {
    # at least one option, please
    &print_usage;
    exit;
}

# Switches without a variable reference are stored in %opt, the others
# go directly into the scalar that follows them in the list.
&GetOptions( \%opt, qw ( -help|h ),
	            qw ( -term -kill -stop -cont -quit -pod),
                    qw ( -create|c -list|l -lastlog|ll ),
	    '-testing|t', \$testing, 
	    '-start|s',   \$starting, 
	    '-restart|r', \$restarting,
	    '-export|e',
	    '-debug|d', 
	    '-mail|m',    \$mail,
	    '-force|f',   \$force,
	    '-verbose|v', \$verbose,
	    '-n=i',       \$maxrun, 
	    '-i:i',       \$extra_steps,
	    '-O:i',
	    '-E:i'
	     )
    or ( &print_usage, exit );

# Change directory if one was given on the command line
if ($ARGV[0] && -d $ARGV[0]) {
    print "chdir to $ARGV[0]\n" ;
    &chdir(shift);
}

# Reject any further arguments
if (@ARGV) {
    &print_usage;
    die "ABORT:   Unrecognized command line argument: $ARGV[0].\n";
}

# Informational switches: act and leave
if ($opt{help})    { &print_help;                 exit}
if ($opt{pod})     { &print_manual;               exit}   

# Signal switches: forward a signal to a chain running in this directory
if ($opt{term})    { &chain_group_signal('TERM'); exit}
if ($opt{kill})    { &chain_group_signal('KILL'); exit}
if ($opt{stop})    { &chain_group_signal('STOP'); exit}
if ($opt{cont})    { &chain_group_signal('CONT'); exit}

if ($opt{create})  { &create_data_file;           exit}
if ($opt{list})    { &report_status(\*STDOUT);    exit}
if ($opt{lastlog}) { &show_last_log(\*STDOUT);    exit}

# Exactly one of the run modes -t, -s, -r has to be selected from here on.
# (die already terminates the program, so no exit is needed after it.)
if ( $testing + $starting + $restarting == 0) {
    &print_usage;
    die "ABORT:   You must specify one of the options -d -t -s -r -l -c.\n";
}
elsif ($testing + $starting + $restarting > 1) {
    die "ABORT:   Only one of the options -s -r -t is allowed.\n";
}

# Let's see what the logbook says
($completed,$active_pid,$nskip) = &report_status;

if ($completed && $restarting) {
    die "ABORT:   Chain was already completed.\n",
        "         Use \"chain -s\" if you want to start from scratch.\n";
}
if ($active_pid && ! $testing) {
    die "ABORT:   Chain is active in this directory (PID $active_pid).\n",
        "         Wait for completion or kill it.\n";
}

# Find out what to do with the output from the individual runs.
# Both -O (STDOUT) and -E (STDERR) accept 0, 1 or 2; see
# divert_stdout_stderr for what each value selects.
if (defined $opt{O}) {
    unless ($opt{O} == 0 || $opt{O} == 1 || $opt{O} == 2 ) {
	die "ERROR:   Illegal value for option -O (must be 0 or 1 or 2)\n";
    }
}
if (defined $opt{E}) {
    unless ($opt{E} == 0 || $opt{E} == 1 || $opt{E} == 2 ) {
	# report the option that is actually wrong (-E, not -O)
	die "ERROR:   Illegal value for option -E (must be 0 or 1 or 2)\n";
    }
}
# Start with fresh capture files
unlink "chain.out";
unlink "chain.err";

# Open the parameter file
open(PARFILE,"$parfile") || die "ABORT:   Can't open $parfile.\n";

# The four readers below all consume the PARFILE handle one after the
# other, so their order must match the order of the sections in the file.

# Read options
&read_options;

# Read the commands to execute
&read_script;

# Read the names of input and output file(s)
&read_file_names;

# Read the TODO list
&read_todo_list;

# Close the parameter file
close(PARFILE);

# Abort if any errors were found while reading chain.dat
# (-force turns the abort into a mere report)
die "\nABORT:   $abort error", ($abort==1?"":"s"),
    " found while reading $parfile\n" 
    if ($abort && !$force);

# restart the error count for the expansion phase
$abort = 0;

# Expand the TODO list into a full list
&expand_todo_list;

# Print TODO list, when testing.
if ($testing) {
    print "\nTEST:    The following ",$#todolist+1,
                    " run specifications were created:\n\n";
    print $todofmttop;
    $i=0;
    foreach (@todolist) {
	printf "$todofmt\n",&recover_escaped_whitespace(split(/\s+/));
	# draw a dashed separator after the first $nskip entries
	print "- " x int($todofmtlength/2),"\n" if ++$i == $nskip;
    }
    print "\n";
}

# Check for whitespace in storage names
print STDERR "WARNING: $whitespace_in_sn storage names contain whitespace characters\n\n" if $whitespace_in_sn;

# Each complain_* call below prints its own report; where the call is
# followed by "&& $abort++" a true return value counts as one error.

# Check for inconsistencies in number of items per line
&complain_about_nitem_inconsistencies && $abort++;

# Check for storage names corresponding to existing files
# (reported only, does not count as an error)
&complain_about_existing_files;

# Check for multiple use of storage names
&complain_about_multiple_names && $abort++;

# Check for parameter test failures
&complain_about_parameter_tests  && $abort++;

# Legend for the "*" marks used in the reports above
print STDERR "         * Line would be skipped with \"chain -r\".\n\n"
    if ($lines_with_star);

if ($abort) {
    die "ABORT:   because of errors found during expansion.\n" unless $force;
}

$abort = 0;

# Check for logbook and restart
if ($starting) {
    # start from scratch
    if (-e $logbook) {
	warn "WARNING: Removing old logbook file.\n";
	unlink("$logbook") || die "ABORT:   Can't remove file $logbook.\n";
    }
}
elsif ($restarting) {
    # restart
    if (!-e $logbook) {
	# nothing to restart from: silently turn the restart into a start
	warn "WARNING: Logbook $logbook does not exist. Starting from scratch.\n";
	$starting = 1; $restarting = 0;
    }
}


# Make a note about start/restart time.
# NOTE(review): "." and "+" have the same precedence and associate to the
# left, so the expression below is (" " . $#todolist) + 1 followed by the
# concatenation.  The text therefore starts with the number of runs and
# the leading blank is lost in the numeric addition.
if ($starting) {
    &log("chain_started", " " . $#todolist+1 . " runs, PID=$$");
}
elsif ($restarting) {
    &log("chain_restarted", " " . $#todolist+1 . " runs, PID=$$");
}

# Save the process id into a file
# (chain_group_signal reads it back from chain.pid)
if ($starting || $restarting) {
    open PID,">chain.pid" or die "Cannot write to file chain.pid";
    print PID "$$\n";
    close PID;
}

# From now on, trap a few signals so that we may put some
# last words into the logbook file before we die.
$SIG{'INT'}  = 'signal_handler';
$SIG{'QUIT'} = 'signal_handler';
$SIG{'TERM'} = 'signal_handler';
$SIG{'HUP'}  = 'signal_handler';

# Now we may free some memory
# (these tables are only needed while reading and checking the TODO list)
undef %macro;
foreach (keys %failed) { undef %{$failed{$_}} }
undef %failed;
undef %testexp;
undef %exists;
undef %seen;
undef %nitems;

# Loop through the parameter table
# The variables needed in the loop:
# (the list assignment gives $nruntot the number of runs, $runnr stays
# undefined until the loop sets it)
my($nruntot,$runnr) = $#todolist+1;
my($todoline,$therun);
my(@pars,@storage_names);
my($exitval,$errsum);
my($start_utime,$end_utime);
my($storage_name,$outfile);
my($pid);
my($subject);

# Entries are skipped only when restarting
$nskip = 0 unless $restarting;

# Main loop: one pass per TODO list entry, starting behind the entries
# that are skipped on a restart.  Each pass creates the input files,
# runs the user code (perl sub or shell script), stores the output under
# its storage name(s), writes the logbook and optionally sends mail.
for ($i=$nskip; $i<=$#todolist; $i++) {
    if ($i-$nskip+1 > $maxrun) {die "Exit after maxrun=$maxrun runs.\n"};
    $runnr = $i+1;
    $todoline = $todolist[$i];

    @pars = split(/\s+/,$todoline);

    # get hidden whitespace back
    &recover_escaped_whitespace(@pars);
    $therun = sprintf($todofmt,@pars);
    # Split off the storage names.  The run index is put in front of the
    # parameters as element 0, so that parameter k is found at $pars[k].
    @storage_names = splice(@pars,0,$nfiles,"$i");

    # Create the input files for the job
    foreach $index (0..$#infiles) {
	open(FROM,"$fmtfiles[$index]") 
	    || die "ABORT:   Can't open $fmtfiles[$index].\n";
	open(TO,">$infiles[$index]") 
	    || die "ABORT:   Can't open $infiles[$index].\n";
	while (<FROM>) {
	    print TO &replace_diamonds($_,\@pars);
	}
	close(FROM); close(TO);
    }

    # Exit after creating input file if testing
    if ($testing) {
        # Abort if any errors were found
	die "\nABORT:   $abort error", ($abort==1?"":"s"), " found\n" 
	    if ($abort && !$force);
	print "\nTEST:    Sample input file(s) \"@infiles\" created.\n" 
	    if @infiles;
	print "         No apparent errors in your setup.\n",
	"         Use \"chain -s &\" to start the chain.\n";
	exit;
    }

    # Logbook: here we go
    &log("job_started",$therun);
    $errsum = 0;

    # Divert STDOUT and STDERR according to command line options
    &divert_stdout_stderr;

    $start_utime = (times)[2];

    if ($script_is_perl_code) {

	# Run the perl program

	# Export variables into user package
	@user::pars  = @pars[1..$#pars];
	@user::files = @storage_names;
	$user::irun  = $i;
	if ($opt{export}) {
	    no strict qw(refs);
	    foreach (@parnames[1..$#parnames]) {
		$ {"user::$_"} = $pars[$parindex{$_}];
	    }
	}
	# eval to catch exceptions
	$exitval = eval "&user::RUN();";

	if ($@) {
	    my($msg) = $@;
	    # keep the logbook entry on one line: escape every newline,
	    # not just the first one
	    $msg =~ s#\n#\\n#g;
	    &log("error","\&run failed: $msg");
	    $errsum++;
	}

    } else {

	# Run the script

	my $cmd = $exec_script;

	# replace diamonds
	$cmd = &replace_diamonds($cmd,\@pars);
	# pipe the script through a shell
	if ($opt{debug}) {
	    $pid = open SHELL,"|/bin/sh -evx";
	} else {
	    $pid = open SHELL,"|/bin/sh -e";
	}
	
	if (defined $pid) {
	    if ($opt{export}) {
		# export parameters as variables
		foreach (@parnames[1..$#parnames]) {
		    print SHELL "$_=\'$pars[$parindex{$_}]\'\n";
		}
	    }
	    print SHELL $cmd;
	    close SHELL;
	    $exitval = ($? >> 8);
	    if ($exitval != 0) {
		$errsum += $exitval;
		&log("error","Script returned with exit value $exitval");
	    }

	} else {

	    # Give STDOUT and STDERR back before this run is abandoned.
	    # Otherwise they would stay diverted and the next pass would
	    # save the diverted handles instead of the original ones.
	    &recover_stdout_stderr;
	    &log("error","Can't open pipe to /bin/sh\n");
	    next;
	}
    }
    $end_utime = (times)[2];

    # recover STDOUT and STDERR
    &recover_stdout_stderr;

    # Rename output to unique storage name unless @outfiles is empty
    if (@outfiles) {
	foreach (@outfiles) {
	    unless (-e $_) {
		$errsum++;
		&log("error","output file not found: $_");
	    }
	}
	# $index (not the run counter $i) walks through the output files
	foreach $index (0 .. $#outfiles) {
	    $outfile = $outfiles[$index];
	    $storage_name = $storage_names[$index];
	    if (-e $storage_name)  {
		&log("warning","have to remove $storage_name");
		unlink($storage_name) || die "ABORT:   Could'nt unlink $storage_name.\n";
	    }
	    if (-e $outfile) {
		# Add error postfix if something went wrong
		if ($errsum > 0) {
		    $storage_name = $storage_name . $epostfix;
		    &log("warning","Error postfix added to storage name");
		    if (-e $storage_name) {
			&log("warning","have to remove $storage_name");
			unlink($storage_name) 
			    || die "ABORT:   Could'nt unlink $storage_name.\n";
		    }
		}
		rename($outfile,$storage_name) 
		    || &log("warning","rename was not successful");
	    }
	}
    }

    # End of job note into logbook
    &log("job_completed","user time:   " . &HMS($end_utime-$start_utime) );

    # Mail notification if requested
    if ($mail && $address) {
	if ($errsum == 0) {
	    $subject = "CHAIN: success in run $runnr/$nruntot";
	} else {
	    $subject = "CHAIN: *error* in run $runnr/$nruntot";
	}
	open(MAIL,"|$mailer -s '$subject' $address");
	print MAIL "The logbook entries for the completed run are:\n\n";
	&show_last_log(\*MAIL);
	print MAIL "\n\nAnd here is the general status report:\n\n";
	&report_status(\*MAIL);
	close(MAIL);
    }
}

# All runs are done.  The completion entry in the logbook is what makes
# a later 'chain -r' refuse to restart this chain.
&log("chain_completed");

# Final status report by mail, sent whenever a mail address is known
if ($address) {
    my $mailcmd = "|$mailer -s 'CHAIN: completed' $address";
    open(MAIL,$mailcmd);
    &report_status(\*MAIL);
    close(MAIL);
}

# End of main program
#
# Subroutine definitions start below
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

sub nfields {
    # Return the number of whitespace-separated fields in a string.
    # Leading and trailing whitespace does not produce extra fields,
    # and an empty, blank or undefined string has 0 fields.
    my ($string) = @_;
    my @fields;

    return 0 unless defined $string;
    # split ' ' skips leading whitespace and drops trailing empty fields
    @fields = split ' ', $string;
    return scalar(@fields);
}

sub HMS {
    # Format a time span given in seconds as H:MM:SS.ss
    my ($seconds) = @_;
    my $hours   = int($seconds/3600);
    $seconds   -= $hours*3600;
    my $minutes = int($seconds/60);
    $seconds   -= $minutes*60;
    return sprintf("%d:%02d:%05.2f",$hours,$minutes,$seconds);
}
sub sec {
    # Convert a time string of the form HH:MM:SS.ss into seconds
    my @part = split(/:/,$_[0]);
    my ($hours,$minutes,$seconds) = @part[0,1,2];
    return $seconds+60*$minutes+3600*$hours;
}

sub skip_comment {
    # Read lines from PARFILE into $_ until one is found that does not
    # start with "#".  That line is left in $_ for the caller; at end of
    # file $_ is undefined.
    while (defined($_ = <PARFILE>)) {
	last if /^[^#]/;
    }
}

sub trim {
    # Strip leading and trailing blanks, tabs and newlines.  The
    # caller's variable is changed in place; the result is also returned.
    for ($_[0]) {
	s/^[ \t\n]*//;
	s/[ \t\n]*$//;
    }
    return $_[0];
}
sub isnum {
    # True if the argument looks like a decimal number with optional
    # sign, fraction and exponent.  Surrounding whitespace is accepted.
    my ($candidate) = @_;
    return $candidate =~ /^\s*(\+|-)?(\d+\.?\d*|\.\d+)([eE](\+|-)?\d+)?\s*$/;
}

sub hide_escaped_whitespace {
    # Replace every backslash-escaped whitespace character in the
    # arguments by \374<character code>\375, so that later splitting on
    # whitespace leaves it alone.  The arguments are changed in place.
    # List context: returns the changed argument list.
    # Otherwise: returns the number of replacements.
    my ($nreplaced,$item);
    foreach $item (@_) {
	$nreplaced += ($item =~ s/\\(\s)/"\374".ord($1)."\375"/eg);
    }
    return @_ if wantarray;
    return $nreplaced;
}
sub recover_escaped_whitespace {
    # Turn every \374<character code>\375 marker in the arguments back
    # into the character it stands for.  The arguments are changed in
    # place.
    # List context: returns the changed argument list.
    # Otherwise: returns the number of replacements.
    my ($nreplaced,$item);
    foreach $item (@_) {
	$nreplaced += ($item =~ s/\374(\d+)\375/chr($1)/eg);
    }
    return @_ if wantarray;
    return $nreplaced;
}
sub hide_escaped_characters {
    # Replace every backslash followed by a character (other than
    # newline) in the arguments by \376<character code>\377.  The
    # arguments are changed in place.
    # List context: returns the changed argument list.
    # Otherwise: returns the number of replacements.
    my ($nreplaced,$item);
    foreach $item (@_) {
	$nreplaced += ($item =~ s/\\(.)/"\376".ord($1)."\377"/eg);
    }
    return @_ if wantarray;
    return $nreplaced;
}
sub recover_escaped_characters {
    # Turn every \376<character code>\377 marker in the arguments back
    # into the character it stands for.  The arguments are changed in
    # place.
    # List context: returns the changed argument list.
    # Otherwise: returns the number of replacements.
    my ($nreplaced,$item);
    foreach $item (@_) {
	$nreplaced += ($item =~ s/\376(\d+)\377/chr($1)/eg);
    }
    return @_ if wantarray;
    return $nreplaced;
}

sub nowstring {
    # Return the current local date and time as a string of the form
    # "DD Mon YYYY, HH:MM:SS" (day and hour are blank-padded to width 2).
    my ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
    my ($thismon) = 
	("Jan","Feb","Mar","Apr","May","Jun","Jul",
	 "Aug","Sep","Oct","Nov","Dec")[$mon];

    # localtime counts years from 1900; add the offset so that dates
    # from 2000 on are not printed as "100", "101", ...
    return sprintf("%2d %s %d, %2d:%02d:%02d",
		   $mday,$thismon,$year+1900,$hour,$min,$sec);
}

sub log {
    # Append one entry to the logbook file.
    #   $key   selects the kind of entry (see the table below)
    #   $text  is the free text that goes into the entry
    # With -verbose the entry is printed as well.  The logbook is opened
    # and closed for every entry.
    my ($key,$text) = @_;

    # One entry builder per known key
    my %builder = (
	"chain_started"   => sub {
	    "\nCHAIN $version: Started " . &nowstring . " :: $text\n" },
	"chain_restarted" => sub {
	    "\nCHAIN $version: Restarted " . &nowstring . " :: $text\n" },
	"chain_completed" => sub {
	    "\nCHAIN $version: Completed " . &nowstring . "\n" },
	"job_started"     => sub {
	    # headline, underlined with at least 79 dashes
	    "\n$text\n" . "-" x (length($text)>79 ? length($text) : 79) . "\n"
		. "started  " . &nowstring . "\n" },
	"job_completed"   => sub {
	    "finished " . &nowstring . " :: $text\n" },
	"warning"         => sub {
	    "WARNING  " . &nowstring . " :: $text\n" },
	"error"           => sub {
	    "ERROR    " . &nowstring . " :: $text\n" },
	"signal"          => sub {
	    "\nCHAIN $version: aborted " . &nowstring . " :: signal SIG$text\n" },
    );

    my $build = $builder{$key};
    my $entry = $build ? &$build() : "\nunrecognized logbook key $key\n";

    print $entry if $verbose;
    open(LOGBOOK,">>$logbook") || die "ABORT:   Can't open $logbook.\n";
    print LOGBOOK $entry;
    close(LOGBOOK);		# make sure the buffer is flushed
}

sub chain_group_signal {
    # Send a signal to the process group of the chain whose process id
    # is stored in the file chain.pid of the current directory.
    #   $signal  signal name without the SIG prefix, e.g. 'TERM'
    # The outcome is reported on STDOUT; nothing useful is returned.
    my ($signal) = @_;
    my ($pid,$line,$signr) = (0);
    my (@names,@numbers,%signals);

    # Read the process id directly; a missing or empty file means that
    # no chain is known for this directory.
    if (open(PIDFILE,"<chain.pid")) {
	$line = <PIDFILE>;
	close(PIDFILE);
	$pid = $1 + 0 if (defined $line && $line =~ /^\s*(\d+)/);
    }

    # Get the number of the signal (we need it in order to kill groups).
    # Perl's own configuration is used for this, because the output
    # format of the shell's "kill -l" differs between shells.
    require Config;
    @names   = split ' ', $Config::Config{sig_name};
    @numbers = split ' ', $Config::Config{sig_num};
    @signals{@names} = @numbers;
    $signr = $signals{$signal};

    if (!$pid) {
	print "No active chain process associated with this directory.\n";
    } elsif (!$signr) {
	# never fall back to signal 0, which would only test the process
	print "Could not send signal $signal to process group $pid.\n";
    } elsif (kill -$signr,$pid) {
	# a negative signal number addresses the whole process group
	print "Signal $signal successfully sent to process group $pid.\n";
    } else {
	print "Could not send signal $signal to process group $pid.\n";
    }
}

sub signal_handler {
    # Handler for the trapped signals: record the signal in the logbook,
    # then terminate the program through die.
    my $signame = shift;
    &log("signal",$signame);
    die "Caught a signal $signame. Exiting...\n";
}

sub divert_stdout_stderr {
    # Divert STDOUT and STDERR when the options -O / -E were given.
    #   value 0: discard (/dev/null)
    #   value 1: overwrite chain.out / chain.err
    #   value 2: append to chain.out / chain.err
    # The original handles are kept in SAVEOUT / SAVEERR, from where
    # recover_stdout_stderr restores them.  Dies if a handle cannot be
    # saved or redirected.
    my $target;

    if (defined $opt{O}) {
	open(SAVEOUT, ">&STDOUT") or die "Can't save STDOUT\n";
	$target = $opt{O} == 0 ? ">/dev/null"
	        : $opt{O} == 1 ? ">chain.out"
	        : $opt{O} == 2 ? ">>chain.out"
	        :                undef;
	# "or" (not "||") so that the failure of open itself is tested
	if (defined $target) {
	    open(STDOUT, $target) or die "Can't redirect STDOUT\n";
	}
	select STDOUT; $| = 1;
    }
    if (defined $opt{E}) {
	open(SAVEERR, ">&STDERR") or die "Can't save STDERR\n";
	$target = $opt{E} == 0 ? ">/dev/null"
	        : $opt{E} == 1 ? ">chain.err"
	        : $opt{E} == 2 ? ">>chain.err"
	        :                undef;
	if (defined $target && !open(STDERR, $target)) {
	    # get the old STDERR back first, so the message can be seen
	    open(STDERR, ">&SAVEERR");
	    die "Can't redirect STDERR\n";
	}
	select STDERR; $| = 1;
    }
    select STDOUT;
}
sub recover_stdout_stderr {
    # Undo divert_stdout_stderr: reconnect STDOUT and STDERR to the
    # handles saved in SAVEOUT and SAVEERR.  Only the streams whose
    # option (-O / -E) was given are touched.
    if (defined $opt{O}) {
	close(STDOUT);
	open(STDOUT, ">&SAVEOUT");
    }
    if (defined $opt{E}) {
	close(STDERR);
	open(STDERR, ">&SAVEERR");
    }
}

sub read_file_names {
    # look for the file name definitions and read them
    #
    # Reads the next block of non-comment lines from PARFILE and picks up
    # the INPUT / FORMAT / OUTPUT FILE NAMES and PARAMETER NAMES entries.
    # Fills the file-level variables @infiles, @fmtfiles, @outfiles,
    # @parnames, %parindex, %nitems, $nfiles and %outfileindex, and
    # increments $abort for every problem found.
    my ($infiles,$fmtfiles,$outfiles,$parnames);
    my ($linenr_o,$linenr_p,$nitems);

    # skip_comment leaves the first non-comment line in $_
    &skip_comment;
    # the block ends at the next line that starts with "#"
    while (/^[^\#]/) {
	$infiles  = $1 if (/^\s*INPUT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
	$fmtfiles = $1 if (/^\s*FORMAT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
	$outfiles = $1 if (/^\s*OUTPUT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
	$linenr_o = $. if (/^\s*OUTPUT\s+FILE\s+NAMES\s*:\s*(.*?)\s*$/i);
	$parnames = $1 if (/^\s*PARAMETER\s+NAMES\s*:\s*(.*?)\s*$/i);
	$linenr_p = $. if (/^\s*PARAMETER\s+NAMES\s*:\s*(.*?)\s*$/i);
        $_ = <PARFILE> || die "ABORT:   Unexpected end of file in $parfile.\n";
    }
    # {...} groups in the names are expanded; an empty {} in the format
    # file names is filled with the input file names
    @infiles  = split /\s+/, &horizontally_expand($infiles);
    @fmtfiles = split /\s+/, &horizontally_expand($fmtfiles,@infiles);
    @outfiles = split /\s+/, &horizontally_expand($outfiles);

    # define the parameter name arrays
    # (the dummy in front makes the first real name element 1)
    @parnames = ('#parnames',split /\s+/, $parnames);
    # make a hash for easy lookup
    @parindex{@parnames} = (0..$#parnames);
    delete $parindex{'#parnames'};

    # Initialize the nitems array
    # (expected number of items per TODO line, tagged with the line
    # numbers of the two defining entries)
    $nitems = scalar(@parnames)  - 1 + scalar(@outfiles);
    $nitems{$nitems} = "($linenr_o/$linenr_p) ";

    # number of input and format files equal?
    if (@infiles != @fmtfiles) {
	print STDERR "ERROR:   Number if input files (",$#infiles+1,
	") differs from number of format files (",$#fmtfiles+1,").\n";
	$abort++;
    }

    # do the format files really exist?
    foreach (@fmtfiles) {
	# -e without an argument tests $_
	if (!-e) {
	    warn "ERROR:   Format file $_ does not exist.\n";
	    $abort++;
	}
    }

    # how many output files are there?
    $nfiles = $#outfiles+1;
    # make a hash to lookup indices of outfile names
    # (both by name and by 1-based number)
    @outfileindex{@outfiles} = (1..@outfiles);
    foreach (1..$nfiles) {$outfileindex{"$_"} = $_};
}

sub horizontally_expand {
    # Expand {a,b,...} groups like csh does.
    #   first argument:  string of whitespace-separated fields
    #   other arguments: list to use for an empty group {}
    # Every field that contains a group is replaced by one field per
    # alternative; several groups in one field are expanded in turn.
    # Returns the resulting fields joined with single blanks.
    my($text,@default) = @_;
    my(@fields,@core,$head,$core,$tail);
    my($k) = 0;		# private index, leaves the file-level $i alone

    @fields = split(/\s+/,$text);
    while ($k <= $#fields) {
	# the braces are escaped to be taken literally by all perl versions
	if ( ($head,$core,$tail) = ($fields[$k] =~ /(.*?)\{(.*?)\}(.*)/ ) ) {
	    # only a really empty group gets the default, {0} does not
	    @core = length($core) ? split(/,/,$core) : @default;
	    splice(@fields,$k,1, map { $head . $_ . $tail } @core);
	    # stay at this position: the new field may hold another group
	} else {
	    $k++;
	}
    }
    return join(' ',@fields);
}

sub read_options {
    # Read the options block of the parameter file: storage name prefix
    # and postfixes, the mail address and the MINOR OPTIONS line.
    # MINOR OPTIONS are parsed like command line switches.
    my $minor_options;

    # skip_comment leaves the first non-comment line in $_;
    # the block extends up to the next line starting with "#"
    &skip_comment;
    while (/^[^\#]/) {
	if (/^\s*MINOR\s*OPTIONS\s*:\s*(.*?)\s*$/i) {
	    $minor_options = $1;
	}
	if (/^\s*STORAGE\s*NAME\s*PREFIX\s*:\s*(\S*)\s*$/i) {
	    $sprefix = $1;
	}
	if (/^\s*STORAGE\s*NAME\s*POSTFIX\s*:\s*(\S*)\s*$/i) {
	    $spostfix = $1;
	}
	if (/^\s*ERROR\s*CASE\s*POSTFIX\s*:\s*(\S*)\s*$/i) {
	    $epostfix = $1;
	}
	if (/^\s*MAIL\s*ADDRESS\s*:\s*(\S*)\s*$/i) {
	    $address = $1;
	}
        $_ = <PARFILE> || die "ABORT:   Unexpected end of file in $parfile.\n";
    }

    return unless $minor_options;

    # Let GetOptions do the work: hand the words over through @ARGV
    @ARGV = split(/\s+/,$minor_options);
    &GetOptions(\%opt,
		'-export|e',
		'-debug|d',
		'-mail|m',   \$mail, 
		'-force|f',  \$force,
		'-verbose|v',\$verbose, 
		'-n=i',      \$maxrun,
		'-i:i',      \$extra_steps,
		'-O:i',
		'-E:i'
		)
	or die "ABORT:   Illegal options in DEFAULT MINOR OPTIONS (file $parfile).\n";
}

sub read_script {
    # Read the script section of the parameter file into $exec_script.
    # The section is enclosed by the marker lines
    #    # ---- Start of Shell script -...
    #    # ---- End of Shell script -...
    # If the text contains a perl sub definition it is taken as perl
    # code, compiled into package "user" and checked for a sub RUN.
    # Everything else is kept as a shell script.  Sets
    # $script_is_perl_code accordingly.

    my $sline;

    # Look for the start marker, beginning with the line that is still
    # in $_.  Stop with an error at end of file; otherwise a missing
    # marker would make this loop run forever.
    until (/^# ---- Start of Shell script -/) {
	$_ = <PARFILE>;
	die "ABORT:   Unexpected end of file in $parfile (no script section found).\n"
	    unless defined $_;
    }
    $sline = $.;

    $exec_script = '';
    while (<PARFILE>) {
	last if /^# ---- End of Shell script -/;
	$exec_script .= $_;
    }
    if ($exec_script =~ /^ \s* sub \s+ \w+ \s+ \{ /mx) {
	# it is a perl program
	$script_is_perl_code = 1;
	# Evaluate subroutine definitions and check for errors
	eval "package user;\n$exec_script";
	if ($@) {
	    warn "ERROR:   Perl program in $parfile contains errors:\n";
	    if ($repair_error_message) {
		# try to insert correct file name and line numbers
		$@ =~ s/(\(eval \d+\))/$parfile/g;
		$@ =~ s/(line\s+)(\d+)/"$1".($2+$sline-1)/eg;
	    } else {
		# just tell them where to find those errors
		warn "         (Line numbers are in file $parfile, relative to line ",$sline-1,")\n";
	    }
	    print STDERR $@,"\n";
	    exit;
	}
	unless (defined &user::RUN) {
	    warn "ERROR:   Perl program in $parfile needs to define subroutine RUN\n";
	    $abort++;
	}
    } else {
	# just an ordinary shell script
	$script_is_perl_code = 0;
	die "ABORT:   No script defined in $parfile.\n"
	    unless $exec_script;
    }
}

sub read_todo_list {
    # Read the rest of the parameter file, the TODO list.
    # Continuation lines are joined, macros are interpolated, escaped
    # characters are hidden, #define lines are stored in %macro.
    # #test lines and run specifications are appended to @todolist,
    # with the number of their first line in @linenr.
    my($key,$index,$value);
    my($line,$more);

  READLOOP:
    while (<PARFILE>) {
	$line  = $.;

	# add any continuation lines
	# (stop at end of file: a backslash at the end of the very last
	# line must not keep this loop running forever)
	while (/\\\s*$/) {
	    $more = <PARFILE>;
	    last unless defined $more;
	    $_ .= $more;
	}
	s/\\\s*\n//g;	# delete newlines

	next READLOOP if /^\s*$/;	# skip empty lines

	s/^\s+//;		# remove leading spaces
	s/\s+$//;		# remove trailing spaces

	# Interpolate macros
        s/ (\b\w+\b) / defined($macro{$1}) ? $macro{$1} : $1 /xge;

	# hide backslash escape characters
	&hide_escaped_whitespace($_);
	&hide_escaped_characters($_);

	# Macro definition
	if (/^\#define\s+/) {
	    if ( ($key,$value) = (/^ \#define \s+ (\w+) \s+ (\S.*) $/x)) {
		$macro{$key} = $value;
		next READLOOP;
	    }
	}

	# Test definition
        if ( ($index,$value) = 
	       (/^\#test\s+(\S+)\s+(\S.*)$/)) {
	    # keep test definition in TODO list for now.
	    push(@todolist,$_);
	    push(@linenr,$line);
	    next READLOOP;
        }

	next READLOOP if /^\#/;	# skip comment lines
	chop if /\n/;		# kill trailing newline

	# Horizontal expansion: {...} (restricted to storage names)
	$_ = &horizontally_expand($_,@outfiles);
	    
	# Sort parameters
	$_ = &sort_parameters($_,$line);

	# put on todo list   
	push(@todolist,$_);
	push(@linenr,$line);

    }
}

sub expand_todo_list {
    # Expand the raw TODO list in @todolist into one entry per run.
    #
    # For every entry: #test lines are compiled into %testexp and removed;
    # lists and sequences written in [...] or (...) are expanded into
    # several entries; the storage names are completed and the tests are
    # applied.  Along the way %nitems, %seen, %exists and %failed collect
    # the material for the complain_* routines, and $abort counts errors.
    # Finally the print formats $todofmt and $todofmttop are built.
    my(@tmp,@width,@list,@list1,@files,@pars);
    my(%testdefline);
    my($head,$tail,$paren,$spec,$close,$i,$j,$iter,$npars,$name);
    my($number,$format);
    my($index,$index1,$key,$key1,$value);
    my %have_warned;
    my ($would_be_skipped);

    # How many parameters?
    # (taken from the first entry that is not a # line)
    foreach (@todolist) {
	next if /^\#/;
	@tmp = split /\s+/;
	last;
    }
    $npars = $#tmp-$#outfiles;
    # parameters can also be addressed by their number
    foreach (1..$npars) {$parindex{$_} = $_}

    # Prepare @width array
    # (column widths start with the length of the column titles)
    foreach $i (0 .. $#outfiles) {$width[$i] = length($outfiles[$i]);}
    foreach $i (($#outfiles+1) .. $#tmp) {
	if ($_ = $parnames[$i-$#outfiles]) {
	    $width[$i] = length($_);
	} else {
	    $width[$i] = 1;
	}
    }

    # Expand parameter list
    $i = 0;
  CHECKLOOP:
    while ($i<=$#todolist) {

	# entries below $nskip get a "*" in the reports
	$would_be_skipped = ($i < $nskip);

	# Test definition?
        if ( ($index,$value) = 
	       ($todolist[$i] =~ /^\#test\s+(\S+)\s+(\S.*)$/)) {

	    # save test expression
	    if (defined $parindex{$index} || defined ($outfileindex{$index})) {
		$value =~ s/\s*$//;
		# NOTE(review): after the delete the next statement still
		# compiles the text "reset" into a sub and stores it --
		# confirm whether an "else" was intended here.
		if ($value eq 'reset') {delete $testexp{$index}}
		$testexp{$index} = eval "sub { $value }";
		if ($@) {
		    die "ABORT:   Perl syntax error in test definition ",
		    "(chain.dat line $linenr[$i])\n",
		        "         #test $index ...\n",
			"         Error message follows:\n",$@;
		}
		$testdefline{$index} = $linenr[$i];
	    } else {
		warn "ERROR:   don't know how to test \"$index\" ($parfile line $linenr[$i])\n";
		$abort++;
	    }
	    # remove the #test line and look at the entry that moved up
	    splice(@todolist,$i,1);
	    splice(@linenr,$i,1);
	    last CHECKLOOP if ($i > $#todolist);
	    redo CHECKLOOP;
        }
	
	# Macro interpolation and vertical expansion
	# NOTE(review): $iter is decremented twice per pass (once after the
	# expansion, once after the macro step), so one more full pass
	# needs $extra_steps to grow by 2 -- confirm this is intended.
	$iter = $extra_steps + 1;
	while ($iter > 0) {

	    # Expand lists and sequences
	    # pieces: text before, opening bracket, optional "#n" tag,
	    # specification, optional "%" format, closing bracket, rest
	    while ( (($head,$paren,$number,$spec,$format,$close,$tail) = 
		     ($todolist[$i] =~ 
		      /^(.*?) (\[|\() (\#\d+[:,])? (.*?) ([:,]%.*?)? (\]|\)) (.*)$/x ) ) ) {

		# drop the separator behind the tag
		chop($number) if $number;

		# drop the separator in front of the format
		substr($format,0,1) = "" if $format;
		$format = $format || "%s";

		@list =  map {sprintf($format,$_)} 
		              &expand_me($paren,$spec,$close);
		
		# attach head and tail to each value
		foreach (@list) {
		    $_ = $head . $_ . $tail;
		}

		if ($number) {
		    # look for more lists/sequences with the same number
		    # and expand them parallel
		    while ( (($paren,$spec,$format,$close) =  ($list[0] =~ 
			      /(\[|\() \Q$number\E [:,]
			      (.*?) ([,:]%.*?)? (\]|\))/x ) ) ) {
			
			substr($format,0,1) = "" if $format;
			$format = $format || "%s";
			
			@list1 = map {sprintf($format,$_)} 
			         &expand_me($paren,$spec,$close);

			# Interpolate
			if (@list != @list1) {
			    # report once per line and tag, count every time
			    print STDERR "ERROR:   In $parfile line $linenr[$i]:\n",
			    "         Parallel sequences/lists $number have different lengths.\n"
				unless $have_warned{$linenr[$i],$number}++;
			    
			    $abort++;
			}
			# the k-th entry receives the k-th value of @list1
			@list = map {s/(\[|\() \Q$number\E (.*?) 
					 (%.*?)? (\]|\))/shift(@list1)/xe;$_} @list;
		    }
		}
		# Replace old line with new lines. Keep track of line numbers.
		splice(@todolist,$i,1,@list);
		splice(@linenr,$i,1,($linenr[$i]) x @list)
	    }
	    $iter--;

	    if ($iter > 0) {
		# Expand macros
		$todolist[$i] =~ 
		    s/ (\b\w+\b) / defined($macro{$1}) ? $macro{$1} : $1 /xge;
	    }
	    $iter--;
	}

	# Separate storage names and parameters
	@pars  = split(/\s+/,$todolist[$i]);
	# remember which lines have how many items
	$nitems{$#pars+1} .= "$linenr[$i] " unless
	    $nitems{$#pars+1} =~ /\b$linenr[$i] $/;
	# the entry index replaces the names as dummy element 0 of @pars
	@files = splice(@pars,0,$nfiles,"$i");

	# get the escaped characters back in parameters (names have to wait)
	&recover_escaped_characters(@pars);

        if ($#pars > $npars) {
	    # Ooops. No. of parameters increased. Fix the parindex hash.
	    foreach ($npars+1..$#pars) {$parindex{$_} = $_}
	    $npars = $#pars;
	}

	# Put parameters into the storage name and count occurrences of names
	foreach (@files) {
	    $_ = &replace_diamonds($_,\@pars);
	    # remove spaces to get useful names
	    tr/ //d;
	    # NOW recover escaped characters in storage names
	    &recover_escaped_characters($_); 
	    # Add prefix and postfix if defined
	    $_ = $sprefix . $_ . $spostfix;
	    # count the number of uses of every name
	    $seen{$_} .= $linenr[$i].($would_be_skipped?"*":"")." ";
	    # check if the file name does already exist
	    if (-e $_) {
		$exists{$_} = 
		    &add_linenr($exists{$_},$linenr[$i],$would_be_skipped);
	    }
	}
	       
	# Check parameters and names with #test
	# The value under test is handed to the test sub in $_ and copied
	# back afterwards, so a test may also modify the value.
	foreach $name (sort keys %testexp) {
	    $key1 = "#test $name $testdefline{$name}";
	    if (defined $parindex{$name}) {
		$j = $parindex{$name};
		$_ = $pars[$j];
		unless (&{$testexp{$name}}) {
		    $failed{$key1}{$pars[$j]} = 
			&add_linenr($failed{$key1}{$pars[$j]},$linenr[$i],
				    $would_be_skipped);
		}
		$pars[$j] = $_;
	    } elsif (defined $outfileindex{$name}) {
		$j = $outfileindex{$name}-1;
		$_ = $files[$j];
		unless (&{$testexp{$name}}) {
		    $failed{$key1}{$files[$j]} = 
			&add_linenr($failed{$key1}{$files[$j]},$linenr[$i],
				    $would_be_skipped);
		}
		$files[$j] = $_;
	    }
	}

	# put everything together again
	shift(@pars);		# kill the dummy
	@list = (@files,@pars);
	$todolist[$i] = join(" ",@list);

	# update width array
	# (measured on the text with the hidden whitespace restored)
	&recover_escaped_whitespace(@list);
	foreach (@list[0..$#files]) {$whitespace_in_sn++ if /\s/}
	foreach $j (0..$#list) {
	    if (length($list[$j]) > $width[$j]) {$width[$j] = length($list[$j])}
	}

	# Move on to next line
	$i++;
    }

    # create parameter format
    # (one left-justified column per item)
    $todofmt = "%-" . join("s %-",@width) . "s";
    # create headline for parameter format
    my($len,$len1,$len2,$todofmttop1,$todofmttop2);
    $len  = 0; foreach $i (0 .. ($nfiles-1)) {$len += $width[$i]+2};
    $len -= 2;
    # @files still holds the storage names of the last entry
    if (@files) {
	$todofmttop1  = sprintf("%-${len}s  %-s","FILES","PARAMETERS");
    } else {
	$todofmttop1  = sprintf("%-${len}s","PARAMETERS");
    }
    # column titles: output file names, then parameter names or numbers
    @tmp = @outfiles;
    foreach $i (1 .. $npars) {
	($tmp[$nfiles-1+$i]=($parnames[$i]||"$i"));
    }
    $todofmttop2  = sprintf("$todofmt",@tmp);
    $len1 = length($todofmttop1);
    $len2 = length($todofmttop2);
    $todofmtlength = $len1>$len2 ? $len1 : $len2;
    $todofmttop = "$todofmttop1\n" . "-" x ($len1>$len2 ? $len1 : $len2) . "\n" .
                  "$todofmttop2\n" . "=" x ($len1>$len2 ? $len1 : $len2) . "\n";
}

sub sort_parameters {
    # Bring a TODO line with parameters given as NAME=value into the
    # positional order of the PARAMETER NAMES.
    #   $line    the TODO line (storage names first, then parameters)
    #   $linenr  its line number, used in error messages
    # Parameters in front of the first NAME=value are bound by position.
    # A position left without a value is reported, counted in $abort and
    # filled with '????'.  A line without any "=" is returned unchanged.
    my($line,$linenr) = @_;
    my(@old,@new,$i,$name,$rest,$slot);
    my($foundeq) = 0;

    unless ($line =~ /=/) {
	return $line;
    }

    @old = split ' ',$line;
    # the storage names keep their places
    @new = @old[0..$nfiles-1];

    for $i ($nfiles..$#old) {
	($name,$rest) = ($old[$i] =~ /(\w+)=(.*)/);
	if ($name && defined $parindex{$name}) {
	    # Parameter number k lives behind the $nfiles storage names,
	    # i.e. at index $nfiles-1+k (the same mapping that the error
	    # message below uses).
	    $slot = $nfiles - 1 + $parindex{$name};
	    $new[$slot] = $rest;
	    $foundeq++;		# disallow positional binding from now on
	} elsif ($foundeq == 0) {
	    $new[$i] = $old[$i];
	}
    }
    for $i (0..$#old) {
	unless (defined $new[$i]) {
	    warn "ERROR:   No value for parameter ",$i-$nfiles+1,
	    defined $parnames[$i-$nfiles+1]?" (".$parnames[$i-$nfiles+1].")":"",
	         " in $parfile line ",$linenr,".\n";
	    $abort++;
	    $new[$i] = '????';
	} 
    }
    return join ' ',@new;
}


sub add_linenr {
    # Append a line number to a blank-separated list of line numbers.
    #   $string  the list so far (every entry is followed by one blank)
    #   $nr      the line number to add
    #   $star    if true, the entry is marked with a trailing "*"
    # The entry is not added again when it already is the last one in
    # the list.  Returns the new list.
    my($string,$nr,$star) = @_;
    my($entry) = $star ? $nr . "*" : $nr;
    if ($string !~ /\b\Q$entry\E $/) {
        $string = $string . "$entry ";
    }
    return $string;
}


# Output format used by "write STDERR": one record per offending item.
# The first field shows $E_item; items of 26 or more characters are
# shortened to their first 17 characters, " ... ", and their last 3.
# The one-character field holds "s" (giving "lines") when $E_linelist
# contains a blank followed by another character, else a blank.  The
# last field shows $E_linelist and is continued on additional rows
# (the ~~ line) for as long as text remains in $E_linelist.
format STDERR =
         - ^<<<<<<<<<<<<<<<<<<<<<<<< in chain.dat line@ ^<<<<<<<<<<<<<<<<<<<<<<
           length($E_item)<26?$E_item:substr($E_item,0,17)." ... ".substr($E_item,-3),        ($E_linelist=~/ ./?"s":" "),  $E_linelist
~~                                                       ^<<<<<<<<<<<<<<<<<<<<<<
                                                         $E_linelist
.

sub complain_about_nitem_inconsistencies {
    # Complain about TODO lines with different numbers of items in them.
    # %nitems is keyed by item count; each value is shown as the list of
    # lines having that count.  Nothing is printed when there is at most
    # one distinct count.
    # Returns the number of distinct counts reported (0 if consistent).
    my $abort = 0;

    if ( (keys %nitems) > 1 ) {
        warn "ERROR:   Number of items is different in different lines:\n";
        # The keys are numbers: sort them numerically, otherwise a count
        # of 10 would be listed before a count of 9.
        foreach (sort { $a <=> $b } keys %nitems) {
            $E_item = sprintf("%3d items",$_);
            $E_linelist = $nitems{$_};
            $lines_with_star++ if $E_linelist =~ /\*/;
            write STDERR;
            $abort++;
        }
        print STDERR "\n";
    }
    return $abort;
}

sub complain_about_parameter_tests {
    # Report storage names and parameters that failed their tests.
    # %failed is keyed by a test description (whitespace separated
    # fields, the last of which is printed as a line number); below each
    # key it maps the offending value to the list of lines using it.
    # Returns the number of tests that had failures.
    my($test,$value);
    my $nfailed = 0;

    return $nfailed unless %failed;

    warn "ERROR:   Storage names or parameters failed tests:\n" ;
    foreach $test (sort keys %failed) {
        printf STDERR "         %-5s %-12s (defined in $parfile line  %d)\n",
            split(/\s+/,$test);
        foreach $value (sort keys %{ $failed{$test} } ) {
            # hand item and line list over to the STDERR format
            $E_item     = $value;
            $E_linelist = $failed{$test}{$value};
            $lines_with_star++ if $E_linelist =~ /\*/;
            write STDERR;
        }
        $nfailed++;
    }
    print STDERR "\n";
    return $nfailed;
}

sub complain_about_existing_files {
    # Warn about storage names that correspond to existing files.
    # %exists maps each such name to the list of lines using it; one
    # STDERR-format record is written per name.  This is a warning only,
    # no abort count is returned.
    my($key);

    if (%exists) {
        warn "WARNING: The following storage names correspond to existing files:\n";
        foreach $key (sort keys %exists) {
            $E_item = $key;
            $E_linelist = $exists{$key};
            # Collapse directly repeated entries.  The match is tied to
            # whole entries (start or blank before, blank or end after);
            # without that, "5 52 " would be mangled into "52 ".
            1 while $E_linelist =~ s/(^| )(\S+) \2(?!\S)/$1$2/g;
            $lines_with_star++ if $E_linelist =~ /\*/;
            write STDERR;
        }
        print STDERR "\n";
    }
}

sub complain_about_multiple_names {
    # Complain about storage names that are used more than once.
    # %seen maps each storage name to the list of lines using it; a name
    # is reported when that list holds more than one entry.
    # Returns the number of names reported.
    my ($abort,$seentop) = (0,0);
    my($key);

    foreach $key (sort keys %seen) {
        next unless $seen{$key} =~ / ./;      # single entry: nothing wrong

        # print the headline once, in front of the first record
        unless ($seentop) {
            warn "ERROR:   Storage names being used more than once:\n";
            $seentop++;
        }
        $E_item = $key;
        $E_linelist = $seen{$key};
        # Collapse directly repeated entries.  The match is tied to whole
        # entries (start or blank before, blank or end after); without
        # that, "5 52 " would be mangled into "52 ".
        1 while $E_linelist =~ s/(^| )(\S+) \2(?!\S)/$1$2/g;
        $lines_with_star++ if $E_linelist =~ /\*/;
        write STDERR;
        $abort++;
    }
    print STDERR "\n" if $seentop;
    return $abort;
}

sub expand_me {
    # Expand one list or sequence specifier into the list of its values.
    #   $paren  opening character; "(" selects a list, anything else is
    #           handled as a sequence
    #   $spec   the text between the parentheses
    #   $close  closing character, must match $paren
    # Returns the expanded values.  Specification errors are reported
    # and counted in $abort; for unmatched parentheses and for a missing
    # start value the unexpanded $spec is returned as the only element.
    # Dies when a sequence produces more than $maxseq values.
    my ($paren,$spec,$close) = @_;
    my ($start,$limit,$step);
    my (@slist,@list,$firstcmp);
    my($v);

    # check if paren and close match
    if ( ( $paren eq '(' && $close ne ')' ) ||
         ( $paren eq '[' && $close ne ']' ) ) {
        # reported with warn, like all other errors in this routine
        warn "ERROR:   In $parfile line $linenr[$i]\n",
             "         parenthesis on sequence \"$spec\" don't match\n";
        $abort++;
        return ($spec);
    }

    # it's just a list
    if ($paren eq "(") {
        @list = split /\s*,\s*/,$spec,-1;
        # replace ascending ranges N..M by their elements
        my($k) = 0;
        while ($k <= $#list) {
            if ($list[$k] =~ /^(\d+)\.\.(\d+)$/ && $1 <= $2) {
                splice @list,$k,1,$1..$2;
            }
            $k++;
        }
        return @list;
    }

    # get fields for sequences
    ($start,$limit,$step) = split(/:/,$spec);

    # break up the start list
    @slist = split(/,/,$start);
    unless (@slist) {
        # Without a start value the loop below would never terminate.
        warn "ERROR:   No start value specified in sequence \"$spec\"\n";
        $abort++;
        return ($spec);
    }
    # Test for a missing limit, not for a false one: 0 is a valid limit.
    unless (defined($limit) && $limit ne "") {
        $limit = $slist[0];
        warn "ERROR:   No limit specified in sequence \"$spec\"\n";
        $abort++;
    }

    # transform step into a statement
    $step = "+1" unless ($step);                      # default increment
    $step = "+$step" unless ($step =~ /^[\-\+\*\/]/); # default operator
    $step = "\$_ = \$_ $step";

    # direction of the sequence: -1 counting up, +1 counting down
    $firstcmp = ($slist[0] <=> $limit);

    # catch sequences that are only 1 element long
    if ($firstcmp == 0) {
        push(@list,$slist[0]);
        return @list;
    }

  LOOP:
    for (;;) {
        foreach $v (@slist) {
            # stop as soon as a value has passed the limit
            if (($v <=> $limit) * $firstcmp >= 0) {
                push(@list,$v);
                die "PANIC:   Sequence \"$spec\" creates >$maxseq values.\n"
                    if (@list > $maxseq);
            } else {
                last LOOP;
            }
        }
        # In the following line, the sprintf rounds the number to 10 digits.
        # This is necessary since perl adds random stuff beyond machine acc.
        # NOTE(review): the step is text from the parameter file and is
        # run through string eval; it is executed as perl code.
        foreach (@slist) {eval($step);$_ = sprintf("%.10g",$_)}
    }
    return @list;
}

sub replace_diamonds {
    # Replace all diamonds <N%fmt> in the first argument with elements
    # of the second argument.
    #   $template  text containing diamonds; N is a parameter name or a
    #              number, %fmt is an optional printf format
    #   $list      reference to the array of values, indexed by number
    # Returns the text with all diamonds replaced.  A diamond whose value
    # is not defined is kept, in escaped form; a diamond with an unknown
    # name is reported, counted in $abort and replaced by "??".
    # Dies after more than $max_diamond_replace replacements.
    my ($template,$list) = @_;
    my ($string,$rstring,$name,$num,$format);
    my ($count) = 0;

    # Each round handles the first diamond that is left in the template.
    while ( ($name,$format) = ($template =~ /<(\w+)(%[^>]+)?>/g) ) {
        die "PANIC:   More than ",$max_diamond_replace,
        " named diamonds replaced in one command.\n",
        "         Probably an infinite loop (diamond replaces diamond).\n"
            if (++$count > $max_diamond_replace);
        $format = "" unless defined $format;

        # Pattern matching exactly this diamond.  It has to be set up for
        # every diamond, also for the error case below; a pattern left
        # over from an earlier round would not match, and the loop would
        # only end in the PANIC above.
        $string = quotemeta("<$name$format>");

        $num = $parindex{$name} || $name;
        if ($num>0) {
            if (! defined($list->[$num])) {
                # no value: keep the diamond, but escaped
                $rstring = "\\<$name$format\\>";
                &hide_escaped_characters($rstring);
            } elsif ($format) {
                if ($format =~ /%-?\d+\.?\d*[def]$/ && !&isnum($list->[$num])) {
                    warn "WARNING: Using numerical format <$num$format> for string \"$list->[$num]\"\n";
                }
                $rstring = sprintf($format,$list->[$num]);
            } else {
                $rstring = $list->[$num];
            }
        } else {
            warn "ERROR:   parameter name <$name> not defined.\n";
            $abort++;
            $rstring = "??";
        }
        $template =~ s/$string/$rstring/;
    }
    return $template;
}

sub show_last_log {
    # Print the logbook entries that belong to the current run.
    #   $HANDLE  filehandle to print to
    # The output starts with the line in front of the last line of
    # dashes in the logbook and runs to the end of the file.
    # Returns (0,0,0) when there is no logbook file.
    my($HANDLE) = @_;
    my(@lastlog);

    unless (-e $logbook) {
        print $HANDLE "No logbook file ($logbook).\n" if $HANDLE;
        return (0,0,0);
    }
    open(LOGBOOK,"$logbook") || die "ABORT:   Can't open $logbook.\n";
    while (<LOGBOOK>) {
        if (/^-+$/) {
            # a line of dashes: forget everything but the line before it
            @lastlog = @lastlog ? ($lastlog[$#lastlog]) : ();
        }
        push(@lastlog,$_);
    }
    close(LOGBOOK);
    print $HANDLE @lastlog;
}

sub report_status {
    # Look at the logbook file and create a report.
    #   $HANDLE  filehandle the report is written to; if false, nothing
    #            is printed and only the status values are determined
    # Returns ($completed,$active_pid,$nfinished).  $completed is 0 when
    # the logbook has no "Completed" entry, $active_pid is 0 when no
    # chain process answers.  Returns (0,0,0) without a logbook file.

    my($HANDLE) = @_;
    my($nerrors,$nfinished) = (0,0);
    # These must be declared only once: a second "my" for $started and
    # $restarted would hide the initial values from the code and from
    # the format below.
    my($started,$restarted,$completed) = ("not yet","","not yet");
    my($summary,$pwd,$lastlog);
    my($time,$time_total,$time_max)=(0,0,0);
    my($pid,$nruns,$counterror);

    if (!-e $logbook) {
        print $HANDLE "No logbook file ($logbook).\n" if $HANDLE;
        return (0,0,0);
    }

    open(LOGBOOK,"$logbook") || die "ABORT:   Can't open $logbook\n";
    while (<LOGBOOK>) {
        # NOTE(review): this is reset by every line, so it only survives
        # when the last line of the logbook carries a time stamp.
        ($lastlog) = /([\s\d]\d\s+[A-Z][a-z]{2}\s+\d+,\s+\d+:\d+:\d+)/;
        if (/^CHAIN.*(Res|S)tarted\s*([\s\d]\S.*\S)\s+::\s+(\d+)\s+runs,\s+PID=(.*\S)\s*$/) {
            $started = $2 if ($1 eq "S");
            $restarted = $2 if ($1 eq "Res");
            $nruns = $3;
            $pid   = $4;
        }
        elsif (/^CHAIN.*Completed\s*([\s\d]\d.*)$/) {
            $completed = $1;
        }
        elsif (/^finished.*user time:\s+(\S.*\S)\s*$/) {
            $nfinished++;
            $time = &sec($1);
            $time_total += $time;
            $time_max = $time if ($time > $time_max);
        }
        # count at most one error per block started by a line of dashes
        elsif (/^-+$/) {$counterror=1}
        elsif (/^ERROR/) {
            if ($counterror) {$nerrors++;$counterror=0}
        }
    }
    close(LOGBOOK);

    # Find out if $pid is an active chain process.  Do not signal at all
    # when the logbook gave no PID: a false PID would make kill address
    # the process group of this very process.
    $pid = 0 unless ($pid && kill($mysignal,$pid));
    $pid = 0 unless ($completed eq "not yet"); # will be dead in a second

    ($pwd = $ENV{"PWD"}) =~ s/^.*(.{48})$/...$1/;
    $summary = "chain has been completed." unless ($completed eq "not yet");
    $summary = "chain is still running (PID $pid)." if $pid;
    $summary = "chain uncomplete, but no active job." 
        if ($completed eq "not yet" && !$pid);
    $summary = "chain has not been started." if ($started eq "not yet");

    # Print report
    if ($HANDLE) {
        # $^ and $~ belong to the selected handle, so they are saved and
        # restored while $HANDLE is the selected one.
        my($oldhandle) = select($HANDLE);
        my($format_top,$format) = ($^,$~);
        $~ = "main::REPORT";
        write $HANDLE;
        $^ = $format_top;$~ = $format;
        select($oldhandle);
    };

    $completed = 0 if ($completed eq "not yet");
    return ($completed,$pid,$nfinished);

format main::REPORT =
|-----------------------------------------------------|
| CHAIN STATUS REPORT:         @<<<<<<<<<<<<<<<<<<<<< |
                       &nowstring
| - - - - - - - - - - - - - - - - - - - - - - - - - - |
| @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
  $pwd
|-----------------------------------------------------|
| chain:   started             @<<<<<<<<<<<<<<<<<<<<< |
                               $started
|          last restarted      @<<<<<<<<<<<<<<<<<<<<< |
                               $restarted
|          last log entry      @<<<<<<<<<<<<<<<<<<<<< |
                               $lastlog
|          completed           @<<<<<<<<<<<<<<<<<<<<< |
                               $completed
|          still active        @<<<<<<<<<<<<<<<<<<<<< |
                               ($pid ? "yes (PID $pid)" : "no")
| - - - - - - - - - - - - - - - - - - - - - - - - - - |
| runs:    total               @<<<<<<                |
                               $nruns
|          finished            @<<<<<< @<<<<<<<<<<<<< |
                               $nfinished, ($nerrors ? ($nerrors>1 ? "($nerrors errors)" : "(1 error)") : "")
|          remaining           @<<<<<<                |
                               ($nruns-$nfinished)
| - - - - - - - - - - - - - - - - - - - - - - - - - - |
| utime:   accumulated         @<<<<<<<<<<<<<<<<<<<<< |
                               &HMS($time_total)
|          max per run         @<<<<<<<<<<<<<<<<<<<<< |
                               &HMS($time_max)
|          average             @<<<<<<<<<<<<<<<<<<<<< |
                               &HMS($time_total/($nfinished || 1))
| - - - - - - - - - - - - - - - - - - - - - - - - - - |
| SUMMARY: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
           $summary
|-----------------------------------------------------|
.
}

#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

sub create_data_file {
    # Create a template version of the data file $parfile and exit.
    # An existing file is only overwritten after the user has confirmed
    # with an answer starting with "y"; any other answer makes the
    # program die.  Dies as well when the file cannot be written.
    if (-e $parfile) {
	print "Overwrite existing file $parfile (y/n)? [n]";
	die "New datafile $parfile was NOT created.\n" unless (<STDIN> =~ /^y/i);
    }
    # Check the open: otherwise success would be reported further down
    # although nothing has been written.
    open(PR,">$parfile") || die "ABORT:   Can't write $parfile: $!\n";
    print PR <<"EOF";
#
# This is a parameter file for CHAIN (created by version $version).
#
# The contents of this file describe
#    - the commands to run one job
#    - the names of input and output files and of parameters
#    - the storage names that are used to save output from different runs
#    - the parameter combinations to be explored
#
# The file contains example text (to help your memory).
# This text has to be replaced.
#
# USE "perldoc chain" TO GET A FULL DESCRIPTION
#
#===============================================================================
#
# 1. Define some options, prefixes, and postfixes.
#    a) The minor options for CHAIN you want to be in effect always
#    b) A mail address for email notifications
#    c) An optional prefix  to be added to all storage names
#    d) An optional postfix to be added to all storage names
#    e) A storage name postfix indicating an error
#
MINOR OPTIONS       : 
MAIL ADDRESS        : $address
STORAGE NAME PREFIX : 
STORAGE NAME POSTFIX: 
ERROR   CASE POSTFIX: $epostfix
#===============================================================================
#
# 2. Define the shell script that runs your code. Parameters will be
#    interpolated into that script
#
# ---- Start of Shell script --------------------------------------------------
prg < in.dat
cat err.dat >> out.dat
# ---- End of Shell script ----------------------------------------------------
#
#===============================================================================
#
# 3. Define some names:
#    a) The names of the input files your code is expecting.
#    b) The names of the format files corresponding to the input files.
#    c) The names of the output files/directories the code produces.
#    d) The names of the parameters (should be alpha_numeric)
#
INPUT  FILE NAMES: in.dat in2.dat
FORMAT FILE NAMES: {}.fmt
OUTPUT FILE NAMES: out.dat
PARAMETER NAMES  : NAME1 NAME2 NAME3
#===============================================================================
#
# 4. Define the TODO list for the job-chain. Each line contains storage names
#    (as many as OUTPUT FILE NAMES defined above) and a list of parameters.
#    '\\' at line end makes the following line a continuation line.
#    Embedded macros and much more is possible. See documentation.
#
#--STORAGE-NAMES----------INPUT-PARAMETERS---------------------------------------

testrun.out       a     b     2        4

series1_<3>_<4>   a     b     (2,3,4)  4
series2_<3>_<4>   a     b     3        [2:6:+2]

EOF
    # Errors of buffered output only show up when the file is closed.
    close(PR) || die "ABORT:   Can't write $parfile: $!\n";
    print "New datafile $parfile was created\n";
    print "Edit this file before using it.\n";
    exit;
}

sub print_usage {
    # Write the USAGE section of the text behind __END__ to STDERR.
    return print_data_section(\*STDERR,"USAGE");
}

sub print_help {
    # Print the help message: the USAGE section, an empty line, and the
    # HELP section of the text behind __END__.
    print_data_section("USAGE");
    print "\n";
    return print_data_section("HELP");
}

sub print_manual {
    # Print the MANUAL section of the text behind __END__.
    return print_data_section("MANUAL");
}

sub print_data_section {
    # Print one section of the text stored in this file.  A section
    # starts behind a line "BOS: KEY" and ends in front of the next line
    # starting with "EOS:".
    #   $handle  filehandle to print to; may be left out, the section is
    #            then printed to STDOUT
    #   $key     name of the section
    # A warning is issued when there is no such section.
    my($handle,$key) = @_;
    unless ($key) {$key=$handle; $handle = \*STDOUT}

    # search from the top of the file for the line opening the section
    seek(DATA,0,0);
    while (defined($_ = <DATA>)) {
        last if /^BOS: $key\s*$/;
    }
    # End of file reached: stop here instead of reading on forever.
    unless (defined $_) {
        warn "ERROR:   No section \"$key\" found in program text.\n";
        return;
    }

    no strict 'refs';           # $handle may be given as a name
    while (<DATA>) {
        last if /^EOS:/;
        print $handle $_;
    }
}

__END__


BOS: USAGE
Usage: chain [DIR] -ctsrll [-n NN] [-e] [-d] [-O N] [-E N] [-m] [-f] [-v] [-i NN]
       chain [DIR] -SIGNAL
       chain -h
       perldoc chain
EOS: USAGE

BOS: HELP
MAJOR OPTIONS (give exactly one of these)
  -c      Create a template version of the data file "chain.dat".
  -t      Test specifications in chain.dat.
  -s      Start the chain.
  -r      Restart a chain that has been aborted before completion.
  -l      Create a report based on the logbook file.
  -ll     Show logbook entries for current run.
  -help   Print help message to STDOUT.
  -SIGNAL Send a signal to the chain (-term,-kill,-stop,-cont).

MINOR OPTIONS (several may be specified, defaults can be defined in chain.dat)
  DIR     Run CHAIN in directory DIR instead of the current directory.
  -n NN   Restrict chain to the next NN runs. For test purposes.
  -e      Export parameters as scalar variables into shell script
  -d      Debug shell script or perl program.
  -O N    Divert STDOUT (N: 0=throw away  1/2 = save 1/all runs to chain.out)
  -E N    Divert STDERR (N: 0=throw away  1/2 = save 1/all runs to chain.err)
  -m      Send mail after each run.
  -f      Force: ignore errors in chain.dat.
  -v      Verbose: copy all logbook entries to STDOUT.
  -i NN   Do NN extra steps of macro substitution/list expansion (experts only).

Full manual: perldoc chain
EOS: HELP

BOS: MANUAL

=head1 NAME

chain - run a chain of jobs

=head1 SYNOPSIS

C<chain [ DIR ]  -ctsrl  [-n NN]  [-i NN]  [-m]  [-f]  [-v]>

C<chain [ DIR ]  -SIGNAL>

C<chain -help>

C<perldoc chain>

=head1 DESCRIPTION

=head2 PURPOSE

CHAIN is a tool to make many runs of a program with different sets of
parameters. CHAIN provides a simple way to

=over 4

=item o

specify the different parameter combinations

=item o

automatically create the input files for each run

=item o

name output files uniquely

=back

=head2 CHECKLIST

If you want to use CHAIN with a particular code, here is what you have
to do:

=over 4

=item 1. 

Make your code read its parameters from files. The input files may
have any format you want.

=item 2. 

Create I<format files> for the input files which you want to control
with CHAIN. To do so, copy these files to unique names (e.g. simply add
a ".fmt" to the input file name). Edit these I<format files>. Replace
all parameters that you want controlled by CHAIN with named diamonds
(<NAME1>, <NAME2>, <NAME3>, ...). See below in the documentation for
an example.

=item 3. 

Use C<chain -c> to create a template of the data file F<chain.dat>.
Edit F<chain.dat>. Fill in file names, options, commands,
storage names and parameter combinations. If you are not sure
how to do this, see below in the documentation.

=item 4. 

Use C<chain -t> to check if CHAIN expands your list of parameter
combinations and storage names correctly. Also, check the
sample input files.

=item 5. 

Use C<chain -s -n 2> to run the first two jobs of the chain. Usually,
problems (if any) show up during the first two runs.

=item 6. 

Use C<chain -s &> to start the chain.

=item 7. 

Check the progress of the chain with C<chain -l>. If you need to kill
it, use C<chain -term> or, if necessary, C<chain -kill>. This sends the
corresponding signal to the whole group of processes related to CHAIN,
including the current run of your code.

=item 8. 

If someone manages to kill your chain or to crash your computer,
C<chain -r> will restart the chain.

=back

=head2 Format Files

CHAIN uses templates of the job's input files to create new input files
for each individual run. To create these templates, make a copy of any
input files. The suggested name for these templates is the input file
name with F<.fmt> appended (FORMAT file). In the format files, replace
all parameters that should be controlled by CHAIN with named diamonds
(e.g <RHO>, ...).  CHAIN will produce different input files by
replacing these diamonds with current values before each run.  If the
values need to be especially formatted, the name in the diamond may be
followed by a printf format specifier, e.g. <RHO%13.5e>.

    >   EXAMPLE:
    >   A format file might look like this:
    >
    >   # Parameter file for job xyz
    >   <OPAC>          # this is the file name of some table
    >   density             <RHO%10.2f>
    >   particle numbers    <NP1>  <NP2>
    >   start time          <TIME0%10.2e>

=head2 Logbook File F<chain.log>

Each run is logged in a LOGBOOK FILE F<chain.log>. This file may be
checked at any time to watch progress. It is also used for restarts
(C<chain -r>).  C<chain -l> produces a pretty report based on this
file.


=head2 Data File F<chain.dat>

CHAIN uses a DATA FILE F<chain.dat> to determine

=over 4

=item -

the names of input and format files and of parameters

=item -

the commands to execute for every run

=item -

the combinations of parameters to explore and

=item -

how to name the output uniquely.

=back

A template data file F<chain.dat> can be created with C<chain -c>.
This file has to be edited:

=over 4

=item 1. File and parameter names

S<INPUT FILE NAMES:> Enter the names of the input files that your code
uses.

S<FORMAT FILE NAMES:> Specify the names of the corresponding format
files, one for each input file. The default can be written as
C<{}.fmt> (input file names with C<.fmt> appended).

S<OUTPUT FILE NAMES:> Enter the names of all output files that have to
be saved. If the output has been collected in a directory, the name of
the directory can be given here.  CHAIN will rename all the files and
directories mentioned here to unique names.

S<PARAMETER NAMES:> Enter the names of parameters as you use them in
the format files. The parameters can be referenced under these names
in format files, command lines, the TODO list and in Perl code.

   >   EXAMPLE:
   >   This example defines two input and three output file names
   >
   >   INPUT  FILE NAMES: in.dat in1.dat
   >   FORMAT FILE NAMES: in.fmt in1.fmt
   >   OUTPUT FILE NAMES: out1.dat out2.dat err.dat
   >   PARAMETER NAMES:   RHO NP1 NP2 TIME0 OPAC

=item 1. Options

S<DEFAULT MINOR OPTIONS:> You may specify a list of minor options that
you want to be always in effect. This may be useful if a special chain
always needs some switches (in particular B<-i> and B<-e>). Note, that
no major option will be accepted here.

S<MAIL ADDRESS:> CHAIN will inform you via Email when the chain is
completed.  You may change the default mailing address here. See also
B<-m> option.

S<STORAGE NAME PREFIX:> A prefix for all storage names - usually only
used for specification of a directory.

S<STORAGE NAME POSTFIX:> A postfix for all storage names. Usually
empty, but handy if you decide to compress (.z .Z .gz) or tar (.tar)
the output.

S<ERROR CASE POSTFIX:> A special postfix that will be added to the
storage names only if your code or any of the system commands in the
script returned a non-zero exit status.

=item 2. Shell script to run your code

Enter here the commands that run your code. These lines are a shell
script which will be executed by /bin/sh for each individual run.  If
you have specified the B<-e> option, the parameter for the run will be
available to the script not only in the input files, but also as shell
variables. Thus things like C<$RHO> and C<${NP1}> will be interpreted
correctly (and you should not use parameter names that conflict with
important shell variables). Prior to execution, named diamonds with
(optional) print formats (e.g. <RHO%13.5e>) will also be interpolated
into the script.

Apart from running your program, the script may do other things, e.g.
compressing the output, collecting several files into a tar-file or a
directory, making a quick plot and sending it to the printer, etc.

    >   EXAMPLE:
    >   Here is a script that collects output into a directory "resdir"
    >
    >   prg $TIME0 <in.dat >out.dat
    >   rm -rf resdir
    >   mkdir resdir
    >   mv out.dat out1.dat out2.dat resdir

CHAIN will notice (and say so in the logbook file) when errors occur
during the execution of the script.

Instead of using a shell script, you may actually provide a perl
program here. See under "ADVANCED FEATURES" further down in this man
page.

=item 4. The TODO list

Each line in the TODO list specifies the parameters for one or several
runs of your code.

The first fields of each line are the storage names that will be used
to save the output files. CHAIN will rename the files mentioned under
OUTPUT FILE NAMES to these names. The number of storage names must be
consistent with the number of output file names.

    >   EXAMPLE:
    >   A TODO list entry appropriate for the output file names given 
    >   in the example above (out1.dat out2.dat err.dat) might be:
    >
    >   run1_out1 run1_out2 run1_err  ...

The actual values of parameters can be inserted into storage names.
Any occurrences of named diamonds (e.g. <NAME>) will be replaced by
the parameter with that name. As with the diamonds in the format file,
you may include format specifiers. In order to avoid white space in
file names, CHAIN will discard any whitespace characters introduced by
the formatting process. To get fixed-length fields, use zero-padded
formats.

    >   EXAMPLE:
    >   storage name template:   xx_r<RHO>_n<NP1%04d>.dat
    >   parameters of the run:   3 42
    >   storage name used:       xx_r3_n0042.dat

The rest of each line specifies the parameters to be used in the run
in the same sequence as given in the PARAMETER NAMES statement.  CHAIN
uses these parameters and the format files to put together the input
files for a run of your code. Several lines may be used for
parameters. When a line ends with a backslash, the next line is
treated as continuation line.

    >   EXAMPLE of a TODO list:
    >   fn_<NAME1>_<NAME2>    par1 par2 par3 par4 ...
    >   fn1_<NAME1>_<NAME2>   par1 par2  \
    >                         par3 par4 ...
    >   fn2_<NAME1>_<NAME2>   par1 par2 par3 par4 ...

It is not necessary to create an extra line for each combination of
parameters. Instead, several values for any parameter may be specified
and CHAIN will go through all possible combinations.  Parameters can
be specified in three ways:

     a) Plain value or string:     e.g. "42"  or "test.dat"
     b) List of values:            (val1,val2,val3,val4,...)
     c) Sequence of values:        [start:limit:step]

Plain values will create only one run. Lists will create a run for
each element of the list. Sequences will create a run for each value
from I<start> to I<limit> in steps of I<step>. I<step> consists of an
operator (+-*/) and a number. The default for I<step> is I<+1>.  List
and sequence specifiers must not contain spaces. When a list or a
sequence specifier is used, the storage name MUST have a diamond for
this particular parameter (or the storage name will not be unique).
Expansion also takes place when the list/sequence is only a part of
the parameter.

    >   EXAMPLE:
    >   The following TODO list:
    >
    >      simple                     a  b      c  d
    >
    >      test1_<N1>_<N2>_<N3>_<N4>  a  (b,c)  5  [2:4:*2]
    >
    >      test                       a  b      1  file(2,3).dat        
    >
    >   will create the following storage names and parameter combinations:
    >
    >      simple         a  b  c  d
    >
    >      test1_a_b_5_2  a  b  5  2
    >      test1_a_b_5_4  a  b  5  4
    >      test1_a_c_5_2  a  c  5  2
    >      test1_a_c_5_4  a  c  5  4
    >
    >      test           a  b  1  file2.dat
    >      test           a  b  1  file3.dat


=head1 OPTIONS

=head2 MAJOR OPTIONS

CHAIN will do nothing without a command line option. Exactly one of the
following options must be present.

=over 4

=item B<-c> 

Create a template version of the data file "chain.dat".

=item B<-t>

Test. CHAIN will check out the specification in "chain.dat", expand
sequences and lists, report errors.

=item B<-s>

Start the chain. An existing logbook file "chain.log" is removed.

=item B<-r>

Restart a chain that has been aborted before completion.  With this
option, CHAIN will examine the file "chain.log" and skip as many
parameter combinations as denoted "finished" in this file.

=item B<-l>

Create a report based on the logbook file.

=item B<-ll>

Show logbook entries for current run.

=item B<-help>

Print a help message to STDOUT.

=item B<-SIGNAL>

Send a signal to the chain and its children. SIGNAL can be one of
(term,kill,stop,cont).

=back

=head2 MINOR OPTIONS

These options modify the way CHAIN works. Several may be
specified. Some minor options may be given as defaults in the DEFAULT
MINOR OPTIONS entry in the parameter file chain.dat.

=over 4

=item DIR

Run CHAIN in directory DIR instead of the current directory.
(e.g. C<chain ~/work/doit -s>).

=item B<-n> NN

Restrict chain to the next NN runs. For test purposes.

=item -e

Export the parameters as scalar variables into the shell script (or
perl program). When this is active, a parameter RHO will be available
as $RHO etc. Watch out for conflicts between parameter names and
environment variables.

=item -d

Debug. For a shell script, this will run the shell with B<-vx> in
order to give you better understanding of what is going on. Currently,
this option has no effect on perl code supplied instead of the shell
script.

=item B<-O> N

Divert the output from the runs of your code. By default, all the
output will just go to STDOUT (normally your screen). B<-O 0> will
discard all output. B<-O 1> will save the output of the current run
into the file F<chain.out>, but overwrite it for every new run. 
B<-O 2> will put all output from all runs into F<chain.out>.
Note that this operation will only divert the output from your script
and code, not the output from CHAIN itself.

=item B<-E> N

Like B<-O>, but for STDERR and the file F<chain.err>.

=item B<-m>

Send mail as notification after *each* run. The default is to mail
only after the final run.

=item B<-f>

Force: ignore errors in chain.dat.

=item B<-v>

Verbose: copy all logbook entries to STDOUT.

=item B<-i> NN

Make NN extra steps of macro interpolation and list/sequence
expansion. By default, one macro interpolation followed by one
expansion step is done. B<-i> 1 provides for an additional macro
interpolation round, B<-i> 2 adds another expansion step etc. Don't
use this unless you know what you are doing.

=back 

=head1 ADVANCED FEATURES

This section covers some extra features for advanced use of CHAIN.
They are not required for basic uses. For some of them, knowledge of
the Perl language is required.

=head2 Special Characters

In the interpretation of TODO lists, the following characters have
special meanings: {}[](),:\= and SPACE and TAB.  If any of these
characters has to be part of a storage name or parameter, it can be
escaped with a backslash.

=head2 Other Properties of Sequences and Lists

=over 4

=item Ranges in Lists

Perl-like ranges can be part of lists:

    >   (1..5,7,10,12..14)

=item A List as Sequence Starter

The I<start> specifier in sequences may be a list of values. This can
be used to merge several sequences into each other:

    >   The sequence  [10,20,50:10000:*10]
    >
    >   is equivalent to the following list:
    >
    >   (10,20,50,100,200,500,1000,2000,5000,10000)

=item Parallel Sequences/Lists

By default, CHAIN considers all sequence and list specifiers to be
independent. Thus, CHAIN explores all possible combinations when
expanding several lists/sequences in the same line (orthogonal
expansion).  It is also possible to expand two or more lists in
parallel. In this case, CHAIN does not explore all combinations, but
puts together only all first elements, then all second elements etc.
In order to use this feature, the lists to be expanded in parallel
must be identified and have the same length. You do this by inserting
a numbered hash mark (e.g. #2) as the first element after the opening
parenthesis or bracket. Sequences/lists carrying the same number will
be expanded in parallel. By choosing several numbers, you define
several groups for parallel expansion. Here is an example.

    >   EXAMPLES: The following two examples show the
    >             difference between the two methods.
    >             The sequences/lists in the first line
    >             are expanded to what follows below.
    >
    >   (a,b) [1:2:+1] | (#1,a,b) [#1:1:2:+1] 
    >   ---------------|----------------------
    >    a 1           |  a 1                 
    >    a 2           |  b 2                 
    >    b 1           |                      
    >    b 2           |                      

=back

=head2 File Name Expansion

Whenever a list of file names is given in the parameter file
(i.e. input files, format files, output files and storage names),
CHAIN looks for the C-Shell {..,..,..} construct and expands it.

    >   The storage name   run_<N1>_{out1,out2,err}.dat
    >
    >   is an abbreviation for the following names:
    >
    >   run_<N1>_out1.dat run_<N1>_out2.dat run_<N1>_err.dat

An empty pair of braces "{}" has a special meaning.
When used in storage names in the TODO list, "{}" expands to the list of
output file names. Thus

    >   OUTPUT FILE NAMES: out1.dat out2.dat err.dat
    >   ...
    >   #--STORAGE-NAME----------INPUT-PARAMETERS-----------
    >   run_<N1>_{}
       
produces the same list of storage names as above.

Similarly, in the FORMAT FILE NAMES list, "{}" expands to the list of
input files. Thus

    >   INPUT  FILE NAMES: in1.dat in2.dat
    >   FORMAT FILE NAMES: {}.fmt
    >
    >   is equivalent to
    >
    >   FORMAT FILE NAMES: in1.dat.fmt in2.dat.fmt


=head2 Macros

C-Preprocessor-like #define macros may be used in the TODO list.
Macros are restricted to string replacement (no control structures, no
arguments).

    >   EXAMPLE:
    >   #define LIST1  (10,20,30,40,50,60,70,80,90,100)
    >   #define LIST2  (1,2,3,10,20,30,\
    >   100,200,300)
    >
    >   #define SEQ1   [100:1000:+100]
    >   #define PAIR   (1,2,3) (6,7,8)
    >
    >   test1_<N2>_<N3>  xx.dat LIST1 LIST2
    >   test2_<N2>_<N3>  xx.dat LIST1 SEQ1
    >   test3_<N2>_<N3>  xx.dat PAIR


=head2 Parameter and Storage Name Tests

The TODO list may contain embedded tests for specific parameters or
storage names. This can help to avoid typing errors or to check the
existence of files before starting the chain. It can also be misused
to change the values themselves. The test definition lines start with
"#test NAME". NAME is either an output file name or a parameter name,
as defined in chain.dat. The rest of the line may be any Perl
expression testing $_. CHAIN will assign the value of the
corresponding parameter or storage name to $_ and evaluate your
expression. Assigning to $_ will change the parameter. The test will
be applied to all lines following its definition.  To cancel a test,
say "#test NAME reset".

    >   EXAMPLE:
    >   This example defines tests for parameters RHO and OPAC.
    >   It checks if RHO is a positive number and if OPAC is
    >   an existing file in the subdirectory "src/".
    >
    >   #test RHO    $_>0;
    >   #test OPAC   -e "src/$_";


=head2 Perlcode instead of a Shell Script

Instead of a shell script to run your code, you may also use a
full-fledged Perl program. CHAIN will automatically detect which is being
used. The Perl program must define a subroutine RUN, which will be
called for each set of parameters.  The Perl program will
automatically be defined in a special package called I<user::>. The
following variables will always contain current values:

    @files      the storage names for the current run.
                Note that the first filename is $files[0]
    @pars       the parameters for the current run.
                Note that the first parameter is $pars[0].
    $irun       index of the TODO list entry (0,1,...)
                Attention: This starts with 0 only when "chain -s" is
                used. "chain -r" may start with higher values.
    $NAME1...   When running CHAIN with the `-e' option (export), the
                values of all parameters are directly available as
                scalar variables of the names given in the 
                PARAMETER NAMES statement.

    >   EXAMPLE:
    >   # Initialization, will be done only once
    >   $my_count = 0;
    >
    >   # This sub MUST always be defined
    >   sub RUN {
    >       &print_parameters;
    >   }
    >   sub print_parameters {		# subroutine definition
    >       $my_count++;
    >       print $my_count,$irun,@files,@pars;
    >   }

=head1 AUTHOR

 (c) 1996  Carsten Dominik                 dominik@strw.leidenuniv.nl

CHAIN is free software. It may be distributed under the same terms as
Perl.

=head1 FILES

  ./chain.dat  ./chain.log  ./chain.pid  ./chain.out  ./chain.err

=head1 BUGS 

When doing several macro interpolation steps (see option B<-i>), only
the first remembers the original order of macro definitions and other
lines. Thus, for the second iteration, all macros will be globally
active, not just below their definition line. In particular, macros
that are defined several times will have the value of their last
definition.

Debug is not implemented for Perl scripts.

=cut

EOS: MANUAL