#!/usr/bin/perl
#
# tfmt - a formatting tool for tables
#
# $Id: tfmt,v 1.12 2001/03/22 09:37:19 dominik Exp $
#

# the global variables are
my(@lines);        # holds the lines of the file
my(@start);        # line numbers of lines with &tfmtstart
my(@stop);         # line numbers of lines with &tfmtstop
my($joinstring);   # the string that is used to join the individual fields
my($minindent);    # minimum indentation
my(@fmt);          # formats for numbers
my(@sfmt);         # formats for strings
my(%force_fmt);    # format dictate from options etc.
my($underline_re); # re regular expression matching a line only made of = or -
my($total_length); # length length of a formatted line
my($j);            # a counter             

#option_variables
#local ($opt_help,$opt_plus,$opt_minus,$opt_i,$opt_s,$opt_S,$opt_j,
#    $opt_w,$opt_b,$opt_e,%opt_force_fmt);


# the following regexp matches lines made of 15 or more '-' or '='

$underline_re = "^\\s*([=-])\\1{5,}\\s*\$";


&yago( 10, 
       qw( -help +1:SKIPLINES -1:SKIPLINES -i -s:SEPSTRING%s -S:SEPSTRING%s
	   -j:JOINSTRING%s -w:SPACES%d -b:BLOCKSEPLINES%d 
	   -e =(%opt_force_fmt)) )
    or &print_usage,exit(1);

if ($opt_help) {
    &print_help;
    exit;
}

# -w actually should set $opt_j and $opt_s
&use_minus_w if $opt_w;

# we read entire files
undef $/;

 FILE:
while (<>) {

    # make a backup if editing in place
    if ($opt_i) {
	rename($ARGV,"$ARGV.bak") or die "Can't make backup of file $ARGV\n";
    }

    @lines = split(/^/m,$_);

    # Compute the range over which formatting is desired
    @start = ();
    @stop  = ();

    if (/\&tfmt(start|stop)/) {
	# internal directives present
	&make_start_stop_arrays || next FILE;
    }
    elsif ($opt_e || defined($opt_plus) || defined($opt_minus) ) {
	# range given on command line
	push(@start,$opt_plus-1);
	push(@stop,$#lines-$opt_minus+1);
    } else {
	# find the largest block (block defined by $opt_b)
	($start[0],$stop[0]) = &find_largest_block( $opt_b || 2) ;
	$start[0]--;
	$stop[0]++;
    }

    for $j (0..$#start) {

	# initialize a few arrays and variables
	&clean;

	if ( $start[$j] == -1 ) {
	    make_settings($opt_s,$opt_S,$opt_j,%opt_force_fmt);
	} else {
	    make_settings( &parse_tfmtstart($lines[$start[$j]]) );
	}

	&analyse_table ( $start[$j]+1 , $stop[$j]-1 );

	&format_lines  ( $start[$j]+1 , $stop[$j]-1 );
    }

    # write
    if ($opt_i) {
	open(OUT,">$ARGV") || die "Can't open $ARGV for writing\n";
	print OUT @lines;
	close OUT;
    } else {
	print STDOUT @lines;
    }
}

# --------------------------------------------------------------------
# --------------------------------------------------------------------
#   Subroutine definitions follow below
# --------------------------------------------------------------------
# --------------------------------------------------------------------

sub print_usage {
    print <<EOU
usage: tfmt [ -s SEPSTRING ]  [ -j JOINSTRING ]  [ N=%format ..]  [ N=EXPR ..]
            [ -i ]  [ filename ... ]  [ -e ]  [ +NN ]  [ -NN ]  [ -b NN ]
       tfmt -help
EOU
}

sub print_help {
    print <<"EOF";

tfmt: simple formatting for tables in ASCII files
=================================================

Usage: tfmt  [ OPTIONS ]  [filename...]

Options include:

-s STRING   Use STRING instead of whitespace as separator between columns
-j STRING   Use STRING as separator upon output
-w N        Use N space chars for -s and -j

N=%fmt      Format column N using the printf format specifier %fmt
N=EXPR      Replace column N with the value of EXPR (a perl expr. or block)

-i          Change files in place (backup copies with .bak extension)

+NN         Ignore first NN lines in each file
-NN         Ignore last NN lines in each file
-e          Format entire file (default is largest block in file)
-b NN       Number of consequtive empty lines separating blocks

For more options and detailed info use "perldoc $0".
EOF
}

sub make_start_stop_arrays {

    # scan file for lines containing &tfmtstart or &tfmtstop
    my ($i);

    for $i (0..$#lines) {
	push(@start,$i) if $lines[$i] =~ /\&tfmtstart/;
	push(@stop,$i)  if $lines[$i] =~ /\&tfmtstop/;
    }
    unshift(@start,-1)    unless @start;
    push(@stop,$#lines+1) unless @stop;
    unshift(@start,-1)    if $start[0] > $stop[0];
    push(@stop,$#lines+1) if $start[$#start] > $stop[$#stop];

    # sanity check
    for $i (0..$#start) {
	if ($stop[$i] <= $start[$i]) {
	    warn "$0: start/stop lines for tfmt confused in file $ARGV. Skipping file.\n";
	    return 0;
	}
    }
    return 1;
}

sub find_largest_block {
    # find the table by looking for the largest block containing 
    # at most N empty lines in a row.

    my ($N) = @_;
    my ($togo,$i,$max,$start,$stop,$length,$blockstart);

    for ($i=0; $i<=$#lines;$i++) {
	next unless $lines[$i] =~ /\S/;
	$start = $i++;
	$togo = $N;
	$i++ while ( $i <= $#lines && 
		    ( ($lines[$i] =~ /\S/) ? ($togo = $N) : --$togo ) );
	$stop = $i;
	$length = $stop-$start;
	
	if ($length > $max) {
	    $max = $length;
	    $blockstart = $start;
	}
    }
    if (defined($blockstart)) {
	return ($blockstart,$blockstart+$max-1);
    } else {
	return (0,0);
    }
}

sub make_settings {
    # compute and export splitpattern, joinstring and force_fmt

    my ($split,$Split,$join,$pat,$sub);

    ($split,$Split,$join,%force_fmt) = @_;

    $split = $Split if $Split;

    $pat = $split ? "\\s*\Q$split\E\\s*(?!\Q$split\E)" : "\\s+";
    # FIXME this breaks tfmt -s\&  for thing like &&. Why did I do this?

    # Create the split function which also applys any evaluate items

    $sub = <<EOF;
    package tfmt;
sub main::mysplit {
    local(\$_);
    my(\@F) = split /$pat/,\$_[0];

    # Unshift something, so that \$F[1] points to first column
    unshift \@F,0;
EOF
    foreach $k (keys %force_fmt) {
	$v = $force_fmt{$k};
	next if $v =~ /^%/;
	$sub .= "\n\n    # modify column $k\n    \$_ = \$F[$k];\n";
	if ($v =~ s/^\s*\#//s) {
	    $sub .= "    if (\&is_num) {\$_ = do {$v}}\n";
	} elsif ($v =~ s/^\s*\!\#//s) {
	    $sub .= "    unless (\&is_num) {\$_ = do {$v}}\n";
	} else {
	    $sub .= "    \$_ = do {$v};\n";
	}
	$sub .= "    \$F[$k] = \$_;\n";
    }
    delete $force_fmt{$k};
    $sub .= <<EOF;

    # shift, to kill first element again
    shift \@F;
    return \@F;
\}
EOF
    eval $sub;
    if ($@) {
	warn "Syntax error while evaluating definition of sub mysplit:\n$sub\n$@\n";
	exit(1);
    }
    
    unless ($join) {
	if ($split) {
	    if ($Split) {
		# join exactly equal to split
		$join = $split;
	    } else {
		# surround with spaces
		$join = $split;
		$join = " $join" unless $join =~ /^ /;
		$join = "$join " unless $join =~ / $/;
	    }
	} else {
	    $join = "  ";
	}
    }

    # export variables
    $joinstring = $join;
    # %force_fmt is automatically exported
}

sub parse_tfmtstart {
    # parse tfmt specification for -s, -j, and N=%? specifications
    no strict 'vars';
    my ($line) = @_;
    local($_)  = @_;

    # kill leading garbage
    chop;
    s/^.*?\&tfmtstart/\&tfmtstart/;

    # make quoted characters escaped
    s#([\'\"])(.*?)\1#join("\\","",split(//,$2))#eg;

    # hide escaped characters
    s/\\(.)/"\376".ord($1)."\377"/eg;

    # get the args
    s/&tfmtstart(?:\((.*?)\))?.*$/$1/;

    return ($opt_s,$opt_S,$opt_j,%opt_force_fmt) unless $_;

    # now: The options in the &tfmt line are like command line options.
    # in order to use the command line option parsing routines, we
    # localize @ARGV and the $opt* variables before calling the routines.

    local (@ARGV,$opt_s,$opt_S,$opt_j,%opt_force_fmt);

    @ARGV = split;
    foreach (@ARGV) { s/\376(\d+)\377/chr($1)/eg }

    yago( 10, qw(-s:%s -S:%s -j:%s -w:%i =(%opt_force_fmt) ) ) ||
	die "Don't understand arguments ($line) in file ARGV\n";

    &use_minus_w if $opt_w;
    return ($opt_s,$opt_S,$opt_j,%opt_force_fmt);
}

sub analyse_table {

    # default formatting
    my ($start,$stop) = @_;
    
    my @req;        # array of hashes to collect formating information

    my ($sign,$pre,$post,$exp,$alldigits);
    my ($lsign,$lpre,$lpost,$lexp);
    my ($length,$dlength,$flength1,$flength2,$elength,$slength);
    my ($facc,$eacc,$iacc);
    my ($width,$type,$acc);
    my ($force_sign,$force_width,$force_acc,$force_type);
    my ($indent,$isnumber);
    my ($lall,$s,$tmp,$i);
    my (@strings);

    # Let's see what the numbers are like...

  LINE:
    foreach (@lines[$start..$stop]) {

	next LINE if /^\s*$/;
	next LINE if /$underline_re/o;

	# indentation
	($indent,$_) = /^(\s*)(.*)/;
	if (defined($minindent)) {
	    $minindent = &min($minindent,length($indent));
	} else {
	    $minindent = length($indent);
	}

	@strings = &mysplit($_);

      STRING:
	for $i (0..$#strings) {

	    $s = $strings[$i];

	    $isnumber = ( $s =~ / ^ \s* ([-+])? (\d*) \.? (\d*?) 0* 
			 (?: [eE]([+-]?\d+) )? \s* $ /x ) 
		&& $_ ne '' && $_ ne '.';

	    if ($isnumber) {

		($sign,$pre,$post,$exp) = ($1,$2,$3,$4);
		($alldigits = $pre.$post) =~ s/0*$//;

		if ($exp ne '') {
		    $req[$i]{'e'}++;
		} elsif ($post ne '') {
		    $req[$i]{'f'}++;
		} elsif ($pre ne '') {
		    $req[$i]{'d'}++;
		}
		$req[$i]{'sign'}++     if $sign;
		$req[$i]{'signmust'}++ if $sign eq '+';
		
		$length  = length($s);
		$lpre    = length($pre);
		$lpost   = length($post);
		$lall    = length($alldigits);
		$lsign   = length($sign);
		$dlength = $lpre + $exp;
		$elength = $lall + 1 + 4;
		$eacc    = $lall - 1;
		$lpre   += $exp;
		$lpost  -= $exp;
		$lpre    = 0 if $lpre < 0;
		$lpost   = 0 if $lpost < 0;
		$tmp = sprintf("%#.${lpost}f",$s);
		($sign,$pre,$post) = 
		    ($tmp =~ / ^ \s* ([-+])? (\d*) \.? (\d*?) 0* \s* $/x);
		$flength1 = length($pre) + 1;
		$flength2 = length($post);
		$facc     = $flength2;

		$req[$i]{'length'}   = &max( $req[$i]{'length'}   , $length   );
		$req[$i]{'dlength'}  = &max( $req[$i]{'dlength'}  , $dlength  );
		$req[$i]{'flength1'} = &max( $req[$i]{'flength1'} , $flength1 );
		$req[$i]{'flength2'} = &max( $req[$i]{'flength2'} , $flength2 );
		$req[$i]{'elength'}  = &max( $req[$i]{'elength'}  , $elength  );
		$req[$i]{'eacc'}     = &max( $req[$i]{'eacc'}     , $eacc     );
		$req[$i]{'facc'}     = &max( $req[$i]{'facc'}     , $facc     );
	    } else {
		# it is a string
		$req[$i]{'s'}++;
		$slength  = length($s);
		$req[$i]{'slength'} = &max( $req[$i]{'slength'} , $slength );
	    }
	}
    }

    # Now use the collected information to create the format

    $total_length = -length($joinstring);

    for $i (0..$#req) {
	($force_sign,$force_width,$force_acc,$force_type) = 
	    ($force_fmt{$i+1} =~ /^ % ([-\+]{0,2}) (\d+)? \.? (\d+)? ([sdfe])? $/x);
	$type = 's';
	$type = 'd' if $req[$i]{'d'};
	$type = 'f' if $req[$i]{'f'};
	$type = 'e' if $req[$i]{'e'};
	$type = $force_type if $force_type;

	if ($type eq 'f') {
	    $width = $req[$i]{'flength1'} + $req[$i]{'flength2'};
	} else {
	    $width = $req[$i]{$type.'length'}; 
	}
	$width++ if $req[$i]{'sign'} || ($force_sign=~/\+/);
	$width = &max($width,$req[$i]{'slength'}); 
	$width = &max($width,$req[$i]{'length'}) if $force_type eq 's';
	$width = $force_width if $force_width;

	$total_length += $width + length($joinstring);

	$acc   = $req[$i]{$type.'acc'};
	$acc   = $force_acc if $force_acc;

	$sign  = ($req[$i]{'signmust'} || $force_sign=~/\+/) ? '+' : '';
	$sign .= '-' if $force_sign=~/-/;

	if ($type eq 's') {
	    $fmt[$i]  = "%-${width}s";
	    $sfmt[$i] = "%-${width}s";
	}
	elsif ($type eq 'd') {
	    $fmt[$i]  = "%${sign}${width}d";
	    $sfmt[$i] = "%${width}s";
	}
	elsif ($type eq 'f') {
	    $fmt[$i]  = "%${sign}#$width.${acc}f";
	    $sfmt[$i] = "%${width}s";
	}
	else {
	    $fmt[$i]  = "%${sign}${width}.${acc}${type}";
	    $sfmt[$i] = "%${width}s";
	}
    }
}

sub format_lines {
    # this routine does the actual formatting

    my ($start,$stop) = @_;
    my (@strings,$i);
    my ($indent) = ' ' x $minindent;

  LINE:
    foreach ( @lines[$start..$stop] ) {

	next LINE if /^\s*$/;
	if (/$underline_re/o) {
	    $_ = $indent . "$1" x $total_length . "\n";
	    next LINE;
	}

	@strings = mysplit($_);

	for $i (0..$#strings) {
	    if ($strings[$i] =~ 
		/^ \s* (\+|-)?(\d+\.?\d*|\.\d+)([eE](\+|-)?\d+)? \s* $/x) {
		$strings[$i] = int($strings[$i]) if $fmt[$i] =~ /i$/;
		$strings[$i] = sprintf($fmt[$i],$strings[$i]);
	    } else {
		$strings[$i] = sprintf($sfmt[$i],$strings[$i]);
	    }
	}
	$_ = $indent . join($joinstring,@strings)."\n";
    }
}

sub tfmt::is_num {
    $_ =~ /^ \s* (\+|-)?(\d+\.?\d*|\.\d+)([eE](\+|-)?\d+)? \s* $/x;
}

sub max {
    ($_[0] >= $_[1] ?  $_[0] : $_[1]);
}
sub min {
    ($_[0] >= $_[1] ?  $_[1] : $_[0]);
}
    
sub clean {
    # reset formatting information
    @fmt = ();
    @sfmt = ();
    undef $minindent;
}

sub get_my_opts {
    # My favorite way of parsing arguments:
    # Long options, argument type checking, bundling, other variable names,
    # defaults, automatic usage message, name=value pair interpretaion
    # I would use one of the modules, but I want both bundling and long names.
    # 
    # usage: get_my_opts( NMIX , USAGEREF, 
    #                    '-NAME:ARGTYPE ($VARIABLE) [DEFAULT]', ...   )
    #
    #   NMIX:     Optional number of non-options allowed mixed with options
    #             -1 means unlimited. 0 is default
    #   USAGEREF: Optional reference to a print_usage function
    #             If not given, one is created on the fly.
    #
    #   NAME:     name of option, can be more than one letter
    #             special:   -   looks for a bare '-'
    #                        -1  looks for -NN where NN is a number
    #                        +1  looks for +NN where NN is a number
    #                        =   looks for VAR=VALUE and sets $VAR to VALUE
    #                            or $opt_equal{VAR} to VALUE
    #   ARGTYPE:  i (integer)    f (float)   s (string)
    #   VARIABLE: Name of variable to be set. Default is $opt_NAME
    #             - when given as @VARIABLE, args will be pushed on that array
    #             - when given as %VARIABLE, VAR=VALUE pairs will go into hash
    #   DEFAULT:  Default value for option.
    #
    # Example:  get_my_opts( 3 , undef,
    #                        qw( -a -x -v -help -n:i[40]  =(%named)
    #                            -f($force)  -d:s(@dirs) 
    #                            ) ) or die "Error in options\n"; 
    #
    #           ... will - allow for up to three non-option mixed in
    #                    - print an automatic usage message on error
    #                    - look for -a -x -v -help and set $opt_...
    #                    - look for -n INT with a default of 40
    #                    - put all name=val pairs into hash %named
    #                    - look for -f and set $force
    #                    - push all -d args onto @dirs

    my ($nmix) = ($_[0] =~ /^-?\d+$/) ? shift(@_) : 0;
    my ($uref);
    my ($opt,$opt1,$arg,$var,$prefix,$def,$first,$rest,$err,$sign);
    my (%var,%arg,%prefix,@keep,@optlist);
    my ($check);
    local($_);

    # what kind of args do we expect?
    foreach (@_) {
	if (
	    ($opt,$arg,$prefix,$var,$def) = 
   	        /^ 
	        ( -\w+ | \+\d | \- | \= )      \s*  # option name
		([=:][ifs])?                   \s*  # arg? and arg type
		(?: \( ([\$\@\%])(\w*) \) )?   \s*  # varaible name and prefix
		(?: \[ (\S*?) \] )?            \s*  # default
 	        $/x ) {

	    ($opt1 = $opt) =~ s/^-//;
	    $var = "opt_$opt1" unless $var;
	    $var = 'opt_minus' if ($var eq 'opt_-1'); 
	    $var = 'opt_plus'  if ($var eq 'opt_+1'); 
	    $var = 'opt_equal' if ($var eq 'opt_=');  
	    $prefix = "\$" unless $prefix;
	    $arg = ':s' if $arg eq ':';
	    $arg{$opt} = $arg;
	    $prefix{$opt} = $prefix;
 	    eval "\$var{\$opt} = \\$prefix$var"; 
      	    $ {$var{$opt}} = $def if $def;
	    $arg = 'STRING'  if $arg eq ':s';
	    $arg = 'INTEGER' if $arg eq ':i';
            $arg = 'FLOAT'   if $arg eq ':f';
	    $opt1 = "[ $opt" . ($arg?" $arg":"") . " ]";
	    $opt1 = "[ NAME=VALUE ... ]" if $opt eq '=';
	    $opt1 = "[ -INTEGER ]"       if $opt eq '-1';
	    $opt1 = "[ +INTEGER ]"       if $opt eq '+1';
	    push(@optlist,$opt1);
	} elsif (ref($_) || $_ == 1) {
	    $uref = $_;
	} else {
	    die "Illegal option specifier $_\n";
	}
    }

    # What is there on the command line?
  ARG:
    while ($opt = shift(@ARGV)) {
	last ARG if $opt eq '--';
	if ( defined($var{'-'}) && $opt eq '-') {
	    # single dash
	    $ {$var{'-'}} = 1;
	}
	elsif ( defined($var{'='}) && 
	    ( ($var,$arg) = ($opt =~ /^(\w+)=(.*)$/) ) ) {
	    # var=arg syntax
	    if ($prefix{'='} eq '%') {
		# put into hash
		$ {$var{'='}}{$var} = $arg;
	    } else {
		# set variable directly;
		eval "\$$var = \$arg";
	    }
	    next ARG;
	}
	elsif ( ($sign,$arg) = ($opt =~ /^([-+])(-?\d+)$/ ) ) {
	    # +NN or -NN option
	    $ {$var{$sign.'1'}} = $arg;
	    next ARG;
	}
	elsif ( ($first,$rest) = ($opt =~ /^(-.)(.*)$/) ) {
	    if (defined $var{"$first$rest"}) {
		$opt = $first.$rest;
		$rest = '';
	    }
	    elsif (defined $var{$first}) {
		$opt = $first;
	    }
	    else {
		warn "$0: Unknown option $opt\n"; $err++;
		next;
	    }
	}
	else {
	    # not an argument
	    push(@keep,$opt);
	    $nmix-- ? next : last;
	}

	# no argument?
	unless ($arg{$opt}) {
	    $ {$var{$opt}} = 1;
	    unshift(@ARGV,"-$rest") if $rest;
	    next ARG;
	}

	# argument!
	$arg = ($rest ne "") ? $rest : shift(@ARGV);
	unless ($arg ne "") {
	    warn "Option -$opt requires an argument\n"; $err++;
	    last;
	}

	# argument type
	if ($arg{$opt} eq ":i") {
	    $check = ($arg =~ /^[+-]?\d+$/);
	}
	elsif ($arg{$opt} eq ":f") {
	    $check = ($arg =~ /(\+|-)?(\d+\.?\d*|\.\d+)([eE](\+|-)?\d+)?/);
	}
	else {
	    $check = 1;
	}

	if ($check) {
	    if ($prefix{$opt} eq '@') {
		push (@{$var{$opt}},$arg);
	    } else {
		$ {$var{$opt}} = $arg;
	    }
	    next ARG;
	} else {
	    warn "argument $arg is wrong type for option -$opt\n";
	    $err++;
	}
    }

    # put the kept stuff back on @ARGV
    unshift(@ARGV,@keep);
    if ($err) {
	# we had some errors
	if (ref($uref) eq "CODE") {
	    &$uref;
	} elsif ($uref) {
	    # create usage message on the fly
	    push(@optlist, ref($uref) eq "SCALAR" ? $$uref : "OTHER ARGS ...");
	    my($line) = "usage: $0";
	    my($len)  = length($line);
	    foreach (@optlist) {
		if (length($line)+2+length($_) <= 78) {
		    $line .= "  $_";
		} else {
		    print STDERR $line,"\n";
		    $line = ' ' x $len . "  $_";
		}
	    }
	    print STDERR $line,"\n";
	}
    }

    return $err == 0;
}

sub use_minus_w {
    if ($opt_w) {
	$opt_s = ' ' x $opt_w unless $opt_s || $opt_S;
	$opt_j = ' ' x $opt_w unless $opt_j;
    }
}


my @optlist;

sub yago {

    # Defaults
    my ($usage)  = 0;		# Don't print usage message
    my ($nmix)   = 0;		# No mixing of options with other args

    # Local variables
    my (@args) = @_;
    my ($opt,$opt1);
    my ($arg,$argname,$argtype,%arg);
    my ($var,%var);
    my ($prefix,%prefix);
    my ($first,$rest,$err,$sign,$check,$checkkey);
    my (@keep);
    @optlist = ();
    local($_);

    # Parse option specifiers
    foreach (@args) {
	print "working on $_\n";
	s/\s+// unless /^\d+$/;  # If it is a number it might be a constant..
	if (
	    ($opt,$arg,$argname,$argtype,$prefix,$var) = 
   	        /^ 
	        ( -\w+ | \+\d | \- | \= )       # option name
		(?: ([=:]) (\w+)? (%[idfs])? )? # arg?, name and type
		(?: \( ([\$\@\%]) (\w*) \) )?   # varaible name and prefix
 	        $/x ) {

	    $opt1 = substr($opt,1);
	    unless ($var) {
		$var = "opt_$opt1";
		$var = 'opt_minus' if ($opt eq '-1'); 
		$var = 'opt_plus'  if ($opt eq '+1'); 
		$var = 'opt_equal' if ($opt eq '=');
	    }
	    $prefix  = "\$" unless $prefix;
	    $prefix  = '%'  if $opt eq '=';
	    $argtype = '%s' if $arg && $argtype eq '';
	    $arg{$opt} = $argtype;
	    $prefix{$opt} = $prefix;
	    # create reference to variable
 	    eval "\$var{\$opt} = \\$prefix$var";
	    if ($arg && ! $argname) {
		$argname = 'STRING'  if $argtype eq '%s';
		$argname = 'INTEGER' if $argtype eq '%d' || $argtype eq '%i';
		$argname = 'FLOAT'   if $argtype eq '%f';
	    }
	    $opt1 = "[$opt" . ($arg?" $argname":"") . "]";
	    $opt1 = "[NAME=VALUE ...]" if $opt eq '=';
	    $opt1 = "[-INTEGER]"       if $opt eq '-1';
	    $opt1 = "[+INTEGER]"       if $opt eq '+1';
	    push(@optlist,$opt1);
	} elsif (/^\d+$/) {
	    $nmix = $_;
	} elsif ($_ eq 'usage') {
	    $usage = 1;
	} else {
	    die "Illegal option specifier $_\n";
	}
    }

    # Do we need to check the keys in NAME=VALUE pairs?
    $checkkey = scalar(keys %{$var{'='}}) if (exists($prefix{'='}));
 
    # What is there on the command line?
  ARG:
    while (scalar(@ARGV)) {
	$opt = shift(@ARGV); 
	last ARG if $opt eq '--';
	if ( defined($var{'-'}) && $opt eq '-') {
	    # single dash
	    $ {$var{'-'}} = 1;
	}
	elsif ( defined($var{'='}) && 
	    ( ($var,$arg) = ($opt =~ /^(\w+)=(.*)$/) ) ) {
	    # var=arg syntax
	    if ( !$checkkey || exists($ {$var{'='}}{$var})) {
		$ {$var{'='}}{$var} = $arg;
	    } else {
		push (@keep,$opt);
	    }
	    next ARG;
	}
	elsif ( ($sign,$arg) = ($opt =~ /^([-+])(-?\d+)$/ ) ) {
	    # +NN or -NN option
	    $ {$var{$sign.'1'}} = $arg;
	    next ARG;
	}
	elsif ( ($first,$rest) = ($opt =~ /^(-.)(.*)$/) ) {
	    if (defined $var{"$first$rest"}) {
		$opt = $first.$rest;
		$rest = '';
	    }
	    elsif (defined $var{$first}) {
		$opt = $first;
	    }
	    else {
		warn "$0: Unknown option $opt\n"; $err++;
		next ARG;
	    }
	}
	else {
	    # not an argument
	    push(@keep,$opt);
	    $nmix-- ? next : last;
	}

	# no argument?
	unless ($arg{$opt}) {
	    $ {$var{$opt}} = 1;
	    unshift(@ARGV,"-$rest") if $rest;
	    next ARG;
	}

	# argument!
	$arg = ($rest ne "") ? $rest : shift(@ARGV);
	unless ($arg ne "") {
	    warn "Option $opt requires an argument.\n"; $err++;
	    last;
	}

	# argument type
	if ($arg{$opt} eq "%d") {
	    $check = ($arg =~ /^[+-]?\d+$/);
	}
	elsif ($arg{$opt} eq "%f") {
	    $check = ($arg =~ /(\+|-)?(\d+\.?\d*|\.\d+)([eE](\+|-)?\d+)?/);
	}
	else {
	    $check = 1;
	}

	if ($check) {
	    if ($prefix{$opt} eq '@') {
		push (@{$var{$opt}},$arg);
	    } else {
		$ {$var{$opt}} = $arg;
	    }
	    next ARG;
	} else {
	    warn "argument \'$arg\' for option $opt is not type $arg{$opt}\n";
	    $err++;
	}
    }

    # Put non-options back on @ARGV
    unshift(@ARGV,@keep);

    # Usage message if required
    &yago_usage if ($err && $usage);

    return $err == 0;
}

sub yago_usage {
    # Create usage message
    my $HANDLE = 'STDERR';
    my $width  = 80;
    my $line   = "usage: $0";
    my $len    = length($line);

    push(@optlist, "OTHER ARGS ...")
	unless $optlist[$#optlist] eq "OTHER ARGS ...";

    print $HANDLE "\n";
    foreach (@optlist) {
	if (length($line)+2+length($_) <= $width) {
	    $line .= "  $_";
	} else {
	    print $HANDLE $line,"\n";
	    $line = ' ' x $len . "  $_";
	}
    }
    print $HANDLE $line,"\n\n";
}


__END__

=head1 NAME

B<tfmt> - simple formatting for tables in ASCII files

=head1 SYNOPSIS

  tfmt [ -s SEPSTRING ]  [ -j JOINSTRING ]  [ N=%format ..]  [ N=EXPR ..]
       [ -e ]  [ +NN ]  [ -NN ]  [ -b NN ]  [ -i ]  [ filename ... ]  

but usually simply

  tfmt [ filename... ]

=head1 DESCRIPTION

B<tfmt> is a formatter for simple tables. It reads text arranged in
columns from files or from STDIN and prints the table nicely
formatted. The basic idea was to have something as simple as B<fmt>
for tables. Like B<fmt>, B<tfmt> is not intended to typeset books but
to keep hand-maintained tables in files in readable shape. It can also
be used as in-line text filter for vi or Emacs.

B<tfmt> splits a line into columns on whitespace. After formatting,
the columns are joint back together with two space characters as
separator. Both the string used for line splitting and for field
joining can be changed (see B<-s> and B<-j> options).

To determine a format automatically, B<tfmt> looks at all numbers in a
column. If all are integers, they are printed as integers. If at least
one is float or exponential, the number format for the entire column
is chosen accordingly. The format is picked in a way that no
significant digits are lost. Strings are printed as they are. Columns
that contain any numbers are printed right justified, string-only
columns left justified. It is also possible to force a certain format
on a column with a C<N=%format> command line option.

In order to protect the non-tabular parts of a file, B<tfmt> formats
only the largest I<block> in a file. A block is defined as a sequence
of lines that contain no more than one (but see B<-b> switch)
consecutive empty lines. Thus, as long as the table is the biggest
thing in the file, it is sufficient to separate it by two empty lines
from any other text. If that does not work, the region to be formatted
may also be specified with the B<+NN> and B<-NN> switches, or with
directives directly inserted into the file. See below.

=head1 OPTIONS

=over 5

=item B<-s SEPSTRING>

=item B<-S SEPSTRING>

Use SEPSTRING rather than whitespace to separate columns. SEPSTRING is
also used as the default for JOINSTRING (see B<-j> option). Normally,
JOINSTRING will be chosen so that it contains at least one leading and
trailing SPACE chracter (i.e., C<-s '|'> implies C<-j ' | '>).
However, when specified with the large B<-S>, JOINSTRING will be
exactly equal to SEPSTRING.

=item B<-j JOINSTRING>

Use JOINSTRING rather than the default (two SPACE characters, or equal
to SEPSTRING, see option B<-s>) to join the columns together.

=item B<-w NN>

Use NN space characters for both SEPSTRING and JOINSTRING. This is
just a short cut for setting B<-s> and B<-j> explicitly.

=item B<N=%format>

Force formatting of column C<N> with the format C<%format>. You may
specify any parts of the format (e.g. 1=%e or 2=%13.2 or 3=%+), and
only those parts mentioned will be forced on the column.

=item B<N=EXPRESSION>

Replace column N by the value of EXPRESSION evaluated as a perl
block. During the evaluation, the current value of column N is aliased
to $_. The values from all columns can also be accessed in the
expression as $F[1], $F[2],... This allows you to execute some minimal
spreadsheet-like calculations.  If the expression starts with a '#',
it will be only applied to numeric fields, not to strings. If it
starts with !#, it will only be applied to strings.

=item B<-i> 

Format all files mentioned on the command line in place. Backup copies
of the old version are kept with an extension ".bak".

=item B<-e>

Format the entire file, not just the largest block. See B<-b>.

=item B<-b> NN

Number of consequtive empty lines that separate blocks. The default
for this is 2. Thus paragraphs separated by single empty lines are
still considered the same block.

=item B<+NN>

Skip the first B<NN> lines in each file as those lines are not part of
the table. The skipped lines are printed unchanged.

=item B<-NN>

Skip the last B<NN> lines in each file as those lines are not part of
the table. The skipped lines are printed unchanged.

=back

=head1 DIRECTIVES

The region to be formatted by B<tfmt> can be restricted in the file
itself with directives. These directives can be put into a comment
line. Table formatting is switched on with a directive

    &tfmtstart               or
    &tfmtstart()             or
    &tfmtstart( OPTIONS )

where C<OPTIONS> are command line-like options that will be used for
the local formatting operation. B<-s>, B<-S>, B<-j>, B<-w>, and
C<N=%format> are allowed here. An empty pair of parentheses will
invoke defaults.

Table formatting is switched of with a directive

    &tfmtstop

Several pairs of start/stop directives can be in one file. If the
first directive in a file is &tfmtstop, formatting starts from the
beginning of the file. Likewise, if the last directive is a
&tfmtstart, formatting continues to the end of the file.

=head1 EXAMPLES

The first example formats a table in a file F<tab.dat> and prints it
to the terminal.

    tfmt tab.dat

If the comments in the file are bigger than the table itself, you will
have to specify the location of the table. This may be done with
directives in the file (see further down) or directly on the command
line. Say, the first 62 lines are comment.

    tfmt +62 tab.dat

The next example first makes a backup copy with the name
F<tab.dat.bak> and then replaces F<tab.dat> with the formatted
version. 

    tfmt -i tab.dat

Format tab.dat to STDOUT, force exponential format on column 3.

    tfmt 3=%e tab.dat

Same as above, but add 1000 to the number and do the formatting with
a call to sprintf.

    tfmt 3='sprintf("%13.5e",$_+1000)'

Here is how you could use an C<&tfmtstart> directive in a LaTeX file
comment to format your table. Note that the C<&tfmtstart(...)>
directive is also used to set the separator string to '&' and the join
string to S<'  &  '>.

    \begin{table}
    %
    %   &tfmtstart( -s& -j '  &  ')
    %
    {\bf atom} & {\bf weight} \\ \hline
    H          & 1.004        \\
    He         & 4.000        \\ \hline
    %
    %   &tfmtstop
    %
    \end{table}

Finally, here is an emacs lisp function which will pipe your current
region through tfmt, prompting for command line arguments first.

 (defun tfmt-region (start end)
   "Filter region through tfmt."
   (interactive "r")
   (let ((cmd (read-input "filter region: " "tfmt")))
     (call-process-region start end shell-file-name t t nil "-c" cmd)))


=head1 BUGS

Whitespace separated columns can never have an empty field. You always
have to put in a place holder like C<---> or so. Also, you cannot have
more than one word in a column with the default settings. The best
workaround is to use explicitly two spaces (B<-w2>) as column
separator.

=head1 AUTHOR

Carsten Dominik    <dominik@strw.LeidenUniv.nl>