#! /usr/bin/perl -w

use strict;
use Data::Dumper;
use Getopt::Long;
use File::Path;
use File::Copy;

use FindBin qw($Bin);   #add directory with the script 
use lib "$Bin";    #add bin to the library path
use shared;

# Usage/help text of the script. It is handed to die() when option parsing
# fails or fewer than three positional arguments are given. The string is
# double-quoted, so $0 is interpolated with the name the script was run as.
my $USAGE = "
$0 [<options>] <query fasta> <species> <evidence>

Runs external program with one source of evidence,
produces .gtf file, .about file and possibly others.

<species> specifies which set of parameters to use
<evidence> specifies the name of the source of evidence 
           (set to - to get a list of possible options,
            set to ALL to execute all possibilities)

Options:
--dir <directory> overrides default temporary directory
      (should be a subdirectory of the local directory).
--set <variable>=<value> overrides value of option <variable>
      from the configuration file (can be used multiple times).
--debug    will not delete temporary files
--restart  will restart interrupted job 
--continue restarts interrupted job but starts anew if temporary 
           directory for that evidence does not exists. Work well only
           if prepare-evidence is run with -debug.
--param <path>      path to parametric files
--config <filename>  
           By default, config witll be in the parameter directory for 
           the species under eh.config. This option allows to change it to 
           any filename (including the path). Useful if using the same 
           parameters and changing only path to evidence.
--norun will not run the external programs, only creates .about file.
";

#GLOBAL VARIABLES
# File-scoped state shared by the main flow and the subroutines below.

# positional arguments
my $Query_filename;          #name of the fasta file
my $Species;                 #name of directory with params
my $Evidence;                #symbolic name of the evidence

# values filled in from command-line switches
my $Debug = 0;               #keep temporary files?
my $Restart;                 #restart interrupted job?
my $Continue;                #continue interrupted job?
my $No_run;                  #no running of wrapper
my $Temp_dir;                #name of temporary directory
my $Config_filename;         #name of configuration file
my $Param_path = $PARAM_PATH; #path to parametric files
my @Rewrite_options;         #options set with --set <var>=<value>

# options obtained by a combination of config file
# and command-line options
my %Options;

# Parse the switches; GetOptions removes everything it recognizes from
# @ARGV, leaving only the positional arguments behind.
my $ret = GetOptions(
    "set=s"    => \@Rewrite_options,
    "dir=s"    => \$Temp_dir,
    "param=s"  => \$Param_path,
    "config=s" => \$Config_filename,
    "debug"    => \$Debug,
    "norun"    => \$No_run,
    "restart"  => \$Restart,
    "continue" => \$Continue,
);

# Option parsing must have succeeded and at least three positional
# arguments must remain in @ARGV; otherwise show the usage text and stop.
die $USAGE unless $ret && scalar(@ARGV) >= 3;
($Query_filename, $Species, $Evidence) = @ARGV;

# Without --config the configuration file is eh.config inside the
# species subdirectory of the parameter path.
unless(defined $Config_filename) {
    $Config_filename = join("/", $Param_path, $Species, "eh.config");
}
-r $Config_filename
    or die "Configuration file $Config_filename not found";

# Feed the config file lines to parse_options() first and the --set
# clauses from the command line second (the usage text promises that
# --set overrides the configuration file).
my @config_options = read_config($Config_filename);
foreach my $option_lines (\@config_options, \@Rewrite_options) {
    parse_options(\%Options, $option_lines);
}

# "-" in place of an evidence name only lists the names found in the
# configuration and exits without doing any work.
if($Evidence eq '-') {
    my $listing = join("\n ", get_evidence_list(\%Options));
    print "Possible values of <evidence> from config file $Config_filename:\n",
        " ", $listing, "\n\n";
    exit 0;
}

# The query fasta must be readable before anything is created on disk.
die "Fasta file $Query_filename not found" 
    unless -r $Query_filename; 

# Determine the temporary directory (--dir wins, otherwise the name is
# derived from the fasta file name by temp_dir_name()) and create it if
# it is not there yet.
if(!defined $Temp_dir) {
    $Temp_dir = temp_dir_name($Query_filename);
}
if(!-d $Temp_dir) {
    # Report the system error as well, so that e.g. a permission problem
    # can be told apart from a missing parent directory.
    mkdir($Temp_dir)
        or die "Cannot create temporary directory $Temp_dir: $!";
    print STDERR "Created temporary directory $Temp_dir\n";
}

# All further work happens inside the temporary directory; the subs below
# pass the paths they use through adjust_relpath().
chdir($Temp_dir) or die "Cannot change to $Temp_dir: $!";

# ALL expands to every evidence name found in the configuration,
# anything else is taken as a single evidence name.
my @evidence_list;
if($Evidence eq 'ALL') {
    @evidence_list = get_evidence_list(\%Options);
}
else {
    @evidence_list = ($Evidence);
}

foreach my $evidence (@evidence_list) {
    # progress message only when more than one evidence is processed
    if(scalar @evidence_list > 1) {
        print STDERR "Processing evidence $evidence\n";
    }

    # --norun skips the external program but still writes the .about file
    if(!$No_run) {
        run_wrapper($evidence);
    }
    make_about($evidence);
}
exit 0;

############################
# Builds the program-wrapper command line for one source of evidence and
# passes it to my_run().
#
# Argument:
#   $evidence - symbolic name of the evidence; its TASK, USE_MASKED and
#               FILES options are read with get_ev_option(), which dies
#               if any of them is not set.
#
# The script directory, the config file, the fasta file and every entry of
# FILES are passed through adjust_relpath() before they are put on the
# command line.
sub run_wrapper {

    my ($evidence) = @_;

    my $task = get_ev_option('TASK', $evidence);
    my $new_bin = adjust_relpath($Bin);
    my $new_config = adjust_relpath($Config_filename);
    my $new_fasta = adjust_relpath($Query_filename);
    # evidence that asks for masked input gets repeat.fasta instead of the
    # original query file
    if(get_ev_option('USE_MASKED', $evidence) eq 'true'){
        $new_fasta = 'repeat.fasta';
    }

    # FILES is a whitespace-separated list; every entry is adjusted
    # separately and prefixed by a space
    my $new_databases = '';
    foreach my $db (split ' ', get_ev_option('FILES', $evidence)) {
        $new_databases .= ' ' . adjust_relpath($db);
    }

    # forward the relevant command-line switches to the wrapper
    my $options = '';
    if($Debug) {
        $options .= ' --debug';
    }
    if(defined $Restart) {
        # NOTE(review): --restart is forwarded with an empty string as its
        # value; presumably program-wrapper expects an argument - confirm.
        $options .= " --restart ''";
    }
    if(defined $Continue) {
        $options .= " --continue";
    }
    foreach my $clause (@Rewrite_options) {
        # The clause is wrapped in single quotes for the shell, so a single
        # quote inside it has to be written as '\'' (close the quote, add an
        # escaped quote, reopen); otherwise the command would be malformed.
        (my $quoted = $clause) =~ s/'/'\\''/g;
        $options .= " --set '$quoted'";
    }

    my_run("$new_bin/program-wrapper $options $task $new_config "
           . "$evidence $new_fasta $new_databases");
}

############################
# Writes the file <evidence>.about into the current directory.
#
# Argument:
#   $evidence - symbolic name of the evidence; its BUCKETS and ABOUT
#               options are read with get_ev_option(), which dies if one
#               of them is not set.
#
# The file contains an "option schedule_parameter_name" line with the
# BUCKETS value, an "option gtf_output_file" line naming
# <evidence>.filtered.gtf, an empty line, and one "include" line for each
# whitespace-separated entry of ABOUT. Each entry is resolved inside
# <param path>/<species>/about/ and passed through adjust_relpath().
#
# Dies if the file cannot be opened or closed.
sub make_about {
    my ($evidence) = @_;

    # three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode and the handle cannot leak out of the sub
    my $about_file = "$evidence.about";
    open(my $about, '>', $about_file)
        or die "Cannot open about file $about_file: $!";

    printf $about "option schedule_parameter_name %s\n",
        get_ev_option('BUCKETS', $evidence);

    # plain print for text containing interpolated names and paths, so
    # that a "%" inside them is written literally instead of being taken
    # as a printf conversion
    print $about "option gtf_output_file ${evidence}.filtered.gtf\n\n";

    foreach my $include (split ' ', get_ev_option('ABOUT', $evidence)) {
        $include = $Param_path . "/" . $Species . "/about/" . $include;
        $include = adjust_relpath($include);
        print $about "include $include\n";
    }

    # buffered write errors show up at close
    close($about) or die "Cannot close about file $about_file: $!";
}

############################
# Returns the value of a per-evidence option from %Options.
#
# Arguments:
#   $name     - option suffix, e.g. 'TASK' or 'FILES'
#   $evidence - symbolic name of the evidence
#
# The key looked up is "EVIDENCE_" followed by the upper-cased string
# "<evidence>_<name>". Dies if %Options has no such key.
sub get_ev_option {
    my ($name, $evidence) = @_;

    # Both the evidence name and the option suffix are upper-cased.
    my $key = "EVIDENCE_" . uc($evidence . "_" . $name);

    return $Options{$key} if exists $Options{$key};
    die "Required option $key not set.";
}

############################
# Returns the names of all sources of evidence found in an option hash.
#
# Argument:
#   $options - reference to a hash of options; every key of the form
#              EVIDENCE_<NAME>_TASK defines the evidence <name>
#
# Returns a list of lower-cased evidence names. 'repeat' comes first when
# EVIDENCE_REPEAT_TASK is present (presumably because other evidence may
# read the masked repeat.fasta - confirm). The remaining names follow in
# the sorted order of their option keys, so the result does not depend on
# the internal ordering of the hash.
sub get_evidence_list {
    my ($options) = @_;

    my @evidence;
    if(exists $options->{'EVIDENCE_REPEAT_TASK'}) {
        push @evidence, 'repeat';
    }

    # sort the keys: plain "keys" returns them in an order that may differ
    # from run to run
    foreach my $option (sort keys %$options) {
        if($option =~ /^EVIDENCE_(.*)_TASK$/) {
            my $name = $1;
            next if $name eq 'REPEAT';   # already placed at the front
            push @evidence, lc $name;
        }
    }
    return @evidence;
}
