#! /usr/bin/perl -w

# TODO: get a list of .about/.gtf files to process.

use strict;
use Getopt::Long;
use File::Copy;
use File::Path;
use Cwd;
use Data::Dumper;

use FindBin qw($Bin);  #$Bin is the directory with the script
use lib "$Bin";        #add bin to the library path
use shared;

# Help text.  It is the message the script dies with when option parsing
# fails or when the number of positional arguments is not exactly two.
# $0 (the script name) is interpolated into the first line.
my $USAGE = "
Usage: $0 [options] <query fasta> <species>

Takes all .gtf and .about files in the temporary directory (if any),
and runs ExonHunter on this evidence.

<species> specifies which set of parameters to use

Options:
--dir <directory>   overrides default temporary directory
                    (should be a subdirectory in the local directory).
--set <variable>=<value> overrides value of option <variable>
                    from the config. file (can be used multiple times)
--cleanup           delete all temporary files.
--output <filename> output gtf file (default is stdout)
--param <path>      path to parametric files
--config <filename> location of eh.config (see prepare-evidence) 

--nogtf             do not process gtf files, assume they are processed already
--nosig             do not run signal advisors
--noadv             do not run advisors
--nomain            do not run the main engine
--gtfonly           equivalent to --nosig --noadv --nomain
                    (used to create files for training)
--exclude <comma-separated list>
                    do not use evidence files listed in the list
                    in combination 
";

# SCRIPT-WIDE STATE
# Values filled in from the command line (see the GetOptions call below).
my $Cleanup = 0;                # --cleanup: delete the temporary directory at the end
my $Species;                    # 2nd positional argument; names the parameter directory
my $Query_filename;             # 1st positional argument; the query fasta file
my $Output_filename;            # --output: result gtf file (stdout when left undefined)
my $Temp_dir;                   # --dir: temporary working directory
# --param: path to parametric files.  The default is $PARAM_PATH, which is
# not declared in this file (presumably exported by the shared module).
my $Param_path = $PARAM_PATH;
my $Orig_dir = cwd();           # working directory at start-up
my @Rewrite_options;            # every --set <var>=<value> argument, in order
my $Config_filename;            # --config: configuration file
my $Exclude = '';               # --exclude: comma-separated evidence sources to skip
my %Options;                    # config-file options combined with --set options

# Switches that disable pipeline stages; all of them start as 0 (off).
my %Run_options = map { $_ => 0 } qw(nogtf nosig noadv nomain gtfonly);

# Table of recognised command-line options and the variables that receive
# their values.
my @option_spec = (
    'set=s'     => \@Rewrite_options,
    'cleanup'   => \$Cleanup,
    'dir=s'     => \$Temp_dir,
    'output=s'  => \$Output_filename,
    'param=s'   => \$Param_path,
    'config=s'  => \$Config_filename,
    'exclude=s' => \$Exclude,
    # one boolean switch per stage flag, stored directly in %Run_options
    (map { ($_ => \$Run_options{$_}) } qw(gtfonly nogtf nosig noadv nomain)),
);

# GetOptions removes everything it recognises from @ARGV.
my $parsed_ok = GetOptions(@option_spec);

# What is left must be exactly <query fasta> and <species>.
die $USAGE unless $parsed_ok && @ARGV == 2;
($Query_filename, $Species) = @ARGV;

# Pick the configuration file: unless --config supplied one, use
# <param path>/<species>/eh.config.  Either way it must be readable.
$Config_filename = "$Param_path/$Species/eh.config"
    unless defined $Config_filename;
if (!-r $Config_filename) {
    die "Configuration file $Config_filename not found";
}

# Fill %Options: first from the lines of the config file, then from the
# --set arguments given on the command line.
my @config_options = read_config($Config_filename);
parse_options(\%Options, \@config_options);
parse_options(\%Options, \@Rewrite_options);

# --gtfonly is shorthand for --nosig --noadv --nomain.
if ($Run_options{'gtfonly'}) {
    $Run_options{$_} = 1 for qw(nosig noadv nomain);
}

# The query fasta file has to be readable.
if (!-r $Query_filename) {
    die "Fasta file $Query_filename not found";
}

# Without --dir, the name of the temporary directory is derived from the
# query file name by temp_dir_name().
$Temp_dir = temp_dir_name($Query_filename) unless defined $Temp_dir;

# Create the directory on first use and report that on stderr.
unless (-d $Temp_dir) {
    mkdir($Temp_dir) or die "Cannot create temporary directory $Temp_dir";
    print STDERR "Created temporary directory $Temp_dir\n";
}

# Everything below runs inside the temporary directory, so the paths
# given by the user are passed through adjust_relpath() first
# (presumably so that relative paths still resolve from here -- the
# helper is defined outside this file).
chdir($Temp_dir) or die "Cannot change to $Temp_dir";
my $new_bin            = adjust_relpath($Bin);
my $new_param_path     = adjust_relpath($Param_path);
my $new_query_filename = adjust_relpath($Query_filename);

# Base name shared by the combined advisor's .schedule and .advisor files.
my $superadvisor = 'super';

# Evidence preparation (skipped with --nogtf): run filter-gtf on every
# *.about file in the current (temporary) directory, gather the
# <base>.schedule files that are readable afterwards, and hand them to
# make-schedule, whose output is written to $superadvisor.schedule.
if(!$Run_options{'nogtf'}) {
    my @about_files = glob("*.about");
    my @schedule_files;

    # Split the --exclude list and turn its items into keys of a hash,
    # so that an evidence source can be looked up by its base name.
    my %exclude = map { $_ => 1 } split(',', $Exclude);

    # For each .about file run filter-gtf
    foreach my $file (@about_files) {
        # Strip the extension.  The dot is escaped so that only a literal
        # ".about" suffix is removed.
        my $base = $file;
        $base =~ s/\.about$//;
        next if exists $exclude{$base};

        # Send both stdout and stderr of filter-gtf to the log.  The
        # redirection is written ">file 2>&1" instead of the bash-only
        # "&>file": a POSIX sh such as dash reads "cmd &>file" as
        # "start cmd in the background, then truncate file".
        # NOTE(review): assumes my_run hands the string to a shell -- confirm.
        my $log = $base . ".filterlog";
        my_run("$new_bin/filter-gtf $file $new_query_filename >\"$log\" 2>&1");

        # Only schedules that are readable after the run are combined.
        my $schedule = $base . ".schedule";
        if(-r $schedule) {
            push @schedule_files, $schedule;
        }
        else {
            warn "Cannot open schedule $schedule";
        }
    }

    # Create schedule
    my_run("$new_bin/make-schedule "
           . "$new_param_path/$Species/advisors/combine.schedule "
           . "$superadvisor.advisor " . join(" ", @schedule_files)
           . " >$superadvisor.schedule");
}

# Advisor stage (skipped with --noadv).
if(!$Run_options{'noadv'}) {
    # Run advisors; stdout and stderr both end up in adv.err.
    # The redirection is written "> file 2>&1" instead of the bash-only
    # "&> file": a POSIX sh such as dash reads "cmd &> file" as
    # "start cmd in the background, then truncate file".
    # NOTE(review): assumes my_run hands the string to a shell -- confirm.
    my_run("$new_bin/advisors $new_param_path/$Species/advisors/gccont.adv"
           . " $new_param_path/$Species/advisors/ ''"
           . " \"\@$superadvisor.schedule\" $new_query_filename"
           . " > \"adv.err\" 2>&1");
}

# Signal advisor stage (skipped with --nosig).
if(!$Run_options{'nosig'}) {
    # Run signal advisors; stdout and stderr both end up in sig.err.
    # The redirection is written "> file 2>&1" instead of the bash-only
    # "&> file": a POSIX sh such as dash reads "cmd &> file" as
    # "start cmd in the background, then truncate file".
    # NOTE(review): assumes my_run hands the string to a shell -- confirm.
    my_run("$new_bin/advisors $new_param_path/$Species/model.tab"
           . " $new_param_path/$Species/signals/ ''"
           . " external-signals.schedule $new_query_filename"
           . " > \"sig.err\" 2>&1");
}

# Main stage (skipped with --nomain): run the HMM engine, then score its
# output (result.gtf) into result-scored.gtf.
if(!$Run_options{'nomain'}) {
    # The scoring step needs SUPPORT_THRESHOLD.  Check it before the
    # engine is started, so that a missing option is reported right away
    # instead of after the engine has already run.
    my $threshold = $Options{'SUPPORT_THRESHOLD'};
    die "Required option SUPPORT_THRESHOLD not set"
        unless defined $threshold;

    # Run main HMM engine; stdout and stderr both end up in main.err.
    # The redirection is written "> file 2>&1" instead of the bash-only
    # "&> file": a POSIX sh such as dash reads "cmd &> file" as
    # "start cmd in the background, then truncate file".
    # NOTE(review): assumes my_run hands the string to a shell -- confirm.
    my_run("$new_bin/main -t -o \"external-signal.result\""
           . " -a \"$superadvisor.advisor\""
           . " $new_param_path/$Species/model.hmm"
           . " $new_param_path/$Species/model.tab"
           . " $new_query_filename result"
           . " > main.err 2>&1");

    # Score the HMM output
    my_run("$new_bin/score_support.pl -i -t $threshold $superadvisor.advisor "
           . "result.gtf > result-scored.gtf");
}


# Return to the directory the script was started in.
chdir($Orig_dir) or die "Cannot change to $Orig_dir";

# Hand over the scored predictions, unless the main engine was skipped.
unless ($Run_options{'nomain'}) {
    my $scored_gtf = "$Temp_dir/result-scored.gtf";

    if (!defined $Output_filename) {
        # no --output given: copy the result to STDOUT
        copy($scored_gtf, \*STDOUT)
            or die "Cannot copy result to stdout";
    }
    else {
        # --output given: move the result to the requested location
        move($scored_gtf, $Output_filename)
            or die "Cannot move output to $Output_filename";
    }
}

# With --cleanup the whole temporary directory is removed.
if ($Cleanup) {
    print STDERR "Deleting the temporary directory.\n";
    File::Path::rmtree($Temp_dir);
}

# Final message on stderr, stamped with the current local time.
print STDERR "Done " . localtime() . "\n";

exit(0);


