#! /usr/bin/perl -w
# Extract the genome sequence from each genbank/*.gb file into a FASTA file
# named algn/<filename>-<organism>.fa, and append it to output/all_genomes.fasta.
use strict;
use Bio::SeqIO;
use Data::Dumper;

# Read every GenBank file matching genbank/*.gb and write the first sequence
# record of each one to two places: its own FASTA file
# algn/<basename>-<label>.fa and the combined file output/all_genomes.fasta.
#
# The FASTA header <label> is derived from the record's 'source' feature:
# the first three characters of the first word of the 'organism' value plus
# the first two characters of the second word, followed by the upper-cased
# 'strain' value with non-word characters removed (when get_value() finds
# one). If no usable organism value is found the label starts with 'null'.
#
# Dies if an output file cannot be opened or closed; warns and skips an input
# file that yields no sequence record.
my $global_file = 'output/all_genomes.fasta';
open(my $global_fh, '>', $global_file)
    or die "Cannot open $global_file for writing: $!";

foreach my $path (glob("genbank/*.gb")) {
    my $stream = Bio::SeqIO->new(-file   => $path,
                                 -format => 'GenBank');

    # Reduce the path to its base name and drop only the trailing .gb
    # extension, so dots inside the name (e.g. "E.gallinarum.gb") survive.
    my $filename = $path;
    $filename =~ s/.*\///;
    $filename =~ s/\.gb\z//;
    print "$filename\n";

    # Only the first record of each file is exported.
    my $seq = $stream->next_seq();
    unless (defined $seq) {
        warn "No sequence record found in $path, skipping\n";
        next;
    }
    my $genome = $seq->seq();

    my $organism = 'null';
    foreach my $feature ($seq->get_SeqFeatures()) {
        next unless $feature->primary_tag() eq 'source';

        # NOTE(review): this reads the tags through annotation(); whether
        # 'organism'/'strain' are exposed there depends on the installed
        # BioPerl version -- confirm.
        my $annotation = $feature->annotation();

        # Build the label from the match result itself, so that a missing
        # or single-word organism name never picks up stale $1/$2 values
        # left over from a previously processed file.
        my $label = 'null';
        my $name  = get_value($annotation, 'organism');
        if (defined $name && $name =~ /^(\S+) (\S+)/) {
            $label = substr($1, 0, 3) . substr($2, 0, 2);
        }

        my $strain = get_value($annotation, 'strain');
        if (defined $strain) {
            $strain =~ s/\W+//g;
            $label .= uc $strain;
        }

        $organism = $label;
    }

    my $file = 'algn/' . $filename . '-' . $organism . '.fa';
    open(my $info_fh, '>', $file)
        or die "Cannot open $file for writing: $!";
    print {$info_fh} ">$organism\n$genome\n";
    # Close inside the loop so buffered write errors are reported per file.
    close($info_fh) or die "Cannot close $file: $!";

    print {$global_fh} ">$organism\n$genome\n";
}

close($global_fh) or die "Cannot close $global_file: $!";

# Fetch the single annotation value stored under $key in $collection.
#
# $collection must respond to get_Annotations($key) with a list of objects
# that each provide a value() method. When exactly one annotation is found
# its value is returned; for zero or several annotations nothing is returned
# (undef in scalar context, the empty list in list context).
sub get_value {
    my ($collection, $key) = @_;

    my @matches = $collection->get_Annotations($key);
    if (@matches == 1) {
        my ($only) = @matches;
        return $only->value();
    }

    # Ambiguous or missing annotation: signal "no value" to the caller.
    return;
}
 
