package mlproject.io;

import java.io.FileNotFoundException;
import java.io.IOException;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import mlproject.phylo.PhylogeneticTree;

/**
 * Reads multiple alignments, one at a time, from a FASTA file.
 *
 * <p>Records are grouped by the first space-separated token of their header
 * (the alignment name). The first record of a group names the reference
 * organism: its header token has to be in the format {@code organism_gene}.
 * Every further record of the same group must carry the species name as the
 * second space-separated token of its header.
 *
 * <p>Each alignment is returned column by column. Columns in which the
 * reference organism has a gap ({@code '-'}) are dropped, so the row of the
 * reference organism never contains gaps in the result.
 */
public class MultipleAlignmentReader implements InputReader<String>{
    PhylogeneticTree tree;
    FastaIterator fi;
    /** Name of the alignment being collected; {@code null} once the input is exhausted. */
    String currentAlignmentName;
    /** Row index (as given by the tree) of the reference organism of the current alignment. */
    int currentOrganismIx;
    /** One row per species of the tree; {@code null} where the alignment has no sequence. */
    String[] sequences;

    /**
     * Opens the FASTA file and reads its first record, which starts the first
     * alignment. If the file holds no record, {@link #readNextSequence()}
     * returns {@code null} right away.
     *
     * @param file path handed to {@code FastaIterator}
     * @param tree tree used to map species names to row indices
     * @throws FileNotFoundException declared for the underlying file access
     * @throws IOException declared for the underlying file access
     */
    public MultipleAlignmentReader(String file, PhylogeneticTree tree) throws FileNotFoundException, IOException {
        this.tree = tree;
        fi = new FastaIterator(file);

        String[] firstRecord = fi.next();
        if (firstRecord == null) {
            return;
        }
        startAlignment(firstRecord[0].split(" "), firstRecord[1]);
    }

    /**
     * Reads the next multiple alignment.
     *
     * <p>Element {@code i} of the returned sequence is the i-th alignment
     * column in which the reference organism has no gap; character {@code j}
     * of a column belongs to the species with tree index {@code j}. Species
     * without a sequence in the alignment, and rows that are shorter than the
     * reference row, contribute {@code '-'}.
     *
     * @return the next alignment, or {@code null} when all alignments have been read
     * @throws IOException if a follow-up record of an alignment has no species
     *         name in its header, or if reading the input fails
     */
    public InputSequence<String> readNextSequence() throws IOException {
        if (currentAlignmentName == null) {
            return null;
        }

        while (true) {
            String[] record = fi.next();

            if (record == null) {
                // End of input: emit what has been collected and mark the reader as exhausted.
                StringInputSequence last = buildCurrentAlignment();
                currentAlignmentName = null;
                return last;
            }

            String[] fields = record[0].split(" ");
            if (!fields[0].equals(currentAlignmentName)) {
                // A new alignment begins: emit the finished one, then keep the
                // record just read as the first row of the next alignment.
                StringInputSequence finished = buildCurrentAlignment();
                startAlignment(fields, record[1]);
                return finished;
            }

            if (fields.length < 2) {
                throw new IOException("Missing species name in FASTA header '" + record[0]
                        + "' of alignment '" + currentAlignmentName + "'");
            }
            sequences[tree.getSpecieIndex(fields[1])] = record[1];
        }
    }

    /**
     * Resets the per-alignment state and stores the first row of a new alignment.
     * The reference organism is the part of the alignment name before the first '_'.
     */
    private void startAlignment(String[] headerFields, String sequence) {
        sequences = new String[tree.getNumberOfSpecies()];
        currentOrganismIx = tree.getSpecieIndex(headerFields[0].split("_")[0]);
        currentAlignmentName = headerFields[0];
        sequences[currentOrganismIx] = sequence;
    }

    /** Turns the collected rows into columns, skipping columns where the reference row has a gap. */
    private StringInputSequence buildCurrentAlignment() {
        String reference = sequences[currentOrganismIx];
        List<String> columns = new ArrayList<String>();

        for (int col = 0; col < reference.length(); col++) {
            if (reference.charAt(col) == '-') {
                continue; // gap in the reference organism: drop the whole column
            }
            StringBuilder column = new StringBuilder(sequences.length);
            for (int row = 0; row < sequences.length; row++) {
                String rowSequence = sequences[row];
                if (rowSequence == null || col >= rowSequence.length()) {
                    // No sequence for this species, or the row ends before this column.
                    column.append('-');
                } else {
                    column.append(rowSequence.charAt(col));
                }
            }
            columns.add(column.toString());
        }
        return new StringInputSequence(currentAlignmentName, columns.toArray(new String[0]));
    }
}
