package mlproject;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;

import java.io.PrintWriter;

import java.util.ArrayList;
import java.util.LinkedList;

import mlproject.hmm.EvidenceEmissionModel;

import mlproject.hmm.EvidenceHMM;
import mlproject.hmm.EvidenceStateModel;

import mlproject.hmm.StateModel;

import mlproject.io.EvidenceCompositeInputReader;
import mlproject.io.GTFStateInputReaderEvidence137;
import mlproject.io.GTFWriter;
import mlproject.io.TrainingSequence;

import mlproject.phylo.EvolutionaryModel;
import mlproject.phylo.Kimura80Model;
import mlproject.phylo.PhylogeneticTree;

import mlproject.stats.Statistics;

import mlproject.util.Util;

/**
 * Command-line driver that trains an {@link EvidenceHMM} on the train split of every
 * dataset directory found under a given location and evaluates it on the matching test split.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - {@code "testAll"} evaluates emission model types 1 to 8 in turn and
 *       prints one tab-separated result row per model; any other value evaluates the single
 *       emission model type given in {@code args[4]} and writes {@code output_hmm.gtf} into
 *       each dataset's test directory.</li>
 *   <li>{@code args[1]} - directory containing the dataset sub-directories; the phylogenetic
 *       tree is read from {@code dataset_1/train/tree.tre} below it.</li>
 *   <li>{@code args[2]} - {@code "1"} passes {@code true} as the {@code allowATAStart} flag of
 *       {@link EvidenceStateModel}; any other value passes {@code false}.</li>
 *   <li>{@code args[3]} - transition coefficient handed to the HMM (parsed as a double).</li>
 *   <li>{@code args[4]} - emission model type (integer); required unless {@code args[0]} is
 *       {@code "testAll"}.</li>
 * </ol>
 */
public class MLProject {
    /** Location of the Newick tree file, relative to the dataset location. */
    private static final String TREE_FILE = "dataset_1/train/tree.tre";

    /** First emission model type evaluated in "testAll" mode. */
    private static final int FIRST_EMISSION_MODEL = 1;

    /** Last emission model type (inclusive) evaluated in "testAll" mode. */
    private static final int LAST_EMISSION_MODEL = 8;

    /** Usage text reported when the command line is incomplete. */
    private static final String USAGE =
        "Usage: MLProject testAll <location> <allowATAStart:0|1> <transitionCoeficient>\n"
        + "   or: MLProject <mode> <location> <allowATAStart:0|1> <transitionCoeficient> <emissionModelType>";

    public MLProject() {
    }

    /**
     * Program entry point; see the class documentation for the meaning of the arguments.
     *
     * @param args command-line arguments
     * @throws IllegalArgumentException if too few arguments are supplied or a numeric
     *         argument cannot be parsed
     * @throws FileNotFoundException if the dataset location is not a directory or an input
     *         file is missing
     * @throws IOException if the tree file is empty, the dataset directory cannot be listed,
     *         or reading/writing any data file fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        if (args == null || args.length < 4) {
            throw new IllegalArgumentException(USAGE);
        }
        boolean completeTest = args[0].equals("testAll");
        if (!completeTest && args.length < 5) {
            throw new IllegalArgumentException(USAGE);
        }
        String location = args[1];
        boolean allowATAStart = args[2].equals("1");
        double transitionCoeficient = Double.parseDouble(args[3]);
        int emissionModelType = 1;
        if (!completeTest) {
            emissionModelType = Integer.parseInt(args[4]);
        }

        // Fail early with a clear message instead of a NullPointerException from listFiles().
        File datasetDir = new File(location);
        if (!datasetDir.isDirectory()) {
            throw new FileNotFoundException("Dataset location is not a directory: " + location);
        }

        /* read tree from first dataset */
        // Resolving against the directory works whether or not location ends with a separator.
        String newick = readNewick(new File(datasetDir, TREE_FILE));

        StateModel stateModel = new EvidenceStateModel(allowATAStart);
        PhylogeneticTree tree = new PhylogeneticTree(newick);

        /* Initialize hmm */
        EvidenceEmissionModel emissionModel = createEmissionModel(tree);
        EvidenceHMM hmm = new EvidenceHMM(stateModel, emissionModel);
        hmm.setTransitionCoeficient(transitionCoeficient);

        File[] datasets = datasetDir.listFiles();
        if (datasets == null) {
            // listFiles() returns null on an I/O error even for an existing directory.
            throw new IOException("Unable to list dataset directory: " + location);
        }

        if (completeTest) {
            StringBuilder results = new StringBuilder();
            for (int emModel = FIRST_EMISSION_MODEL; emModel <= LAST_EMISSION_MODEL; emModel++) {
                emissionModel.setEmissionModelType(emModel);
                Statistics stats = runDatasets(datasets, tree, stateModel, hmm, false);
                System.out.println("--- Finished testing model " + emModel + " ---");
                results.append(emModel + "\t" + stats.toTabbedInfoString() + "\n");
            }
            System.out.println("--- RESULTS ---");
            System.out.println("Test\tPerfect\tExon TP\tExon FP\tExon FN\tExon sens\tExon prec\tCoding n. sens\tCoding n. spec\tCoding n. prec\tIntron n. sens\tIntron n. spec\tIntron n. prec");
            System.out.println(results);
        } else {
            emissionModel.setEmissionModelType(emissionModelType);
            Statistics stats = runDatasets(datasets, tree, stateModel, hmm, true);
            System.out.println("--- Finished testing model " + emissionModelType + " ---");
            System.out.println("--- RESULTS ---");
            System.out.println(stats);
        }
    }

    /**
     * Reads the first line of the given tree file, closing the file even if reading fails.
     *
     * @param treeFile file whose first line holds the tree description
     * @return the first line of the file
     * @throws IOException if the file cannot be read or contains no lines
     */
    private static String readNewick(File treeFile) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(treeFile));
        try {
            String newick = br.readLine();
            if (newick == null) {
                throw new IOException("Tree file is empty: " + treeFile.getPath());
            }
            return newick;
        } finally {
            br.close();
        }
    }

    /**
     * Builds the emission model with the fixed intergenic, intronic and three exonic
     * evolutionary models (base frequencies plus Kimura80 parameters) over the given tree.
     *
     * @param tree phylogenetic tree shared by all evolutionary models
     * @return the configured emission model; its emission model type is not yet set
     */
    private static EvidenceEmissionModel createEmissionModel(PhylogeneticTree tree) {
        EvidenceEmissionModel emissionModel = new EvidenceEmissionModel();
        emissionModel.setEmodelIntergenic(new EvolutionaryModel(tree,new double[]{0.44448760556490713,0.05551239443509287,0.05551239443509287,0.44448760556490713},new Kimura80Model(0.23740500177751464, 0.6536810457617929)));
        emissionModel.setEmodelIntronic(new EvolutionaryModel(tree, new double[]{0.43914528597477975,0.08378919793512597,0.1197832395921617,0.35728227649793254}, new Kimura80Model(0.3394214824142834,0.5146789433220592)));
        ArrayList<EvolutionaryModel> emodelExonic = new ArrayList<EvolutionaryModel>(3);
        emodelExonic.add(new EvolutionaryModel(tree, new double[]{0.337173765122421,0.09340437555501736,0.2211996447888916,0.3482222145336701}, new Kimura80Model(0.29820919354158326,0.20188192420999146)));
        emodelExonic.add(new EvolutionaryModel(tree, new double[]{0.24048632499336725,0.19082719082719082,0.12942519984773507,0.43926128433170686}, new Kimura80Model(0.22162973326046825,0.1656976351280791)));
        emodelExonic.add(new EvolutionaryModel(tree, new double[]{0.5036354823073195,0.05671352399418323,0.04398356531172818,0.39566742838676916}, new Kimura80Model(0.25051188519131423,0.40170737974641324)));
        emissionModel.setEmodelExonic(emodelExonic);
        return emissionModel;
    }

    /**
     * Trains the HMM on the train split and tests it on the test split of every directory in
     * {@code datasets}, accumulating the test results into one {@link Statistics} object.
     * Entries that are not directories are skipped without any output.
     *
     * @param datasets   entries of the dataset location
     * @param tree       phylogenetic tree passed to the input readers
     * @param stateModel state model shared by the readers, statistics and GTF writer
     * @param hmm        model to train and evaluate
     * @param writeGtf   when {@code true}, the predicted path of every valid test sequence is
     *                   written to {@code output_hmm.gtf} in the dataset's test directory
     * @return statistics accumulated over all test sequences
     * @throws IOException if reading the input data or writing the GTF output fails
     */
    private static Statistics runDatasets(File[] datasets, PhylogeneticTree tree,
            StateModel stateModel, EvidenceHMM hmm, boolean writeGtf) throws IOException {
        Statistics stats = new Statistics(stateModel);
        /* read datasets */
        for (int i = 0; i < datasets.length; i++) {
            if (!datasets[i].isDirectory()) continue;
            System.out.println("--- STARTING ITERATION " + i + " ---");
            String trainLocation = datasets[i].getAbsolutePath() + "/train/";
            String testLocation = datasets[i].getAbsolutePath() + "/test/";

            //train only on valid sequences
            LinkedList<TrainingSequence> trainSet =
                readValidSequences(trainLocation, tree, stateModel, "training", i);
            hmm.train(trainSet);
            System.out.println("--- Finished training iteration " + i + " ---");

            LinkedList<TrainingSequence> testSet =
                readValidSequences(testLocation, tree, stateModel, "testing", i);

            /*  test and optionally write output GTF*/
            GTFWriter gtfWriter = null;
            if (writeGtf) {
                gtfWriter = new GTFWriter(stateModel, testLocation + "output_hmm.gtf");
            }
            try {
                for (TrainingSequence s : testSet) {
                    int[] bestPath = hmm.infereBestPath(s.getInputSequence());
                    stats.addTrainingSequence(s, bestPath);
                    if (gtfWriter != null) {
                        gtfWriter.writeStates(s.getInputSequence().getName(), bestPath);
                    }
                }
                System.out.println("--- Finished testing iteration " + i + " ---");
            } finally {
                // Close the writer even when inference or writing fails part-way through.
                if (gtfWriter != null) {
                    gtfWriter.close();
                }
            }
        }
        return stats;
    }

    /**
     * Reads every sequence described by the standard data files in {@code dir}, keeps those
     * for which {@code isValid()} is true, and prints the GTF reader's summary.
     *
     * @param dir        directory path ending with a separator, containing
     *                   {@code all_genes.fasta}, {@code alignments.faa}, {@code exonerate.fasta},
     *                   {@code introns.fasta} and {@code all_genes.gtf}
     * @param tree       phylogenetic tree passed to the composite input reader
     * @param stateModel state model passed to the GTF reader
     * @param phase      word used in the summary header ("training" or "testing")
     * @param iteration  dataset index used in the summary header
     * @return the valid sequences in reading order
     * @throws IOException if the input files cannot be read
     */
    private static LinkedList<TrainingSequence> readValidSequences(String dir,
            PhylogeneticTree tree, StateModel stateModel, String phase, int iteration)
            throws IOException {
        EvidenceCompositeInputReader reader = new EvidenceCompositeInputReader(
            dir + "all_genes.fasta", tree, dir + "alignments.faa",
            dir + "exonerate.fasta", dir + "introns.fasta");
        GTFStateInputReaderEvidence137 gtfReader =
            new GTFStateInputReaderEvidence137(reader, dir + "all_genes.gtf", stateModel);

        LinkedList<TrainingSequence> valid = new LinkedList<TrainingSequence>();
        TrainingSequence seq;
        while ((seq = gtfReader.readNextTrainingSequence()) != null) {
            if (seq.isValid()) {
                valid.add(seq);
            }
        }
        System.out.println("--- Statistics for " + phase + " data in iteration " + iteration + " ---");
        System.out.println(gtfReader.summarize());
        return valid;
    }
}
