/*
 * Decompiled with CFR 0.152.
 */
package calhoun.analysis.crf.executables;

import calhoun.analysis.crf.Conrad;
import calhoun.analysis.crf.ModelManager;
import calhoun.analysis.crf.io.InputSequence;
import calhoun.analysis.crf.io.TrainingSequence;
import calhoun.util.Assert;
import calhoun.util.FileUtil;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Command-line tool that copies selected regions of a training-data file into a
 * new training-data file.
 *
 * <p>Arguments, in order:
 * {@code [configfile] [inputfile] [regionsfile] [pad] [outputfile] [flagForceGenic]}.
 * Every row of the regions file must have exactly three columns: sequence name,
 * start and end. Each region is widened by {@code pad} positions on both sides
 * and is skipped when the widened range does not satisfy
 * {@code 1 <= start <= end <= sequence length}.
 *
 * <p>When {@code flagForceGenic} is greater than zero, a region is kept only if
 * the labels at two probe positions just inside the unpadded region are not the
 * model's {@code "intergenic"} state.
 */
public class InputSequenceSubsetter {
    private static final Log log = LogFactory.getLog(InputSequenceSubsetter.class);

    /** Distance between each edge of the unpadded region and the label probed next to it. */
    private static final int GENIC_PROBE_INSET = 3;

    /** Name of the model state that a force-genic region must not carry at its probe positions. */
    private static final String INTERGENIC_STATE = "intergenic";

    /**
     * Entry point; see the class description for the argument list.
     *
     * @param args the six command-line arguments
     * @throws Exception if the configuration, the input data or the regions file
     *         cannot be read, or if a numeric argument or coordinate fails to parse
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 6) {
            System.out.println("The proper usage is [configfile] [inputfile] [regionsfile] [pad] [outputfile] [flagForceGenic]");
            System.out.println("  Note: if you don't care whether the region(s) are gene(s) or not, then set pad=0, flag=0.");
            System.out.println("  A regions file is tab delimited with [seqname] [start] [end], one line per region");
            Assert.a(false);
        }
        String configFile = args[0];
        String inputFile = args[1];
        String regionsFile = args[2];
        int pad = Integer.parseInt(args[3]);
        String outputFile = args[4];
        int flagForceGenic = Integer.parseInt(args[5]);

        Conrad c = new Conrad(configFile);
        ModelManager cm = c.getModel();
        Iterator<TrainingSequence<?>> iter = c.getInputHandler().readTrainingData(inputFile).iterator();
        String[][] regions = FileUtil.readFlatFile(regionsFile);
        System.out.println("Number of regions in regions file is " + regions.length);

        ArrayList<InputSequence> s = new ArrayList<InputSequence>();
        while (iter.hasNext()) {
            TrainingSequence<?> t = iter.next();
            String targetseqname = (String)t.getInputSequence().getComponent("name").getX(0);
            for (int j = 0; j < regions.length; ++j) {
                Assert.a(regions[j].length == 3);
                String seqname = regions[j][0];
                if (!targetseqname.equals(seqname)) {
                    continue;
                }
                int start = Integer.parseInt(regions[j][1]) - pad;
                int end = Integer.parseInt(regions[j][2]) + pad;
                log.debug("Region: " + seqname + ": " + start + "-" + end);
                if (start < 1 || start > end || end > t.length()) {
                    log.debug("Skipping Region: " + seqname + ": " + start + "-" + end);
                    continue;
                }
                InputSequence u = t.subSequence(start, end);
                int len = ((TrainingSequence)u).length();
                System.out.println("OK so far and length = " + len);
                if (flagForceGenic > 0) {
                    // Resolve the state index once per region; it is only looked up
                    // when the force-genic filter is actually in use.
                    int intergenic = cm.getStateIndex(INTERGENIC_STATE);
                    // NOTE(review): only the two interior probes decide acceptance.
                    // The padding flanks [0, pad) and [len - pad, len) are not
                    // validated: the scans that used to visit them ended in an
                    // unlabeled `continue` that merely advanced the scan itself, so
                    // they never rejected a region. Confirm whether the flanks are
                    // required to be intergenic before adding a real check.
                    if (!hasGenicInterior((TrainingSequence)u, len, pad, intergenic)) {
                        log.debug("Skipping Region (not genic at probe positions): " + seqname + ": " + start + "-" + end);
                        continue;
                    }
                }
                s.add(u);
            }
        }
        System.out.println("Will write " + s.size() + " training sequences to file");
        c.getInputHandler().writeTrainingData(outputFile, s);
    }

    /**
     * Tests the two probe labels that decide whether a padded region counts as genic.
     *
     * <p>The probes sit at index {@code pad + 3} and index {@code len - pad - 4} of
     * the padded region. A region too short to contain both probes cannot be
     * confirmed and is reported as not genic instead of indexing out of range.
     *
     * @param region     the padded sub-sequence whose labels are read
     * @param len        length of {@code region}
     * @param pad        number of padding positions added on each side
     * @param intergenic index of the intergenic state in the model
     * @return {@code true} when both probes exist and neither is labelled intergenic
     */
    private static boolean hasGenicInterior(TrainingSequence region, int len, int pad, int intergenic) {
        int leftProbe = pad + GENIC_PROBE_INSET;
        int rightProbe = len - pad - 1 - GENIC_PROBE_INSET;
        if (leftProbe < 0 || leftProbe >= len || rightProbe < 0 || rightProbe >= len) {
            return false;
        }
        return region.getY(leftProbe) != intergenic && region.getY(rightProbe) != intergenic;
    }
}

