/*
 * Decompiled with CFR 0.152.
 */
package tsg.parsingExp;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;
import tsg.Label;
import tsg.TSNodeLabel;
import tsg.corpora.Wsj;
import tsg.mb.TreeMarkoBinarization;
import tsg.mb.TreeMarkoBinarizationLeft_LC;
import tsg.parseEval.EvalB;
import tsg.parseEval.EvalC;
import util.FileUtil;
import util.Utility;
import wordModel.UkWordMapping;
import wordModel.UkWordMappingStd;

/**
 * Re-ranks n-best parse lists with a CFG-rule model estimated by relative
 * frequency from a training treebank.
 *
 * <p>Constructing an instance runs the whole experiment: it trains the rule
 * table from {@code trainingCorpus}, picks the highest-scoring tree of every
 * n-best list, writes the selection to {@code <prefix>.mrg} and evaluates it
 * against the gold file ({@code <prefix>.evalB}, {@code <prefix>.evalC}).
 *
 * <p>The unknown-word model, the binarizer and the unknown-rule probability are
 * static, shared settings; {@link #main(String[])} initialises them.
 */
public class PCFG_reranker {
    /** Unknown-word model; used when {@code UkWordMapping.ukThreashold > 0}. */
    static UkWordMapping ukModel;
    /** Whether training trees are binarized and selected trees un-binarized. */
    static boolean markovBinarize;
    /** Binarizer used when {@link #markovBinarize} is set. */
    static TreeMarkoBinarization treeMarkovBinarizer;
    /** Probability (not log-probability) assigned to a rule unseen in training. */
    static double defaultProbUnknown = 1.0E-10;

    File trainingCorpus;
    File goldFile;
    File nBestFile;
    String rerankedFilePrefix;
    int nBest;
    /** Maps a rule string (as returned by {@code cfgRule()}) to the log of its relative frequency. */
    Hashtable<String, Double> cfgRuleLogProb;

    /**
     * Stores the experiment settings, then immediately trains the model and
     * runs the re-ranking.
     *
     * @param trainingCorpus     treebank used to estimate rule probabilities
     * @param goldFile           gold trees, one per n-best list
     * @param nBestFile          file holding the n-best lists
     * @param rerankedFilePrefix path prefix of the output files
     * @param nBest              number of candidates requested per sentence
     * @throws Exception if training, reading or evaluation fails
     */
    public PCFG_reranker(File trainingCorpus, File goldFile, File nBestFile, String rerankedFilePrefix, int nBest) throws Exception {
        this.trainingCorpus = trainingCorpus;
        this.goldFile = goldFile;
        this.nBestFile = nBestFile;
        this.rerankedFilePrefix = rerankedFilePrefix;
        this.nBest = nBest;
        this.trainFromCorpus();
        this.rerank();
    }

    /**
     * Reads the next n-best list from the scanner.
     *
     * <p>NOTE(review): the class this file was decompiled from was built with a
     * compile error in this method, so only the compiler-generated stub below
     * survives; the real implementation cannot be recovered from this file.
     * {@code rerank()} calls this method and will therefore fail until the body
     * is restored against the current {@code UkWordMapping} API.
     *
     * @param nBest number of candidates requested
     * @param s     scanner positioned inside the n-best file
     * @return never returns normally in its current form
     * @throws Exception declared by the original signature
     */
    public static ArrayList<TSNodeLabel> nextNBest(int nBest, Scanner s) throws Exception {
        throw new Error("Unresolved compilation problem: \n\tThe method transformTree(TSNodeLabel) is undefined for the type UkWordMapping\n");
    }

    /**
     * Fills {@link #cfgRuleLogProb} with {@code log(freq(rule) / freq(lhs))} for
     * every rule rooted at a non-lexical node of the (optionally unknown-word
     * mapped and binarized) training treebank, and prints the smallest rule
     * probability.
     */
    private void trainFromCorpus() throws Exception {
        Hashtable<String, int[]> ruleFreqTable = new Hashtable<String, int[]>();
        Hashtable<String, int[]> lhsFreqTable = new Hashtable<String, int[]>();

        ArrayList<TSNodeLabel> corpus = TSNodeLabel.getTreebank(this.trainingCorpus);
        if (UkWordMapping.ukThreashold > 0) {
            ukModel.init(corpus, null);
            corpus = ukModel.transformTrainingTreebank();
        }
        if (markovBinarize) {
            corpus = treeMarkovBinarizer.markoBinarizeTreebank(corpus);
        }

        // Count every rule and its left-hand side label.
        for (TSNodeLabel tree : corpus) {
            for (TSNodeLabel node : tree.collectAllNodes()) {
                if (node.isLexical) {
                    continue;
                }
                Utility.increaseInTableInt(ruleFreqTable, node.cfgRule());
                Utility.increaseInTableInt(lhsFreqTable, node.label());
            }
        }

        Hashtable<String, Double> logProbs = new Hashtable<String, Double>();
        double minProb = Double.MAX_VALUE;
        for (Map.Entry<String, int[]> entry : ruleFreqTable.entrySet()) {
            String rule = entry.getKey();
            // The left-hand side is the text before the first blank of the rule string.
            String lhs = rule.substring(0, rule.indexOf(' '));
            int ruleFreq = entry.getValue()[0];
            int lhsFreq = lhsFreqTable.get(lhs)[0];
            double prob = (double) ruleFreq / (double) lhsFreq;
            if (prob < minProb) {
                minProb = prob;
            }
            logProbs.put(rule, Math.log(prob));
        }
        this.cfgRuleLogProb = logProbs;
        System.out.println("Min Rule prob: " + minProb);
    }

    /**
     * Sums the log-probabilities of the rules rooted at the non-lexical nodes
     * of {@code t}. Rules missing from the trained table contribute
     * {@code log(defaultProbUnknown)}.
     *
     * @param t tree to score
     * @return natural-log probability of the tree under the trained rule table
     */
    private double getLogProb(TSNodeLabel t) {
        // defaultProbUnknown is a probability; it must be moved to log space
        // before it is added to the other log-probabilities.
        double unknownRuleLogProb = Math.log(defaultProbUnknown);
        double totalLogProb = 0.0; // log(1): neutral element of the sum
        for (TSNodeLabel node : t.collectAllNodes()) {
            if (node.isLexical) {
                continue;
            }
            Double ruleLogProb = this.cfgRuleLogProb.get(node.cfgRule());
            totalLogProb += (ruleLogProb == null) ? unknownRuleLogProb : ruleLogProb.doubleValue();
        }
        return totalLogProb;
    }

    /**
     * Returns the probability of {@code t}: the product of the probabilities of
     * the rules rooted at its non-lexical nodes, with {@code defaultProbUnknown}
     * standing in for rules unseen in training.
     *
     * <p>The value is computed as {@code Math.exp} of a log-probability sum and
     * can underflow to {@code 0.0} for large trees.
     *
     * @param t tree to score
     * @return probability of the tree under the trained rule table
     */
    public double getProb(TSNodeLabel t) {
        return Math.exp(this.getLogProb(t));
    }

    /**
     * For every gold tree reads one n-best list, keeps the candidate with the
     * highest model score (the first candidate wins ties), writes it to
     * {@code <prefix>.mrg} and finally evaluates the output against the gold
     * file, printing the number of sentences whose choice differs from the
     * first candidate.
     */
    private void rerank() throws Exception {
        ArrayList<TSNodeLabel> goldTreebank = TSNodeLabel.getTreebank(this.goldFile);
        File rerankedFileEvalB = new File(this.rerankedFilePrefix + ".evalB");
        File rerankedFileEvalC = new File(this.rerankedFilePrefix + ".evalC");
        File rerankedFile = new File(this.rerankedFilePrefix + ".mrg");

        int size = goldTreebank.size();
        int activelyReranked = 0;
        System.out.println("Gold Test TreeBank size: " + size);

        Scanner nBestScanner = FileUtil.getScanner(this.nBestFile);
        try {
            PrintWriter pw = FileUtil.getPrintWriter(rerankedFile);
            try {
                int sentenceIndex = 0;
                for (TSNodeLabel goldTree : goldTreebank) {
                    ArrayList<TSNodeLabel> nBestTrees = PCFG_reranker.nextNBest(this.nBest, nBestScanner);
                    if (nBestTrees.isEmpty()) {
                        throw new java.util.NoSuchElementException(
                                "Empty n-best list for sentence " + sentenceIndex + " in " + this.nBestFile);
                    }
                    ArrayList<Label> goldTreeLexLabels = goldTree.collectLexicalLabels();

                    // Scores are compared in log space: exponentiating first
                    // underflows to 0.0 on large trees and hides the differences.
                    TSNodeLabel bestReranked = nBestTrees.get(0);
                    double maxLogProb = this.getLogProb(bestReranked);
                    boolean reranked = false;
                    for (int k = 1; k < nBestTrees.size(); k++) {
                        TSNodeLabel candidate = nBestTrees.get(k);
                        double logProb = this.getLogProb(candidate);
                        if (logProb > maxLogProb) {
                            maxLogProb = logProb;
                            bestReranked = candidate;
                            reranked = true;
                        }
                    }
                    if (reranked) {
                        ++activelyReranked;
                    }

                    if (markovBinarize) {
                        bestReranked = treeMarkovBinarizer.undoMarkovBinarization(bestReranked);
                    }
                    if (UkWordMapping.ukThreashold > 0) {
                        // Put the gold tree's lexical labels back on the selected tree.
                        bestReranked.changeLexLabels(goldTreeLexLabels);
                    }
                    pw.println(bestReranked.toString());
                    ++sentenceIndex;
                }
            } finally {
                pw.close();
            }
        } finally {
            nBestScanner.close();
        }

        new EvalB(this.goldFile, rerankedFile, rerankedFileEvalB);
        EvalC eval = new EvalC(this.goldFile, rerankedFile, rerankedFileEvalC, null, true);
        float[] results = eval.makeEval();
        System.out.println("Actively Reranked: " + activelyReranked);
        System.out.println("Reranked Recall Precision FScore: " + Arrays.toString(results));
    }

    /**
     * Runs the experiment once for each n-best size in
     * {1, 5, 10, 100, 500, 1000}, writing the outputs of each run under
     * {@code <rerankedFilePrefix>_<n>}.
     *
     * @param args {@code trainingCorpus goldFile nBestFile rerankedFilePrefix}
     * @throws IllegalArgumentException if fewer than four arguments are given
     * @throws Exception if a run fails
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 4) {
            throw new IllegalArgumentException(
                    "Usage: PCFG_reranker <trainingCorpus> <goldFile> <nBestFile> <rerankedFilePrefix>");
        }
        File trainingCorpus = new File(args[0]);
        File goldFile = new File(args[1]);
        File nBestFile = new File(args[2]);
        String rerankedFile = args[3];

        UkWordMapping.ukThreashold = 4;
        ukModel = new UkWordMappingStd();
        markovBinarize = true;
        treeMarkovBinarizer = new TreeMarkoBinarizationLeft_LC();
        TreeMarkoBinarization.markH = 0;
        TreeMarkoBinarization.markV = 0;
        defaultProbUnknown = 1.0E-10;
        System.out.println("default Prob Unknown:" + defaultProbUnknown);

        int[] nBestSizes = new int[]{1, 5, 10, 100, 500, 1000};
        for (int n : nBestSizes) {
            System.out.println("n = " + n);
            String rerankedFilePrefix = rerankedFile + "_" + n;
            new PCFG_reranker(trainingCorpus, goldFile, nBestFile, rerankedFilePrefix, n);
        }
    }

    /**
     * Convenience launcher that runs {@link #main(String[])} with hard-coded
     * WSJ paths; the supplied {@code args} are ignored.
     *
     * @param args ignored
     * @throws Exception if the run fails
     */
    public static void main1(String[] args) throws Exception {
        // Delegates to main(); calling main1() here would recurse without end.
        PCFG_reranker.main(new String[]{Wsj.WsjOriginalCleanedTopSemTagsOff + "wsj-02-21.mrg", Wsj.WsjOriginalCleanedTopSemTagsOff + "wsj-24.mrg", "/scratch/fsangati/RESULTS/ChiarniakParser/WSJ24/wsj24_EN_noAux/wsj-24_1000best_noAux_cleaned.mrg", "tmp/reranked"});
    }
}

