/*
 * Decompiled with CFR 0.152.
 */
package tsg.parsingExp;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Map;
import java.util.TreeSet;
import java.util.Vector;
import java.util.regex.Pattern;
import settings.Parameters;
import tsg.TSNodeLabel;
import tsg.utils.CleanPetrov;
import util.Utility;

/**
 * Builds a table of CFG rule frequencies from a treebank whose node labels are
 * refined categories, and smooths it across the refinements of each base category.
 *
 * <p>The base category of a label is whatever {@code CleanPetrov.cleanPetrovLabel}
 * returns for it. For every rule observed at an internal node, its count is
 * pooled under the base category of its left-hand side, scaled by
 * {@code petrovSmoothingFactor}, divided by the number of refined categories
 * sharing that base category, and then added to the frequency of the same
 * right-hand side under <em>each</em> of those refined categories.
 *
 * <p>All work happens in the constructor; the result is read through
 * {@link #getCFGfreq()}.
 */
public class ExtractPetrovPCFG {
    /** Single-whitespace delimiter for rule strings, compiled once instead of per node. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /** Trees the grammar is extracted from (the caller's list, not a copy). */
    ArrayList<TSNodeLabel> trainingTreebank;
    /** Multiplier applied to the pooled base-category rule counts. */
    double petrovSmoothingFactor;
    /** Rule string to accumulated (observed + smoothed) frequency. */
    Hashtable<String, double[]> smoothedPCFG;
    /** Distinct base categories of internal nodes, in natural (sorted) string order. */
    String[] baseCats;
    /** Distinct refined categories of internal nodes, in natural (sorted) string order. */
    String[] refinedCats;
    /** Internal-node occurrences per base category, parallel to {@link #baseCats}. */
    int[] baseCatsFreq;
    /** Internal-node occurrences per refined category, parallel to {@link #refinedCats}. */
    int[] refinedCatsFreq;
    /** Number of distinct refined categories per base category, parallel to {@link #baseCats}. */
    int[] splitsPerBaseCats;

    /**
     * Collects the category inventory of the treebank and builds the smoothed
     * rule-frequency table, reporting progress through {@code Parameters}.
     *
     * @param trainingTreebank trees to extract rules from; stored by reference
     * @param petrovSmoothingFactor multiplier applied to the pooled counts before
     *        they are shared among the refinements of a base category
     */
    public ExtractPetrovPCFG(ArrayList<TSNodeLabel> trainingTreebank, double petrovSmoothingFactor) {
        Parameters.reportLineFlush("\nExtractPetrovPCFG");
        this.trainingTreebank = trainingTreebank;
        this.petrovSmoothingFactor = petrovSmoothingFactor;
        this.getInternalBaseCats();
        this.buildSmoothedPCFG();
    }

    /**
     * Fills {@link #baseCats}, {@link #refinedCats}, their frequency arrays and
     * {@link #splitsPerBaseCats} from the labels of the nodes returned by
     * {@code collectInternalNodes()} on every training tree, then reports them.
     */
    private void getInternalBaseCats() {
        TreeSet<String> baseCatsSet = new TreeSet<String>();
        TreeSet<String> refinedCatsSet = new TreeSet<String>();
        Hashtable<String, int[]> baseCatsFreqTable = new Hashtable<String, int[]>();
        Hashtable<String, int[]> refinedCatsFreqTable = new Hashtable<String, int[]>();
        for (TSNodeLabel tree : this.trainingTreebank) {
            for (TSNodeLabel node : tree.collectInternalNodes()) {
                String refinedCat = node.label();
                String baseCat = CleanPetrov.cleanPetrovLabel(refinedCat);
                baseCatsSet.add(baseCat);
                refinedCatsSet.add(refinedCat);
                Utility.increaseStringIntArray(baseCatsFreqTable, baseCat);
                Utility.increaseStringIntArray(refinedCatsFreqTable, refinedCat);
            }
        }

        // TreeSet iteration is sorted, so both arrays are valid binary-search targets.
        this.baseCats = baseCatsSet.toArray(new String[0]);
        this.refinedCats = refinedCatsSet.toArray(new String[0]);
        this.baseCatsFreq = new int[this.baseCats.length];
        this.splitsPerBaseCats = new int[this.baseCats.length];
        this.refinedCatsFreq = new int[this.refinedCats.length];

        // The counter tables hold one-element int[] cells; the count is read from slot 0.
        for (int i = 0; i < this.baseCats.length; i++) {
            this.baseCatsFreq[i] = baseCatsFreqTable.get(this.baseCats[i])[0];
        }
        for (int i = 0; i < this.refinedCats.length; i++) {
            String refinedCat = this.refinedCats[i];
            this.refinedCatsFreq[i] = refinedCatsFreqTable.get(refinedCat)[0];
            // Each distinct refined category counts as one split of its base category.
            int baseCatIndex = this.indexOfBaseCat(CleanPetrov.cleanPetrovLabel(refinedCat));
            this.splitsPerBaseCats[baseCatIndex]++;
        }

        Parameters.reportLine("Internal nodes base cats:");
        reportCatFreq(this.baseCats, this.baseCatsFreq);
        Parameters.reportLine("Internal nodes refined cats:");
        reportCatFreq(this.refinedCats, this.refinedCatsFreq);
        Parameters.reportLine("Number of splits per base cats:");
        reportCatFreq(this.baseCats, this.splitsPerBaseCats);
        Parameters.reportLineFlush("");
    }

    /**
     * Reports one tab-separated "category, value" line per entry.
     *
     * @param cats category names
     * @param freq values parallel to {@code cats}
     */
    private static void reportCatFreq(String[] cats, int[] freq) {
        for (int i = 0; i < cats.length; i++) {
            Parameters.reportLine(cats[i] + "\t" + freq[i]);
        }
    }

    /**
     * Returns the position of {@code baseCat} in the sorted {@link #baseCats} array.
     *
     * @param baseCat base category to look up
     * @return its index in {@link #baseCats}
     * @throws IllegalStateException if the category is not present, which would
     *         otherwise surface later as a negative array index
     */
    private int indexOfBaseCat(String baseCat) {
        int index = Arrays.binarySearch(this.baseCats, baseCat);
        if (index < 0) {
            throw new IllegalStateException(
                    "Base category not found among internal-node base categories: " + baseCat);
        }
        return index;
    }

    /**
     * Builds {@link #smoothedPCFG} in three steps: pool internal-node rule counts
     * by the base category of their left-hand side, turn each pooled count into a
     * per-refinement share, then add those shares on top of the observed rule counts.
     */
    private void buildSmoothedPCFG() {
        Parameters.reportLineFlush("Building smoothed PCFG");

        // Step 1: count rules of internal nodes with the left-hand side reduced to
        // its base category (the right-hand side keeps its original symbols).
        Hashtable<String, int[]> genericIntRulesFreq = new Hashtable<String, int[]>();
        for (TSNodeLabel tree : this.trainingTreebank) {
            for (TSNodeLabel node : tree.collectInternalNodes()) {
                String[] ruleSplit = WHITESPACE.split(node.cfgRule());
                String lhsBase = CleanPetrov.cleanPetrovLabel(ruleSplit[0]);
                Utility.increaseStringIntArray(genericIntRulesFreq, lhsBase + " " + getRhs(ruleSplit));
            }
        }

        // Step 2: for every base category, map each right-hand side to the share
        // of the pooled count that each of its refinements will receive.
        ArrayList<Hashtable<String, Double>> smoothedRhsFreqByBaseCat =
                new ArrayList<Hashtable<String, Double>>(this.baseCats.length);
        for (int i = 0; i < this.baseCats.length; i++) {
            smoothedRhsFreqByBaseCat.add(new Hashtable<String, Double>());
        }
        for (Map.Entry<String, int[]> entry : genericIntRulesFreq.entrySet()) {
            String[] ruleSplit = WHITESPACE.split(entry.getKey());
            int baseLhsIndex = this.indexOfBaseCat(ruleSplit[0]);
            int freq = entry.getValue()[0];
            double smoothedFreq = this.petrovSmoothingFactor * (double) freq
                    / (double) this.splitsPerBaseCats[baseLhsIndex];
            smoothedRhsFreqByBaseCat.get(baseLhsIndex).put(getRhs(ruleSplit), smoothedFreq);
        }

        // Step 3a: observed counts, one unit per non-lexical node of every tree.
        this.smoothedPCFG = new Hashtable<String, double[]>();
        for (TSNodeLabel tree : this.trainingTreebank) {
            for (TSNodeLabel node : tree.collectAllNodes()) {
                if (node.isLexical) {
                    continue;
                }
                Utility.increaseStringDoubleArray(this.smoothedPCFG, node.cfgRule(), 1.0);
            }
        }
        int presentRules = this.smoothedPCFG.size();

        // Step 3b: every refined category receives the shares of all right-hand
        // sides pooled under its base category; rules never observed are created here.
        for (String refinedCat : this.refinedCats) {
            int baseLhsIndex = this.indexOfBaseCat(CleanPetrov.cleanPetrovLabel(refinedCat));
            for (Map.Entry<String, Double> e : smoothedRhsFreqByBaseCat.get(baseLhsIndex).entrySet()) {
                double smoothedFreq = e.getValue();
                Utility.increaseStringDoubleArray(this.smoothedPCFG, refinedCat + " " + e.getKey(), smoothedFreq);
            }
        }

        int cfgNumber = this.smoothedPCFG.size();
        Parameters.reportLine("Total CFG rules: " + cfgNumber);
        Parameters.reportLine("Total CFG present rules: " + presentRules);
        Parameters.reportLine("Total CFG new smoothed rules: " + (cfgNumber - presentRules));
    }

    /**
     * Joins every token after the first with single spaces.
     *
     * @param rule tokenized rule: left-hand side at index 0, followed by at least
     *        one right-hand-side symbol
     * @return the right-hand side as a space-separated string
     */
    private static String getRhs(String[] rule) {
        StringBuilder sb = new StringBuilder(rule[1]);
        for (int i = 2; i < rule.length; i++) {
            sb.append(' ').append(rule[i]);
        }
        return sb.toString();
    }

    /**
     * Returns the smoothed rule-frequency table built by the constructor.
     *
     * @return the internal table itself (not a copy), keyed by rule string; the
     *         frequency is presumably held in slot 0 of each {@code double[]},
     *         mirroring the {@code int[]} counters used here — confirm against
     *         {@code Utility.increaseStringDoubleArray}
     */
    public Hashtable<String, double[]> getCFGfreq() {
        return this.smoothedPCFG;
    }
}

