/*
 * Decompiled with CFR 0.152.
 */
package tsg;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import settings.Parameters;
import tsg.TSNode;
import tsg.corpora.Wsj;
import util.FileUtil;
import util.Utility;

/**
 * Grammar of rule strings with associated counts, read from the trees of
 * {@code Parameters.trainingCorpus}.
 *
 * <p>Rules are kept in two tables keyed by the rule string produced by
 * {@code TSNode.toCFG(false)}: {@link #lexRules} for nodes that report
 * {@code isPrelexical()}, and {@link #internalRules} for every other
 * non-lexical node.
 *
 * @param <T> numeric type of the rule counts
 */
public class CFSG<T extends Number> {
    /** Rule string -> count for rules read from prelexical nodes. */
    public Hashtable<String, T> lexRules;
    /** Rule string -> count for rules read from the remaining non-lexical nodes. */
    public Hashtable<String, T> internalRules;

    /**
     * Registers a new run with {@code Parameters} and starts with empty rule tables.
     */
    public CFSG() {
        Parameters.newRun((Object)this);
        this.lexRules = new Hashtable<String, T>();
        this.internalRules = new Hashtable<String, T>();
    }

    /**
     * Wraps two existing rule tables. The tables are stored by reference, not copied.
     *
     * @param lex  table of lexical rules
     * @param gram table of internal rules
     */
    public CFSG(Hashtable<String, T> lex, Hashtable<String, T> gram) {
        this.lexRules = lex;
        this.internalRules = gram;
    }

    /**
     * Reads the rules of every tree in the training corpus and appends a
     * summary with the table sizes to the log file.
     */
    public void readCFGFromCorpus() {
        for (TSNode tree : Parameters.trainingCorpus.treeBank) {
            this.readCFGFromTreeLine(tree);
        }
        String log = "Read rules from corups. \n # Internal Rules: " + this.internalRules.size() + "\n # Lex Rules: " + this.lexRules.size();
        FileUtil.appendReturn(log, Parameters.logFile);
    }

    /**
     * Adds the rules of a single tree to the rule tables, incrementing the
     * count of each rule by one per occurrence. When
     * {@code Parameters.toNormalForm} is set the tree is first converted in
     * place via {@code toNormalForm()}.
     *
     * @param TreeLine root of the tree to read
     */
    public void readCFGFromTreeLine(TSNode TreeLine) {
        if (Parameters.toNormalForm) {
            TreeLine.toNormalForm();
        }
        for (TSNode node : TreeLine.collectNonLexicalNodes()) {
            Hashtable<String, T> table = node.isPrelexical() ? this.lexRules : this.internalRules;
            // NOTE(review): increaseStringInteger presumably stores Integer counts;
            // verify before using this method with a non-Integer T.
            Utility.increaseStringInteger(table, node.toCFG(false), 1);
        }
    }

    /**
     * Regroups the lexical rules by word.
     *
     * <p>Each lexical rule is split on single spaces; the first field is taken
     * as the category and the second as the word.
     *
     * @return table mapping each word to a table of category -> count
     */
    public Hashtable<String, Hashtable<String, T>> buildLexCatTable() {
        Hashtable<String, Hashtable<String, T>> lexCat = new Hashtable<String, Hashtable<String, T>>();
        for (String rule : this.lexRules.keySet()) {
            T count = this.lexRules.get(rule);
            String[] categoryAndWord = rule.split(" ");
            String cat = categoryAndWord[0];
            String lex = categoryAndWord[1];
            Hashtable<String, T> lexTable = lexCat.get(lex);
            if (lexTable == null) {
                lexTable = new Hashtable<String, T>();
                lexCat.put(lex, lexTable);
            }
            // A previous value means two distinct rules collapsed onto the same
            // (category, word) pair after splitting.
            if (lexTable.put(cat, count) != null) {
                System.out.println("Error!!");
            }
        }
        return lexCat;
    }

    /**
     * Writes every word of the lexicon to either {@code lex_unambiguous} or
     * {@code lex_ambiguous} under {@code Parameters.outputPath}, one word per
     * line. A word is unambiguous when all of its categories are equal once
     * any {@code @<digits>} sequence has been removed from them.
     */
    public void processLexicon() {
        File ambiguousFile = new File(Parameters.outputPath + "lex_ambiguous");
        File unambiguousFile = new File(Parameters.outputPath + "lex_unambiguous");
        PrintWriter amb = FileUtil.getPrintWriter(ambiguousFile);
        try {
            PrintWriter unamb = FileUtil.getPrintWriter(unambiguousFile);
            try {
                Hashtable<String, Hashtable<String, T>> lexCat = this.buildLexCatTable();
                for (String lex : lexCat.keySet()) {
                    Set<String> categories = new HashSet<String>();
                    for (String cat : lexCat.get(lex).keySet()) {
                        categories.add(cat.replaceAll("@\\d+", ""));
                    }
                    PrintWriter target = categories.size() == 1 ? unamb : amb;
                    target.write(lex + "\n");
                }
            } finally {
                // Guarded so that a failure above is not masked by a second one here.
                if (unamb != null) {
                    unamb.close();
                }
            }
        } finally {
            if (amb != null) {
                amb.close();
            }
        }
    }

    /**
     * Prints to {@code System.err} every rule, internal first and lexical
     * second, whose count is below zero.
     */
    public void checkNegativeFrequencies() {
        this.reportNegativeFrequencies(this.internalRules);
        this.reportNegativeFrequencies(this.lexRules);
    }

    /** Reports the rules of one table whose count is negative. */
    private void reportNegativeFrequencies(Hashtable<String, T> table) {
        for (String rule : table.keySet()) {
            // doubleValue() keeps the sign for every Number implementation, so
            // this works for counts that are neither Integer nor Long.
            if (table.get(rule).doubleValue() < 0) {
                System.err.println("Negative frequency in : " + rule);
            }
        }
    }

    /**
     * Writes all rules to the file {@code FullGrammar} under
     * {@code Parameters.outputPath}, one {@code "<count> <rule>"} line per
     * rule: internal rules, an empty line, then lexical rules. A note is
     * appended to the log file afterwards.
     */
    public void printFullGrammar() {
        File fullGrammarFile = new File(Parameters.outputPath + "FullGrammar");
        PrintWriter grammar = FileUtil.getPrintWriter(fullGrammarFile);
        try {
            this.writeRules(grammar, this.internalRules);
            grammar.write("\n");
            this.writeRules(grammar, this.lexRules);
        } finally {
            if (grammar != null) {
                grammar.close();
            }
        }
        String log = "Printed full grammar";
        FileUtil.appendReturn(log, Parameters.logFile);
    }

    /** Writes one {@code "<count> <rule>"} line for every entry of the table. */
    private void writeRules(PrintWriter out, Hashtable<String, T> table) {
        for (String rule : table.keySet()) {
            out.write(table.get(rule).toString() + " " + rule + "\n");
        }
    }

    /**
     * Collapses the lexical rules into one entry per word.
     *
     * @return table mapping each word to a one-element array whose only
     *         element holds the tab-separated {@code "<category> <count>"}
     *         pairs of that word
     */
    private Hashtable<String, String[]> buildCompactLexicon() {
        Hashtable<String, String[]> compactLexicon = new Hashtable<String, String[]>();
        for (String rule : this.lexRules.keySet()) {
            T count = this.lexRules.get(rule);
            String[] ruleSplit = rule.split(" ");
            String posTag = ruleSplit[0];
            String word = ruleSplit[1];
            // The one-element array is a mutable holder for the growing string.
            String[] posTags = compactLexicon.get(word);
            if (posTags == null) {
                posTags = new String[]{""};
                compactLexicon.put(word, posTags);
            }
            String separator = posTags[0].length() > 0 ? "\t" : "";
            posTags[0] = posTags[0] + separator + posTag + " " + count;
        }
        return compactLexicon;
    }

    /**
     * Writes the files {@code grammar} and {@code lexicon} under
     * {@code Parameters.outputPath}.
     *
     * <p>{@code grammar} holds one {@code "<count>\t<rule>"} line per internal
     * rule, sorted by rule string. {@code lexicon} holds one line per word,
     * sorted by word, with the word followed by its tab-separated
     * {@code "<category> <count>"} pairs. A note is appended to the log file
     * afterwards.
     */
    public void printLexiconAndGrammarFiles() {
        File lexiconFile = new File(Parameters.outputPath + "lexicon");
        File grammarFile = new File(Parameters.outputPath + "grammar");
        PrintWriter grammar = FileUtil.getPrintWriter(grammarFile);
        try {
            for (String rule : new TreeSet<String>(this.internalRules.keySet())) {
                T count = this.internalRules.get(rule);
                grammar.write(count.toString() + "\t" + rule + "\n");
            }
        } finally {
            if (grammar != null) {
                grammar.close();
            }
        }
        PrintWriter lexicon = FileUtil.getPrintWriter(lexiconFile);
        try {
            Hashtable<String, String[]> compactLexicon = this.buildCompactLexicon();
            for (String word : new TreeSet<String>(compactLexicon.keySet())) {
                String posTags = compactLexicon.get(word)[0];
                lexicon.write(word + "\t" + posTags + "\n");
            }
        } finally {
            if (lexicon != null) {
                lexicon.close();
            }
        }
        String log = "Printed `lexicon` and `grammar` files";
        FileUtil.appendReturn(log, Parameters.logFile);
    }

    /** Delegates to {@code Parameters.printTrainingCorpusToFile()}. */
    public void printTrainingCorpusToFile() {
        Parameters.printTrainingCorpusToFile();
    }

    /** Delegates to {@code Parameters.printTestCorpusToFile()}. */
    public void printTestCorpusToFile() {
        Parameters.printTestCorpusToFile();
    }

    /**
     * Counts, over the training corpus, how often each
     * {@code "<daughter label> <parent label>"} pair occurs. Terminal nodes,
     * unique daughters and nodes without a parent are not counted.
     *
     * @return table mapping each pair to its number of occurrences
     */
    private Hashtable<String, Integer> buildDaughterParentTable() {
        // Unary attachments are excluded; flip to include unique daughters.
        final boolean countUnary = false;
        Hashtable<String, Integer> daughterParentTable = new Hashtable<String, Integer>();
        for (TSNode tree : Parameters.trainingCorpus.treeBank) {
            for (TSNode n : tree.collectAllNodes()) {
                if (n.isTerminal()) {
                    continue;
                }
                if (n.isUniqueDaughter() && !countUnary) {
                    continue;
                }
                TSNode parent = n.parent;
                if (parent == null) {
                    continue;
                }
                String daughterParent = String.valueOf(n.label) + " " + parent.label;
                Utility.increaseStringInteger(daughterParentTable, daughterParent, 1);
            }
        }
        return daughterParentTable;
    }

    /**
     * Re-assigns head marks in the training corpus.
     *
     * <p>Existing head annotations are removed first. For every node that is
     * neither lexical nor prelexical, the only daughter is marked when there is
     * just one; otherwise the eligible daughter whose
     * {@code "<daughter> <parent>"} pair is most frequent in the corpus is
     * marked, the last one winning ties. When no daughter is eligible the last
     * daughter is marked. A summary of how many choices were tied is printed
     * and appended to the log file; the ratio is {@code NaN} when no choice
     * was made at all.
     *
     * @param allowPunctuation    when false, prelexical daughters whose first
     *                            daughter's label is punctuation are not eligible
     * @param onlyExternalChoices when true, only the first and last daughters
     *                            are eligible
     */
    public void assignHeadAnnotations(boolean allowPunctuation, boolean onlyExternalChoices) {
        Parameters.trainingCorpus.removeHeadAnnotations();
        Hashtable<String, Integer> daughterParentTable = this.buildDaughterParentTable();
        int totalAmbiguity = 0;
        int totalChoices = 0;
        for (TSNode tree : Parameters.trainingCorpus.treeBank) {
            for (TSNode n : tree.collectAllNodes()) {
                if (n.isLexical || n.isPrelexical()) {
                    continue;
                }
                if (n.prole() == 1) {
                    n.firstDaughter().headMarked = true;
                    continue;
                }
                ++totalChoices;
                int maxCount = -1;
                TSNode bestDaughter = null;
                boolean ambiguity = false;
                for (TSNode d : n.daughters) {
                    if (!allowPunctuation && d.isPrelexical() && Utility.isPunctuation(d.firstDaughter().label)) {
                        continue;
                    }
                    if (onlyExternalChoices && d != n.firstDaughter() && d != n.lastDaughter()) {
                        continue;
                    }
                    String daughterParent = String.valueOf(d.label) + " " + n.label;
                    // A pair that was never counted is treated as frequency 0
                    // instead of failing on the missing entry.
                    Integer stored = daughterParentTable.get(daughterParent);
                    int count = stored == null ? 0 : stored.intValue();
                    if (count < maxCount) {
                        continue;
                    }
                    // Tied with the current best -> ambiguous; strictly better ->
                    // the new best is unique so far and any earlier tie is void.
                    ambiguity = count == maxCount;
                    bestDaughter = d;
                    maxCount = count;
                }
                if (bestDaughter == null) {
                    System.out.println("All punctuation daughter in " + n);
                    n.daughters[n.prole() - 1].headMarked = true;
                    continue;
                }
                bestDaughter.headMarked = true;
                if (ambiguity) {
                    ++totalAmbiguity;
                }
            }
        }
        float ratio = (float)totalAmbiguity / (float)totalChoices;
        String report = "Ambiguity on head assignment: [" + totalAmbiguity + " | " + totalChoices + "] -> " + ratio;
        System.out.println(report);
        FileUtil.appendReturn(report, Parameters.logFile);
    }

    /**
     * Collects internal-rule statistics over the training corpus with a fixed
     * configuration.
     *
     * <p>Internal rules are counted after {@code removeNumberInLabels()} has
     * been applied to each tree. A second pass counts the sentences containing
     * at least one rule whose frequency does not exceed the threshold (1) and
     * tallies the labels of the nodes producing such rules. The rule tables
     * and the label tally are written under {@code Parameters.outputPath}, and
     * a report is printed and appended to the log file.
     */
    public static void rulesStatistics() {
        Parameters.corpusName = "Wsj";
        Parameters.lengthLimitTraining = 1000;
        Parameters.lengthLimitTest = 1000;
        Parameters.semanticTags = false;
        Parameters.outputPath = "/scratch/fsangati/RESULTS/TSG/CFG/RuleStatistics/";
        CFSG<Integer> grammar = new CFSG<Integer>();
        int threshold = 1;
        int sentenceBelowThreshold = 0;
        Hashtable<String, Integer> parentCategoryBelowThreshold = new Hashtable<String, Integer>();
        // First pass: count every internal rule.
        for (TSNode treeLine : Parameters.trainingCorpus.treeBank) {
            treeLine.removeNumberInLabels();
            for (TSNode nT : treeLine.collectNonLexicalNodes()) {
                if (nT.isPrelexical()) {
                    continue;
                }
                Utility.increaseStringInteger(grammar.internalRules, nT.toCFG(false), 1);
            }
        }
        // Second pass: find the sentences that use a rule at or below the threshold.
        for (TSNode treeLine : Parameters.trainingCorpus.treeBank) {
            treeLine.removeNumberInLabels();
            boolean sentenceUncovered = false;
            for (TSNode nT : treeLine.collectNonLexicalNodes()) {
                if (nT.isPrelexical()) {
                    continue;
                }
                int count = grammar.internalRules.get(nT.toCFG(false)).intValue();
                if (count > threshold) {
                    continue;
                }
                if (!sentenceUncovered) {
                    sentenceUncovered = true;
                    ++sentenceBelowThreshold;
                }
                Utility.increaseStringInteger(parentCategoryBelowThreshold, nT.label, 1);
            }
        }
        Utility.hashtableOrderedToFile(grammar.internalRules, new File(Parameters.outputPath + "CFG_freq"));
        Utility.hashtableRankedToFile(grammar.internalRules, new File(Parameters.outputPath + "CFG_rank"));
        Utility.hashtableOrderedToFile(parentCategoryBelowThreshold, new File(Parameters.outputPath + "catStat_below_" + threshold));
        int trainTotalTypes = Utility.countTotalTypesInTable(grammar.internalRules);
        int trainTotalTokens = Utility.countTotalTokensInTable(grammar.internalRules);
        String report = "\nRULES STATITSTICS:\nTraining Corpus initial total types|tokens:\t" + trainTotalTypes + "\t" + trainTotalTokens + "\n" + "Sentence below threshold (" + threshold + ")|total:\t" + sentenceBelowThreshold + "\t" + Parameters.trainingCorpus.size() + "\n";
        System.out.println(report);
        FileUtil.appendReturn(report, Parameters.logFile);
    }

    /**
     * Measures how many internal rules of the test corpus also occur in the
     * training corpus, with a fixed configuration.
     *
     * <p>Writes the training rules, the test rules and the unmatched test
     * rules under {@code Parameters.outputPath}, then prints a report and
     * appends it to the log file. A test sentence counts as unmatched when at
     * least one of its internal rules is absent from the training rules. The
     * percentages are {@code NaN} when the test corpus yields no rules.
     */
    public static void checkCoverage() {
        Parameters.corpusName = "Wsj";
        Parameters.lengthLimitTraining = 1000;
        Parameters.lengthLimitTest = 1000;
        Parameters.semanticTags = false;
        Wsj.testSet = "22";
        Parameters.toNormalForm = false;
        Parameters.outputPath = "/scratch/fsangati/RESULTS/TSG/CFG/Coverage/";
        CFSG<Integer> grammar = new CFSG<Integer>();
        grammar.readCFGFromCorpus();
        Hashtable<String, Integer> internalRulesTraining = grammar.internalRules;
        Set<String> internalRulesTrainingSet = internalRulesTraining.keySet();
        Hashtable<String, Integer> internalRulesTesting = new Hashtable<String, Integer>();
        int unmatchedSentences = 0;
        List<Integer> unmatchedSentencesLength = new ArrayList<Integer>();
        for (TSNode treeLine : Parameters.testCorpus.treeBank) {
            // Fresh table per sentence so that only this sentence's rules are inspected.
            grammar.internalRules = new Hashtable<String, Integer>();
            grammar.readCFGFromTreeLine(treeLine);
            Utility.addAll(grammar.internalRules, internalRulesTesting);
            for (String rule : grammar.internalRules.keySet()) {
                if (internalRulesTrainingSet.contains(rule)) {
                    continue;
                }
                // One unseen rule is enough; count the sentence once and move on.
                ++unmatchedSentences;
                unmatchedSentencesLength.add(treeLine.countLexicalNodes());
                break;
            }
        }
        int trainTotalTypes = Utility.countTotalTypesInTable(internalRulesTraining);
        int trainTotalTokens = Utility.countTotalTokensInTable(internalRulesTraining);
        int testTotalTypes = Utility.countTotalTypesInTable(internalRulesTesting);
        int testTotalTokens = Utility.countTotalTokensInTable(internalRulesTesting);
        Utility.hashtableOrderedToFile(internalRulesTraining, new File(Parameters.outputPath + "CFG_train"));
        Utility.hashtableOrderedToFile(internalRulesTesting, new File(Parameters.outputPath + "CFG_test"));
        // From here on the test table only holds the rules unseen in training.
        internalRulesTesting.keySet().removeAll(internalRulesTraining.keySet());
        Utility.hashtableOrderedToFile(internalRulesTesting, new File(Parameters.outputPath + "CFG_test_unmatched"));
        int unmatchedEvalTotalTypes = Utility.countTotalTypesInTable(internalRulesTesting);
        int unmatchedEvalTotalTokens = Utility.countTotalTokensInTable(internalRulesTesting);
        float unmatchedEvalPercentTypes = (float)unmatchedEvalTotalTypes / (float)testTotalTypes;
        float unmatchedEvalPercentTokens = (float)unmatchedEvalTotalTokens / (float)testTotalTokens;
        String report = "\nCOVERAGE ANALYSIS:\nTraining Corpus initial total types|tokens: " + trainTotalTypes + "|" + trainTotalTokens + "\n" + "Test Corpus initial total types|tokens: " + testTotalTypes + "|" + testTotalTokens + "\n" + "Test Corpus unmatched total types|tokens: " + unmatchedEvalTotalTypes + "|" + unmatchedEvalTotalTokens + "\n" + "Test Corpus unmatched % types|tokens: " + unmatchedEvalPercentTypes + "|" + unmatchedEvalPercentTokens + "\n" + "Test Corpus unmatched sentences|total: " + unmatchedSentences + "|" + Parameters.testCorpus.size() + "\n" + "Unmatched Sentences Length classes :\n" + Utility.printIntegerListClasses(unmatchedSentencesLength, 10);
        System.out.println(report);
        FileUtil.appendReturn(report, Parameters.logFile);
    }

    /**
     * Reads the grammar from the training corpus with the default parameters
     * and a length limit of 40, then writes the lexicon and grammar files and
     * both corpora.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Parameters.setDefaultParam();
        Parameters.lengthLimitTraining = 40;
        Parameters.lengthLimitTest = 40;
        Parameters.outputPath = "/scratch/fsangati/RESULTS/TSG/CFSG/";
        CFSG<Integer> grammar = new CFSG<Integer>();
        grammar.readCFGFromCorpus();
        grammar.printLexiconAndGrammarFiles();
        grammar.printTrainingCorpusToFile();
        grammar.printTestCorpusToFile();
    }
}

