/*
 * Decompiled with CFR 0.152.
 */
package tsg.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;
import settings.Parameters;
import tsg.ConvertFragmentsToCFGRules;
import tsg.TSNodeLabel;
import tsg.TSNodeLabelFreq;
import tsg.corpora.Wsj;
import tsg.parser.ConvertGrammarInBitParFormat;
import util.FileUtil;
import util.Utility;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class ParseWSJprob {
    public static String topSymbol = "TOP";
    public static String posTerminalSuffix = "_#TERM#";
    public static String artificialNodePrefix = ConvertGrammarInBitParFormat.artificialNodePrefix;
    public static int nBest = 1000;
    File trainingFile;
    File testFile;
    File fragmentFile;
    File probGrammarFile;
    File ambiguousRulesFile;
    File logFile;
    File testSentencesPreprocessed;
    File ruleBestFragmentMappingFile;
    File ruleFragmentsAmbiguousFile;
    File grammarFile;
    File lexiconFile;
    File outputBitParSentenceFile;
    File newIntroduceCFGRulesFile;
    PrintWriter logPw;
    String workingDir;
    String outputBitParDir;
    HashSet<String> posLex;
    ConvertFragmentsToCFGRules converter;
    static String bitparApp = "/home/fsangati/SOFTWARE/BitPar_Web/bitpar";
    static String bitparArgs = "-p -v -b " + nBest + " -o -s TOP ";

    public ParseWSJprob(File trainingFile, File testFile, File fragmentFile) throws Exception {
        this.trainingFile = trainingFile;
        this.testFile = testFile;
        this.fragmentFile = fragmentFile;
        this.posLex = new HashSet();
        this.prepareWorkingDir();
        this.converter = new ConvertFragmentsToCFGRules(fragmentFile, this.ruleFragmentsAmbiguousFile);
        System.out.println("Converted fragments to CFG rules. Total Rules: " + this.converter.ruleSize());
        this.addCFGAndInitialAndTerminalPosRules();
        System.out.println("Added initial and terminal and CFG rules. Total Rules: " + this.converter.ruleSize());
        this.converter.printGrammarFile(this.probGrammarFile);
        this.converter.printRuleBestFragmentMappingFile(this.ruleBestFragmentMappingFile);
        this.logPw.close();
    }

    private void prepareWorkingDir() {
        this.workingDir = String.valueOf(Parameters.resultsPath) + "TSG/TSGkernels/parsing/" + FileUtil.dataFolder() + "/";
        this.outputBitParDir = String.valueOf(this.workingDir) + "bitparOutput/";
        new File(this.workingDir).mkdirs();
        new File(this.outputBitParDir).mkdirs();
        this.probGrammarFile = new File(String.valueOf(this.workingDir) + "probGrammar.txt");
        this.grammarFile = new File(String.valueOf(this.workingDir) + "grammar.txt");
        this.lexiconFile = new File(String.valueOf(this.workingDir) + "lexicon.txt");
        this.ruleBestFragmentMappingFile = new File(String.valueOf(this.workingDir) + "ruleBestFragmentMapping.txt");
        this.ambiguousRulesFile = new File(String.valueOf(this.workingDir) + "ambiguous_rules.txt");
        this.newIntroduceCFGRulesFile = new File(String.valueOf(this.workingDir) + "newIntroduceCFGRules.txt");
        this.testSentencesPreprocessed = new File(String.valueOf(this.workingDir) + "testSentencesPreprocessed.txt");
        this.ruleFragmentsAmbiguousFile = new File(String.valueOf(this.workingDir) + "ruleFragmentsAmbiguous.txt");
        this.logFile = new File(String.valueOf(this.workingDir) + "log.txt");
        this.logPw = FileUtil.getPrintWriter(this.logFile);
    }

    private void addInitialAndTerminalPosRules() throws Exception {
        Hashtable<String, int[]> initialRules = new Hashtable<String, int[]>();
        HashSet<String> posTags = new HashSet<String>();
        ArrayList<TSNodeLabel> treebank = TSNodeLabel.getTreebank(this.trainingFile);
        for (TSNodeLabel t : treebank) {
            Utility.increaseStringIntArray(initialRules, t.label());
            ArrayList<TSNodeLabel> lexItems = t.collectLexicalItems();
            for (TSNodeLabel l : lexItems) {
                String parentLabel = l.parent.label();
                String pL = String.valueOf(parentLabel) + " " + l.label(false, true);
                this.posLex.add(pL);
                posTags.add(parentLabel);
            }
        }
        PrintWriter newRulesPW = FileUtil.getPrintWriter(this.newIntroduceCFGRulesFile);
        for (Map.Entry e : initialRules.entrySet()) {
            String initialRule = String.valueOf(topSymbol) + " " + (String)e.getKey();
            TSNodeLabel fragment = new TSNodeLabel("(" + initialRule + ")", false);
            TSNodeLabelFreq fragmentFreq = new TSNodeLabelFreq(fragment, ((int[])e.getValue())[0]);
            newRulesPW.println(fragmentFreq.toString(false, true));
            this.converter.add(initialRule, fragmentFreq);
        }
        for (String p : posTags) {
            String pTerminal = "\"" + p + posTerminalSuffix + "\"";
            String rule = String.valueOf(p) + " " + pTerminal;
            TSNodeLabel fragment = new TSNodeLabel("(" + rule + ")", false);
            TSNodeLabelFreq fragmentFreq = new TSNodeLabelFreq(fragment, 1);
            newRulesPW.println(fragmentFreq.toString(false, true));
            this.converter.add(rule, fragmentFreq);
        }
        newRulesPW.close();
    }

    private void addCFGAndInitialAndTerminalPosRules() throws Exception {
        Hashtable<String, int[]> cfgRules = new Hashtable<String, int[]>();
        HashSet<String> posTags = new HashSet<String>();
        ArrayList<TSNodeLabel> treebank = TSNodeLabel.getTreebank(this.trainingFile);
        for (TSNodeLabel t : treebank) {
            String initalRule = String.valueOf(topSymbol) + " " + t.label();
            Utility.increaseStringIntArray(cfgRules, initalRule);
            ArrayList<TSNodeLabel> nodes = t.collectAllNodes();
            for (TSNodeLabel n : nodes) {
                if (n.isLexical) continue;
                String rule = n.cfgRule();
                if (n.isPreLexical()) {
                    posTags.add(n.label());
                    this.posLex.add(rule);
                }
                Utility.increaseStringIntArray(cfgRules, rule);
            }
        }
        PrintWriter newRulesPW = FileUtil.getPrintWriter(this.newIntroduceCFGRulesFile);
        for (Map.Entry e : cfgRules.entrySet()) {
            String rule = (String)e.getKey();
            if (this.converter.containsRule(rule)) continue;
            TSNodeLabel fragment = new TSNodeLabel("(" + rule + ")", false);
            TSNodeLabelFreq fragmentFreq = new TSNodeLabelFreq(fragment, ((int[])e.getValue())[0]);
            newRulesPW.println(fragmentFreq.toString(false, true));
            this.converter.add(rule, fragmentFreq);
        }
        for (String p : posTags) {
            String pTerminal = "\"" + p + posTerminalSuffix + "\"";
            String rule = String.valueOf(p) + " " + pTerminal;
            TSNodeLabel fragment = new TSNodeLabel("(" + rule + ")", false);
            TSNodeLabelFreq fragmentFreq = new TSNodeLabelFreq(fragment, 1);
            newRulesPW.println(fragmentFreq.toString(false, true));
            this.converter.add(rule, fragmentFreq);
        }
        newRulesPW.close();
    }

    private void parseTestFile() throws Exception {
        ArrayList<TSNodeLabel> testTreebank = TSNodeLabel.getTreebank(this.testFile);
        int sentenceNumber = 1;
        for (TSNodeLabel t : testTreebank) {
            ArrayList<TSNodeLabel> lexItems = t.collectLexicalItems();
            int length = lexItems.size();
            String[] flatWordArray = new String[length];
            String[] flatWordArrayPosTerminal = new String[length];
            int i = 0;
            for (TSNodeLabel l : lexItems) {
                String iWord;
                flatWordArray[i] = iWord = l.label();
                TSNodeLabel lParent = l.parent;
                String p = lParent.label();
                String pL = String.valueOf(p) + " " + l.label(false, true);
                flatWordArrayPosTerminal[i] = this.posLex.contains(pL) ? iWord : String.valueOf(p) + posTerminalSuffix;
                ++i;
            }
            this.reportLine("Preparing grammar for sentence: " + sentenceNumber);
            this.reportLine("Original  sentece: " + Utility.joinStringArrayToString(flatWordArray, " "));
            this.reportLine("Preprocessed  sentece: " + Utility.joinStringArrayToString(flatWordArrayPosTerminal, " "));
            long startTime = System.currentTimeMillis();
            ConvertGrammarInBitParFormat.probGrammarForOneSentence(this.probGrammarFile, flatWordArrayPosTerminal, this.grammarFile, this.lexiconFile);
            int tookSec = (int)((System.currentTimeMillis() - startTime) / 1000L);
            this.reportLine("Took " + tookSec);
            this.createTestSentenceFile(flatWordArrayPosTerminal);
            this.outputBitParSentenceFile = new File(String.valueOf(this.outputBitParDir) + "bitParOut_" + Utility.padZero(4, sentenceNumber) + ".txt");
            startTime = System.currentTimeMillis();
            this.reportLine("Parsing sentence...");
            this.runBitPar();
            tookSec = (int)((System.currentTimeMillis() - startTime) / 1000L);
            this.reportLine("Finished parsing. Took " + tookSec + " sec.");
            this.reportLine("");
            ++sentenceNumber;
        }
    }

    private void createTestSentenceFile(String[] flatWordArrayPosTerminal) {
        PrintWriter flatPW = FileUtil.getPrintWriter(this.testSentencesPreprocessed);
        flatPW.println(String.valueOf(Utility.joinStringArrayToString(flatWordArrayPosTerminal, "\n")) + "\n");
        flatPW.close();
    }

    private void runBitPar() {
        try {
            String line;
            Process p = Runtime.getRuntime().exec(String.valueOf(bitparApp) + " " + bitparArgs + " " + this.grammarFile + " " + this.lexiconFile + " " + this.testSentencesPreprocessed + " " + this.outputBitParSentenceFile, null, new File(this.workingDir));
            BufferedReader input = new BufferedReader(new InputStreamReader(p.getInputStream()));
            while ((line = input.readLine()) != null) {
                this.reportLine(line);
            }
            input.close();
        }
        catch (Exception err) {
            err.printStackTrace();
        }
    }

    private void reportLine(String line) {
        System.out.println(line);
        this.logPw.println(line);
    }

    public static void getFirstParsesAndProcess(String dir, File outputFile, Hashtable<String, TSNodeLabel> ruleFragmentTable, ArrayList<String> flatSentences) throws Exception {
        File dirFile = new File(dir);
        Object[] outputFiles = dirFile.listFiles();
        Arrays.sort(outputFiles);
        PrintWriter pw = FileUtil.getPrintWriter(outputFile);
        Iterator<String> sentenceIterator = flatSentences.iterator();
        Object[] objectArray = outputFiles;
        int n = outputFiles.length;
        int n2 = 0;
        while (n2 < n) {
            Object f = objectArray[n2];
            Scanner scan = FileUtil.getScanner((File)f);
            String line = scan.nextLine();
            scan.close();
            line = line.replaceAll("\\\\", "");
            String flatSentence = sentenceIterator.next();
            String[] sentenceWords = flatSentence.split("\\s+");
            TSNodeLabel fragment = new TSNodeLabel(line);
            fragment.removePreterminalWithPrefix(artificialNodePrefix, -1);
            TSNodeLabel fragmentConverted = fragment.replaceRulesWithFragments(ruleFragmentTable);
            fragmentConverted = fragmentConverted.daughters[0];
            fragmentConverted.adjustLexicalItems(sentenceWords);
            pw.println(fragmentConverted.toString());
            ++n2;
        }
        pw.close();
    }

    public static ArrayList<String> readTestFlat(File inputFile) {
        ArrayList<String> result = new ArrayList<String>();
        Scanner scan = FileUtil.getScanner(inputFile);
        while (scan.hasNextLine()) {
            String line = scan.nextLine();
            line = line.replaceAll("\\\\", "");
            result.add(line);
        }
        scan.close();
        return result;
    }

    public static void writeGoldFile(File inputFile, File outputFile) {
        Scanner scan = FileUtil.getScanner(inputFile);
        PrintWriter pw = FileUtil.getPrintWriter(outputFile);
        while (scan.hasNextLine()) {
            String line = scan.nextLine();
            line = line.replaceAll("\\\\", "");
            pw.println(line);
        }
        scan.close();
        pw.close();
    }

    public static void main(String[] args) throws Exception {
        String workingDir = String.valueOf(Parameters.resultsPath) + "TSG/TSGkernels/parsing/kernelDOP1s_prob/";
        String bitparOutputDir = String.valueOf(workingDir) + "bitparOutput/";
        File flatSentences = new File(String.valueOf(Wsj.WsjFlatNoTraces) + "wsj-22.mrg");
        ArrayList<String> flatSentencesArray = ParseWSJprob.readTestFlat(flatSentences);
        File ruleFragmentTableFile = new File(String.valueOf(workingDir) + "ruleBestFragmentMapping.txt");
        File outputFile = new File(String.valueOf(workingDir) + "bitParBest.txt");
        File goldFile = new File(String.valueOf(workingDir) + "gold.txt");
        ParseWSJprob.writeGoldFile(new File(String.valueOf(Wsj.WsjOriginalCleaned) + "wsj-22.mrg"), goldFile);
        Hashtable<String, TSNodeLabel> ruleFragmentTable = ConvertFragmentsToCFGRules.readRuleBestFragmentMappingFile(ruleFragmentTableFile);
        ParseWSJprob.getFirstParsesAndProcess(bitparOutputDir, outputFile, ruleFragmentTable, flatSentencesArray);
    }
}

