/*
 * Decompiled with CFR 0.152.
 */
package tsg;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Map;
import tsg.TSNodeLabel;
import tsg.corpora.Wsj;
import util.FileUtil;
import util.Utility;

/**
 * Command-line utility that counts the CFG rules occurring in a treebank and
 * writes them, one per line with their frequency, to a grammar file.
 */
public class PCFG_extractor {
    /**
     * Counts every CFG rule found at the non-lexical nodes of the trees in
     * {@code treebankFile} and writes the counts to {@code grammarOutputFile}.
     *
     * <p>Each output line has the form {@code rule<TAB>count}, where
     * {@code rule} is the string returned by {@code TSNodeLabel.cfgRule()}.
     * Lines are emitted in the iteration order of the underlying
     * {@link Hashtable}, i.e. in no particular order.
     *
     * @param treebankFile      file the treebank is loaded from (via {@code Wsj.getTreebank})
     * @param grammarOutputFile file the rule counts are written to
     * @throws Exception if loading the treebank fails, or if the output
     *                   writer reports an error while writing the grammar
     */
    public static void extractPCFG(File treebankFile, File grammarOutputFile) throws Exception {
        ArrayList<TSNodeLabel> treebank = Wsj.getTreebank(treebankFile);

        // The count of each rule is kept in slot 0 of the int[] value;
        // Utility.increaseStringIntArray is responsible for updating it.
        Hashtable<String, int[]> cfgFreq = new Hashtable<String, int[]>();
        for (TSNodeLabel tree : treebank) {
            for (TSNodeLabel node : tree.collectAllNodes()) {
                if (node.isLexical) {
                    // Lexical nodes are skipped: only the remaining nodes contribute a rule.
                    continue;
                }
                Utility.increaseStringIntArray(cfgFreq, node.cfgRule());
            }
        }

        PrintWriter pw = FileUtil.getPrintWriter(grammarOutputFile);
        try {
            for (Map.Entry<String, int[]> entry : cfgFreq.entrySet()) {
                pw.println(entry.getKey() + "\t" + entry.getValue()[0]);
            }
            // PrintWriter never throws on write failures; checkError() flushes and
            // reports them, so a truncated grammar file is not silently accepted.
            if (pw.checkError()) {
                throw new java.io.IOException("Error while writing grammar to " + grammarOutputFile);
            }
        } finally {
            // Always release the file handle, even when writing fails part-way.
            pw.close();
        }
    }

    /**
     * Entry point: expects exactly two arguments, the treebank file and the
     * output file. Extracts the rule counts and then sorts the output file
     * with {@code FileUtil.sortFile}. With any other number of arguments a
     * usage message is printed to standard error and nothing else is done.
     *
     * @param args {@code [treebankFile, outputFile]}
     * @throws Exception if extraction or sorting fails
     */
    public static void main(String[] args) throws Exception {
        String usage = "java -jar PCFG_extractor.jar treebankFile outputfile";
        if (args.length != 2) {
            System.err.println("Usage: " + usage);
            return;
        }
        File treebankFile = new File(args[0]);
        File grammarOutputFile = new File(args[1]);
        PCFG_extractor.extractPCFG(treebankFile, grammarOutputFile);
        FileUtil.sortFile(grammarOutputFile);
    }
}

