antlr4/java: pretty print parse tree to stdout

Besides a graphical parse tree, my ANTLR4 extension for Visual Studio Code also produces a formatted text parse tree:

(Screenshot of the formatted text parse tree; image not included.)


Extracted from SnippetsTest as a standalone utility class:

import java.util.List;

import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;

/**
 * Static helpers for rendering an ANTLR tree as indented, multi-line text.
 *
 * <p>The class keeps no mutable state: the current nesting depth is passed down the recursion as
 * a parameter, so calls from different threads do not interfere with each other.
 */
public class TreeUtils {

    /** Platform dependent end-of-line marker */
    public static final String Eol = System.lineSeparator();
    /** The literal indent char(s) used for pretty-printing */
    public static final String Indents = "  ";

    private TreeUtils() {}

    /**
     * Pretty print out a whole tree. {@link Trees#getNodeText} is used on the node payloads to get
     * the text for the nodes. (Derived from Trees.toStringTree(....))
     *
     * <p>Every node that has children starts on a new line, indented by {@link #Indents} once per
     * nesting level; leaf nodes are appended inline to the line of their parent. The raw result is
     * then cleaned up: whitespace-only lines are emptied and doubled line breaks are collapsed
     * into a single {@link #Eol}.
     *
     * @param t the root of the tree to print
     * @param ruleNames the rule names handed to {@code Trees.getNodeText} to label the nodes
     * @return the formatted, multi-line text form of the tree
     */
    public static String toPrettyTree(final Tree t, final List<String> ruleNames) {
        return process(t, ruleNames, 0).replaceAll("(?m)^\\s+$", "").replaceAll("\\r?\\n\\r?\\n", Eol);
    }

    /**
     * Renders {@code t} and, recursively, all of its descendants.
     *
     * @param t the node to render
     * @param ruleNames the rule names used to label the nodes
     * @param level nesting depth of {@code t}; the root is at depth 0
     * @return the text for the subtree rooted at {@code t}
     */
    private static String process(final Tree t, final List<String> ruleNames, final int level) {
        final String text = Utils.escapeWhitespace(Trees.getNodeText(t, ruleNames), false);
        // Leaves are emitted as bare text: no line break, no indentation, no separator.
        if (t.getChildCount() == 0) return text;

        final StringBuilder sb = new StringBuilder();
        sb.append(lead(level)).append(text).append(' ');
        for (int i = 0; i < t.getChildCount(); i++) {
            sb.append(process(t.getChild(i), ruleNames, level + 1));
        }
        // Close the subtree with a fresh line at this node's own depth, so that whatever
        // follows in the parent does not continue on the last child's line.
        sb.append(lead(level));
        return sb.toString();
    }

    /**
     * Builds the line lead for the given depth.
     *
     * @param depth nesting depth
     * @return an empty string for depth 0 or less; otherwise {@link #Eol} followed by
     *     {@code depth} copies of {@link #Indents}
     */
    private static String lead(final int depth) {
        final StringBuilder sb = new StringBuilder();
        if (depth > 0) {
            sb.append(Eol);
            for (int cnt = 0; cnt < depth; cnt++) {
                sb.append(Indents);
            }
        }
        return sb.toString();
    }
}

Call the method as follows:

// toPrettyTree takes the rule names as a List<String>, so wrap the array returned by the parser.
// (Needs java.util.Arrays and java.util.List; `parser` and `tree` come from your own parsing code.)
List<String> ruleNamesList = Arrays.asList(parser.getRuleNames());
// Render the tree as formatted, multi-line text.
String prettyTree = TreeUtils.toPrettyTree(tree, ruleNamesList);

If you would rather use regex only for what it's really for, you can always print the tree yourself:

import java.util.Arrays;
import java.util.List;

import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;

/**
 * Renders the tree rooted at {@code root} as text, one node per line, with each nesting level
 * indented by two spaces.
 *
 * @param parser the parser whose rule names are used to label the nodes
 * @param root the root node of the tree to print
 * @return the multi-line text form of the tree
 */
public static String printSyntaxTree(Parser parser, ParseTree root) {
    final StringBuilder out = new StringBuilder();
    final String[] names = parser.getRuleNames();
    // The root is printed at depth 0; the recursion increases the depth per level.
    recursive(root, out, 0, Arrays.asList(names));
    return out.toString();
}

/**
 * Appends one line for {@code aRoot} to {@code buf}, followed by the lines of all its
 * descendants in depth-first order.
 *
 * @param aRoot the node to print
 * @param buf the buffer receiving the output
 * @param offset nesting depth of {@code aRoot}; each level adds two spaces of indentation
 * @param ruleNames the rule names used to label the nodes
 */
private static void recursive(ParseTree aRoot, StringBuilder buf, int offset, List<String> ruleNames) {
    // Indentation: two spaces per nesting level.
    for (int pad = offset; pad > 0; pad--) {
        buf.append("  ");
    }
    final String label = Trees.getNodeText(aRoot, ruleNames);
    buf.append(label);
    buf.append("\n");

    // Only ParserRuleContext nodes are descended into; every other node ends here.
    if (!(aRoot instanceof ParserRuleContext)) {
        return;
    }
    final ParserRuleContext ctx = (ParserRuleContext) aRoot;
    if (ctx.children == null) {
        return;
    }
    final int childOffset = offset + 1;
    for (ParseTree kid : ctx.children) {
        recursive(kid, buf, childOffset, ruleNames);
    }
}

Usage:

// yourOwnRule() is a placeholder: call the rule method of your parser that you want to parse with.
ParseTree root = parser.yourOwnRule();
// Print the indented text form of the resulting tree to stdout.
System.out.println(printSyntaxTree(parser, root));

I wanted to put in my own spin on this, taking advantage of the fact that I already use StringTemplate in my project. This means I don't have to manually deal with levels like the other answers. It also makes the output format easier to customize.

On top of that, the main reason I'm posting this is that I decided to skip printing rules that I'm only 'passing through', i.e. when using chain rules such as

// Chain rules: a, b and c each have an alternative that is nothing but the next rule in the chain.
a : b | something_else ;
b : c | another ;
c : d | yet_more ;
d : rule that matters ;

since they cluttered my output when checking trees from small inputs without adding any useful information. This is also easy to change, at the location of the `//pass-through rules` comment.

I also copied in the definition of Trees.getNodeText and modified it to use a plain array, which gets rid of the unnecessary wrapping; having my own copy also lets me customize it if I feel like it.

Finally, I made it take the parser and tree and just straight dump to System.out, since that's the only situation I need it in.

import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.antlr.v4.runtime.tree.Tree;
import org.stringtemplate.v4.ST;

/**
 * For pretty-dumping trees in short form.
 *
 * <p>Output is assembled from StringTemplate templates, so no nesting depth is tracked here;
 * the layout comes entirely from the template text. Nodes that have exactly one child are not
 * printed themselves (see the pass-through branch in {@code process}).
 */
public class TreeUtils {
    /** Static utility class; not meant to be instantiated. */
    private TreeUtils() {}

    /**
     * Creates a fresh template for a node with several children: the rule text on the first
     * line, then the {@code child} values on the following lines, separated by line breaks.
     */
    private static ST template() {
        return new ST("<rule_text>\n\t<child; separator=\"\n\">");
    }

    /** Creates a template that renders just {@code text}, passed in as an attribute. */
    private static ST literal(String text) {
        return new ST("<text>").add("text", text);
    }

    /**
     * Renders {@code tree} in short form and prints it to {@code System.out}.
     *
     * @param parser the parser whose rule names are used to label rule nodes
     * @param tree the root of the tree to dump
     */
    public static void dump(Parser parser, Tree tree) {
        System.out.println(process(parser.getRuleNames(),tree).render());
    }

    /**
     * Returns the label of a single node (adapted from {@code Trees.getNodeText}, using a plain
     * array for the rule names).
     *
     * @param t the node to label
     * @param ruleNames rule names, indexed by rule index
     * @return the rule name for a {@link RuleContext}, {@code toString()} for an
     *     {@link ErrorNode}, the token text for a {@link TerminalNode} with a symbol, and
     *     otherwise the payload's token text or {@code toString()}
     */
    private static String getNodeText(Tree t, String[] ruleNames) {
        if ( t instanceof RuleContext ) {
            int ruleIndex = ((RuleContext)t).getRuleContext().getRuleIndex();
            return ruleNames[ruleIndex];
        }
        else if ( t instanceof ErrorNode) {
            return t.toString();
        }
        else if ( t instanceof TerminalNode) {
            Token symbol = ((TerminalNode)t).getSymbol();
            if (symbol != null) {
                return symbol.getText();
            }
            // A terminal without a symbol falls through to the payload handling below.
        }

        Object payload = t.getPayload();
        if ( payload instanceof Token ) {
            return ((Token)payload).getText();
        }
        return payload.toString();
    }

    /**
     * Builds the template for the subtree rooted at {@code t}.
     *
     * @param ruleNames rule names, indexed by rule index
     * @param t the node to render
     * @return a template which, when rendered, yields the short-form text of the subtree
     */
    private static ST process(String[] ruleNames, Tree t) {
        if(t.getChildCount()==0) {
            return literal(getNodeText(t, ruleNames));
        } else if(t.getChildCount()==1) {
            //pass-through rules: a node with a single child is skipped and its child is
            //rendered in its place; change this branch to print such nodes as well
            return process(ruleNames,t.getChild(0));
        } else {
            ST out=template();
            out.add("rule_text", getNodeText(t, ruleNames));
            for(int i=0;i<t.getChildCount();i++) {
                out.add("child", process(ruleNames,t.getChild(i)));
            }
            return out;
        }
    }
}