A collection of functions for transforming the ANTLR AST into a more desirable AST, as well as functions to operate on the transformed AST.

(ns mini-java.ast
  (:require [clojure.reflect :refer [typename]]
            [clojure.string :as string]
            [mini-java.util :as util])
  (:import [org.antlr.v4.runtime.tree TerminalNodeImpl]
           [mini_java.antlr MiniJavaParser]))

Returns the line and column of the first token in a given node.

(defn- node-line-and-column
  "Returns whatever `util/token-line-and-column` reports for the start token
  of `node` (callers in this file destructure it as a line/column pair)."
  [node]
  (util/token-line-and-column (.getStart node)))

Attaches line, column, and context metadata to the given object.

(defn- with-line-and-column
  "Returns `obj` carrying a metadata map with the :line and :column of the
  first token of `node`, plus `ctx` under :context. Any metadata already on
  `obj` is replaced, not merged."
  [node ctx obj]
  (let [position (node-line-and-column node)
        line     (nth position 0 nil)
        column   (nth position 1 nil)]
    (with-meta obj {:line line, :column column, :context ctx})))

Returns the context metadata from the given node.

(defn context
  "Returns the value stored under :context in the metadata of `node`, or nil
  when there is no such metadata."
  [node]
  (:context (meta node)))

An array of all inner classes of MiniJavaParser.

(def ^:private parser-inner-classes
  "The array of classes obtained by calling `getClasses` on the
  MiniJavaParser class object."
  (-> MiniJavaParser .getClasses))

Transforms the given type generated by ANTLR into a pair of its Clojure keyword and the type itself.

For example, if given the type ClassDeclarationContext, outputs [:class-declaration ClassDeclarationContext].

(defn- typeify
  "Pairs an ANTLR-generated parser class with its keyword name.

  The keyword is derived from the type name of `type` by dropping everything
  up to and including `MiniJavaParser$`, dropping a trailing `Context`
  suffix, and converting the remainder with `util/camel->lisp`.

  Returns a vector `[keyword type]`, e.g. for ClassDeclarationContext the
  keyword is :class-declaration."
  [type]
  (let [kw-name (-> (typename type)
                    (string/replace #".*MiniJavaParser\$" "")
                    ;; anchored: only the suffix is removed, so a class whose
                    ;; name contains \"Context\" elsewhere keeps that part
                    (string/replace #"Context$" "")
                    util/camel->lisp
                    keyword)]
    [kw-name type]))

A mapping from MiniJavaParser inner class types to their keyword representations. TerminalNodeImpl -> :terminal-node is added manually, as it is the only type not ending with "Context" which needs to be used.

(def ^:private type->key
  "Map from each class in `parser-inner-classes` to the keyword produced for
  it by `typeify`, with TerminalNodeImpl mapped to :terminal-node by hand."
  (let [class->keyword (into {}
                             (for [c parser-inner-classes
                                   :let [[kw cls] (typeify c)]]
                               ;; typeify yields [keyword class]; flip it so
                               ;; the class becomes the map key
                               [cls kw]))]
    (assoc class->keyword TerminalNodeImpl :terminal-node)))

A mapping from objects to their type keywords, as given by type->key.

(def ^:private obj->type-key
  "Function that looks up the `type` of an object in `type->key`, yielding
  its type keyword (nil when the type has no entry)."
  (fn [obj]
    (type->key (type obj))))

Returns all children of a given node.

(defn- children
  "Returns a lazy seq of the children of `node`, in index order, using its
  `getChildCount` and `getChild` methods."
  [node]
  (for [idx (range (.getChildCount node))]
    (.getChild node idx)))

Removes the outer braces from a seq of nodes by dropping its first and last elements.

(defn- remove-braces
  "Returns `nodes` without its first and last elements (the surrounding brace
  tokens). Yields nil when nothing remains."
  [nodes]
  (butlast (rest nodes)))
(defn- var-declaration?
  "True when the context metadata of `x` is :var-declaration."
  [x]
  (-> x context (= :var-declaration)))
(defn- field-declaration?
  "True when the context metadata of `x` is :field-declaration."
  [x]
  (-> x context (= :field-declaration)))
(defn- method-declaration?
  "True when the context metadata of `x` is :method-declaration."
  [x]
  (-> x context (= :method-declaration)))

Multimethod for transforming an ANTLR TreeNode into a minimal hash-map representation. Dispatches on the keyword representation of the node's type. Any transformed node which implements IObj will have attached metadata containing the line, column, and context of the node.

(defmulti ast
  "Transforms an ANTLR tree node into a minimal Clojure representation.
  Dispatches on the keyword that `obj->type-key` yields for the node."
  (fn [node] (obj->type-key node)))
(defmethod ast :default [node]
  ;; No method is registered for this node's type keyword. Reaching this
  ;; point indicates a bug, so fail loudly with the offending node attached.
  (let [details {:type :unknown-node-type,
                 :node node}]
    (throw (ex-info "Unknown node type" details))))
(defmethod ast :terminal-node [node]
  ;; A terminal node becomes the text of its underlying symbol.
  (.getText (.-symbol node)))
(defmethod ast :goal [node]
  ;; Root of the parse tree. The first child is the main class; the last
  ;; child is dropped, and everything in between is an ordinary class.
  (let [[main-class & remaining] (children node)
        classes                  (butlast remaining)]
    (with-line-and-column node :goal
      {:main    (ast main-class)
       :classes (map ast classes)})))
(defmethod ast :main-class-declaration [node]
  ;; The main class becomes a hash-map of its name (child 1) and its
  ;; transformed body (child 2).
  (let [class-name (ast (.getChild node 1))
        body       (ast (.getChild node 2))]
    (with-line-and-column node :main-class-declaration
      {:name class-name
       :body body})))
(defmethod ast :class-declaration [node]
  ;; A non-main class becomes a hash-map of its name, its parent's name (nil
  ;; when it has none), its variables, and its methods.
  (let [;; five children means the declaration names a parent class
        has-parent? (= 5 (.getChildCount node))
        ;; the body sits at index 4 with a parent, index 2 without; it
        ;; transforms into a hash-map holding :vars and :methods
        body        (ast (.getChild node (if has-parent? 4 2)))]
    (with-line-and-column node :class-declaration
      {:name    (ast (.getChild node 1))
       :parent  (when has-parent? (ast (.getChild node 3)))
       :vars    (:vars body)
       :methods (:methods body)})))
(defmethod ast :main-class-body [node]
  ;; The main class body is represented by the transformation of its only
  ;; method (child 1).
  (-> node (.getChild 1) ast))
(defmethod ast :main-method [node]
  ;; The main method is represented by the transformation of its only
  ;; statement (child 2).
  (-> node (.getChild 2) ast))
(defmethod ast :class-body [node]
  ;; A non-main class body becomes a hash-map of its transformed variables
  ;; and methods.
  (let [;; every declaration between the braces is transformed; fields and
        ;; methods are then told apart by their context metadata
        members (->> node children remove-braces (map ast))]
    (with-line-and-column node :class-body
      {:vars    (filter field-declaration? members)
       :methods (filter method-declaration? members)})))
(defmethod ast :method-declaration [node]
  ;; A method becomes a hash-map of its name, return type, arguments, local
  ;; variables, and body statements, all transformed.
  (let [child-ast (fn [idx] (ast (.getChild node idx)))
        ;; the method body (child 4) transforms into a hash-map that already
        ;; separates variable declarations from statements
        body-map  (child-ast 4)]
    (with-line-and-column node :method-declaration
      {:name (child-ast 2)
       :type (child-ast 1)
       :args (child-ast 3)
       :vars (:vars body-map)
       :body (:body body-map)})))
(defmethod ast :method-body [node]
  ;; Splits a method body into its variable declarations (:vars) and all
  ;; remaining statements (:body). No metadata is attached here because the
  ;; result is merged into the enclosing method declaration.
  (let [items (->> node children remove-braces (map ast))]
    {:vars (filter var-declaration? items)
     :body (remove var-declaration? items)}))
(defmethod ast :field-declaration [node]
  ;; A field declaration becomes a hash-map of its name (child 1) and its
  ;; type (child 0).
  (let [field-name (ast (.getChild node 1))
        field-type (ast (.getChild node 0))]
    (with-line-and-column node :field-declaration
      {:name field-name
       :type field-type})))
(defmethod ast :var-declaration [node]
  ;; A variable declaration becomes a hash-map of its name (child 1) and its
  ;; type (child 0).
  (let [var-name (ast (.getChild node 1))
        var-type (ast (.getChild node 0))]
    (with-line-and-column node :var-declaration
      {:name var-name
       :type var-type})))
(defmethod ast :nested-statement [node]
  ;; A nested statement becomes a seq of the transformed statements found
  ;; between its braces.
  (let [statements (map ast (remove-braces (children node)))]
    (with-line-and-column node :nested-statement statements)))
(defmethod ast :if-else-statement [node]
  ;; An if/else statement becomes a hash-map of its predicate, then branch,
  ;; and else branch, taken from children 2, 4, and 6.
  (let [part (fn [idx] (ast (.getChild node idx)))]
    (with-line-and-column node :if-else-statement
      {:pred (part 2)
       :then (part 4)
       :else (part 6)})))
(defmethod ast :while-statement [node]
  ;; A while statement becomes a hash-map of its predicate (child 2) and its
  ;; body statement (child 4).
  (let [predicate (ast (.getChild node 2))
        body      (ast (.getChild node 4))]
    (with-line-and-column node :while-statement
      {:pred predicate
       :body body})))
(defmethod ast :print-statement [node]
  ;; A print statement becomes a hash-map holding only its single argument
  ;; (child 2).
  (let [argument (-> node (.getChild 2) ast)]
    (with-line-and-column node :print-statement
      {:arg argument})))
(defmethod ast :assign-statement [node]
  ;; An assignment becomes a hash-map of its target (child 0) and its source
  ;; expression (child 2).
  (let [part (fn [idx] (ast (.getChild node idx)))]
    (with-line-and-column node :assign-statement
      {:target (part 0)
       :source (part 2)})))
(defmethod ast :array-assign-statement [node]
  ;; An array assignment becomes a hash-map of its target (child 0), the
  ;; index being written (child 2), and the source expression (child 5).
  (let [target (ast (.getChild node 0))
        index  (ast (.getChild node 2))
        source (ast (.getChild node 5))]
    (with-line-and-column node :array-assign-statement
      {:target target
       :index  index
       :source source})))
(defmethod ast :return-statement [node]
  ;; A return statement becomes a hash-map holding only its return value
  ;; (child 1).
  (let [value (-> node (.getChild 1) ast)]
    (with-line-and-column node :return-statement
      {:return-value value})))
(defmethod ast :recur-statement [node]
  ;; A recur statement becomes a hash-map of its predicate (child 1),
  ;; argument list (child 3), and base case (child 5).
  (let [part (fn [idx] (ast (.getChild node idx)))]
    (with-line-and-column node :recur-statement
      {:pred (part 1)
       :args (part 3)
       :base (part 5)})))
(defmethod ast :method-argument-list [node]
  ;; A method argument list becomes a seq of the transformed arguments. The
  ;; first and last children are dropped, then every second child is kept so
  ;; the separators between arguments are skipped.
  (let [args (->> node children remove-braces (take-nth 2))]
    (with-line-and-column node :method-argument-list
      (map ast args))))
(defmethod ast :formal-parameters [node]
  ;; Formal parameters become the transformed parameter list (child 1) when
  ;; the node has exactly three children, and an empty list otherwise.
  (let [has-list? (= 3 (.getChildCount node))
        params    (if has-list?
                    (ast (.getChild node 1))
                    ())]
    (with-line-and-column node :formal-parameters params)))
(defmethod ast :formal-parameter-list [node]
  ;; A non-empty formal parameter list becomes a seq of its transformed
  ;; parameters, each tagged with a sequential :arg-index starting at 0.
  (let [params (->> (children node)
                    (take-nth 2) ; ignore commas
                    (map ast))]
    (map-indexed (fn [idx param] (assoc param :arg-index idx))
                 params)))
(defmethod ast :formal-parameter [node]
  ;; A formal parameter becomes a hash-map of its type (child 0) and its
  ;; name (child 1).
  (let [param-type (ast (.getChild node 0))
        param-name (ast (.getChild node 1))]
    (with-line-and-column node :formal-parameter
      {:type param-type
       :name param-name})))
(defmethod ast :type [node]
  ;; A type is represented by the transformation of its only child.
  (-> node (.getChild 0) ast))
(defn- unary-expression
  "Transforms a unary expression node into a hash-map containing its operand
  (child 1). The node's own type keyword is recorded as the context
  metadata."
  [node]
  (with-line-and-column node (obj->type-key node)
    {:operand (ast (.getChild node 1))}))
(defn- binary-expression
  "Transforms a binary expression node into a hash-map containing its left
  operand (child 0) and right operand (child 2). The node's own type keyword
  is recorded as the context metadata."
  [node]
  (with-line-and-column node (obj->type-key node)
    {:left  (ast (.getChild node 0)),
     :right (ast (.getChild node 2))}))
;; Every binary operator node shares one shape, so each of these type
;; keywords is registered against the common binary-expression transformation.
(doseq [expression-type [:and-expression
                         :lt-expression
                         :add-expression
                         :sub-expression
                         :mul-expression]]
  (defmethod ast expression-type [node]
    (binary-expression node)))
(defmethod ast :array-access-expression [node]
  ;; An array access becomes a hash-map of the expression evaluating to the
  ;; array (child 0) and the index being accessed (child 2).
  (let [array (ast (.getChild node 0))
        index (ast (.getChild node 2))]
    (with-line-and-column node :array-access-expression
      {:array array
       :index index})))
(defmethod ast :array-length-expression [node]
  ;; An array length expression becomes a hash-map holding only the
  ;; expression that evaluates to the array (child 0).
  (let [array (-> node (.getChild 0) ast)]
    (with-line-and-column node :array-length-expression
      {:array array})))
(defmethod ast :method-call-expression [node]
  ;; A method call becomes a hash-map of the expression it is invoked on
  ;; (child 0), the method name (child 2), and its arguments (child 3).
  (let [part (fn [idx] (ast (.getChild node idx)))]
    (with-line-and-column node :method-call-expression
      {:caller (part 0)
       :method (part 2)
       :args   (part 3)})))
(defmethod ast :int-lit-expression [node]
  ;; An integer literal becomes a hash-map holding only its value, built as
  ;; an Integer object from the transformed first child.
  (let [text (ast (.getChild node 0))]
    (with-line-and-column node :int-lit-expression
      {:value (Integer. text)})))
(defmethod ast :boolean-lit-expression [node]
  ;; A boolean literal becomes a hash-map holding only its value as a Boolean
  ;; object, parsed from the transformed first child.
  ;;
  ;; Boolean/valueOf is used instead of the Boolean constructor: the
  ;; constructor allocates a fresh object, and Clojure's conditionals treat
  ;; any Boolean other than the canonical Boolean/FALSE as logically true, so
  ;; a constructed `false` would wrongly pass an `if`/`when` test downstream.
  (let [text (ast (.getChild node 0))]
    (with-line-and-column node :boolean-lit-expression
      {:value (Boolean/valueOf ^String text)})))
(defmethod ast :identifier-expression [node]
  ;; An identifier expression becomes a hash-map holding only the
  ;; transformed first child, i.e. the string form of the ID.
  (let [id (-> node (.getChild 0) ast)]
    (with-line-and-column node :identifier-expression
      {:id id})))
(defmethod ast :this-expression [_node]
  ;; A this expression is represented by just the :this keyword; the node
  ;; itself is not inspected.
  :this)
(defmethod ast :array-instantiation-expression [node]
  ;; An array instantiation becomes a hash-map holding only its size
  ;; (child 3). Arrays can only be int arrays, so no element type is kept.
  (let [size (-> node (.getChild 3) ast)]
    (with-line-and-column node :array-instantiation-expression
      {:size size})))
(defmethod ast :object-instantiation-expression [node]
  ;; An object instantiation becomes a hash-map holding only its type
  ;; (child 1). Constructor arguments are not kept, as MiniJava constructors
  ;; take none.
  (let [object-type (-> node (.getChild 1) ast)]
    (with-line-and-column node :object-instantiation-expression
      {:type object-type})))
;; Both unary operator nodes share one shape, so each type keyword is
;; registered against the common unary-expression transformation.
(doseq [expression-type [:not-expression
                         :neg-expression]]
  (defmethod ast expression-type [node]
    (unary-expression node)))
(defmethod ast :paren-expression [node]
  ;; A parenthesised expression is represented by whatever it wraps
  ;; (child 1); the parentheses only matter at parse time.
  (-> node (.getChild 1) ast))
(defmethod ast :int-type [_node]
  ;; An int type is represented by the keyword :int; the node itself is not
  ;; inspected.
  :int)
(defmethod ast :int-array-type [_node]
  ;; An int array type is represented by the keyword :int<>; the node itself
  ;; is not inspected.
  :int<>)
(defmethod ast :boolean-type [_node]
  ;; A boolean type is represented by the keyword :boolean; the node itself
  ;; is not inspected.
  :boolean)