Add binding / lookup pass

This commit is contained in:
Jeremy Penner 2024-07-28 14:24:46 -04:00
parent 6c6d674c2a
commit f19d274ea3
9 changed files with 123 additions and 151 deletions

View file

@ -1,7 +1,9 @@
(ns tock.compiler
(:require [tock.compiler.util :refer [form-dispatch compile-error lookup bind!] :as u]
[tock.compiler.type :refer [typecheck-expr]]
[tock.compiler.wasm :refer [ctx-to-wasm]]
(:require [tock.compiler.desugar :refer [desugar]]
[tock.compiler.bind :refer [bind]]
[tock.compiler.specials :refer [specials]]
;; [tock.compiler.type :refer [typecheck-expr]]
;; [tock.compiler.wasm :refer [ctx-to-wasm]]
[meander.epsilon :as m]))
;; compiler structure:
@ -12,29 +14,47 @@
;; 2. expansion pass
;; * recursively desugars expressions until the entire source tree is in the form of (special expr...)
;; * any non-typed parameters (names, types, etc) are stored in metadata
;; 3. typechecking pass
;; 3. binding pass
;; * create scope atoms and attach them to metadata of name lookups (l/lookup)
;; * create binding expressions and associate them with the appropriate scopes
;; * (l/local {:name ?name :id ?stable-counter})
;; * (l/param {:name ?name :id ?position})
;; * stable-counter is defined in the top-level function scope
;; * replace l/lookup expressions with their binding expressions
;; 4. typechecking pass
;; * types are calculated for each expression in the tree, bottom-up
;; * parents use `coerce` or `unify` to validate each subexpression
;; 4. codegen pass
;; * parents' :typecheck method is passed its metadata and its children's types, and produces a list of
;; possible valid typings for the expression
;; * if the children can all be coerced to the given types, then typechecking for the expression passes
;; * whenever a variable definition is typed, its binding expression in its scope is updated with the appropriate metadata
;; * binding expressions must be checked against the metadata in scope before typechecking happens - perhaps l/lookup replacement
;; should happen here??
;; 5. type-lowering pass
;; * static allocation of globals
;; * allocation of locals
;; * memory-based stack frame allocation (so structs can be passed by reference)
;; * tock types are converted into wasm types
;; * struct access is converted into pointer arithmetic
;; 6. codegen pass
;; * function expression trees are recursively walked to generate linear wasm bytecode
;; * tock types are lowered to wasm types?
(defmulti compile-toplevel (fn [form _ctx] (form-dispatch form)))
(defmethod compile-toplevel :default [form _ctx]
(throw (compile-error form "Unrecognized form")))
;; (defmulti compile-toplevel (fn [form _ctx] (form-dispatch form)))
;; (defmethod compile-toplevel :default [form _ctx]
;; (throw (compile-error form "Unrecognized form")))
(defmethod compile-toplevel 'defn [form ctx]
(m/match form
(_ (m/pred simple-symbol? ?name) . !fndef ...)
(let [funcref (typecheck-expr (apply list 'fn !fndef) ctx)
exported-funcs (lookup ctx ::u/exported-funcs)]
(bind! ctx ?name {:funcref funcref})
(swap! exported-funcs assoc (name ?name) (last funcref)))))
;; (defmethod compile-toplevel 'defn [form ctx]
;; (m/match form
;; (_ (m/pred simple-symbol? ?name) . !fndef ...)
;; (let [funcref (typecheck-expr (apply list 'fn !fndef) ctx)
;; exported-funcs (lookup ctx ::u/exported-funcs)]
;; (bind! ctx ?name {:funcref funcref})
;; (swap! exported-funcs assoc (name ?name) (last funcref)))))
(defn new-ctx [] [(atom {::u/module-funcs (atom []) ::u/exported-funcs (atom {})})])
(defn new-ctx [] [(atom {:function-count 0})])
(defn compile [forms]
(defn compile [form]
(let [ctx (new-ctx)]
(doseq [form forms] (compile-toplevel form ctx))
(ctx-to-wasm ctx)))
(-> form
(desugar specials)
(bind specials ctx))))

View file

@ -0,0 +1,34 @@
(ns tock.compiler.bind
(:require [meander.epsilon :as m]
[tock.compiler.util :refer [get-special new-scope bind! lookup]]
[tock.compiler.meander :refer [bottom-up all-subexpressions m+ merge-metafield]]))
(defn decorate-ctx [specials ctx form]
(let [special (get-special specials form)
scope (:scope special)
ctx (if scope (new-scope ctx scope) ctx)
new-bindings (:new-bindings special)
bindings (if new-bindings (new-bindings form ctx) {})
new-form (m/rewrite form
('l/lookup ?m) ('l/lookup {:ctx ~ctx & ?m})
(m/and ?form ~new-bindings) (m+ {:bindings ~bindings} ?form)
?form ?form)]
(doseq [[symbol binding] bindings]
(bind! ctx symbol binding))
((all-subexpressions (partial decorate-ctx specials ctx)) new-form)))
(def perform-lookups
(bottom-up
(fn [form]
(m/match form
('l/lookup (m/and {:name ?symbol :ctx ?ctx} ?m))
(if-let [binding (lookup ?ctx ?symbol)]
(merge-metafield binding ?m)
(merge-metafield form {:error (str "Not defined: " ?symbol)}))
?form ?form))))
(defn bind [form specials ctx]
(let [decorate-pass (partial decorate-ctx specials ctx)]
(-> form
decorate-pass
perform-lookups)))

View file

@ -35,7 +35,7 @@
(defn make-call-pass [specials]
(let [special? (fn [key] (and (symbol? key) (contains? specials key)))]
(r/rewrite
(m/and (m/pred symbol? ?sym) (m/app meta ?m)) ('l/read {:symbol ?sym & ?m})
(m/and (m/pred symbol? ?sym) (m/app meta ?m)) ('l/lookup {:name ?sym & ?m})
((m/pred special? ?special) ?m . (m/cata !args) ...)
(?special ?m . !args ...)

View file

@ -36,4 +36,16 @@
{:type ['fn . !type ... ?return-type] & ?m}
{(m/app to-sym 'fn) [!type ...] & ?m}
{:type ['fn . !type ... 'void] & ?m})))
{:type ['fn . !type ... 'void] & ?m})))
(defn all-subexpressions [s]
(r/rewrite (?special ?m . !subexprs ...)
(?special ?m . (m/app s !subexprs) ...)))
(defn top-down [s]
(fn rec [t]
((r/pipe s (all-subexpressions rec)) t)))
(defn bottom-up [s]
(fn rec [t]
((r/pipe s (all-subexpressions rec)) t)))

View file

@ -1,7 +1,8 @@
(ns tock.compiler.specials
(:require [meander.epsilon :as m]
[meander.strategy.epsilon :as r]
[tock.compiler.meander :refer [parse-type to-sym label] :include-macros true]))
[tock.compiler.meander :refer [parse-type to-sym label] :include-macros true]
[tock.compiler.util :refer [get-meta]]))
;; no namespace - source symbol
;; l/sym - "lowered" form - special form not directly writable from source
@ -9,7 +10,7 @@
;; lowered form
(defn left-associative [symbol]
(r/rewrite
(~symbol ?m ?left ?right . !more ..1) (~symbol {} (~symbol ?m ?left ?right) !more ...)))
(~symbol ?m ?left ?right . !more ..1) (~symbol ?m (~symbol {} ?left ?right) . !more ...)))
(defn simple-identity [symbol]
(r/rewrite (~symbol _ ?v) ?v))
@ -56,11 +57,17 @@
('l/fn (m/app merge ?m {:type ['fn . !types ... 'void]
:params [!names ...]})
('do {} & ?body)))
:typecheck (r/rewrite ({:type ['fn . _ ... ?return-type]} _) [[?return-type ?return-type]])}
:typecheck (r/rewrite ({:type ['fn . _ ... ?return-type]} _) [[?return-type ?return-type]])
:scope {:local-counter 0 :valid-binding-forms #{'l/local 'l/param}}
:new-bindings (fn [form _ctx]
(into {} (map-indexed (fn [index [name type]] [name (list 'l/param {:type type :name name :index index})])
(m/rewrite form
('l/fn {:params [!names ...] :type ['fn . !types ... _]} _)
[[!names !types] ...]))))}
'+ {:desugar (left-binop-desugar '+)
:typecheck (combinator-typecheck numerical-type?)}
'- {:desugar (r/choice
(r/rewrite ('- ?m ?v) ('- ?m 0 ?v))
(r/rewrite ('- ?m ?v) ('- ?m ('l/lit {:value 0 :type 'i64}) ?v))
(left-associative '-))
:typecheck (combinator-typecheck numerical-type?)}
'* {:desugar (left-binop-desugar '*)
@ -77,10 +84,14 @@
:typecheck (comparitor-typecheck logical-type?)}
'or {:desugar (left-binop-desugar 'or)
:typecheck (comparitor-typecheck logical-type?)}
'def {:typecheck (r/rewrite (_ ?t) [[?t ?t]])}
'def {:typecheck (r/rewrite (_ ?t) [[?t ?t]])
:new-bindings (fn [form _ctx] (m/match form ('def {:name ?symbol} ?expr) {?symbol (list 'l/global {:name ?symbol})}))}
'do {:typecheck (r/rewrite (_) [['void]]
(_ . !stmt ... ?last) [[?last . (m/app (constantly 'void) !stmt) ... ?last]])}
'l/read {:typecheck (r/rewrite ({:type ?type}) [[?type]])}
(_ . !stmt ... ?last) [[?last . (m/app (constantly 'void) !stmt) ... ?last]])
:scope {}}
'l/lookup {:typecheck (r/rewrite ({:type ?type}) [[?type]])}
'l/local {:typecheck (r/rewrite ({:type ?type}) [[?type]])}
'l/param {:typecheck (r/rewrite ({:type ?type}) [[?type]])}
'l/lit {:typecheck (r/rewrite ({:type ?type}) [[?type]])}
'l/call
{:typecheck (r/rewrite (_ (m/and ['fn . !param-types ... ?return-type] ?fn-type) . _ ...)

View file

@ -1,6 +1,6 @@
(ns tock.compiler.type
(:require [meander.epsilon :as m]
[tock.compiler.util :refer [bind! compile-error form-dispatch lookup map-subexpressions new-scope] :as u]))
[tock.compiler.util :refer [bind! compile-error lookup new-scope] :as u]))
; once we've hit the typechecking stage, nodes in our expression tree are maps, where each node has at least :special
; and :args keys. :special is a symbol corresponding to a record in the `specials` map; :args is an ordered list of
@ -20,22 +20,12 @@
; written in the input as ^{enum [...]}. (if there is only one parameter, the vector is skipped).
; The desugaring process handles this conversion.
(defn syntax-type [metadata _ctx error]
(m/match metadata
(m/pred symbol? ?symbol) ?symbol
{:tag ?symbol} ?symbol
{:fn [!type ... '-> ?return-type]} (apply vector (concat '[fn] !type [?return-type]))
{:fn [!type ...]} (apply vector (concat '[fn] !type '[void]))
_ (when error (throw error))))
(defn expr-type [expr] (:type (meta expr)))
(defn assoc-type [expr symbol] (with-meta expr (assoc (meta expr) :type symbol)))
;; built-in types
(defn make-type [symbol] (fn [expr] (assoc-type expr symbol)))
(def void (make-type 'void))
(def f64 (make-type 'f64))
(def bool (make-type 'bool))
;; (defn make-type [symbol] (fn [expr] (assoc-type expr symbol)))
;; (def void (make-type 'void))
;; (def f64 (make-type 'f64))
;; (def bool (make-type 'bool))
;; typechecking helpers
(defn coerce [expr typesym error]
@ -44,12 +34,6 @@
(= typesym 'void) (void `(u/cast-void ~expr))
:else (throw error))))
(defn unify [lexpr rexpr error]
(let [ltype (expr-type lexpr)
rtype (expr-type rexpr)]
(cond (= ltype rtype) [lexpr rexpr]
(nil? error) [(coerce lexpr 'void nil) (coerce rexpr 'void nil)]
:else (throw error))))
;; The typechecker itself!
;; This is currently doing several jobs at once:
@ -67,73 +51,6 @@
;; External syntax uses simple namespaceless 'symbols, internal syntax uses `u/symbols
(defmulti decorate-type (fn [form _ctx] (form-dispatch form)))
(defmulti typecheck-expr (fn [form _ctx] (form-dispatch form)))
(defmethod decorate-type :default [form ctx]
(m/match form
((m/app #(typecheck-expr % ctx) (`u/funcref ?funcref) (m/app meta {:type ['fn . !param-types ... ?return-type]})) . !args ...)
; validate arity, coerce !args to !param-types, wrap in ?return-type
(do
(when (not= (count !param-types) (count !args))
(throw (compile-error form (str "Expected " (count !param-types) " arguments, got " (count !args)))))
(assoc-type (apply list (concat [`u/call-func ?funcref]
(map (fn [expr type] (coerce expr type (compile-error expr ["Expected" type])))
!args
!param-types)))
?return-type))))
(defmethod decorate-type 'if [form _ctx]
(m/match form
(_ ?test ?when-true ?when-false)
(let [test-expr (coerce ?test 'bool (compile-error ?test "Condition must be a boolean expression"))
[true-expr false-expr] (unify ?when-true ?when-false nil)
type (expr-type true-expr)]
(assoc-type (list 'if test-expr true-expr false-expr) type))))
(defn equatable-type? [typesym] (contains? #{'f64 'i32 'bool} typesym))
(defn ordered-type? [typesym] (= typesym 'f64))
(defn logical-type? [typesym] (= typesym 'bool))
(defn numerical-type? [typesym] (= typesym 'f64))
(defn decorate-binop [form compute-resulttype verb]
(m/match form
(?op ?left ?right)
(let [[left-expr right-expr] (unify ?left ?right (compile-error ?right (str "Cannot " verb " incompatible types")))]
(if-let [type (compute-resulttype (expr-type left-expr))]
(assoc-type (list ?op left-expr right-expr) type)
(throw (compile-error ?right (str "Cannot " verb " values of type " type " using " ?op)))))))
(defn decorate-comparison [form typecheck]
(decorate-binop form (fn [type] (if (typecheck type) 'bool nil)) "compare"))
(defn decorate-combinator [form typecheck verb]
(decorate-binop form (fn [type] (if (typecheck type) type nil)) verb))
(defmethod decorate-type '= [form _ctx] (decorate-comparison form equatable-type?))
(defmethod decorate-type 'not= [form _ctx] (decorate-comparison form equatable-type?))
(defmethod decorate-type '< [form _ctx] (decorate-comparison form ordered-type?))
(defmethod decorate-type '<= [form _ctx] (decorate-comparison form ordered-type?))
(defmethod decorate-type '> [form _ctx] (decorate-comparison form ordered-type?))
(defmethod decorate-type '>= [form _ctx] (decorate-comparison form ordered-type?))
(defmethod decorate-type 'and [form _ctx] (decorate-combinator form logical-type? "accept"))
(defmethod decorate-type 'or [form _ctx] (decorate-combinator form logical-type? "accept"))
(defmethod decorate-type '+ [form _ctx] (decorate-combinator form numerical-type? "add"))
(defmethod decorate-type '- [form _ctx] (decorate-combinator form numerical-type? "subtract"))
(defmethod decorate-type '* [form _ctx] (decorate-combinator form numerical-type? "multiply"))
(defmethod decorate-type '/ [form _ctx] (decorate-combinator form numerical-type? "divide"))
(defmethod decorate-type `u/atom [form ctx]
(cond (integer? form) (f64 `(u/lit ~form))
(boolean? form) (bool `(u/lit ~form))
(simple-symbol? form) (m/match (lookup ctx form)
{:local (m/pred number? ?local-id) :type ?type} (assoc-type `(u/local ~?local-id) ?type)
{:funcref (m/pred list? ?funcref)} ?funcref
_ (throw (compile-error form "Undefined symbol")))
:else (throw (compile-error form "Not a valid literal"))))
(defmethod typecheck-expr :default [form ctx]
(decorate-type (map-subexpressions form #(typecheck-expr % ctx)) ctx))
; todo: actually validate types
(defn validate-type [type] type)
@ -160,21 +77,3 @@
(m/match params
[!name ... '-> ?return-type] (bind-all! !name ?return-type)
[!name ...] (bind-all! !name 'void))))
(defmethod typecheck-expr 'fn [form ctx]
(let [fn-ctx (new-scope ctx {::u/lastlocal (atom -1)})]
(m/match form
(_ ?params . !body ..1)
(let [fn-type (bind-params! ?params fn-ctx)
return-type (last fn-type)
body-expr (coerce (typecheck-expr (apply list 'do !body) fn-ctx) return-type
(compile-error form ["Expected return type" return-type]))]
(assoc-type (list `u/funcref (alloc-func ctx fn-type body-expr)) fn-type)))))
(defmethod typecheck-expr 'do [form ctx]
(m/match form
(_ . !statement ... ?return-expr)
(let [statements (apply list (map (fn [expr] (coerce (typecheck-expr expr ctx) 'void nil)) !statement))
return-expr (typecheck-expr ?return-expr ctx)
return-type (expr-type return-expr)]
(assoc-type (apply list (concat ['do] statements [return-expr])) return-type))))

View file

@ -4,17 +4,6 @@
; todo: extract location of error term
(js/Error. [term message]))
(defn form-dispatch [form]
(if (list? form) (first form) 'tock.compiler.util/atom))
; not sure about this... in non-special-form cases, all first values are subexpressions
; as well, it's simply not needed for decorate-type, as in order to typecheck any special
; form, you need to explicitly recurse anyway
(defmulti map-subexpressions (fn [form _f] (form-dispatch form)))
(defmethod map-subexpressions :default [form f]
(apply list (first form) (map f (rest form))))
(defmethod map-subexpressions 'tock.compiler.util/atom [form _f] form)
; ctx is a vector of atoms of maps
(defn new-scope
([ctx base] (conj ctx (atom base)))
@ -26,5 +15,12 @@
(let [scope (deref (first scopes))
value (get scope key)]
(if (nil? value) (recur (rest scopes)) value)))))
(defn bind! [ctx key data]
(swap! (last ctx) assoc key data))
(defn update-binding! [ctx key f & rest]
(apply swap! (first (filter #(contains? @% key) ctx)) update key f rest))
(defn get-special [specials form] (get specials (first form)))
(defn get-meta [form key] (get (second form) key))

View file

@ -4,9 +4,10 @@
[helins.wasm.bin :as op]
[helins.binf.string :as binf.string]
[tock.compiler.util :refer [compile-error form-dispatch lookup] :as u]
[tock.compiler.type :refer [expr-type]]
[meander.epsilon :as m]))
(defn expr-type [form] (m/match form (_ {:type ?type} & _) ?type))
(def operator-assembly
{['= 'f64] [[op/f64-eq]]
['= 'i32] [[op/i32-eq]]

View file

@ -1,7 +1,6 @@
(ns tock.experiment
(:require [helins.wasm :as wasm]
[helins.binf :as binf]
[tock.compiler.desugar :refer [desugar]]
[tock.compiler :refer [compile]]))
@ -27,7 +26,7 @@
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(defn main []
(js/console.log (desugar `(fn add [^f64 left ^f64 right -> f64] (+ left right)))))
(js/console.log (compile `(fn add [^f64 left ^f64 right -> f64] (+ left right)))))
;; (js/console.log test-wasm)
;; (-> (instantiate-wasm test-wasm #js {})
;; (.then #(js/console.log (-> % (.-instance) (.-exports) (.add-double 2 3)))))