edtris/waltz/sqlog.fnl
Jeremy Penner 82d04e0649 comparison support, fix bugs in inference rules
* rule expression generation was miscounting the number of parameters
* allow using arbitrary expressions in the head of a rule
2022-03-27 14:34:50 -04:00

179 lines
8.5 KiB
Fennel

; sqlog, a datalog-like system built on sqlite
; may not actually have the full power of datalog OR the full power of sqlite
; datalog has a database of facts, and rules. facts are represented in sqlite directly as tables.
; rules are used by the sqlog engine when generating queries; typically they take the form
; of subqueries used in the WITH RECURSIVE clause.
; It would be possible to store them as views, but the view would need to be regenerated from
; its base rules anytime there is a change, so sqlog needs to know about them no matter what.
; Making sqlog manage them in-memory also allows us to use it to query arbitrary sqlite databases.
; Ideally we would persist rules as JSON in a special table; maybe sqlog_rules?
(local Object (require :core.object))
(local lume (require :lib.lume))
(local util (require :lib.util))
(local Sqlog (Object:extend))
; Generating SQL from Datalog should not be too complex, but it pays to start with the simplest
; case and build up from there.
; simple queries:
; [p x y] -> SELECT p.c1 AS x, p.c2 AS y FROM p
; [p 1 y] -> SELECT p.c2 AS y FROM p WHERE p.c1 = 1
; [q x] [p x 1] -> SELECT q.c1 AS x FROM q JOIN p WHERE p.c1 = q.c1 AND p.c2 = 1
; [p 1 2] -> SELECT true FROM p WHERE p.c1 = 1 AND p.c2 = 2
; [p 1 x] [p x 2] -> SELECT t1.c2 AS x FROM p AS t1 JOIN p AS t2 WHERE t1.c1 = 1 AND t1.c2 = t2.c1 AND t2.c2 = 2
; queries using rules:
; ([ancestor x y] [parent x y]) -> SELECT p.c1 AS x, p.c2 AS y FROM parent AS p
; ([ancestor x y] [parent x z] [ancestor z y]) -> SELECT p.c1 AS x, a.y AS y FROM parent AS p JOIN ancestor AS a WHERE p.c1 = a.x AND p.c2 = a.y
; [ancestor x :john] -> WITH RECURSIVE ancestor(x, y) AS (SELECT ... UNION SELECT ...) SELECT a.c1 AS x FROM ancestor AS a WHERE a.y = 'john'
; ([ancestor :bob x] [ancestor x :john]) -> SELECT 'bob' AS c1, a.c1 AS c2 FROM ancestor AS a WHERE a.c2 = 'john'
; queries with arithmetic operations (function calls?)
; [p x y] [q (+ x 1)] -> SELECT p.c1 AS x, p.c2 AS y FROM p JOIN q WHERE q.c1 = p.c1 + 1
; [p (+ x 1) y] [q x] -> SELECT q.c1 AS x, p.c2 AS y FROM p JOIN q WHERE p.c1 = q.c1 + 1
; [p (+ x 1) x] -> SELECT p.c2 AS x FROM p WHERE p.c1 = p.c2 + 1
; queries with comparisons
; [p x y] (< x 5) -> SELECT p.c1 AS x, p.c2 AS y FROM p WHERE p.c1 < 5
; [p x y] (= x y) -> unnecessary but supported, can be written (p x x)
; [p x y] (= x (+ y 1)) -> unnecessary but supported, can be written (p (+ x 1) x)?
; confusing expressions we probably won't support:
; [p (+ x 1) (* x 2)] -> SELECT p.c1 - 1 AS x FROM p WHERE p.c1 + 1 = p.c2 * 2??
; no, that's not right - this says x+1 = c1 AND x*2 = c2
; [p (+ x 1) (* x 2)] -> SELECT p.c1 - 1 AS x FROM p WHERE p.c2 = (p.c1 - 1) * 2
; [p (+ x 1) y] -> meaningless? or...
; -> SELECT p.c1 - 1 AS x, p.c2 AS y FROM p
; is there a way to trick sql into generating x = p.c1 - 1 from p.c1 = x + 1?
; [p z y] (= z (+ x 1))
; unsupported: inline comparisons
; [p (< x 5) y] -> SELECT p.c1 AS x, p.c2 AS y FROM p WHERE p.c1 < 5 -- does this make sense? seems hard to read, hard to parse
(fn Sqlog.new [self]
(set self.tables {})
(set self.rules {}))
(fn Sqlog.deftable [self name ...]
"Defines the column names of a table and their expected ordering"
(when (. name self.rules) (error "tables and rules must not overlap"))
(tset self.tables name [...]))
(fn Sqlog.defrule [self head ...]
(match head
[:literal name] (let [rulelist (or (. self.rules name) [])]
(table.insert rulelist [head ...])
(tset self.rules name rulelist))
_ (error "Expected literal for head, got " (fv head))))
(fn Sqlog.defrules [self ...]
(for [i 1 (select :# ...)] (self:defrule (table.unpack (select i ...)))))
(fn append-if-missing [list value]
(when (not (lume.any list #(= $1 value)))
(table.insert list value)))
(fn add-clause [analysis clause] (table.insert analysis.clauses clause))
(fn Sqlog.reference-name [self analysis name]
(if (or (. self.rules name) (. self.tables name))
(do (table.insert analysis.tables name)
(when (. self.rules name)
(append-if-missing analysis.referenced-rules name))
(length analysis.tables))
(error (.. "Unknown table / rule " name))))
(fn Sqlog.reference-variable [self analysis varname expr]
(match (. analysis.variable-mapping varname)
mapping (add-clause analysis [:= mapping expr])
nil (do (tset analysis.variable-mapping varname expr)
(table.insert analysis.variables varname))))
(local comparitors (collect [_ op (ipairs [:< :> :<= :>= := :and :or])] op true))
(fn Sqlog.analyze-literal [self analysis literal]
(match literal
[:literal name params] (let [itable (self:reference-name analysis name)]
(each [icolumn value (ipairs params)]
(match value
[:var varname] (self:reference-variable analysis varname [:column itable icolumn])
[:const val] (add-clause analysis [:= [:column itable icolumn] [:const val]])
(where [op] (= (. comparitors op) nil)) (add-clause analysis [:= [:column itable icolumn] value])
_ (error (.. "expected var, const, or function, got " (fv value))))))
(where [op] (. comparitors op)) (add-clause analysis literal)
_ (error (.. "Expected literal or comparison but got " (fv literal)))))
(fn new-analysis [?parent]
{:variables []
:variable-mapping {}
:selection []
:clauses []
:tables []
:constants (or (?. ?parent :constants) [])
:referenced-rules (or (?. ?parent :referenced-rules) [])})
(local infix-ops (collect [_ op (ipairs [:+ :- :* :/ :< :> :<= :>= := :|| :and :or])] op true))
(fn Sqlog.gen-expr [self analysis expr]
(match expr
[:const val] (do (table.insert analysis.constants val) "?")
[:column itable icolumn] (.. "_t" itable "."
(match (. self.tables (. analysis.tables itable))
colnames (. colnames icolumn)
_ (.. "c" icolumn)))
[:as subexpr name] (.. (self:gen-expr analysis subexpr) " AS " name)
(where [:var name] (. analysis.variable-mapping name)) (self:gen-expr analysis (. analysis.variable-mapping name))
(where [op lhs rhs] (. infix-ops op)) (.. "(" (self:gen-expr analysis lhs) " " op " " (self:gen-expr analysis rhs) ")")
_ (error (.. "Unrecognized expression " (fv expr)))))
(fn cat [list sep ?f]
(table.concat (icollect [i v (ipairs list)] ((or ?f #$1) v i)) sep))
(fn Sqlog.gen-rule-clause [self analysis-parent [head & literals]]
(let [analysis (new-analysis analysis-parent)]
(each [_ literal (ipairs literals)] (self:analyze-literal analysis literal))
(match head
[:literal name params]
(set analysis.selection (icollect [icolumn param (ipairs params)] [:as param (.. :c icolumn)]))
_ (error (.. "Expected literal, got " (fv head))))
(self:gen-select analysis)))
(fn Sqlog.gen-rule [self analysis name]
(let [rule (. self.rules name)
column-count (match rule
[[[:literal _ clauses]]] (length clauses)
_ (error (.. "Rule should be list of list of literals, was " (fv rule))))]
(.. name "(" (table.concat (icollect [i (util.countiter column-count)] (.. "c" i)) ", ") ") AS NOT MATERIALIZED ("
(cat rule " UNION " #(self:gen-rule-clause analysis $1)) ")")))
(fn Sqlog.gen-with-rules [self analysis]
(let [rulequeries []]
; ipairs will iterate over all referenced-rules even if gen-rule causes more to be appended
(each [_ name (ipairs analysis.referenced-rules)]
(table.insert rulequeries (self:gen-rule analysis name)))
(if (> (length rulequeries) 0)
(.. "WITH RECURSIVE " (cat rulequeries ", ") " ")
"")))
(fn Sqlog.gen-select [self analysis]
(.. "SELECT "
(if (> (length analysis.selection) 0)
(cat analysis.selection ", " #(self:gen-expr analysis $1))
"true")
" FROM "
(cat analysis.tables " JOIN " #(.. $1 " AS _t" $2))
(if (> (length analysis.clauses) 0)
(.. " WHERE " (cat analysis.clauses " AND " #(self:gen-expr analysis $1)))
"")))
(fn Sqlog.query [self ...]
(let [analysis (new-analysis)]
(each [_ literal (ipairs [...])] (self:analyze-literal analysis literal))
(set analysis.selection (icollect [_ varname (ipairs analysis.variables)] [:as (. analysis.variable-mapping varname) varname]))
[(.. (self:gen-with-rules analysis) (self:gen-select analysis)) analysis.constants]))
Sqlog