; sqlog, a datalog-like system built on sqlite
; may not actually have the full power of datalog OR the full power of sqlite

; datalog has a database of facts, and rules. facts are represented in sqlite directly as tables.
; rules are used by the sqlog engine when generating queries; typically they take the form
; of subqueries used in the WITH RECURSIVE clause.

; It would be possible to store them as views, but the view would need to be regenerated from
; its base rules anytime there is a change, so sqlog needs to know about them no matter what.
; Making sqlog manage them in-memory also allows us to use it to query arbitrary sqlite databases.
; Ideally we would persist rules as JSON in a special table; maybe sqlog_rules?

(local Object (require :core.object))
(local lume (require :lib.lume))
(local Sqlog (Object:extend))

; Generating SQL from Datalog should not be too complex, but it pays to start with the simplest
; case and build up from there.

; simple queries:
; [p x y] -> SELECT p.c1 AS x, p.c2 AS y FROM p
; [p 1 y] -> SELECT p.c2 AS y FROM p WHERE p.c1 = 1
; [q x] [p x 1] -> SELECT q.c1 AS x FROM q JOIN p WHERE p.c1 = q.c1 AND p.c2 = 1
; [p 1 2] -> SELECT true FROM p WHERE p.c1 = 1 AND p.c2 = 2
; [p 1 x] [p x 2] -> SELECT t1.c2 AS x FROM p AS t1 JOIN p AS t2 WHERE t1.c1 = 1 AND t1.c2 = t2.c1 AND t2.c2 = 2

; queries using rules:
; ([ancestor x y] [parent x y]) -> SELECT p.c1 AS x, p.c2 AS y FROM parent AS p
; ([ancestor x y] [parent x z] [ancestor z y]) -> SELECT p.c1 AS x, a.c2 AS y FROM parent AS p JOIN ancestor AS a WHERE p.c2 = a.c1
; [ancestor x :john] -> WITH RECURSIVE ancestor(c1, c2) AS (SELECT ... UNION SELECT ...) SELECT a.c1 AS x FROM ancestor AS a WHERE a.c2 = 'john'
; ([ancestor :bob x] [ancestor x :john]) -> SELECT 'bob' AS c1, a.c1 AS c2 FROM ancestor AS a WHERE a.c2 = 'john'

; queries with arithmetic operations (function calls?)
; [p x y] [q (+ x 1)] -> SELECT p.c1 AS x, p.c2 AS y FROM p JOIN q WHERE q.c1 = p.c1 + 1
; [p (+ x 1) y] [q x] -> SELECT q.c1 AS x, p.c2 AS y FROM p JOIN q WHERE p.c1 = q.c1 + 1
; [p (+ x 1) x] -> SELECT p.c2 AS x FROM p WHERE p.c1 = p.c2 + 1

; queries with comparisons
; [p x y] (< x 5) -> SELECT p.c1 AS x, p.c2 AS y FROM p WHERE p.c1 < 5

; confusing expressions we probably won't support:
; [p (+ x 1) (* x 2)] -> SELECT p.c1 - 1 AS x FROM p WHERE p.c1 + 1 = p.c2 * 2??
;   no, that's not right - this says x+1 = c1 AND x*2 = c2
; [p (+ x 1) (* x 2)] -> SELECT p.c1 - 1 AS x FROM p WHERE p.c2 = (p.c1 - 1) * 2
; [p (+ x 1) y] -> meaningless? or...
;   -> SELECT p.c1 - 1 AS x, p.c2 AS y FROM p
;   is there a way to trick sql into generating x = p.c1 - 1 from p.c1 = x + 1?
;   [p z y] (= z (+ x 1))

; unsupported: inline comparisons, explicit equality checks (use unification instead)
; [p (< x 5) y] -> SELECT p.c1 AS x, p.c2 AS y FROM p WHERE p.c1 < 5 -- does this make sense? seems hard to read, hard to parse
; [p x y] (= x y) -> unnecessary, can be written (p x x)
; [p x y] (= x (+ y 1)) -> unnecessary, can be written (p (+ x 1) x)?

; NOTE(review): `fv`, used below to format values in error messages, is not defined in
; this file; presumably it is a project-wide global value formatter - confirm.
; NOTE(review): table, rule and variable names are interpolated into the generated SQL
; unquoted; only [:const ...] values are passed as bound parameters.

; Lua 5.1 / LuaJIT expose `unpack` as a global, Lua 5.2+ as `table.unpack`.
(local unpack-values (or table.unpack _G.unpack))

(fn Sqlog.new [self]
  "Initializes an engine with no known tables and no rules."
  (set self.tables {})
  (set self.rules {}))

(fn Sqlog.deftable [self name ...]
  "Defines the column names of a table and their expected ordering.
Raises an error if a rule with the same name has already been defined."
  (when (. self.rules name)
    (error "tables and rules must not overlap"))
  (tset self.tables name [...]))

(fn Sqlog.defrule [self head ...]
  "Adds one clause to the rule named by `head`, a [:literal name params] form.
The remaining arguments are the body literals of the clause. Several clauses
may be defined for the same name; they are combined with UNION when queried.
Raises an error if `head` is not a literal, or if a table of the same name exists."
  (match head
    [:literal name]
    (do
      (when (. self.tables name)
        (error "tables and rules must not overlap"))
      (let [rulelist (or (. self.rules name) [])]
        (table.insert rulelist [head ...])
        (tset self.rules name rulelist)))
    ; the formatted head has to be concatenated into the message; a second argument
    ; to `error` is the stack level, not more message text
    _ (error (.. "Expected literal for head, got " (fv head)))))

(fn Sqlog.defrules [self ...]
  "Defines several rule clauses at once. Each argument is a sequence of the form
[head body-literal ...], which is spread into a call to defrule."
  ; Take the rules one at a time: spreading (select i ...) directly into unpack
  ; would pass the following rules as unpack's index arguments.
  (each [_ rule (ipairs [...])]
    (self:defrule (unpack-values rule))))

(fn append-if-missing [list value]
  "Appends `value` to `list` unless an equal element is already present."
  (when (not (lume.any list #(= $1 value)))
    (table.insert list value)))

(fn add-clause [analysis clause]
  "Records a WHERE-clause expression on the analysis."
  (table.insert analysis.clauses clause))

(fn Sqlog.reference-name [self analysis name]
  "Registers a reference to the table or rule `name` in this analysis and returns
its index in analysis.tables (used to build the `_tN` alias). Rules are also
recorded in analysis.referenced-rules so that a WITH clause is generated for them.
Raises an error if `name` is neither a known table nor a known rule."
  (if (or (. self.rules name) (. self.tables name))
      (do
        (table.insert analysis.tables name)
        (when (. self.rules name)
          (append-if-missing analysis.referenced-rules name))
        (length analysis.tables))
      (error (.. "Unknown table / rule " name))))

(fn Sqlog.reference-variable [self analysis varname expr]
  "Unifies the variable `varname` with `expr`. The first occurrence binds the
variable to `expr`; later occurrences add an equality clause against that binding."
  (match (. analysis.variable-mapping varname)
    mapping (add-clause analysis [:= mapping expr])
    nil (do
          (tset analysis.variable-mapping varname expr)
          (table.insert analysis.variables varname))))

(fn Sqlog.analyze-literal [self analysis literal]
  "Adds one [:literal name params] form to the analysis: references its table or
rule and, for each parameter, either binds a variable or adds an equality clause
on the corresponding column. Raises an error on any other shape."
  (match literal
    [:literal name params]
    (let [itable (self:reference-name analysis name)]
      (each [icolumn value (ipairs params)]
        (match value
          [:var varname]
          (self:reference-variable analysis varname [:column itable icolumn])
          [:const val]
          (add-clause analysis [:= [:column itable icolumn] [:const val]])
          ; any other non-empty sequence is treated as an expression the column must equal
          [op & args]
          (add-clause analysis [:= [:column itable icolumn] value])
          _ (error (.. "expected var or const, got " (fv value))))))
    _ (error (.. "Expected literal but got " (fv literal)))))

(fn new-analysis [?parent]
  "Creates a fresh analysis. When `?parent` is given, its constants and
referenced-rules lists are shared (not copied), so bound parameters and rule
references found while generating a rule accumulate on the parent query."
  {:variables []
   :variable-mapping {}
   :selection []
   :clauses []
   :tables []
   :constants (or (?. ?parent :constants) [])
   :referenced-rules (or (?. ?parent :referenced-rules) [])})

; set of operators that gen-expr emits in infix position
(local infix-ops
  (collect [_ op (ipairs [:+ :- :* :/ :< :> :<= :>= := :|| :and :or])]
    op true))

(fn infix-expr? [expr]
  "True when `expr` is a sequence whose first element is an infix operator."
  (and (= (type expr) :table)
       (= true (. infix-ops (. expr 1)))))

(fn Sqlog.gen-expr [self analysis expr]
  "Generates the SQL text for an expression. [:const v] is emitted as a `?`
placeholder and `v` is appended to analysis.constants, so the order of constants
follows the order in which text is generated. Raises an error for expressions it
does not recognize, including variables that have no binding."
  (fn gen-operand [operand]
    ; parenthesize nested infix expressions so that SQL operator precedence
    ; cannot regroup them, e.g. (* (+ x 1) 2) must not become x + 1 * 2
    (let [sql (self:gen-expr analysis operand)]
      (if (infix-expr? operand)
          (.. "(" sql ")")
          sql)))
  (match expr
    [:const val]
    (do
      (table.insert analysis.constants val)
      "?")

    [:column itable icolumn]
    (let [table-name (. analysis.tables itable)
          colnames (. self.tables table-name)]
      (.. "_t" itable "."
          (if colnames
              ; declared table: use its column names
              (or (. colnames icolumn)
                  (error (.. "Table " table-name " has no column " icolumn)))
              ; rule: columns are named positionally
              (.. "c" icolumn))))

    [:as subexpr name]
    (.. (self:gen-expr analysis subexpr) " AS " name)

    (where [:var name] (. analysis.variable-mapping name))
    (self:gen-expr analysis (. analysis.variable-mapping name))

    (where [op lhs rhs] (. infix-ops op))
    ; generate the left side first so placeholders and constants stay in step
    (let [lhs-sql (gen-operand lhs)
          rhs-sql (gen-operand rhs)]
      (.. lhs-sql " " op " " rhs-sql))

    _ (error (.. "Unrecognized expression " (fv expr)))))

(fn cat [list sep ?f]
  "Joins `list` with `sep`, first mapping each element through `?f`, which is
called with (value index). Without `?f` the elements are joined as they are."
  (table.concat (icollect [i v (ipairs list)]
                  ((or ?f #$1) v i))
                sep))

(fn Sqlog.gen-rule-clause [self analysis-parent [head & literals]]
  "Generates the SELECT statement for a single rule clause. The body literals are
analyzed in a child analysis of `analysis-parent`; the head parameters become the
selected columns, aliased c1, c2, ... Raises an error if the head is not a literal
or a head parameter is neither a bound variable nor a constant."
  (let [analysis (new-analysis analysis-parent)]
    (each [_ literal (ipairs literals)]
      (self:analyze-literal analysis literal))
    (match head
      [:literal name params]
      (set analysis.selection
           (icollect [icolumn param (ipairs params)]
             (match param
               [:var varname]
               [:as (or (. analysis.variable-mapping varname)
                        (error (.. "Unbound variable " varname " in rule head")))
                (.. "c" icolumn)]
               [:const val]
               [:as param (.. "c" icolumn)]
               _ (error (.. "Expected const or var, got " (fv param))))))
      _ (error (.. "Expected literal, got " (fv head))))
    (self:gen-select analysis)))

(fn Sqlog.gen-rule [self analysis name]
  "Generates the common table expression for the rule `name`: its clauses are
joined with UNION and its columns are named c1, c2, ..."
  (let [rule (. self.rules name)
        ; the column list follows the head's parameters, not the number of clauses;
        ; every clause of a rule is expected to have the same arity as the first
        head (. rule 1 1)
        params (or (. head 3) [])]
    (.. name "(" (cat params ", " #(.. "c" $2)) ") AS NOT MATERIALIZED ("
        (cat rule " UNION " #(self:gen-rule-clause analysis $1))
        ")")))

(fn Sqlog.gen-with-rules [self analysis]
  "Generates the WITH RECURSIVE prefix covering every rule referenced by the
analysis, or an empty string when no rule is referenced."
  (let [rulequeries []]
    ; ipairs will iterate over all referenced-rules even if gen-rule causes more to be appended
    (each [_ name (ipairs analysis.referenced-rules)]
      (table.insert rulequeries (self:gen-rule analysis name)))
    (if (> (length rulequeries) 0)
        (.. "WITH RECURSIVE " (cat rulequeries ", ") " ")
        "")))

(fn Sqlog.gen-select [self analysis]
  "Generates a SELECT statement from an analysis: the selection (or `true` when
nothing is selected), the referenced tables joined and aliased _t1, _t2, ...,
and the accumulated clauses combined with AND."
  ; the pieces are generated in textual order so that `?` placeholders line up
  ; with the order of analysis.constants
  (let [selection-sql (if (> (length analysis.selection) 0)
                          (cat analysis.selection ", " #(self:gen-expr analysis $1))
                          "true")
        ; a clause without body literals references no tables; emitting a bare
        ; " FROM " would be invalid SQL
        from-sql (if (> (length analysis.tables) 0)
                     (.. " FROM " (cat analysis.tables " JOIN " #(.. $1 " AS _t" $2)))
                     "")
        where-sql (if (> (length analysis.clauses) 0)
                      (.. " WHERE " (cat analysis.clauses " AND " #(self:gen-expr analysis $1)))
                      "")]
    (.. "SELECT " selection-sql from-sql where-sql)))

(fn Sqlog.query [self ...]
  "Builds the SQL for a query made of the given literals. Returns a two-element
sequence [sql constants]: the SQL text with `?` placeholders, and the constant
values in placeholder order. Every variable is selected under its own name, in
order of first appearance."
  (let [analysis (new-analysis)]
    (each [_ literal (ipairs [...])]
      (self:analyze-literal analysis literal))
    (set analysis.selection
         (icollect [_ varname (ipairs analysis.variables)]
           [:as (. analysis.variable-mapping varname) varname]))
    ; the WITH clause is generated first, so its constants precede the SELECT's
    (let [with-sql (self:gen-with-rules analysis)
          select-sql (self:gen-select analysis)]
      [(.. with-sql select-sql) analysis.constants])))

Sqlog