Working s-expression parser
This commit is contained in:
parent
908160f4c7
commit
806ab10586
16 changed files with 3242 additions and 165 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1 +1,2 @@
|
|||
node_modules
|
||||
build
|
||||
|
|
|
|||
7
jest.config.js
Normal file
7
jest.config.js
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
/** @type {import('ts-jest').JestConfigWithTsJest} **/
|
||||
export default {
|
||||
testEnvironment: "node",
|
||||
transform: {
|
||||
"^.+.tsx?$": ["ts-jest",{}],
|
||||
},
|
||||
};
|
||||
18
package-lock.json
generated
18
package-lock.json
generated
|
|
@ -1,18 +0,0 @@
|
|||
{
|
||||
"name": "dumbpiler",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"js-sha256": "^0.11.0"
|
||||
}
|
||||
},
|
||||
"node_modules/js-sha256": {
|
||||
"version": "0.11.0",
|
||||
"resolved": "https://registry.npmjs.org/js-sha256/-/js-sha256-0.11.0.tgz",
|
||||
"integrity": "sha512-6xNlKayMZvds9h1Y1VWc0fQHQ82BxTXizWPEtEeGvmOUYpBRy4gbWroHLpzowe6xiQhHpelCQiE7HEdznyBL9Q==",
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
12
package.json
12
package.json
|
|
@ -1,6 +1,16 @@
|
|||
{
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"js-sha256": "^0.11.0"
|
||||
"@types/node": "^22.10.2",
|
||||
"js-sha256": "^0.11.0",
|
||||
"wasmati": "^0.2.4"
|
||||
},
|
||||
"packageManager": "pnpm@9.14.2+sha512.6e2baf77d06b9362294152c851c4f278ede37ab1eba3a55fda317a4a17b209f4dbb973fb250a77abc463a341fcb1f17f17cfa24091c4eb319cda0d9b84278387",
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@tsconfig/recommended": "^1.0.8",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.2.5",
|
||||
"typescript": "^5.7.2"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
2607
pnpm-lock.yaml
generated
Normal file
2607
pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load diff
60
src/db/build.ts
Normal file
60
src/db/build.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import { freeze, getHash } from './hash';
|
||||
|
||||
export type BuildDb<T> = {
|
||||
+readonly [P in keyof T]:
|
||||
T[P] extends ((db: any, ...args: infer Args) => infer Ret)
|
||||
? ((...args: Args) => Ret)
|
||||
: BuildDb<T[P]>
|
||||
}
|
||||
|
||||
export default class Build<T> {
|
||||
db: BuildDb<T>;
|
||||
resultCache: Map<string, Map<string, () => any>>;
|
||||
|
||||
constructor(rules: T) {
|
||||
this.db = {} as BuildDb<T>;
|
||||
this.resultCache = new Map();
|
||||
|
||||
const self = this;
|
||||
function wrapDbFunctions<U>(prefix: string, functions: U, container: any) {
|
||||
for (const k of Object.keys(functions as any) as (keyof U)[]) {
|
||||
const name = `${prefix}${String(k)}`;
|
||||
const f = functions[k];
|
||||
if (typeof f === 'function') {
|
||||
container[k] = function(...args: any): any {
|
||||
const input = freeze(args);
|
||||
const result = self.lookupResultThunk(name, input);
|
||||
if (result) {
|
||||
return result();
|
||||
}
|
||||
try {
|
||||
const result = freeze(f(self.db, ...input));
|
||||
return self.saveResultThunk(name, input, () => result);
|
||||
} catch (e) {
|
||||
self.saveResultThunk(name, input, () => { throw e; });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
container[k] = {};
|
||||
wrapDbFunctions(`${name}.`, f, container[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
wrapDbFunctions('', rules, this.db);
|
||||
}
|
||||
|
||||
lookupResultThunk(name: string, input: any) {
|
||||
return this.resultCache.get(name)?.get(getHash(input));
|
||||
}
|
||||
|
||||
saveResultThunk(name: string, input: string, thunk: () => any) {
|
||||
let cache = this.resultCache.get(name);
|
||||
if (!cache) {
|
||||
cache = new Map();
|
||||
this.resultCache.set(name, cache);
|
||||
}
|
||||
cache.set(getHash(input), thunk);
|
||||
return thunk();
|
||||
}
|
||||
}
|
||||
|
||||
71
src/db/dumb.ts
Normal file
71
src/db/dumb.ts
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import type { Syntax } from './parse';
|
||||
import SexprRules from './sexpr';
|
||||
import type { SexprDb } from './sexpr';
|
||||
|
||||
export type NodeId = (String | number)[];
|
||||
export type DumbDb = SexprDb & {
|
||||
dumb: {
|
||||
findNode: (contents: String[], id: NodeId) => Syntax,
|
||||
pathFromNodeId: (contents: String[], id: NodeId) => number[],
|
||||
namedChildren: (syntax: Syntax) => Map<String, number>,
|
||||
}
|
||||
}
|
||||
|
||||
export class InvalidNodeIdError extends Error {
|
||||
id: NodeId
|
||||
|
||||
constructor(id: NodeId) {
|
||||
super("Invalid node ID");
|
||||
this.id = id;
|
||||
}
|
||||
}
|
||||
|
||||
export function namedChildren(db: DumbDb, syntax: Syntax): Map<string, number> {
|
||||
const result = new Map();
|
||||
for (const [i, child] of (syntax.children ?? []).entries()) {
|
||||
const name = getName(child);
|
||||
if (name && !result.has(name)) {
|
||||
result.set(name, i);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
export function getName(syntax: Syntax): string | undefined {
|
||||
// todo: pattern-match some shit
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function pathFromNodeId(db: DumbDb, contents: String[], id: NodeId): number[] {
|
||||
if (id.length === 0) {
|
||||
return [];
|
||||
} else {
|
||||
const parentPath = db.dumb.pathFromNodeId(contents, id.slice(0, -1));
|
||||
const parent = db.sexpr.findNode(contents, parentPath);
|
||||
let key: number | String | undefined = id[id.length - 1];
|
||||
if (parent.children) {
|
||||
if (key instanceof String) {
|
||||
const childMap = db.dumb.namedChildren(parent);
|
||||
key = childMap.get(key);
|
||||
}
|
||||
if (typeof key === 'number' && key < parent.children.length) {
|
||||
return [...parentPath, key];
|
||||
}
|
||||
}
|
||||
|
||||
throw new InvalidNodeIdError(id);
|
||||
}
|
||||
}
|
||||
|
||||
export function findNode(db: DumbDb, contents: string[], id: NodeId): Syntax {
|
||||
return db.sexpr.findNode(contents, db.dumb.pathFromNodeId(contents, id));
|
||||
}
|
||||
|
||||
export default {
|
||||
...SexprRules,
|
||||
dumb: {
|
||||
findNode,
|
||||
pathFromNodeId,
|
||||
namedChildren
|
||||
}
|
||||
};
|
||||
|
|
@ -9,19 +9,21 @@ const hashedValue = Symbol('hashValue');
|
|||
// objects: sha256 hash of "o:[hash of key][hash of value]..."
|
||||
// maps: sha256 hash of "m:[hash of key][hash of value]..."
|
||||
// strings: sha256 hash of "t:[text]"
|
||||
// symbols: sha256 hash of "S:[text]"
|
||||
// booleans, numbers, null: sha256 hash of "v:[json representation]"
|
||||
|
||||
// for non-primitives, hash value is cached internally by freeze()
|
||||
|
||||
// notably, functions can't be hashed, so functions can't be used as inputs. define new rules instead.
|
||||
type Freezable = number | string | String | boolean | null | Freezable[] | { [k: string]: Freezable } | Set<Freezable> | Map<Freezable, Freezable>;
|
||||
|
||||
export function freeze(obj) {
|
||||
export function freeze<T extends Freezable>(obj: T): T extends string ? String : T {
|
||||
if (typeof obj === 'object') {
|
||||
if (obj === null || obj[hashedValue]) {
|
||||
return obj;
|
||||
if (obj === null || (obj as any)[hashedValue]) {
|
||||
return obj as T as any;
|
||||
}
|
||||
let result;
|
||||
hash = sha256.create();
|
||||
let result: any;
|
||||
const hash = sha256.create();
|
||||
if (Array.isArray(obj)) {
|
||||
result = [];
|
||||
hash.update('a:');
|
||||
|
|
@ -45,7 +47,7 @@ export function freeze(obj) {
|
|||
} else if (obj instanceof Map) {
|
||||
result = new Map();
|
||||
hash.update('m:');
|
||||
for (const [k, v] of obj) {
|
||||
for (const [k, v] of obj as Map<Freezable, Freezable>) {
|
||||
const frozenKey = freeze(k);
|
||||
const frozenVal = freeze(v);
|
||||
hash.update(getHash(frozenKey));
|
||||
|
|
@ -56,87 +58,36 @@ export function freeze(obj) {
|
|||
result = {};
|
||||
hash.update('o:');
|
||||
for (const [k, v] of Object.entries(obj)) {
|
||||
const frozenKey = freeze(k);
|
||||
const frozenVal = freeze(v);
|
||||
hash.update(getHash(frozenKey));
|
||||
hash.update(getHash(k));
|
||||
hash.update(getHash(frozenVal));
|
||||
result[frozenKey] = frozenVal;
|
||||
result[k] = frozenVal;
|
||||
}
|
||||
}
|
||||
result[hashedValue] = hash.hex();
|
||||
return Object.freeze(result);
|
||||
} else if (typeof obj === 'string') {
|
||||
return freeze(new String(obj)); // always use a frozen String object, so we can cache the hash
|
||||
} else if (typeof obj === 'boolean' || typeof obj === 'number') {
|
||||
return obj;
|
||||
return freeze(new String(obj)) as any; // always use a frozen String object, so we can cache the hash
|
||||
} else if (typeof obj === 'boolean' || typeof obj === 'number' || (typeof(obj) === 'symbol' && Symbol.keyFor(obj))) {
|
||||
return obj as any;
|
||||
} else {
|
||||
throw new Error(`Unable to freeze input of type ${typeof obj}`);
|
||||
}
|
||||
}
|
||||
|
||||
export function getHash(obj) {
|
||||
const input = freeze(obj);
|
||||
export function getHash(obj: Freezable): string {
|
||||
const input = freeze(obj) as any;
|
||||
if (input && input[hashedValue]) {
|
||||
return input[hashedValue];
|
||||
}
|
||||
// primitive value; hash is uncached and must be calculated on the fly
|
||||
const hash = sha256.create();
|
||||
hash.update('v:');
|
||||
hash.update(JSON.stringify(obj));
|
||||
if (typeof obj === 'symbol') {
|
||||
hash.update('S:');
|
||||
hash.update(Symbol.keyFor(obj) as string);
|
||||
} else {
|
||||
hash.update('v:');
|
||||
hash.update(JSON.stringify(obj));
|
||||
}
|
||||
return hash.hex();
|
||||
}
|
||||
|
||||
export class Build {
|
||||
constructor() {
|
||||
this.rules = new Map();
|
||||
this.resultCache = new Map();
|
||||
}
|
||||
|
||||
addRule(name, f) {
|
||||
if (this.rules.has(name)) {
|
||||
throw new Error(`rule for ${name} already defined; we don't support compiler-level time travel yet`);
|
||||
}
|
||||
const rule = (db, ...args) => {
|
||||
const input = freeze(args);
|
||||
const result = this.lookupResultThunk(name, input);
|
||||
if (result) {
|
||||
return result();
|
||||
}
|
||||
try {
|
||||
const result = freeze(f(db, ...input));
|
||||
return this.saveResultThunk(name, input, () => result);
|
||||
} catch (e) {
|
||||
this.saveResultThunk(name, input, () => { throw e; });
|
||||
}
|
||||
}
|
||||
this.rules.set(name, rule);
|
||||
}
|
||||
|
||||
lookupResultThunk(name, input) {
|
||||
return this.resultCache.get(name)?.get(getHash(input));
|
||||
}
|
||||
|
||||
saveResultThunk(name, input, thunk) {
|
||||
if (!this.resultCache.has(name)) {
|
||||
this.resultCache.set(name, new Map());
|
||||
}
|
||||
this.resultCache.get(name).set(getHash(input), thunk);
|
||||
return thunk();
|
||||
}
|
||||
|
||||
db(prefix) {
|
||||
const self = this;
|
||||
return new Proxy(Object.freeze({}), {
|
||||
get(_target, prop, _receiver) {
|
||||
const key = prefix ? `${prefix}.${prop}` : prop;
|
||||
const rule = self.rules[key];
|
||||
if (rule) {
|
||||
return (...args) => rule(self.db(), ...args);
|
||||
} else {
|
||||
return self.db(key);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
266
src/db/parse.ts
Normal file
266
src/db/parse.ts
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
import { freeze } from "./hash";
|
||||
|
||||
export class NeedsMoreInput extends Error {}
|
||||
|
||||
export type Syntax = {
|
||||
length: number,
|
||||
whitespaceLeading?: number,
|
||||
whitespaceTrailing?: number,
|
||||
value?: any,
|
||||
errors?: String[],
|
||||
children?: Syntax[]
|
||||
}
|
||||
|
||||
export type Extraction = { lift?: number[], keep?: number[] } | { become: number }
|
||||
export type Parser = String | [String, ...any];
|
||||
export type ParserDb = {
|
||||
parse: {
|
||||
parse: (contents: String[], parser: Parser) => Syntax,
|
||||
minimal: (contents: String[], parser: Parser) => Syntax,
|
||||
match: (contents: String[], regex: string, asWhitespace?: boolean) => Syntax,
|
||||
seq: (contents: String[], ...parsers: Parser[]) => Syntax,
|
||||
extract: (contents: String[], parser: Parser, extraction: Extraction) => Syntax,
|
||||
choice: (contents: String[], ...parsers: Parser[]) => Syntax,
|
||||
opt: (contents: String[], parser: Parser) => Syntax,
|
||||
repeated: (contents: String[], parser: Parser) => Syntax,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Validates a Syntax node's length invariant and normalizes it by dropping
 * explicit-zero whitespace fields. Returns the input object unchanged when no
 * cleanup is needed; otherwise returns a shallow-copied, normalized node.
 * `parser` is only used to label error messages.
 *
 * Invariant checked: for a node with children, length must equal leading +
 * trailing whitespace plus the sum of child lengths; for a leaf, the
 * whitespace alone must not exceed the length.
 */
export function canonicalizeSyntax(syntax: Syntax, parser: Parser): Syntax {
  // validate length
  let calculatedLength = (syntax.whitespaceLeading ?? 0) + (syntax.whitespaceTrailing ?? 0);
  let childNeedsCleanup = false;

  if (syntax.children) {
    for (const child of syntax.children) {
      // Recurse first: child lengths are summed from the *canonical* children,
      // and a child that changed forces this node to be rebuilt below.
      const cleanChild = canonicalizeSyntax(child, parser);
      calculatedLength += cleanChild.length;
      if (cleanChild !== child) {
        childNeedsCleanup = true;
      }
    }
    if (calculatedLength !== syntax.length) {
      throw new Error(`Length mismatch; expected ${calculatedLength}, got ${syntax.length}: ${JSON.stringify(parser)}`);
    }
  } else {
    // Leaf node: no children to account for, but whitespace can't exceed length.
    if (calculatedLength > syntax.length) {
      throw new Error(`Node has ${calculatedLength} of whitespace, but length is only ${syntax.length}: ${JSON.stringify(parser)}`)
    }
  }
  // Fast path: nothing to normalize (no explicit zero whitespace fields, no
  // changed children) — return the original object so callers can compare by
  // identity.
  if (syntax.whitespaceLeading !== 0 && syntax.whitespaceTrailing !== 0 && !childNeedsCleanup) {
    return syntax;
  }
  syntax = {...syntax};
  if (childNeedsCleanup && syntax.children) {
    // NOTE(review): children are canonicalized a second time here; assumed
    // cheap because a second pass returns already-clean nodes by identity.
    syntax.children = syntax.children.map(s => canonicalizeSyntax(s, parser));
  }
  // Canonical form omits whitespace fields rather than storing 0.
  if (syntax.whitespaceLeading === 0) {
    delete syntax.whitespaceLeading;
  }
  if (syntax.whitespaceTrailing === 0) {
    delete syntax.whitespaceTrailing;
  }
  return syntax;
}
|
||||
|
||||
export function parse(db: any, contents: String[], parser: Parser): Syntax {
|
||||
const name = parser instanceof String ? parser : parser[0];
|
||||
const args = parser instanceof String ? [] : parser.slice(1);
|
||||
const parserFunc = name.split('.').reduce((d, n) => d[n], db);
|
||||
return canonicalizeSyntax(parserFunc(contents, ...args), parser);
|
||||
}
|
||||
|
||||
export function minimal(db: ParserDb, contents: String[], parser: Parser): Syntax {
|
||||
let end = 1;
|
||||
while (end <= contents.length) {
|
||||
try {
|
||||
return parse(db, contents.slice(0, end), parser);
|
||||
} catch (e) {
|
||||
if (e instanceof NeedsMoreInput) {
|
||||
end ++;
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new NeedsMoreInput();
|
||||
}
|
||||
|
||||
export function match(db: ParserDb, contents: String[], regex: string, asWhitespace: boolean = false): Syntax {
|
||||
const match = contents[0].match(`^${regex}`);
|
||||
if (match) {
|
||||
if (asWhitespace) {
|
||||
return { length: match[0].length, whitespaceTrailing: match[0].length };
|
||||
} else {
|
||||
return { length: match[0].length, value: match }
|
||||
}
|
||||
} else {
|
||||
return { length: 0, errors: [`No match: ${regex} for ${contents[0]}`] }
|
||||
}
|
||||
}
|
||||
|
||||
export function consume(contents: String[], len: number): String[] {
|
||||
const newContents = [];
|
||||
let index = 0;
|
||||
while (len > 0) {
|
||||
if (contents[index].length <= len) {
|
||||
len -= contents[index].length;
|
||||
} else {
|
||||
newContents.push(contents[index].slice(len));
|
||||
len = 0;
|
||||
}
|
||||
index ++;
|
||||
}
|
||||
for (; index < contents.length; index ++) {
|
||||
newContents.push(contents[index]);
|
||||
}
|
||||
return newContents;
|
||||
}
|
||||
|
||||
type WhitespaceSide = 'Trailing' | 'Leading';
|
||||
export function redistributeWhitespace(source: Syntax, sourceSide: WhitespaceSide, target: Syntax, targetSide: WhitespaceSide): [Syntax, Syntax] {
|
||||
const sourceSpace = source[`whitespace${sourceSide}`];
|
||||
if (sourceSpace) {
|
||||
const newSource = { ...source, length: source.length - sourceSpace };
|
||||
delete newSource[`whitespace${sourceSide}`];
|
||||
const newTarget = { ...target, length: target.length + sourceSpace };
|
||||
newTarget[`whitespace${targetSide}`] = (target[`whitespace${targetSide}`] ?? 0) + sourceSpace;
|
||||
return [newSource, newTarget];
|
||||
} else {
|
||||
return [source, target];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Appends `child` to `parent`, redistributing whitespace so that interior
 * whitespace attaches to the preceding child's trailing side (or the parent's
 * leading side for the first child). A child that is reduced to zero length
 * after redistribution is dropped entirely. Returns a new parent node;
 * inputs are not mutated (redistributeWhitespace copies).
 */
export function appendChild(parent: Syntax, child: Syntax): Syntax {
  // redistribute whitespace
  let lastChild = parent.children && parent.children[parent.children.length - 1];
  // Parent's accumulated trailing whitespace becomes the new child's leading.
  [parent, child] = redistributeWhitespace(parent, 'Trailing', child, 'Leading');
  if (lastChild) {
    // The new child's leading whitespace is absorbed by the previous child's
    // trailing side, which changes that child's length — rebuild the parent
    // with the updated last child. NOTE(review): order matters here;
    // lengthWithoutLastChild is captured before lastChild grows.
    let lengthWithoutLastChild = parent.length - lastChild.length;
    [child, lastChild] = redistributeWhitespace(child, 'Leading', lastChild, 'Trailing');
    parent = {...parent, length: lengthWithoutLastChild + lastChild.length, children: [...parent.children?.slice(0, -1) ?? [], lastChild]};
  } else {
    // First child: its leading whitespace becomes the parent's leading.
    [child, parent] = redistributeWhitespace(child, 'Leading', parent, 'Leading');
  }
  // The child's trailing whitespace always floats up to the parent, where the
  // next appendChild call can hand it to the following sibling.
  [child, parent] = redistributeWhitespace(child, 'Trailing', parent, 'Trailing');
  if (child.length > 0) {
    return {...parent, length: parent.length + child.length, children: [...parent.children ?? [], child]};
  } else {
    // Child was pure whitespace (or empty) — nothing left to append.
    return parent;
  }
}
|
||||
|
||||
export function seq(db: ParserDb, contents: String[], ...parsers: Parser[]): Syntax {
|
||||
let syntax: Syntax = { length: 0 };
|
||||
for (const parser of parsers) {
|
||||
if (contents.length === 0) {
|
||||
throw new NeedsMoreInput();
|
||||
}
|
||||
let child = parse(db, contents, parser);
|
||||
if (child.errors) {
|
||||
syntax.errors = child.errors;
|
||||
break;
|
||||
}
|
||||
syntax = canonicalizeSyntax(appendChild(syntax, child), 'appendChild');
|
||||
contents = consume(contents, child.length);
|
||||
}
|
||||
return syntax;
|
||||
}
|
||||
|
||||
export function extract(db: ParserDb, contents: String[], parser: Parser, extraction: Extraction): Syntax {
|
||||
let syntax = parse(db, contents, parser);
|
||||
if (syntax.errors) { return syntax; }
|
||||
|
||||
const allChildren = syntax.children;
|
||||
if (allChildren) {
|
||||
if ('become' in extraction) {
|
||||
const toBecome = allChildren[extraction.become];
|
||||
let whitespaceLeading = syntax.whitespaceLeading ?? 0;
|
||||
let whitespaceTrailing = syntax.whitespaceTrailing ?? 0;
|
||||
for (const [ichild, child] of allChildren.entries()) {
|
||||
if (ichild < extraction.become) {
|
||||
whitespaceLeading += child.length;
|
||||
} else if (ichild > extraction.become) {
|
||||
whitespaceTrailing += child.length;
|
||||
} else {
|
||||
whitespaceLeading += child.whitespaceLeading ?? 0;
|
||||
whitespaceTrailing += child.whitespaceTrailing ?? 0;
|
||||
}
|
||||
}
|
||||
return { ...toBecome, length: syntax.length, whitespaceLeading, whitespaceTrailing};
|
||||
} else {
|
||||
const lifts = extraction.lift ?? [];
|
||||
const keeps = extraction.keep ?? [];
|
||||
syntax = { ...syntax, length: (syntax.whitespaceLeading ?? 0) + (syntax.whitespaceTrailing ?? 0), children: [] };
|
||||
for (const [index, child] of allChildren.entries()) {
|
||||
if (lifts.includes(index)) {
|
||||
if (child.whitespaceLeading) {
|
||||
syntax = appendChild(syntax, { length: child.whitespaceLeading, whitespaceTrailing: child.whitespaceLeading });
|
||||
}
|
||||
if (child.children) {
|
||||
for (const grandchild of child.children) {
|
||||
syntax = appendChild(syntax, grandchild);
|
||||
}
|
||||
}
|
||||
if (child.whitespaceTrailing) {
|
||||
syntax = appendChild(syntax, { length: child.whitespaceTrailing, whitespaceTrailing: child.whitespaceTrailing });
|
||||
}
|
||||
} else if (keeps.includes(index)) {
|
||||
syntax = appendChild(syntax, child);
|
||||
} else {
|
||||
syntax = appendChild(syntax, { length: child.length, whitespaceTrailing: child.length });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return syntax;
|
||||
}
|
||||
|
||||
/**
 * Ordered-choice combinator: tries each parser in turn and returns the first
 * result that succeeded with nonzero length. If none qualifies, returns a
 * zero-length node whose errors accumulate every failing alternative's
 * messages (prefixed with a generic "No matching choice").
 * NOTE(review): an alternative that succeeds with zero length is skipped
 * silently and contributes no error message — presumably intentional; confirm.
 */
export function choice(db: ParserDb, contents: String[], ...parsers: Parser[]): Syntax {
  let errors = [freeze("No matching choice")];
  for (const parser of parsers) {
    const attempt = parse(db, contents, parser);
    if (!attempt.errors && attempt.length > 0) {
      return attempt;
    } else if (attempt.errors) {
      errors = errors.concat(attempt.errors);
    }
  }
  return { length: 0, errors };
}
|
||||
|
||||
export function opt(db: ParserDb, contents: String[], parser: Parser): Syntax {
|
||||
const result = parse(db, contents, parser);
|
||||
if (result.errors) {
|
||||
return { length: 0 };
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
export function repeated(db: ParserDb, contents: String[], parser: Parser): Syntax {
|
||||
let syntax: Syntax = { length: 0 };
|
||||
while (contents.length > 0) {
|
||||
let child = parse(db, contents, ['parse.minimal', parser]);
|
||||
if (child.errors) {
|
||||
return syntax;
|
||||
}
|
||||
syntax = appendChild(syntax, child);
|
||||
contents = consume(contents, child.length);
|
||||
}
|
||||
return syntax;
|
||||
}
|
||||
|
||||
export default {
|
||||
parse: {
|
||||
parse,
|
||||
minimal,
|
||||
seq,
|
||||
choice,
|
||||
opt,
|
||||
repeated,
|
||||
match,
|
||||
extract
|
||||
}
|
||||
};
|
||||
122
src/db/sexpr.ts
Normal file
122
src/db/sexpr.ts
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import type { ParserDb, Syntax } from './parse';
|
||||
import ParseRules from './parse';
|
||||
|
||||
export type SexprDb = ParserDb & {
|
||||
sexpr: {
|
||||
expr: (contents: String[]) => Syntax,
|
||||
list: (contents: String[]) => Syntax,
|
||||
symbol: (contents: String[]) => Syntax,
|
||||
number: (contents: String[]) => Syntax,
|
||||
string: (contents: String[]) => Syntax,
|
||||
space: (contents: String[]) => Syntax,
|
||||
findNode: (contents: String[], path: number[]) => Syntax,
|
||||
}
|
||||
}
|
||||
|
||||
export class InvalidPathError extends Error {
|
||||
path: number[];
|
||||
|
||||
constructor(path: number[]) {
|
||||
super("Invalid path");
|
||||
this.path = path;
|
||||
}
|
||||
}
|
||||
|
||||
export function expr(db: SexprDb, contents: String[]): Syntax {
|
||||
return db.parse.extract(contents,
|
||||
['parse.seq', 'sexpr.space', ['parse.choice', 'sexpr.list', 'sexpr.symbol', 'sexpr.number', 'sexpr.string'], 'sexpr.space'],
|
||||
{ become: 0 });
|
||||
}
|
||||
|
||||
export function list(db: SexprDb, contents: String[]): Syntax {
|
||||
const syntax = db.parse.extract(contents, ['parse.seq', ['parse.match', '\\('], ['parse.repeated', 'sexpr.expr'], ['parse.match', '\\)']], { lift: [1] });
|
||||
return { ...syntax, value: syntax.children?.map(s => s.value) };
|
||||
}
|
||||
|
||||
export function symbol(db: SexprDb, contents: String[]): Syntax {
|
||||
const syntax = db.parse.match(contents, "[^\\s;\\(\\)\\[\\]\\{\\}\"'`~,:;0-9][^\\s;\\(\\)\\[\\]\\{\\}\"'`~,:;]*");
|
||||
|
||||
if (syntax.value) {
|
||||
return { ...syntax, value: Symbol.for(syntax.value[0].valueOf()) };
|
||||
} else {
|
||||
return syntax;
|
||||
}
|
||||
}
|
||||
|
||||
export function number(db: SexprDb, contents: String[]): Syntax {
|
||||
const syntax = db.parse.match(contents, "(-\s*)?[0-9]+(\\.[0-9]*)?");
|
||||
if (syntax.value instanceof String) {
|
||||
return { ...syntax, value: Number.parseFloat(syntax.value.valueOf()) };
|
||||
} else {
|
||||
return syntax;
|
||||
}
|
||||
}
|
||||
|
||||
export function string(db: SexprDb, contents: String[]): Syntax {
|
||||
const syntax = db.parse.match(contents, "\"([^\\\\]|\\\\\"|\\\\\\\\)*\"");
|
||||
if (syntax.value instanceof String) {
|
||||
return { ...syntax, value: JSON.parse(syntax.value.valueOf()) };
|
||||
} else {
|
||||
return syntax;
|
||||
}
|
||||
}
|
||||
|
||||
export function space(db: SexprDb, contents: String[]): Syntax {
|
||||
return db.parse.repeated(contents, ['parse.choice', ['parse.match', ';.*', true], ['parse.match', '\\s+', true]]);
|
||||
}
|
||||
|
||||
export function findNode(db: SexprDb, contents: String[], path: number[]): Syntax {
|
||||
if (path.length === 0) {
|
||||
return db.parse.extract(contents, ['parse.seq', 'sexpr.expr', ['parse.repeated', 'sexpr.expr']], { keep: [0], lift: [1] });
|
||||
} else {
|
||||
const parentPath = path.slice(0, -1);
|
||||
const key = path[path.length - 1];
|
||||
const parent = db.sexpr.findNode(contents, parentPath);
|
||||
if (parent.children && key < parent.children.length) {
|
||||
return parent.children[key];
|
||||
}
|
||||
throw new InvalidPathError(path);
|
||||
}
|
||||
}
|
||||
|
||||
export function nodeIndex(db: SexprDb, contents: String[], path: number[]): number {
|
||||
if (path.length === 0) {
|
||||
const root = db.sexpr.findNode(contents, path);
|
||||
return root.whitespaceLeading ?? 0;
|
||||
} else {
|
||||
const parentPath = path.slice(0, -1);
|
||||
const key = path[path.length - 1];
|
||||
const parent = db.sexpr.findNode(contents, parentPath);
|
||||
if (!parent.children) {
|
||||
throw new InvalidPathError(path);
|
||||
}
|
||||
let index = nodeIndex(db, contents, parentPath);
|
||||
for (let ichild = 0; ichild < key; ichild ++) {
|
||||
index += parent.children[ichild].length;
|
||||
}
|
||||
return index + (parent.children[key].whitespaceLeading ?? 0);
|
||||
}
|
||||
}
|
||||
|
||||
export function nodeLocation(db: SexprDb, contents: String[], path: number[]): [number, number] {
|
||||
let line = 0;
|
||||
let index = nodeIndex(db, contents, path);
|
||||
while (index >= contents[line].length) {
|
||||
index -= contents[line].length;
|
||||
line ++;
|
||||
}
|
||||
return [line, index];
|
||||
}
|
||||
|
||||
export default {
|
||||
...ParseRules,
|
||||
sexpr: {
|
||||
expr,
|
||||
list,
|
||||
symbol,
|
||||
number,
|
||||
string,
|
||||
space,
|
||||
findNode
|
||||
}
|
||||
};
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
export class NeedsMoreInput extends Error {}
|
||||
|
||||
export function matchNext(input, regex) {
|
||||
|
||||
}
|
||||
export function consume(input, len, value) {
|
||||
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
import { freeze } from '../build';
|
||||
import Git from 'nodegit';
|
||||
import { isUtf8 } from 'node:buffer';
|
||||
import { open } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
|
||||
export class Repo {
|
||||
constructor(repo) {
|
||||
this.repo = repo;
|
||||
this.workdirVersions = [];
|
||||
}
|
||||
static async open(repoPath) {
|
||||
return new Repo(await Git.Repository.open(repoPath));
|
||||
}
|
||||
|
||||
async lastCached(path) {
|
||||
if (this.workdirVersions.length === 0) {
|
||||
return null;
|
||||
}
|
||||
const workdir = this.workdirVersions[this.workdirVersions.length - 1];
|
||||
|
||||
}
|
||||
|
||||
async read(path) {
|
||||
let contents = this.lastCached(path);
|
||||
|
||||
}
|
||||
}
|
||||
export async function fileContents(repoPath, path, commitId) {
|
||||
const repo = await Git.Repository.open(repoPath);
|
||||
let buf = null;
|
||||
if (commitId) {
|
||||
const commit = await Git.Commit.lookup(repo, commitId);
|
||||
if (!commit) { throw new Error(`Could not find commit ${commitId}`); }
|
||||
const tree = await commit.getTree();
|
||||
const entry = await tree.entryByPath(path);
|
||||
if (entry && entry.isBlob()) {
|
||||
const blob = await entry.getBlob();
|
||||
buf = blob.content();
|
||||
} else if (entry) {
|
||||
throw new Error(`${path} is not a file`);
|
||||
} else {
|
||||
throw new Error(`Could not find ${path} inside ${commitId}`);
|
||||
}
|
||||
} else {
|
||||
// TODO: can we cache this and only load the file if the metadata has changed?
|
||||
//
|
||||
const file = await open(join(repoPath, path))
|
||||
buf = await file.readFile()
|
||||
}
|
||||
if (!buf) { throw new Error(`internal error reading ${path}`); }
|
||||
if (!isUtf8(buf)) { throw new Error(`${path} is not text!`); }
|
||||
|
||||
const contents = [];
|
||||
let index = 0;
|
||||
while (true) {
|
||||
const nextIndex = buf.indexOf('\n', index);
|
||||
if (nextIndex < 0) {
|
||||
contents.push(buf.toString('utf8', index));
|
||||
break;
|
||||
} else {
|
||||
contents.push(buf.toString('utf8', index, nextIndex + 1));
|
||||
index = nextIndex + 1;
|
||||
}
|
||||
}
|
||||
return contents;
|
||||
}
|
||||
39
src/repo/workdir.ts
Normal file
39
src/repo/workdir.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import { freeze } from '../db/hash';
|
||||
import { open } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { isUtf8 } from 'node:buffer';
|
||||
|
||||
export function contentsFromBuffer(buf: Buffer): String[] {
|
||||
if (!isUtf8(buf)) { throw new Error(`buffer is not UTF-8`); }
|
||||
|
||||
const contents = [];
|
||||
let index = 0;
|
||||
while (true) {
|
||||
const nextIndex = buf.indexOf('\n', index);
|
||||
if (nextIndex < 0) {
|
||||
contents.push(buf.toString('utf8', index));
|
||||
break;
|
||||
} else {
|
||||
contents.push(buf.toString('utf8', index, nextIndex + 1));
|
||||
index = nextIndex + 1;
|
||||
}
|
||||
}
|
||||
return freeze(contents);
|
||||
}
|
||||
|
||||
export class WorkdirRepo {
|
||||
path: string;
|
||||
|
||||
constructor(path: string) {
|
||||
this.path = path;
|
||||
}
|
||||
|
||||
async read(path: string) {
|
||||
// TODO: keep around old versions, don't re-read if unchanged
|
||||
// logic to efficiently determine if a file has been changed:
|
||||
// https://www.kernel.org/pub/software/scm/git/docs/technical/racy-git.txt
|
||||
|
||||
const file = await open(join(this.path, path));
|
||||
return contentsFromBuffer(await file.readFile());
|
||||
}
|
||||
}
|
||||
29
src/test/dumb.test.ts
Normal file
29
src/test/dumb.test.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import {test, expect} from '@jest/globals';
|
||||
import { freeze, getHash } from '../db/hash';
|
||||
import Build from '../db/build';
|
||||
import { consume } from '../db/parse';
|
||||
import { nodeLocation } from '../db/sexpr';
|
||||
import DumbRules from '../db/dumb';
|
||||
|
||||
test('freeze some values', () => {
|
||||
expect('hi').toBe('hi');
|
||||
expect('hi').not.toEqual(new String('hi'));
|
||||
expect(freeze("hi")).not.toBe("hi");
|
||||
expect(freeze("Hello")).toEqual(new String("Hello"));
|
||||
const db = new Build({ identity: (_db: any, value: string) => value}).db;
|
||||
expect(db.identity("What's up")).toEqual(new String("What's up"));
|
||||
expect(freeze([]).length).toBe(0);
|
||||
expect(getHash([])).toBe('bfc622d4a6d410f26be3d72dd4f04e47bf434f76c655134adb76fef3a083c3a8');
|
||||
});
|
||||
|
||||
test('consumption boundaries', () => {
|
||||
expect(consume(['x', ''], 1)).toEqual(['']);
|
||||
});
|
||||
|
||||
test('parse a simple nested s-expr', () => {
|
||||
const db = new Build(DumbRules).db;
|
||||
const contents = ["(hello\n", " (world))\n", ""];
|
||||
const node = db.dumb.findNode(contents, [0, 1, 0]);
|
||||
expect(node.value).toBe(Symbol.for("world"));
|
||||
expect(nodeLocation(db, contents, [0, 1, 0])).toEqual([1, 2]);
|
||||
});
|
||||
7
tsconfig.json
Normal file
7
tsconfig.json
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"extends": "@tsconfig/recommended/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "build",
|
||||
"target": "es2022"
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue