refactor: move oc-path into plugin

This commit is contained in:
Peter Steinberger
2026-05-08 09:46:52 +01:00
parent 9a151b248e
commit e3b33a26cd
89 changed files with 14753 additions and 1 deletions

4
.github/labeler.yml vendored
View File

@@ -276,6 +276,10 @@
- changed-files:
- any-glob-to-any-file:
- "extensions/memory-wiki/**"
"extensions: oc-path":
- changed-files:
- any-glob-to-any-file:
- "extensions/oc-path/**"
"extensions: open-prose":
- changed-files:
- any-glob-to-any-file:

4
.gitignore vendored
View File

@@ -95,6 +95,10 @@ docs/internal/
tmp/
IDENTITY.md
USER.md
# Exception: oc-path real-world test fixtures need to be tracked even
# though the bare names match the local-untracked rule above.
!extensions/oc-path/src/oc-path/tests/fixtures/real/IDENTITY.md
!extensions/oc-path/src/oc-path/tests/fixtures/real/USER.md
*.tgz
*.tar.gz
*.zip

View File

@@ -28,7 +28,7 @@ apply across the CLI.
| Pairing and channels | [`pairing`](/cli/pairing) · [`qr`](/cli/qr) · [`channels`](/cli/channels) |
| Security and plugins | [`security`](/cli/security) · [`secrets`](/cli/secrets) · [`skills`](/cli/skills) · [`plugins`](/cli/plugins) · [`proxy`](/cli/proxy) |
| Legacy aliases | [`daemon`](/cli/daemon) (gateway service) · [`clawbot`](/cli/clawbot) (namespace) |
| Plugins (optional) | [`voicecall`](/cli/voicecall) (if installed) |
| Plugins (optional) | [`path`](/cli/path) · [`voicecall`](/cli/voicecall) (if installed) |
## Global flags
@@ -121,6 +121,12 @@ openclaw [--dev] [--profile <name>] <command>
status
index
search
path
resolve
find
set
validate
emit
commitments
list
dismiss

120
docs/cli/path.md Normal file
View File

@@ -0,0 +1,120 @@
---
summary: "CLI reference for `openclaw path` (inspect and edit workspace files via the `oc://` addressing scheme)"
read_when:
- You want to read or write a leaf inside a workspace file from the terminal
- You're scripting against workspace state and want a stable, kind-agnostic addressing scheme
- You're debugging an `oc://` path (validate the syntax, see what it resolves to)
title: "Path"
---
# `openclaw path`
Plugin-provided shell access to the `oc://` addressing substrate — one universal,
kind-dispatched path scheme for inspecting and surgically editing workspace
files (markdown, jsonc, jsonl, yaml). Self-hosters and editor extensions use
it to read or write a single leaf inside a workspace file without scripting
against the SDK directly.
## Subcommands
| Subcommand | Purpose |
| ----------------------- | ---------------------------------------------------------------------------- |
| `resolve <oc-path>` | Print the match at the path (or "not found"). |
| `find <pattern>` | Enumerate matches for a wildcard / predicate path. |
| `set <oc-path> <value>` | Write a leaf at the path. Supports `--dry-run`. |
| `validate <oc-path>` | Parse-only — print structural breakdown (file / section / item / field). |
| `emit <file>` | Round-trip a file through `parseXxx` + `emitXxx` (byte-fidelity diagnostic). |
## Global flags
| Flag | Purpose |
| --------------- | ------------------------------------------------------------------------ |
| `--cwd <dir>` | Resolve the file slot against this directory (default: `process.cwd()`). |
| `--file <path>` | Override the file slot's resolved path (absolute access). |
| `--json` | Force JSON output (default when stdout is not a TTY). |
| `--human` | Force human output (default when stdout is a TTY). |
| `--dry-run` | (only on `set`) print the bytes that would be written without writing. |
## `oc://` syntax
```
oc://FILE/SECTION/ITEM/FIELD?session=SCOPE
```
Slot rules — `field` requires `item`, `item` requires `section`. Across all
four slots:
- **Quoted segments** — `"a/b.c"` survives `/` and `.` separators.
`"\\"` and `"\""` are the only escapes inside quotes.
The file slot is also quote-aware: `oc://"skills/email-drafter"/Tools/-1`
treats `skills/email-drafter` as a single file path.
- **Predicates** — `[k=v]`, `[k!=v]`, `[k*=v]`, `[k^=v]`, `[k$=v]`,
`[k<v]`, `[k<=v]`, `[k>v]`, `[k>=v]`.
- **Unions** — `{a,b,c}` matches any of the alternatives.
- **Wildcards** — `*` (single sub-segment) and `**` (zero-or-more,
recursive). `find` accepts these; `resolve` and `set` reject them as
ambiguous.
- **Positional** — `$first`, `$last`, `-N` (Nth from end).
- **Ordinal** — `#N` for Nth match.
- **Insertion markers** — `+`, `+key`, `+nnn` for keyed / indexed
insertion (use with `set`).
- **Session scope** — `?session=cron:daily` etc. Orthogonal to slot
nesting.
Reserved characters (`?`, `&`, `%`) outside quoted, predicate, or union
segments are rejected. Control characters (U+0000–U+001F, U+007F) are
rejected anywhere.
## Examples
```bash
# Validate a path (no filesystem access)
openclaw path validate 'oc://AGENTS.md/Tools/-1/risk'
# Read a leaf
openclaw path resolve 'oc://gateway.jsonc/version'
# Wildcard search
openclaw path find 'oc://session.jsonl/*/event' --file ./logs/session.jsonl
# Dry-run a write
openclaw path set 'oc://gateway.jsonc/version' '2.0' --dry-run
# Apply the write
openclaw path set 'oc://gateway.jsonc/version' '2.0'
# Byte-fidelity round-trip (diagnostic)
openclaw path emit ./AGENTS.md
```
## Exit codes
| Code | Meaning |
| ---- | -------------------------------------------------------------------------- |
| `0` | Success. (`resolve` / `find`: at least one match. `set`: write succeeded.) |
| `1` | No match, `validate` found the path invalid, or `set` rejected by the substrate (no system-level error). |
| `2` | Argument or parse error. |
## Output mode
`openclaw path` is TTY-aware: human-readable output on a terminal, JSON when
stdout is piped or redirected. `--json` and `--human` override the
auto-detection.
## Notes
- `set` writes raw bytes through the substrate's emit path, which applies the
redaction-sentinel guard automatically. A leaf carrying
`__OPENCLAW_REDACTED__` (verbatim or as a substring) is refused at write
time.
- JSONC parsing and leaf edits use the plugin-local `jsonc-parser`
dependency, so comments and formatting are preserved on ordinary leaf
writes instead of going through a hand-rolled parser/re-render path.
- `path` does not know about LKG. If the file is LKG-tracked, the next
observe call decides whether to promote / recover. `set --batch` for
atomic multi-set through the LKG promote/recover lifecycle is planned
alongside the LKG-recovery substrate.
## Related
- [CLI reference](/cli)

View File

@@ -0,0 +1,11 @@
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { registerOcPathCli } from "./cli-registration.js";
/**
 * Plugin entry for the oc-path extension. The definition carries the plugin's
 * id, display name, and description; its `register` hook hands the plugin API
 * to `registerOcPathCli`.
 */
export default definePluginEntry({
id: "oc-path",
name: "OC Path",
description: "Adds the openclaw path CLI for oc:// workspace file addressing.",
// The only registration step is the CLI wiring from ./cli-registration.js.
register(api) {
registerOcPathCli(api);
},
});

View File

@@ -0,0 +1,19 @@
import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry";
/**
 * Registers the `path` CLI command with the plugin API.
 *
 * Two things are passed to `api.registerCli`: a callback that attaches the
 * command tree to the supplied `program`, and a descriptor list naming the
 * `path` command (flagged as having subcommands).
 *
 * @param api - Plugin API whose `registerCli` receives the callback and descriptors.
 */
export function registerOcPathCli(api: OpenClawPluginApi): void {
api.registerCli(
async ({ program }) => {
// Dynamic import: ./src/cli.js is loaded when this callback runs, not
// when this registration module is imported.
const { registerPathCli } = await import("./src/cli.js");
registerPathCli(program);
},
{
descriptors: [
{
name: "path",
description: "Inspect and edit workspace files via oc:// paths",
hasSubcommands: true,
},
],
},
);
}

View File

@@ -0,0 +1,11 @@
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { registerOcPathCli } from "./cli-registration.js";
/**
 * Plugin entry for the oc-path extension. The definition carries the plugin's
 * id, display name, and description; its `register` hook hands the plugin API
 * to `registerOcPathCli`.
 */
export default definePluginEntry({
id: "oc-path",
name: "OC Path",
description: "Adds the openclaw path CLI for oc:// workspace file addressing.",
// The only registration step is the CLI wiring from ./cli-registration.js.
register(api) {
registerOcPathCli(api);
},
});

View File

@@ -0,0 +1,20 @@
{
"id": "oc-path",
"name": "OC Path",
"description": "Adds the openclaw path CLI for oc:// workspace file addressing.",
"activation": {
"onStartup": false,
"onCommands": ["path"]
},
"commandAliases": [
{
"name": "path",
"kind": "cli"
}
],
"configSchema": {
"type": "object",
"additionalProperties": false,
"properties": {}
}
}

View File

@@ -0,0 +1,30 @@
{
"name": "@openclaw/oc-path",
"version": "2026.5.6",
"private": true,
"description": "OpenClaw oc:// workspace path plugin",
"type": "module",
"dependencies": {
"commander": "^14.0.3",
"jsonc-parser": "^3.3.1",
"markdown-it": "14.1.1",
"yaml": "^2.8.4"
},
"devDependencies": {
"@openclaw/plugin-sdk": "workspace:*",
"openclaw": "workspace:*"
},
"peerDependencies": {
"openclaw": ">=2026.5.6"
},
"peerDependenciesMeta": {
"openclaw": {
"optional": true
}
},
"openclaw": {
"extensions": [
"./index.ts"
]
}
}

View File

@@ -0,0 +1,265 @@
/**
* Smoke tests for the `openclaw path` CLI handlers.
*
* Tests invoke each subcommand handler directly with a capturing
* `OutputRuntimeEnv` — no commander wiring, no child process spawn.
* Assertions inspect captured stdout/stderr and the exit code the
* handler set on the runtime.
*/
import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import {
type OutputRuntimeEnv,
pathEmitCommand,
pathFindCommand,
pathResolveCommand,
pathSetCommand,
pathValidateCommand,
} from "./cli.js";
/**
 * Capturing runtime used by the tests: the `OutputRuntimeEnv` surface plus
 * the buffers and exit code that assertions inspect afterwards.
 */
interface TestRuntime extends OutputRuntimeEnv {
// Chunks passed to `writeStdout`, in call order.
readonly stdout: string[];
// Messages passed to `error`, in call order.
readonly stderr: string[];
// Last code passed to `exit`; starts at 0 (see createTestRuntime).
exitCode: number;
}
/**
 * Builds an in-memory runtime for handler tests. Output calls append to the
 * exposed `stdout` / `stderr` arrays and `exit` records the code on
 * `exitCode` instead of terminating anything.
 */
function createTestRuntime(): TestRuntime {
  const capturedOut: string[] = [];
  const capturedErr: string[] = [];
  const rt: TestRuntime = {
    stdout: capturedOut,
    stderr: capturedErr,
    exitCode: 0,
    error(message) {
      capturedErr.push(message);
    },
    writeStdout(chunk) {
      capturedOut.push(chunk);
    },
    // Closes over `rt` (not `this`) so the handler also works when detached.
    exit(status) {
      rt.exitCode = status;
    },
  };
  return rt;
}
/** Joins every captured stdout chunk with a newline. */
function stdoutText(rt: TestRuntime): string {
  return rt.stdout.join("\n");
}
/** Joins every captured stderr message with a newline. */
function stderrText(rt: TestRuntime): string {
  return rt.stderr.join("\n");
}
// Handler-level suite: every case builds its own capturing runtime, calls one
// exported handler directly, then asserts on the captured output and the
// recorded exit code. Filesystem cases write fixtures into a fresh temp
// directory created before each test.
describe("openclaw path CLI", () => {
let workspaceDir: string;
beforeEach(() => {
workspaceDir = mkdtempSync(join(tmpdir(), "oc-path-cli-"));
});
afterEach(() => {
// mkdtemp leaves a small dir; OS will GC it. Skip cleanup to keep
// the test deterministic on Windows where rmdir flakes.
});
// validate: parse-only handler; synchronous and never touches the filesystem.
describe("validate", () => {
it("CLI-V01 accepts a well-formed path with --json", () => {
const rt = createTestRuntime();
pathValidateCommand("oc://AGENTS.md/Tools/-1", { json: true }, rt);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.valid).toBe(true);
expect(out.structure.file).toBe("AGENTS.md");
expect(out.structure.section).toBe("Tools");
});
it("CLI-V02 rejects a malformed path with code 1", () => {
const rt = createTestRuntime();
// The embedded NUL control character makes the path malformed.
pathValidateCommand("oc://X/a\x00b", { json: true }, rt);
expect(rt.exitCode).toBe(1);
const out = JSON.parse(stdoutText(rt));
expect(out.valid).toBe(false);
});
it("CLI-V03 missing argument returns 2", () => {
const rt = createTestRuntime();
pathValidateCommand(undefined, { json: true }, rt);
expect(rt.exitCode).toBe(2);
expect(stderrText(rt)).toContain("missing");
});
});
// resolve: reads a fixture from the temp workspace via `cwd`.
describe("resolve", () => {
it("CLI-R01 finds a leaf in jsonc and prints it", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
writeFileSync(filePath, '{ "version": "1.0" }', "utf-8");
const rt = createTestRuntime();
await pathResolveCommand("oc://gateway.jsonc/version", { cwd: workspaceDir, json: true }, rt);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.resolved).toBe(true);
expect(out.match.kind).toBe("leaf");
expect(out.match.valueText).toBe("1.0");
});
it("CLI-R02 returns 1 for not-found path", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
writeFileSync(filePath, '{ "version": "1.0" }', "utf-8");
const rt = createTestRuntime();
await pathResolveCommand("oc://gateway.jsonc/missing", { cwd: workspaceDir, json: true }, rt);
expect(rt.exitCode).toBe(1);
const out = JSON.parse(stdoutText(rt));
expect(out.resolved).toBe(false);
});
it("CLI-R03 missing argument returns 2", async () => {
const rt = createTestRuntime();
await pathResolveCommand(undefined, { json: true }, rt);
expect(rt.exitCode).toBe(2);
expect(stderrText(rt)).toContain("missing");
});
});
// set: covers a real write, --dry-run, the sentinel refusal, and missing args.
describe("set", () => {
it("CLI-S01 writes new bytes when path resolves", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
writeFileSync(filePath, '{ "version": "1.0" }', "utf-8");
const rt = createTestRuntime();
await pathSetCommand(
"oc://gateway.jsonc/version",
"2.0",
{ cwd: workspaceDir, json: true },
rt,
);
expect(rt.exitCode).toBe(0);
const after = readFileSync(filePath, "utf-8");
expect(after).toContain('"2.0"');
});
it("CLI-S02 --dry-run does not write to disk", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
const before = '{ "version": "1.0" }';
writeFileSync(filePath, before, "utf-8");
const rt = createTestRuntime();
await pathSetCommand(
"oc://gateway.jsonc/version",
"2.0",
{ cwd: workspaceDir, json: true, dryRun: true },
rt,
);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.dryRun).toBe(true);
expect(out.bytes).toContain('"2.0"');
// File on disk unchanged.
expect(readFileSync(filePath, "utf-8")).toBe(before);
});
it("CLI-S03 sentinel-bearing value is refused at emit", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
writeFileSync(filePath, '{ "token": "x" }', "utf-8");
const rt = createTestRuntime();
// The sentinel-bearing value is accepted into the AST by setOcPath,
// but `emitForKind` refuses to serialize it (defense-in-depth at
// the per-kind emit boundary). The CLI handler must catch that
// refusal and route it through the structured error boundary —
// a thrown error escaping commander would print raw `String(err)`
// and bypass our JSON/human scrubbing. Pin the structured shape:
// exit code 1, stable code OC_EMIT_SENTINEL, message scrubbed.
await pathSetCommand(
"oc://gateway.jsonc/token",
"__OPENCLAW_REDACTED__",
{ cwd: workspaceDir, json: true },
rt,
);
expect(rt.exitCode).toBe(1);
expect(stderrText(rt)).toContain("OC_EMIT_SENTINEL");
// F13 — file context in sentinel error. Without fileNameForGuard
// plumbing through emitForKind, the message would carry the
// empty-slot fallback (`oc:///[raw]`); now it carries the actual
// file (`oc://gateway.jsonc/[raw]`). Forensics + audit pipelines
// rely on this — without the file context, "sentinel rejected
// somewhere" doesn't tell you WHICH file was involved.
expect(stderrText(rt)).toContain("gateway.jsonc");
});
it("CLI-S04 missing args returns 2", async () => {
const rt = createTestRuntime();
await pathSetCommand(undefined, undefined, { json: true }, rt);
expect(rt.exitCode).toBe(2);
expect(stderrText(rt)).toContain("requires");
});
});
// find: wildcard enumeration, the zero-match exit code, and the file-slot
// wildcard guard.
describe("find", () => {
it("CLI-F01 enumerates wildcard matches", async () => {
const filePath = join(workspaceDir, "config.jsonc");
writeFileSync(filePath, '{ "items": [ { "id": "a" }, { "id": "b" } ] }', "utf-8");
const rt = createTestRuntime();
await pathFindCommand("oc://config.jsonc/items/*/id", { cwd: workspaceDir, json: true }, rt);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.count).toBe(2);
});
it("CLI-F02 returns 1 when zero matches", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
writeFileSync(filePath, "{}", "utf-8");
const rt = createTestRuntime();
await pathFindCommand("oc://gateway.jsonc/nope/*", { cwd: workspaceDir, json: true }, rt);
expect(rt.exitCode).toBe(1);
});
it("CLI-F03 file-slot wildcard rejected with clear error (no ENOENT)", async () => {
// Closes Galin P3 (round 8): `find` resolves `pattern.file` to one
// literal path, so `oc://*.jsonc/...` would silently ENOENT during
// fs.readFile. The CLI now surfaces a clear error before touching
// the filesystem, with stable code OC_PATH_FILE_WILDCARD_UNSUPPORTED.
const rt = createTestRuntime();
await pathFindCommand("oc://*.jsonc/items", { cwd: workspaceDir, json: true }, rt);
expect(rt.exitCode).toBe(2);
expect(stderrText(rt)).toContain("OC_PATH_FILE_WILDCARD_UNSUPPORTED");
expect(stderrText(rt)).toContain("file-slot wildcards are not supported");
});
});
// emit: round-trips a file and compares the emitted bytes with the input.
describe("emit", () => {
it("CLI-E01 round-trips jsonc bytes verbatim (byte-fidelity proof)", async () => {
const filePath = join(workspaceDir, "gateway.jsonc");
const before = '// keep this comment\n{\n "v": 1\n}\n';
writeFileSync(filePath, before, "utf-8");
const rt = createTestRuntime();
await pathEmitCommand(filePath, { json: true }, rt);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.kind).toBe("jsonc");
expect(out.bytes).toBe(before);
});
it("CLI-E02 round-trips md verbatim", async () => {
const filePath = join(workspaceDir, "AGENTS.md");
const before = "## Tools\n- gh\n## Boundaries\n- never rm -rf\n";
writeFileSync(filePath, before, "utf-8");
const rt = createTestRuntime();
await pathEmitCommand(filePath, { json: true }, rt);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.kind).toBe("md");
expect(out.bytes).toBe(before);
});
it("CLI-E03 emit --cwd resolves <file> against the supplied directory", async () => {
// Closes round-10 finding F2: emit advertises --cwd / --file in
// the docs but the handler resolved <file> against process.cwd()
// ignoring both. Pin the new wiring: a relative <file> resolves
// against --cwd, not against process.cwd().
const filePath = join(workspaceDir, "AGENTS.md");
writeFileSync(filePath, "## Tools\n- gh\n", "utf-8");
const rt = createTestRuntime();
// Pass a RELATIVE filename + explicit --cwd. If the handler
// ignored --cwd, loadAst would ENOENT against process.cwd().
await pathEmitCommand("AGENTS.md", { cwd: workspaceDir, json: true }, rt);
expect(rt.exitCode).toBe(0);
const out = JSON.parse(stdoutText(rt));
expect(out.kind).toBe("md");
expect(out.bytes).toBe("## Tools\n- gh\n");
});
});
});

View File

@@ -0,0 +1,614 @@
/**
* `openclaw path` — shell-level access to the OcPath substrate verbs.
* Self-hosters and editor extensions use it to inspect and surgically
* edit workspace files without scripting against the SDK directly.
*
* Subcommands:
* - `resolve <oc-path>` — print the match at the path
* - `set <oc-path> <value>` — write a leaf at the path; supports `--dry-run`
* - `find <pattern>` — enumerate matches for a wildcard/predicate path
* - `validate <oc-path>` — parse-only; print structure
* - `emit <file>` — read + parseXxx + emitXxx; verifies byte-fidelity
*
* Output is TTY-aware: defaults to human-readable when stdout is a TTY,
* switches to JSON otherwise (so pipes don't get formatting noise).
* `--json` and `--human` flags override the auto-detection.
*
* Boundaries this CLI does NOT cross (v0):
* - Doesn't know about LKG. `set` writes raw bytes through the
* substrate emit; if the file is LKG-tracked, the next observe
* call decides whether to promote / recover.
* - Doesn't know about lint rules or doctor fixers — that's a
* different surface.
*/
import { promises as fs } from "node:fs";
import { resolve as resolvePath } from "node:path";
import type { Command } from "commander";
import {
OcEmitSentinelError,
OcPathError,
REDACTED_SENTINEL,
emitJsonc,
emitJsonl,
emitMd,
emitYaml,
findOcPaths,
formatOcPath,
inferKind,
parseJsonc,
parseJsonl,
parseMd,
parseOcPath,
parseYaml,
resolveOcPath,
setOcPath,
type OcAst,
type OcMatch,
type OcPath,
type SetResult,
} from "./oc-path/index.js";
/**
 * Output/exit surface the command handlers talk to instead of touching
 * `process` directly, so a caller can substitute its own implementation.
 */
export type OutputRuntimeEnv = {
/** Receives normal command output (the JSON document or the human text). */
writeStdout(value: string): void;
/** Receives error output (already formatted by `emitError`). */
error(value: string): void;
/** Reports the exit code; `defaultRuntime` stores it in `process.exitCode`. */
exit(code: number): void;
};
/** Options accepted by the `openclaw path` command handlers. */
export interface PathCommandOptions {
/** `true` forces JSON output; checked before `human` in `detectMode`. */
readonly json?: boolean;
/** `true` forces human-readable output when `json` is not `true`. */
readonly human?: boolean;
/** Directory the file slot is resolved against; falls back to `process.cwd()`. */
readonly cwd?: string;
/** When set, this path is resolved and read instead of the path's file slot. */
readonly file?: string;
/** Read by `pathSetCommand` only: `true` reports the new bytes without writing. */
readonly dryRun?: boolean;
}
/** The two output renderings a handler can produce. */
type OutputMode = "human" | "json";
/** Text substituted for the redaction sentinel by `scrubSentinel`. */
const SCRUB_PLACEHOLDER = "[REDACTED]";
/**
 * Runtime backed by the real process: stdout is written verbatim, errors go
 * to stderr with a trailing newline, and `exit` only records the code on
 * `process.exitCode` (it does not terminate the process).
 */
const defaultRuntime: OutputRuntimeEnv = {
  writeStdout: (value) => {
    process.stdout.write(value);
  },
  error: (value) => {
    process.stderr.write(`${value}\n`);
  },
  exit: (code) => {
    process.exitCode = code;
  },
};
/**
 * Output-boundary scrub for the redaction sentinel. Every occurrence of
 * `REDACTED_SENTINEL` in `s` is replaced with `[REDACTED]`; a string that
 * does not contain the sentinel is returned unchanged. This is
 * defense-in-depth: even if some code path surfaces raw content carrying
 * the sentinel, the CLI must not echo it.
 */
export function scrubSentinel(s: string): string {
  return s.includes(REDACTED_SENTINEL)
    ? s.split(REDACTED_SENTINEL).join(SCRUB_PLACEHOLDER)
    : s;
}
/**
 * Chooses the output mode. An explicit `json: true` wins, then an explicit
 * `human: true`; with neither, a TTY stdout gets human output and anything
 * else gets JSON.
 */
function detectMode(options: PathCommandOptions): OutputMode {
  const forced: OutputMode | undefined =
    options.json === true ? "json" : options.human === true ? "human" : undefined;
  return forced ?? (process.stdout.isTTY ? "human" : "json");
}
/**
 * Writes a successful result to stdout in the requested mode.
 *
 * In JSON mode `value` is pretty-printed with a 2-space indent and
 * `humanFallback` is never invoked; in human mode only `humanFallback()` is
 * rendered. Either way the text passes through `scrubSentinel` first.
 */
function emit(
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  value: unknown,
  humanFallback: () => string,
): void {
  const rendered = mode === "json" ? JSON.stringify(value, null, 2) : humanFallback();
  runtime.writeStdout(scrubSentinel(rendered));
}
/**
 * Reports an error through `runtime.error` after scrubbing the message.
 *
 * JSON mode emits a compact `{ error: { code, message } }` document; human
 * mode emits `CODE: message`. `code` defaults to `"ERR"`.
 */
function emitError(
  runtime: OutputRuntimeEnv,
  mode: OutputMode,
  message: string,
  code = "ERR",
): void {
  const safeMessage = scrubSentinel(message);
  const line =
    mode === "json"
      ? JSON.stringify({ error: { code, message: safeMessage } })
      : `${code}: ${safeMessage}`;
  runtime.error(line);
}
/**
 * Reads `absPath` as UTF-8 and parses it with the parser selected by
 * `inferKind(fileName)`. Any kind other than jsonc / jsonl / yaml is parsed
 * as markdown. Read errors propagate to the caller.
 */
async function loadAst(absPath: string, fileName: string): Promise<OcAst> {
  const text = await fs.readFile(absPath, "utf-8");
  switch (inferKind(fileName)) {
    case "jsonc":
      return parseJsonc(text).ast;
    case "jsonl":
      return parseJsonl(text).ast;
    case "yaml":
      return parseYaml(text).ast;
    default:
      return parseMd(text).ast;
  }
}
/**
 * Serializes an AST with the emitter matching `ast.kind`.
 *
 * When `fileName` is supplied it is forwarded as `fileNameForGuard`, so an
 * `OcEmitSentinelError` raised by the emitter can name the file
 * (`oc://gateway.jsonc/[raw]`) instead of the empty-slot fallback
 * (`oc:///[raw]`).
 */
function emitForKind(ast: OcAst, fileName?: string): string {
  const guardOpts = fileName === undefined ? {} : { fileNameForGuard: fileName };
  if (ast.kind === "jsonc") {
    return emitJsonc(ast, guardOpts);
  }
  if (ast.kind === "jsonl") {
    return emitJsonl(ast, guardOpts);
  }
  if (ast.kind === "yaml") {
    // Default round-trip mode: an unmodified AST emits its original bytes,
    // which is what makes `openclaw path emit foo.yaml` a byte-fidelity
    // diagnostic. After `setOcPath` the substrate has already re-rendered
    // into `ast.raw`, so the same mode emits the mutated bytes too.
    return emitYaml(ast, guardOpts);
  }
  if (ast.kind === "md") {
    return emitMd(ast, guardOpts);
  }
  throw new Error("unreachable: emitForKind kind");
}
/**
 * Maps an oc:// path to the filesystem path to read or write.
 *
 * `options.file`, when present, is resolved on its own and wins outright;
 * otherwise the path's file slot is resolved against `options.cwd`
 * (default: `process.cwd()`).
 */
function resolveFsPath(path: OcPath, options: PathCommandOptions): string {
  const baseDir = options.cwd ?? process.cwd();
  return options.file === undefined
    ? resolvePath(baseDir, path.file)
    : resolvePath(options.file);
}
/**
 * Renders a match as a single human-readable line. Leaf, node and
 * insertion-point matches each get their own layout; any other kind is
 * rendered as the root.
 */
function formatMatchHuman(match: OcMatch): string {
  switch (match.kind) {
    case "leaf":
      return `leaf @ L${match.line}: ${JSON.stringify(match.valueText)} (${match.leafType})`;
    case "node":
      return `node @ L${match.line} [${match.descriptor}]`;
    case "insertion-point":
      return `insertion-point @ L${match.line} [${match.container}]`;
    default:
      return `root @ L${match.line}`;
  }
}
/**
 * Handler for `openclaw path resolve <oc-path>`.
 *
 * Exit codes reported through `runtime.exit`:
 * - `2` — missing argument, path parse failure, or `resolveOcPath` refusing
 *   the path with an `OcPathError`;
 * - `1` — the path parsed but nothing matched;
 * - none — a match was found and printed.
 *
 * Errors that are not `OcPathError` are rethrown. Failures while loading the
 * file are not caught here and propagate to the caller.
 */
export async function pathResolveCommand(
  pathStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  // Every usage / parse / refusal failure shares the same shape: a
  // structured error followed by exit code 2.
  const failUsage = (message: string, code?: string): void => {
    emitError(runtime, mode, message, code);
    runtime.exit(2);
  };

  if (pathStr === undefined) {
    failUsage("resolve: missing <oc-path> argument");
    return;
  }

  let target: OcPath;
  try {
    target = parseOcPath(pathStr);
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    failUsage(`parse failed: ${err.message}`, err.code);
    return;
  }

  const fsPath = resolveFsPath(target, options);
  const ast = await loadAst(fsPath, target.file);

  let found: ReturnType<typeof resolveOcPath>;
  try {
    found = resolveOcPath(ast, target);
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    // resolveOcPath throws on wildcard patterns (those belong in `find`,
    // not `resolve`). Passing the structured code through lets the message
    // point the caller at the right verb.
    failUsage(`resolve refused: ${err.message}`, err.code);
    return;
  }

  if (found === null) {
    emit(runtime, mode, { resolved: false, ocPath: pathStr }, () => `not found: ${pathStr}`);
    runtime.exit(1);
    return;
  }

  const hit = found;
  emit(runtime, mode, { resolved: true, ocPath: pathStr, match: hit }, () =>
    formatMatchHuman(hit),
  );
}
/**
 * Handler for `openclaw path set <oc-path> <value>`.
 *
 * Parses the path, loads the target file, applies the edit with `setOcPath`,
 * serializes the result, then either reports the bytes (`--dry-run`) or
 * writes them to disk.
 *
 * Exit codes reported through `runtime.exit`:
 * - `2` — missing arguments, path parse failure, or `setOcPath` throwing an
 *   `OcPathError`;
 * - `1` — the edit or the emit was refused because of the redaction
 *   sentinel, or `setOcPath` returned a non-ok result;
 * - none — success (including a dry run).
 *
 * Reported byte counts are UTF-8 byte lengths of the serialized file, i.e.
 * the size actually written to disk, not the string's UTF-16 length.
 *
 * Errors other than `OcPathError` / `OcEmitSentinelError`, and any failure
 * while reading or writing the file, propagate to the caller.
 */
export async function pathSetCommand(
  pathStr: string | undefined,
  value: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (pathStr === undefined || value === undefined) {
    emitError(runtime, mode, "set: requires <oc-path> <value>");
    runtime.exit(2);
    return;
  }
  let ocPath: OcPath;
  try {
    ocPath = parseOcPath(pathStr);
  } catch (err) {
    if (err instanceof OcPathError) {
      emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }
  const fsPath = resolveFsPath(ocPath, options);
  const ast = await loadAst(fsPath, ocPath.file);
  // `setOcPath` invokes the per-kind editor which calls back into
  // emit during rebuildRaw; the redaction-sentinel guard fires there
  // and throws `OcEmitSentinelError` for sentinel-bearing values.
  // Catch the throw here so it goes through the structured CLI error
  // path instead of escaping to commander's runCommandWithRuntime
  // (which would print raw String(err) and bypass --json scrubbing).
  let result: SetResult;
  try {
    result = setOcPath(ast, ocPath, value);
  } catch (err) {
    // Sentinel check comes first so it keeps its dedicated code and exit
    // status even if the error classes are related.
    if (err instanceof OcEmitSentinelError) {
      emitError(runtime, mode, `set refused: ${err.message}`, "OC_EMIT_SENTINEL");
      runtime.exit(1);
      return;
    }
    // Same treatment `pathResolveCommand` gives a path the substrate
    // refuses: structured error and exit code 2, not a raw throw.
    if (err instanceof OcPathError) {
      emitError(runtime, mode, `set refused: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }
  if (!result.ok) {
    const reason = result.reason;
    const detail = "detail" in result ? result.detail : undefined;
    emit(runtime, mode, { ok: false, reason, detail }, () =>
      // Keep reason and detail visually separate in the human rendering.
      detail !== undefined ? `set failed: ${reason}: ${detail}` : `set failed: ${reason}`,
    );
    runtime.exit(1);
    return;
  }
  const nextAst = result.ast;
  // `setOcPath` accepted the value into the AST, but the per-kind
  // emit can still refuse to serialize it — most notably when the
  // value contains the redaction sentinel (defense-in-depth: the
  // substrate's emit guard fires there). The throw must NOT escape
  // to commander's runCommandWithRuntime, which would print
  // `String(err)` raw and bypass the CLI's JSON/human scrubbed-error
  // boundary. Catch and route through `emitError` like every other
  // refusal path.
  let newBytes: string;
  try {
    newBytes = emitForKind(nextAst, ocPath.file);
  } catch (err) {
    if (err instanceof OcEmitSentinelError) {
      emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL");
      runtime.exit(1);
      return;
    }
    throw err;
  }
  // `newBytes.length` counts UTF-16 code units; the file is written as
  // UTF-8, so measure the encoded size for the "bytes" figures.
  const byteCount = Buffer.byteLength(newBytes, "utf-8");
  // YAML edits go through the yaml library renderer. Self-hosters
  // running `openclaw path set` on a carefully formatted file should
  // see the warning explicitly.
  const lossyKinds: ReadonlySet<OcAst["kind"]> = new Set(["yaml"]);
  const formatLossWarning = lossyKinds.has(nextAst.kind)
    ? `note: ${nextAst.kind} edit-then-emit may rewrite comments / original formatting`
    : null;
  const warningField = formatLossWarning !== null ? { warning: formatLossWarning } : {};
  if (options.dryRun === true) {
    emit(
      runtime,
      mode,
      {
        ok: true,
        dryRun: true,
        bytes: newBytes,
        ...warningField,
      },
      () => {
        const lines = [`--dry-run: would write ${byteCount} bytes to ${fsPath}`];
        if (formatLossWarning !== null) {
          lines.push(formatLossWarning);
        }
        lines.push(newBytes);
        return lines.join("\n");
      },
    );
    return;
  }
  await fs.writeFile(fsPath, newBytes, "utf-8");
  emit(
    runtime,
    mode,
    {
      ok: true,
      dryRun: false,
      bytesWritten: byteCount,
      fsPath,
      ...warningField,
    },
    () => {
      const lines = [`wrote ${byteCount} bytes to ${fsPath}`];
      if (formatLossWarning !== null) {
        lines.push(formatLossWarning);
      }
      return lines.join("\n");
    },
  );
}
/**
 * Handler for `openclaw path find <pattern>`.
 *
 * Parses the pattern, rejects wildcards in the file slot, loads the single
 * target file and lists every match `findOcPaths` returns.
 *
 * Exit codes reported through `runtime.exit`:
 * - `2` — missing argument, pattern parse failure, or a wildcard in the
 *   file slot (`OC_PATH_FILE_WILDCARD_UNSUPPORTED`);
 * - `1` — the search ran but produced zero matches (the empty result is
 *   still printed);
 * - none — at least one match.
 *
 * Failures while loading the file propagate to the caller.
 */
export async function pathFindCommand(
  patternStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (patternStr === undefined) {
    emitError(runtime, mode, "find: missing <pattern> argument");
    runtime.exit(2);
    return;
  }
  let pattern: OcPath;
  try {
    pattern = parseOcPath(patternStr);
  } catch (err) {
    if (err instanceof OcPathError) {
      emitError(runtime, mode, `parse failed: ${err.message}`, err.code);
      runtime.exit(2);
      return;
    }
    throw err;
  }
  // The CLI resolves `pattern.file` to a single literal filesystem path.
  // Wildcards in the file slot (e.g. `oc://*.jsonc/...`) would silently
  // ENOENT during `fs.readFile`. The substrate's `findOcPaths` walks
  // *inside* an AST — multi-file globbing is out of scope for v0. Surface
  // a clear error so users don't get a confusing missing-file failure.
  if (/[*?]/.test(pattern.file)) {
    emitError(
      runtime,
      mode,
      `find: file-slot wildcards are not supported (got "${pattern.file}"). ` +
        `Pass a concrete file path; multi-file globbing is a follow-up feature.`,
      "OC_PATH_FILE_WILDCARD_UNSUPPORTED",
    );
    runtime.exit(2);
    return;
  }
  const fsPath = resolveFsPath(pattern, options);
  const ast = await loadAst(fsPath, pattern.file);
  const matches = findOcPaths(ast, pattern);
  emit(
    runtime,
    mode,
    {
      pattern: patternStr,
      count: matches.length,
      matches: matches.map((m) => ({
        path: formatOcPath(m.path),
        match: m.match,
      })),
    },
    () => {
      if (matches.length === 0) {
        return `0 matches for ${patternStr}`;
      }
      const plural = matches.length === 1 ? "" : "es";
      const lines = [`${matches.length} match${plural} for ${patternStr}:`];
      for (const m of matches) {
        // Separate the matched path from its description; without a
        // delimiter the two run together into one unreadable token.
        lines.push(` ${formatOcPath(m.path)} -> ${formatMatchHuman(m.match)}`);
      }
      return lines.join("\n");
    },
  );
  if (matches.length === 0) {
    runtime.exit(1);
  }
}
/**
 * Handler for `openclaw path validate <oc-path>`: parse-only, no filesystem
 * access.
 *
 * Exit codes reported through `runtime.exit`:
 * - `2` — missing argument;
 * - `1` — the path is invalid (an `OcPathError` was raised); the verdict is
 *   written to stdout as a result, not to the error stream;
 * - none — the path is valid and its slot breakdown was printed.
 *
 * Errors that are not `OcPathError` are rethrown.
 */
export function pathValidateCommand(
  pathStr: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): void {
  const mode = detectMode(options);
  if (pathStr === undefined) {
    emitError(runtime, mode, "validate: missing <oc-path> argument");
    runtime.exit(2);
    return;
  }
  try {
    const parsed = parseOcPath(pathStr);
    const report = {
      valid: true,
      ocPath: pathStr,
      formatted: formatOcPath(parsed),
      structure: {
        file: parsed.file,
        section: parsed.section,
        item: parsed.item,
        field: parsed.field,
        session: parsed.session,
      },
    };
    // Human rendering lists the file slot always and the remaining slots
    // only when they are present.
    const describeHuman = (): string => {
      const out = [`valid: ${pathStr}`, ` file: ${parsed.file}`];
      if (parsed.section !== undefined) {
        out.push(` section: ${parsed.section}`);
      }
      if (parsed.item !== undefined) {
        out.push(` item: ${parsed.item}`);
      }
      if (parsed.field !== undefined) {
        out.push(` field: ${parsed.field}`);
      }
      if (parsed.session !== undefined) {
        out.push(` session: ${parsed.session}`);
      }
      return out.join("\n");
    };
    emit(runtime, mode, report, describeHuman);
  } catch (err) {
    if (!(err instanceof OcPathError)) {
      throw err;
    }
    const failure = err;
    emit(
      runtime,
      mode,
      { valid: false, code: failure.code, message: failure.message },
      () => `INVALID: ${failure.code}: ${failure.message}`,
    );
    runtime.exit(1);
  }
}
/**
 * Handler for `openclaw path emit <file>`: loads the file, re-serializes it
 * with `emitForKind`, and prints the result.
 *
 * The file is located with the same rules as the sibling subcommands:
 * `options.file` (when set) is resolved and used as-is, otherwise `fileArg`
 * is resolved against `options.cwd` (default `process.cwd()`). The last path
 * segment is what selects the parser and is passed to the emit guard.
 *
 * Exit codes reported through `runtime.exit`:
 * - `2` — missing argument;
 * - `1` — the emitter raised `OcEmitSentinelError`;
 * - none — success.
 *
 * JSON mode prints a compact, scrubbed `{ ok, kind, bytes }` document; human
 * mode writes the emitted bytes as-is. Other errors, including file-read
 * failures, propagate to the caller.
 */
export async function pathEmitCommand(
  fileArg: string | undefined,
  options: PathCommandOptions,
  runtime: OutputRuntimeEnv,
): Promise<void> {
  const mode = detectMode(options);
  if (fileArg === undefined) {
    emitError(runtime, mode, "emit: missing <file> argument");
    runtime.exit(2);
    return;
  }

  const fsPath =
    options.file === undefined
      ? resolvePath(options.cwd ?? process.cwd(), fileArg)
      : resolvePath(options.file);
  // Split on either separator so the final segment is found for both
  // POSIX-style and Windows-style paths.
  const segments = fsPath.split(/[\\/]/);
  const fileName = segments.pop() ?? fileArg;
  const ast = await loadAst(fsPath, fileName);

  let bytes: string;
  try {
    bytes = emitForKind(ast, fileName);
  } catch (err) {
    if (!(err instanceof OcEmitSentinelError)) {
      throw err;
    }
    emitError(runtime, mode, `emit refused: ${err.message}`, "OC_EMIT_SENTINEL");
    runtime.exit(1);
    return;
  }

  const payload =
    mode === "json"
      ? scrubSentinel(JSON.stringify({ ok: true, kind: ast.kind, bytes }))
      : bytes;
  runtime.writeStdout(payload);
}
/**
 * Option bag as received by the commander action callbacks, before
 * `normalize` copies it into a `PathCommandOptions`.
 */
interface RawPathOptions {
// --json
json?: boolean;
// --human
human?: boolean;
// --cwd <dir>
cwd?: string;
// --file <file>
file?: string;
// --dry-run (declared on `set`)
dryRun?: boolean;
}
/**
 * Project the raw commander option bag onto the handler-facing option
 * shape. Exactly five flags are carried over (absent ones stay
 * `undefined`); anything else on `opts` is dropped.
 */
function normalize(opts: RawPathOptions): PathCommandOptions {
  const { json, human, cwd, file, dryRun } = opts;
  return { json, human, cwd, file, dryRun };
}
/**
* Register the `path` command group on `program` together with its five
* subcommands: `resolve`, `find`, `set`, `validate`, and `emit`.
*
* Every action normalizes the raw commander options and delegates to
* the matching `pathXxxCommand` handler with `defaultRuntime`.
*/
export function registerPathCli(program: Command): void {
const path = program
.command("path")
.description("Inspect and edit workspace files via the oc:// addressing scheme")
.addHelpText("after", "\nDocs: https://docs.openclaw.ai/cli/path\n");
// `path resolve <oc-path>` — single-match lookup.
path
.command("resolve")
.description("Print the match at an oc:// path")
.argument("<oc-path>", "oc:// path to resolve")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.option("--cwd <dir>", "Resolve file slot against this directory")
.option("--file <file>", "Override the file slot's resolved path")
.action(async (pathStr: string, opts: RawPathOptions) => {
await pathResolveCommand(pathStr, normalize(opts), defaultRuntime);
});
// `path find <pattern>` — multi-match lookup for wildcard / predicate patterns.
path
.command("find")
.description("Enumerate matches for a wildcard / predicate oc:// pattern")
.argument("<pattern>", "oc:// pattern")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.option("--cwd <dir>", "Resolve file slot against this directory")
.option("--file <file>", "Override the file slot's resolved path")
.action(async (patternStr: string, opts: RawPathOptions) => {
await pathFindCommand(patternStr, normalize(opts), defaultRuntime);
});
// `path set <oc-path> <value>` — the only subcommand that accepts `--dry-run`.
path
.command("set")
.description("Write a leaf value at an oc:// path")
.argument("<oc-path>", "oc:// path to write")
.argument("<value>", "string value to write")
.option("--dry-run", "Print bytes without writing")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.option("--cwd <dir>", "Resolve file slot against this directory")
.option("--file <file>", "Override the file slot's resolved path")
.action(async (pathStr: string, value: string, opts: RawPathOptions) => {
await pathSetCommand(pathStr, value, normalize(opts), defaultRuntime);
});
// `path validate <oc-path>` — registers no `--cwd` / `--file` flags, and
// its handler is invoked without `await`.
path
.command("validate")
.description("Parse an oc:// path and print its slot structure")
.argument("<oc-path>", "oc:// path to validate")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.action((pathStr: string, opts: RawPathOptions) => {
pathValidateCommand(pathStr, normalize(opts), defaultRuntime);
});
// `path emit <file>` — takes a filesystem path rather than an oc:// path.
path
.command("emit")
.description("Round-trip a file through parse + emit")
.argument("<file>", "Path to a workspace file")
.option("--cwd <dir>", "Resolve <file> against this directory")
.option("--file <file>", "Override the file's resolved path")
.option("--json", "Force JSON output")
.option("--human", "Force human output")
.action(async (fileArg: string, opts: RawPathOptions) => {
await pathEmitCommand(fileArg, normalize(opts), defaultRuntime);
});
}

View File

@@ -0,0 +1,125 @@
/**
* Workspace-Markdown AST — generic addressing index over the 8 workspace
* files openclaw treats as opaque text in `loadWorkspaceBootstrapFiles`.
*
* **The AST is purely an addressing index.** It does NOT encode opinions
* about what a "valid" SOUL.md / AGENTS.md / MEMORY.md looks like; it
* exposes the markdown features (frontmatter, sections, items, tables,
* code blocks) that any `OcPath` (`{ file, section?, item?, field? }`) can
* resolve over. Per-file lint opinions ride in @openclaw/oc-lint, not
* here.
*
* **Byte-fidelity contract**: `emitMd(parse(raw)) === raw` for every input
* the parser accepts. The parser preserves the original bytes on the
* root node (`raw`) so emitters can round-trip even content the AST
* doesn't structurally model (foreign content, idiosyncratic whitespace).
*
* @module @openclaw/oc-path/ast
*/
/**
* Diagnostic emitted by the parser. Used by lint rules and parse-error
* surfacing alike. Severity is `info` by default; the parser emits
* `warning` for suspicious-but-recoverable inputs (e.g., unclosed
* frontmatter fence) and never throws.
*/
export interface Diagnostic {
/** Source line number the diagnostic refers to. */
readonly line: number;
/** Human-readable description of the finding. */
readonly message: string;
/** How serious the finding is. */
readonly severity: "info" | "warning" | "error";
/** Optional stable identifier for the finding (e.g. `OC_JSONC_PARSE_FAILED`). */
readonly code?: string;
}
/**
* A frontmatter key/value pair. Keys are preserved as written; values
* are unquoted (surrounding `"` or `'` stripped) but otherwise verbatim.
*/
export interface FrontmatterEntry {
/** Key exactly as written in the frontmatter. */
readonly key: string;
/** Value with any surrounding quotes removed. */
readonly value: string;
/** Source line number recorded by the parser for this entry. */
readonly line: number;
}
/**
* A bullet-list item inside a section. Items are addressable via OcPath
* `{ file, section, item }` where `item` is the slug of the bullet's
* text (or the slug of `kv.key` when the bullet is in `- key: value`
* shape).
*
* `kv` is populated when the bullet matches `- <key>: <value>` (the
* common pattern in AGENTS.md / TOOLS.md / USER.md). Lint rules use it
* for field-level addressing via `OcPath.field`.
*/
export interface AstItem {
/** Text of the bullet. */
readonly text: string;
/** Slug the OcPath `item` slot is matched against. */
readonly slug: string;
/** Source line number recorded by the parser for this bullet. */
readonly line: number;
/** Key/value split of the bullet; absent when it is not in `- key: value` shape. */
readonly kv?: { readonly key: string; readonly value: string };
}
/**
* A markdown table. Tables surface in `## Tool Guidance` blocks and
* elsewhere; lint rules can address rows by header value if needed.
*/
export interface AstTable {
/** Header cells, in column order. */
readonly headers: readonly string[];
/** Body rows; each row is its list of cells. */
readonly rows: readonly (readonly string[])[];
/** Source line number recorded by the parser for this table. */
readonly line: number;
}
/**
* A fenced code block. Carries the language tag (or `null`) and the
* verbatim body.
*/
export interface AstCodeBlock {
/** Language tag of the fence, or `null` when the fence has none. */
readonly lang: string | null;
/** Verbatim body of the block. */
readonly text: string;
/** Source line number recorded by the parser for this block. */
readonly line: number;
}
/**
* An H2-delimited block. The `slug` is the kebab-case lowercase form of
* `heading` and is what OcPath `section` matches against. `bodyText` is
* the prose between this heading and the next H2 (or end of file),
* verbatim. `items`, `tables`, `codeBlocks` are extracted from
* `bodyText` for addressing convenience but the raw text is preserved.
*/
export interface AstBlock {
/** Heading text (rendered back as `## <heading>`). */
readonly heading: string;
/** Kebab-case lowercase form of `heading`; the OcPath `section` key. */
readonly slug: string;
/** Source line number recorded by the parser for this block. */
readonly line: number;
/** Verbatim text between this heading and the next H2 / end of file. */
readonly bodyText: string;
/** Bullet items extracted from `bodyText`. */
readonly items: readonly AstItem[];
/** Tables extracted from `bodyText`. */
readonly tables: readonly AstTable[];
/** Fenced code blocks extracted from `bodyText`. */
readonly codeBlocks: readonly AstCodeBlock[];
}
/**
* The root AST node. Always carries `raw` for byte-identical round-trip.
* `frontmatter` is empty when the file has none. `preamble` is the
* prose before the first H2 (may be empty). `blocks` is the H2 tree in
* document order.
*
* `kind: 'md'` discriminator matches the jsonc / jsonl / yaml AST
* shapes; the universal `setOcPath` / `resolveOcPath` verbs dispatch
* via this tag at runtime so callers don't have to thread kind
* through the call site.
*
* The generic shape is the same for every workspace markdown file;
* opinions (`AGENTS_TOOLS_SECTION_EMPTY`, etc.) ride in lint rules, not
* here.
*/
export interface MdAst {
/** Discriminator shared with the other AST kinds. */
readonly kind: "md";
/** Bytes backing this AST; returned verbatim by round-trip emit. */
readonly raw: string;
/** Frontmatter entries in document order; empty when there is none. */
readonly frontmatter: readonly FrontmatterEntry[];
/** Prose before the first H2; may be empty. */
readonly preamble: string;
/** H2 blocks in document order. */
readonly blocks: readonly AstBlock[];
}
/**
* Parser output: the AST plus any diagnostics from the parse pass.
*/
export interface ParseResult {
/** The parsed markdown AST. */
readonly ast: MdAst;
/** Diagnostics collected while parsing. */
readonly diagnostics: readonly Diagnostic[];
}

View File

@@ -0,0 +1,37 @@
/**
* Cross-kind utilities. `inferKind` is a convention helper for callers
* who want to map filename → kind so they can pick the right per-kind
* `parseXxx` / `emitXxx` function.
*
* NOTE(review): this header previously stated that the universal
* `resolveOcPath` / `setOcPath` / `appendOcPath` dispatchers were
* dropped in favour of per-kind verbs, because the kind tag bled
* through every consumer (lint runner, doctor fixers, tests). The
* package index, however, exports `resolveOcPath` / `setOcPath` from
* `./universal.js` as the only public resolve / set verbs, so that
* statement appears stale — confirm which design is current and
* update this note.
*
* @module @openclaw/oc-path/dispatch
*/
/** File kinds the substrate distinguishes; `inferKind` maps filenames onto these. */
export type OcKind = "md" | "jsonc" | "jsonl" | "yaml";
/**
 * Suggest a kind for `filename` based on its extension
 * (case-insensitive). This is the substrate's default mapping only;
 * callers are free to override it.
 *
 * @param filename  File name or path; only its suffix is inspected.
 * @returns The matching kind, or `null` when no known suffix matches.
 */
export function inferKind(filename: string): OcKind | null {
  const name = filename.toLowerCase();
  const suffixTable: ReadonlyArray<readonly [OcKind, readonly string[]]> = [
    ["md", [".md"]],
    ["jsonl", [".jsonl", ".ndjson"]],
    ["jsonc", [".jsonc", ".json"]],
    ["yaml", [".yaml", ".yml", ".lobster"]],
  ];
  for (const [kind, suffixes] of suffixTable) {
    if (suffixes.some((suffix) => name.endsWith(suffix))) {
      return kind;
    }
  }
  return null;
}

View File

@@ -0,0 +1,171 @@
/**
* Mutate a `MdAst` at an OcPath. Returns a new AST with the
* value replaced; the original is unchanged.
*
* Writable surface:
*
* oc://FILE/[frontmatter]/key → frontmatter entry value
* oc://FILE/section/item/field → item.kv.value (when item has kv shape)
*
* Section bodies, tables, and code blocks are NOT writable through
* this primitive — they're prose, and a generic "set" doesn't compose
* cleanly. Doctor fixers handle structural edits via dedicated verbs.
*
* @module @openclaw/oc-path/edit
*/
import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from "./ast.js";
import type { OcPath } from "./oc-path.js";
/**
* Outcome of `setMdOcPath`.
*
* - `ok: true` — `ast` is the edited copy.
* - `reason: "unresolved"` — the frontmatter key, section, item, or
* field named by the path was not found.
* - `reason: "not-writable"` — the path is not a frontmatter address and
* lacks one of section / item / field.
* - `reason: "no-item-kv"` — the item exists but is not in
* `- key: value` shape.
*/
export type MdEditResult =
| { readonly ok: true; readonly ast: MdAst }
| {
readonly ok: false;
readonly reason: "unresolved" | "not-writable" | "no-item-kv";
};
/**
 * Produce a copy of `ast` with the leaf at `path` set to `newValue`;
 * the input AST is left untouched. `raw` on the returned AST is
 * re-rendered from the structural fields.
 *
 * Two address shapes are writable:
 *  - `section === "[frontmatter]"` — the key is taken from `item`,
 *    falling back to `field`;
 *  - `section` + `item` + `field` — the `kv.value` of a `- key: value`
 *    bullet. Section, item and field are matched case-insensitively
 *    by lowercasing the path side.
 *
 * @returns `{ ok: true, ast }` on success, otherwise `{ ok: false }`
 *   with reason `unresolved`, `not-writable`, or `no-item-kv`.
 */
export function setMdOcPath(ast: MdAst, path: OcPath, newValue: string): MdEditResult {
  const { section, item, field } = path;

  // --- oc://FILE/[frontmatter]/<key> ---------------------------------
  if (section === "[frontmatter]") {
    const key = item ?? field;
    if (key === undefined) {
      return { ok: false, reason: "unresolved" };
    }
    const entryAt = ast.frontmatter.findIndex((entry) => entry.key === key);
    const current = entryAt === -1 ? undefined : ast.frontmatter[entryAt];
    if (current === undefined) {
      return { ok: false, reason: "unresolved" };
    }
    const updated: FrontmatterEntry = { ...current, value: newValue };
    const frontmatter = ast.frontmatter.map((entry, i) => (i === entryAt ? updated : entry));
    return finalize({ ...ast, frontmatter });
  }

  // --- oc://FILE/section/item/field ----------------------------------
  if (section === undefined || item === undefined || field === undefined) {
    return { ok: false, reason: "not-writable" };
  }

  const wantedSection = section.toLowerCase();
  const blockAt = ast.blocks.findIndex((candidate) => candidate.slug === wantedSection);
  const block = blockAt === -1 ? undefined : ast.blocks[blockAt];
  if (block === undefined) {
    return { ok: false, reason: "unresolved" };
  }

  const wantedItem = item.toLowerCase();
  const itemAt = block.items.findIndex((candidate) => candidate.slug === wantedItem);
  const target = itemAt === -1 ? undefined : block.items[itemAt];
  if (target === undefined) {
    return { ok: false, reason: "unresolved" };
  }
  if (target.kv === undefined) {
    return { ok: false, reason: "no-item-kv" };
  }
  if (target.kv.key.toLowerCase() !== field.toLowerCase()) {
    return { ok: false, reason: "unresolved" };
  }

  // Keep the key as written; only the value changes.
  const patchedItem: AstItem = { ...target, kv: { key: target.kv.key, value: newValue } };
  const items = block.items.map((entry, i) => (i === itemAt ? patchedItem : entry));
  const patchedBlock: AstBlock = {
    ...block,
    items,
    bodyText: rebuildBlockBody(block, items),
  };
  const blocks = ast.blocks.map((entry, i) => (i === blockAt ? patchedBlock : entry));
  return finalize({ ...ast, blocks });
}
/**
 * Rebuild `block.bodyText` so it reflects edited item values. For every
 * item whose `kv.value` differs between `block.items[i]` and
 * `newItems[i]`, the first `- key: value` line in the body carrying
 * that key has its value portion rewritten in place; everything else in
 * the body is left byte-for-byte as it was.
 *
 * Items without a matching bullet line leave the body untouched (the
 * structural fields take precedence in render mode anyway).
 *
 * Two details matter for correctness:
 *  - the new value is inserted through a replacer function, so `$`
 *    sequences in it (`$1`, `$&`, `$$`, ...) are written literally
 *    instead of being expanded as replacement patterns;
 *  - whitespace inside the match is restricted to horizontal
 *    whitespace, so a bullet with an empty value (`- key:`) cannot make
 *    the match spill onto, and overwrite, the following line.
 *
 * @param block     Block as it was before the edit (source of `bodyText`).
 * @param newItems  Items after the edit, index-aligned with `block.items`.
 * @returns The updated body text.
 */
function rebuildBlockBody(block: AstBlock, newItems: readonly AstItem[]): string {
  // Any run of whitespace that does not cross a line boundary.
  const hspace = "[^\\S\\r\\n]*";
  let body = block.bodyText;
  for (let i = 0; i < newItems.length; i++) {
    const newItem = newItems[i];
    const oldItem = block.items[i];
    if (newItem === undefined || oldItem === undefined) {
      continue;
    }
    if (newItem.kv === undefined || oldItem.kv === undefined) {
      continue;
    }
    if (newItem.kv.value === oldItem.kv.value) {
      continue;
    }
    const bulletLine = new RegExp(
      `^(${hspace}-${hspace}${escapeRegex(oldItem.kv.key)}${hspace}:${hspace}).*$`,
      "m",
    );
    const nextValue = newItem.kv.value;
    body = body.replace(bulletLine, (_line: string, prefix: string) => `${prefix}${nextValue}`);
  }
  return body;
}

/** Escape every regex metacharacter in `s` so it matches literally. */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
 * Wrap an edited AST into a successful `MdEditResult`, replacing `raw`
 * with text rendered from the structural fields: an optional `---`
 * frontmatter fence, the preamble, then one `## heading` (plus body)
 * per block. Top-level parts are separated by a single blank line and
 * the whole document is joined with `\n`.
 */
function finalize(ast: MdAst): MdEditResult {
  const lines: string[] = [];
  // Insert a blank separator line, but never at the very top.
  const separate = (): void => {
    if (lines.length > 0) {
      lines.push("");
    }
  };

  if (ast.frontmatter.length > 0) {
    const entries = ast.frontmatter.map((fm) => `${fm.key}: ${formatFrontmatterValue(fm.value)}`);
    lines.push("---", ...entries, "---");
  }

  if (ast.preamble.length > 0) {
    separate();
    lines.push(ast.preamble);
  }

  for (const { heading, bodyText } of ast.blocks) {
    separate();
    lines.push(`## ${heading}`);
    if (bodyText.length > 0) {
      lines.push(bodyText);
    }
  }

  return { ok: true, ast: { ...ast, raw: lines.join("\n") } };
}
/**
 * Render a frontmatter value for output. Empty strings become `""`;
 * values containing a YAML-significant character (or a CR / LF) are
 * emitted as a JSON string literal; everything else is emitted bare.
 */
function formatFrontmatterValue(value: string): string {
  if (value === "") {
    return '""';
  }
  const needsQuoting = /[:#&*?|<>=!%@`,[\]{}\r\n]/.test(value);
  return needsQuoting ? JSON.stringify(value) : value;
}

View File

@@ -0,0 +1,143 @@
/**
* Emit an AST back to bytes.
*
* **Two modes**:
*
* 1. **Round-trip** — the AST hasn't been mutated since `parseMd`
* produced it. Returns `ast.raw` verbatim. Byte-identical.
*
* 2. **Mutation-aware** — the AST has been modified (frontmatter
* entry edited, item kv.value changed, block reordered). Returns
* a freshly-rendered representation. **Not** byte-identical to a
* hypothetical "perfect" rewrite — we render canonical forms
* (LF endings, single space after `:` in frontmatter, etc.).
* Callers needing byte-fidelity for partial edits should patch
* `raw` directly instead of mutating the AST.
*
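* Sentinel guarding differs by mode. Render mode passes every
* frontmatter value, the preamble, each block body, and each item
* `kv.value` through `guardSentinel`, so a `__OPENCLAW_REDACTED__`
* literal in the rendered output is meant to surface as
* `OcEmitSentinelError`. Round-trip mode echoes `ast.raw` unscanned by
* default and only checks it when the caller passes
* `acceptPreExistingSentinel: false`.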
*
* @module @openclaw/oc-path/emit
*/
import type { FrontmatterEntry, MdAst } from "./ast.js";
import { guardSentinel } from "./sentinel.js";
/**
* Emit options. `mode: 'roundtrip'` (default) returns `ast.raw`
* verbatim; `mode: 'render'` always re-renders from the structural
* fields. `emitMd` chooses purely on `mode` — the AST carries no dirty
* flag.
*/
export interface EmitOptions {
/** Emit strategy; defaults to `"roundtrip"`. */
readonly mode?: "roundtrip" | "render";
/**
* File name used to build the `oc://<file>/...` path passed to
* `guardSentinel` for error context. When omitted, guard paths start
* at a bare `oc://` prefix.
*/
readonly fileNameForGuard?: string;
/**
* See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
* Default `true` — round-trip echoes parsed bytes without scanning
* for the sentinel. Render mode scans every leaf regardless.
*/
readonly acceptPreExistingSentinel?: boolean;
}
/**
 * Serialize a markdown AST.
 *
 * - Round-trip mode (default) returns `ast.raw` untouched. The raw text
 *   is checked for the redaction sentinel only when the caller passes
 *   `acceptPreExistingSentinel: false`.
 * - Any other mode re-renders from the structural fields, passing every
 *   frontmatter value, the preamble, each non-empty block body and each
 *   item `kv.value` of such a block through `guardSentinel`.
 *
 * @param ast   AST to emit.
 * @param opts  Mode, guard-path file name, and sentinel policy.
 * @returns The emitted text.
 */
export function emitMd(ast: MdAst, opts: EmitOptions = {}): string {
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : "oc://";
  const isRoundtrip = (opts.mode ?? "roundtrip") === "roundtrip";

  if (isRoundtrip) {
    // Parsed bytes are trusted by default: a file that legitimately
    // contains the sentinel literal (code block, pasted log) must not
    // make every emit fail. Scanning is strictly opt-in.
    const scanRaw = !(opts.acceptPreExistingSentinel ?? true);
    if (scanRaw && ast.raw.includes("__OPENCLAW_REDACTED__")) {
      guardSentinel("__OPENCLAW_REDACTED__", `${guardPath}/[raw]`);
    }
    return ast.raw;
  }

  // Render mode: formatting details (extra blank lines, custom
  // whitespace) are not reproduced; the structure is.
  const out: string[] = [];
  const separate = (): void => {
    if (out.length > 0) {
      out.push("");
    }
  };

  if (ast.frontmatter.length > 0) {
    out.push("---");
    ast.frontmatter.forEach(({ key, value }) => {
      guardSentinel(value, `${guardPath}/[frontmatter]/${key}`);
      out.push(`${key}: ${formatFrontmatterValue(value)}`);
    });
    out.push("---");
  }

  if (ast.preamble.length > 0) {
    guardSentinel(ast.preamble, `${guardPath}/[preamble]`);
    separate();
    out.push(ast.preamble);
  }

  for (const block of ast.blocks) {
    separate();
    out.push(`## ${block.heading}`);
    if (block.bodyText.length === 0) {
      continue;
    }
    // Guard the body as one string first, then each key/value item.
    guardSentinel(block.bodyText, `${guardPath}/${block.slug}/[body]`);
    for (const { slug, kv } of block.items) {
      if (kv) {
        guardSentinel(kv.value, `${guardPath}/${block.slug}/${slug}/${kv.key}`);
      }
    }
    out.push(block.bodyText);
  }

  return out.join("\n");
}
/**
 * Decide how a frontmatter value is written: bare when it is non-empty
 * and free of characters that would confuse a YAML parser, `""` when
 * empty, and as a JSON string literal otherwise.
 */
function formatFrontmatterValue(value: string): string {
  const yamlSensitive = /[:#&*?|<>=!%@`,[\]{}\r\n]/;
  if (value.length > 0 && !yamlSensitive.test(value)) {
    return value;
  }
  return value.length === 0 ? '""' : JSON.stringify(value);
}
/**
 * Placeholder hook for callers that mutate an AST structurally and
 * want `emitMd()` to re-render instead of round-tripping.
 *
 * It does nothing today: `emitMd()` picks its behavior from
 * `opts.mode` alone. The function is kept as an extension point for a
 * future invariant where the AST tracks its own dirty state.
 */
export function markDirty(_ast: MdAst): void {
  // No dirty state exists yet, so there is nothing to record.
  return;
}
// Re-export the frontmatter type for convenience so tests don't need
// to import from ast.ts.
export type { FrontmatterEntry };

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,128 @@
/**
* `@openclaw/oc-path` — substrate package public surface.
*
* **Strategic frame**: workspace files are byte-stable and addressable
* via the `oc://` scheme — the addressing scheme is universal across
* file kinds (md / jsonc / jsonl / yaml). Encoding (parse/emit) is
* per-kind; addressing (resolve/set) is universal.
*
* **Public verbs**:
* - One `setOcPath(ast, path, value)` — universal, kind-dispatched
* - One `resolveOcPath(ast, path)` — universal, kind-dispatched
* - Per-kind `parseXxx` / `emitXxx` (parsing IS per-kind by nature)
*
* `setOcPath` accepts a string value; the substrate coerces based on
* AST shape at the path location. The OcPath syntax encodes the
* operation: plain path = leaf set, `+` suffix = insertion.
*
* Per-kind set/resolve helpers exist as internal implementation; they
* aren't on the public surface. Callers don't need to pick a kind —
* the AST carries its `kind` discriminator and the universal verbs
* dispatch internally.
*
* @module @openclaw/oc-path
*/
/**
* SDK version this build of `@openclaw/oc-path` exposes. Bumped on
* every breaking change to AST shape, OcPath syntax, or the public
* verbs (universal `resolveOcPath` / `setOcPath` / `findOcPaths` and
* per-kind `parseXxx` / `emitXxx`). Plugin packs that depend on the
* substrate declare the version they were authored against and the
* host warns on mismatch.
*/
export const SDK_VERSION = "0.1.0";
// AST types
export type {
AstBlock,
AstCodeBlock,
AstItem,
AstTable,
Diagnostic,
FrontmatterEntry,
ParseResult,
MdAst,
} from "./ast.js";
export type { JsoncAst, JsoncEntry, JsoncValue } from "./jsonc/ast.js";
export type { JsonlAst, JsonlLine } from "./jsonl/ast.js";
export type { YamlAst } from "./yaml/ast.js";
// OcPath types + parser/formatter
export type { OcPath, PathSegmentLayout, PositionalContainer, PredicateSpec } from "./oc-path.js";
// Public OcPath surface — what plugin authors and callers use.
export {
MAX_PATH_LENGTH,
MAX_SUB_SEGMENTS_PER_SLOT,
MAX_TRAVERSAL_DEPTH,
OcPathError,
POS_FIRST,
POS_LAST,
WILDCARD_RECURSIVE,
WILDCARD_SINGLE,
formatOcPath,
hasWildcard,
isOrdinalSeg,
isPattern,
isPositionalSeg,
isPredicateSeg,
isQuotedSeg,
isUnionSeg,
isValidOcPath,
parseOcPath,
} from "./oc-path.js";
// `evaluatePredicate`, `getPathLayout`, `parseOrdinalSeg`,
// `parsePredicateSeg`, `parseUnionSeg`, `quoteSeg`, `unquoteSeg`,
// `repackPath`, `resolvePositionalSeg`, `splitRespectingBrackets`
// were exported from earlier prototypes. They're substrate-internal
// helpers — used by `find.ts`, the per-kind resolvers, and the parser
// itself, but not part of the upstream-portable public surface.
// Callers that need their behavior should round-trip through
// `parseOcPath` / `formatOcPath` / `findOcPaths`.
// Per-kind parse / emit (encoding is genuinely per-kind)
export { parseMd } from "./parse.js";
export { parseJsonc } from "./jsonc/parse.js";
export { parseJsonl } from "./jsonl/parse.js";
export { parseYaml } from "./yaml/parse.js";
export type { JsoncParseResult } from "./jsonc/parse.js";
export type { JsonlParseResult } from "./jsonl/parse.js";
export type { YamlParseResult } from "./yaml/parse.js";
export type { EmitOptions } from "./emit.js";
export { emitMd, markDirty } from "./emit.js";
export type { JsoncEmitOptions } from "./jsonc/emit.js";
export { emitJsonc } from "./jsonc/emit.js";
export type { JsonlEmitOptions } from "./jsonl/emit.js";
export { emitJsonl } from "./jsonl/emit.js";
export type { YamlEmitOptions } from "./yaml/emit.js";
export { emitYaml } from "./yaml/emit.js";
// Universal verbs — the only public resolve / set on the surface.
export type {
OcAst,
OcMatch,
LeafType,
NodeDescriptor,
ContainerKind,
SetResult,
InsertionInfo,
} from "./universal.js";
export { resolveOcPath, setOcPath, detectInsertion } from "./universal.js";
// Multi-match search verb — the wildcard-accepting cousin of resolve.
export type { OcPathMatch } from "./find.js";
export { findOcPaths } from "./find.js";
// Cross-kind utility — filename → kind hint.
export { inferKind } from "./dispatch.js";
export type { OcKind } from "./dispatch.js";
// Sentinel guard
export { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from "./sentinel.js";
// Slug helper
export { slugify } from "./slug.js";
// Workspace manifest is a separate concern (filesystem classifier);
// it's not part of this PR's scope.

View File

@@ -0,0 +1,49 @@
/**
* JSONC AST types — the addressing skeleton for JSONC files (gateway
* config, plugin manifests, JSON-with-comments artifacts).
*
* **Per-kind discriminator**: every AST in this substrate carries a
* `kind` field. The OcPath resolver dispatches on `kind` so md / jsonc
* / jsonl / yaml can share one resolver entry point.
*
* **Byte-fidelity**: `raw` is preserved on the root for round-trip
* emit. The minimal prototype parser doesn't preserve every formatting
* detail in the structural tree — for production, a fuller
* comment-preserving parser ports from `openclaw-workspace`.
*
* @module @openclaw/oc-path/jsonc/ast
*/
/** The root JSONC AST. `raw` round-trips byte-identical via emit. */
export interface JsoncAst {
/** Discriminator shared with the other AST kinds. */
readonly kind: "jsonc";
/** Source text backing this AST; returned verbatim by round-trip emit. */
readonly raw: string;
/** Parsed value tree, or `null` if the file is empty / unparseable. */
readonly root: JsoncValue | null;
}
/**
* A JSONC value node — discriminated union over the standard JSON kinds.
*
* Containers carry their children (`object` → `entries`, `array` →
* `items`); `string` / `number` / `boolean` carry a `value`; `null`
* carries no payload.
*
* `line` is the 1-based line where the value's literal token starts
* (the `{`, `[`, opening `"`, or first digit). The parser always sets
* it; synthetic constructions (mutations, fixtures) may omit it and
* consumers fall back to 1 / parent line. Optional rather than
* required so test fixtures and externally-constructed values stay
* concise.
*/
export type JsoncValue =
| { readonly kind: "object"; readonly entries: readonly JsoncEntry[]; readonly line?: number }
| { readonly kind: "array"; readonly items: readonly JsoncValue[]; readonly line?: number }
| { readonly kind: "string"; readonly value: string; readonly line?: number }
| { readonly kind: "number"; readonly value: number; readonly line?: number }
| { readonly kind: "boolean"; readonly value: boolean; readonly line?: number }
| { readonly kind: "null"; readonly line?: number };
/** Object key/value entry. Keys are unquoted; quoting happens at emit. */
export interface JsoncEntry {
/** Property name without surrounding quotes. */
readonly key: string;
/** Value node stored under `key`. */
readonly value: JsoncValue;
/** 1-based line number of the key. */
readonly line: number;
}

View File

@@ -0,0 +1,143 @@
import { applyEdits, modify } from "jsonc-parser/lib/esm/main.js";
import type { OcPath } from "../oc-path.js";
import {
isPositionalSeg,
isQuotedSeg,
resolvePositionalSeg,
splitRespectingBrackets,
unquoteSeg,
} from "../oc-path.js";
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../sentinel.js";
import type { JsoncAst, JsoncValue } from "./ast.js";
import { parseJsonc } from "./parse.js";
/**
* Concrete edit path handed to jsonc-parser's `modify`: object keys are
* strings, array positions are numbers.
*/
type JsoncEditPath = Array<string | number>;
/**
* Outcome of `setJsoncOcPath`.
*
* - `ok: true` — `ast` is the re-parsed AST of the patched text.
* - `reason: "no-root"` — the input AST has no parsed root.
* - `reason: "unresolved"` — the path did not resolve to an existing
* node, the edit produced no changes, or the patched text no longer
* yields a root.
*/
export type JsoncEditResult =
| { readonly ok: true; readonly ast: JsoncAst }
| { readonly ok: false; readonly reason: "unresolved" | "no-root" };
/**
 * Replace the value at `path` inside a JSONC document and return the
 * re-parsed result. The edit is applied to `ast.raw` through
 * jsonc-parser's `modify` / `applyEdits`, and only to nodes that
 * already exist (`isArrayInsertion: false`).
 *
 * @param ast       Document to edit; it is not mutated.
 * @param path      Address of the node; section / item / field are
 *                  flattened into one segment list.
 * @param newValue  Replacement value tree.
 * @returns `{ ok: true, ast }` with the re-parsed document, or
 *   `{ ok: false }` with reason `no-root` / `unresolved`.
 * @throws OcEmitSentinelError when `newValue` contains the redaction
 *   sentinel in any string leaf.
 */
export function setJsoncOcPath(ast: JsoncAst, path: OcPath, newValue: JsoncValue): JsoncEditResult {
  const { root, raw } = ast;
  if (root === null) {
    return { ok: false, reason: "no-root" };
  }

  const editPath = resolveEditSegments(root, pathSegments(path));
  if (editPath === null) {
    return { ok: false, reason: "unresolved" };
  }

  // Refuse sentinel-bearing values before anything is written.
  guardSentinel(newValue, `oc://${path.file}/${editPath.join("/")}`);

  const edits = modify(raw, editPath, jsoncValueToJson(newValue), {
    formattingOptions: { insertSpaces: true, tabSize: 2 },
    isArrayInsertion: false,
  });
  if (edits.length === 0) {
    return { ok: false, reason: "unresolved" };
  }

  const { ast: nextAst } = parseJsonc(applyEdits(raw, edits));
  return nextAst.root === null ? { ok: false, reason: "unresolved" } : { ok: true, ast: nextAst };
}
/**
 * Walk `value` and throw `OcEmitSentinelError` for the first string
 * leaf that contains the redaction sentinel. `guardPath` is extended
 * with the array index or object key on the way down so the error
 * names the offending leaf.
 */
function guardSentinel(value: JsoncValue, guardPath: string): void {
  switch (value.kind) {
    case "string":
      if (value.value.includes(REDACTED_SENTINEL)) {
        throw new OcEmitSentinelError(guardPath);
      }
      return;
    case "array": {
      let index = 0;
      for (const item of value.items) {
        guardSentinel(item, `${guardPath}/${index}`);
        index += 1;
      }
      return;
    }
    case "object":
      for (const { key, value: child } of value.entries) {
        guardSentinel(child, `${guardPath}/${key}`);
      }
      return;
    default:
      // number / boolean / null carry no text to scan.
      return;
  }
}
/**
 * Flatten the section / item / field slots of `path` into one ordered
 * segment list. Each present slot is split on `.` (bracket-aware) and
 * quoted segments are unquoted; absent slots contribute nothing.
 */
function pathSegments(path: OcPath): string[] {
  const segments: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const part of splitRespectingBrackets(slot, ".")) {
      segments.push(isQuotedSeg(part) ? unquoteSeg(part) : part);
    }
  }
  return segments;
}
/**
 * Walk `segments` down from `root` and translate them into a concrete
 * edit path (string keys for objects, numeric indices for arrays).
 *
 * Positional segments are first resolved against the current node; if
 * that yields `null` the segment is used as written.
 *
 * @returns The edit path, or `null` when a segment is empty, names a
 *   missing key, is not a valid in-range index, or tries to descend
 *   into a scalar.
 */
function resolveEditSegments(root: JsoncValue, segments: readonly string[]): JsoncEditPath | null {
  const resolved: JsoncEditPath = [];
  let cursor: JsoncValue = root;

  for (const written of segments) {
    if (written.length === 0) {
      return null;
    }

    let segment = written;
    if (isPositionalSeg(written)) {
      const concrete = positionalForJsonc(cursor, written);
      if (concrete !== null) {
        segment = concrete;
      }
    }

    switch (cursor.kind) {
      case "object": {
        const hit = cursor.entries.find((candidate) => candidate.key === segment);
        if (!hit) {
          return null;
        }
        resolved.push(segment);
        cursor = hit.value;
        break;
      }
      case "array": {
        const index = Number(segment);
        const inRange = Number.isInteger(index) && index >= 0 && index < cursor.items.length;
        if (!inRange) {
          return null;
        }
        resolved.push(index);
        cursor = cursor.items[index]!;
        break;
      }
      default:
        // Scalars have no children to descend into.
        return null;
    }
  }

  return resolved;
}
/**
 * Resolve a positional segment against a container node. Objects are
 * described to `resolvePositionalSeg` by their key list, arrays by
 * their length; any other node kind yields `null`.
 */
function positionalForJsonc(node: JsoncValue, segment: string): string | null {
  switch (node.kind) {
    case "object": {
      const keys = node.entries.map(({ key }) => key);
      return resolvePositionalSeg(segment, { indexable: false, size: keys.length, keys });
    }
    case "array":
      return resolvePositionalSeg(segment, { indexable: true, size: node.items.length });
    default:
      return null;
  }
}
/**
 * Convert a `JsoncValue` tree into the plain JavaScript value it
 * denotes: objects become plain objects (a later duplicate key wins),
 * arrays become arrays, scalars become their primitive, and the `null`
 * kind becomes `null`.
 */
function jsoncValueToJson(value: JsoncValue): unknown {
  if (value.kind === "null") {
    return null;
  }
  if (value.kind === "string" || value.kind === "number" || value.kind === "boolean") {
    return value.value;
  }
  if (value.kind === "array") {
    return value.items.map((item) => jsoncValueToJson(item));
  }
  if (value.kind === "object") {
    const pairs = value.entries.map(
      ({ key, value: child }): [string, unknown] => [key, jsoncValueToJson(child)],
    );
    return Object.fromEntries(pairs);
  }
  return undefined;
}

View File

@@ -0,0 +1,98 @@
/**
* Emit a `JsoncAst` to bytes.
*
* **Round-trip mode (default)** returns `ast.raw` verbatim — this
* preserves comments, formatting, and trailing whitespace exactly.
*
* **Sentinel-guard policy**:
*
* - Round-trip echoes `ast.raw` *without* scanning for the redaction
* sentinel. Bytes that came in via `parseJsonc` are trusted: a
* workspace file legitimately containing the literal
* `__OPENCLAW_REDACTED__` (in a code-block comment, in a pasted
* error log, etc.) would otherwise become a workspace-wide emit
* DoS — every `openclaw path emit FILE.jsonc` would exit non-zero,
* breaking lint round-trip rules, doctor fixers, and LKG
* fingerprinting. The substrate's contract is "no NEW sentinel
* bytes introduced via emit", not "no sentinel byte ever leaves".
* - Render mode walks every leaf and rejects sentinel-bearing string
* values. Caller-injected sentinels are additionally rejected at
* edit time: `setJsoncOcPath` scans the new value before patching
* `raw`, so a leaf set to the sentinel by the caller never reaches
* the emitted bytes.
*
* Callers that want pre-existing sentinel detection (e.g., LKG
* fingerprint verification) can opt in via
* `acceptPreExistingSentinel: false`.
*
* @module @openclaw/oc-path/jsonc/emit
*/
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../sentinel.js";
import type { JsoncAst, JsoncValue } from "./ast.js";
/** Options accepted by `emitJsonc`. */
export interface JsoncEmitOptions {
/** Emit strategy; defaults to `"roundtrip"` (echo `ast.raw`). */
readonly mode?: "roundtrip" | "render";
/**
* File name used to build the `oc://<file>/...` path reported in
* `OcEmitSentinelError`. When omitted, paths start at a bare `oc://`.
*/
readonly fileNameForGuard?: string;
/**
* When `false`, round-trip mode also scans `ast.raw` for the
* redaction sentinel and throws `OcEmitSentinelError` if found.
* Default `true` — round-trip trusts parsed bytes (see policy
* comment above). Render mode always scans leaves regardless.
*/
readonly acceptPreExistingSentinel?: boolean;
}
/**
 * Serialize a JSONC AST.
 *
 * Round-trip mode (default) returns `ast.raw` unchanged, scanning it
 * for the redaction sentinel only when `acceptPreExistingSentinel` is
 * `false`. Any other mode synthesizes JSON from the structural tree
 * (comments are lost) and rejects sentinel-bearing string leaves; an
 * AST without a root renders as the empty string.
 *
 * @throws OcEmitSentinelError when a scanned value contains the sentinel.
 */
export function emitJsonc(ast: JsoncAst, opts: JsoncEmitOptions = {}): string {
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : "oc://";
  const isRoundtrip = (opts.mode ?? "roundtrip") === "roundtrip";

  if (!isRoundtrip) {
    // Render mode: rebuild from the tree, checking every string leaf.
    return ast.root === null ? "" : renderValue(ast.root, guardPath, []);
  }

  const scanRaw = !(opts.acceptPreExistingSentinel ?? true);
  if (scanRaw && ast.raw.includes(REDACTED_SENTINEL)) {
    throw new OcEmitSentinelError(`${guardPath}/[raw]`);
  }
  return ast.raw;
}
/**
 * Render one value node as single-line JSON text. Containers are
 * padded with one space inside the brackets and their members are
 * joined with `", "`.
 *
 * @param value      Node to render.
 * @param guardPath  `oc://...` prefix used in sentinel errors.
 * @param walked     Keys / indices from the root down to `value`.
 * @throws OcEmitSentinelError when a string leaf contains the redaction
 *   sentinel anywhere in its text (embedded matches count, not only
 *   exact ones).
 */
function renderValue(value: JsoncValue, guardPath: string, walked: readonly string[]): string {
  if (value.kind === "object") {
    const members: string[] = [];
    for (const { key, value: child } of value.entries) {
      const rendered = renderValue(child, guardPath, [...walked, key]);
      members.push(`${JSON.stringify(key)}: ${rendered}`);
    }
    return `{ ${members.join(", ")} }`;
  }
  if (value.kind === "array") {
    const elements: string[] = [];
    value.items.forEach((item, index) => {
      elements.push(renderValue(item, guardPath, [...walked, String(index)]));
    });
    return `[ ${elements.join(", ")} ]`;
  }
  if (value.kind === "string") {
    if (value.value.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/${walked.join("/")}`);
    }
    return JSON.stringify(value.value);
  }
  if (value.kind === "number" || value.kind === "boolean") {
    return String(value.value);
  }
  if (value.kind === "null") {
    return "null";
  }
  throw new Error(`unreachable: jsonc renderValue kind`);
}

View File

@@ -0,0 +1,149 @@
import {
ParseErrorCode,
type Node as JsoncParserNode,
type ParseError,
parseTree,
printParseErrorCode,
} from "jsonc-parser/lib/esm/main.js";
import type { Diagnostic } from "../ast.js";
import type { JsoncAst, JsoncEntry, JsoncValue } from "./ast.js";
/**
* Deepest nesting level `nodeToJsoncValue` will convert. Going past it
* throws, which `parseJsonc` reports as an `OC_JSONC_DEPTH_EXCEEDED`
* error diagnostic.
*/
export const MAX_PARSE_DEPTH = 256;
export interface JsoncParseResult {
readonly ast: JsoncAst;
readonly diagnostics: readonly Diagnostic[];
}
type LineMap = {
lineForOffset(offset: number): number;
};
/**
 * Parse JSONC text (comments and trailing commas allowed) into a
 * `JsoncAst` plus diagnostics.
 *
 * Soft-error policy: this function does not throw for malformed input.
 * Parser errors become diagnostics, and `ast.root` is `null` whenever any
 * diagnostic has severity `"error"` (including the depth-limit failure).
 * Whitespace-only input yields `root: null` with no diagnostics.
 *
 * @param raw - Original file text; stored untouched on `ast.raw`.
 * @returns The AST and the diagnostics collected while parsing.
 */
export function parseJsonc(raw: string): JsoncParseResult {
  if (raw.trim().length === 0) {
    return { ast: { kind: "jsonc", raw, root: null }, diagnostics: [] };
  }
  // The parser is fed the text without a leading BOM; `raw` itself stays
  // intact on the AST.
  const parseSource = raw.startsWith("\uFEFF") ? raw.slice(1) : raw;
  const errors: ParseError[] = [];
  const tree = parseTree(parseSource, errors, {
    allowTrailingComma: true,
    disallowComments: false,
    allowEmptyContent: true,
  });
  // Node and error offsets index into `parseSource`, so the line map must
  // be built from that same string. Building it from `raw` shifts every
  // line start by one when a BOM was stripped, which attributes a token
  // at column 0 to the previous line.
  const lineMap = createLineMap(parseSource);
  const diagnostics = errors.map((error) => toDiagnostic(error, lineMap, tree));
  let root: JsoncValue | null = null;
  if (tree && diagnostics.every((d) => d.severity !== "error")) {
    try {
      root = nodeToJsoncValue(tree, lineMap, 0);
    } catch (err) {
      // Tree conversion throws when nesting exceeds MAX_PARSE_DEPTH;
      // surface that as a diagnostic instead of propagating.
      diagnostics.push({
        line: 1,
        message: err instanceof Error ? err.message : String(err),
        severity: "error",
        code: "OC_JSONC_DEPTH_EXCEEDED",
      });
    }
  }
  return {
    ast: {
      kind: "jsonc",
      raw,
      root: diagnostics.every((d) => d.severity !== "error") ? root : null,
    },
    diagnostics,
  };
}
/**
 * Convert one parser error into a `Diagnostic`.
 *
 * An error counts as "trailing input" (a warning) when it is an
 * end-of-file-expected error, or an invalid-symbol error located at or
 * past the end of the parsed tree. Every other error is a parse failure
 * with severity `"error"`.
 *
 * @param error - Error reported by the parser.
 * @param lineMap - Offset-to-line lookup for the parsed text.
 * @param tree - Root node the parser produced, if any.
 */
function toDiagnostic(
  error: ParseError,
  lineMap: LineMap,
  tree: JsoncParserNode | undefined,
): Diagnostic {
  const treeEnd = tree ? tree.offset + tree.length : 0;
  let trailing = error.error === ParseErrorCode.EndOfFileExpected;
  if (!trailing && tree !== undefined && error.error === ParseErrorCode.InvalidSymbol) {
    trailing = error.offset >= treeEnd;
  }
  const line = lineMap.lineForOffset(error.offset);
  const message = printParseErrorCode(error.error);
  if (trailing) {
    return { line, message, severity: "warning", code: "OC_JSONC_TRAILING_INPUT" };
  }
  return { line, message, severity: "error", code: "OC_JSONC_PARSE_FAILED" };
}
/**
 * Convert a parser tree node into the `JsoncValue` shape used by the
 * rest of the substrate, recording the 1-based line of every node.
 *
 * Object children that are not `property` nodes, or properties missing a
 * key or value child, are dropped. Node types other than object / array /
 * string / number / boolean map to a `null` value.
 *
 * @param node - Parser node to convert.
 * @param lineMap - Offset-to-line lookup for the parsed text.
 * @param depth - Nesting depth of `node` (root is 0).
 * @throws Error when `depth` exceeds `MAX_PARSE_DEPTH`.
 */
function nodeToJsoncValue(node: JsoncParserNode, lineMap: LineMap, depth: number): JsoncValue {
  if (depth > MAX_PARSE_DEPTH) {
    throw new Error(`structural depth exceeded MAX_PARSE_DEPTH (${MAX_PARSE_DEPTH})`);
  }
  const line = lineMap.lineForOffset(node.offset);
  const childDepth = depth + 1;

  if (node.type === "object") {
    const entries: JsoncEntry[] = [];
    for (const property of node.children ?? []) {
      if (property.type !== "property") {
        continue;
      }
      const keyNode = property.children?.[0];
      const valueNode = property.children?.[1];
      if (!keyNode || !valueNode) {
        continue;
      }
      entries.push({
        key: String(keyNode.value),
        line: lineMap.lineForOffset(keyNode.offset),
        value: nodeToJsoncValue(valueNode, lineMap, childDepth),
      });
    }
    return { kind: "object", line, entries };
  }

  if (node.type === "array") {
    const items = (node.children ?? []).map((element) =>
      nodeToJsoncValue(element, lineMap, childDepth),
    );
    return { kind: "array", line, items };
  }

  if (node.type === "string") {
    return { kind: "string", value: String(node.value), line };
  }
  if (node.type === "number") {
    return { kind: "number", value: Number(node.value), line };
  }
  if (node.type === "boolean") {
    return { kind: "boolean", value: Boolean(node.value), line };
  }
  // `null` nodes and any unrecognised node type both become a null value.
  return { kind: "null", line };
}
/**
 * Build an offset-to-line lookup for `raw`.
 *
 * Line starts are offset 0 plus the offset just after every `\n`. The
 * lookup returns the 1-based number of the last line whose start is at
 * or before `offset`, and never less than 1.
 */
function createLineMap(raw: string): LineMap {
  const lineStarts = [0];
  let newlineAt = raw.indexOf("\n");
  while (newlineAt !== -1) {
    lineStarts.push(newlineAt + 1);
    newlineAt = raw.indexOf("\n", newlineAt + 1);
  }
  return {
    lineForOffset(offset) {
      // Upper-bound binary search: `lo` ends as the count of line starts
      // that are <= offset, which is exactly the 1-based line number.
      let lo = 0;
      let hi = lineStarts.length;
      while (lo < hi) {
        const mid = (lo + hi) >>> 1;
        if ((lineStarts[mid] ?? 0) <= offset) {
          lo = mid + 1;
        } else {
          hi = mid;
        }
      }
      return Math.max(1, lo);
    },
  };
}
export type { Diagnostic };

View File

@@ -0,0 +1,133 @@
/**
* Resolve an `OcPath` against a `JsoncAst`.
*
* The OcPath model has 4 segments (file, section, item, field) — for
* JSONC artifacts that's not enough depth, so segments concat with `/`
* AND a section/item/field MAY contain dots (`.`) for deeper traversal.
* Both forms work:
*
* oc://config/plugins/entries/foo (segment-per-key)
* oc://config/plugins.entries.foo (dotted section)
* oc://config/plugins/entries.foo (mixed)
*
* Each segment is split on `.`, and the resulting flat list of keys
* walks the value tree from `ast.root`. Numeric segments index into
* arrays.
*
* @module @openclaw/oc-path/jsonc/resolve
*/
import type { OcPath } from "../oc-path.js";
import {
isPositionalSeg,
isQuotedSeg,
resolvePositionalSeg,
splitRespectingBrackets,
unquoteSeg,
} from "../oc-path.js";
import type { JsoncAst, JsoncEntry, JsoncValue } from "./ast.js";
/**
* What `resolveJsoncOcPath` found.
*
* - `root`: the path carried no section/item/field, so the whole AST matched.
* - `object-entry`: the final step was an object key; `node` is the entry
*   (key + value).
* - `value`: the final step was an array index; `node` is the item itself.
*
* `path` is the list of keys / indices actually walked, with positional
* tokens replaced by what they resolved to.
*/
export type JsoncOcPathMatch =
| { readonly kind: "root"; readonly node: JsoncAst }
| { readonly kind: "value"; readonly node: JsoncValue; readonly path: readonly string[] }
| {
readonly kind: "object-entry";
readonly node: JsoncEntry;
readonly path: readonly string[];
};
/**
 * Resolve an `OcPath` against a JSONC tree.
 *
 * `section`, `item` and `field` are each split on `.` (bracket/quote
 * aware), quoted sub-segments are unquoted, and the flattened key list is
 * walked from `ast.root`. Object steps match entry keys exactly; array
 * steps must be an in-range non-negative integer index.
 *
 * @returns The match, or `null` when the AST has no root, a segment is
 *   empty, a key / index is missing, or the walk tries to descend into a
 *   primitive.
 */
export function resolveJsoncOcPath(ast: JsoncAst, path: OcPath): JsoncOcPathMatch | null {
  const root = ast.root;
  if (root === null) {
    return null;
  }

  // Flatten the three addressable parts into one key list. The split is
  // bracket-aware, so `"foo/bar".baz` yields [`foo/bar`, `baz`].
  const keys: string[] = [];
  for (const part of [path.section, path.item, path.field]) {
    if (part === undefined) {
      continue;
    }
    for (const sub of splitRespectingBrackets(part, ".")) {
      keys.push(isQuotedSeg(sub) ? unquoteSeg(sub) : sub);
    }
  }
  if (keys.length === 0) {
    return { kind: "root", node: ast };
  }

  let cursor: JsoncValue = root;
  let viaEntry: JsoncEntry | null = null;
  const trail: string[] = [];
  for (const rawKey of keys) {
    if (rawKey.length === 0) {
      return null;
    }
    // Positional tokens are resolved against the current container. A
    // `null` answer means "not applicable here" (e.g. `-N` on an object,
    // openclaw#59934), in which case the token is looked up as a literal
    // key instead.
    let key = rawKey;
    if (isPositionalSeg(rawKey)) {
      const concrete = positionalForJsonc(cursor, rawKey);
      if (concrete !== null) {
        key = concrete;
      }
    }
    trail.push(key);

    if (cursor.kind === "object") {
      const hit = cursor.entries.find((candidate) => candidate.key === key);
      if (hit === undefined) {
        return null;
      }
      viaEntry = hit;
      cursor = hit.value;
    } else if (cursor.kind === "array") {
      const index = Number(key);
      const inRange = Number.isInteger(index) && index >= 0 && index < cursor.items.length;
      if (!inRange) {
        return null;
      }
      const element = cursor.items[index];
      if (element === undefined) {
        return null;
      }
      viaEntry = null;
      cursor = element;
    } else {
      // Primitive: nothing to descend into.
      return null;
    }
  }

  if (viaEntry !== null && cursor === viaEntry.value) {
    return { kind: "object-entry", node: viaEntry, path: trail };
  }
  return { kind: "value", node: cursor, path: trail };
}
/**
 * Resolve a positional segment against a JSONC container by delegating
 * to `resolvePositionalSeg`. Arrays are passed as indexable with their
 * length; objects are passed as non-indexable with their ordered key
 * list. Primitives have nothing to resolve against and yield `null`.
 */
function positionalForJsonc(node: JsoncValue, seg: string): string | null {
  switch (node.kind) {
    case "array":
      return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
    case "object": {
      const keys = node.entries.map((entry) => entry.key);
      return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
    }
    default:
      return null;
  }
}

View File

@@ -0,0 +1,49 @@
/**
* JSONL AST types — JSON-Lines: one JSON value per line, separated by
* `\n`. The shape used by openclaw session-event logs, audit trails,
* and LKG checkpoints (which is why JSONL is part of the universal
* OcPath addressing scheme).
*
* **Per-kind discriminator**: every AST in this substrate carries a
* `kind` field. The OcPath resolver dispatches on `kind`.
*
* **Byte-fidelity**: `raw` is preserved on the root for round-trip
* emit. JSONL is line-oriented, so blank lines and per-line comments
* (we don't strip them in render mode either — we preserve them as
* "raw" line entries) live in the AST.
*
* @module @openclaw/oc-path/jsonl/ast
*/
import type { JsoncValue } from "../jsonc/ast.js";
/** The root JSONL AST. `raw` round-trips byte-identical via emit. */
export interface JsonlAst {
/** Discriminator identifying this AST as JSONL. */
readonly kind: "jsonl";
/** Original file text; round-trip emit returns it verbatim. */
readonly raw: string;
/** One entry per line of the file, in file order. */
readonly lines: readonly JsonlLine[];
/**
* Line-ending convention detected at parse time. Used by render mode
* to reconstruct the original convention (Windows-authored datasets
* use CRLF; Unix uses LF). Optional for back-compat with synthetic
* ASTs that don't track this — render mode falls back to LF when
* undefined.
*/
readonly lineEnding?: "\r\n" | "\n";
}
/**
* One line of a JSONL file. Either a parsed JSON value, a blank line
* (preserved for round-trip), or a malformed line (emit verbatim;
* emit-time sentinel guard still scans).
*
* Every variant carries `line` (the line number) and `raw` (the line
* text).
*/
export type JsonlLine =
| {
// A line that parsed successfully; `value` is its structural tree.
readonly kind: "value";
readonly line: number;
readonly value: JsoncValue;
/** The original line text (without trailing newline). */
readonly raw: string;
}
// A line containing only whitespace (or nothing).
| { readonly kind: "blank"; readonly line: number; readonly raw: string }
// A non-blank line that could not be parsed; kept as text only.
| { readonly kind: "malformed"; readonly line: number; readonly raw: string };

View File

@@ -0,0 +1,273 @@
/**
* Mutate a `JsonlAst` at an OcPath. Returns a new AST with the line
* (or sub-field of a line) replaced.
*
* Edit shapes:
*
* oc://session-events/L42 → replace line 42's whole value
* oc://session-events/L42/field → replace field on line 42
* oc://session-events/L42/field.sub → dotted descent
* oc://session-events/$last/... → resolves to most recent value
*
* Append (no existing line) is NOT a `set` — use `appendJsonlOcPath` for
* that. `setJsonlOcPath` only edits existing addresses.
*
* @module @openclaw/oc-path/jsonl/edit
*/
import type { JsoncEntry, JsoncValue } from "../jsonc/ast.js";
import type { OcPath } from "../oc-path.js";
import {
isPositionalSeg,
isQuotedSeg,
resolvePositionalSeg,
splitRespectingBrackets,
unquoteSeg,
} from "../oc-path.js";
import type { JsonlAst, JsonlLine } from "./ast.js";
import { emitJsonl } from "./emit.js";
/**
* Outcome of `setJsonlOcPath`.
*
* On success `ast` is the new AST. On failure `reason` is:
* - `"unresolved"`: the path has no line address, the addressed line
*   does not exist, or the sub-path inside the line's value is missing.
* - `"not-a-value-line"`: the addressed line exists but is blank or
*   malformed, so there is no value to edit.
*/
export type JsonlEditResult =
| { readonly ok: true; readonly ast: JsonlAst }
| { readonly ok: false; readonly reason: "unresolved" | "not-a-value-line" };
/**
 * Replace the value at an existing JSONL address and return a new AST.
 *
 * `path.section` selects the line. With no `item` / `field` the whole
 * line value is replaced by `newValue`; otherwise `item` and `field` are
 * split on `.` (bracket/quote aware) and `newValue` is written at that
 * location inside the line's value.
 *
 * @returns `{ ok: true, ast }` on success. `{ ok: false }` with
 *   `"unresolved"` when the line or sub-path does not exist, or
 *   `"not-a-value-line"` when the addressed line is blank or malformed.
 */
export function setJsonlOcPath(ast: JsonlAst, path: OcPath, newValue: JsoncValue): JsonlEditResult {
  const unresolved: JsonlEditResult = { ok: false, reason: "unresolved" };

  const lineAddr = path.section;
  if (lineAddr === undefined) {
    return unresolved;
  }
  const lineIdx = pickLineIndex(ast, lineAddr);
  const target = lineIdx === -1 ? undefined : ast.lines[lineIdx];
  if (target === undefined) {
    return unresolved;
  }
  // Only value lines can be edited; blank / malformed lines are never
  // turned into values by a set.
  if (target.kind !== "value") {
    return { ok: false, reason: "not-a-value-line" };
  }

  let nextValue: JsoncValue = newValue;
  if (path.item !== undefined || path.field !== undefined) {
    // Keep quoted segments verbatim here; `replaceAt` unquotes them when
    // comparing against entry keys. A plain `.split('.')` would shred a
    // quoted key that contains a dot.
    const segments: string[] = [];
    for (const part of [path.item, path.field]) {
      if (part !== undefined) {
        segments.push(...splitRespectingBrackets(part, "."));
      }
    }
    const replaced = replaceAt(target.value, segments, 0, newValue);
    if (replaced === null) {
      return unresolved;
    }
    nextValue = replaced;
  }

  const updatedLine: JsonlLine = {
    kind: "value",
    line: target.line,
    value: nextValue,
    raw: target.raw,
  };
  return finalize(ast, lineIdx, updatedLine, path.file);
}
/**
 * Return a copy of `current` with the value at `segments[i..]` replaced
 * by `newValue`. Only the containers along the path are copied; the
 * input tree is never mutated.
 *
 * @param current - Node the remaining segments are resolved against.
 * @param segments - Full segment list (quoted segments kept verbatim).
 * @param i - Index of the segment to apply to `current`.
 * @param newValue - Value to place at the end of the path.
 * @returns The rebuilt node, or `null` when the path does not resolve
 *   (empty segment, missing key, out-of-range index, or an attempt to
 *   descend into a primitive).
 */
function replaceAt(
  current: JsoncValue,
  segments: readonly string[],
  i: number,
  newValue: JsoncValue,
): JsoncValue | null {
  const seg = segments[i];
  if (seg === undefined) {
    // Path exhausted: this is the node to replace.
    return newValue;
  }
  if (seg.length === 0) {
    return null;
  }
  if (current.kind === "object") {
    // Resolve positional tokens ($first / $last) against the entries'
    // ordered key list before the literal-key comparison.
    let segNorm: string = seg;
    if (isPositionalSeg(seg)) {
      const resolved = resolvePositionalSeg(seg, {
        indexable: false,
        size: current.entries.length,
        keys: current.entries.map((e) => e.key),
      });
      // `null` means the token is not applicable to a keyed container
      // (e.g. `-N`). The resolvers then look the token up as a literal
      // key, so keys such as `-5028303500` (openclaw#59934) stay
      // addressable; do the same here so a path that reads also writes.
      if (resolved !== null) {
        segNorm = resolved;
      }
    }
    // Quoted segments carry the raw bytes verbatim; AST entry keys
    // are unquoted. Strip the surrounding quotes before comparing.
    const lookupKey = isQuotedSeg(segNorm) ? unquoteSeg(segNorm) : segNorm;
    const idx = current.entries.findIndex((e) => e.key === lookupKey);
    if (idx === -1) {
      return null;
    }
    const child = current.entries[idx];
    if (child === undefined) {
      return null;
    }
    const replacedChild = replaceAt(child.value, segments, i + 1, newValue);
    if (replacedChild === null) {
      return null;
    }
    const newEntry: JsoncEntry = { ...child, value: replacedChild };
    const newEntries = current.entries.slice();
    newEntries[idx] = newEntry;
    return {
      kind: "object",
      entries: newEntries,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }
  if (current.kind === "array") {
    // Resolve positional tokens ($first / $last / -N) against the
    // array's size before the numeric coercion below; without this
    // `Number('$last')` is NaN and the path silently unresolves.
    let segNorm: string = seg;
    if (isPositionalSeg(seg)) {
      const resolved = resolvePositionalSeg(seg, {
        indexable: true,
        size: current.items.length,
      });
      if (resolved === null) {
        return null;
      }
      segNorm = resolved;
    }
    const idx = Number(segNorm);
    if (!Number.isInteger(idx) || idx < 0 || idx >= current.items.length) {
      return null;
    }
    const child = current.items[idx];
    if (child === undefined) {
      return null;
    }
    const replacedChild = replaceAt(child, segments, i + 1, newValue);
    if (replacedChild === null) {
      return null;
    }
    const newItems = current.items.slice();
    newItems[idx] = replacedChild;
    return {
      kind: "array",
      items: newItems,
      ...(current.line !== undefined ? { line: current.line } : {}),
    };
  }
  // Primitive with segments left over: cannot descend.
  return null;
}
/**
 * Translate a JSONL line address into an index into `ast.lines`.
 *
 * Supported addresses:
 * - `$first` / `$last`: first / last value line.
 * - `-N`: Nth-from-last value line (blank and malformed lines are not
 *   counted).
 * - `Lnnn`: the line whose `line` number equals `nnn`, whatever its kind.
 *
 * @returns The index, or `-1` when the address matches nothing or is not
 *   one of the shapes above.
 */
function pickLineIndex(ast: JsonlAst, addr: string): number {
  const positional = addr === "$last" || addr === "$first" || /^-\d+$/.test(addr);
  if (positional) {
    // Positional addresses count value lines only, so collect their
    // slots once and index into that list.
    const valueSlots: number[] = [];
    ast.lines.forEach((entry, slot) => {
      if (entry !== undefined && entry.kind === "value") {
        valueSlots.push(slot);
      }
    });
    let pick: number;
    if (addr === "$last") {
      pick = valueSlots.length - 1;
    } else if (addr === "$first") {
      pick = 0;
    } else {
      pick = valueSlots.length + Number(addr);
    }
    return pick >= 0 && pick < valueSlots.length ? (valueSlots[pick] ?? -1) : -1;
  }

  const explicit = /^L(\d+)$/.exec(addr);
  if (explicit === null || explicit[1] === undefined) {
    return -1;
  }
  const wanted = Number(explicit[1]);
  return ast.lines.findIndex((entry) => entry.line === wanted);
}
/**
 * Build the success result for an edit: copy the line list, swap in
 * `newLine` at `lineIdx`, re-render the file text in render mode, and
 * return the new AST with that text as `raw`.
 *
 * `lineEnding` is carried over only when the source AST has one.
 * `fileName`, when given, is forwarded to the emitter as
 * `fileNameForGuard`.
 */
function finalize(
  ast: JsonlAst,
  lineIdx: number,
  newLine: JsonlLine,
  fileName?: string,
): JsonlEditResult {
  const lines = ast.lines.slice();
  lines[lineIdx] = newLine;

  // `raw` is a placeholder until the rendered text is known.
  const draft: JsonlAst =
    ast.lineEnding === undefined
      ? { kind: "jsonl", raw: "", lines }
      : { kind: "jsonl", raw: "", lines, lineEnding: ast.lineEnding };

  const rendered =
    fileName === undefined
      ? emitJsonl(draft, { mode: "render" })
      : emitJsonl(draft, { mode: "render", fileNameForGuard: fileName });

  return { ok: true, ast: { ...draft, raw: rendered } };
}
/**
 * Append a new value as the next line. Useful for session checkpointing
 * (each event is a new line). Returns a new AST; the input is not
 * mutated.
 *
 * Append takes no path: it addresses the file as a whole. The new line
 * number is one more than the last existing line's number (or 1 for an
 * empty file).
 *
 * The source AST's `lineEnding` is carried over so that appending to a
 * CRLF file re-renders with CRLF joins instead of silently switching the
 * file to LF.
 *
 * @param ast - AST to append to.
 * @param value - Value for the new line.
 * @returns A new AST whose `raw` is the render-mode text including the
 *   appended line.
 */
export function appendJsonlOcPath(ast: JsonlAst, value: JsoncValue): JsonlAst {
  const lastLine = ast.lines[ast.lines.length - 1];
  const nextLineNo = ast.lines.length === 0 ? 1 : (lastLine?.line ?? 0) + 1;
  const newLine: JsonlLine = {
    kind: "value",
    line: nextLineNo,
    value,
    // No original text exists for a synthesized line; render mode
    // serializes value lines from `value`, not from `raw`.
    raw: "",
  };
  const next: JsonlAst = {
    kind: "jsonl",
    raw: "",
    lines: [...ast.lines, newLine],
    ...(ast.lineEnding !== undefined ? { lineEnding: ast.lineEnding } : {}),
  };
  const rendered = emitJsonl(next, { mode: "render" });
  return { ...next, raw: rendered };
}

View File

@@ -0,0 +1,98 @@
/**
* Emit a `JsonlAst` to bytes.
*
* **Round-trip mode (default)** returns `ast.raw` verbatim — preserves
* malformed lines, blanks, trailing-newline shape exactly.
*
* **Render mode** rebuilds the file from line entries (re-stringifies
* value lines via JSON.stringify; preserves blank/malformed lines
* verbatim). Useful for synthetic ASTs.
*
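* **Sentinel guard**: render mode always rejects value-line string
* leaves containing the `__OPENCLAW_REDACTED__` literal. Bytes echoed
* verbatim (round-trip output, blank/malformed lines) are scanned only
* when `acceptPreExistingSentinel` is `false`.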
*
* @module @openclaw/oc-path/jsonl/emit
*/
import type { JsoncValue } from "../jsonc/ast.js";
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../sentinel.js";
import type { JsonlAst } from "./ast.js";
/** Options accepted by `emitJsonl`. */
export interface JsonlEmitOptions {
/**
* `"roundtrip"` (the default when omitted) returns `ast.raw` verbatim;
* `"render"` rebuilds the text from the line entries.
*/
readonly mode?: "roundtrip" | "render";
/**
* File name used to build the `oc://<name>` prefix of the address
* reported in sentinel errors. When omitted the prefix is `oc://`.
*/
readonly fileNameForGuard?: string;
/**
* See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
* Default `true` — round-trip echoes parsed bytes without scanning
* for the sentinel. Render mode scans value-line leaves regardless.
*/
readonly acceptPreExistingSentinel?: boolean;
}
/**
 * Emit a `JsonlAst` as text.
 *
 * Round-trip mode (default) returns `ast.raw` unchanged. Render mode
 * rebuilds the text: value lines are re-serialized from their value,
 * blank and malformed lines are copied from their `raw` text, and the
 * lines are joined with `ast.lineEnding` (LF when absent). No trailing
 * line ending is added.
 *
 * @throws OcEmitSentinelError when verbatim bytes contain the sentinel
 *   and `acceptPreExistingSentinel` is `false`, or (in render mode,
 *   always) when a value line contains it in a string leaf.
 */
export function emitJsonl(ast: JsonlAst, opts: JsonlEmitOptions = {}): string {
  const guardRoot = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : "oc://";
  // Verbatim bytes are trusted unless the caller opts out.
  const scanVerbatim = !(opts.acceptPreExistingSentinel ?? true);
  const assertClean = (bytes: string, address: string): void => {
    if (scanVerbatim && bytes.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(address);
    }
  };

  if ((opts.mode ?? "roundtrip") === "roundtrip") {
    assertClean(ast.raw, `${guardRoot}/[raw]`);
    return ast.raw;
  }

  const rendered: string[] = [];
  for (const entry of ast.lines) {
    const address = `${guardRoot}/L${entry.line}`;
    if (entry.kind === "blank" || entry.kind === "malformed") {
      // Copied as-is, under the same trust policy as round-trip bytes.
      assertClean(entry.raw, address);
      rendered.push(entry.raw);
    } else {
      // Value lines are re-serialized; renderValue scans string leaves
      // unconditionally so a caller-injected sentinel never lands on disk.
      rendered.push(renderValue(entry.value, address, []));
    }
  }
  // Join with the convention recorded at parse time so a CRLF file is
  // not rewritten with LF separators.
  return rendered.join(ast.lineEnding ?? "\n");
}
/**
 * Serialize one structural value as compact single-line JSON (no
 * whitespace between tokens), suitable for a JSONL line.
 *
 * `walked` is the chain of keys / indices leading to `value`; it is only
 * used to build the address reported when a string leaf is rejected.
 *
 * @throws OcEmitSentinelError when a string leaf contains
 *   `REDACTED_SENTINEL`, whether as the whole value or embedded in it.
 */
function renderValue(value: JsoncValue, guardPath: string, walked: readonly string[]): string {
  if (value.kind === "null") {
    return "null";
  }
  if (value.kind === "number" || value.kind === "boolean") {
    return String(value.value);
  }
  if (value.kind === "string") {
    const text = value.value;
    // Substring match on purpose: an embedded sentinel is rejected just
    // like an exact match.
    if (text.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/${walked.join("/")}`);
    }
    return JSON.stringify(text);
  }
  if (value.kind === "array") {
    const rendered = value.items.map((item, index) =>
      renderValue(item, guardPath, [...walked, String(index)]),
    );
    return `[${rendered.join(",")}]`;
  }
  if (value.kind === "object") {
    const rendered = value.entries.map((entry) => {
      const key = JSON.stringify(entry.key);
      const body = renderValue(entry.value, guardPath, [...walked, entry.key]);
      return `${key}:${body}`;
    });
    return `{${rendered.join(",")}}`;
  }
  throw new Error(`unreachable: jsonl renderValue kind`);
}

View File

@@ -0,0 +1,73 @@
/**
* JSONL parser — splits on `\n`, parses each non-empty line as JSONC
* (allowing comments/trailing-comma is harmless and matches what
* openclaw session logs actually emit). Soft-error policy: malformed
* lines surface as `kind: 'malformed'` AST entries plus a diagnostic.
*
* @module @openclaw/oc-path/jsonl/parse
*/
import type { Diagnostic } from "../ast.js";
import { parseJsonc } from "../jsonc/parse.js";
import type { JsonlAst, JsonlLine } from "./ast.js";
/**
* Result of `parseJsonl`: the AST plus one warning diagnostic for every
* line that could not be parsed.
*/
export interface JsonlParseResult {
readonly ast: JsonlAst;
readonly diagnostics: readonly Diagnostic[];
}
/**
 * Parse JSONL text into a `JsonlAst`.
 *
 * One trailing line ending is dropped so a file ending in a newline does
 * not gain a phantom blank line. CRLF is normalized to LF before
 * splitting, so each line's `raw` has no trailing `\r`. The dominant
 * convention is recorded in `lineEnding`: CRLF when at least half of all
 * newlines are CRLF, LF otherwise.
 *
 * Each line becomes a `blank`, `value`, or `malformed` entry numbered
 * from 1. Malformed lines also produce an `OC_JSONL_LINE_MALFORMED`
 * warning.
 */
export function parseJsonl(raw: string): JsonlParseResult {
  const diagnostics: Diagnostic[] = [];

  // Majority vote between CRLF and bare LF; `newlineTotal` counts every
  // `\n`, including those that are part of a CRLF pair.
  const crlfTotal = raw.split("\r\n").length - 1;
  const newlineTotal = raw.split("\n").length - 1;
  const lineEnding: "\r\n" | "\n" =
    crlfTotal > 0 && crlfTotal * 2 >= newlineTotal ? "\r\n" : "\n";

  let trimmed = raw;
  if (raw.endsWith("\r\n")) {
    trimmed = raw.slice(0, -2);
  } else if (raw.endsWith("\n")) {
    trimmed = raw.slice(0, -1);
  }
  const body = trimmed.split("\r\n").join("\n");

  const lines: JsonlLine[] = [];
  if (body.length > 0) {
    let lineNo = 0;
    for (const lineText of body.split("\n")) {
      lineNo += 1;
      if (lineText.trim().length === 0) {
        lines.push({ kind: "blank", line: lineNo, raw: lineText });
        continue;
      }
      const root = parseJsonc(lineText).ast.root;
      if (root !== null) {
        lines.push({ kind: "value", line: lineNo, value: root, raw: lineText });
        continue;
      }
      // Soft error: keep the text and report it, never throw.
      lines.push({ kind: "malformed", line: lineNo, raw: lineText });
      diagnostics.push({
        line: lineNo,
        message: `line ${lineNo} could not be parsed as JSON`,
        severity: "warning",
        code: "OC_JSONL_LINE_MALFORMED",
      });
    }
  }
  return { ast: { kind: "jsonl", raw, lines, lineEnding }, diagnostics };
}

View File

@@ -0,0 +1,180 @@
/**
* Resolve an `OcPath` against a `JsonlAst`.
*
* Convention for JSONL OcPaths:
*
* oc://session-events/L42 → entire line 42 value
* oc://session-events/L42/result → field on line 42's value
* oc://session-events/L42/result.detail → dotted descent
* oc://session-events/$last → final non-blank value
*
* `Lnnn` (line address), `$first`, `$last`, and `-N` (Nth-from-last
* value line) are the addressing primitives unique to JSONL — they're
* how forensics / replay refers to a specific entry without committing
* to a content key.
*
* @module @openclaw/oc-path/jsonl/resolve
*/
import type { JsoncEntry, JsoncValue } from "../jsonc/ast.js";
import type { OcPath } from "../oc-path.js";
import {
POS_FIRST,
POS_LAST,
isPositionalSeg,
isQuotedSeg,
resolvePositionalSeg,
splitRespectingBrackets,
unquoteSeg,
} from "../oc-path.js";
import type { JsonlAst, JsonlLine } from "./ast.js";
/**
* What `resolveJsonlOcPath` found.
*
* - `root`: the path has no line address; the whole AST matched.
* - `line`: the path addresses a line and nothing inside it; `node` is
*   the line entry (which may be blank or malformed).
* - `object-entry`: the walk inside a line's value ended on an object
*   key; `node` is the entry (key + value).
* - `value`: the walk ended on an array item; `node` is the item.
*
* For the last two, `line` is the number of the line the value lives on
* and `path` is the list of keys / indices walked inside that line.
*/
export type JsonlOcPathMatch =
| { readonly kind: "root"; readonly node: JsonlAst }
| { readonly kind: "line"; readonly node: JsonlLine }
| {
readonly kind: "value";
readonly node: JsoncValue;
readonly line: number;
readonly path: readonly string[];
}
| {
readonly kind: "object-entry";
readonly node: JsoncEntry;
readonly line: number;
readonly path: readonly string[];
};
/**
 * Resolve an `OcPath` against a JSONL AST.
 *
 * `path.section` is the line address. With no `item` / `field` the line
 * entry itself is returned. Otherwise `item` and `field` are split on
 * `.` (bracket/quote aware), quoted sub-segments are unquoted, and the
 * resulting keys are walked inside the line's value.
 *
 * @returns The match, or `null` when the line does not exist, the line
 *   is not a value line but a sub-path was given, a segment is empty, a
 *   key / index is missing, or the walk tries to descend into a primitive.
 */
export function resolveJsonlOcPath(ast: JsonlAst, path: OcPath): JsonlOcPathMatch | null {
  // The first non-file segment is the line address.
  const lineAddr = path.section;
  if (lineAddr === undefined) {
    return { kind: "root", node: ast };
  }
  const lineEntry = pickLine(ast, lineAddr);
  if (lineEntry === null) {
    return null;
  }

  const descends = path.item !== undefined || path.field !== undefined;
  if (!descends) {
    return { kind: "line", node: lineEntry };
  }
  if (lineEntry.kind !== "value") {
    return null;
  }

  const keys: string[] = [];
  for (const part of [path.item, path.field]) {
    if (part === undefined) {
      continue;
    }
    for (const sub of splitRespectingBrackets(part, ".")) {
      keys.push(isQuotedSeg(sub) ? unquoteSeg(sub) : sub);
    }
  }

  const lineNo = lineEntry.line;
  let cursor: JsoncValue = lineEntry.value;
  let viaEntry: JsoncEntry | null = null;
  const trail: string[] = [];
  for (const rawKey of keys) {
    if (rawKey.length === 0) {
      return null;
    }
    // A positional token that does not apply to the current container
    // (e.g. `-N` on an object, openclaw#59934) resolves to `null` and is
    // then looked up as a literal key.
    let key = rawKey;
    if (isPositionalSeg(rawKey)) {
      const concrete = positionalForJsonc(cursor, rawKey);
      if (concrete !== null) {
        key = concrete;
      }
    }
    trail.push(key);

    if (cursor.kind === "object") {
      const hit = cursor.entries.find((candidate) => candidate.key === key);
      if (hit === undefined) {
        return null;
      }
      viaEntry = hit;
      cursor = hit.value;
    } else if (cursor.kind === "array") {
      const index = Number(key);
      const inRange = Number.isInteger(index) && index >= 0 && index < cursor.items.length;
      if (!inRange) {
        return null;
      }
      const element = cursor.items[index];
      if (element === undefined) {
        return null;
      }
      viaEntry = null;
      cursor = element;
    } else {
      // Primitive: nothing to descend into.
      return null;
    }
  }

  if (viaEntry !== null && cursor === viaEntry.value) {
    return { kind: "object-entry", node: viaEntry, line: lineNo, path: trail };
  }
  return { kind: "value", node: cursor, line: lineNo, path: trail };
}
/**
 * Find the line entry a JSONL line address refers to.
 *
 * - `POS_LAST` / `POS_FIRST`: last / first value line.
 * - `-N`: Nth-from-last value line (blank and malformed lines are not
 *   counted).
 * - `Lnnn`: the line whose `line` number equals `nnn`, whatever its kind.
 *
 * @returns The line entry, or `null` when nothing matches or the address
 *   is not one of the shapes above.
 */
function pickLine(ast: JsonlAst, addr: string): JsonlLine | null {
  if (addr === POS_LAST) {
    const newestFirst = [...ast.lines].reverse();
    for (const entry of newestFirst) {
      if (entry !== undefined && entry.kind === "value") {
        return entry;
      }
    }
    return null;
  }
  if (addr === POS_FIRST) {
    return ast.lines.find((entry) => entry.kind === "value") ?? null;
  }
  if (/^-\d+$/.test(addr)) {
    // Count from the end over value lines only.
    const valueLines = ast.lines.filter(
      (entry): entry is Extract<JsonlLine, { kind: "value" }> => entry.kind === "value",
    );
    const pick = valueLines.length + Number(addr);
    const inRange = pick >= 0 && pick < valueLines.length;
    return inRange ? (valueLines[pick] ?? null) : null;
  }
  const explicit = /^L(\d+)$/.exec(addr);
  if (explicit === null || explicit[1] === undefined) {
    return null;
  }
  const wanted = Number(explicit[1]);
  return ast.lines.find((entry) => entry.line === wanted) ?? null;
}
/**
 * Resolve a positional segment against a container inside a JSONL line
 * value by delegating to `resolvePositionalSeg`. Arrays are passed as
 * indexable with their length; objects are passed as non-indexable with
 * their ordered key list. Primitives yield `null`.
 */
function positionalForJsonc(node: JsoncValue, seg: string): string | null {
  switch (node.kind) {
    case "array":
      return resolvePositionalSeg(seg, { indexable: true, size: node.items.length });
    case "object": {
      const keys = node.entries.map((entry) => entry.key);
      return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
    }
    default:
      return null;
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,301 @@
/**
* Generic markdown-flavored parser for the 8 workspace files.
*
* Produces a `MdAst` addressing index over `raw` bytes:
* frontmatter (if present), preamble (prose before first H2), and an
* H2-block tree with items/tables/code-blocks extracted for OcPath
* resolution.
*
* **No file-kind discrimination.** Same parse path for SOUL.md /
* AGENTS.md / MEMORY.md / TOOLS.md / IDENTITY.md / USER.md /
* HEARTBEAT.md / SKILL.md. Per-file lint opinions ride downstream
* (`@openclaw/oc-lint` rule packs).
*
* **Byte-fidelity contract**: `raw` is preserved on the AST root so
* `emitMd(parse(raw)) === raw` for every input the parser accepts.
*
* @module @openclaw/oc-path/parse
*/
import type {
AstBlock,
AstCodeBlock,
AstItem,
AstTable,
Diagnostic,
FrontmatterEntry,
ParseResult,
MdAst,
} from "./ast.js";
import { slugify } from "./slug.js";
/** Line that opens and closes a frontmatter block. */
const FENCE = "---";
/**
 * Byte-order mark (U+FEFF). Written as an escape rather than a literal
 * character so the constant cannot silently become an empty string when
 * an editor or tool strips invisible characters.
 */
const BOM = "\uFEFF";
/**
 * Parse workspace-file text into a `MdAst`.
 *
 * A leading BOM is ignored for parsing but `raw` is stored untouched on
 * the AST. Lines are split on LF or CRLF; frontmatter (if any) is
 * detected first, and the remaining lines are split into a preamble and
 * H2 blocks.
 *
 * Soft-error policy: recoverable oddities (for example unclosed
 * frontmatter) are reported as diagnostics rather than thrown.
 */
export function parseMd(raw: string): ParseResult {
  const diagnostics: Diagnostic[] = [];

  let text = raw;
  if (text.startsWith(BOM)) {
    text = text.slice(BOM.length);
  }
  const lines = text.split(/\r?\n/);

  const frontmatter = detectFrontmatter(lines, diagnostics);
  // Body starts right after the closing fence (0-based index), or at the
  // top of the file when there is no frontmatter.
  const firstBodyIndex = frontmatter === null ? 0 : frontmatter.endLine + 1;
  const body = splitH2Blocks(lines.slice(firstBodyIndex), firstBodyIndex + 1, diagnostics);

  const ast: MdAst = {
    kind: "md",
    raw,
    frontmatter: frontmatter?.entries ?? [],
    preamble: body.preamble,
    blocks: body.blocks,
  };
  return { ast, diagnostics };
}
// ---------- Frontmatter ---------------------------------------------------
/** A detected frontmatter block: its parsed entries and where it ends. */
interface FrontmatterRange {
/** `key: value` lines found between the fences, in file order. */
readonly entries: readonly FrontmatterEntry[];
/** 0-based line index of the closing `---`. */
readonly endLine: number;
}
/**
 * Detect a frontmatter block at the top of `lines`.
 *
 * The block must open with a fence on the first line and close with the
 * next line that is exactly a fence. Lines in between that look like
 * `key: value` become entries (1-based line numbers, surrounding quotes
 * on the value removed); blank lines and anything else are skipped
 * silently.
 *
 * @returns The entries and the 0-based index of the closing fence, or
 *   `null` when there is no frontmatter. An opening fence with no closing
 *   fence also returns `null` and adds an `OC_FRONTMATTER_UNCLOSED`
 *   warning.
 */
function detectFrontmatter(
  lines: readonly string[],
  diagnostics: Diagnostic[],
): FrontmatterRange | null {
  if (lines.length < 2 || lines[0] !== FENCE) {
    return null;
  }
  const closeIndex = lines.indexOf(FENCE, 1);
  if (closeIndex === -1) {
    diagnostics.push({
      line: 1,
      message: "frontmatter opens with --- but never closes",
      severity: "warning",
      code: "OC_FRONTMATTER_UNCLOSED",
    });
    return null;
  }

  const entries: FrontmatterEntry[] = [];
  lines.slice(1, closeIndex).forEach((line, offset) => {
    if (line.trim().length === 0) {
      return;
    }
    const pair = /^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)$/.exec(line);
    if (pair === null) {
      // Not a `key: value` line (for example a list-style continuation
      // such as ` - item`). Lists are not modelled at this layer, so the
      // line is ignored rather than reported.
      return;
    }
    entries.push({
      key: pair[1],
      value: unquote(pair[2].trim()),
      // `offset` is relative to the line after the opening fence, and
      // reported line numbers are 1-based.
      line: offset + 2,
    });
  });
  return { entries, endLine: closeIndex };
}
/**
 * Strip one pair of matching surrounding quotes (`"…"` or `'…'`) from
 * `value`. Anything else, including mismatched quotes and strings
 * shorter than two characters, is returned unchanged. Escape sequences
 * inside the quotes are not interpreted.
 */
function unquote(value: string): string {
  if (value.length < 2) {
    return value;
  }
  const opener = value.charAt(0);
  const isQuote = opener === '"' || opener === "'";
  const closesWithSame = value.charAt(value.length - 1) === opener;
  return isQuote && closesWithSame ? value.slice(1, -1) : value;
}
// ---------- H2 block split -------------------------------------------------
/**
 * Split body lines into the preamble (everything before the first H2)
 * and one block per `## ` heading.
 *
 * Lines starting with ``` toggle a fenced-code state, and headings are
 * not recognized while inside a fence. Each block covers the lines after
 * its heading up to the next heading (or the end of the body) and has
 * its items, tables and code blocks extracted.
 *
 * @param bodyLines - Lines after the frontmatter.
 * @param bodyStartLineNum - 1-based line number of `bodyLines[0]` in the
 *   original file.
 * @param diagnostics - Sink passed through to item extraction.
 */
function splitH2Blocks(
  bodyLines: readonly string[],
  bodyStartLineNum: number,
  diagnostics: Diagnostic[],
): { preamble: string; blocks: AstBlock[] } {
  // Pass 1: locate headings, skipping fenced code.
  const headings: { line: number; text: string }[] = [];
  let fenced = false;
  bodyLines.forEach((line, index) => {
    if (line.startsWith("```")) {
      fenced = !fenced;
      return;
    }
    if (fenced) {
      return;
    }
    const hit = /^##\s+(\S.*?)\s*$/.exec(line);
    if (hit !== null) {
      headings.push({ line: index, text: hit[1] });
    }
  });

  if (headings.length === 0) {
    return { preamble: bodyLines.join("\n"), blocks: [] };
  }

  // Pass 2: slice each heading's section and extract its contents.
  const blocks: AstBlock[] = headings.map((heading, position) => {
    const following = headings[position + 1];
    const sectionEnd = following === undefined ? bodyLines.length : following.line;
    const section = bodyLines.slice(heading.line + 1, sectionEnd);
    const headingLineNum = bodyStartLineNum + heading.line;
    const firstSectionLineNum = headingLineNum + 1;
    const items = extractItems(section, firstSectionLineNum, diagnostics);
    const tables = extractTables(section, firstSectionLineNum);
    const codeBlocks = extractCodeBlocks(section, firstSectionLineNum);
    return {
      heading: heading.text,
      slug: slugify(heading.text),
      line: headingLineNum,
      bodyText: section.join("\n"),
      items,
      tables,
      codeBlocks,
    };
  });

  const preamble = bodyLines.slice(0, headings[0].line).join("\n");
  return { preamble, blocks };
}
// ---------- Items ----------------------------------------------------------
/** A bullet line: `-`, `*` or `+` at column 0, whitespace, then the item text. */
const BULLET_RE = /^(?:[-*+])\s+(.+?)\s*$/;
/** `key: value` shape inside a bullet's text; the key is everything before the first colon. */
const KV_RE = /^([^:]+?)\s*:\s*(.+)$/;
/**
 * Collect the bullet items of one block body.
 *
 * Lines inside ``` fences are skipped. A bullet whose text has the
 * `key: value` shape additionally carries a `kv` pair and is slugged by its
 * key; any other bullet is slugged by its full text.
 *
 * @param blockBodyLines - Lines of the block body (heading line excluded).
 * @param startLineNum - Line number of `blockBodyLines[0]`; each item's `line` is `startLineNum` plus its index.
 * @param _diagnostics - Currently unused.
 * @returns Items in document order.
 */
function extractItems(
  blockBodyLines: readonly string[],
  startLineNum: number,
  _diagnostics: Diagnostic[],
): AstItem[] {
  const collected: AstItem[] = [];
  let fenced = false;
  for (const [offset, line] of blockBodyLines.entries()) {
    if (line.startsWith("```")) {
      fenced = !fenced;
      continue;
    }
    // Inside a fence nothing is a bullet, whatever it looks like.
    const bullet = fenced ? null : BULLET_RE.exec(line);
    if (bullet === null) {
      continue;
    }
    const text = bullet[1];
    const lineNum = startLineNum + offset;
    const pair = KV_RE.exec(text);
    if (pair === null) {
      collected.push({ text, slug: slugify(text), line: lineNum });
    } else {
      collected.push({
        text,
        slug: slugify(pair[1]),
        line: lineNum,
        kv: { key: pair[1].trim(), value: pair[2].trim() },
      });
    }
  }
  return collected;
}
// ---------- Tables ---------------------------------------------------------
/**
 * Collect the pipe tables of one block body.
 *
 * A table is a header line starting with `|` (after trimming), directly
 * followed by a separator line made of `-` / `:` cells, followed by zero or
 * more rows that also start with `|`. Lines inside ``` fences are skipped,
 * matching how headings and bullet items are extracted, so a table quoted
 * inside a fenced code block is not reported as a real table.
 *
 * @param blockBodyLines - Lines of the block body (heading line excluded).
 * @param startLineNum - Line number of `blockBodyLines[0]`; a table's `line` is that of its header row.
 * @returns Tables in document order.
 */
function extractTables(blockBodyLines: readonly string[], startLineNum: number): AstTable[] {
  // Hoisted so the pattern is built once rather than once per line.
  const separatorPattern = /^\s*\|\s*[:-]+(?:\s*\|\s*[:-]+)*\s*\|?\s*$/;
  const tables: AstTable[] = [];
  let inCode = false;
  let i = 0;
  while (i < blockBodyLines.length) {
    const headerLine = blockBodyLines[i];
    // Fence lines toggle code state and are never table rows themselves.
    if (headerLine.startsWith("```")) {
      inCode = !inCode;
      i++;
      continue;
    }
    if (inCode) {
      i++;
      continue;
    }
    const sepLine = blockBodyLines[i + 1];
    if (
      headerLine.trim().startsWith("|") &&
      sepLine !== undefined &&
      separatorPattern.test(sepLine)
    ) {
      const headers = splitTableRow(headerLine);
      const rows: string[][] = [];
      // Rows continue until the first line that does not start with `|`;
      // a fence line can never satisfy that, so the table ends before it.
      let j = i + 2;
      while (j < blockBodyLines.length && blockBodyLines[j].trim().startsWith("|")) {
        rows.push(splitTableRow(blockBodyLines[j]));
        j++;
      }
      tables.push({ headers, rows, line: startLineNum + i });
      i = j;
      continue;
    }
    i++;
  }
  return tables;
}
/**
 * Split one table row into trimmed cell texts.
 *
 * One leading and one trailing `|` are dropped before splitting, so
 * `| a | b |` yields `["a", "b"]`. Every remaining `|` is a cell boundary.
 */
function splitTableRow(line: string): string[] {
  const trimmed = line.trim().replace(/^\|/, "").replace(/\|$/, "");
  return trimmed.split("|").map((cell) => cell.trim());
}
// ---------- Code blocks ---------------------------------------------------
/**
 * Collect the fenced code blocks of one block body.
 *
 * A fence is any line starting with three backticks. Text after the opening
 * backticks is the language tag (`null` when blank). The body runs up to the
 * next fence line; a fence that is never closed takes every remaining line.
 *
 * @param blockBodyLines - Lines of the block body (heading line excluded).
 * @param startLineNum - Line number of `blockBodyLines[0]`; a code block's `line` is that of its opening fence.
 * @returns Code blocks in document order.
 */
function extractCodeBlocks(
  blockBodyLines: readonly string[],
  startLineNum: number,
): AstCodeBlock[] {
  const found: AstCodeBlock[] = [];
  // The fence currently being collected, or null while outside any fence.
  let open: { lang: string | null; line: number; body: string[] } | null = null;
  for (let idx = 0; idx < blockBodyLines.length; idx++) {
    const current = blockBodyLines[idx];
    const isFence = current.startsWith("```");
    if (open === null) {
      if (isFence) {
        const info = current.slice(3).trim();
        open = { lang: info.length > 0 ? info : null, line: startLineNum + idx, body: [] };
      }
    } else if (isFence) {
      // Closing fence: emit the block and go back to scanning for openers.
      found.push({ lang: open.lang, text: open.body.join("\n"), line: open.line });
      open = null;
    } else {
      open.body.push(current);
    }
  }
  // Unterminated fence: everything up to the end of the body belongs to it.
  if (open !== null) {
    found.push({ lang: open.lang, text: open.body.join("\n"), line: open.line });
  }
  return found;
}

View File

@@ -0,0 +1,129 @@
/**
* OcPath → AST node resolver.
*
* Resolves an `OcPath` against a `MdAst` and returns the matched
* node (block / item / frontmatter entry / kv field) or `null` if the
* path doesn't match anything.
*
* The address dispatch:
*
* { file } → AST root
* { file, section } → AstBlock with matching slug
* { file, section, item } → AstItem inside that block
* { file, section, item, field } → kv.value of that item if kv.key matches
*
* The `file` segment is informational here — callers verify file
* matching before passing the AST. The resolver doesn't load files; it
* walks an in-memory AST.
*
* @module @openclaw/oc-path/resolve
*/
import type { AstBlock, AstItem, FrontmatterEntry, MdAst } from "./ast.js";
import type { OcPath } from "./oc-path.js";
import { isOrdinalSeg, isPositionalSeg, parseOrdinalSeg, resolvePositionalSeg } from "./oc-path.js";
/**
 * The resolved target plus a stable description of what kind of node it
 * is. Lint rules and doctor fixers branch on `kind`.
 *
 * Variants, as produced by `resolveMdOcPath`:
 * - `root` — the path has no section; `node` is the whole AST.
 * - `frontmatter` — the section is the literal `[frontmatter]`; `node` is the entry whose key matched.
 * - `block` — section only; `node` is the block with the matching slug.
 * - `item` — section + item; `node` is the item, `block` its containing block.
 * - `item-field` — section + item + field, where the field matched the item's kv key.
 */
export type OcPathMatch =
// Whole-file address.
| { readonly kind: "root"; readonly node: MdAst }
// Single frontmatter entry.
| { readonly kind: "frontmatter"; readonly node: FrontmatterEntry }
// One H2 block.
| { readonly kind: "block"; readonly node: AstBlock }
// One bullet item, together with the block that contains it.
| { readonly kind: "item"; readonly node: AstItem; readonly block: AstBlock }
// The value side of a `key: value` bullet item.
| {
readonly kind: "item-field";
readonly node: AstItem;
readonly block: AstBlock;
/** The kv.value string, surfaced for convenience. */
readonly value: string;
};
/**
 * Resolve an `OcPath` against an AST. Returns the matched node or `null`.
 *
 * Section and slug-style item segments are lowercased and compared with the
 * precomputed `slug` of each block / item, so callers pass the slug form:
 * "Boundaries" matches a section heading "## Boundaries" because the
 * heading's slug is "boundaries". The segment is only lowercased, not
 * re-slugified. An empty segment never matches: blocks and items whose text
 * has no slug-valid characters carry an empty slug, and those are reachable
 * only through ordinal / positional addressing, never by slug.
 *
 * Special-case: `OcPath.section === '[frontmatter]'` (literal) addresses
 * frontmatter; the key is taken from `item`, falling back to `field`. This
 * lets a single OcPath shape address both prose-tree fields and frontmatter
 * fields without growing the tuple. Frontmatter keys match case-sensitively.
 *
 * @param ast - Parsed markdown AST to walk; no file I/O happens here.
 * @param path - Parsed address; its `file` segment is not consulted.
 * @returns The match, or `null` when any segment fails to resolve.
 */
export function resolveMdOcPath(ast: MdAst, path: OcPath): OcPathMatch | null {
  // Frontmatter addressing: oc://FILE/[frontmatter]/key
  // The frontmatter key sits at the OcPath `item` slot in this 3-segment
  // shape; we accept `field` as a fallback for callers that thread
  // 4-segment paths.
  if (path.section === "[frontmatter]") {
    const key = path.item ?? path.field;
    if (key === undefined) {
      return null;
    }
    const entry = ast.frontmatter.find((e) => e.key === key);
    if (entry === undefined) {
      return null;
    }
    return { kind: "frontmatter", node: entry };
  }
  // Plain file root address.
  if (path.section === undefined) {
    return { kind: "root", node: ast };
  }
  const sectionSlug = path.section.toLowerCase();
  // An empty slug is "not matchable", not a wildcard: without this guard an
  // empty segment would select the first block whose heading slugified to "".
  if (sectionSlug === "") {
    return null;
  }
  const block = ast.blocks.find((b) => b.slug === sectionSlug);
  if (block === undefined) {
    return null;
  }
  // Section-only address.
  if (path.item === undefined) {
    return { kind: "block", node: block };
  }
  // Item addressing: ordinal (`#N`) > positional (`$first`/`$last`/`-N`)
  // > slug. Ordinal uses absolute document order so two items sharing
  // a slug stay distinguishable.
  let item: AstItem | undefined;
  if (isOrdinalSeg(path.item)) {
    const n = parseOrdinalSeg(path.item);
    if (n === null || n < 0 || n >= block.items.length) {
      return null;
    }
    item = block.items[n];
  } else if (isPositionalSeg(path.item)) {
    const concrete = resolvePositionalSeg(path.item, {
      indexable: true,
      size: block.items.length,
    });
    if (concrete === null) {
      return null;
    }
    item = block.items[Number(concrete)];
  } else {
    const itemSlug = path.item.toLowerCase();
    // Same rule as for sections: items with an empty slug cannot be
    // addressed by slug.
    if (itemSlug === "") {
      return null;
    }
    item = block.items.find((i) => i.slug === itemSlug);
  }
  if (item === undefined) {
    return null;
  }
  // Item-only address.
  if (path.field === undefined) {
    return { kind: "item", node: item, block };
  }
  // Item-field address. Requires the item to have a `kv` and the field
  // to match the kv key (case-insensitive). A field on an item without
  // kv shape is unresolvable — return null rather than guessing.
  if (item.kv === undefined) {
    return null;
  }
  if (item.kv.key.toLowerCase() !== path.field.toLowerCase()) {
    return null;
  }
  return { kind: "item-field", node: item, block, value: item.kv.value };
}

View File

@@ -0,0 +1,63 @@
/**
* Substrate-level redaction-sentinel guard.
*
* Closes the `__OPENCLAW_REDACTED__` corruption class by rejecting the
* literal string at the emit boundary. Per-call-site reject rules
* (added piecemeal in [#62281](https://github.com/openclaw/openclaw/issues/62281),
* [#44357](https://github.com/openclaw/openclaw/issues/44357),
* [#13495](https://github.com/openclaw/openclaw/issues/13495), and others)
* caught the symptom; this guard removes the substrate that produced
* the symptom in the first place.
*
* Throwing at emit (not at the consumer) means every code path through
* the substrate is covered, including future call sites we haven't
* audited.
*
* @module @openclaw/oc-path/sentinel
*/
/**
 * Marker string that stands in for redacted secrets in OpenClaw's runtime
 * representation. It must never reach disk: a writer that sees it was handed
 * the redacted view instead of the real data.
 */
export const REDACTED_SENTINEL = "__OPENCLAW_REDACTED__";
/**
 * Error raised when emit finds the `"__OPENCLAW_REDACTED__"` literal in
 * bytes it is about to write. Treat it as a fatal write error — stripping
 * the sentinel and carrying on would silently corrupt the file, so the
 * guard fails closed.
 *
 * `path` is the OcPath-shaped pointer to where the sentinel was found
 * (e.g., `oc://config/plugins.entries.foo.token`). For non-config emits it
 * is the closest meaningful address (frontmatter key, section/item slug,
 * etc.) or just the file name.
 */
export class OcEmitSentinelError extends Error {
  /** Stable machine-readable discriminator for this error class. */
  readonly code = "OC_EMIT_SENTINEL";
  /** Address at which the sentinel was detected. */
  readonly path: string;

  constructor(path: string) {
    super(`emit refused to write "${REDACTED_SENTINEL}" sentinel literal at ${path}`);
    this.name = "OcEmitSentinelError";
    this.path = path;
  }
}
/**
 * Reject `value` with an `OcEmitSentinelError` when it is a string that
 * contains the redaction sentinel anywhere inside it.
 *
 * The check is a substring match, not equality: a leaf such as
 * `prefix__OPENCLAW_REDACTED__suffix` still puts the marker bytes on disk,
 * where downstream scanners flag the file as corrupted, so it is refused
 * just like the bare sentinel.
 *
 * Non-string inputs are ignored. Used by every leaf-write boundary.
 *
 * @param value - Candidate leaf value about to be written.
 * @param ocPath - Address reported on the thrown error.
 * @throws OcEmitSentinelError when the sentinel is present.
 */
export function guardSentinel(value: unknown, ocPath: string): void {
  if (typeof value !== "string") {
    return;
  }
  if (value.indexOf(REDACTED_SENTINEL) === -1) {
    return;
  }
  throw new OcEmitSentinelError(ocPath);
}

View File

@@ -0,0 +1,43 @@
/**
* Slug derivation for OcPath section/item addressing.
*
* A slug is the kebab-case lowercase form of a heading or item text:
* "Tool Guidance" → "tool-guidance"
* " Restricted Data " → "restricted-data"
* "deny-rule-1" → "deny-rule-1" (already a slug)
* "API_KEY" → "api-key"
* "Multi-tenant isolation" → "multi-tenant-isolation"
* "deny: secrets" → "deny-secrets" (colon + space → hyphen)
*
* Deterministic + idempotent. Used by parse to pre-compute slugs for
* blocks and items, and by resolveOcPath to match section/item names.
*
* @module @openclaw/oc-path/slug
*/
const NON_SLUG_CHARS = /[^a-z0-9-]+/g;
const COLLAPSE_HYPHENS = /-+/g;
const TRIM_HYPHENS = /^-+|-+$/g;
/**
 * Turn arbitrary text into a slug usable as an OcPath segment.
 *
 * The text is lowercased and then run through these rewrites, in order:
 *   1. every `_` becomes `-`
 *   2. every run of characters outside `[a-z0-9-]` becomes a single `-`
 *   3. every run of `-` collapses to one `-`
 *   4. leading and trailing `-` are removed
 *
 * Text without any slug-valid character yields the empty string
 * (e.g., `"!!"` → `""`); callers should treat empty slugs as not
 * matchable rather than as wildcards.
 *
 * @param text - Heading or item text.
 * @returns The kebab-case lowercase slug, possibly empty.
 */
export function slugify(text: string): string {
  // Ordered rewrite passes; each one consumes the output of the previous.
  const passes: ReadonlyArray<readonly [RegExp, string]> = [
    [/_/g, "-"],
    [NON_SLUG_CHARS, "-"],
    [COLLAPSE_HYPHENS, "-"],
    [TRIM_HYPHENS, ""],
  ];
  let slug = text.toLowerCase();
  for (const [pattern, replacement] of passes) {
    slug = slug.replace(pattern, replacement);
  }
  return slug;
}

View File

@@ -0,0 +1,81 @@
import { describe, expect, it } from "vitest";
import { setMdOcPath as setOcPath } from "../edit.js";
import { parseOcPath } from "../oc-path.js";
import { parseMd } from "../parse.js";
// Frontmatter writes: the literal `[frontmatter]` section addresses a
// frontmatter key, and the edited text is observed through `ast.raw`.
describe("setOcPath — frontmatter", () => {
it("replaces a frontmatter value", () => {
const raw = `---
name: github
description: old desc
---
Body.
`;
const { ast } = parseMd(raw);
const r = setOcPath(ast, parseOcPath("oc://AGENTS.md/[frontmatter]/description"), "new desc");
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("description: new desc");
expect(r.ast.raw).not.toContain("old desc");
}
});
// A key that is absent from the frontmatter is reported, not created.
it("reports unresolved when the key is missing", () => {
const { ast } = parseMd("---\nname: x\n---\n");
const r = setOcPath(ast, parseOcPath("oc://AGENTS.md/[frontmatter]/nope"), "x");
expect(r).toEqual({ ok: false, reason: "unresolved" });
});
// A value containing `: ` is expected to come back double-quoted.
it("quotes values that need YAML-escaping", () => {
const { ast } = parseMd("---\nx: a\n---\n");
const r = setOcPath(ast, parseOcPath("oc://AGENTS.md/[frontmatter]/x"), "has: colon");
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain('x: "has: colon"');
}
});
});
// Item-field writes use the 4-segment shape section/item/field; each failure
// mode is pinned to its discriminated `reason`.
describe("setOcPath — item kv field", () => {
it("replaces an item kv value and reflects it in the rebuilt body", () => {
const raw = `## Boundaries
- enabled: true
- timeout: 5
`;
const { ast } = parseMd(raw);
const r = setOcPath(ast, parseOcPath("oc://AGENTS.md/boundaries/timeout/timeout"), "30");
expect(r.ok).toBe(true);
if (r.ok) {
// The targeted item changes; its sibling is left untouched.
expect(r.ast.raw).toContain("- timeout: 30");
expect(r.ast.raw).toContain("- enabled: true");
}
});
// The item resolves by slug but has no `key: value` shape to write into.
it("reports no-item-kv for an item without kv shape", () => {
const raw = `## Boundaries
- plain bullet
`;
const { ast } = parseMd(raw);
const r = setOcPath(
ast,
parseOcPath("oc://AGENTS.md/boundaries/plain-bullet/plain-bullet"),
"x",
);
expect(r).toEqual({ ok: false, reason: "no-item-kv" });
});
it("reports unresolved when section/item is missing", () => {
const { ast } = parseMd("## Other\n\n- foo: bar\n");
const r = setOcPath(ast, parseOcPath("oc://AGENTS.md/missing/foo/foo"), "x");
expect(r).toEqual({ ok: false, reason: "unresolved" });
});
// The path resolves (to a block), but a block is not a writable leaf.
it("reports not-writable for section-only addresses", () => {
const { ast } = parseMd("## Boundaries\n\n- enabled: true\n");
const r = setOcPath(ast, parseOcPath("oc://AGENTS.md/boundaries"), "x");
expect(r).toEqual({ ok: false, reason: "not-writable" });
});
});

View File

@@ -0,0 +1,106 @@
import { describe, expect, it } from "vitest";
import { emitMd } from "../emit.js";
import { parseMd } from "../parse.js";
import { OcEmitSentinelError } from "../sentinel.js";
// Default emit mode: parse followed by emit must reproduce the input exactly.
describe("emit — round-trip mode (default)", () => {
it("returns the raw bytes byte-for-byte", () => {
const raw = `---\nname: x\n---\n\n## Sec\n\n- a\n- b\n`;
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
});
// `\r\n` line endings must survive the round trip.
it("round-trips CRLF line endings", () => {
const raw = "## Heading\r\n\r\n- item\r\n";
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
});
it("round-trips a file with no frontmatter and no sections", () => {
const raw = "Just preamble. No structure.\n";
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
});
it("echoes raw bytes containing the sentinel by default; strict mode rejects", () => {
// Round-trip trusts parsed bytes — see emit.ts policy comment.
// Strict mode (acceptPreExistingSentinel: false) is the opt-in
// path for callers that want LKG-style fingerprint verification.
const raw = "## Section\n\n- token: __OPENCLAW_REDACTED__\n";
const { ast } = parseMd(raw);
expect(emitMd(ast)).toBe(raw);
expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
});
});
// Render mode: ASTs are hand-built with an empty `raw`, so every expected
// substring has to come from the structured fields rather than from echoed
// input text.
describe("emit — render mode", () => {
it("renders frontmatter + blocks", () => {
const ast = {
kind: "md" as const,
raw: "",
frontmatter: [
{ key: "name", value: "github", line: 2 },
{ key: "description", value: "gh CLI", line: 3 },
],
preamble: "",
blocks: [
{
heading: "Tools",
slug: "tools",
line: 5,
bodyText: "- gh: GitHub",
items: [{ text: "gh: GitHub", slug: "gh", line: 7, kv: { key: "gh", value: "GitHub" } }],
tables: [],
codeBlocks: [],
},
],
};
const output = emitMd(ast, { mode: "render" });
expect(output).toContain("name: github");
expect(output).toContain("description: gh CLI");
expect(output).toContain("## Tools");
expect(output).toContain("- gh: GitHub");
});
// A value containing `: ` is expected to be emitted double-quoted.
it("quotes frontmatter values containing special chars", () => {
const ast = {
kind: "md" as const,
raw: "",
frontmatter: [{ key: "title", value: "a: b", line: 2 }],
preamble: "",
blocks: [],
};
const output = emitMd(ast, { mode: "render" });
expect(output).toContain('title: "a: b"');
});
// The sentinel appears in both the block's `bodyText` and the item's kv
// value; render mode is expected to refuse to emit it.
it("throws if a kv item value matches the sentinel", () => {
const ast = {
kind: "md" as const,
raw: "",
frontmatter: [],
preamble: "",
blocks: [
{
heading: "Secrets",
slug: "secrets",
line: 1,
bodyText: "- token: __OPENCLAW_REDACTED__",
items: [
{
text: "token: __OPENCLAW_REDACTED__",
slug: "token",
line: 2,
kv: { key: "token", value: "__OPENCLAW_REDACTED__" },
},
],
tables: [],
codeBlocks: [],
},
],
};
expect(() => emitMd(ast, { mode: "render", fileNameForGuard: "AGENTS.md" })).toThrow(
OcEmitSentinelError,
);
});
});

View File

@@ -0,0 +1,743 @@
/**
* `findOcPaths` — multi-match search verb test surface.
*
* Tests cover: `*` single-segment expansion across all 4 kinds; `**`
* recursive descent for jsonc + yaml; the wildcard guard on
* `resolveOcPath` / `setOcPath`; the slot-shape preservation invariant
* (a `*` in the `item` slot produces concrete paths whose `item` field
* carries the matched value).
*/
import { describe, expect, it } from "vitest";
import { findOcPaths } from "../find.js";
import { parseJsonc } from "../jsonc/parse.js";
import { parseJsonl } from "../jsonl/parse.js";
import { formatOcPath, hasWildcard, OcPathError, parseOcPath } from "../oc-path.js";
import { parseMd } from "../parse.js";
import { resolveOcPath, setOcPath } from "../universal.js";
import { parseYaml } from "../yaml/parse.js";
// ---------- hasWildcard ----------------------------------------------------
// `hasWildcard` must flag `*` / `**` only when they form a whole segment or
// a whole dotted sub-segment.
describe("hasWildcard", () => {
it("detects single-segment * in any slot", () => {
expect(hasWildcard(parseOcPath("oc://X/*/y"))).toBe(true);
expect(hasWildcard(parseOcPath("oc://X/a/*"))).toBe(true);
expect(hasWildcard(parseOcPath("oc://X/a/b/*"))).toBe(true);
});
it("detects ** in any slot", () => {
expect(hasWildcard(parseOcPath("oc://X/**"))).toBe(true);
expect(hasWildcard(parseOcPath("oc://X/a/**/c"))).toBe(true);
});
it("detects wildcards inside dotted sub-segments", () => {
expect(hasWildcard(parseOcPath("oc://X/a.*.c"))).toBe(true);
expect(hasWildcard(parseOcPath("oc://X/a.**.c"))).toBe(true);
});
it("returns false for plain paths", () => {
expect(hasWildcard(parseOcPath("oc://X/a/b/c"))).toBe(false);
expect(hasWildcard(parseOcPath("oc://X/a.b.c"))).toBe(false);
});
// A `*` embedded in a longer identifier is part of the name, not a pattern.
it("treats `*` inside an identifier as literal", () => {
expect(hasWildcard(parseOcPath("oc://X/foo*bar"))).toBe(false);
expect(hasWildcard(parseOcPath("oc://X/a*"))).toBe(false);
});
});
// ---------- Wildcard guard on resolveOcPath / setOcPath -------------------
// Single-match verbs must refuse multi-match patterns: resolve throws,
// set returns a discriminated failure. Both point the caller at findOcPaths.
describe("wildcard guard", () => {
const yaml = parseYaml("steps:\n - id: a\n command: foo\n").ast;
it("resolveOcPath throws OcPathError for wildcard pattern (F16)", () => {
// Previously returned `null` — indistinguishable from "path doesn't
// resolve". Now throws with `OC_PATH_WILDCARD_IN_RESOLVE` so the
// CLI / consumers can surface "use findOcPaths" rather than "not
// found". setOcPath uses a discriminated `wildcard-not-allowed`
// reason; this is the resolve-side analogue.
expect(() => resolveOcPath(yaml, parseOcPath("oc://wf/steps/*/command"))).toThrow(
/findOcPaths/,
);
// Second form checks the error class and its machine-readable code.
try {
resolveOcPath(yaml, parseOcPath("oc://wf/**"));
expect.fail("should have thrown");
} catch (err) {
expect(err).toBeInstanceOf(OcPathError);
expect((err as OcPathError).code).toBe("OC_PATH_WILDCARD_IN_RESOLVE");
}
});
it("setOcPath returns wildcard-not-allowed for wildcard pattern", () => {
const r = setOcPath(yaml, parseOcPath("oc://wf/steps/*/command"), "bar");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("wildcard-not-allowed");
}
});
it("setOcPath wildcard guard reason carries actionable detail", () => {
const r = setOcPath(yaml, parseOcPath("oc://wf/**"), "bar");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.detail).toContain("findOcPaths");
}
});
});
// ---------- findOcPaths — fast-path (no wildcards) -------------------------
// A pattern without wildcards yields at most one match, and the returned
// path formats back to the input.
describe("findOcPaths — non-wildcard fast-path", () => {
it("wraps resolveOcPath result for plain path", () => {
const ast = parseYaml("name: x\n").ast;
const out = findOcPaths(ast, parseOcPath("oc://wf/name"));
expect(out).toHaveLength(1);
expect(out[0].match.kind).toBe("leaf");
expect(formatOcPath(out[0].path)).toBe("oc://wf/name");
});
// An unresolved plain path is an empty result, not an error.
it("returns empty for unresolved plain path", () => {
const ast = parseYaml("name: x\n").ast;
expect(findOcPaths(ast, parseOcPath("oc://wf/missing"))).toHaveLength(0);
});
});
// ---------- findOcPaths — YAML --------------------------------------------
// Fixture: a `steps` sequence of three mappings, each with `id` and `command`.
describe("findOcPaths — YAML kind", () => {
const yaml = parseYaml(
"steps:\n" +
" - id: build\n" +
" command: npm run build\n" +
" - id: test\n" +
" command: npm test\n" +
" - id: lint\n" +
" command: npm run lint\n",
).ast;
// Sequence elements are reported by zero-based index, in document order.
it("* in item slot enumerates each step", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf.lobster/steps/*/command"));
expect(out).toHaveLength(3);
const paths = out.map((m) => formatOcPath(m.path));
expect(paths).toEqual([
"oc://wf.lobster/steps/0/command",
"oc://wf.lobster/steps/1/command",
"oc://wf.lobster/steps/2/command",
]);
});
// The wildcard's slot (item) receives the concrete value; the other slots
// keep what the pattern specified.
it("preserves slot shape — concrete path has matched value in item slot", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/*/id"));
expect(out).toHaveLength(3);
for (const m of out) {
expect(m.path.section).toBe("steps");
expect(m.path.field).toBe("id");
expect(m.path.item).toMatch(/^[0-2]$/);
}
});
it("returns leaf valueText for each match", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/*/id"));
const leaves = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : null));
expect(leaves).toEqual(["build", "test", "lint"]);
});
it("** descends recursively", () => {
const yaml2 = parseYaml("a:\n b:\n c: deep\n d: shallow\n").ast;
const out = findOcPaths(yaml2, parseOcPath("oc://wf/**"));
// ** matches root + a + a.b + a.b.c + a.d
// Only the leaf matches are compared; container matches are filtered out.
const leaves = out
.filter((m) => m.match.kind === "leaf")
.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
expect(leaves.toSorted()).toEqual(["deep", "shallow"]);
});
it("returns empty for path that does not match", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/missing/*/x"));
expect(out).toHaveLength(0);
});
// Round-trip invariant: find output is valid single-match input.
it("every returned path is consumable by resolveOcPath", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/*/command"));
for (const m of out) {
const r = resolveOcPath(yaml, m.path);
expect(r).not.toBeNull();
expect(r?.kind).toBe("leaf");
}
});
});
// ---------- findOcPaths — JSONC --------------------------------------------
// Fixture: a `plugins` object with three entries, each holding a boolean `enabled`.
describe("findOcPaths — JSONC kind", () => {
const jsonc = parseJsonc(
"{\n" +
' "plugins": {\n' +
' "github": {"enabled": true},\n' +
' "gitlab": {"enabled": false},\n' +
' "slack": {"enabled": true}\n' +
" }\n" +
"}\n",
).ast;
// Object members are reported by key; keys are sorted before comparing, so
// this test does not pin enumeration order.
it("* in item slot enumerates each plugin", () => {
const out = findOcPaths(jsonc, parseOcPath("oc://config/plugins/*/enabled"));
expect(out).toHaveLength(3);
const keys = out.map((m) => m.path.item);
expect(keys.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""))).toEqual([
"github",
"gitlab",
"slack",
]);
});
it("returns boolean leaves with leafType", () => {
const out = findOcPaths(jsonc, parseOcPath("oc://config/plugins/*/enabled"));
for (const m of out) {
expect(m.match.kind).toBe("leaf");
if (m.match.kind === "leaf") {
expect(m.match.leafType).toBe("boolean");
}
}
});
});
// ---------- findOcPaths — JSONL --------------------------------------------
// Fixture: three JSON lines, each with `event` and `userId`. For JSONL the
// section slot addresses a line (`L<n>`) and the next slot a top-level field.
describe("findOcPaths — JSONL kind", () => {
  const jsonl = parseJsonl(
    '{"event":"start","userId":"u1"}\n' +
      '{"event":"action","userId":"u1"}\n' +
      '{"event":"end","userId":"u1"}\n',
  ).ast;
  it("* in section slot enumerates each value line", () => {
    const out = findOcPaths(jsonl, parseOcPath("oc://session/*/event"));
    expect(out).toHaveLength(3);
    const events = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
    expect(events).toEqual(["start", "action", "end"]);
  });
  it("preserves Lnnn line addresses in concrete paths", () => {
    const out = findOcPaths(jsonl, parseOcPath("oc://session/*/event"));
    for (const m of out) {
      expect(m.path.section).toMatch(/^L\d+$/);
    }
  });
  // F8 — line-slot union and predicate. Without these, yaml/jsonc
  // walkers handled them but JSONL fell through to `pickLine(addr)`
  // which returns null for union/predicate shapes → silent zero matches.
  // Title now names the alternatives the path actually uses (L1 and L3).
  it("union {L1,L3} at line slot enumerates each alternative", () => {
    const out = findOcPaths(jsonl, parseOcPath("oc://session/{L1,L3}/event"));
    expect(out).toHaveLength(2);
    const events = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
    expect(events).toEqual(["start", "end"]);
  });
  it("union of positional + literal line addresses works", () => {
    const out = findOcPaths(jsonl, parseOcPath("oc://session/{L1,$last}/event"));
    expect(out).toHaveLength(2);
    const events = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
    expect(events).toEqual(["start", "end"]);
  });
  it("predicate [event=action] at line slot filters by top-level field", () => {
    const out = findOcPaths(jsonl, parseOcPath("oc://session/[event=action]/userId"));
    expect(out).toHaveLength(1);
    // Assert the kind explicitly: the narrowing `if` below would otherwise
    // skip the value check silently when the match is not a leaf.
    expect(out[0]?.match.kind).toBe("leaf");
    if (out[0]?.match.kind === "leaf") {
      expect(out[0].match.valueText).toBe("u1");
    }
  });
  it("predicate [event=missing] at line slot matches zero lines (silent zero is correct)", () => {
    const out = findOcPaths(jsonl, parseOcPath("oc://session/[event=missing]/userId"));
    expect(out).toHaveLength(0);
  });
});
// ---------- Positional primitives ($first / $last / -N) -------------------
// `$first`, `$last` and negative indices against a three-element YAML
// sequence (ids a, b, c).
describe("positional primitives — yaml", () => {
const yaml = parseYaml("steps:\n - id: a\n - id: b\n - id: c\n").ast;
it("resolveOcPath accepts $first", () => {
const m = resolveOcPath(yaml, parseOcPath("oc://wf/steps/$first/id"));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("a");
}
});
it("resolveOcPath accepts $last", () => {
const m = resolveOcPath(yaml, parseOcPath("oc://wf/steps/$last/id"));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("c");
}
});
// `-2` counts from the end: second-to-last element.
it("resolveOcPath accepts negative index", () => {
const m = resolveOcPath(yaml, parseOcPath("oc://wf/steps/-2/id"));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("b");
}
});
it("out-of-range positional returns null", () => {
expect(resolveOcPath(yaml, parseOcPath("oc://wf/steps/-99/id"))).toBeNull();
});
it("positional on empty container returns null", () => {
const empty = parseYaml("steps: []\n").ast;
expect(resolveOcPath(empty, parseOcPath("oc://wf/steps/$first/id"))).toBeNull();
});
// find replaces the positional token with the concrete index it resolved to.
it("findOcPaths emits concrete index for positional", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/$last/id"));
expect(out).toHaveLength(1);
expect(out[0].path.item).toBe("2");
});
it("hasWildcard returns false for positional patterns", () => {
// Positional ≠ wildcard — they resolve deterministically.
expect(hasWildcard(parseOcPath("oc://X/$last/id"))).toBe(false);
expect(hasWildcard(parseOcPath("oc://X/-1/id"))).toBe(false);
});
});
// Positional tokens against JSONC arrays and objects; numeric leaves are
// compared through their `valueText` string form.
describe("positional primitives — jsonc", () => {
const jsonc = parseJsonc('{"items":[10,20,30]}').ast;
it("$first picks first array element", () => {
const m = resolveOcPath(jsonc, parseOcPath("oc://config/items/$first"));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("10");
}
});
it("$last picks last array element", () => {
const m = resolveOcPath(jsonc, parseOcPath("oc://config/items/$last"));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("30");
}
});
// On an object, position follows declaration order of the keys.
it("$first on object picks first-declared key", () => {
const obj = parseJsonc('{"a":1,"b":2,"c":3}').ast;
const m = resolveOcPath(obj, parseOcPath("oc://config/$first"));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("1");
}
});
});
// Positional tokens in the JSONL line slot, against three value lines
// (events start, step, end).
//
// Each test asserts the match kind before narrowing. Without that
// assertion the `if (m?.kind === "leaf")` guard would let the test pass
// vacuously when resolve returns null or a non-leaf match.
describe("positional primitives — jsonl", () => {
  const jsonl = parseJsonl('{"event":"start"}\n{"event":"step"}\n{"event":"end"}\n').ast;
  it("$first picks first value line", () => {
    const m = resolveOcPath(jsonl, parseOcPath("oc://session/$first/event"));
    expect(m?.kind).toBe("leaf");
    if (m?.kind === "leaf") {
      expect(m.valueText).toBe("start");
    }
  });
  it("$last picks last value line (existing behavior)", () => {
    const m = resolveOcPath(jsonl, parseOcPath("oc://session/$last/event"));
    expect(m?.kind).toBe("leaf");
    if (m?.kind === "leaf") {
      expect(m.valueText).toBe("end");
    }
  });
  it("-1 is alias for $last", () => {
    const m = resolveOcPath(jsonl, parseOcPath("oc://session/-1/event"));
    expect(m?.kind).toBe("leaf");
    if (m?.kind === "leaf") {
      expect(m.valueText).toBe("end");
    }
  });
});
// ---------- Segment unions {a,b,c} -----------------------------------------
// Fixture: three steps that each carry a different second key
// (`command`, `run`, `pipeline`), so a union selects a subset of steps.
describe("union segments — yaml", () => {
const yaml = parseYaml(
"steps:\n" +
" - id: a\n command: x\n" +
" - id: b\n run: y\n" +
" - id: c\n pipeline: z\n",
).ast;
// Step c has neither key, so only two matches come back.
it("{command,run} matches each step that has either field", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/*/{command,run}"));
expect(out).toHaveLength(2);
const fields = out.map((m) => m.path.field);
expect(fields.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""))).toEqual(["command", "run"]);
});
// Concrete paths carry the alternative that matched, never the `{…}` union.
it("preserves the chosen alternative in concrete paths", () => {
const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/*/{command,pipeline}"));
expect(out).toHaveLength(2);
for (const m of out) {
expect(["command", "pipeline"]).toContain(m.path.field);
}
});
it("unions on top-level keys", () => {
const yaml2 = parseYaml("a: 1\nb: 2\nc: 3\n").ast;
const out = findOcPaths(yaml2, parseOcPath("oc://X/{a,c}"));
expect(out).toHaveLength(2);
const values = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
expect(values.toSorted()).toEqual(["1", "3"]);
});
it("hasWildcard detects unions (single-match guard rejects them)", () => {
expect(hasWildcard(parseOcPath("oc://X/{a,b}"))).toBe(true);
// F16 — wildcard guard now throws OC_PATH_WILDCARD_IN_RESOLVE
// instead of returning silent null.
expect(() => resolveOcPath(parseYaml("a: 1\nb: 2\n").ast, parseOcPath("oc://X/{a,b}"))).toThrow(
/findOcPaths/,
);
});
});
// ---------- Value predicates [key=value] ----------------------------------
// `[key=value]` in the item slot selects sequence elements by a child
// field's value. Fixture: three steps with ids build, test, lint.
describe("value predicates — yaml", () => {
  const yaml = parseYaml(
    "steps:\n" +
      " - id: build\n command: npm run build\n" +
      " - id: test\n command: npm test\n" +
      " - id: lint\n command: npm run lint\n",
  ).ast;
  it("[id=test] selects the matching step", () => {
    const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/[id=test]/command"));
    expect(out).toHaveLength(1);
    // Assert the kind explicitly so the value check below cannot be
    // skipped silently by the narrowing `if`.
    expect(out[0].match.kind).toBe("leaf");
    if (out[0].match.kind === "leaf") {
      expect(out[0].match.valueText).toBe("npm test");
    }
    expect(out[0].path.item).toBe("1"); // concrete index of the matched step
  });
  it("predicate yields no matches when key/value missing", () => {
    expect(findOcPaths(yaml, parseOcPath("oc://wf/steps/[id=nonexistent]/command"))).toHaveLength(
      0,
    );
  });
  it("predicate concretizes the index — path round-trips through resolveOcPath", () => {
    const out = findOcPaths(yaml, parseOcPath("oc://wf/steps/[id=build]/command"));
    expect(out).toHaveLength(1);
    const resolved = resolveOcPath(yaml, out[0].path);
    expect(resolved?.kind).toBe("leaf");
  });
  it("predicate rejects single-match verbs (treated as wildcard)", () => {
    // F16 — wildcard guard throws on predicate too (predicate is a
    // multi-match shape; resolveOcPath is single-match only).
    expect(() => resolveOcPath(yaml, parseOcPath("oc://wf/steps/[id=build]"))).toThrow(
      /findOcPaths/,
    );
  });
});
// Double-quoted path segments let a key contain characters that would
// otherwise split the path (`/`, `.`) or be awkward unquoted (whitespace).
describe("quoted segments (v1.0)", () => {
// Evidence: openclaw#69004 — model alias `anthropic/claude-opus-4-7`.
// Slash inside the key has no other syntax that doesn't conflict with
// path-level slash split.
const jsonc = parseJsonc(
'{"agents":{"defaults":{"models":{' +
'"anthropic/claude-opus-4-7":{"alias":"opus47","contextWindow":1000000},' +
'"github-copilot/claude-opus-4.7-1m-internal":{"alias":"copilot-opus-1m","contextWindow":1000000},' +
'"plain":{"alias":"p","contextWindow":200000}' +
"}}}}",
).ast;
it("resolveOcPath — quoted segment with literal slash", () => {
const m = resolveOcPath(
jsonc,
parseOcPath('oc://config/agents.defaults.models/"anthropic/claude-opus-4-7"/alias'),
);
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("opus47");
}
});
it("resolveOcPath — quoted segment with literal slash AND dot", () => {
const m = resolveOcPath(
jsonc,
parseOcPath(
'oc://config/agents.defaults.models/"github-copilot/claude-opus-4.7-1m-internal"/alias',
),
);
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("copilot-opus-1m");
}
});
it("quoted segment with whitespace", () => {
const ast = parseJsonc('{"prompts":{"hello world":"value"}}').ast;
const m = resolveOcPath(ast, parseOcPath('oc://X/prompts/"hello world"'));
expect(m?.kind).toBe("leaf");
if (m?.kind === "leaf") {
expect(m.valueText).toBe("value");
}
});
it("quoted segment with embedded escape sequences", () => {
// Key literally contains a backslash and a quote.
// NOTE(review): the fixture defines two keys (`a\b` and `c"d`) but only
// the backslash key is resolved and asserted below; the quote-bearing
// key is never exercised — confirm whether a second assertion is intended.
const ast = parseJsonc('{"keys":{"a\\\\b":"v1","c\\"d":"v2"}}').ast;
const m1 = resolveOcPath(ast, parseOcPath('oc://X/keys/"a\\\\b"'));
expect(m1?.kind).toBe("leaf");
if (m1?.kind === "leaf") {
expect(m1.valueText).toBe("v1");
}
});
it("findOcPaths — wildcard returns paths with quoted keys when needed", () => {
const out = findOcPaths(jsonc, parseOcPath("oc://config/agents.defaults.models/*/alias"));
expect(out).toHaveLength(3);
// The two slash-bearing keys round-trip via quotes; `plain` stays bare.
const items = out.map((m) => m.path.item);
expect(items.some((s) => s === "plain")).toBe(true);
expect(items.some((s) => s === '"anthropic/claude-opus-4-7"')).toBe(true);
expect(items.some((s) => s === '"github-copilot/claude-opus-4.7-1m-internal"')).toBe(true);
});
// Round-trip invariant: find output, quotes included, is valid resolve input.
it("findOcPaths — emitted paths round-trip through resolveOcPath", () => {
const out = findOcPaths(jsonc, parseOcPath("oc://config/agents.defaults.models/*/alias"));
for (const m of out) {
const r = resolveOcPath(jsonc, m.path);
expect(r?.kind).toBe("leaf");
}
});
it("rejects unbalanced quotes at parse time", () => {
expect(() => parseOcPath('oc://X/"unterminated')).toThrow(/Unbalanced/);
});
// Quoting does not exempt a segment from the control-character check.
it("control characters still rejected inside quotes", () => {
expect(() => parseOcPath('oc://X/"\x00"')).toThrow(/Control character/);
});
});
describe("value predicates — numeric operators (v1.1)", () => {
  // Evidence: openclaw#54383 — compaction fails when maxTokens > model output cap.
  // Doctor lint rule: flag any model with maxTokens > 128000 (Anthropic per-request output cap).
  const jsonc = parseJsonc(
    '{"models":{"providers":{"anthropic":{"models":[' +
      '{"id":"claude-sonnet-4-6","contextWindow":1000000,"maxTokens":128000},' +
      '{"id":"claude-opus-4-7","contextWindow":1000000,"maxTokens":240000},' +
      '{"id":"claude-sonnet-4-7","contextWindow":200000,"maxTokens":64000}' +
      "]}}}}",
  ).ast;
  // Slot layout: section=`models.providers.anthropic.models`, item=predicate, field=`id`.
  const PREFIX = "oc://config/models.providers.anthropic.models";

  it("> finds models exceeding the per-request output cap", () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>128000]/id`));
    expect(out).toHaveLength(1);
    // Pin the match kind before narrowing: without this, a non-leaf match
    // would skip the value assertion below and the test would pass vacuously.
    expect(out[0].match.kind).toBe("leaf");
    if (out[0].match.kind === "leaf") {
      expect(out[0].match.valueText).toBe("claude-opus-4-7");
    }
  });

  it(">= matches the boundary", () => {
    // 128000 itself must be included, so two models qualify.
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>=128000]/id`));
    const ids = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
    expect(ids.toSorted()).toEqual(["claude-opus-4-7", "claude-sonnet-4-6"]);
  });

  it("< filters small context windows", () => {
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[contextWindow<500000]/id`));
    expect(out).toHaveLength(1);
    // Same guard as above: assert the kind so the narrowed check cannot be skipped.
    expect(out[0].match.kind).toBe("leaf");
    if (out[0].match.kind === "leaf") {
      expect(out[0].match.valueText).toBe("claude-sonnet-4-7");
    }
  });

  it("<= matches the boundary", () => {
    // 200000 itself must be included; only one model has a window that small.
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[contextWindow<=200000]/id`));
    const ids = out.map((m) => (m.match.kind === "leaf" ? m.match.valueText : ""));
    expect(ids).toEqual(["claude-sonnet-4-7"]);
  });

  it("numeric operator rejects non-numeric leaves silently", () => {
    // String leaf, numeric op — predicate doesn't match (no false positive).
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[id>5]/id`));
    expect(out).toHaveLength(0);
  });

  it("rejects numeric predicate value that is not a number", () => {
    // A non-numeric right-hand side yields zero matches rather than an error.
    const out = findOcPaths(jsonc, parseOcPath(`${PREFIX}/[maxTokens>foo]/id`));
    expect(out).toHaveLength(0);
  });
});
describe("value predicates — jsonc", () => {
  // Three plugins; `slack` is the only one with enabled=false.
  const jsonc = parseJsonc(
    '{"plugins":{"github":{"enabled":true,"role":"vcs"},"slack":{"enabled":false,"role":"chat"},"jira":{"enabled":true,"role":"tracker"}}}',
  ).ast;

  it("[enabled=true] filters by sibling boolean", () => {
    const pattern = parseOcPath("oc://config/plugins/[enabled=true]/role");
    const matches = findOcPaths(jsonc, pattern);
    expect(matches).toHaveLength(2);
    // Non-leaf matches collapse to "" so they would fail the comparison below.
    const roles = matches.map(({ match }) => {
      if (match.kind === "leaf") {
        return match.valueText;
      }
      return "";
    });
    expect(roles.toSorted()).toEqual(["tracker", "vcs"]);
  });
});
// ---------- Ordinal addressing (#N) for distinct duplicate slugs ----------
describe("ordinal addressing — md", () => {
  // `foo: a` and `foo: b` share a slug; `bar: c` is the odd one out.
  const md = parseMd("## Tools\n\n- foo: a\n- foo: b\n- bar: c\n").ast;

  it("#0 picks the first item by document order", () => {
    const hit = resolveOcPath(md, parseOcPath("oc://AGENTS.md/tools/#0/foo"));
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind !== "leaf") {
      return;
    }
    expect(hit.valueText).toBe("a");
  });

  it("#1 picks the second item — distinct from #0 even though slug collides", () => {
    const hit = resolveOcPath(md, parseOcPath("oc://AGENTS.md/tools/#1/foo"));
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind !== "leaf") {
      return;
    }
    expect(hit.valueText).toBe("b");
  });

  it("out-of-range #N returns null", () => {
    const beyondEnd = parseOcPath("oc://AGENTS.md/tools/#99/foo");
    expect(resolveOcPath(md, beyondEnd)).toBeNull();
  });

  it("findOcPaths disambiguates duplicate-slug items via #N", () => {
    const matches = findOcPaths(md, parseOcPath("oc://AGENTS.md/tools/*/foo"));
    // Two items carry the `foo` key; the `bar` item does not match the field.
    expect(matches).toHaveLength(2);
    const itemSlots = matches.map((found) => found.path.item);
    expect(itemSlots).toEqual(["#0", "#1"]);
    const values = matches.map(({ match }) => {
      if (match.kind === "leaf") {
        return match.valueText;
      }
      return "";
    });
    expect(values.toSorted()).toEqual(["a", "b"]);
  });

  it("non-duplicate slug keeps slug form (back-compat)", () => {
    const uniqueSlugs = parseMd("## Tools\n\n- foo: a\n- bar: b\n").ast;
    const matches = findOcPaths(uniqueSlugs, parseOcPath("oc://AGENTS.md/tools/*"));
    const itemSlots = matches.map((found) => found.path.item);
    // With no collision, both items are emitted under their slug rather than `#N`.
    const sorted = itemSlots.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""));
    expect(sorted).toEqual(["bar", "foo"]);
  });
});
// ---------- findOcPaths — Markdown -----------------------------------------
describe("findOcPaths — Markdown kind", () => {
  // Fixture: two frontmatter keys plus a `Tools` section with three kv items.
  const md = parseMd(
    "---\nname: drafter\nrole: writer\n---\n\n" +
      "## Tools\n\n" +
      "- send_email: enabled\n" +
      "- search: enabled\n" +
      "- read_email: disabled\n",
  ).ast;

  it("* in field slot enumerates frontmatter keys", () => {
    const matches = findOcPaths(md, parseOcPath("oc://SOUL.md/[frontmatter]/*"));
    expect(matches).toHaveLength(2);
    // The key may surface in either the item or the field slot; accept both.
    const keys = matches.map(({ path }) => path.item ?? path.field);
    const sorted = keys.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""));
    expect(sorted).toEqual(["name", "role"]);
  });

  it("* in field slot enumerates each item kv key", () => {
    // The item is addressed by the slug of its kv key ('send_email' → 'send-email').
    const matches = findOcPaths(md, parseOcPath("oc://SKILL.md/Tools/send-email/*"));
    expect(matches).toHaveLength(1);
    const { match } = matches[0];
    expect(match.kind).toBe("leaf");
    if (match.kind !== "leaf") {
      return;
    }
    expect(match.valueText).toBe("enabled");
  });

  it("* in item slot + matching field returns each item whose kv key matches", () => {
    // Only `- send_email: enabled` has the kv key `send_email`; the other two
    // items (search, read_email) use different keys and must not match.
    const matches = findOcPaths(md, parseOcPath("oc://SKILL.md/Tools/*/send_email"));
    expect(matches).toHaveLength(1);
    const { path } = matches[0];
    expect(path.item).toBe("send-email");
  });

  it("** at section slot matches items at every depth (F14 — cross-kind symmetry)", () => {
    // Regression pin for `**` in Markdown: the yaml/jsonc walkers keep `**`
    // active while descending, and the md walker must do the same. If `**`
    // were consumed at the section layer only, a multi-block file would
    // yield zero matches for lint rules that sweep every section.
    //
    // Pattern `**/send-email`: `**` has to reach the `tools` block so that
    // the kebab slug `send-email` can match the item inside it.
    const multiBlock = parseMd(
      "## Boundaries\n\n" +
        "- never: rm -rf\n\n" +
        "## Tools\n\n" +
        "- send_email: enabled\n" +
        "- search: enabled\n",
    ).ast;
    const matches = findOcPaths(multiBlock, parseOcPath("oc://SOUL.md/**/send-email"));
    // At least one match is required, and one of them must be the item itself.
    expect(matches.length).toBeGreaterThanOrEqual(1);
    const itemSlots = matches
      .map(({ path }) => path.item)
      .filter((v): v is string => v !== undefined);
    expect(itemSlots).toContain("send-email");
  });
});
describe("findOcPaths — quoted segments survive expansion (regression: resolve↔find symmetry)", () => {
  it("finds keys with slashes when the path quotes them and a sibling wildcards", () => {
    // Regression (ClawSweeper P2 on PR #78678): once a pattern required
    // expansion (trailing union or wildcard), the JSONC walker skipped
    // `resolveJsoncOcPath` and compared object keys against the raw
    // `cur.value`, so quoted literals matched nothing even though resolve
    // succeeded. This case combines a quoted middle segment with a
    // trailing union.
    const raw = `{
"agents": {
"defaults": {
"models": {
"github-copilot/claude-opus-4-7": {
"alias": "opus-internal",
"contextWindow": 200000
}
}
}
}
}
`;
    const doc = parseJsonc(raw).ast;
    const pattern = parseOcPath(
      'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/{alias,contextWindow}',
    );
    const matches = findOcPaths(doc, pattern);
    // One match per alternative in the union.
    expect(matches.length).toBe(2);
    const fieldSlots = matches.map(({ path }) => path.field);
    const sorted = fieldSlots.toSorted((a, b) => (a ?? "").localeCompare(b ?? ""));
    expect(sorted).toEqual(["alias", "contextWindow"]);
  });
});

View File

@@ -0,0 +1,17 @@
## Roles
- planner: breaks down user goals into tasks
- executor: runs the planned tasks one at a time
- reviewer: checks output before user-visible writes
## Tools
- gh: GitHub CLI for issues, PRs, CI
- curl: HTTP client
- rg: ripgrep — fast file content search
## Boundaries
- never edit /etc, /usr, or system paths
- always confirm before destructive operations
- read SOUL.md before each session for persona context

View File

@@ -0,0 +1,17 @@
# Workspace bootstrap
This is the first thing the agent reads on a fresh workspace. Once
the user finishes setup (filling in SOUL.md, USER.md, etc.),
BOOTSTRAP.md gets removed and the workspace is "live."
## Setup checklist
- review SOUL.md and add personal context
- review USER.md and add role/preferences
- run `openclaw doctor` to verify config + workspace are valid
- confirm the gateway can reach your providers
## Removing this file
When the checklist is complete, delete BOOTSTRAP.md. The runtime
detects its absence as "setup complete."

View File

@@ -0,0 +1,16 @@
## Every 30m wake
- check unread Slack DMs in #incidents
- summarize new PR review comments since last wake
- if any test fails on main, surface to user immediately
## Every 4h wake
- compile a brief status summary of in-flight tasks
- check Linear for new high-priority issues
- update the daily log entry
## On user-presence wake
- briefly orient on what changed since last user interaction
- prioritize incoming items by urgency

View File

@@ -0,0 +1,19 @@
## Organization
Example Org / Platform Team
## Team
OpenClaw infrastructure & tooling
## Trust Level
internal-trusted
## Region
us-west
## Compliance scope
SOC 2 Type II + FedRAMP Moderate (in audit)

View File

@@ -0,0 +1,18 @@
---
scope: project
---
## User prefers async communication
The user has mentioned twice (sessions 2026-04-15 and 2026-04-22) that
they prefer Slack DMs over meetings for short questions.
## Project uses TypeScript with strict mode
The codebase enforces `strict: true` and `noUncheckedIndexedAccess`.
Avoid `any`; prefer `unknown` with narrowing.
## Deploy on Tuesdays only
Production deploys happen Tue 9am-12pm Pacific. Outside that window,
deploys go to staging and wait for the next Tuesday window.

View File

@@ -0,0 +1,38 @@
---
name: github
description: Use gh for GitHub issues, PR status, CI/logs, comments, reviews, releases, and API queries.
tier: T1
tools:
- gh
- bash
trigger_phrases:
- github
- pr
- issue
- workflow
metadata: { "openclaw": { "emoji": "🐙", "requires": { "bins": ["gh"] } } }
user-invocable: true
---
# When to use
Use this skill when the user asks anything about GitHub: issues, pull
requests, CI runs, releases, comments, code review, or organizational
metadata. Prefer the `gh` CLI over web URLs — `gh` handles auth,
pagination, and structured output natively.
## Common commands
```bash
gh pr view 123 # view PR details
gh pr checks 123 # CI status
gh issue list --state open # list open issues
gh run list -L 5 # last 5 workflow runs
gh release create v1.2.3 # cut a release
```
## When NOT to use
- The user's repo is on a non-GitHub forge (GitLab, Gitea, Bitbucket).
Use the appropriate CLI instead.
- Operations that require admin permissions the agent doesn't have.

View File

@@ -0,0 +1,17 @@
# Persona
I'm a thoughtful, methodical assistant. I ask clarifying questions
when the user's request is ambiguous, and I'd rather be slightly
slower than confidently wrong.
## Voice
- terse and direct
- no filler words
- code snippets > prose when explaining technical things
## Boundaries
- never write to /etc or system paths
- always confirm before deleting files
- redact secrets from logs and audit trails

View File

@@ -0,0 +1,21 @@
## Tool Guidance
| tool | guidance |
| ---- | ------------------------------------------------------------- |
| gh | Use for GitHub operations (issues, PRs, CI). Prefer over web. |
| curl | HTTP client. Use --silent for clean output. |
| rg | ripgrep — content search. Faster than grep for code. |
| fd | find replacement. Use over `find` when available. |
## Allow / Deny
- enabled: gh
- enabled: curl
- enabled: rg
- enabled: fd
- disabled: legacy-tool
## Notes
The agent reads this file at session start; runtime tool gates honor
the `enabled` flags.

View File

@@ -0,0 +1,16 @@
## Role
Senior PM working on AI runtime + governance layers. Reports to a VP-level
stakeholder; coordinates across 4-6 engineering teams.
## Preferences
- async-first communication (Slack DMs > meetings)
- terse responses; avoid filler
- code snippets > prose for technical detail
- always include repo:file:line citations for code claims
## Working hours
- Mon-Fri 9am-6pm Pacific
- occasional evening for sync with EU teams

View File

@@ -0,0 +1,186 @@
import { describe, expect, it } from "vitest";
import { setJsoncOcPath } from "../../jsonc/edit.js";
import { emitJsonc } from "../../jsonc/emit.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { parseOcPath } from "../../oc-path.js";
describe("setJsoncOcPath — value replacement", () => {
  // Shared fixture: a single string leaf at plugins.entries.github.token.
  const config = `{
"plugins": {
"entries": {
"github": {
"token": "old"
}
}
}
}`;

  it("replaces a leaf string value", () => {
    const doc = parseJsonc(config).ast;
    const target = parseOcPath("oc://config/plugins.entries.github.token");
    const result = setJsoncOcPath(doc, target, { kind: "string", value: "new" });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    const emitted = emitJsonc(result.ast);
    expect(JSON.parse(emitted)).toEqual({
      plugins: { entries: { github: { token: "new" } } },
    });
  });

  it("replaces nested objects", () => {
    const doc = parseJsonc(config).ast;
    // Swap the whole `entries` object for one holding a single `gitlab` key.
    const result = setJsoncOcPath(doc, parseOcPath("oc://config/plugins.entries"), {
      kind: "object",
      entries: [{ key: "gitlab", line: 0, value: { kind: "string", value: "tok" } }],
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    expect(JSON.parse(emitJsonc(result.ast))).toEqual({
      plugins: { entries: { gitlab: "tok" } },
    });
  });

  it("replaces an array element by index", () => {
    const doc = parseJsonc('{ "limits": [10, 20, 30] }').ast;
    const result = setJsoncOcPath(doc, parseOcPath("oc://config/limits.1"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    expect(JSON.parse(emitJsonc(result.ast))).toEqual({ limits: [10, 99, 30] });
  });

  it("reports unresolved when a key is missing", () => {
    const doc = parseJsonc(config).ast;
    // `gitlab` is not present under plugins.entries in the fixture.
    const missing = parseOcPath("oc://config/plugins.entries.gitlab");
    const result = setJsoncOcPath(doc, missing, { kind: "string", value: "x" });
    expect(result).toEqual({ ok: false, reason: "unresolved" });
  });

  it("reports no-root on empty AST", () => {
    const emptyDoc = parseJsonc("").ast;
    const result = setJsoncOcPath(emptyDoc, parseOcPath("oc://config/x"), {
      kind: "string",
      value: "y",
    });
    expect(result).toEqual({ ok: false, reason: "no-root" });
  });

  it("does not mutate the original AST", () => {
    const doc = parseJsonc(config).ast;
    // Snapshot the input AST, perform an edit, then compare snapshots.
    const snapshot = JSON.stringify(doc);
    setJsoncOcPath(doc, parseOcPath("oc://config/plugins.entries.github.token"), {
      kind: "string",
      value: "new",
    });
    expect(JSON.stringify(doc)).toBe(snapshot);
  });
});
describe("setJsoncOcPath — positional tokens (round-11 resolve↔edit symmetry)", () => {
  // ClawSweeper round-11 P2: `$first` / `$last` / `-N` used to resolve on
  // the read path only. These tests pin that an edit through a positional
  // address lands on the same child `resolveJsoncOcPath` would return.
  const THREE_ITEMS = '{ "items": [10, 20, 30] }';

  it("edits the first array element via $first", () => {
    const doc = parseJsonc(THREE_ITEMS).ast;
    const result = setJsoncOcPath(doc, parseOcPath("oc://config.jsonc/items/$first"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    expect(JSON.parse(emitJsonc(result.ast))).toEqual({ items: [99, 20, 30] });
  });

  it("edits the last array element via $last", () => {
    const doc = parseJsonc(THREE_ITEMS).ast;
    const result = setJsoncOcPath(doc, parseOcPath("oc://config.jsonc/items/$last"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    expect(JSON.parse(emitJsonc(result.ast))).toEqual({ items: [10, 20, 99] });
  });

  it("edits the second-to-last array element via -2", () => {
    const doc = parseJsonc(THREE_ITEMS).ast;
    const result = setJsoncOcPath(doc, parseOcPath("oc://config.jsonc/items/-2"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    expect(JSON.parse(emitJsonc(result.ast))).toEqual({ items: [10, 99, 30] });
  });

  it("edits the first object entry value via $first", () => {
    // On an object, `$first` addresses the first entry; its value is replaced.
    const doc = parseJsonc('{ "a": 1, "b": 2, "c": 3 }').ast;
    const result = setJsoncOcPath(doc, parseOcPath("oc://config.jsonc/$first"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    expect(JSON.parse(emitJsonc(result.ast))).toEqual({ a: 99, b: 2, c: 3 });
  });

  it("reports unresolved for $first against an empty array", () => {
    const doc = parseJsonc('{ "items": [] }').ast;
    const result = setJsoncOcPath(doc, parseOcPath("oc://config.jsonc/items/$first"), {
      kind: "number",
      value: 99,
    });
    expect(result).toEqual({ ok: false, reason: "unresolved" });
  });
});
describe("setJsoncOcPath — quoted segments (regression: resolve↔edit symmetry)", () => {
  it("edits a key containing slashes via quoted segment", () => {
    // The provider/model alias key contains `/`, so an unquoted spelling
    // would split into two segments. `resolveJsoncOcPath` accepts the quoted
    // form; `setJsoncOcPath` has to agree or the address is read-only.
    // Closes ClawSweeper P2 on PR #78678.
    const raw = `{
"agents": {
"defaults": {
"models": {
"anthropic/claude-opus-4-7": { "alias": "opus" }
}
}
}
}
`;
    const doc = parseJsonc(raw).ast;
    const target = parseOcPath(
      'oc://config.jsonc/agents.defaults.models/"anthropic/claude-opus-4-7"/alias',
    );
    const result = setJsoncOcPath(doc, target, { kind: "string", value: "big-opus" });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    const reparsed = JSON.parse(emitJsonc(result.ast));
    expect(reparsed).toEqual({
      agents: {
        defaults: {
          models: {
            "anthropic/claude-opus-4-7": { alias: "big-opus" },
          },
        },
      },
    });
  });
});

View File

@@ -0,0 +1,87 @@
import { describe, expect, it } from "vitest";
import { emitJsonc } from "../../jsonc/emit.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../../sentinel.js";
describe("emitJsonc — round-trip", () => {
  it("returns raw bytes verbatim by default", () => {
    // Comments and a trailing comma must survive a parse → emit cycle untouched.
    const raw = `{
// comment is preserved on round-trip
"x": 1,
"y": [/* inline */ 2, 3],
}
`;
    const doc = parseJsonc(raw).ast;
    const emitted = emitJsonc(doc);
    expect(emitted).toBe(raw);
  });

  it("echoes pre-existing sentinel bytes by default; strict mode rejects", () => {
    // Default round-trip trusts bytes that were already in the parsed file:
    // workspace files can legitimately contain the sentinel (code blocks,
    // pasted error logs), and refusing them would turn emit into a
    // workspace-wide DoS. Rejection is opt-in via
    // `acceptPreExistingSentinel: false`.
    const raw = `{ "x": "${REDACTED_SENTINEL}" }`;
    const doc = parseJsonc(raw).ast;
    expect(emitJsonc(doc)).toBe(raw);
    const strictEmit = () =>
      emitJsonc(doc, { fileNameForGuard: "config", acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
});
describe("emitJsonc — render mode", () => {
  // Builds an AST whose root was swapped after parsing for an object with a
  // single string leaf `x`. Both sentinel tests need this shape; only the
  // leaf text differs.
  const astWithStringLeaf = (leaf: string) => {
    const parsed = parseJsonc('{ "x": "ok" }').ast;
    return {
      ...parsed,
      root: {
        kind: "object" as const,
        entries: [
          {
            key: "x",
            line: 1,
            value: { kind: "string" as const, value: leaf },
          },
        ],
      },
    };
  };

  it("re-stringifies the structural tree (no comments)", () => {
    const doc = parseJsonc('{ /* drop me */ "x": 1, "y": [2, 3] }').ast;
    const rendered = emitJsonc(doc, { mode: "render" });
    expect(rendered).not.toContain("drop me");
    expect(JSON.parse(rendered)).toEqual({ x: 1, y: [2, 3] });
  });

  it("throws OcEmitSentinelError when a leaf string is the sentinel", () => {
    const tampered = astWithStringLeaf(REDACTED_SENTINEL);
    const render = () => emitJsonc(tampered, { mode: "render" });
    expect(render).toThrow(OcEmitSentinelError);
  });

  it("throws when a leaf string EMBEDS the sentinel (prefix/suffix wrap)", () => {
    // Regression: render mode once compared the leaf to the sentinel with an
    // exact match, so a sentinel wrapped in other text slipped through. The
    // round-trip path used a substring check; render has to behave the same.
    const tampered = astWithStringLeaf(`prefix-${REDACTED_SENTINEL}-suffix`);
    const render = () => emitJsonc(tampered, { mode: "render" });
    expect(render).toThrow(OcEmitSentinelError);
  });

  it("renders empty AST as empty string", () => {
    const emptyDoc = parseJsonc("").ast;
    expect(emitJsonc(emptyDoc, { mode: "render" })).toBe("");
  });
});

View File

@@ -0,0 +1,144 @@
import { describe, expect, it } from "vitest";
import { parseJsonc } from "../../jsonc/parse.js";
describe("parseJsonc — basic shapes", () => {
  it("parses an empty object", () => {
    const parsed = parseJsonc("{}");
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.kind).toBe("jsonc");
    expect(parsed.ast.root).toEqual({ kind: "object", entries: [], line: 1 });
  });

  it("parses an empty array", () => {
    const parsed = parseJsonc("[]");
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual({ kind: "array", items: [], line: 1 });
  });

  it("parses an empty input as null root", () => {
    const parsed = parseJsonc("");
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toBeNull();
  });

  it("parses scalars", () => {
    // Each row: raw input → expected root node. All inputs sit on line 1.
    const scalarCases: Array<[string, unknown]> = [
      ["42", { kind: "number", value: 42, line: 1 }],
      ["-3.14", { kind: "number", value: -3.14, line: 1 }],
      ["1e3", { kind: "number", value: 1000, line: 1 }],
      ['"hello"', { kind: "string", value: "hello", line: 1 }],
      ["true", { kind: "boolean", value: true, line: 1 }],
      ["false", { kind: "boolean", value: false, line: 1 }],
      ["null", { kind: "null", line: 1 }],
    ];
    for (const [raw, expectedRoot] of scalarCases) {
      expect(parseJsonc(raw).ast.root).toEqual(expectedRoot);
    }
  });

  it("parses nested object/array", () => {
    const raw = '{ "plugins": { "entries": ["a", "b"] } }';
    const parsed = parseJsonc(raw);
    expect(parsed.diagnostics).toEqual([]);
    // Expected tree, assembled innermost-first. Every node reports line 1
    // because the input is a single line.
    const entriesArray = {
      kind: "array",
      line: 1,
      items: [
        { kind: "string", value: "a", line: 1 },
        { kind: "string", value: "b", line: 1 },
      ],
    };
    const pluginsObject = {
      kind: "object",
      line: 1,
      entries: [{ key: "entries", line: 1, value: entriesArray }],
    };
    expect(parsed.ast.root).toEqual({
      kind: "object",
      line: 1,
      entries: [{ key: "plugins", line: 1, value: pluginsObject }],
    });
  });

  it("preserves raw on the AST root for byte-fidelity emit", () => {
    // The original text is kept on `ast.raw`.
    const raw = '{\n "x": 1\n}\n';
    const doc = parseJsonc(raw).ast;
    expect(doc.raw).toBe(raw);
  });
});
describe("parseJsonc — JSONC extensions", () => {
  it("skips line comments", () => {
    // `"x"` sits on the third line of the input, which the node lines reflect.
    const raw = `{
// comment
"x": 1 // trailing comment
}`;
    const parsed = parseJsonc(raw);
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual({
      kind: "object",
      line: 1,
      entries: [{ key: "x", value: { kind: "number", value: 1, line: 3 }, line: 3 }],
    });
  });

  it("skips block comments", () => {
    // Block comments appear both before the key and between `:` and the value.
    const raw = '{ /* hi */ "x": /* mid */ 1 }';
    const parsed = parseJsonc(raw);
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual({
      kind: "object",
      line: 1,
      entries: [{ key: "x", value: { kind: "number", value: 1, line: 1 }, line: 1 }],
    });
  });

  it("tolerates trailing commas in objects", () => {
    const parsed = parseJsonc('{ "x": 1, }');
    expect(parsed.diagnostics).toEqual([]);
    expect(parsed.ast.root).toEqual({
      kind: "object",
      line: 1,
      entries: [{ key: "x", value: { kind: "number", value: 1, line: 1 }, line: 1 }],
    });
  });

  it("tolerates trailing commas in arrays", () => {
    const doc = parseJsonc("[1, 2, 3,]").ast;
    const expectedItems = [1, 2, 3].map((value) => ({ kind: "number", value, line: 1 }));
    expect(doc.root).toEqual({
      kind: "array",
      line: 1,
      items: expectedItems,
    });
  });

  it("handles escape sequences in strings", () => {
    // \n, \t and a \u escape are all decoded into the string value.
    const doc = parseJsonc('"a\\nb\\tc\\u0041"').ast;
    expect(doc.root).toEqual({ kind: "string", value: "a\nb\tcA", line: 1 });
  });
});
describe("parseJsonc — soft errors", () => {
  it("returns null root + error diagnostic on unrecoverable input", () => {
    // Missing `:` between key and value — the parser reports instead of throwing.
    const parsed = parseJsonc('{ "x" 1 }');
    expect(parsed.ast.root).toBeNull();
    expect(parsed.diagnostics).toHaveLength(1);
    const [first] = parsed.diagnostics;
    expect(first?.severity).toBe("error");
  });

  it("warns on trailing input after a valid value", () => {
    const parsed = parseJsonc("1 garbage");
    expect(parsed.diagnostics).toHaveLength(1);
    const [first] = parsed.diagnostics;
    expect(first?.severity).toBe("warning");
    expect(first?.code).toBe("OC_JSONC_TRAILING_INPUT");
  });
});

View File

@@ -0,0 +1,76 @@
import { describe, expect, it } from "vitest";
import { parseJsonc } from "../../jsonc/parse.js";
import { resolveJsoncOcPath } from "../../jsonc/resolve.js";
import { parseOcPath } from "../../oc-path.js";
/**
 * Test shorthand: parse `raw` as JSONC, parse `ocPath` as an oc:// path, and
 * resolve the path against the parsed AST.
 *
 * @param raw - JSONC source text.
 * @param ocPath - oc:// address to resolve.
 * @returns Whatever `resolveJsoncOcPath` returns for that AST and path.
 */
function rs(raw: string, ocPath: string) {
  return resolveJsoncOcPath(parseJsonc(raw).ast, parseOcPath(ocPath));
}
describe("resolveJsoncOcPath", () => {
  // Fixture: a nested `plugins.entries.github` object plus a top-level array.
  const config = `{
"plugins": {
"entries": {
"github": {
"token": "secret",
"enabled": true
}
}
},
"limits": [10, 20, 30]
}`;

  it("resolves the root when no segments are given", () => {
    expect(rs(config, "oc://config")?.kind).toBe("root");
  });

  it("walks dotted section paths", () => {
    const hit = rs(config, "oc://config/plugins.entries.github.token");
    expect(hit?.kind).toBe("object-entry");
    if (hit?.kind !== "object-entry") {
      return;
    }
    expect(hit.node.key).toBe("token");
    expect(hit.node.value).toMatchObject({ kind: "string", value: "secret" });
  });

  it("walks 4-segment slash paths up to OcPath depth limit", () => {
    const hit = rs(config, "oc://config/plugins/entries/github");
    expect(hit?.kind).toBe("object-entry");
    if (hit?.kind !== "object-entry") {
      return;
    }
    expect(hit.node.key).toBe("github");
  });

  it("walks mixed dotted+slash paths", () => {
    // One slash-separated segment followed by a dotted tail.
    expect(rs(config, "oc://config/plugins/entries.github.token")?.kind).toBe("object-entry");
  });

  it("indexes into arrays via numeric segments", () => {
    const hit = rs(config, "oc://config/limits.1");
    expect(hit?.kind).toBe("value");
    if (hit?.kind !== "value") {
      return;
    }
    expect(hit.node).toMatchObject({ kind: "number", value: 20 });
  });

  it("returns null for missing keys", () => {
    const hit = rs(config, "oc://config/plugins.entries.gitlab");
    expect(hit).toBeNull();
  });

  it("returns null for out-of-bounds array indexes", () => {
    const hit = rs(config, "oc://config/limits.99");
    expect(hit).toBeNull();
  });

  it("returns null when descending past a primitive", () => {
    // `token` is a string leaf, so there is nothing below it to address.
    const hit = rs(config, "oc://config/plugins.entries.github.token.x");
    expect(hit).toBeNull();
  });

  it("returns null on empty AST", () => {
    const emptyDoc = parseJsonc("").ast;
    expect(resolveJsoncOcPath(emptyDoc, parseOcPath("oc://config/x"))).toBeNull();
  });
});

View File

@@ -0,0 +1,244 @@
import { describe, expect, it } from "vitest";
import { appendJsonlOcPath, setJsonlOcPath } from "../../jsonl/edit.js";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { parseOcPath } from "../../oc-path.js";
describe("setJsonlOcPath — value replacement", () => {
  // Three-line log; only line 2 carries an `n` field.
  const log = '{"event":"start"}\n{"event":"step","n":1}\n{"event":"end"}\n';

  it("replaces a field on a specific line", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/L2/n"), {
      kind: "number",
      value: 42,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    // `L2` is the second emitted line.
    const [, secondLine = ""] = emitJsonl(result.ast).split("\n");
    expect(JSON.parse(secondLine)).toEqual({ event: "step", n: 42 });
  });

  it("replaces an entire line value", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/L2"), {
      kind: "object",
      entries: [{ key: "event", line: 0, value: { kind: "string", value: "replaced" } }],
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    const [, secondLine = ""] = emitJsonl(result.ast).split("\n");
    expect(JSON.parse(secondLine)).toEqual({ event: "replaced" });
  });

  it("resolves $last and edits the most recent value line", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/$last/event"), {
      kind: "string",
      value: "final",
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    // The last value line of the fixture is the third emitted line.
    const [, , thirdLine = ""] = emitJsonl(result.ast).split("\n");
    expect(JSON.parse(thirdLine)).toEqual({ event: "final" });
  });

  it("reports unresolved for unknown line addresses", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/L99/x"), {
      kind: "number",
      value: 1,
    });
    expect(result).toEqual({ ok: false, reason: "unresolved" });
  });

  it("reports not-a-value-line when targeting a blank line", () => {
    // Line 2 of this input is blank, so it exists but holds no value.
    const doc = parseJsonl('{"a":1}\n\n{"b":2}\n').ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/L2"), {
      kind: "number",
      value: 1,
    });
    expect(result).toEqual({ ok: false, reason: "not-a-value-line" });
  });
});
describe("appendJsonlOcPath — session checkpointing primitive", () => {
  it("appends to an empty file", () => {
    const emptyLog = parseJsonl("").ast;
    const appended = appendJsonlOcPath(emptyLog, {
      kind: "object",
      entries: [{ key: "event", line: 0, value: { kind: "string", value: "start" } }],
    });
    // The emitted text is exactly the one appended record.
    expect(emitJsonl(appended)).toBe('{"event":"start"}');
  });

  it("appends to an existing log preserving prior lines", () => {
    const existingLog = parseJsonl('{"a":1}\n').ast;
    const appended = appendJsonlOcPath(existingLog, {
      kind: "object",
      entries: [{ key: "b", line: 0, value: { kind: "number", value: 2 } }],
    });
    const emittedLines = emitJsonl(appended).split("\n");
    expect(emittedLines).toHaveLength(2);
    const [, appendedLine = ""] = emittedLines;
    expect(JSON.parse(appendedLine)).toEqual({ b: 2 });
  });
});
describe("setJsonlOcPath — line-address positional tokens (resolve↔edit symmetry)", () => {
  // The line-address slot has to accept every token shape the read side
  // (pickLine, used by resolve.ts and find.ts) accepts. If `$first` or `-N`
  // were missing here, a path readable under those tokens would silently
  // come back unresolved on write.
  const log = '{"event":"start","n":1}\n{"event":"step","n":2}\n{"event":"end","n":3}\n';

  it("writes under $first line address", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/$first/n"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    const [firstLine = ""] = emitJsonl(result.ast).split("\n");
    expect(JSON.parse(firstLine)).toEqual({ event: "start", n: 99 });
  });

  it("writes under -1 line address (alias for last value line)", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/-1/n"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    const [, , thirdLine = ""] = emitJsonl(result.ast).split("\n");
    expect(JSON.parse(thirdLine)).toEqual({ event: "end", n: 99 });
  });

  it("writes under -2 line address (penultimate value line)", () => {
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/-2/n"), {
      kind: "number",
      value: 99,
    });
    expect(result.ok).toBe(true);
    if (!result.ok) {
      return;
    }
    const [, secondLine = ""] = emitJsonl(result.ast).split("\n");
    expect(JSON.parse(secondLine)).toEqual({ event: "step", n: 99 });
  });

  it("reports unresolved for $first against an empty log", () => {
    const emptyLog = parseJsonl("").ast;
    const result = setJsonlOcPath(emptyLog, parseOcPath("oc://session-events/$first/n"), {
      kind: "number",
      value: 99,
    });
    expect(result).toEqual({ ok: false, reason: "unresolved" });
  });

  it("reports unresolved for -99 (out-of-range) line address", () => {
    // The fixture has three value lines, so -99 reaches before the start.
    const doc = parseJsonl(log).ast;
    const result = setJsonlOcPath(doc, parseOcPath("oc://session-events/-99/n"), {
      kind: "number",
      value: 99,
    });
    expect(result).toEqual({ ok: false, reason: "unresolved" });
  });
});
describe("setJsonlOcPath — positional field tokens (round-11 resolve↔edit symmetry)", () => {
  // ClawSweeper round-11 P2 — the JSONL line-address `$last` already
  // resolved (pickLineIndex), but positional tokens INSIDE a line's
  // structural body (item / field) did not. These tests pin the in-line
  // edit path: a `$first` / `$last` / `-N` field segment must reach the
  // same child that resolveJsonlOcPath reaches.
  const log = '{"items":[10,20,30],"events":{"a":1,"b":2}}\n';

  /** Emits `ast` and JSON-parses the first non-empty emitted line ("" when none). */
  const firstEmittedRecord = (ast: Parameters<typeof emitJsonl>[0]): unknown => {
    const firstLine =
      emitJsonl(ast)
        .split("\n")
        .find((candidate) => candidate.length > 0) ?? "";
    return JSON.parse(firstLine);
  };

  // [test title, address, expected record after writing 99]
  const scenarios = [
    [
      "edits the first array item on a line via $first",
      "oc://session-events/L1/items/$first",
      { items: [99, 20, 30], events: { a: 1, b: 2 } },
    ],
    [
      "edits the last array item on a line via $last",
      "oc://session-events/L1/items/$last",
      { items: [10, 20, 99], events: { a: 1, b: 2 } },
    ],
    [
      "edits the first object entry on a line via $first",
      "oc://session-events/L1/events/$first",
      { items: [10, 20, 30], events: { a: 99, b: 2 } },
    ],
  ] as const;

  for (const [title, address, expectedRecord] of scenarios) {
    it(title, () => {
      const parsed = parseJsonl(log);
      const outcome = setJsonlOcPath(parsed.ast, parseOcPath(address), {
        kind: "number",
        value: 99,
      });
      expect(outcome.ok).toBe(true);
      if (outcome.ok) {
        expect(firstEmittedRecord(outcome.ast)).toEqual(expectedRecord);
      }
    });
  }
});
describe("setJsonlOcPath — quoted field segments (regression: resolve↔edit symmetry)", () => {
  it("edits a field key containing a slash via quoted segment", () => {
    // Closes ClawSweeper P2 on PR #78678: JSONL resolve unquotes
    // bracket-aware segments, but the edit path used a plain
    // `.split('.')`. Any path that resolves under `Lnnn` MUST also be
    // editable through the very same address.
    const source = `{"event":"start","detail":{"github/repo":"old"}}\n`;
    const target = 'oc://x.jsonl/L1/detail/"github/repo"';

    const parsed = parseJsonl(source);
    const outcome = setJsonlOcPath(parsed.ast, parseOcPath(target), {
      kind: "string",
      value: "new",
    });

    expect(outcome.ok).toBe(true);
    if (outcome.ok) {
      const nonEmptyLines = emitJsonl(outcome.ast)
        .split("\n")
        .filter((candidate) => candidate.length > 0);
      expect(nonEmptyLines).toHaveLength(1);
      const [onlyLine = ""] = nonEmptyLines;
      expect(JSON.parse(onlyLine)).toEqual({
        event: "start",
        detail: { "github/repo": "new" },
      });
    }
  });
});

View File

@@ -0,0 +1,94 @@
import { describe, expect, it } from "vitest";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../../sentinel.js";
describe("emitJsonl — round-trip", () => {
  // Default emit returns the original text untouched, including blank
  // and malformed lines.
  it("returns raw bytes verbatim by default", () => {
    const source = '{"a":1}\n\n{"b":2}\nthis is malformed\n';
    const parsed = parseJsonl(source);
    expect(emitJsonl(parsed.ast)).toBe(source);
  });

  // A sentinel already present in the input is echoed by default; passing
  // `acceptPreExistingSentinel: false` makes the same emit throw.
  it("echoes pre-existing sentinel bytes by default; strict mode rejects", () => {
    const source = `{"a":"${REDACTED_SENTINEL}"}\n`;
    const parsed = parseJsonl(source);
    expect(emitJsonl(parsed.ast)).toBe(source);

    const strictEmit = () =>
      emitJsonl(parsed.ast, {
        fileNameForGuard: "session-events",
        acceptPreExistingSentinel: false,
      });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
});
describe("emitJsonl — render mode", () => {
  /** Parses `source`, emits it in render mode and splits the output on "\n". */
  const renderedLines = (source: string): string[] => {
    const parsed = parseJsonl(source);
    return emitJsonl(parsed.ast, { mode: "render" }).split("\n");
  };

  /**
   * Builds an AST whose single value line still carries the clean raw text
   * `{"a":"ok"}` but whose parsed string leaf has been swapped for `leaf`.
   */
  const astWithTamperedLeaf = (leaf: string) => {
    const clean = parseJsonl('{"a":"ok"}\n').ast;
    return {
      ...clean,
      lines: [
        {
          kind: "value" as const,
          line: 1,
          raw: '{"a":"ok"}',
          value: {
            kind: "object" as const,
            entries: [
              {
                key: "a",
                line: 1,
                value: { kind: "string" as const, value: leaf },
              },
            ],
          },
        },
      ],
    };
  };

  it("rebuilds value lines via JSON-stringify", () => {
    expect(renderedLines('{"a":1}\n{"b":2}\n')).toEqual(['{"a":1}', '{"b":2}']);
  });

  it("preserves blank and malformed lines verbatim in render mode", () => {
    expect(renderedLines('{"a":1}\n\nbroken\n{"b":2}\n')).toEqual([
      '{"a":1}',
      "",
      "broken",
      '{"b":2}',
    ]);
  });

  it("throws when a value-leaf is the sentinel under render mode", () => {
    const tampered = astWithTamperedLeaf(REDACTED_SENTINEL);
    expect(() => emitJsonl(tampered, { mode: "render" })).toThrow(OcEmitSentinelError);
  });

  it("throws when a value-leaf EMBEDS the sentinel (prefix/suffix wrap)", () => {
    // Regression: render mode used to compare with exact equality
    // (`value.value === SENTINEL`), so `prefix__OPENCLAW_REDACTED__suffix`
    // slipped through. A contains-check is the right invariant.
    const tampered = astWithTamperedLeaf(`wrap-${REDACTED_SENTINEL}-end`);
    expect(() => emitJsonl(tampered, { mode: "render" })).toThrow(OcEmitSentinelError);
  });
});

View File

@@ -0,0 +1,43 @@
import { describe, expect, it } from "vitest";
import { parseJsonl } from "../../jsonl/parse.js";
// Suite for parseJsonl: each physical line becomes one AST entry tagged
// "value", "blank" or "malformed", and the original text is kept on the
// AST root as `raw`.
describe("parseJsonl", () => {
// Empty input: no lines and no diagnostics.
it("parses an empty file as zero lines", () => {
const { ast, diagnostics } = parseJsonl("");
expect(diagnostics).toEqual([]);
expect(ast.lines).toEqual([]);
});
// Three JSON objects followed by a trailing newline yield three entries;
// the first and last are checked to be "value" lines.
it("parses each line as a JSON value", () => {
const raw = `{"event":"start"}
{"event":"step","n":1}
{"event":"end"}
`;
const { ast, diagnostics } = parseJsonl(raw);
expect(diagnostics).toEqual([]);
expect(ast.lines).toHaveLength(3);
expect(ast.lines[0]?.kind).toBe("value");
expect(ast.lines[2]?.kind).toBe("value");
});
// An empty line between two values is kept as its own "blank" entry and
// raises no diagnostic.
it("preserves blank lines as blank entries", () => {
const raw = '{"a":1}\n\n{"b":2}\n';
const { ast, diagnostics } = parseJsonl(raw);
expect(diagnostics).toEqual([]);
expect(ast.lines.map((l) => l.kind)).toEqual(["value", "blank", "value"]);
});
// A non-JSON line is tagged "malformed" and reported through a single
// OC_JSONL_LINE_MALFORMED diagnostic; the lines around it still parse.
it("flags malformed lines as warnings without aborting", () => {
const raw = '{"a":1}\nthis is not json\n{"b":2}\n';
const { ast, diagnostics } = parseJsonl(raw);
expect(ast.lines.map((l) => l.kind)).toEqual(["value", "malformed", "value"]);
expect(diagnostics).toHaveLength(1);
expect(diagnostics[0]?.code).toBe("OC_JSONL_LINE_MALFORMED");
});
// The unmodified input string is exposed as `ast.raw`.
it("preserves raw on the AST root for byte-fidelity emit", () => {
const raw = '{"a":1}\n{"b":2}\n';
const { ast } = parseJsonl(raw);
expect(ast.raw).toBe(raw);
});
});

View File

@@ -0,0 +1,103 @@
import { describe, expect, it } from "vitest";
import { findOcPaths } from "../../find.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { resolveJsonlOcPath } from "../../jsonl/resolve.js";
import { parseOcPath } from "../../oc-path.js";
import { resolveOcPath } from "../../universal.js";
// Shared fixture for the resolveJsonlOcPath suite. It is spelled out line by
// line (instead of a multi-line template literal) so that the blank line at
// L3 is visible and cannot be lost to whitespace cleanup: the suite's
// "descending into a blank line" test addresses L3 and only exercises the
// blank-line branch when that line really is empty.
const log = [
  '{"event":"start","ts":1}', // L1
  '{"event":"step","n":1,"result":{"ok":true,"detail":"a"}}', // L2
  "", // L3 — intentionally blank
  '{"event":"end","ts":99}', // L4 — last value line (target of `$last`)
  "", // produces the trailing newline
].join("\n");
/**
 * Resolves `ocPath` against a fresh parse of the shared `log` fixture.
 *
 * @param ocPath - An `oc://` address in string form.
 * @returns Whatever `resolveJsonlOcPath` returns for that address.
 */
function rs(ocPath: string) {
  const parsed = parseJsonl(log);
  const address = parseOcPath(ocPath);
  return resolveJsonlOcPath(parsed.ast, address);
}
describe("resolveJsonlOcPath", () => {
  /**
   * Asserts that `ocPath` resolves to an object entry and that the entry's
   * value matches `expected`.
   */
  const expectEntryValue = (ocPath: string, expected: { kind: string; value: unknown }): void => {
    const match = rs(ocPath);
    expect(match?.kind).toBe("object-entry");
    if (match?.kind === "object-entry") {
      expect(match.node.value).toMatchObject(expected);
    }
  };

  it("returns root when no segments are given", () => {
    const match = rs("oc://session-events");
    expect(match?.kind).toBe("root");
  });

  it("addresses an entire line by line number", () => {
    const match = rs("oc://session-events/L1");
    expect(match?.kind).toBe("line");
  });

  it("addresses fields under a line via item segment", () => {
    expectEntryValue("oc://session-events/L2/event", { kind: "string", value: "step" });
  });

  it("descends via dotted item paths", () => {
    expectEntryValue("oc://session-events/L2/result.ok", { kind: "boolean", value: true });
  });

  it("resolves $last to the most recent value line", () => {
    expectEntryValue("oc://session-events/$last/event", { kind: "string", value: "end" });
  });

  it("returns null for unknown line addresses", () => {
    for (const address of ["oc://session-events/L99", "oc://session-events/garbage"]) {
      expect(rs(address)).toBeNull();
    }
  });

  it("returns null when descending into a blank line", () => {
    // NOTE(review): this only exercises the blank-line branch if L3 of the
    // shared `log` fixture is actually an empty line — confirm against the
    // fixture.
    const match = rs("oc://session-events/L3/anything");
    expect(match).toBeNull();
  });
});
describe("resolveJsonlToUniversal — file-relative line metadata (regression)", () => {
  // Regression found while running the openclaw-path CLI scenario over a
  // multi-line session.jsonl: every match reported `line: 1`. The jsonc
  // parser used inside each line numbers from 1 within that line's bytes,
  // and the universal resolve preferred that local number over the
  // JsonlLine's file-relative line.
  const log = [
    '{"event":"start"}', // line 1
    '{"event":"step","n":1}', // line 2
    '{"event":"step","n":2}', // line 3
    '{"event":"end"}', // line 4
    "", // line 5 (blank)
  ].join("\n");

  /** Resolves `address` on a fresh parse of `log` and checks the reported line. */
  const expectResolvedLine = (address: string, expectedLine: number): void => {
    const parsed = parseJsonl(log);
    const match = resolveOcPath(parsed.ast, parseOcPath(address));
    expect(match).not.toBeNull();
    if (match !== null) {
      expect(match.line).toBe(expectedLine);
    }
  };

  it("resolves L2/event with line=2 (not 1)", () => {
    expectResolvedLine("oc://session.jsonl/L2/event", 2);
  });

  it("resolves L4/event with line=4", () => {
    expectResolvedLine("oc://session.jsonl/L4/event", 4);
  });

  it("findOcPaths over wildcard surfaces correct file-relative lines", () => {
    const parsed = parseJsonl(log);
    const found = findOcPaths(parsed.ast, parseOcPath("oc://session.jsonl/*/event"));
    expect(found).toHaveLength(4);
    const reportedLines = found.map((entry) => entry.match.line);
    expect(reportedLines).toEqual([1, 2, 3, 4]);
  });
});

View File

@@ -0,0 +1,129 @@
import { describe, expect, it } from "vitest";
import { OcPathError, formatOcPath, isValidOcPath, parseOcPath } from "../oc-path.js";
describe("parseOcPath", () => {
  // Inputs that must parse: [test title, oc:// string, expected parsed shape].
  const accepted: ReadonlyArray<readonly [string, string, Record<string, string>]> = [
    ["parses file-only path", "oc://SOUL.md", { file: "SOUL.md" }],
    [
      "parses file + section",
      "oc://SOUL.md/Boundaries",
      { file: "SOUL.md", section: "Boundaries" },
    ],
    [
      "parses file + section + item",
      "oc://SOUL.md/Boundaries/deny-rule-1",
      { file: "SOUL.md", section: "Boundaries", item: "deny-rule-1" },
    ],
    [
      "parses file + section + item + field",
      "oc://SOUL.md/Boundaries/deny-rule-1/risk",
      { file: "SOUL.md", section: "Boundaries", item: "deny-rule-1", field: "risk" },
    ],
    [
      "parses session query",
      "oc://SOUL.md?session=daily-cron",
      { file: "SOUL.md", session: "daily-cron" },
    ],
  ];

  for (const [title, input, expected] of accepted) {
    it(title, () => {
      expect(parseOcPath(input)).toEqual(expected);
    });
  }

  // Inputs that must be rejected: [test title, raw input, expected error code].
  // The input column is `unknown` so the non-string case can live in the table.
  const rejected: ReadonlyArray<readonly [string, unknown, string]> = [
    ["rejects missing scheme", "SOUL.md", "OC_PATH_MISSING_SCHEME"],
    ["rejects empty path after scheme", "oc://", "OC_PATH_EMPTY"],
    ["rejects empty segment", "oc://SOUL.md//deny-rule-1", "OC_PATH_EMPTY_SEGMENT"],
    ["rejects too-deep nesting", "oc://SOUL.md/a/b/c/d/e", "OC_PATH_TOO_DEEP"],
    ["rejects non-string input", 123, "OC_PATH_NOT_STRING"],
  ];

  for (const [title, input, expectedCode] of rejected) {
    it(title, () => {
      expectOcPathError(() => parseOcPath(input as string), expectedCode);
    });
  }
});
/**
 * Asserts that `fn` throws an `OcPathError` carrying `expectedCode`.
 *
 * The error is captured first and checked outside the `try` block. Calling
 * `expect.fail` inside the `try` would route its own assertion error through
 * the `catch`, replacing the "no error thrown" message with a confusing
 * "not an instance of OcPathError" failure.
 *
 * @param fn - Callback expected to throw.
 * @param expectedCode - Value the thrown error's `code` must equal.
 */
function expectOcPathError(fn: () => unknown, expectedCode: string): void {
  let caught: unknown;
  let threw = false;
  try {
    fn();
  } catch (err) {
    caught = err;
    threw = true;
  }
  if (!threw) {
    expect.fail(`expected OcPathError with code "${expectedCode}" but no error thrown`);
  }
  expect(caught).toBeInstanceOf(OcPathError);
  expect((caught as OcPathError).code).toBe(expectedCode);
}
describe("formatOcPath", () => {
  it("round-trips file-only", () => {
    const formatted = formatOcPath({ file: "SOUL.md" });
    expect(formatted).toBe("oc://SOUL.md");
  });

  it("round-trips full nesting", () => {
    const formatted = formatOcPath({
      file: "SOUL.md",
      section: "Boundaries",
      item: "deny-rule-1",
      field: "risk",
    });
    expect(formatted).toBe("oc://SOUL.md/Boundaries/deny-rule-1/risk");
  });

  it("round-trips session", () => {
    const formatted = formatOcPath({ file: "SOUL.md", session: "cron" });
    expect(formatted).toBe("oc://SOUL.md?session=cron");
  });

  // An empty file name is rejected with OC_PATH_FILE_REQUIRED.
  it("rejects empty file", () => {
    const formatEmptyFile = () => formatOcPath({ file: "" });
    expectOcPathError(formatEmptyFile, "OC_PATH_FILE_REQUIRED");
  });

  // An item given without its enclosing section is rejected with OC_PATH_NESTING.
  it("rejects item without section", () => {
    const formatOrphanItem = () => formatOcPath({ file: "F.md", item: "i" });
    expectOcPathError(formatOrphanItem, "OC_PATH_NESTING");
  });
});
describe("round-trip", () => {
  // Every address below must survive parse → format unchanged; one test is
  // registered per address.
  [
    "oc://SOUL.md",
    "oc://SOUL.md/Boundaries",
    "oc://SOUL.md/Boundaries/deny-rule-1",
    "oc://SOUL.md/Boundaries/deny-rule-1/risk",
    "oc://SOUL.md?session=daily",
    "oc://AGENTS.md/Tools/gh/risk",
  ].forEach((input) => {
    it(`formatOcPath(parseOcPath("${input}")) === "${input}"`, () => {
      const reformatted = formatOcPath(parseOcPath(input));
      expect(reformatted).toBe(input);
    });
  });
});
describe("isValidOcPath", () => {
  it("returns true for valid paths", () => {
    for (const candidate of ["oc://SOUL.md", "oc://SOUL.md/Boundaries"]) {
      expect(isValidOcPath(candidate)).toBe(true);
    }
  });

  // Covers malformed strings as well as non-string inputs.
  it("returns false for invalid paths", () => {
    const invalidInputs: unknown[] = ["SOUL.md", "oc://", null, undefined, 42];
    for (const candidate of invalidInputs) {
      expect(isValidOcPath(candidate)).toBe(false);
    }
  });
});

View File

@@ -0,0 +1,205 @@
import { describe, expect, it } from "vitest";
import { parseMd } from "../parse.js";
// Suite for the frontmatter portion of parseMd: the leading `---` … `---`
// block is exposed as `ast.frontmatter`, a list of { key, value, line }.
describe("parseMd — frontmatter", () => {
// Keys and values are split on the first colon; `line` is the 1-based line
// of the entry within the raw text (the opening `---` is line 1).
it("parses simple frontmatter", () => {
const raw = `---
name: github
description: gh CLI for issues, PRs, runs
---
Body text.
`;
const { ast, diagnostics } = parseMd(raw);
expect(diagnostics).toEqual([]);
expect(ast.frontmatter).toEqual([
{ key: "name", value: "github", line: 2 },
{ key: "description", value: "gh CLI for issues, PRs, runs", line: 3 },
]);
});
// A document that starts directly with an H2 has empty frontmatter, an
// empty preamble and one block.
it("handles no frontmatter", () => {
const raw = `## First section\n\nContent.\n`;
const { ast } = parseMd(raw);
expect(ast.frontmatter).toEqual([]);
expect(ast.preamble).toBe("");
expect(ast.blocks.length).toBe(1);
});
// An opening `---` with no closing fence is reported through an
// OC_FRONTMATTER_UNCLOSED diagnostic.
it("emits diagnostic for unclosed frontmatter", () => {
const raw = `---
name: github
description: never closes
Body.
`;
const { diagnostics } = parseMd(raw);
expect(diagnostics).toContainEqual(
expect.objectContaining({ code: "OC_FRONTMATTER_UNCLOSED" }),
);
});
// Surrounding double or single quotes are removed from values.
it("strips quotes from values", () => {
const raw = `---
title: "Hello world"
hint: 'quoted'
---
`;
const { ast } = parseMd(raw);
expect(ast.frontmatter[0]?.value).toBe("Hello world");
expect(ast.frontmatter[1]?.value).toBe("quoted");
});
});
// Suite for H2 splitting: text before the first `## ` heading is the
// preamble; each `## ` heading opens a block with `heading`, `slug` and `line`.
describe("parseMd — H2 blocks", () => {
// Two headings produce two blocks; the slug of "First" is "first".
it("splits sections", () => {
const raw = `Preamble text.
## First
Body of first.
## Second
Body of second.
`;
const { ast } = parseMd(raw);
expect(ast.preamble.trim()).toBe("Preamble text.");
expect(ast.blocks.length).toBe(2);
expect(ast.blocks[0]?.heading).toBe("First");
expect(ast.blocks[0]?.slug).toBe("first");
expect(ast.blocks[1]?.heading).toBe("Second");
});
// `line` on a block is the 1-based line of its heading in the raw text.
it("preserves line numbers (1-based)", () => {
const raw = `Line 1
## Heading at line 2
Line 3
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.line).toBe(2);
});
// A `## ` line inside a fenced code block must not open a new block.
it("does NOT split on `## ` inside fenced code blocks", () => {
const raw = `## Real section
\`\`\`md
## Not a heading
content
\`\`\`
## Another section
`;
const { ast } = parseMd(raw);
expect(ast.blocks.map((b) => b.heading)).toEqual(["Real section", "Another section"]);
});
});
// Suite for bullet extraction: `- ` lines inside a block become `items`,
// each with `text`, and `kv` / `slug` when the bullet has a `key: value` shape.
describe("parseMd — items", () => {
// Bullets without a `key: value` shape have `text` set and `kv` undefined.
it("extracts plain bullet items", () => {
const raw = `## Boundaries
- never write to /etc
- always confirm before deleting
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.items.length).toBe(2);
expect(ast.blocks[0]?.items[0]?.text).toBe("never write to /etc");
expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
});
// `- key: value` bullets expose `kv`, and the item slug is the key.
it("extracts kv items", () => {
const raw = `## Tools
- gh: GitHub CLI
- curl: HTTP client
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "gh", value: "GitHub CLI" });
expect(ast.blocks[0]?.items[0]?.slug).toBe("gh");
expect(ast.blocks[0]?.items[1]?.kv).toEqual({ key: "curl", value: "HTTP client" });
});
// Bullet-looking lines inside a fence are code, not items.
it("does NOT extract bullets inside fenced code", () => {
const raw = `## Section
\`\`\`
- not a bullet
\`\`\`
- real bullet
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.items.length).toBe(1);
expect(ast.blocks[0]?.items[0]?.text).toBe("real bullet");
});
});
// Suite for table extraction: a pipe table inside a block is exposed on
// `tables` with `headers` and `rows`.
describe("parseMd — tables", () => {
// The header row becomes `headers`; the `| --- |` separator row is not
// counted, leaving two data rows with their cell text trimmed.
it("extracts a simple table", () => {
const raw = `## Tool Guidance
| tool | guidance |
| --- | --- |
| gh | use for GitHub |
| curl | HTTP client |
`;
const { ast } = parseMd(raw);
const table = ast.blocks[0]?.tables[0];
// Fail with a clear message (and narrow the type) if no table was parsed.
if (!table) {
throw new Error("expected parsed markdown table");
}
expect(table.headers).toEqual(["tool", "guidance"]);
expect(table.rows.length).toBe(2);
expect(table.rows[0]).toEqual(["gh", "use for GitHub"]);
});
});
// Suite for fenced code extraction: triple-backtick fences inside a block
// are exposed on `codeBlocks` with `lang` and `text`.
describe("parseMd — code blocks", () => {
// The info string after the opening fence becomes `lang`; `text` is the
// body without the fence lines.
it("extracts a fenced code block", () => {
const raw = `## Examples
\`\`\`ts
const x = 1;
\`\`\`
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks[0]).toMatchObject({
lang: "ts",
text: "const x = 1;",
});
});
// A fence with no info string reports `lang` as null.
it("handles unlanguaged fences", () => {
const raw = `## Block
\`\`\`
plain text
\`\`\`
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBeNull();
});
});
// Suite for byte-level guarantees of parseMd: the input is kept verbatim
// on `ast.raw`, and a BOM or CRLF line endings do not disturb parsing.
describe("parseMd — byte-fidelity", () => {
  it("preserves raw on the AST", () => {
    const raw = `---\nname: x\n---\n\n## Sec\n\n- a\n- b\n`;
    const { ast } = parseMd(raw);
    expect(ast.raw).toBe(raw);
  });

  it("preserves BOM in raw but ignores it for parsing", () => {
    // The byte-order mark is written as an explicit escape: an invisible
    // literal U+FEFF is easy to lose in editors and diffs, and without it
    // this test would not exercise BOM handling at all.
    const raw = "\uFEFF## Heading\n";
    const { ast } = parseMd(raw);
    expect(ast.raw).toBe(raw);
    expect(ast.blocks[0]?.heading).toBe("Heading");
  });

  // "\r\n" endings must not leak a trailing "\r" into headings or item text.
  it("handles CRLF line endings", () => {
    const raw = "## Heading\r\n\r\n- item\r\n";
    const { ast } = parseMd(raw);
    expect(ast.blocks[0]?.heading).toBe("Heading");
    expect(ast.blocks[0]?.items[0]?.text).toBe("item");
  });
});

View File

@@ -0,0 +1,100 @@
import { describe, expect, it } from "vitest";
import { parseMd } from "../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../resolve.js";
// Markdown fixture shared by the resolve tests: frontmatter, a preamble
// line, then a "Boundaries" and a "Tools" section with bullet items.
// Built from explicit lines; the trailing "" yields the final newline.
const SAMPLE = [
  "---",
  "name: github",
  "description: gh CLI",
  "---",
  "Preamble.",
  "## Boundaries",
  "- never write to /etc",
  "- deny: secrets",
  "## Tools",
  "- gh: GitHub CLI",
  "- curl: HTTP client",
  "",
].join("\n");
describe("resolveOcPath", () => {
  // One parse of the fixture is shared by every test; the tests only read it.
  const parsed = parseMd(SAMPLE);
  const ast = parsed.ast;
  // File name used in every address below.
  const FILE = "AGENTS.md";

  it("resolves root", () => {
    const match = resolveOcPath(ast, { file: FILE });
    expect(match?.kind).toBe("root");
  });

  it("resolves block by slug", () => {
    const match = resolveOcPath(ast, { file: FILE, section: "boundaries" });
    expect(match?.kind).toBe("block");
    if (match?.kind === "block") {
      expect(match.node.heading).toBe("Boundaries");
    }
  });

  it("resolves item by slug", () => {
    const match = resolveOcPath(ast, { file: FILE, section: "tools", item: "gh" });
    expect(match?.kind).toBe("item");
    if (match?.kind === "item") {
      expect(match.node.kv?.value).toBe("GitHub CLI");
      expect(match.block.heading).toBe("Tools");
    }
  });

  it("resolves item-field via kv", () => {
    const match = resolveOcPath(ast, {
      file: FILE,
      section: "tools",
      item: "gh",
      field: "gh",
    });
    expect(match?.kind).toBe("item-field");
    if (match?.kind === "item-field") {
      expect(match.value).toBe("GitHub CLI");
    }
  });

  // Frontmatter keys are addressed through the "[frontmatter]" section name
  // with the key in the field slot (no item).
  it("resolves frontmatter via [frontmatter] sentinel section", () => {
    const match = resolveOcPath(ast, {
      file: FILE,
      section: "[frontmatter]",
      field: "name",
    });
    expect(match?.kind).toBe("frontmatter");
    if (match?.kind === "frontmatter") {
      expect(match.node.value).toBe("github");
    }
  });

  it("returns null for unknown section", () => {
    const match = resolveOcPath(ast, { file: FILE, section: "nonexistent" });
    expect(match).toBeNull();
  });

  it("returns null for unknown item", () => {
    const match = resolveOcPath(ast, { file: FILE, section: "tools", item: "nonexistent" });
    expect(match).toBeNull();
  });

  // The addressed item is a plain bullet with no key/value pair, so asking
  // it for a field yields no match.
  it("returns null for field on non-kv item", () => {
    const match = resolveOcPath(ast, {
      file: FILE,
      section: "boundaries",
      item: "never-write-to-etc",
      field: "risk",
    });
    expect(match).toBeNull();
  });
});

View File

@@ -0,0 +1,126 @@
/**
* Wave 20 — JSONL append + multi-agent session sim.
*
* Substrate guarantee: `appendJsonlOcPath(ast, value)` returns a new AST
* with the value appended as a new line. Single-writer model at the
* substrate; concurrent-append safety lives in the LKG tracker layer
* (PR-4) on top of git's three-way merge.
*
* Append for other kinds (jsonc array push, md item-to-section) was
* removed from the substrate — those are domain operations that ride
* on top of `setXxxOcPath` at the doctor / tracker layer, where the
* value shapes are domain-defined.
*/
import { describe, expect, it } from "vitest";
import type { JsoncValue } from "../../jsonc/ast.js";
import { appendJsonlOcPath } from "../../jsonl/edit.js";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
/**
 * Builds the object value `{ event: name, n }` used as a synthetic session
 * event in the append tests.
 *
 * @param name - Stored under the "event" key as a string value.
 * @param n - Stored under the "n" key as a number value.
 * @returns An object value with the two entries in that order, each with `line: 0`.
 */
function event(name: string, n: number): JsoncValue {
  const node: JsoncValue = {
    kind: "object",
    entries: [
      { key: "event", line: 0, value: { kind: "string", value: name } },
      { key: "n", line: 0, value: { kind: "number", value: n } },
    ],
  };
  return node;
}
describe("wave-20 jsonl append + multi-agent session sim", () => {
  type JsonlAst = ReturnType<typeof parseJsonl>["ast"];

  /** Parses `source` and returns just the AST. */
  const astOf = (source: string): JsonlAst => parseJsonl(source).ast;

  /** Appends `values` one after another, threading the AST through each call. */
  const appendAll = (start: JsonlAst, values: readonly JsoncValue[]): JsonlAst =>
    values.reduce<JsonlAst>((acc, value) => appendJsonlOcPath(acc, value), start);

  /** Emits `ast` and returns its non-empty output lines. */
  const nonEmptyLines = (ast: JsonlAst): string[] =>
    emitJsonl(ast)
      .split("\n")
      .filter((candidate) => candidate.length > 0);

  /** Alternates between the two simulated agents by index parity. */
  const agentFor = (index: number): string => (index % 2 === 0 ? "a" : "b");

  it("A-01 single agent appends 100 events in order", () => {
    const steps = Array.from({ length: 100 }, (_, i) => event("step", i));
    const lines = nonEmptyLines(appendAll(astOf(""), steps));
    expect(lines).toHaveLength(100);
    expect(JSON.parse(lines[0] ?? "")).toEqual({ event: "step", n: 0 });
    expect(JSON.parse(lines[99] ?? "")).toEqual({ event: "step", n: 99 });
  });

  it("A-02 two agents alternating appends preserve interleave order", () => {
    const interleaved = Array.from({ length: 10 }, (_, i) => event(agentFor(i), i));
    const lines = nonEmptyLines(appendAll(astOf(""), interleaved));
    expect(lines).toHaveLength(10);
    for (let i = 0; i < 10; i++) {
      expect(JSON.parse(lines[i] ?? "").event).toBe(agentFor(i));
    }
  });

  it("A-03 append after a malformed line preserves both", () => {
    const appended = appendJsonlOcPath(astOf('{"a":1}\nbroken\n'), event("start", 1));
    const emitted = emitJsonl(appended);
    expect(emitted).toContain("broken");
    expect(emitted).toContain('"event":"start"');
  });

  it("A-04 append to empty file produces a single value line", () => {
    const appended = appendJsonlOcPath(astOf(""), event("first", 0));
    const emitted = emitJsonl(appended);
    expect(JSON.parse(emitted)).toEqual({ event: "first", n: 0 });
  });

  it("A-05 append assigns line numbers monotonically", () => {
    const appended = appendAll(astOf(""), [event("a", 0), event("b", 1), event("c", 2)]);
    const lineNumbers = appended.lines.map((entry) => entry.line);
    expect(lineNumbers).toEqual([1, 2, 3]);
  });

  it("A-06 append after blank lines preserves line-number gaps correctly", () => {
    const appended = appendJsonlOcPath(astOf('{"a":1}\n\n\n'), event("after", 0));
    // The input already holds L1 (value), L2 (blank) and L3 (blank), so the
    // appended value must land on L4.
    expect(appended.lines.length).toBe(4);
    expect(appended.lines[3]?.line).toBe(4);
  });

  it("A-07 1000-event session sim is deterministic", () => {
    const session = Array.from({ length: 1000 }, (_, i) => event("e", i));
    const lines = nonEmptyLines(appendAll(astOf(""), session));
    expect(lines).toHaveLength(1000);
    expect(JSON.parse(lines[999] ?? "").n).toBe(999);
  });

  it("A-08 append is non-mutating on the input AST", () => {
    const original = astOf('{"a":1}\n');
    const snapshot = JSON.stringify(original);
    // The returned AST is deliberately discarded; only the input is inspected.
    appendJsonlOcPath(original, event("x", 0));
    expect(JSON.stringify(original)).toBe(snapshot);
  });

  it("A-09 append preserves prior raw bytes (renders new tail)", () => {
    const appended = appendJsonlOcPath(astOf('{"a":1}\n'), event("b", 1));
    const [head, tail = ""] = emitJsonl(appended).split("\n");
    // The pre-existing first line keeps its content.
    expect(head).toContain('"a":1');
    // The second line is the newly appended event.
    expect(JSON.parse(tail)).toEqual({ event: "b", n: 1 });
  });

  it("A-10 deterministic line-number assignment after malformed lines", () => {
    const appended = appendJsonlOcPath(astOf('{"a":1}\nbroken\n{"b":2}\n'), event("c", 2));
    const lineNumbers = appended.lines.map((entry) => entry.line);
    expect(lineNumbers).toEqual([1, 2, 3, 4]);
  });
});

View File

@@ -0,0 +1,179 @@
/**
* Wave 1 — byte-fidelity round-trip.
*
* Substrate guarantee: `emitMd(parse(raw), { mode: 'roundtrip' }) === raw`
* for every input the parser accepts. This wave hammers that.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
/**
 * Parses `raw` as markdown and emits the resulting AST with default options.
 *
 * @param raw - Markdown source text.
 * @returns The text produced by `emitMd` for the parsed AST.
 */
function roundTrip(raw: string): string {
  const parsed = parseMd(raw);
  return emitMd(parsed.ast);
}
// Byte-fidelity suite: every case feeds a raw markdown string through
// `roundTrip` (parse, then emit with default options) and expects the
// exact same string back.
describe("wave-01 byte-fidelity", () => {
  it("B-01 empty file", () => {
    expect(roundTrip("")).toBe("");
  });

  it("B-02 whitespace-only file", () => {
    expect(roundTrip(" \n\n \n")).toBe(" \n\n \n");
  });

  it("B-03 single newline", () => {
    expect(roundTrip("\n")).toBe("\n");
  });

  it("B-04 file without trailing newline", () => {
    expect(roundTrip("## H\n- item")).toBe("## H\n- item");
  });

  it("B-05 file with trailing newline", () => {
    expect(roundTrip("## H\n- item\n")).toBe("## H\n- item\n");
  });

  it("B-06 file with multiple trailing newlines", () => {
    expect(roundTrip("## H\n- item\n\n\n")).toBe("## H\n- item\n\n\n");
  });

  it("B-07 BOM at start", () => {
    // The byte-order mark is spelled as an explicit escape: an invisible
    // literal U+FEFF is easy to lose in editors and diffs, and without it
    // this case would not cover a BOM at all.
    const raw = "\uFEFF## Heading\n- item\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-08 CRLF line endings", () => {
    const raw = "## H\r\n\r\n- item\r\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-09 mixed line endings (CRLF + LF)", () => {
    const raw = "## H\r\n- item\n- another\r\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-10 tabs preserved in body", () => {
    const raw = "## H\n\n\tindented body\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-11 trailing whitespace on lines preserved", () => {
    const raw = "## Heading \n- item \n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-12 multiple consecutive blank lines preserved", () => {
    const raw = "## H\n\n\n\n- item\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-13 frontmatter only, no body", () => {
    const raw = "---\nname: x\n---\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-14 body only, no frontmatter, no headings", () => {
    const raw = "Just some prose.\nNo structure.\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-15 frontmatter + body + multiple sections", () => {
    // The template literal's content lines stay at column 0 on purpose:
    // any indentation here would become part of the fixture.
    const raw = `---
name: github
description: gh CLI
---
Preamble.
## Boundaries
- never write to /etc
## Tools
- gh: GitHub CLI
- curl: HTTP client
`;
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-16 unicode content preserved", () => {
    const raw = "## Café Section\n\n- résumé item\n- 日本語\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-17 emoji preserved", () => {
    const raw = "## 🚀 Launch\n\n- ✅ ready\n- 🔒 secure\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-18 frontmatter with special chars in values", () => {
    const raw = `---\nurl: https://example.com:443/path?q=1&a=2\n---\n`;
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-19 file with mixed bullet markers (-, *, +)", () => {
    const raw = "## H\n\n- dash\n* star\n+ plus\n";
    expect(roundTrip(raw)).toBe(raw);
  });

  it("B-20 raw === parse(raw).raw === emitMd(parse(raw)) for 50 random shapes", () => {
    // Despite the title, the inputs are a fixed, hand-written list, so the
    // test is deterministic. Each entry is checked with the same round-trip
    // assertion; the failure message carries the first 60 characters of
    // the offending input.
    const inputs = [
      "",
      "\n",
      "## A\n",
      "## A\n## B\n",
      "---\n---\n",
      "---\nk: v\n---\n",
      "---\nk: v\n---\nbody\n",
      "## H\n- a\n- b\n## I\n- c\n",
      "\n",
      "\r\n",
      "\t\n",
      "plain\n",
      "`code`\n",
      "```\nfence\n```\n",
      "```ts\nconst x = 1;\n```\n",
      "| a | b |\n| - | - |\n| 1 | 2 |\n",
      "> quote\n",
      "# H1 not split\n## H2 split\n",
      "preamble\n## block\nbody\n",
      "preamble\n## block\nbody\n## block2\nbody2\n",
      "## h\n\n\n\n",
      " ## indented heading (not parsed)\n",
      "##NoSpace\n",
      "## With trailing spaces \n- item\n",
      "## H\n- nested\n - sub\n",
      "## H\n\n```md\n## inside code\n```\n",
      "---\na: 1\nb: \"two\"\nc: 'three'\n---\n",
      "---\nopen\nbut no close\n\nbody\n",
      "mixed\r\nline\nendings\r\n",
      "\uFEFF---\nname: bom\n---\nbody\n", // explicit BOM before frontmatter
      "## h\n- k: v\n- k2: v2\n- plain\n",
      "## h\n\n| a | b |\n|---|---|\n",
      "## h\n```sql\nSELECT 1\n```\n",
      "## h\n\n- url: http://x.example.com:80/p?q=1\n",
      "## h\n\n- key: value with: colons\n",
      '## h\n\n- key: "quoted: value"\n',
      "## h\n\n- a-b: c-d\n",
      "## h with `inline code`\n",
      "no blocks\nat all\n",
      "No body or section\n\n\n\n",
      " \n \n",
      "## h\n## h2\n## h3\n",
      "##\n", // empty heading
      "## \n", // heading whitespace only
      "\n\n## h\n\n\n",
      "---\n\n---\n",
      "## h\n- \n", // empty bullet
      "## h\n\n\n```\nempty fence body\n```\n",
      "## h\n```\nunclosed fence",
      "## empty section\n## next\n",
      "0\n",
    ];
    for (const raw of inputs) {
      expect(roundTrip(raw), `failed on: ${JSON.stringify(raw.slice(0, 60))}`).toBe(raw);
    }
  });
});

View File

@@ -0,0 +1,97 @@
/**
* Wave 6 — fenced code blocks.
*
* Substrate guarantee: triple-backtick fences (` ``` `) inside H2 blocks
* extract as `AstCodeBlock` with `lang` (or null) and verbatim `text`.
* Code blocks suppress H2-split and item-extraction inside their body.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-06 code-blocks", () => {
it("CB-01 unlanguaged fence", () => {
const raw = `## H\n\n\`\`\`\nplain text\n\`\`\`\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks[0]).toMatchObject({
lang: null,
text: "plain text",
});
});
it("CB-02 languaged fence", () => {
const raw = `## H\n\n\`\`\`ts\nconst x = 1;\n\`\`\`\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks[0]?.lang).toBe("ts");
expect(ast.blocks[0]?.codeBlocks[0]?.text).toBe("const x = 1;");
});
it("CB-03 multi-line code body preserved verbatim", () => {
const raw = `## H\n\n\`\`\`ts\nline 1\nline 2\nline 3\n\`\`\`\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks[0]?.text).toBe("line 1\nline 2\nline 3");
});
it("CB-04 empty code block", () => {
const raw = `## H\n\n\`\`\`\n\`\`\`\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks[0]?.text).toBe("");
});
it("CB-05 code block with `## ` does NOT split as heading", () => {
const raw = `## Real\n\n\`\`\`md\n## Not a heading\n\`\`\`\n\n## Another real\n`;
const { ast } = parseMd(raw);
expect(ast.blocks.map((b) => b.heading)).toEqual(["Real", "Another real"]);
});
it("CB-06 code block with `- bullet` does NOT extract as item", () => {
const raw = `## H\n\n\`\`\`\n- not a bullet\n- still not\n\`\`\`\n\n- real bullet\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["real bullet"]);
});
it("CB-07 multiple code blocks in same section", () => {
const raw = `## H\n\n\`\`\`a\nfirst\n\`\`\`\n\n\`\`\`b\nsecond\n\`\`\`\n`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.codeBlocks.length).toBe(2);
expect(ast.blocks[0]?.codeBlocks.map((c) => c.lang)).toEqual(["a", "b"]);
});
it("CB-08 unterminated fence — body extends to end of section", () => {
  const source = "## H\n\n```\nopen but never closes\n";
  const firstCode = parseMd(source).ast.blocks[0]?.codeBlocks[0];
  // A missing close fence is tolerated: the code block still exists and
  // holds whatever followed the opening fence. The assertion is a
  // containment check on purpose, because trailing newline handling for
  // this malformed case is not pinned here.
  expect(firstCode?.text).toContain("open but never closes");
});
it("CB-09 fence with leading spaces (4-space indented code)", () => {
  // A fence that does not start at column 0 is not treated as a fence,
  // so the section is expected to expose no code blocks at all.
  const source = "## H\n\n ```\n indented\n ```\n";
  const section = parseMd(source).ast.blocks[0];
  expect(section?.codeBlocks).toEqual([]);
});
it("CB-10 lang tag with extra whitespace trimmed", () => {
  // Whitespace around the info string must not leak into `lang`.
  const source = "## H\n\n``` jsonc \nbody\n```\n";
  const firstCode = parseMd(source).ast.blocks[0]?.codeBlocks[0];
  expect(firstCode?.lang).toBe("jsonc");
});
it("CB-11 lang tag with hyphen / dot (typescript-jsx, c++)", () => {
  // Only the hyphenated tag is exercised by this case.
  const source = "## H\n\n```typescript-jsx\nx\n```\n";
  const firstCode = parseMd(source).ast.blocks[0]?.codeBlocks[0];
  expect(firstCode?.lang).toBe("typescript-jsx");
});
it("CB-12 fence appearing in preamble (before any H2) is ignored at block layer", () => {
  const source = "```\npreamble code\n```\n\n## H\n";
  const section = parseMd(source).ast.blocks[0];
  // The fence sits before the first H2, so it belongs to the preamble and
  // must not be attributed to the first block's `codeBlocks`.
  expect(section?.codeBlocks).toEqual([]);
});
});

View File

@@ -0,0 +1,139 @@
/**
* Wave 13 — cross-cutting integration.
*
* Pipelines: parse + resolve + emit working together. Slug stability
* across re-parses. OcPath round-trip via the AST (slugs in OcPath
* must round-trip back to the resolved node).
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { formatOcPath, parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../../resolve.js";
// Shared markdown fixture for every case in this file: frontmatter with two
// keys (`name`, `description`), one preamble line, and two H2 sections
// ("Boundaries" with plain bullets, "Tools" with `key: value` bullets).
const SAMPLE = `---
name: github
description: gh CLI
---
Preamble.
## Boundaries
- never write to /etc
- always confirm
## Tools
- gh: GitHub CLI
- curl: HTTP client
`;
describe("wave-13 cross-cutting", () => {
  it("CC-01 parse → resolve → emit pipeline (block)", () => {
    const tree = parseMd(SAMPLE).ast;
    const hit = resolveOcPath(tree, { file: "AGENTS.md", section: "boundaries" });
    expect(hit?.kind).toBe("block");
    // Emitting after a resolve must still reproduce the input bytes.
    expect(emitMd(tree)).toBe(SAMPLE);
  });

  it("CC-02 OcPath round-trip via AST: parse + resolve + format", () => {
    const tree = parseMd(SAMPLE).ast;
    for (const { slug } of tree.blocks) {
      const uri = `oc://AGENTS.md/${slug}`;
      const ocPath = parseOcPath(uri);
      expect(resolveOcPath(tree, ocPath)?.kind, `block ${slug} should resolve`).toBe("block");
      // Formatting the parsed path must hand back the URI we started from.
      expect(formatOcPath(ocPath)).toBe(uri);
    }
  });

  it("CC-03 every item in every block is OcPath-addressable", () => {
    const tree = parseMd(SAMPLE).ast;
    for (const block of tree.blocks) {
      for (const item of block.items) {
        const label = `${block.slug}/${item.slug}`;
        const hit = resolveOcPath(tree, parseOcPath(`oc://AGENTS.md/${label}`));
        expect(hit?.kind, `${label} should resolve`).toBe("item");
      }
    }
  });

  it("CC-04 every kv item field is OcPath-addressable", () => {
    const tree = parseMd(SAMPLE).ast;
    for (const block of tree.blocks) {
      for (const item of block.items) {
        const { kv } = item;
        // Plain bullets carry no field to address; only kv bullets are checked.
        if (!kv) {
          continue;
        }
        const uri = `oc://AGENTS.md/${block.slug}/${item.slug}/${kv.key}`;
        expect(resolveOcPath(tree, parseOcPath(uri))?.kind).toBe("item-field");
      }
    }
  });

  it("CC-05 every frontmatter entry is OcPath-addressable", () => {
    const tree = parseMd(SAMPLE).ast;
    for (const entry of tree.frontmatter) {
      const ocPath = parseOcPath(`oc://AGENTS.md/[frontmatter]/${entry.key}`);
      expect(resolveOcPath(tree, ocPath)?.kind).toBe("frontmatter");
    }
  });

  it("CC-06 slugs are stable across re-parses (deterministic)", () => {
    const first = parseMd(SAMPLE).ast;
    const second = parseMd(SAMPLE).ast;
    // Block slugs first, then the per-block item slugs.
    expect(first.blocks.map((block) => block.slug)).toEqual(
      second.blocks.map((block) => block.slug),
    );
    expect(first.blocks.map((block) => block.items.map((item) => item.slug))).toEqual(
      second.blocks.map((block) => block.items.map((item) => item.slug)),
    );
  });

  it("CC-07 modifying raw + re-parse produces consistent AST shape", () => {
    const original = parseMd(SAMPLE).ast;
    const editedRaw = SAMPLE.replace("GitHub CLI", "GitHub command-line interface");
    const edited = parseMd(editedRaw).ast;

    // A value-only text change must not alter block or item counts.
    expect(edited.blocks.length).toBe(original.blocks.length);
    const toolsBefore = original.blocks.find((block) => block.slug === "tools");
    const toolsAfter = edited.blocks.find((block) => block.slug === "tools");
    expect(toolsAfter?.items.length).toBe(toolsBefore?.items.length);

    // The edited value is what the re-parsed kv reports.
    const gh = toolsAfter?.items.find((item) => item.kv?.key === "gh");
    expect(gh?.kv?.value).toBe("GitHub command-line interface");
  });

  it("CC-08 unknown OcPath returns null without affecting subsequent valid resolves", () => {
    const tree = parseMd(SAMPLE).ast;
    const miss = resolveOcPath(tree, { file: "X.md", section: "nonexistent" });
    expect(miss).toBeNull();
    const hit = resolveOcPath(tree, { file: "X.md", section: "tools" });
    expect(hit?.kind).toBe("block");
  });

  it("CC-09 resolve does not depend on file segment matching", () => {
    const tree = parseMd(SAMPLE).ast;
    // Same section, different file names: the resolved kind must agree.
    const viaA = resolveOcPath(tree, { file: "A.md", section: "tools" });
    const viaB = resolveOcPath(tree, { file: "B.md", section: "tools" });
    expect(viaA?.kind).toBe(viaB?.kind);
  });

  it("CC-10 round-trip across all 9 valid OcPath shapes", () => {
    const tree = parseMd(SAMPLE).ast;
    const shapes = [
      { file: "X.md" },
      { file: "X.md", section: "tools" },
      { file: "X.md", section: "tools", item: "gh" },
      { file: "X.md", section: "tools", item: "gh", field: "gh" },
      { file: "X.md", section: "[frontmatter]", field: "name" },
      { file: "X.md", section: "boundaries" },
      { file: "X.md", section: "boundaries", item: "never-write-to-etc" },
      { file: "X.md", section: "boundaries", item: "always-confirm" },
      { file: "X.md", section: "[frontmatter]", field: "description" },
    ];
    for (const shape of shapes) {
      const hit = resolveOcPath(tree, shape);
      expect(hit, `failed for ${JSON.stringify(shape)}`).not.toBeNull();
    }
  });
});

View File

@@ -0,0 +1,147 @@
/**
* Wave 22 — cross-kind property invariants.
*
* Per-kind verbs hold the same shape contracts regardless of kind:
*
* 1. parse → emit (round-trip) is byte-stable for ALL kinds
* 2. resolve is non-mutating for ALL kinds
* 3. set returns structured failure (never throws) for unresolvable
* paths across ALL kinds
* 4. inferKind aligns with the parsers consumers actually pick
* 5. parse → emit → parse is fixpoint
* 6. hostile inputs do not throw at parse time
*/
import { describe, expect, it } from "vitest";
import { inferKind } from "../../dispatch.js";
import { setMdOcPath } from "../../edit.js";
import { emitMd } from "../../emit.js";
import { setJsoncOcPath } from "../../jsonc/edit.js";
import { emitJsonc } from "../../jsonc/emit.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { resolveJsoncOcPath } from "../../jsonc/resolve.js";
import { setJsonlOcPath } from "../../jsonl/edit.js";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { resolveJsonlOcPath } from "../../jsonl/resolve.js";
import { parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath } from "../../resolve.js";
describe("wave-22 cross-kind property invariants", () => {
  // One small fixture per kind, reused by the invariants below.
  // md: frontmatter + one H2 section with a kv bullet.
  const mdRaw = "---\nname: x\n---\n\n## Boundaries\n\n- enabled: true\n";
  // jsonc: leading line comment + object with a scalar and an array.
  const jsoncRaw = '// h\n{ "k": 1, "n": [1,2,3] }\n';
  // jsonl: valid line, blank line, non-JSON line, valid line.
  const jsonlRaw = '{"a":1}\n\nbroken\n{"b":2}\n';

  it("P-01 round-trip parse → emit is byte-stable across all kinds", () => {
    expect(emitMd(parseMd(mdRaw).ast)).toBe(mdRaw);
    expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(jsoncRaw);
    expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(jsonlRaw);
  });

  it("P-02 resolve is non-mutating across all kinds", () => {
    // Snapshot each AST as JSON, run resolves, and compare the snapshot.
    const md = parseMd(mdRaw).ast;
    let before = JSON.stringify(md);
    resolveMdOcPath(md, parseOcPath("oc://X/[frontmatter]/name"));
    resolveMdOcPath(md, parseOcPath("oc://X/boundaries"));
    expect(JSON.stringify(md)).toBe(before);

    const jsonc = parseJsonc(jsoncRaw).ast;
    before = JSON.stringify(jsonc);
    resolveJsoncOcPath(jsonc, parseOcPath("oc://X/k"));
    resolveJsoncOcPath(jsonc, parseOcPath("oc://X/n.0"));
    expect(JSON.stringify(jsonc)).toBe(before);

    const jsonl = parseJsonl(jsonlRaw).ast;
    before = JSON.stringify(jsonl);
    resolveJsonlOcPath(jsonl, parseOcPath("oc://X/L1"));
    resolveJsonlOcPath(jsonl, parseOcPath("oc://X/$last"));
    expect(JSON.stringify(jsonl)).toBe(before);
  });

  it("P-03 unresolvable set never throws across all kinds", () => {
    const ocPath = parseOcPath("oc://X/totally.missing.path");
    expect(() => setMdOcPath(parseMd(mdRaw).ast, ocPath, "x")).not.toThrow();
    expect(() =>
      setJsoncOcPath(parseJsonc(jsoncRaw).ast, ocPath, {
        kind: "string",
        value: "x",
      }),
    ).not.toThrow();
    expect(() =>
      setJsonlOcPath(parseJsonl(jsonlRaw).ast, ocPath, {
        kind: "string",
        value: "x",
      }),
    ).not.toThrow();
  });

  it("P-04 inferKind aligns with the parser actually used", () => {
    expect(inferKind("AGENTS.md")).toBe("md");
    expect(inferKind("SOUL.md")).toBe("md");
    expect(inferKind("config.jsonc")).toBe("jsonc");
    expect(inferKind("plugins.json")).toBe("jsonc");
    expect(inferKind("events.jsonl")).toBe("jsonl");
    expect(inferKind("audit.ndjson")).toBe("jsonl");
  });

  it("P-05 parse → emit → parse is fixpoint across all kinds", () => {
    const md1 = emitMd(parseMd(mdRaw).ast);
    const md2 = emitMd(parseMd(md1).ast);
    expect(md1).toBe(md2);

    const jc1 = emitJsonc(parseJsonc(jsoncRaw).ast);
    const jc2 = emitJsonc(parseJsonc(jc1).ast);
    expect(jc1).toBe(jc2);

    const jl1 = emitJsonl(parseJsonl(jsonlRaw).ast);
    const jl2 = emitJsonl(parseJsonl(jl1).ast);
    expect(jl1).toBe(jl2);
  });

  it("P-06 hostile inputs do not throw at parse time across all kinds", () => {
    // Every hostile input is fed to every parser, regardless of kind.
    const hostile = [
      "\x00\x01\x02 binary garbage",
      '{ "unclosed":',
      "## heading without anything",
      "\n\n\n\n\n",
    ];
    for (const raw of hostile) {
      expect(() => parseMd(raw)).not.toThrow();
      expect(() => parseJsonc(raw)).not.toThrow();
      expect(() => parseJsonl(raw)).not.toThrow();
    }
  });

  it("P-07 resolver returns null for paths past valid kinds (no throw)", () => {
    // Only the no-throw half of the title is asserted here.
    const overlong = parseOcPath("oc://X/a/b/c.d.e.f.g.h");
    expect(() => resolveMdOcPath(parseMd(mdRaw).ast, overlong)).not.toThrow();
    expect(() => resolveJsoncOcPath(parseJsonc(jsoncRaw).ast, overlong)).not.toThrow();
    expect(() => resolveJsonlOcPath(parseJsonl(jsonlRaw).ast, overlong)).not.toThrow();
  });

  it("P-08 set-then-resolve produces the value just written (jsonc)", () => {
    const ast = parseJsonc('{ "k": 1 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath("oc://X/k"), {
      kind: "number",
      value: 42,
    });
    // Assert the guards explicitly: without these, a failed set or an
    // unexpected match kind would skip the value check and the test would
    // pass without verifying anything.
    expect(r.ok).toBe(true);
    if (r.ok) {
      const m = resolveJsoncOcPath(r.ast, parseOcPath("oc://X/k"));
      expect(m?.kind).toBe("object-entry");
      if (m?.kind === "object-entry") {
        expect(m.node.value).toMatchObject({ kind: "number", value: 42 });
      }
    }
  });

  it("P-09 verbs are deterministic — same input twice produces same output", () => {
    expect(emitMd(parseMd(mdRaw).ast)).toBe(emitMd(parseMd(mdRaw).ast));
    expect(emitJsonc(parseJsonc(jsoncRaw).ast)).toBe(emitJsonc(parseJsonc(jsoncRaw).ast));
    expect(emitJsonl(parseJsonl(jsonlRaw).ast)).toBe(emitJsonl(parseJsonl(jsonlRaw).ast));
  });

  it("P-10 inferKind returns null for unknown extensions", () => {
    expect(inferKind("binary.bin")).toBeNull();
    expect(inferKind("no-ext")).toBeNull();
    expect(inferKind("archive.tar.gz")).toBeNull();
  });
});

View File

@@ -0,0 +1,168 @@
/**
* Wave 19 — edit → emit round-trip across all kinds.
*
* Substrate guarantee: parse → setXxxOcPath → emitXxx produces valid
* bytes that re-parse to an AST whose addressed value reflects the edit.
* Per-kind verbs throughout — caller picks based on AST type.
*/
import { describe, expect, it } from "vitest";
import { setMdOcPath } from "../../edit.js";
import { emitMd } from "../../emit.js";
import { setJsoncOcPath } from "../../jsonc/edit.js";
import { emitJsonc } from "../../jsonc/emit.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { resolveJsoncOcPath } from "../../jsonc/resolve.js";
import { setJsonlOcPath } from "../../jsonl/edit.js";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
describe("wave-19 edit-then-emit round-trip", () => {
  // Convention in this file: every successful edit asserts `r.ok` BEFORE
  // narrowing on it, so a failed set can never make a case pass vacuously.

  it("EE-01 md frontmatter edit re-parses to the new value", () => {
    const md = parseMd("---\nname: old\n---\n\n## Body\n").ast;
    const r = setMdOcPath(md, parseOcPath("oc://AGENTS.md/[frontmatter]/name"), "new");
    expect(r.ok).toBe(true);
    if (r.ok) {
      const reparsed = parseMd(r.ast.raw).ast;
      expect(reparsed.frontmatter.find((e) => e.key === "name")?.value).toBe("new");
    }
  });

  it("EE-02 md item kv edit re-parses to the new value", () => {
    const md = parseMd("## Boundaries\n\n- timeout: 5\n").ast;
    const r = setMdOcPath(md, parseOcPath("oc://AGENTS.md/boundaries/timeout/timeout"), "60");
    expect(r.ok).toBe(true);
    if (r.ok) {
      const reparsed = parseMd(emitMd(r.ast)).ast;
      const block = reparsed.blocks.find((b) => b.slug === "boundaries");
      expect(block?.items[0]?.kv?.value).toBe("60");
    }
  });

  it("EE-03 jsonc value edit re-parses to the new value", () => {
    const ast = parseJsonc('{ "k": 1 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath("oc://config/k"), {
      kind: "number",
      value: 42,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      expect(JSON.parse(emitJsonc(r.ast))).toEqual({ k: 42 });
    }
  });

  it("EE-04 jsonc nested edit preserves untouched siblings", () => {
    const ast = parseJsonc('{ "a": 1, "b": { "c": 2, "d": 3 }, "e": 4 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath("oc://config/b.c"), {
      kind: "number",
      value: 99,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      expect(JSON.parse(emitJsonc(r.ast))).toEqual({
        a: 1,
        b: { c: 99, d: 3 },
        e: 4,
      });
    }
  });

  it("EE-05 jsonl line edit re-parses to the new value at the same line", () => {
    const ast = parseJsonl('{"a":1}\n{"a":2}\n{"a":3}\n').ast;
    const r = setJsonlOcPath(ast, parseOcPath("oc://log/L2/a"), {
      kind: "number",
      value: 99,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      const reparsed = parseJsonl(emitJsonl(r.ast)).ast;
      // L2 in the path is 1-based; index 1 is the same line in `lines`.
      const line2 = reparsed.lines[1];
      expect(line2?.kind).toBe("value");
      if (line2?.kind === "value") {
        // Assert the shape before narrowing so a non-object line fails loudly.
        expect(line2.value.kind).toBe("object");
        if (line2.value.kind === "object") {
          const entry = line2.value.entries.find((e) => e.key === "a");
          expect(entry?.value).toMatchObject({ kind: "number", value: 99 });
        }
      }
    }
  });

  it("EE-06 jsonc edit composes: two sequential edits both land", () => {
    let ast = parseJsonc('{ "a": 1, "b": 2 }').ast;
    let r = setJsoncOcPath(ast, parseOcPath("oc://config/a"), {
      kind: "number",
      value: 10,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      ast = r.ast;
    }
    // The second edit starts from the AST produced by the first one.
    r = setJsoncOcPath(ast, parseOcPath("oc://config/b"), {
      kind: "number",
      value: 20,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      ast = r.ast;
    }
    expect(JSON.parse(emitJsonc(ast))).toEqual({ a: 10, b: 20 });
  });

  it("EE-07 missing path returns structured failure (not throw)", () => {
    const ast = parseJsonc('{ "a": 1 }').ast;
    const r = setJsoncOcPath(ast, parseOcPath("oc://config/missing"), {
      kind: "number",
      value: 99,
    });
    expect(r.ok).toBe(false);
    if (!r.ok) {
      expect(r.reason).toBe("unresolved");
    }
  });

  it("EE-08 each per-kind verb takes its own AST type — no cross-kind leakage", () => {
    // Each setter is called with the AST produced by its own kind's parser;
    // all three edits are expected to succeed.
    const md = parseMd("---\nx: 1\n---\n").ast;
    const jsonc = parseJsonc('{"x":1}').ast;
    const jsonl = parseJsonl('{"x":1}\n').ast;
    const a = setMdOcPath(md, parseOcPath("oc://X/[frontmatter]/x"), "2");
    const b = setJsoncOcPath(jsonc, parseOcPath("oc://X/x"), {
      kind: "number",
      value: 2,
    });
    const c = setJsonlOcPath(jsonl, parseOcPath("oc://X/L1/x"), {
      kind: "number",
      value: 2,
    });
    expect(a.ok).toBe(true);
    expect(b.ok).toBe(true);
    expect(c.ok).toBe(true);
  });

  it("EE-09 jsonc parser-backed edit preserves comments", () => {
    const raw = '{\n "k": 1 // comment\n}\n';
    const ast = parseJsonc(raw).ast;
    const r = setJsoncOcPath(ast, parseOcPath("oc://config/k"), {
      kind: "number",
      value: 2,
    });
    expect(r.ok).toBe(true);
    if (r.ok) {
      // The trailing line comment must survive the value edit.
      expect(emitJsonc(r.ast)).toContain("// comment");
      const reparsed = resolveJsoncOcPath(r.ast, parseOcPath("oc://config/k"));
      expect(reparsed?.kind).toBe("object-entry");
      if (reparsed?.kind === "object-entry") {
        expect(reparsed.node.value).toMatchObject({ kind: "number", value: 2 });
      }
    }
  });

  it("EE-10 edit on empty AST surfaces no-root", () => {
    const ast = parseJsonc("").ast;
    const r = setJsoncOcPath(ast, parseOcPath("oc://config/x"), {
      kind: "number",
      value: 1,
    });
    expect(r.ok).toBe(false);
    if (!r.ok) {
      expect(r.reason).toBe("no-root");
    }
  });
});

View File

@@ -0,0 +1,140 @@
/**
* Wave 2 — frontmatter edges.
*
* Substrate guarantee: frontmatter is parsed as `key: value` entries
* with quote-stripping; malformed frontmatter doesn't crash the parser
* (soft-error policy: emit diagnostic, recover).
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-02 frontmatter-edges", () => {
  it("FM-01 simple kv pairs", () => {
    const { ast } = parseMd("---\nname: x\ndescription: y\n---\n");
    expect(ast.frontmatter.map((e) => [e.key, e.value])).toEqual([
      ["name", "x"],
      ["description", "y"],
    ]);
  });

  it("FM-02 unclosed frontmatter emits diagnostic, treats as preamble", () => {
    const { ast, diagnostics } = parseMd("---\nname: x\nno close fence\nbody\n");
    expect(diagnostics.some((d) => d.code === "OC_FRONTMATTER_UNCLOSED")).toBe(true);
    expect(ast.frontmatter).toEqual([]);
  });

  it("FM-03 empty frontmatter (just open + close)", () => {
    const { ast } = parseMd("---\n---\n");
    expect(ast.frontmatter).toEqual([]);
  });

  it("FM-04 frontmatter only, file has no other content", () => {
    const { ast } = parseMd("---\nk: v\n---\n");
    expect(ast.frontmatter).toEqual([{ key: "k", value: "v", line: 2 }]);
    expect(ast.preamble).toBe("");
    expect(ast.blocks).toEqual([]);
  });

  it("FM-05 double-quoted value", () => {
    const { ast } = parseMd('---\ntitle: "Hello, world"\n---\n');
    expect(ast.frontmatter[0]?.value).toBe("Hello, world");
  });

  it("FM-06 single-quoted value", () => {
    const { ast } = parseMd("---\ntitle: 'Hello, world'\n---\n");
    expect(ast.frontmatter[0]?.value).toBe("Hello, world");
  });

  it("FM-07 unquoted value with internal colons preserved", () => {
    const { ast } = parseMd("---\nurl: https://example.com:443/p\n---\n");
    expect(ast.frontmatter[0]?.value).toBe("https://example.com:443/p");
  });

  it("FM-08 empty value", () => {
    const { ast } = parseMd("---\nk:\n---\n");
    expect(ast.frontmatter[0]).toEqual({ key: "k", value: "", line: 2 });
  });

  it("FM-09 value with leading/trailing whitespace trimmed", () => {
    const { ast } = parseMd("---\nk: spaced \n---\n");
    expect(ast.frontmatter[0]?.value).toBe("spaced");
  });

  it("FM-10 list-style continuations are silently dropped (substrate stays opinion-free)", () => {
    const { ast } = parseMd("---\ntools:\n - gh\n - curl\n---\n");
    // `tools:` has an empty inline value and the ` - gh` / ` - curl`
    // continuation lines produce no entries of their own. Structural
    // reading of list values is left to lint rules, not the substrate.
    expect(ast.frontmatter.map((e) => e.key)).toEqual(["tools"]);
    expect(ast.frontmatter[0]?.value).toBe("");
  });

  it("FM-11 line numbers are 1-based and accurate", () => {
    const { ast } = parseMd("---\nk1: v1\nk2: v2\nk3: v3\n---\n");
    expect(ast.frontmatter.map((e) => [e.key, e.line])).toEqual([
      ["k1", 2],
      ["k2", 3],
      ["k3", 4],
    ]);
  });

  it("FM-12 dash-key allowed", () => {
    const { ast } = parseMd("---\nuser-invocable: true\n---\n");
    expect(ast.frontmatter[0]?.key).toBe("user-invocable");
  });

  it("FM-13 underscore-key allowed", () => {
    const { ast } = parseMd("---\nparam_set: foo\n---\n");
    expect(ast.frontmatter[0]?.key).toBe("param_set");
  });

  it("FM-14 number-only value preserved as string", () => {
    const { ast } = parseMd("---\ntimeout: 15000\n---\n");
    expect(ast.frontmatter[0]?.value).toBe("15000");
  });

  it("FM-15 boolean-like value preserved as string", () => {
    const { ast } = parseMd("---\nenabled: true\n---\n");
    expect(ast.frontmatter[0]?.value).toBe("true");
  });

  it("FM-16 blank lines inside frontmatter are skipped", () => {
    const { ast } = parseMd("---\n\nk1: v1\n\nk2: v2\n\n---\n");
    expect(ast.frontmatter.map((e) => e.key)).toEqual(["k1", "k2"]);
  });

  it("FM-17 frontmatter with same key twice — both retained (no dedup)", () => {
    // Substrate doesn't dedup; lint rules can flag duplicates if needed.
    const { ast } = parseMd("---\nk: v1\nk: v2\n---\n");
    expect(ast.frontmatter).toEqual([
      { key: "k", value: "v1", line: 2 },
      { key: "k", value: "v2", line: 3 },
    ]);
  });

  it("FM-18 frontmatter must be at start — leading blank line breaks detection", () => {
    const { ast } = parseMd("\n---\nk: v\n---\n");
    expect(ast.frontmatter).toEqual([]);
  });

  it("FM-19 frontmatter must be at start — leading text breaks detection", () => {
    const { ast } = parseMd("intro\n\n---\nk: v\n---\n");
    expect(ast.frontmatter).toEqual([]);
  });

  it("FM-20 BOM before frontmatter open is tolerated", () => {
    // The BOM is spelled as an escape on purpose: a literal U+FEFF in the
    // source is invisible and is easily stripped by editors and tooling,
    // which would silently turn this into a duplicate of the no-BOM case.
    const { ast } = parseMd("\uFEFF---\nname: bom\n---\n");
    expect(ast.frontmatter[0]?.value).toBe("bom");
  });

  it("FM-21 single-line file with `---` and `---` is empty frontmatter", () => {
    // No trailing newline after the closing fence.
    const { ast } = parseMd("---\n---");
    expect(ast.frontmatter).toEqual([]);
  });

  it("FM-22 hash-prefixed lines skipped (not yaml comments — just don't match kv regex)", () => {
    const { ast } = parseMd("---\n# comment\nk: v\n---\n");
    expect(ast.frontmatter.map((e) => e.key)).toEqual(["k"]);
  });
});

View File

@@ -0,0 +1,149 @@
/**
* Wave 3 — H2 block split.
*
* Substrate guarantee: `## ` at column 0 outside fenced code blocks
* starts a new H2 block. H1 (`# `), H3 (`### `), and `## ` inside
* fenced code blocks do NOT split.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-03 h2-block-split", () => {
  // Shorthand for the cases that only care about which headings split.
  const headingsOf = (raw: string) => parseMd(raw).ast.blocks.map((block) => block.heading);

  it("H2-01 no headings → no blocks, all preamble", () => {
    const source = "Just prose, no headings.\nMore prose.\n";
    const tree = parseMd(source).ast;
    expect(tree.blocks).toEqual([]);
    // The preamble keeps the trailing newline of the input; trimming is
    // the caller's job.
    expect(tree.preamble).toBe("Just prose, no headings.\nMore prose.\n");
  });

  it("H2-02 single heading splits preamble + one block", () => {
    const tree = parseMd("preamble\n## Section\nbody\n").ast;
    expect(tree.preamble.trim()).toBe("preamble");
    expect(tree.blocks.length).toBe(1);
    const section = tree.blocks[0];
    expect(section?.heading).toBe("Section");
    expect(section?.bodyText.trim()).toBe("body");
  });

  it("H2-03 multiple headings produce blocks in order", () => {
    expect(headingsOf("## A\nbody-a\n## B\nbody-b\n## C\nbody-c\n")).toEqual(["A", "B", "C"]);
  });

  it("H2-04 H1 does NOT split", () => {
    const tree = parseMd("# H1 heading\n## H2 heading\n").ast;
    expect(tree.blocks.length).toBe(1);
    expect(tree.blocks[0]?.heading).toBe("H2 heading");
    // The H1 line stays in the preamble.
    expect(tree.preamble).toContain("# H1 heading");
  });

  it("H2-05 H3 does NOT split", () => {
    const tree = parseMd("## H2\nbody\n### H3\nstill in H2 block\n").ast;
    expect(tree.blocks.length).toBe(1);
    expect(tree.blocks[0]?.bodyText).toContain("### H3");
  });

  it("H2-06 `## ` inside fenced code block does NOT split", () => {
    const source = "## Real\n\n```md\n## Inside code\n```\n\n## Another real\n";
    expect(headingsOf(source)).toEqual(["Real", "Another real"]);
  });

  it("H2-07 `##` without trailing space — does NOT match (regex requires \\s+)", () => {
    const tree = parseMd("##NoSpace\n## With space\n").ast;
    expect(tree.blocks.length).toBe(1);
    expect(tree.blocks[0]?.heading).toBe("With space");
  });

  it("H2-08 leading whitespace before `##` — does NOT match (regex anchored at line start)", () => {
    const tree = parseMd(" ## indented\n## not indented\n").ast;
    expect(tree.blocks.length).toBe(1);
    expect(tree.blocks[0]?.heading).toBe("not indented");
  });

  it("H2-09 trailing whitespace on heading — trimmed in heading text", () => {
    const section = parseMd("## Trailing \n").ast.blocks[0];
    expect(section?.heading).toBe("Trailing");
    expect(section?.slug).toBe("trailing");
  });

  it("H2-10 inline code in heading preserved", () => {
    const section = parseMd("## Use `gh` for GitHub\n").ast.blocks[0];
    expect(section?.heading).toBe("Use `gh` for GitHub");
  });

  it("H2-11 markdown formatting in heading preserved", () => {
    const section = parseMd("## **Bold** *italic*\n").ast.blocks[0];
    expect(section?.heading).toBe("**Bold** *italic*");
  });

  it("H2-12 immediately after frontmatter", () => {
    const tree = parseMd("---\nk: v\n---\n## Section\nbody\n").ast;
    expect(tree.blocks[0]?.heading).toBe("Section");
    expect(tree.preamble).toBe("");
  });

  it("H2-13 H2 at end of file (no body)", () => {
    const section = parseMd("preamble\n## End\n").ast.blocks[0];
    expect(section?.heading).toBe("End");
    expect(section?.bodyText).toBe("");
  });

  it("H2-14 two consecutive H2s — empty body block between", () => {
    const [first, second] = parseMd("## A\n## B\n").ast.blocks;
    expect(first?.bodyText).toBe("");
    expect(second?.heading).toBe("B");
  });

  it("H2-15 line numbers are 1-based and track through frontmatter", () => {
    const section = parseMd("---\nk: v\n---\n## At line 4\n").ast.blocks[0];
    expect(section?.line).toBe(4);
  });

  it("H2-16 line numbers track through preamble", () => {
    const section = parseMd("line 1\nline 2\n## At line 3\n").ast.blocks[0];
    expect(section?.line).toBe(3);
  });

  it("H2-17 nested fenced code blocks (~~~ vs ```) — only ``` is detected", () => {
    // `~~~` is not treated as a fence, so content between tildes gets no
    // protection from heading detection. This case only pins that the
    // surrounding real headings still split as usual.
    const source = "## H\n\n~~~md\n~~~\n\n## Next\n";
    expect(headingsOf(source)).toEqual(["H", "Next"]);
  });

  it("H2-18 setext-style heading (`Heading\\n========\\n`) is NOT recognized", () => {
    // Setext underlines are not a heading form here; the two lines end up
    // ahead of the first ATX `## ` heading, which is the only block.
    const source = "Heading\n=======\n## Real\n";
    const tree = parseMd(source).ast;
    expect(tree.blocks.length).toBe(1);
    expect(tree.blocks[0]?.heading).toBe("Real");
  });

  it("H2-19 empty heading text (`## `)", () => {
    const tree = parseMd("## \n").ast;
    // A marker with no heading text is not accepted as a heading, so no
    // block is produced.
    expect(tree.blocks).toEqual([]);
  });

  it("H2-20 heading with only whitespace (`## `)", () => {
    const tree = parseMd("## \n").ast;
    expect(tree.blocks).toEqual([]);
  });

  it("H2-21 heading-shaped text inside multi-line bullet body — does split", () => {
    // A line-start `## ` is a heading even when the preceding lines look
    // like a bullet with a continuation line.
    const source = "## Section\n- item starts\n continues\n## Next\n";
    expect(headingsOf(source)).toEqual(["Section", "Next"]);
  });
});

View File

@@ -0,0 +1,146 @@
/**
* Wave 4 — items (bullets + kv).
*
* Substrate guarantee: bullet lines (`- text`, `* text`, `+ text`) inside
* H2 blocks are extracted as `AstItem`. Lines matching `- key: value`
* also populate `item.kv`. Items inside fenced code blocks are NOT
* extracted.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
describe("wave-04 items", () => {
  it("I-01 plain dash bullets", () => {
    const { ast } = parseMd("## H\n- a\n- b\n- c\n");
    expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b", "c"]);
  });

  it("I-02 star bullets", () => {
    const { ast } = parseMd("## H\n* a\n* b\n");
    expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b"]);
  });

  it("I-03 plus bullets", () => {
    const { ast } = parseMd("## H\n+ a\n+ b\n");
    expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["a", "b"]);
  });

  it("I-04 mixed bullet markers in same section", () => {
    const { ast } = parseMd("## H\n- dash\n* star\n+ plus\n");
    expect(ast.blocks[0]?.items.length).toBe(3);
  });

  it("I-05 kv-shape items populate kv", () => {
    const { ast } = parseMd("## H\n- gh: GitHub CLI\n");
    expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "gh", value: "GitHub CLI" });
  });

  it("I-06 plain item has no kv", () => {
    const { ast } = parseMd("## H\n- plain text\n");
    expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
  });

  it("I-07 multiple colons — first colon is the kv split", () => {
    const { ast } = parseMd("## H\n- url: http://x.com:80/p\n");
    expect(ast.blocks[0]?.items[0]?.kv).toEqual({
      key: "url",
      value: "http://x.com:80/p",
    });
  });

  it("I-08 colon with no space after is still kv", () => {
    const { ast } = parseMd("## H\n- key:value\n");
    expect(ast.blocks[0]?.items[0]?.kv).toEqual({ key: "key", value: "value" });
  });

  it("I-09 quoted value preserved verbatim (no unquote at item layer)", () => {
    const { ast } = parseMd('## H\n- title: "quoted: value"\n');
    expect(ast.blocks[0]?.items[0]?.kv?.value).toBe('"quoted: value"');
  });

  it("I-10 slug from kv key when kv present", () => {
    const { ast } = parseMd("## H\n- The Tool: description\n");
    expect(ast.blocks[0]?.items[0]?.slug).toBe("the-tool");
  });

  it("I-11 slug from item text when no kv", () => {
    const { ast } = parseMd("## H\n- The Plain Item\n");
    expect(ast.blocks[0]?.items[0]?.slug).toBe("the-plain-item");
  });

  it("I-12 items inside fenced code block are NOT extracted", () => {
    const raw = "## H\n```\n- not a bullet\n- still not\n```\n- real bullet\n";
    const { ast } = parseMd(raw);
    expect(ast.blocks[0]?.items.length).toBe(1);
    expect(ast.blocks[0]?.items[0]?.text).toBe("real bullet");
  });

  it("I-13 line numbers track through block body", () => {
    const { ast } = parseMd("## H\n- first\n- second\n- third\n");
    expect(ast.blocks[0]?.items.map((i) => i.line)).toEqual([2, 3, 4]);
  });

  it("I-14 trailing whitespace on bullet trimmed in text", () => {
    const { ast } = parseMd("## H\n- spaced \n");
    expect(ast.blocks[0]?.items[0]?.text).toBe("spaced");
  });

  it("I-15 empty bullet text is dropped", () => {
    const { ast } = parseMd("## H\n- \n- real\n");
    // A marker with no text after it is not an item; only `- real` counts.
    expect(ast.blocks[0]?.items.length).toBe(1);
  });

  // Title corrected: the assertion below pins that the indented bullet is
  // NOT extracted, which the previous wording ("still picks up") contradicted.
  it("I-16 indented bullet (sub-bullet) is NOT extracted as item", () => {
    // Bullet markers must start at column 0. Sub-bullets stay in the body
    // text but do not appear in `items` — a documented limit.
    const { ast } = parseMd("## H\n- top\n - sub\n");
    expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["top"]);
  });

  it("I-17 numbered list (1. item) is NOT extracted as item", () => {
    const { ast } = parseMd("## H\n1. first\n2. second\n");
    expect(ast.blocks[0]?.items).toEqual([]);
  });

  it("I-18 items in a section with no body before — first item line is heading+1", () => {
    const { ast } = parseMd("## H\n- a\n");
    expect(ast.blocks[0]?.items[0]?.line).toBe(2);
  });

  it("I-19 items spread across blocks are scoped to their block", () => {
    const { ast } = parseMd("## A\n- a1\n## B\n- b1\n- b2\n");
    expect(ast.blocks[0]?.items.length).toBe(1);
    expect(ast.blocks[1]?.items.length).toBe(2);
    expect(ast.blocks[1]?.items.map((i) => i.text)).toEqual(["b1", "b2"]);
  });

  it("I-20 item with only-symbol kv key still parses", () => {
    const { ast } = parseMd("## H\n- API_KEY: secret-value\n");
    expect(ast.blocks[0]?.items[0]?.kv).toEqual({
      key: "API_KEY",
      value: "secret-value",
    });
    // The slug is lower-cased and the underscore becomes a hyphen.
    expect(ast.blocks[0]?.items[0]?.slug).toBe("api-key");
  });

  it("I-21 item with kv where value is empty", () => {
    const { ast } = parseMd("## H\n- key:\n");
    // With nothing after the colon the line is not treated as kv; it is
    // kept as a plain item whose text still includes the colon.
    expect(ast.blocks[0]?.items[0]?.kv).toBeUndefined();
    expect(ast.blocks[0]?.items[0]?.text).toBe("key:");
  });

  it("I-22 bullet in preamble (before first H2) is NOT in any block", () => {
    const { ast } = parseMd("- preamble bullet\n## H\n- block bullet\n");
    expect(ast.blocks[0]?.items.map((i) => i.text)).toEqual(["block bullet"]);
    expect(ast.preamble).toContain("- preamble bullet");
  });

  it("I-23 bullet with internal markdown (italics, code) preserved in text", () => {
    const { ast } = parseMd("## H\n- use *gh* and `curl`\n");
    expect(ast.blocks[0]?.items[0]?.text).toBe("use *gh* and `curl`");
  });
});

View File

@@ -0,0 +1,198 @@
/**
* Wave 15 — JSONC byte-fidelity round-trip.
*
* Substrate guarantee: `emitJsonc(parseJsonc(raw)) === raw` for every
* input the parser accepts. Mirrors wave-01 but for the JSONC kind.
* Comments, trailing commas, BOMs, mixed line endings — all byte-stable
* via the round-trip path.
*
* **What this file proves**: byte-identical round-trip via the
* default-mode emit (which echoes `ast.raw`). This is necessary but
* not sufficient — without the structural assertions below, a parser
* that emitted `ast.root: null` for every input would still pass the
* byte test (since `raw` is preserved on the AST regardless).
*
* Each assertParseable() call proves the parser actually ran and
* produced a structural tree, not just stored `raw` verbatim and
* called it a day. JC-17 deliberately uses `assertNotParseable` —
* malformed input must echo `raw` AND emit a diagnostic.
*/
import { describe, expect, it } from "vitest";
import type { JsoncValue } from "../../jsonc/ast.js";
import { emitJsonc } from "../../jsonc/emit.js";
import { parseJsonc } from "../../jsonc/parse.js";
/** Round-trips `raw` through the JSONC parser and the default-mode emitter. */
function rt(raw: string): string {
  const { ast } = parseJsonc(raw);
  return emitJsonc(ast);
}
/**
 * Guards against a parser that only echoes `raw`: parses the input and
 * requires a non-null structural root. A parser that left `root` null
 * would still satisfy the byte-fidelity check, so this is what proves
 * real parsing happened. The root is returned for follow-up structural
 * assertions.
 */
function assertParseable(raw: string): JsoncValue {
  const { root } = parseJsonc(raw).ast;
  expect(root).not.toBeNull();
  return root as JsoncValue;
}
/**
 * Counterpart of `assertParseable` for malformed input: the parse must
 * yield a null root AND report at least one error-severity diagnostic.
 * JC-17 relies on the diagnostic half so that silently dropping
 * malformed content cannot pass.
 */
function assertNotParseable(raw: string): void {
  const { ast, diagnostics } = parseJsonc(raw);
  expect(ast.root).toBeNull();
  const reportedError = diagnostics.some((diag) => diag.severity === "error");
  expect(reportedError).toBe(true);
}
// Byte-fidelity suite for JSONC. Every case pins the round-trip AND an
// unguarded structural assertion, so a wrong root kind fails the test
// instead of silently skipping the `if`-narrowed follow-up checks.
describe("wave-15 jsonc byte-fidelity", () => {
  it("JC-01 empty file", () => {
    expect(rt("")).toBe("");
  });

  it("JC-02 whitespace-only", () => {
    expect(rt(" \n\n \n")).toBe(" \n\n \n");
  });

  it("JC-03 empty object", () => {
    expect(rt("{}")).toBe("{}");
    const root = assertParseable("{}");
    expect(root.kind).toBe("object");
    if (root.kind === "object") {
      expect(root.entries).toHaveLength(0);
    }
  });

  it("JC-04 empty array", () => {
    expect(rt("[]")).toBe("[]");
    const root = assertParseable("[]");
    expect(root.kind).toBe("array");
    if (root.kind === "array") {
      expect(root.items).toHaveLength(0);
    }
  });

  it("JC-05 trivial scalar root", () => {
    expect(rt("42")).toBe("42");
    expect(rt('"x"')).toBe('"x"');
    expect(rt("true")).toBe("true");
    expect(rt("null")).toBe("null");
    expect(assertParseable("42").kind).toBe("number");
    expect(assertParseable('"x"').kind).toBe("string");
    expect(assertParseable("true").kind).toBe("boolean");
    expect(assertParseable("null").kind).toBe("null");
  });

  it("JC-06 line comments preserved", () => {
    const raw = '// a leading comment\n{ "x": 1 } // trailing\n';
    expect(rt(raw)).toBe(raw);
    // Pin parse: the structural entry `x` is reachable despite comments.
    const root = assertParseable(raw);
    expect(root.kind).toBe("object");
    if (root.kind === "object") {
      expect(root.entries.map((e) => e.key)).toEqual(["x"]);
    }
  });

  it("JC-07 block comments preserved", () => {
    const raw = '/* header */\n{\n /* inline */\n "x": 1\n}\n';
    expect(rt(raw)).toBe(raw);
    const root = assertParseable(raw);
    expect(root.kind).toBe("object");
  });

  it("JC-08 trailing commas preserved", () => {
    const raw = '{\n "x": 1,\n "y": 2,\n}';
    expect(rt(raw)).toBe(raw);
    const root = assertParseable(raw);
    // Unguarded kind check: the `if` below only narrows the type.
    expect(root.kind).toBe("object");
    if (root.kind === "object") {
      expect(root.entries).toHaveLength(2);
    }
  });

  it("JC-09 mixed CRLF + LF preserved", () => {
    const raw = '{\r\n "x": 1,\n "y": 2\r\n}';
    expect(rt(raw)).toBe(raw);
    const root = assertParseable(raw);
    expect(root.kind).toBe("object");
    if (root.kind === "object") {
      expect(root.entries.map((e) => e.key)).toEqual(["x", "y"]);
    }
  });

  it("JC-10 BOM preserved on raw", () => {
    // Explicit escape: a literal U+FEFF is invisible in source and easily
    // lost by editors/tooling, which would make this case vacuous.
    const raw = '\uFEFF{ "x": 1 }';
    expect(rt(raw)).toBe(raw);
    // BOM stripped before parsing — parser still sees `{` as first char.
    expect(assertParseable(raw).kind).toBe("object");
  });

  it("JC-11 deeply nested structures preserved", () => {
    const raw = '{ "a": { "b": { "c": { "d": [1, [2, [3, [4]]]] } } } }';
    expect(rt(raw)).toBe(raw);
    expect(assertParseable(raw).kind).toBe("object");
  });

  it("JC-12 string with escape sequences preserved", () => {
    const raw = '{ "s": "a\\nb\\tc\\u0041\\\\d\\"e" }';
    expect(rt(raw)).toBe(raw);
    // Pin escape resolution — parsed value carries actual control chars.
    const root = assertParseable(raw);
    expect(root.kind).toBe("object");
    if (root.kind === "object") {
      const s = root.entries[0]?.value;
      expect(s?.kind).toBe("string");
      if (s?.kind === "string") {
        expect(s.value).toBe('a\nb\tcA\\d"e');
      }
    }
  });

  it("JC-13 numbers in scientific / negative / decimal forms preserved", () => {
    const raw = "[ 0, -0, 1.5, -3.14, 1e3, -2.5e-10, 1E+5 ]";
    expect(rt(raw)).toBe(raw);
    const root = assertParseable(raw);
    expect(root.kind).toBe("array");
    if (root.kind === "array") {
      expect(root.items).toHaveLength(7);
      expect(root.items.every((v) => v.kind === "number")).toBe(true);
    }
  });

  it("JC-14 unicode characters preserved verbatim", () => {
    const raw = '{ "name": "héllo 世界 🎉" }';
    expect(rt(raw)).toBe(raw);
    const root = assertParseable(raw);
    expect(root.kind).toBe("object");
    if (root.kind === "object") {
      const v = root.entries[0]?.value;
      expect(v?.kind).toBe("string");
      if (v?.kind === "string") {
        expect(v.value).toBe("héllo 世界 🎉");
      }
    }
  });

  it("JC-15 idiosyncratic whitespace preserved", () => {
    const raw = '{ "x" : 1 ,\n "y": 2}';
    expect(rt(raw)).toBe(raw);
    expect(assertParseable(raw).kind).toBe("object");
  });

  it("JC-16 file-level trailing whitespace preserved", () => {
    const raw = '{ "x": 1 }\n\n\n';
    expect(rt(raw)).toBe(raw);
    expect(assertParseable(raw).kind).toBe("object");
  });

  it("JC-17 malformed input still emits raw verbatim AND emits a diagnostic", () => {
    const raw = '{ broken json with "key": value }';
    expect(rt(raw)).toBe(raw);
    // Without this assertion the test passes for any input regardless
    // of parser behavior — pin both halves of the contract.
    assertNotParseable(raw);
  });

  it("JC-18 comments-only file preserved", () => {
    const raw = "// just a comment\n/* and a block */\n";
    expect(rt(raw)).toBe(raw);
    // Comments-only files have no structural root — that's expected.
    expect(parseJsonc(raw).ast.root).toBeNull();
  });
});

View File

@@ -0,0 +1,140 @@
/**
* Wave 17 — JSONC resolver adversarial edges.
*
* Substrate guarantee: the resolver walks the value tree deterministically
* with mixed dotted / segment paths, returns null on any unresolvable
* walk, and never throws on hostile inputs.
*/
import { describe, expect, it } from "vitest";
import { parseJsonc } from "../../jsonc/parse.js";
import { resolveJsoncOcPath } from "../../jsonc/resolve.js";
import { parseOcPath } from "../../oc-path.js";
/** Parses `raw` as JSONC, then resolves the `oc://` path string against that AST. */
function rs(raw: string, ocPath: string) {
  const { ast } = parseJsonc(raw);
  const target = parseOcPath(ocPath);
  return resolveJsoncOcPath(ast, target);
}
// Adversarial resolver cases for JSONC: unresolvable walks return null,
// malformed paths throw at parse time, and resolving never mutates the AST.
describe("wave-17 jsonc resolver edges", () => {
  it("JR-01 root resolves on empty object", () => {
    expect(rs("{}", "oc://config")?.kind).toBe("root");
  });

  it("JR-02 root resolves on scalar root", () => {
    expect(rs("42", "oc://config")?.kind).toBe("root");
  });

  it("JR-03 root resolves on array root", () => {
    expect(rs("[1,2,3]", "oc://config")?.kind).toBe("root");
  });

  it("JR-04 deep dotted descent within section", () => {
    const m = rs('{"a":{"b":{"c":1}}}', "oc://config/a.b.c");
    expect(m?.kind).toBe("object-entry");
  });

  it("JR-05 missing intermediate key returns null", () => {
    expect(rs('{"a":{"b":1}}', "oc://config/a.x.b")).toBeNull();
  });

  it("JR-06 numeric segment indexes into array", () => {
    const m = rs('{"items":["a","b","c"]}', "oc://config/items.1");
    expect(m?.kind).toBe("value");
    if (m?.kind === "value") {
      expect(m.node).toMatchObject({ kind: "string", value: "b" });
    }
  });

  it("JR-07 negative array index resolves to Nth-from-last", () => {
    expect(rs('{"x":[1,2]}', "oc://config/x.-1")).toMatchObject({
      kind: "value",
      node: { kind: "number", value: 2 },
    });
    expect(rs('{"x":[1,2]}', "oc://config/x.-2")).toMatchObject({
      kind: "value",
      node: { kind: "number", value: 1 },
    });
    expect(rs('{"x":[1,2]}', "oc://config/x.-5")).toBeNull();
  });

  it("JR-08 out-of-bounds array index returns null", () => {
    expect(rs('{"x":[1,2]}', "oc://config/x.99")).toBeNull();
  });

  it("JR-09 non-integer index returns null (no NaN coercion)", () => {
    expect(rs('{"x":[1,2]}', "oc://config/x.foo")).toBeNull();
  });

  it("JR-10 null AST root returns null on any path", () => {
    expect(rs("", "oc://config/x")).toBeNull();
  });

  it("JR-11 descending past a primitive returns null", () => {
    expect(rs('{"x":42}', "oc://config/x.y")).toBeNull();
  });

  it("JR-12 empty segment in dotted path throws OcPathError", () => {
    // v1 invariant: malformed paths fail loud at parse time, not silently null.
    expect(() => rs('{"x":1}', "oc://config/x..y")).toThrow(/Empty dotted sub-segment/);
  });

  it("JR-13 string value at leaf surfaces via object-entry shape", () => {
    const m = rs('{"k":"v"}', "oc://config/k");
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.key).toBe("k");
    }
  });

  it("JR-14 boolean and null values resolve", () => {
    const m1 = rs('{"k":true}', "oc://config/k");
    expect(m1?.kind).toBe("object-entry");
    const m2 = rs('{"k":null}', "oc://config/k");
    expect(m2?.kind).toBe("object-entry");
  });

  it("JR-15 mixed slash + dot segments resolve identically", () => {
    const a = rs('{"a":{"b":{"c":1}}}', "oc://config/a.b.c");
    const b = rs('{"a":{"b":{"c":1}}}', "oc://config/a/b.c");
    const c = rs('{"a":{"b":{"c":1}}}', "oc://config/a/b/c");
    // Anchor one form first: comparing kinds alone would also pass when
    // all three resolves return null (undefined === undefined).
    expect(a?.kind).toBe("object-entry");
    expect(a?.kind).toBe(b?.kind);
    expect(b?.kind).toBe(c?.kind);
  });

  it("JR-16 keys with special characters resolve", () => {
    const m = rs('{"a-b_c":{"x":1}}', "oc://config/a-b_c.x");
    expect(m?.kind).toBe("object-entry");
  });

  it("JR-17 unicode keys resolve", () => {
    const m = rs('{"héllo":1}', "oc://config/héllo");
    expect(m?.kind).toBe("object-entry");
  });

  it("JR-18 large nested structure (depth 20) resolves to leaf", () => {
    let json = '"leaf"';
    const segs: string[] = [];
    for (let i = 19; i >= 0; i--) {
      json = `{"k${i}":${json}}`;
      segs.unshift(`k${i}`);
    }
    const m = rs(json, `oc://config/${segs.join(".")}`);
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.value).toMatchObject({ kind: "string", value: "leaf" });
    }
  });

  it("JR-19 resolver is non-mutating across calls", () => {
    // Resolve against the SAME AST that is snapshotted. Going through
    // `rs` would re-parse and resolve a throwaway AST, so the snapshot
    // comparison could never detect a mutation.
    const { ast } = parseJsonc('{"x":{"y":1}}');
    const before = JSON.stringify(ast);
    for (const p of ["oc://config/x.y", "oc://config/x", "oc://config/missing"]) {
      resolveJsoncOcPath(ast, parseOcPath(p));
    }
    expect(JSON.stringify(ast)).toBe(before);
  });

  it("JR-20 hostile input shapes do not throw", () => {
    expect(() => rs("{garbage}", "oc://config/x")).not.toThrow();
    expect(() => rs('{"a":', "oc://config/a")).not.toThrow();
  });
});

View File

@@ -0,0 +1,124 @@
/**
* Wave 16 — JSONL byte-fidelity round-trip.
*
* Substrate guarantee: `emitJsonl(parseJsonl(raw)) === raw` for every
* input the parser accepts. JSONL is line-oriented; blanks, malformed
* lines, mixed line endings, trailing-newline shape — all byte-stable.
*/
import { describe, expect, it } from "vitest";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
/** Round-trips `raw` through the JSONL parser and the default-mode emitter. */
function rt(raw: string): string {
  const { ast } = parseJsonl(raw);
  return emitJsonl(ast);
}
// Byte-fidelity suite for JSONL: the default emit must reproduce the
// input exactly, and render mode must keep the input's line-ending style.
describe("wave-16 jsonl byte-fidelity", () => {
  // Shared assertion: default-mode round-trip returns the same bytes.
  const expectByteStable = (input: string): void => {
    expect(rt(input)).toBe(input);
  };

  it("JL-01 empty file", () => {
    expectByteStable("");
  });

  it("JL-02 single line no trailing newline", () => {
    expectByteStable('{"a":1}');
  });

  it("JL-03 single line with trailing newline", () => {
    expectByteStable('{"a":1}\n');
  });

  it("JL-04 multiple lines preserved", () => {
    expectByteStable('{"a":1}\n{"b":2}\n{"c":3}\n');
  });

  it("JL-05 blank line in the middle preserved", () => {
    expectByteStable('{"a":1}\n\n{"b":2}\n');
  });

  it("JL-06 multiple blank lines preserved", () => {
    expectByteStable('{"a":1}\n\n\n{"b":2}\n');
  });

  it("JL-07 malformed line round-trips verbatim", () => {
    expectByteStable('{"a":1}\nthis is not json\n{"b":2}\n');
  });

  it("JL-08 entirely malformed file round-trips", () => {
    expectByteStable("header\nbody\nfooter\n");
  });

  it("JL-09 leading + trailing blanks preserved", () => {
    expectByteStable('\n\n{"a":1}\n\n');
  });

  it("JL-10 file ending without final newline preserved", () => {
    expectByteStable('{"a":1}\n{"b":2}');
  });

  it("JL-11 nested object lines preserved", () => {
    expectByteStable('{"a":{"b":{"c":1}}}\n{"x":[1,[2,[3]]]}\n');
  });

  it("JL-12 unicode in a value line preserved", () => {
    expectByteStable('{"name":"héllo 世界 🎉"}\n');
  });

  it("JL-13 idiosyncratic whitespace inside a line preserved", () => {
    expectByteStable('{ "a" : 1 }\n');
  });

  it("JL-14 single blank line file preserved", () => {
    expectByteStable("\n");
  });

  it("JL-15 large log (1000 lines) preserved", () => {
    const records = Array.from({ length: 1000 }, (_, idx) => `{"i":${idx}}`);
    expectByteStable(`${records.join("\n")}\n`);
  });

  it("JL-16 mixed value + malformed + blank preserved", () => {
    expectByteStable('{"a":1}\n{not json}\n\n{"b":2}\nstill not json\n{"c":3}\n');
  });

  // F10 — CRLF preservation. If the AST did not track lineEnding, a CRLF
  // input edited via setJsonlOcPath would be rebuilt by render mode with
  // `\n` joins, mixing endings on Windows-authored datasets.
  it("JL-17 CRLF input round-trips byte-identical via the default emit", () => {
    expectByteStable('{"a":1}\r\n{"b":2}\r\n{"c":3}\r\n');
  });

  it("JL-18 CRLF input preserves CRLF after a structural edit (render mode)", () => {
    // Pins the render path used by setJsonlOcPath: render mode consults
    // ast.lineEnding to rebuild the original convention rather than
    // joining with `\n`.
    const parsed = parseJsonl('{"a":1}\r\n{"b":2}\r\n');
    const rendered = emitJsonl(parsed.ast, { mode: "render" });
    expect(rendered).toBe('{"a":1}\r\n{"b":2}');
    // Exactly one CRLF join and no bare LF anywhere in the output.
    const crlfJoins = rendered.match(/\r\n/g) ?? [];
    const bareLfs = rendered.match(/(?<!\r)\n/g) ?? [];
    expect(crlfJoins.length).toBe(1);
    expect(bareLfs.length).toBe(0);
  });

  it("JL-19 LF input preserves LF after a structural edit (render mode)", () => {
    // Symmetric case: a Unix-authored log must not gain CRLF.
    const parsed = parseJsonl('{"a":1}\n{"b":2}\n');
    const rendered = emitJsonl(parsed.ast, { mode: "render" });
    expect(rendered).toBe('{"a":1}\n{"b":2}');
  });
});

View File

@@ -0,0 +1,125 @@
/**
* Wave 18 — JSONL resolver adversarial edges.
*
* Substrate guarantee: line addresses (`Lnnn`, `$last`) walk
* deterministically; missing addresses, blank-line targets, and
* malformed-line targets all surface as null without throwing.
*/
import { describe, expect, it } from "vitest";
import { parseJsonl } from "../../jsonl/parse.js";
import { resolveJsonlOcPath } from "../../jsonl/resolve.js";
import { parseOcPath } from "../../oc-path.js";
/** Parses `raw` as JSONL, then resolves the `oc://` path string against that AST. */
function rs(raw: string, ocPath: string) {
  const { ast } = parseJsonl(raw);
  const target = parseOcPath(ocPath);
  return resolveJsonlOcPath(ast, target);
}
// Adversarial resolver cases for JSONL line addresses (`Lnnn`, `$last`):
// missing, blank, and malformed targets resolve to null without throwing,
// and resolving never mutates the AST.
describe("wave-18 jsonl resolver edges", () => {
  it("JLR-01 root resolves with no segments", () => {
    expect(rs('{"a":1}\n', "oc://log")?.kind).toBe("root");
  });

  it("JLR-02 L1 resolves to a value line", () => {
    const m = rs('{"a":1}\n', "oc://log/L1");
    expect(m?.kind).toBe("line");
  });

  it("JLR-03 L99 unknown line returns null", () => {
    expect(rs('{"a":1}\n', "oc://log/L99")).toBeNull();
  });

  it("JLR-04 $last picks the most recent value line", () => {
    const m = rs('{"a":1}\n{"a":2}\n{"a":3}\n', "oc://log/$last/a");
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.value).toMatchObject({ kind: "number", value: 3 });
    }
  });

  it("JLR-05 $last skips trailing blank lines", () => {
    const m = rs('{"a":1}\n\n\n', "oc://log/$last/a");
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.value).toMatchObject({ kind: "number", value: 1 });
    }
  });

  it("JLR-06 $last skips trailing malformed lines", () => {
    const m = rs('{"a":1}\nbroken\n', "oc://log/$last/a");
    expect(m?.kind).toBe("object-entry");
  });

  it("JLR-07 $last on empty file returns null", () => {
    expect(rs("", "oc://log/$last/x")).toBeNull();
  });

  it("JLR-08 $last on all-blank file returns null", () => {
    expect(rs("\n\n\n", "oc://log/$last/x")).toBeNull();
  });

  it("JLR-09 $last on all-malformed file returns null", () => {
    expect(rs("a\nb\nc\n", "oc://log/$last/x")).toBeNull();
  });

  it("JLR-10 garbage line address returns null", () => {
    expect(rs('{"a":1}\n', "oc://log/garbage")).toBeNull();
    expect(rs('{"a":1}\n', "oc://log/L")).toBeNull();
    expect(rs('{"a":1}\n', "oc://log/Labc")).toBeNull();
  });

  it("JLR-11 descent into a blank line returns null", () => {
    expect(rs('{"a":1}\n\n{"b":2}\n', "oc://log/L2/anything")).toBeNull();
  });

  it("JLR-12 descent into a malformed line returns null", () => {
    expect(rs('{"a":1}\nbroken\n{"b":2}\n', "oc://log/L2/anything")).toBeNull();
  });

  it("JLR-13 missing field on a value line returns null", () => {
    expect(rs('{"a":1}\n', "oc://log/L1/missing")).toBeNull();
  });

  it("JLR-14 dotted descent through line value resolves", () => {
    const m = rs('{"r":{"ok":true,"d":"x"}}\n', "oc://log/L1/r.d");
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.value).toMatchObject({ kind: "string", value: "x" });
    }
  });

  it("JLR-15 array index inside a line resolves", () => {
    const m = rs('{"items":["a","b","c"]}\n', "oc://log/L1/items.2");
    expect(m?.kind).toBe("value");
    if (m?.kind === "value") {
      expect(m.node).toMatchObject({ kind: "string", value: "c" });
    }
  });

  it("JLR-16 line numbers are 1-indexed", () => {
    const m = rs('{"a":1}\n{"a":2}\n', "oc://log/L1/a");
    // Unguarded kind check: without it a null result would skip the
    // value assertion below and the test would pass vacuously.
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.value).toMatchObject({ kind: "number", value: 1 });
    }
  });

  it("JLR-17 line numbers preserved across blank/malformed entries", () => {
    const m = rs('{"a":1}\n\nbroken\n{"a":4}\n', "oc://log/L4/a");
    expect(m?.kind).toBe("object-entry");
    if (m?.kind === "object-entry") {
      expect(m.node.value).toMatchObject({ kind: "number", value: 4 });
    }
  });

  it("JLR-18 resolver is non-mutating", () => {
    // Resolve against the SAME AST that is snapshotted. Going through
    // `rs` would re-parse and resolve a throwaway AST, so the snapshot
    // comparison could never detect a mutation.
    const { ast } = parseJsonl('{"a":1}\n{"b":2}\n');
    const before = JSON.stringify(ast);
    resolveJsonlOcPath(ast, parseOcPath("oc://log/L1"));
    resolveJsonlOcPath(ast, parseOcPath("oc://log/$last"));
    expect(JSON.stringify(ast)).toBe(before);
  });

  it("JLR-19 hostile inputs do not throw", () => {
    expect(() => rs("not json\n", "oc://log/L1")).not.toThrow();
    expect(() => rs("", "oc://log/$last")).not.toThrow();
  });
});

View File

@@ -0,0 +1,155 @@
/**
* Wave 11 — malformed input recovery.
*
* Substrate guarantee: parser is **soft-error**: it never throws on
* malformed input. Suspicious-but-recoverable inputs produce
* diagnostics; unparseable structural pieces are dropped silently.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
// Soft-error suite for the markdown parser: malformed or extreme input
// must never throw. BOM cases use an explicit `\uFEFF` escape because a
// literal U+FEFF is invisible in source and easily lost by tooling.
describe("wave-11 malformed-input", () => {
  it("M-01 truncated mid-frontmatter (no close fence)", () => {
    const raw = "---\nname: github\n";
    const { ast, diagnostics } = parseMd(raw);
    expect(diagnostics.some((d) => d.code === "OC_FRONTMATTER_UNCLOSED")).toBe(true);
    expect(ast.frontmatter).toEqual([]);
  });

  it("M-02 truncated mid-section", () => {
    const raw = "## H\n- item\nmid-line";
    const { ast } = parseMd(raw);
    expect(ast.blocks.length).toBe(1);
  });

  it("M-03 only `---` (single fence, no content)", () => {
    expect(() => parseMd("---\n")).not.toThrow();
  });

  it("M-04 only `---\\n---`", () => {
    const { ast } = parseMd("---\n---");
    expect(ast.frontmatter).toEqual([]);
  });

  it("M-05 binary-ish bytes (non-ASCII control chars)", () => {
    const raw = "## H\n\x00\x01\x02\n";
    expect(() => parseMd(raw)).not.toThrow();
  });

  it("M-06 very long single line (10k chars)", () => {
    const raw = `## H\n${"x".repeat(10_000)}\n`;
    const { ast } = parseMd(raw);
    expect(ast.blocks[0]?.heading).toBe("H");
  });

  it("M-07 deeply repeated headings (1000 H2 blocks)", () => {
    const lines: string[] = [];
    for (let i = 0; i < 1000; i++) {
      lines.push(`## H${i}`);
      lines.push(`- item ${i}`);
    }
    const raw = lines.join("\n") + "\n";
    const { ast } = parseMd(raw);
    expect(ast.blocks.length).toBe(1000);
  });

  it("M-08 bullet shape that isn't actually a bullet (`-not-a-bullet`)", () => {
    const { ast } = parseMd("## H\n-not-a-bullet\n- real\n");
    expect(ast.blocks[0]?.items.length).toBe(1);
  });

  it("M-09 unclosed code fence", () => {
    const raw = "## H\n```\nbody\n";
    expect(() => parseMd(raw)).not.toThrow();
  });

  it("M-10 mismatched fence (open with ``` close with ~~~)", () => {
    const raw = "## H\n```\nbody\n~~~\n";
    expect(() => parseMd(raw)).not.toThrow();
  });

  it("M-11 nested fences (treated linearly, not nested)", () => {
    const raw = "## H\n```\n```\nstill-in-second\n```\n";
    expect(() => parseMd(raw)).not.toThrow();
  });

  it("M-12 empty file", () => {
    const { ast, diagnostics } = parseMd("");
    expect(ast.raw).toBe("");
    expect(ast.frontmatter).toEqual([]);
    expect(ast.blocks).toEqual([]);
    expect(diagnostics).toEqual([]);
  });

  it("M-13 single character file", () => {
    const { ast } = parseMd("x");
    expect(ast.preamble).toBe("x");
    expect(ast.blocks).toEqual([]);
  });

  it("M-14 single newline file", () => {
    const { ast } = parseMd("\n");
    expect(ast.blocks).toEqual([]);
  });

  it("M-15 file with mixed indentation extremes (tabs, spaces, mixed)", () => {
    const raw = "## H\n\t- tabbed\n - spaced\n\t - mixed\n";
    expect(() => parseMd(raw)).not.toThrow();
  });

  it("M-16 frontmatter with frontmatter-shaped content inside (---)", () => {
    const raw = "---\nk: v\n---\n\n---\nshould not parse as second frontmatter\n---\n";
    const { ast } = parseMd(raw);
    expect(ast.frontmatter.map((e) => e.key)).toEqual(["k"]);
    // Second `---` block becomes part of preamble/body (it's not at file start).
    expect(ast.preamble).toContain("---");
  });

  it("M-17 lines starting with `#` but not heading (raw `#` chars in body)", () => {
    const raw = "## H\n\n# This is text starting with #\n#### h4 not parsed as block\n";
    const { ast } = parseMd(raw);
    expect(ast.blocks.length).toBe(1);
    expect(ast.blocks[0]?.bodyText).toContain("# This is text");
  });

  it("M-18 lines starting with multiple ## but malformed (####, ######)", () => {
    const { ast } = parseMd("## Real\n#### Not block\n###### Not block\n");
    expect(ast.blocks.length).toBe(1);
    expect(ast.blocks[0]?.heading).toBe("Real");
  });

  it("M-19 file with just whitespace", () => {
    expect(() => parseMd(" \n\t\n \n")).not.toThrow();
  });

  it("M-20 file with only BOM", () => {
    // Without a real BOM in the input this case is a duplicate of M-12.
    // NOTE(review): assumes `ast.raw` echoes the input verbatim (BOM
    // included) — confirm against parseMd.
    const bom = "\uFEFF";
    const { ast } = parseMd(bom);
    expect(ast.raw).toBe(bom);
  });

  it("M-21 file mixing BOM + frontmatter + body + sections", () => {
    const raw = "\uFEFF---\nk: v\n---\n\nbody\n## Section\n- item\n";
    expect(() => parseMd(raw)).not.toThrow();
    const { ast } = parseMd(raw);
    expect(ast.frontmatter[0]?.value).toBe("v");
    expect(ast.blocks[0]?.heading).toBe("Section");
  });

  it("M-22 line endings: legacy CR-only (Mac classic)", () => {
    // Our regex /\r?\n/ doesn't split on CR-only. Treats whole as one line.
    const raw = "line1\rline2\r## Heading\r";
    expect(() => parseMd(raw)).not.toThrow();
  });

  it("M-23 100 KB file", () => {
    const lines: string[] = [];
    for (let i = 0; i < 1000; i++) {
      lines.push("## H" + i);
      for (let j = 0; j < 5; j++) {
        lines.push(`- item-${i}-${j}: value with some text content here`);
      }
    }
    const raw = lines.join("\n");
    expect(() => parseMd(raw)).not.toThrow();
  });
});

View File

@@ -0,0 +1,252 @@
/**
* Wave 7 — OcPath parsing edges.
*
* Substrate guarantee: `parseOcPath(s)` is a pure function. Valid input
* round-trips via `formatOcPath`; invalid input throws `OcPathError`
* with a stable `code`.
*/
import { describe, expect, it } from "vitest";
import {
OcPathError,
formatOcPath,
getPathLayout,
isPattern,
isValidOcPath,
parseOcPath,
} from "../../oc-path.js";
/**
 * Asserts that `fn` throws an `OcPathError` carrying the given `code`.
 *
 * The throw is captured first and asserted on afterwards. Calling
 * `expect.fail` inside the `try` would have its own error swallowed by
 * the `catch`, turning "nothing was thrown" into a misleading
 * instance-of failure.
 *
 * @param fn   Thunk expected to throw.
 * @param code Expected `OcPathError.code`.
 */
function expectErr(fn: () => unknown, code: string): void {
  let caught: unknown;
  let threw = false;
  try {
    fn();
  } catch (err) {
    caught = err;
    threw = true;
  }
  expect(threw, `expected OcPathError code ${code}, but nothing was thrown`).toBe(true);
  expect(caught).toBeInstanceOf(OcPathError);
  expect((caught as OcPathError).code).toBe(code);
}
// Parse/format edge cases for `oc://` paths: valid input round-trips via
// `formatOcPath`, invalid input throws `OcPathError` with a stable code.
describe("wave-07 oc-path-parse-edges", () => {
  it("OP-01 file-only", () => {
    expect(parseOcPath("oc://SOUL.md")).toEqual({ file: "SOUL.md" });
  });

  it("OP-02 file + section", () => {
    expect(parseOcPath("oc://SOUL.md/Boundaries").section).toBe("Boundaries");
  });

  it("OP-03 file + section + item", () => {
    expect(parseOcPath("oc://SOUL.md/Boundaries/deny-rule-1").item).toBe("deny-rule-1");
  });

  it("OP-04 file + section + item + field", () => {
    expect(parseOcPath("oc://SOUL.md/B/deny-1/risk").field).toBe("risk");
  });

  it("OP-05 session query parameter", () => {
    expect(parseOcPath("oc://X.md?session=daily").session).toBe("daily");
  });

  it("OP-06 session with full path", () => {
    const p = parseOcPath("oc://X.md/sec/item/field?session=cron");
    expect(p).toEqual({
      file: "X.md",
      section: "sec",
      item: "item",
      field: "field",
      session: "cron",
    });
  });

  it("OP-07 unknown query parameters silently ignored", () => {
    const p = parseOcPath("oc://X.md?foo=bar&session=s&baz=qux");
    expect(p.session).toBe("s");
  });

  it("OP-08 session= with empty value drops session", () => {
    const p = parseOcPath("oc://X.md?session=");
    expect(p.session).toBeUndefined();
  });

  it("OP-09 query without `=` ignored", () => {
    const p = parseOcPath("oc://X.md?nokeyhere");
    expect(p.session).toBeUndefined();
  });

  it("OP-10 missing scheme throws", () => {
    expectErr(() => parseOcPath("SOUL.md"), "OC_PATH_MISSING_SCHEME");
  });

  it("OP-11 wrong scheme throws", () => {
    expectErr(() => parseOcPath("https://x.com"), "OC_PATH_MISSING_SCHEME");
  });

  it("OP-12 empty after scheme throws", () => {
    expectErr(() => parseOcPath("oc://"), "OC_PATH_EMPTY");
  });

  it("OP-13 empty segment throws", () => {
    expectErr(() => parseOcPath("oc://X.md//item"), "OC_PATH_EMPTY_SEGMENT");
  });

  it("OP-14 too-deep nesting throws", () => {
    expectErr(() => parseOcPath("oc://X.md/a/b/c/d/e"), "OC_PATH_TOO_DEEP");
  });

  it("OP-15 non-string throws", () => {
    expectErr(() => parseOcPath(42 as unknown as string), "OC_PATH_NOT_STRING");
  });

  it("OP-16 round-trip canonical forms", () => {
    const cases = [
      "oc://SOUL.md",
      "oc://SOUL.md/Boundaries",
      "oc://SOUL.md/Boundaries/deny-rule-1",
      "oc://SOUL.md/Boundaries/deny-rule-1/risk",
      "oc://SOUL.md?session=daily",
      "oc://X.md/a/b/c?session=s",
      "oc://skills/email-drafter/[frontmatter]/name",
      "oc://config/plugins.entries.foo.token",
    ];
    for (const c of cases) {
      expect(formatOcPath(parseOcPath(c)), `round-trip failed for ${c}`).toBe(c);
    }
  });

  it("OP-17 isValidOcPath true positives", () => {
    expect(isValidOcPath("oc://X.md")).toBe(true);
    expect(isValidOcPath("oc://X.md/sec/item/field")).toBe(true);
  });

  it("OP-18 isValidOcPath true negatives", () => {
    expect(isValidOcPath("")).toBe(false);
    expect(isValidOcPath("X.md")).toBe(false);
    expect(isValidOcPath("oc://")).toBe(false);
    expect(isValidOcPath("oc://x//y")).toBe(false);
    expect(isValidOcPath(null)).toBe(false);
    expect(isValidOcPath({})).toBe(false);
  });

  it("OP-19 file segment with special chars (file with dots/slashes)", () => {
    const p = parseOcPath("oc://config/plugins.entries.foo.token");
    expect(p.file).toBe("config");
    expect(p.section).toBe("plugins.entries.foo.token");
  });

  it("OP-20 section segment with hyphens / underscores / numbers", () => {
    const p = parseOcPath("oc://X.md/Multi-Tenant_Section_2");
    expect(p.section).toBe("Multi-Tenant_Section_2");
  });

  it("OP-21 [frontmatter] sentinel is just a section name", () => {
    const p = parseOcPath("oc://X.md/[frontmatter]/name");
    expect(p.section).toBe("[frontmatter]");
    expect(p.item).toBe("name");
  });

  it("OP-22 formatOcPath rejects empty file", () => {
    expectErr(() => formatOcPath({ file: "" }), "OC_PATH_FILE_REQUIRED");
  });

  it("OP-23 formatOcPath rejects item without section", () => {
    expectErr(() => formatOcPath({ file: "X.md", item: "i" }), "OC_PATH_NESTING");
  });

  it("OP-24 formatOcPath quotes raw slot values containing special chars", () => {
    // Closes ClawSweeper P2 on PR #78678: `formatOcPath` previously
    // concatenated raw slot values, so a programmatically-constructed
    // path with a `/` in the section/item slot would emit extra
    // segments and fail to parse back to the same address.
    // Use a slot value with `/` (and no internal `.`) — `.` inside
    // a slot is the dotted sub-segment delimiter; callers wanting a
    // literal `.` in a key should pre-quote that single sub-segment.
    const constructed = formatOcPath({
      file: "config.jsonc",
      section: "agents.defaults.models",
      item: "github-copilot/claude-opus-4-7",
      field: "alias",
    });
    expect(constructed).toBe(
      'oc://config.jsonc/agents.defaults.models/"github-copilot/claude-opus-4-7"/alias',
    );
    const parsed = parseOcPath(constructed);
    expect(parsed.item).toBe('"github-copilot/claude-opus-4-7"');
  });

  it("OP-25 parseOcPath finds query separator outside quoted keys", () => {
    // Closes ClawSweeper P2 on PR #78678: `parseOcPath` previously
    // used `indexOf('?')` which split a key like `"foo?bar"` at the
    // embedded `?`, breaking advertised quoted-segment support.
    const parsed = parseOcPath('oc://config.jsonc/"foo?bar"?session=daily');
    expect(parsed.section).toBe('"foo?bar"');
    expect(parsed.session).toBe("daily");
  });

  it("OP-26 file slot with `/` round-trips via quoting", () => {
    // Closes ClawSweeper P2 on PR #78678 (round 4): `parseOcPath` stored
    // `path.file` verbatim while `formatOcPath` prefixed it without
    // quote-wrapping, so a file like `skills/email-drafter` couldn't
    // round-trip — formatter output got re-parsed as file plus section,
    // and quoted input leaked the surrounding quotes into filesystem
    // resolution.
    const constructed = formatOcPath({
      file: "skills/email-drafter",
      section: "Tools",
      item: "-1",
    });
    expect(constructed).toBe('oc://"skills/email-drafter"/Tools/-1');
    const parsed = parseOcPath(constructed);
    expect(parsed.file).toBe("skills/email-drafter");
    expect(parsed.section).toBe("Tools");
    expect(parsed.item).toBe("-1");
  });

  it("OP-27 file slot with dot extension does NOT get quoted", () => {
    // The file slot's quoting trigger excludes `.` because filename
    // extensions (`AGENTS.md`, `gateway.jsonc`) are normal — quoting
    // them would make canonical form ugly without need.
    expect(formatOcPath({ file: "AGENTS.md" })).toBe("oc://AGENTS.md");
    expect(formatOcPath({ file: "gateway.jsonc", section: "version" })).toBe(
      "oc://gateway.jsonc/version",
    );
  });

  it("OP-28 formatOcPath rejects field without item or section", () => {
    // Closes Galin P2 (round 8): the nesting guard caught
    // `field + section + no item` but missed `field + no section + no item`.
    // Such a struct emits `oc://FILE/FIELD` which silently re-parses as
    // `{ file, section: FIELD }` — different shape, breaking round-trip.
    // `expectErr` pins both the error class and the stable code, like
    // the other rejection cases in this suite.
    expectErr(() => formatOcPath({ file: "X", field: "name" }), "OC_PATH_NESTING");
  });

  it("OP-29 isPattern is quote-aware (literal `*` inside quoted segment)", () => {
    // Closes Galin P2 (round 8): `isPattern` previously used
    // `slot.split('.')` which shredded a quoted key like `"items.*.glob"`
    // and falsely detected the literal `*` as a wildcard, causing
    // single-match verbs to reject a concrete path.
    const concrete = parseOcPath('oc://config.jsonc/"items.*.glob"');
    expect(isPattern(concrete)).toBe(false);
    // Sanity: an unquoted `*` IS still a wildcard.
    const wildcard = parseOcPath("oc://config.jsonc/items/*");
    expect(isPattern(wildcard)).toBe(true);
  });

  it("OP-30 getPathLayout is quote-aware", () => {
    // Closes Galin P2 (round 8): `getPathLayout` used `slot.split('.')`
    // for all three slots, breaking the find-walker / repackPath layout
    // contract for quoted segments containing `.`.
    const path = parseOcPath('oc://config.jsonc/"github.com"/repos');
    const layout = getPathLayout(path);
    // Quoted segment is one sub-segment, not two.
    expect(layout.sectionLen).toBe(1);
    expect(layout.subs[0]).toBe('"github.com"');
    expect(layout.itemLen).toBe(1);
    expect(layout.subs[1]).toBe("repos");
  });
});

View File

@@ -0,0 +1,245 @@
/**
* Wave 8 — OcPath resolver edges.
*
* Substrate guarantee: `resolveOcPath(ast, ocPath)` returns the matched
* node or `null`. Slug matching is case-insensitive. Field on non-kv
* item returns `null` (not a guess). Frontmatter via the `[frontmatter]`
* sentinel section.
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../../resolve.js";
// Markdown fixture for the resolver tests. It contains:
//   - frontmatter with three keys (`name`, `description`, `url`),
//   - one line of preamble prose before the first heading,
//   - `## Boundaries` with plain (non key/value) bullets,
//   - `## Tools` with `key: value` bullets, one of whose keys has caps and a space,
//   - `## Multi-Word Section` with a single plain bullet.
// The text is runtime data: tests assert on exact values parsed from it.
const SAMPLE = `---
name: github
description: gh CLI
url: https://example.com
---
Preamble prose.
## Boundaries
- never write to /etc
- always confirm before deleting
## Tools
- gh: GitHub CLI
- curl: HTTP client
- The Tool: with caps and spaces
## Multi-Word Section
- item one
`;
describe("wave-08 oc-path-resolver-edges", () => {
  // A single parsed AST is shared by all cases; R-22 checks that lookups
  // leave it untouched.
  const { ast } = parseMd(SAMPLE);

  it("R-01 root resolves to AST", () => {
    const found = resolveOcPath(ast, { file: "X.md" });
    expect(found?.kind).toBe("root");
  });

  it("R-02 block by exact slug", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "boundaries" });
    expect(found?.kind).toBe("block");
  });

  it("R-03 block by case-mismatched slug (Boundaries → boundaries)", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "Boundaries" });
    expect(found?.kind).toBe("block");
  });

  it("R-04 block by uppercased slug", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "BOUNDARIES" });
    expect(found?.kind).toBe("block");
  });

  it("R-05 multi-word section by slug", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "multi-word-section" });
    expect(found?.kind).toBe("block");
    if (found?.kind === "block") {
      expect(found.node.heading).toBe("Multi-Word Section");
    }
  });

  it("R-06 multi-word section by exact heading text (case-folded)", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "Multi-Word Section" });
    // Sections are compared case-insensitively against the block slug.
    // The slug here is "multi-word-section", while the lowercased input is
    // "multi-word section" (space, not hyphen) — so there is no match.
    // Intentional, documented limit: callers pass the slug, not the heading.
    expect(found).toBeNull();
  });

  it("R-07 unknown section returns null", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "unknown" });
    expect(found).toBeNull();
  });

  it("R-08 item by slug under known section", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "tools", item: "gh" });
    expect(found?.kind).toBe("item");
  });

  it('R-09 item slug for KV uses kv.key (gh, not "gh-github-cli")', () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "tools", item: "gh" });
    expect(found).not.toBeNull();
    if (found?.kind === "item") {
      expect(found.node.kv?.value).toBe("GitHub CLI");
    }
  });

  it("R-10 item slug for plain bullet uses text", () => {
    const found = resolveOcPath(ast, {
      file: "X.md",
      section: "boundaries",
      item: "never-write-to-etc",
    });
    expect(found?.kind).toBe("item");
  });

  it("R-11 item slug case-insensitive", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "tools", item: "GH" });
    expect(found?.kind).toBe("item");
  });

  it("R-12 item with spaces in key (slugified)", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "tools", item: "the-tool" });
    expect(found?.kind).toBe("item");
    if (found?.kind === "item") {
      expect(found.node.kv?.value).toBe("with caps and spaces");
    }
  });

  it("R-13 unknown item returns null", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "tools", item: "nonexistent" });
    expect(found).toBeNull();
  });

  it("R-14 item-field matches kv.key (case-insensitive)", () => {
    const found = resolveOcPath(ast, {
      file: "X.md",
      section: "tools",
      item: "gh",
      field: "gh",
    });
    expect(found?.kind).toBe("item-field");
  });

  it("R-15 field on plain (non-kv) item returns null", () => {
    const found = resolveOcPath(ast, {
      file: "X.md",
      section: "boundaries",
      item: "never-write-to-etc",
      field: "risk",
    });
    expect(found).toBeNull();
  });

  it("R-16 field that does not match kv.key returns null", () => {
    const found = resolveOcPath(ast, {
      file: "X.md",
      section: "tools",
      item: "gh",
      field: "nonexistent",
    });
    expect(found).toBeNull();
  });

  it("R-17 frontmatter via [frontmatter] sentinel section", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "[frontmatter]", field: "name" });
    expect(found?.kind).toBe("frontmatter");
    if (found?.kind === "frontmatter") {
      expect(found.node.value).toBe("github");
    }
  });

  it("R-18 frontmatter unknown key returns null", () => {
    const found = resolveOcPath(ast, {
      file: "X.md",
      section: "[frontmatter]",
      field: "nonexistent",
    });
    expect(found).toBeNull();
  });

  it("R-19 frontmatter without field returns null", () => {
    const found = resolveOcPath(ast, { file: "X.md", section: "[frontmatter]" });
    expect(found).toBeNull();
  });

  it("R-20 multiple frontmatter keys with same name — first match wins", () => {
    // Hand-built AST: two frontmatter entries sharing the key `k`.
    const dupeAst = {
      kind: "md" as const,
      raw: "",
      frontmatter: [
        { key: "k", value: "first", line: 2 },
        { key: "k", value: "second", line: 3 },
      ],
      preamble: "",
      blocks: [],
    };
    const found = resolveOcPath(dupeAst, { file: "X.md", section: "[frontmatter]", field: "k" });
    expect(found?.kind).toBe("frontmatter");
    if (found?.kind === "frontmatter") {
      expect(found.node.value).toBe("first");
    }
  });

  it("R-21 empty AST resolves root only", () => {
    const empty = { kind: "md" as const, raw: "", frontmatter: [], preamble: "", blocks: [] };
    const rootHit = resolveOcPath(empty, { file: "X.md" });
    expect(rootHit?.kind).toBe("root");
    const sectionHit = resolveOcPath(empty, { file: "X.md", section: "any" });
    expect(sectionHit).toBeNull();
  });

  it("R-22 resolver does not mutate the AST", () => {
    // Compare JSON snapshots taken before and after a full-depth lookup.
    const snapshotBefore = JSON.stringify(ast);
    resolveOcPath(ast, { file: "X.md", section: "tools", item: "gh", field: "gh" });
    const snapshotAfter = JSON.stringify(ast);
    expect(snapshotAfter).toBe(snapshotBefore);
  });

  it("R-23 file segment is informational — resolver doesn't check it", () => {
    // The file name is metadata only: the resolver trusts that the AST it
    // was handed belongs to that file. Mapping file → AST is the caller's job.
    const viaSoul = resolveOcPath(ast, { file: "SOUL.md", section: "tools" });
    const viaAgents = resolveOcPath(ast, { file: "AGENTS.md", section: "tools" });
    expect(viaSoul?.kind).toBe(viaAgents?.kind);
  });
});

View File

@@ -0,0 +1,127 @@
/**
* Wave 14 — performance + determinism + immutability.
*
* Substrate guarantees:
 * - Parsing scales roughly linearly with file size (no quadratic blowup)
* - Same input produces same AST (no Object.keys / Set order surprises)
* - Resolver does not mutate the AST
* - AST is structurally cloneable (no functions, no cycles)
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../../resolve.js";
describe("wave-14 perf + determinism", () => {
  it("PD-01 parses 100 KB file in under 200 ms", () => {
    // 1000 headings, each followed by five key/value bullets. Fixture
    // construction happens outside the timed window.
    const raw = Array.from({ length: 1000 }, (_, i) => {
      const bullets = Array.from({ length: 5 }, (_, j) => `- key${i}-${j}: value with content`);
      return [`## H${i}`, ...bullets].join("\n");
    }).join("\n");

    const t0 = performance.now();
    parseMd(raw);
    const elapsedMs = performance.now() - t0;

    expect(elapsedMs).toBeLessThan(200);
  });

  it("PD-02 parses 1000 small files in under 500 ms", () => {
    const raw = `## H\n- a\n- b: c\n## I\n- d\n`;

    const t0 = performance.now();
    let remaining = 1000;
    while (remaining-- > 0) {
      parseMd(raw);
    }
    const elapsedMs = performance.now() - t0;

    expect(elapsedMs).toBeLessThan(500);
  });

  it("PD-03 100k OcPath resolutions on parsed AST in under 500 ms", () => {
    const { ast } = parseMd(`## A\n- a1\n- a2\n## B\n- b1\n- b2\n## C\n- c1: cv\n`);
    const target = { file: "X.md", section: "b", item: "b1" };

    const t0 = performance.now();
    for (let n = 0; n < 100_000; n++) {
      resolveOcPath(ast, target);
    }
    const elapsedMs = performance.now() - t0;

    expect(elapsedMs).toBeLessThan(500);
  });

  it("PD-04 same input → byte-identical AST.raw across runs", () => {
    const raw = `---\nb: 2\na: 1\n---\n## Z\n- z\n## A\n- a\n`;
    const first = parseMd(raw).ast;
    const second = parseMd(raw).ast;

    expect(first.raw).toBe(second.raw);
    expect(first.frontmatter).toEqual(second.frontmatter);
    expect(first.blocks).toEqual(second.blocks);
  });

  it("PD-05 resolveOcPath is non-mutating", () => {
    const { ast } = parseMd(`## A\n- a: x\n## B\n- b\n`);
    const snapshot = JSON.stringify(ast);

    // A deep hit, a shallow hit, and a miss — none may touch the AST.
    resolveOcPath(ast, { file: "X.md", section: "a", item: "a", field: "a" });
    resolveOcPath(ast, { file: "X.md", section: "b" });
    resolveOcPath(ast, { file: "X.md", section: "unknown" });

    expect(JSON.stringify(ast)).toBe(snapshot);
  });

  it("PD-06 AST is JSON-serializable (no functions, no cycles)", () => {
    const { ast } = parseMd(
      `---\nk: v\n---\n## A\n- a\n\`\`\`ts\nx\n\`\`\`\n| h |\n| - |\n| 1 |\n`,
    );

    // JSON.stringify throws on cycles, so surviving the round trip is the check.
    const revived = JSON.parse(JSON.stringify(ast));

    expect(revived.raw).toBe(ast.raw);
    expect(revived.blocks.length).toBe(ast.blocks.length);
  });

  it("PD-07 emit is non-mutating", () => {
    const { ast } = parseMd(`## A\n- a\n`);
    const snapshot = JSON.stringify(ast);

    // Emit three times, as the original case does.
    emitMd(ast);
    emitMd(ast);
    emitMd(ast);

    expect(JSON.stringify(ast)).toBe(snapshot);
  });

  it("PD-08 frontmatter ordering is preserved (insertion order, not alphabetical)", () => {
    const { ast } = parseMd(`---\nz: 1\nm: 2\na: 3\n---\n`);
    const keys = ast.frontmatter.map((entry) => entry.key);
    expect(keys).toEqual(["z", "m", "a"]);
  });

  it("PD-09 block ordering is document order, not alphabetical", () => {
    const { ast } = parseMd(`## Z\n## A\n## M\n`);
    const headings = ast.blocks.map((block) => block.heading);
    expect(headings).toEqual(["Z", "A", "M"]);
  });

  it("PD-10 item ordering within block is document order", () => {
    const { ast } = parseMd(`## H\n- z\n- a\n- m\n`);
    const texts = ast.blocks[0]?.items.map((item) => item.text);
    expect(texts).toEqual(["z", "a", "m"]);
  });

  it("PD-11 large fixture round-trip stays under 100 ms", () => {
    // 500 sections: heading, blank line, ten key/value bullets, blank line.
    const raw = Array.from({ length: 500 }, (_, i) => {
      const bullets = Array.from(
        { length: 10 },
        (_, j) => `- item-${i}-${j}: with some prose value content here`,
      );
      return [`## Section ${i}`, "", ...bullets, ""].join("\n");
    }).join("\n");

    // The timed window covers parse + emit together.
    const t0 = performance.now();
    const { ast } = parseMd(raw);
    const emitted = emitMd(ast);
    const elapsedMs = performance.now() - t0;

    expect(emitted).toBe(raw);
    expect(elapsedMs).toBeLessThan(100);
  });
});

View File

@@ -0,0 +1,630 @@
/**
* Wave-23 — Pitfall scenarios.
*
* One test per pitfall ID enumerated in
* `packages/oc-paths-substrate/PITFALLS.md` (the substrate-local
* pitfall taxonomy). Tests are grouped by category so a regression in
* any one defense is visible at a glance. Every MITIGATED / REJECTED
* pitfall has a positive validation here; DEFERRED ones are covered
* as documented limits with a `.skip` note.
*
* **Namespace note**: substrate pitfall IDs (P-001 … P-040) are a
* separate namespace from the claws-side `docs/PITFALLS.md`
* governance taxonomy (which uses P-NNN for completely different
* pitfalls — e.g., P-033 there is "Memory poisoning"). The package
* boundary disambiguates.
*/
import { describe, expect, it } from "vitest";
import {
MAX_PATH_LENGTH,
MAX_TRAVERSAL_DEPTH,
OcPathError,
findOcPaths,
formatOcPath,
parseOcPath,
resolveOcPath,
setOcPath,
} from "../../index.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { parseYaml } from "../../yaml/parse.js";
// ---------- Encoding pitfalls --------------------------------------------
describe("wave-23 pitfalls — encoding", () => {
  it("P-001 strips leading UTF-8 BOM from path string", () => {
    // Spell the BOM (U+FEFF) as an escape. An invisible literal is easy to
    // lose in editors, diffs, and copy/paste — leaving an empty string and a
    // test that no longer exercises BOM stripping at all.
    const bom = "\uFEFF";
    expect(bom).toHaveLength(1);
    expect(parseOcPath(`${bom}oc://X/Y`).file).toBe("X");
  });

  it("P-002 normalizes path to NFC", () => {
    // Explicit escapes keep the two forms distinct even if tooling
    // normalizes the source file itself.
    const nfc = "caf\u00E9"; // composed: U+00E9
    const nfd = "cafe\u0301"; // decomposed: "e" + U+0301 combining acute
    // Guard the fixture: the inputs must differ before normalization.
    expect(nfd).not.toBe(nfc);

    expect(parseOcPath(`oc://X/${nfd}`).section).toBe(nfc);
    expect(parseOcPath(`oc://X/${nfc}`).section).toBe(nfc);
    // Both spellings must yield the same struct.
    expect(parseOcPath(`oc://X/${nfd}`)).toEqual(parseOcPath(`oc://X/${nfc}`));
  });

  it("P-003 rejects whitespace in identifier-shaped segments", () => {
    expect(() => parseOcPath("oc://X/foo /bar")).toThrow(OcPathError);
    expect(() => parseOcPath("oc://X/ foo")).toThrow(OcPathError);
    expect(() => parseOcPath("oc://X/foo\tbar")).toThrow(OcPathError);
  });

  it("P-003 allows whitespace inside predicate values (content)", () => {
    // A predicate value is matched against real content, so spaces inside
    // it are legitimate.
    expect(() => parseOcPath("oc://X/[name=hello world]")).not.toThrow();
  });

  it("P-004 / P-011 rejects control characters and null bytes", () => {
    expect(() => parseOcPath("oc://X/\x00")).toThrow(/Control character/);
    expect(() => parseOcPath("oc://X/foo\x01bar")).toThrow(/Control character/);
    expect(() => parseOcPath("oc://X/foo\x7Fbar")).toThrow(/Control character/);
  });
});
// ---------- Empty / structural pitfalls ----------------------------------
describe("wave-23 pitfalls — empty & structural", () => {
  it("P-008 rejects empty segments", () => {
    const parse = () => parseOcPath("oc://X//Y");
    expect(parse).toThrow(/Empty segment/);
  });

  it("P-009 rejects empty dotted sub-segments", () => {
    const parse = () => parseOcPath("oc://X/a..b");
    expect(parse).toThrow(/Empty dotted sub-segment/);
  });

  it("P-010 rejects scheme-only path", () => {
    const parse = () => parseOcPath("oc://");
    expect(parse).toThrow(/Empty oc:\/\/ path/);
  });

  it("P-014 rejects empty predicate key", () => {
    const parse = () => parseOcPath("oc://X/[=foo]");
    expect(parse).toThrow(/Malformed predicate/);
  });

  it("P-014 rejects empty predicate value", () => {
    const parse = () => parseOcPath("oc://X/[id=]");
    expect(parse).toThrow(/Malformed predicate/);
  });

  it("P-015 accepts bracket segment with no operator as literal sentinel", () => {
    // `[frontmatter]` is older than the predicate grammar and stays a literal.
    const { section } = parseOcPath("oc://AGENTS.md/[frontmatter]/key");
    expect(section).toBe("[frontmatter]");
  });

  it("P-016 rejects mismatched brackets", () => {
    // One case per direction: opened-but-not-closed, closed-but-not-opened.
    for (const input of ["oc://X/[unclosed", "oc://X/closed]"]) {
      expect(() => parseOcPath(input)).toThrow(OcPathError);
    }
  });

  it("P-016 rejects mismatched braces", () => {
    const parse = () => parseOcPath("oc://X/{a,b");
    expect(parse).toThrow(OcPathError);
  });

  it("P-018 rejects empty union", () => {
    const parse = () => parseOcPath("oc://X/{}");
    expect(parse).toThrow(/Empty union/);
  });

  it("P-018 rejects union with empty alternative", () => {
    const parse = () => parseOcPath("oc://X/{a,,b}");
    expect(parse).toThrow(/Empty alternative/);
  });
});
// ---------- Predicate-content pitfalls -----------------------------------
describe("wave-23 pitfalls — predicate content", () => {
  it("P-012 predicate value containing `/` round-trips", () => {
    // Splitting the path on `/` has to respect bracket boundaries.
    const p = parseOcPath("oc://X/[id=foo/bar]/cmd");
    expect(p.section).toBe("[id=foo/bar]");
    expect(p.item).toBe("cmd");
  });

  it("P-012 findOcPaths matches a leaf whose id contains a slash", () => {
    // YAML fixture: `cmd` must sit at the same column as `id` to be a
    // sibling key inside each sequence item's mapping.
    const ast = parseYaml(
      "steps:\n  - id: foo/bar\n    cmd: x\n  - id: baz\n    cmd: y\n",
    ).ast;
    const out = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=foo/bar]/cmd"));
    expect(out).toHaveLength(1);
    if (out[0].match.kind === "leaf") {
      expect(out[0].match.valueText).toBe("x");
    }
  });

  it("P-013 predicate value containing `.` round-trips", () => {
    const p = parseOcPath("oc://X/steps.[id=1.0].cmd");
    expect(p.section).toBe("steps.[id=1.0].cmd");
  });

  it("P-013 findOcPaths matches a leaf whose id is `1.0`", () => {
    // Same fixture shape as P-012; ids are quoted so they stay strings.
    const ast = parseYaml(
      'steps:\n  - id: "1.0"\n    cmd: x\n  - id: "2.0"\n    cmd: y\n',
    ).ast;
    const out = findOcPaths(ast, parseOcPath("oc://wf/steps/[id=1.0]/cmd"));
    expect(out).toHaveLength(1);
    if (out[0].match.kind === "leaf") {
      expect(out[0].match.valueText).toBe("x");
    }
  });
});
// ---------- Sentinel & collision pitfalls --------------------------------
describe("wave-23 pitfalls — sentinels & collisions", () => {
  it("P-020/openclaw#59934 negative numeric key on object resolves as literal key", () => {
    // Telegram supergroup IDs are negative numbers used as map keys, and they
    // look exactly like the positional `-N` token. On a non-indexable
    // container the resolver must fall back to a literal-key lookup.
    const { ast } = parseJsonc(
      '{"channels":{"telegram":{"groups":{"-5028303500":{"requireMention":false}}}}}',
    );
    const target = parseOcPath("oc://config/channels.telegram.groups.-5028303500.requireMention");
    const hit = resolveOcPath(ast, target);

    expect(hit).not.toBeNull();
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind === "leaf") {
      expect(hit.valueText).toBe("false");
      expect(hit.leafType).toBe("boolean");
    }
  });

  it("P-020 negative `-N` still works as positional on arrays", () => {
    // Identical syntax on an indexable container: positional meaning wins.
    const { ast } = parseJsonc('{"items":[10,20,30]}');
    const hit = resolveOcPath(ast, parseOcPath("oc://X/items/-1"));

    expect(hit?.kind).toBe("leaf");
    if (hit?.kind === "leaf") {
      expect(hit.valueText).toBe("30");
    }
  });

  it("P-020 numeric segment dispatches by node kind (array index vs map key)", () => {
    // One path string, two ASTs: the container kind decides the meaning.
    const arrayAst = parseJsonc('{"x":["a","b"]}').ast;
    const mapAst = parseJsonc('{"x":{"0":"a","1":"b"}}').ast;
    const fromArray = resolveOcPath(arrayAst, parseOcPath("oc://config/x/0"));
    const fromMap = resolveOcPath(mapAst, parseOcPath("oc://config/x/0"));

    expect(fromArray?.kind).toBe("leaf");
    expect(fromMap?.kind).toBe("leaf");
    if (fromArray?.kind === "leaf") {
      expect(fromArray.valueText).toBe("a");
    }
    if (fromMap?.kind === "leaf") {
      expect(fromMap.valueText).toBe("a");
    }
  });

  it("P-021 `$last` literal in a yaml key is shadowed by positional sentinel", () => {
    // Documented v0 limitation: `$last` always means "the last entry" and
    // never a literal key. Authors with a literal `$last` key need
    // kind-narrow access instead.
    const { ast } = parseYaml("$last: literal-value\nfoo: bar\n");
    const hit = resolveOcPath(ast, parseOcPath("oc://X/$last"));

    // Resolves to the LAST key (`foo` → `bar`), not the key named `$last`.
    expect(hit?.kind).toBe("leaf");
    if (hit?.kind === "leaf") {
      expect(hit.valueText).toBe("bar");
    }
  });
});
// ---------- Round-trip pitfalls ------------------------------------------
describe("wave-23 pitfalls — round-trip", () => {
  it("P-023 parseOcPath ∘ formatOcPath is idempotent across path shapes", () => {
    // One representative per path shape: plain slots, dotted slots, session
    // query, the `[frontmatter]` sentinel, wildcard, positional tokens,
    // union, predicate, and `#N` segment.
    const inputs = [
      "oc://X",
      "oc://X/a",
      "oc://X/a/b",
      "oc://X/a/b/c",
      "oc://X/a.b.c",
      "oc://X/a?session=s1",
      "oc://X/[frontmatter]/key",
      "oc://X/steps/*/command",
      "oc://X/steps/$last/id",
      "oc://X/steps/-2/id",
      "oc://X/steps/{command,run}",
      "oc://X/steps/[id=foo]/cmd",
      "oc://X/steps/#0/foo",
    ];
    for (const s of inputs) {
      const parsed = parseOcPath(s);
      // Go through the formatter and back. Parsing the same string twice
      // would only prove the parser is deterministic, not that
      // format → parse preserves the struct.
      const reparsed = parseOcPath(formatOcPath(parsed));
      // Compare structs rather than strings: the formatter may normalize
      // quoting, so the canonical string need not equal the input.
      expect(reparsed).toEqual(parsed);
    }
  });
});
// ---------- Sentinel-guard pitfalls --------------------------------------
describe("wave-23 pitfalls — sentinel at format boundary (F9)", () => {
  it("formatOcPath rejects an OcPath struct carrying the redaction sentinel", () => {
    // Formatted path strings end up in telemetry, audit events, error
    // messages and find-result `path` fields. If the formatter let a struct
    // with `section: REDACTED_SENTINEL` through, only the CLI's scrubSentinel
    // layer would catch it. The substrate contract is that emit boundaries
    // refuse the sentinel, and formatOcPath is the emit boundary for paths.
    const formatWithSentinel = () =>
      formatOcPath({ file: "AGENTS.md", section: "__OPENCLAW_REDACTED__" });

    expect(formatWithSentinel).toThrow(/sentinel literal/);
  });
});
// ---------- Containment pitfalls -----------------------------------------
describe("wave-23 pitfalls — file-slot containment", () => {
  // oc:// paths are workspace-relative. An absolute path or a `..` segment
  // in the file slot would let a hostile workflow / skill manifest steer
  // `openclaw path resolve|set|emit` at arbitrary filesystem locations
  // (Node's `path.resolve(cwd, absolute)` returns `absolute`, skipping the
  // workspace root). Both parseOcPath and formatOcPath reject them so the
  // defense is symmetric.

  it("rejects an absolute POSIX file slot", () => {
    // Unquoted: the leading `/` leaves an empty first segment.
    expect(() => parseOcPath("oc:///etc/passwd")).toThrow(/Empty segment/);
    // Quoted: same violation, reached through a different parse path.
    expect(() => parseOcPath('oc://"/etc/passwd"/section')).toThrow(/Absolute file slot/);
  });

  it("rejects a Windows drive-letter file slot", () => {
    const driveLetterPaths = [
      'oc://"C:/Windows/System32/foo"/section',
      'oc://"C:\\\\Windows\\\\System32"/section',
    ];
    for (const input of driveLetterPaths) {
      expect(() => parseOcPath(input)).toThrow(/Absolute file slot/);
    }
  });

  it("rejects a leading-backslash file slot", () => {
    const parse = () => parseOcPath('oc://"\\\\srv\\\\share\\\\foo"/section');
    expect(parse).toThrow(/Absolute file slot/);
  });

  it("rejects a parent-directory escape via plain `..`", () => {
    for (const input of ['oc://"../foo"/section', 'oc://".."/section']) {
      expect(() => parseOcPath(input)).toThrow(/Parent-directory/);
    }
  });

  it("rejects a parent-directory escape mid-path", () => {
    const parse = () => parseOcPath('oc://"foo/../bar"/section');
    expect(parse).toThrow(/Parent-directory/);
  });

  it("does not decode URL-encoded `..` — literal `%2E%2E` is treated as a filename", () => {
    // No URL decoding happens in the substrate: `%2E%2E` stays the literal
    // six-character name, not a parent-directory escape. Layers that
    // pre-decode (HTTP, browser UI) must normalize before calling
    // parseOcPath. This pins the current behavior so any future change to
    // decode on the caller's behalf is an explicit decision.
    const { file } = parseOcPath('oc://"%2E%2E/foo"/section');
    expect(file).toBe("%2E%2E/foo");
  });

  it("formatOcPath rejects an OcPath struct with absolute file", () => {
    for (const file of ["/etc/passwd", "C:/Windows"]) {
      expect(() => formatOcPath({ file })).toThrow(/Absolute file slot/);
    }
  });

  it("formatOcPath rejects an OcPath struct with parent-directory file", () => {
    for (const file of ["..", "../etc/passwd", "foo/../bar"]) {
      expect(() => formatOcPath({ file })).toThrow(/Parent-directory/);
    }
  });
});
// ---------- formatOcPath ↔ parseOcPath round-trip ------------------------
describe("wave-23 pitfalls — format/parse round-trip", () => {
  // Contract cited at oc-path.ts:13: `formatOcPath(parseOcPath(s)) === s`
  // for any string the formatter accepts. Two breaks were observed:
  //   (a) a slot with an empty dotted sub-segment (`section: 'foo.'` was
  //       emitted as `oc://X/foo.""`, which re-parses as `section: 'foo.""'`);
  //   (b) a slot with control characters (emitted unquoted, then refused by
  //       the parser).
  // The cases below pin both directions.

  it("formatOcPath rejects empty dotted sub-segment in a slot", () => {
    // Trailing dot, leading dot, and doubled dot.
    for (const section of ["foo.", ".foo", "foo..bar"]) {
      expect(() => formatOcPath({ file: "a.md", section })).toThrow(/Empty dotted sub-segment/);
    }
  });

  it("formatOcPath rejects control characters in any slot", () => {
    for (const section of ["sec\x00tion", "sec\x01tion", "tab\ttion"]) {
      expect(() => formatOcPath({ file: "a.md", section })).toThrow(/Control character/);
    }
    // The file slot is covered as well.
    expect(() => formatOcPath({ file: "a\x00b.md" })).toThrow(/Control character/);
  });

  it("round-trips every shape parseOcPath accepts", () => {
    // For each valid input, formatting the parsed struct must give a string
    // that parses back to the same struct. Structs are compared, not
    // strings, because the formatter normalizes quoting.
    const inputs = [
      "oc://X",
      "oc://X/a",
      "oc://X/a/b",
      "oc://X/a/b/c",
      "oc://X/a.b.c",
      "oc://X/a?session=s1",
      "oc://X/[frontmatter]/key",
      "oc://X/steps/$last/id",
      "oc://X/steps/-2/id",
      "oc://X/steps/[id=foo]/cmd",
      "oc://X/steps/{a,b}/cmd",
      'oc://X/"foo/bar"/baz',
      'oc://X/agents/"anthropic/claude-opus-4-7"/alias',
    ];
    for (const input of inputs) {
      const original = parseOcPath(input);
      const canonical = formatOcPath(original);
      expect(parseOcPath(canonical)).toEqual(original);
    }
  });
});
// ---------- Performance pitfalls -----------------------------------------
describe("wave-23 pitfalls — performance & limits", () => {
  it("P-031 / P-033 walker depth cap throws on pathological recursion", () => {
    // Synthetic YAML nested deeper than MAX_TRAVERSAL_DEPTH, walked with `**`.
    // Each level is indented one more space than its parent.
    const levels = MAX_TRAVERSAL_DEPTH + 50;
    const rows = ["root:"];
    for (let depth = 1; depth <= levels; depth++) {
      rows.push(`${" ".repeat(depth)}a:`);
    }
    rows.push(`${" ".repeat(levels + 1)}leaf: x`);
    const yaml = `${rows.join("\n")}\n`;

    const { ast } = parseYaml(yaml);
    expect(() => findOcPaths(ast, parseOcPath("oc://X/**"))).toThrow(/MAX_TRAVERSAL_DEPTH/);
  });

  it("P-032 rejects path strings longer than MAX_PATH_LENGTH", () => {
    const prefix = "oc://X/";
    const tooLong = prefix + "a".repeat(MAX_PATH_LENGTH);
    expect(() => parseOcPath(tooLong)).toThrow(/exceeds .* bytes/);
  });

  it("P-032 path at the cap parses cleanly", () => {
    // Total length is exactly MAX_PATH_LENGTH: prefix plus padding.
    const prefix = "oc://X/";
    const atCap = prefix + "a".repeat(MAX_PATH_LENGTH - prefix.length);
    expect(() => parseOcPath(atCap)).not.toThrow();
  });

  it("P-032 formatOcPath enforces the same cap on output", () => {
    // Symmetric bound: otherwise a struct whose formatted form crosses the
    // cap would yield a string parseOcPath rejects at once (round-trip break).
    const formatOversized = () =>
      formatOcPath({ file: "X", section: "a".repeat(MAX_PATH_LENGTH) });
    expect(formatOversized).toThrow(/Formatted oc:\/\/ exceeds/);
  });

  it("parser depth cap fires on pathological JSONC nesting (F6)", () => {
    // Without `MAX_PARSE_DEPTH`, input such as
    // `'['.repeat(20000) + '0' + ']'.repeat(20000)` ends in a V8 RangeError
    // ("Maximum call stack size exceeded") that escapes commander as a raw
    // stringified error, with no `OcEmitSentinelError`-style structured
    // catch. This pins the structured path: the parser must report
    // OC_JSONC_DEPTH_EXCEEDED rather than a bare RangeError.
    const nesting = MAX_TRAVERSAL_DEPTH + 100;
    const raw = `${"[".repeat(nesting)}0${"]".repeat(nesting)}`;

    const { ast, diagnostics } = parseJsonc(raw);

    expect(ast.root).toBeNull();
    expect(diagnostics.some((d) => d.code === "OC_JSONC_DEPTH_EXCEEDED")).toBe(true);
  });

  it("parser depth cap fires on JSONL line with deeply-nested JSON (F6)", () => {
    // Each JSONL value line is parsed in isolation with its own depth cap,
    // so the same protection applies; the line surfaces as
    // `kind: 'malformed'` with the depth diagnostic.
    let nested = '"x"';
    for (let level = 0; level < MAX_TRAVERSAL_DEPTH + 50; level++) {
      nested = `{"a":${nested}}`;
    }

    const { diagnostics } = parseJsonl(nested + "\n");

    // The JSONL layer reports OC_JSONL_LINE_MALFORMED for the failed line;
    // OC_JSONC_DEPTH_EXCEEDED is not promoted through it, but detecting the
    // malformed line is what stops the stack overflow from escaping.
    expect(diagnostics.some((d) => d.code === "OC_JSONL_LINE_MALFORMED")).toBe(true);
  });
});
// ---------- Coercion pitfalls --------------------------------------------
describe("wave-23 pitfalls — coercion", () => {
  it("P-029 numeric coercion is locale-independent", () => {
    // `Number()` ignores locale (and `parseFloat`, which in practice does
    // too, is never used). So a decimal comma as in "1,5" must be rejected,
    // while "1.5" is accepted.
    const { ast } = parseJsonc('{"x":1.0}');
    const target = parseOcPath("oc://X/x");

    const withDot = setOcPath(ast, target, "1.5");
    expect(withDot.ok).toBe(true);

    const withComma = setOcPath(ast, target, "1,5");
    expect(withComma.ok).toBe(false);
    if (!withComma.ok) {
      expect(withComma.reason).toBe("parse-error");
    }
  });

  it("P-030 boolean coercion is exact-match lowercase", () => {
    const { ast } = parseJsonc('{"x":true}');
    const attempt = (text: string) => setOcPath(ast, parseOcPath("oc://X/x"), text).ok;

    // Only the exact lowercase literal is accepted.
    expect(attempt("false")).toBe(true);
    expect(attempt("False")).toBe(false);
    expect(attempt("TRUE")).toBe(false);
    expect(attempt("yes")).toBe(false);
  });
});
// ---------- Reserved character pitfalls ----------------------------------
describe("wave-23 pitfalls — reserved characters", () => {
  it("P-026 rejects `?` outside the query separator position", () => {
    // The first `?` is always treated as the query split, so
    // `oc://X/foo?session=s` is a normal query. A `?` inside a segment after
    // the query has been consumed is not an expected use.
    // NOTE(review): despite the title, both assertions below pin accepted
    // inputs; no rejection is asserted in this case.
    const withSession = parseOcPath("oc://X/foo?session=s");
    expect(withSession.section).toBe("foo");

    // A trailing `?` with no key and no `=` is silently ignored.
    expect(() => parseOcPath("oc://X/foo?")).not.toThrow();
  });

  it("P-040 negative-index magnitude is bounded", () => {
    // An out-of-range negative index yields null at resolve time, not a crash.
    const { ast } = parseJsonc('{"x":[1,2,3]}');

    const outOfRange = resolveOcPath(ast, parseOcPath("oc://X/x/-9999999999"));
    expect(outOfRange).toBeNull();

    const lastElement = resolveOcPath(ast, parseOcPath("oc://X/x/-1"));
    expect(lastElement?.kind).toBe("leaf");
  });
});
// ---------- Sentinel-redaction pitfall (P-036) ---------------------------
describe("wave-23 pitfalls — redaction sentinel", () => {
  // Placeholder only: the real coverage for P-036 is stated to live in
  // wave-21-sentinel-cross-kind. This entry keeps the pitfall ID visible in
  // this suite's report; its single assertion is trivially true.
  it("P-036 sentinel guard activates at emit time (covered by wave-21)", () => {
    const placeholder = true;
    expect(placeholder).toBe(true);
  });
});
// ---------- DEFERRED — documented limits ---------------------------------
describe("wave-23 pitfalls — deferred (v0 limits)", () => {
  // Documented limits, registered as skipped tests with empty bodies so each
  // pitfall ID still shows up in the report. Order matches the original list.
  const deferredTitles = [
    "P-005 slash literal in key — v1: quoted segments",
    "P-006 dot literal in key — v1: quoted segments",
    "P-017 nested unions {a,{b,c}} — v1: parser stack",
    "P-019 wildcard inside wildcard — v1: pattern composition",
    "P-025 leading-zero numeric `01` — v1: explicit form",
    "P-027 `&` in segments — v1: percent-encoding",
    "P-028 percent-encoded segments — v1: rfc3986 layer",
    "P-034 ast mutation between resolve & consume — caller invariant",
    "P-035 stale paths from prior find — caller invariant",
  ];
  for (const title of deferredTitles) {
    it.skip(title, () => {});
  }
});
// ---------- Injection pitfalls (C12 / W12) -------------------------------
describe("wave-23 pitfalls — injection (caller-supplied hostile input)", () => {
// P-037: a hostile path string. The substrate's job is to either
// parse safely or reject with `OcPathError` — never let undefined
// behavior leak. These cases lock the rejection-or-safe contract.
it("P-037a control characters in path body are rejected", () => {
  // NUL in the file slot, SOH in the file slot, and US (0x1F) in a later segment.
  for (const hostile of ["oc://a\x00b", "oc://a\x01b/c", "oc://a/b\x1Fc"]) {
    expect(() => parseOcPath(hostile)).toThrow(OcPathError);
  }
});
it("P-037b NUL byte anywhere in path is rejected", () => {
  // NUL embedded in the middle of the section segment.
  const parseWithNul = () => parseOcPath("oc://X.md/sec\x00tion");
  expect(parseWithNul).toThrow(OcPathError);
});
it("P-037c BOM at start of path is stripped, not interpreted", () => {
  // The BOM is U+FEFF. It has to be stripped before the scheme check;
  // otherwise a BOM-prefixed string would fail the `oc://` scheme test.
  // The prefix is written as an explicit escape so the input verifiably
  // starts with a BOM — an invisible literal is easily lost, which would
  // leave this case parsing a plain path and proving nothing.
  const path = parseOcPath("\uFEFFoc://X.md/section");
  expect(path.file).toBe("X.md");
  expect(path.section).toBe("section");
});
it("P-037d session query is parsed only via the documented `?session=...` form", () => {
  // The documented form parses into section + session.
  const { section, session } = parseOcPath("oc://X.md/sec?session=cron:daily");
  expect(section).toBe("sec");
  expect(session).toBe("cron:daily");

  // The substrate tolerates a loose `?garbage` tail (callers are expected
  // to build paths with `formatOcPath`), but it must not invent a session
  // value from it.
  const looseForm = parseOcPath("oc://X.md/sec?garbage");
  expect(looseForm.session).toBeUndefined();
});
it("P-037e unescaped `&` in segments is rejected", () => {
expect(() => parseOcPath("oc://X.md/a&b")).toThrow(OcPathError);
});
it("P-037f unescaped `%` in segments is rejected", () => {
expect(() => parseOcPath("oc://X.md/a%b")).toThrow(OcPathError);
});
it("P-037g empty file slot is rejected", () => {
expect(() => parseOcPath("oc:///section")).toThrow(OcPathError);
});
it("P-037h backslash-escape attempts are not treated as path traversal", () => {
// No special meaning — the literal backslash is just a regular
// character. Doesn't allow escaping forward slashes.
expect(() => parseOcPath("oc://X.md/a\\../b")).toThrow(OcPathError);
});
// P-038: predicate-value injection. `[k=v]` predicates filter
// matches; a hostile `v` containing regex metachars, brackets, or
// operators must NOT escape the predicate scope or be interpreted
// as a regex.
it("P-038a regex metacharacters in predicate value match literally", () => {
const ast = parseJsonc('{ "items": [ {"name": "a.*"}, {"name": "abc"} ] }').ast;
// Looking for the literal string "a.*" — should match only the
// first item, not "abc" (which would match if `.*` were treated
// as a regex).
const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[name=a.*]"));
expect(matches).toHaveLength(1);
});
it("P-038b nested-bracket attempts in predicate value are kept literal", () => {
// The substrate is permissive on nested brackets — they're part
// of the literal predicate value, not interpreted as path syntax.
// The match would be against the literal string "a[b]"; a
// resolver that finds zero matches fails closed.
const path = parseOcPath("oc://X.jsonc/items/[name=a[b]]");
expect(path.item).toBe("[name=a[b]]");
// No data has the literal value `a[b]` here, so finding empty.
const ast = parseJsonc('{ "items": [ {"name": "abc"} ] }').ast;
expect(findOcPaths(ast, path)).toHaveLength(0);
});
it("P-038c equals-sign in predicate value is treated as part of the value", () => {
// The FIRST `=` separates key from value; subsequent `=`s belong
// to the value. The rule keeps the predicate parser simple —
// operators that prefix-match (`!=`, `<=`, `>=`) are tried
// before `=`, then `=` consumes the rest.
const ast = parseJsonc('{ "items": [ {"k": "a=b"}, {"k": "c"} ] }').ast;
const matches = findOcPaths(ast, parseOcPath("oc://X.jsonc/items/[k=a=b]"));
expect(matches).toHaveLength(1);
});
it("P-038d control characters in predicate value are rejected", () => {
expect(() => parseOcPath("oc://X.jsonc/items/[k=a\x00b]")).toThrow(OcPathError);
});
it("P-038e empty predicate body is rejected", () => {
expect(() => parseOcPath("oc://X.jsonc/items/[]")).toThrow(OcPathError);
});
it("P-038f predicate-shaped bracket without operator is treated as literal sentinel", () => {
// `[name]` without `=` is parsed as a literal-bracket sentinel
// (e.g. `[frontmatter]`-style). The substrate accepts it as a
// literal path segment — predicate parsing only kicks in when an
// operator is present. Document this to lock the behavior.
const path = parseOcPath("oc://X.jsonc/items/[name]");
expect(path.item).toBe("[name]");
});
it("P-038g predicate-shaped bracket with unsupported operator parses as literal", () => {
// `~` isn't in the supported-operator set; the parser doesn't
// recognize it as a predicate, so it's accepted as a literal
// bracket segment. This is the documented v1.1 behavior — a
// future version may add `~` (regex) and bump SDK_VERSION.
const path = parseOcPath("oc://X.jsonc/items/[k~v]");
expect(path.item).toBe("[k~v]");
});
});

View File

@@ -0,0 +1,142 @@
/**
 * Wave 12 — real-world fixtures.
 *
 * Nine workspace files (one per upstream-recognized workspace
 * filename) — each parsed and round-tripped, and most also resolved,
 * to verify the substrate handles realistic content.
 */
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
import { resolveMdOcPath as resolveOcPath } from "../../resolve.js";
const HERE = dirname(fileURLToPath(import.meta.url));
const FIXTURES = join(HERE, "..", "fixtures", "real");
function load(name: string): string {
return readFileSync(join(FIXTURES, name), "utf-8");
}
/**
 * Real-world fixture suite: every fixture must round-trip byte-for-byte
 * through `parseMd` → `emitMd`; selected fixtures additionally verify
 * that sections, frontmatter, items, and tables resolve as expected.
 */
describe("wave-12 real-world-fixtures", () => {
  it("F-01 SOUL.md parses + round-trips", () => {
    const raw = load("SOUL.md");
    const { ast, diagnostics } = parseMd(raw);
    expect(diagnostics).toEqual([]);
    expect(emitMd(ast)).toBe(raw);
    // Has at least one H2 block.
    expect(ast.blocks.length).toBeGreaterThan(0);
  });
  it("F-02 AGENTS.md parses + resolves Tools section", () => {
    const raw = load("AGENTS.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    const tools = resolveOcPath(ast, { file: "AGENTS.md", section: "tools" });
    expect(tools?.kind).toBe("block");
    // Narrow on `kind` so the block-only `node.items` access type-checks.
    if (tools?.kind === "block") {
      expect(tools.node.items.some((i) => i.kv?.key === "gh")).toBe(true);
    }
  });
  it("F-03 MEMORY.md frontmatter scope resolves via [frontmatter]", () => {
    const raw = load("MEMORY.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    const scope = resolveOcPath(ast, {
      file: "MEMORY.md",
      section: "[frontmatter]",
      field: "scope",
    });
    expect(scope?.kind).toBe("frontmatter");
    if (scope?.kind === "frontmatter") {
      expect(scope.node.value).toBe("project");
    }
  });
  it("F-04 TOOLS.md table extracted from Tool Guidance section", () => {
    const raw = load("TOOLS.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    const guidance = resolveOcPath(ast, {
      file: "TOOLS.md",
      section: "tool-guidance",
    });
    expect(guidance?.kind).toBe("block");
    if (guidance?.kind === "block") {
      expect(guidance.node.tables.length).toBeGreaterThan(0);
      expect(guidance.node.tables[0]?.headers).toEqual(["tool", "guidance"]);
    }
  });
  it("F-05 IDENTITY.md sections resolvable by slug", () => {
    const raw = load("IDENTITY.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    const trust = resolveOcPath(ast, {
      file: "IDENTITY.md",
      section: "trust-level",
    });
    expect(trust?.kind).toBe("block");
  });
  it("F-06 USER.md Preferences items extracted", () => {
    const raw = load("USER.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    const prefs = resolveOcPath(ast, {
      file: "USER.md",
      section: "preferences",
    });
    expect(prefs?.kind).toBe("block");
    if (prefs?.kind === "block") {
      expect(prefs.node.items.length).toBeGreaterThan(0);
    }
  });
  it("F-07 HEARTBEAT.md schedules — H2 sections as triggers", () => {
    const raw = load("HEARTBEAT.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(ast.blocks.length).toBeGreaterThanOrEqual(3);
    const slugs = ast.blocks.map((b) => b.slug);
    expect(slugs).toContain("every-30m-wake");
    expect(slugs).toContain("every-4h-wake");
  });
  it("F-08 SKILL.md frontmatter has name + description + tier", () => {
    const raw = load("SKILL.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    const fmKeys = ast.frontmatter.map((e) => e.key);
    expect(fmKeys).toContain("name");
    expect(fmKeys).toContain("description");
    expect(fmKeys).toContain("tier");
  });
  it("F-09 BOOTSTRAP.md round-trips", () => {
    const raw = load("BOOTSTRAP.md");
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
  });
  // The title previously said "8" although nine fixtures are listed.
  it("F-10 all 9 fixtures combined round-trip-clean (sanity)", () => {
    const names = [
      "SOUL.md",
      "AGENTS.md",
      "MEMORY.md",
      "TOOLS.md",
      "IDENTITY.md",
      "USER.md",
      "HEARTBEAT.md",
      "SKILL.md",
      "BOOTSTRAP.md",
    ];
    for (const name of names) {
      const raw = load(name);
      // The custom message names the offending fixture on failure.
      expect(emitMd(parseMd(raw).ast), `${name} failed round-trip`).toBe(raw);
    }
  });
});

View File

@@ -0,0 +1,156 @@
/**
* Wave 10 — round-trip property tests.
*
* Substrate guarantee: `emitMd(parse(raw)) === raw` for all inputs the
* parser accepts. This wave exercises that property over a generated
* corpus of synthetic markdown shapes and verifies parser idempotence
* (`parse(emitMd(parse(raw))) === parse(raw)` modulo `raw`).
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
/**
 * Parses `raw` as markdown and immediately re-emits the resulting AST.
 *
 * @param raw - Markdown source text.
 * @returns The text produced by `emitMd` for the parsed AST.
 */
function roundTrip(raw: string): string {
  const { ast } = parseMd(raw);
  return emitMd(ast);
}
/**
 * Round-trip property suite: emitting a parsed document must reproduce
 * the input exactly, and re-parsing the emitted text must yield the
 * same structural AST fields.
 */
describe("wave-10 roundtrip-property", () => {
  it("RT-01 byte-fidelity over 100 generated shapes", () => {
    const inputs = generateCorpus(100);
    for (const raw of inputs) {
      try {
        expect(roundTrip(raw)).toBe(raw);
      } catch (e: unknown) {
        // Surface which input failed for debugging. Narrow the caught
        // value instead of casting: anything can be thrown.
        const reason = e instanceof Error ? e.message : String(e);
        throw new Error(
          `round-trip failed for input (length ${raw.length}):\n${JSON.stringify(raw.slice(0, 200))}\nError: ${reason}`,
          { cause: e },
        );
      }
    }
  });
  it("RT-02 parser idempotence (parse → emit → parse → identical AST shape)", () => {
    const inputs = generateCorpus(50);
    for (const raw of inputs) {
      const a = parseMd(raw).ast;
      const a2 = parseMd(emitMd(a)).ast;
      // Compare structural fields; raw will of course be identical.
      expect(a2.frontmatter).toEqual(a.frontmatter);
      expect(a2.preamble).toEqual(a.preamble);
      // Blocks are compared by heading + slug only (see stripDerived).
      expect(a2.blocks.map(stripDerived)).toEqual(a.blocks.map(stripDerived));
    }
  });
  it("RT-03 stable output for identical input", () => {
    const raw = `---\nname: x\n---\n\n## A\n- a\n## B\n- b: c\n`;
    const out1 = roundTrip(raw);
    const out2 = roundTrip(raw);
    const out3 = roundTrip(raw);
    expect(out1).toBe(out2);
    expect(out2).toBe(out3);
  });
  it("RT-04 ordering deterministic (no Object.keys / Set ordering surprises)", () => {
    const raw = `---\nb: 2\na: 1\nc: 3\n---\n## Z\n- z\n## A\n- a\n`;
    const a1 = parseMd(raw).ast;
    const a2 = parseMd(raw).ast;
    expect(a1.frontmatter.map((e) => e.key)).toEqual(a2.frontmatter.map((e) => e.key));
    expect(a1.blocks.map((b) => b.heading)).toEqual(a2.blocks.map((b) => b.heading));
  });
  it("RT-05 round-trip preserves comment-like lines (no comment recognition at substrate)", () => {
    const raw = `## H\n\n<!-- a comment -->\n- bullet\n`;
    expect(roundTrip(raw)).toBe(raw);
  });
  it("RT-06 round-trip preserves indented blocks (substrate doesn't reflow)", () => {
    const raw = `## H\n\n indented code-ish block\n more indented\n`;
    expect(roundTrip(raw)).toBe(raw);
  });
  it("RT-07 round-trip preserves blockquotes", () => {
    const raw = `## H\n\n> quoted line 1\n> quoted line 2\n`;
    expect(roundTrip(raw)).toBe(raw);
  });
  it("RT-08 round-trip preserves images / links", () => {
    const raw = `## H\n\n![alt](path/to/img.png)\n[link](http://example.com)\n`;
    expect(roundTrip(raw)).toBe(raw);
  });
  it("RT-09 round-trip preserves HTML", () => {
    const raw = `## H\n\n<details><summary>x</summary>body</details>\n`;
    expect(roundTrip(raw)).toBe(raw);
  });
  it("RT-10 round-trip preserves consecutive headings with no body between", () => {
    const raw = `## A\n## B\n## C\n`;
    expect(roundTrip(raw)).toBe(raw);
  });
});
// ---------- corpus generator -------------------------------------------------
/**
 * Builds a deterministic corpus of synthetic markdown documents.
 *
 * Each document optionally starts with a frontmatter fence and a short
 * preamble, followed by one to three `## ` sections holding bullet
 * items and sometimes a fenced code block. Lines are joined with either
 * `\n` or `\r\n`, chosen per document.
 *
 * @param count - Number of documents to generate.
 * @param seed - Seed for the pseudo-random sequence; it is coerced to an
 *   unsigned 32-bit integer. The default (42) reproduces the historical
 *   corpus, so a shorter corpus is always a prefix of a longer one.
 * @returns `count` markdown strings, identical for identical arguments.
 */
function generateCorpus(count: number, seed = 42): string[] {
  const corpus: string[] = [];
  // Deterministic linear congruential generator so failures reproduce
  // identically on every run.
  const MODULUS = 2 ** 32;
  let state = seed >>> 0;
  const rand = (): number => {
    state = (state * 1664525 + 1013904223) % MODULUS;
    return state / MODULUS;
  };
  const choose = <T>(arr: readonly T[]): T => arr[Math.floor(rand() * arr.length)];
  const headings = ["Boundaries", "Tools", "Memory", "Identity", "User", "Heartbeat", "Skills"];
  const fmKeys = ["name", "description", "tier", "enabled", "timeout", "url"];
  const fmValues = ["github", "gh CLI", "T1", "true", "15000", "https://example.com"];
  const itemTexts = ["never write to /etc", "always confirm", "gh: GitHub CLI", "curl: HTTP"];
  const eols = ["\n", "\r\n"];
  // NOTE: the order of rand()/choose() calls below defines the corpus;
  // reordering them changes every generated document.
  for (let docIndex = 0; docIndex < count; docIndex++) {
    const eol = choose(eols);
    const parts: string[] = [];
    if (rand() < 0.5) {
      // Frontmatter fence with zero to three `key: value` entries.
      parts.push("---");
      const fmCount = Math.floor(rand() * 4);
      for (let k = 0; k < fmCount; k++) {
        parts.push(`${choose(fmKeys)}: ${choose(fmValues)}`);
      }
      parts.push("---");
      parts.push("");
    }
    if (rand() < 0.3) {
      parts.push("Some preamble.");
      parts.push("");
    }
    const blockCount = Math.floor(rand() * 3) + 1;
    for (let b = 0; b < blockCount; b++) {
      parts.push(`## ${choose(headings)}`);
      parts.push("");
      const itemCount = Math.floor(rand() * 4);
      // Named `itemIndex` (not `it`) to avoid shadowing the test runner's `it`.
      for (let itemIndex = 0; itemIndex < itemCount; itemIndex++) {
        parts.push(`- ${choose(itemTexts)}`);
      }
      if (rand() < 0.2) {
        parts.push("```");
        parts.push("code");
        parts.push("```");
      }
      parts.push("");
    }
    corpus.push(parts.join(eol));
  }
  return corpus;
}
/**
 * Projects a block down to the fields compared by the idempotence test.
 *
 * @param b - Block-like object; `bodyText` is accepted but not copied.
 * @returns A new object carrying only `heading` and `slug`.
 */
function stripDerived(b: { heading: string; slug: string; bodyText: string }): {
  heading: string;
  slug: string;
} {
  const { heading, slug } = b;
  return { heading, slug };
}

View File

@@ -0,0 +1,160 @@
/**
* Wave 21 — sentinel guard across all 3 kinds.
*
* Substrate guarantee: emit refuses to write a CALLER-INJECTED
* `__OPENCLAW_REDACTED__` literal. Round-trip mode trusts parsed bytes
* (a workspace file legitimately containing the sentinel — in a code
* block, in a pasted error log — would otherwise become a workspace-
* wide emit DoS). Render mode walks every leaf, so a caller-injected
* sentinel via `setOcPath` always fails. Callers that want strict
* pre-existing-byte detection (e.g., LKG fingerprint verification)
* opt in via `acceptPreExistingSentinel: false`.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { setJsoncOcPath } from "../../jsonc/edit.js";
import { emitJsonc } from "../../jsonc/emit.js";
import { parseJsonc } from "../../jsonc/parse.js";
import { emitJsonl } from "../../jsonl/emit.js";
import { parseJsonl } from "../../jsonl/parse.js";
import { parseOcPath } from "../../oc-path.js";
import { parseMd } from "../../parse.js";
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../../sentinel.js";
/**
 * Sentinel guard across jsonc, jsonl, and md: default round-trip emit
 * echoes sentinel bytes that were already in the parsed input, strict
 * mode (`acceptPreExistingSentinel: false`) rejects them, and render
 * mode rejects a sentinel found in any leaf.
 */
describe("wave-21 sentinel guard cross-kind", () => {
  it("S-01 jsonc round-trip echoes safely when raw contains pre-existing sentinel", () => {
    // Pre-existing sentinel bytes are trusted — see emit-policy comment
    // in jsonc/emit.ts. The strict mode below is the opt-in path for
    // callers who want LKG-style fingerprint verification.
    const source = `{ "x": "${REDACTED_SENTINEL}" }`;
    const { ast } = parseJsonc(source);
    expect(emitJsonc(ast)).toBe(source);
    // Strict mode still rejects pre-existing sentinel for callers who
    // explicitly opt in.
    const strictEmit = () => emitJsonc(ast, { acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
  it("S-02 jsonl round-trip echoes safely; strict mode rejects", () => {
    const source = `{"x":"${REDACTED_SENTINEL}"}\n`;
    const { ast } = parseJsonl(source);
    expect(emitJsonl(ast)).toBe(source);
    const strictEmit = () => emitJsonl(ast, { acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
  it("S-03 md round-trip echoes safely; strict mode rejects", () => {
    const source = `## Body\n\n- ${REDACTED_SENTINEL}\n`;
    const { ast } = parseMd(source);
    expect(emitMd(ast)).toBe(source);
    const strictEmit = () => emitMd(ast, { acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
  it("S-04 jsonc render mode walks every leaf for sentinel", () => {
    const clean = parseJsonc('{ "x": "ok" }').ast;
    // Hand-built root whose only leaf carries the sentinel.
    const poisonedRoot = {
      kind: "object" as const,
      entries: [
        {
          key: "x",
          line: 1,
          value: { kind: "string" as const, value: REDACTED_SENTINEL },
        },
      ],
    };
    const tampered = { ...clean, root: poisonedRoot };
    const renderEmit = () => emitJsonc(tampered, { mode: "render" });
    expect(renderEmit).toThrow(OcEmitSentinelError);
  });
  it("S-05 jsonl render mode walks every value-line leaf", () => {
    const clean = parseJsonl('{"a":"ok"}\n').ast;
    // `raw` stays sentinel-free; only the structured value is poisoned.
    const poisonedLine = {
      kind: "value" as const,
      line: 1,
      raw: '{"a":"ok"}',
      value: {
        kind: "object" as const,
        entries: [
          {
            key: "a",
            line: 1,
            value: { kind: "string" as const, value: REDACTED_SENTINEL },
          },
        ],
      },
    };
    const tampered = { ...clean, lines: [poisonedLine] };
    const renderEmit = () => emitJsonl(tampered, { mode: "render" });
    expect(renderEmit).toThrow(OcEmitSentinelError);
  });
  it("S-06 setJsoncOcPath itself throws when the new value contains the sentinel", () => {
    // The substrate guard fires at write-time: setJsoncOcPath rebuilds
    // raw via render mode emit, which scans every leaf. Defense-in-depth
    // — even if a caller forgets to call emit afterward, the sentinel
    // can't make it into an in-memory AST that pretends to be valid.
    const { ast } = parseJsonc('{ "x": "ok" }');
    const injectSentinel = () =>
      setJsoncOcPath(ast, parseOcPath("oc://config/x"), {
        kind: "string",
        value: REDACTED_SENTINEL,
      });
    expect(injectSentinel).toThrow(OcEmitSentinelError);
  });
  it("S-07 sentinel embedded in deep nesting — render mode catches the leaf", () => {
    // Round-trip echoes the pre-existing bytes (the workspace contract:
    // a parsed file containing the sentinel as data is not "writing" it
    // on emit). Render mode walks every leaf and rejects this caller-
    // injected pattern — and a `setOcPath` followed by emit lands here.
    const source = JSON.stringify({ a: { b: { c: REDACTED_SENTINEL } } });
    const { ast } = parseJsonc(source);
    expect(emitJsonc(ast)).toBe(source); // round-trip echo
    const renderEmit = () => emitJsonc(ast, { mode: "render" });
    expect(renderEmit).toThrow(OcEmitSentinelError);
  });
  it("S-08 sentinel inside an array element triggers guard in render mode", () => {
    const source = JSON.stringify({ arr: ["ok", REDACTED_SENTINEL, "ok"] });
    const { ast } = parseJsonc(source);
    const renderEmit = () => emitJsonc(ast, { mode: "render" });
    expect(renderEmit).toThrow(OcEmitSentinelError);
  });
  it("S-09 sentinel as object key in raw — strict mode catches it", () => {
    const source = `{ "${REDACTED_SENTINEL}": 1 }`;
    const { ast } = parseJsonc(source);
    expect(emitJsonc(ast)).toBe(source); // default-mode echo
    const strictEmit = () => emitJsonc(ast, { acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
  it("S-10 sentinel in jsonl malformed line — strict mode catches it", () => {
    const source = `${REDACTED_SENTINEL}\n`;
    const { ast } = parseJsonl(source);
    expect(emitJsonl(ast)).toBe(source); // round-trip echoes verbatim
    const strictEmit = () => emitJsonl(ast, { acceptPreExistingSentinel: false });
    expect(strictEmit).toThrow(OcEmitSentinelError);
  });
  it("S-11 partial sentinel substring does NOT trigger guard", () => {
    const source = '{ "x": "OPENCLAW_REDACTED" }';
    const { ast } = parseJsonc(source);
    const defaultEmit = () => emitJsonc(ast);
    expect(defaultEmit).not.toThrow();
  });
  it("S-12 sentinel guard error message includes the OcPath context (render mode)", () => {
    // Render mode is the path that actually rejects caller-injected
    // sentinel — round-trip just echoes, so the error context surfaces
    // when render walks the offending leaf and constructs the path.
    const source = `{ "secret": "${REDACTED_SENTINEL}" }`;
    const { ast } = parseJsonc(source);
    try {
      emitJsonc(ast, { mode: "render", fileNameForGuard: "config" });
      expect.fail("should have thrown");
    } catch (caught) {
      expect(caught).toBeInstanceOf(OcEmitSentinelError);
      const rendered = String(caught);
      expect(rendered).toContain("oc://");
      expect(rendered).toContain("config");
    }
  });
});

View File

@@ -0,0 +1,170 @@
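/**
 * Wave 9 — sentinel guard at every emit leaf.
 *
 * Substrate guarantee: `guardSentinel` throws `OcEmitSentinelError` for
 * any string containing the `__OPENCLAW_REDACTED__` literal (exact or
 * substring). Render mode walks every leaf through that guard.
 * Round-trip mode echoes sentinel bytes already present in `raw`;
 * passing `acceptPreExistingSentinel: false` makes it reject them too.
 */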
import { describe, expect, it } from "vitest";
import { emitMd } from "../../emit.js";
import { parseMd } from "../../parse.js";
import { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from "../../sentinel.js";
/**
 * Sentinel-guard suite for markdown: covers `guardSentinel` directly
 * (pass-through, exact and substring matches, error context) and
 * `emitMd` in round-trip, strict, and render modes.
 */
describe("wave-09 sentinel-guard", () => {
  it("S-01 sentinel constant matches the literal", () => {
    expect(REDACTED_SENTINEL).toBe("__OPENCLAW_REDACTED__");
  });
  it("S-02 guardSentinel passes normal strings", () => {
    expect(() => guardSentinel("safe", "oc://X.md")).not.toThrow();
  });
  it("S-03 guardSentinel passes non-string types", () => {
    expect(() => guardSentinel(42, "oc://X.md")).not.toThrow();
    expect(() => guardSentinel(null, "oc://X.md")).not.toThrow();
    expect(() => guardSentinel(undefined, "oc://X.md")).not.toThrow();
    expect(() => guardSentinel({}, "oc://X.md")).not.toThrow();
  });
  it("S-04 guardSentinel throws on exact match", () => {
    expect(() => guardSentinel(REDACTED_SENTINEL, "oc://X.md")).toThrow(OcEmitSentinelError);
  });
  it("S-05 guardSentinel throws on substring matches (sentinel embedded in larger string)", () => {
    // Substring scan — the sentinel anywhere in the value is a leak,
    // not just exact equality. A hostile caller smuggling
    // `prefix__OPENCLAW_REDACTED__suffix` would have bypassed the old
    // equality check; substring scan closes the gap.
    expect(() => guardSentinel(`prefix${REDACTED_SENTINEL}suffix`, "oc://X.md")).toThrow(
      OcEmitSentinelError,
    );
  });
  it("S-06 error attaches the OcPath context", () => {
    try {
      guardSentinel(REDACTED_SENTINEL, "oc://config/plugins.entries.foo.token");
      expect.fail("should have thrown");
    } catch (err) {
      expect(err).toBeInstanceOf(OcEmitSentinelError);
      const e = err as OcEmitSentinelError;
      expect(e.path).toBe("oc://config/plugins.entries.foo.token");
      expect(e.code).toBe("OC_EMIT_SENTINEL");
    }
  });
  it("S-07 round-trip echoes pre-existing sentinel; strict mode rejects", () => {
    const raw = "## Section\n\n- token: __OPENCLAW_REDACTED__\n";
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
  });
  it("S-08 round-trip emit allows sentinel-free content", () => {
    const raw = "## Section\n\n- token: redacted-but-not-sentinel\n";
    const { ast } = parseMd(raw);
    expect(() => emitMd(ast)).not.toThrow();
  });
  it("S-09 render mode catches sentinel in frontmatter", () => {
    // Hand-built AST: `raw` is empty, so only the leaf walk can find it.
    const ast = {
      kind: "md" as const,
      raw: "",
      frontmatter: [{ key: "token", value: REDACTED_SENTINEL, line: 2 }],
      preamble: "",
      blocks: [],
    };
    expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
  });
  it("S-10 render mode catches sentinel in preamble", () => {
    const ast = {
      kind: "md" as const,
      raw: "",
      frontmatter: [],
      preamble: REDACTED_SENTINEL,
      blocks: [],
    };
    expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
  });
  it("S-11 render mode catches sentinel in block bodyText", () => {
    const ast = {
      kind: "md" as const,
      raw: "",
      frontmatter: [],
      preamble: "",
      blocks: [
        {
          heading: "Sec",
          slug: "sec",
          line: 1,
          bodyText: REDACTED_SENTINEL,
          items: [],
          tables: [],
          codeBlocks: [],
        },
      ],
    };
    expect(() => emitMd(ast, { mode: "render" })).toThrow(OcEmitSentinelError);
  });
  it("S-12 render mode catches sentinel in item kv.value", () => {
    // bodyText and item text are clean; only `kv.value` is poisoned.
    const ast = {
      kind: "md" as const,
      raw: "",
      frontmatter: [],
      preamble: "",
      blocks: [
        {
          heading: "S",
          slug: "s",
          line: 1,
          bodyText: "- t: x",
          items: [
            {
              text: "t: x",
              slug: "t",
              line: 2,
              kv: { key: "t", value: REDACTED_SENTINEL },
            },
          ],
          tables: [],
          codeBlocks: [],
        },
      ],
    };
    expect(() => emitMd(ast, { mode: "render", fileNameForGuard: "AGENTS.md" })).toThrow(
      OcEmitSentinelError,
    );
  });
  it("S-13 sentinel-as-substring in raw — strict mode catches it", () => {
    const raw = `Some prose ${REDACTED_SENTINEL} more prose.\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
  });
  it("S-14 multiple sentinel occurrences in raw — strict mode catches them", () => {
    const raw = `## A\n${REDACTED_SENTINEL}\n${REDACTED_SENTINEL}\n`;
    const { ast } = parseMd(raw);
    expect(emitMd(ast)).toBe(raw);
    expect(() => emitMd(ast, { acceptPreExistingSentinel: false })).toThrow(OcEmitSentinelError);
  });
  it("S-15 fileNameForGuard appears in the error path", () => {
    const ast = {
      kind: "md" as const,
      raw: "",
      frontmatter: [{ key: "token", value: REDACTED_SENTINEL, line: 2 }],
      preamble: "",
      blocks: [],
    };
    try {
      emitMd(ast, { mode: "render", fileNameForGuard: "config" });
      expect.fail("should have thrown");
    } catch (err) {
      // Check the type first: if emit did not throw, `err` is the
      // `expect.fail` assertion error and this line reports that
      // clearly instead of failing on an undefined `path`.
      expect(err).toBeInstanceOf(OcEmitSentinelError);
      expect((err as OcEmitSentinelError).path).toContain("config");
    }
  });
});

View File

@@ -0,0 +1,154 @@
/**
* Wave 5 — markdown tables.
*
* Substrate guarantee: GFM-style tables (`| h | h |\n|---|---|\n| r | r |`)
* inside H2 blocks are extracted into `AstTable`. Tables inside fenced
* code blocks are NOT extracted (handled at item-extraction layer too;
* tables share the same code-block awareness when relevant).
*/
import { describe, expect, it } from "vitest";
import { parseMd } from "../../parse.js";
// Table-extraction suite: each case parses a small markdown document and
// inspects `ast.blocks[0].tables`. The template literals are the test
// inputs verbatim — their exact bytes matter, so do not reformat them.
describe("wave-05 tables", () => {
// Baseline: header row, separator row, two data rows.
it("T-01 standard 2-column table", () => {
const raw = `## H
| tool | guidance |
| --- | --- |
| gh | use for GitHub |
| curl | HTTP client |
`;
const { ast } = parseMd(raw);
const table = ast.blocks[0]?.tables[0];
expect(table?.headers).toEqual(["tool", "guidance"]);
expect(table?.rows).toEqual([
["gh", "use for GitHub"],
["curl", "HTTP client"],
]);
});
// Single-dash separators are accepted; cells come back as strings.
it("T-02 3+ column table", () => {
const raw = `## H
| a | b | c |
| - | - | - |
| 1 | 2 | 3 |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.headers).toEqual(["a", "b", "c"]);
expect(ast.blocks[0]?.tables[0]?.rows[0]).toEqual(["1", "2", "3"]);
});
// Alignment colons in the separator row must still count as a separator.
it("T-03 table with alignment colons in separator", () => {
const raw = `## H
| left | center | right |
| :--- | :---: | ---: |
| a | b | c |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables.length).toBe(1);
});
// Empty cells are kept as empty strings so column positions stay aligned.
it("T-04 table with empty cells", () => {
const raw = `## H
| a | b |
| - | - |
| 1 | |
| | 2 |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.rows).toEqual([
["1", ""],
["", "2"],
]);
});
// Header + separator alone still produce a table, with an empty row list.
it("T-05 table with no rows (header + sep only)", () => {
const raw = `## H
| a | b |
| - | - |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.headers).toEqual(["a", "b"]);
expect(ast.blocks[0]?.tables[0]?.rows).toEqual([]);
});
// A prose line between two tables ends the first and allows a second.
it("T-06 multiple tables in same section", () => {
const raw = `## H
| a | b |
| - | - |
| 1 | 2 |
Some text.
| x | y |
| - | - |
| 3 | 4 |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables.length).toBe(2);
});
// Only asserts that a positive line number is recorded, not its exact value.
it("T-07 table line numbers track to the header line", () => {
const raw = `## Section
preamble line
| a | b |
| - | - |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.line).toBeGreaterThan(0);
});
// A pipe row followed by a non-separator line is not treated as a table.
it("T-08 invalid separator (no pipes) — no table extracted", () => {
const raw = `## H
| a | b |
not a separator
| 1 | 2 |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables).toEqual([]);
});
// One column is enough; each row is a single-element array.
it("T-09 single-column table (just `| col |\\n|---|`)", () => {
const raw = `## H
| col |
| --- |
| value1 |
| value2 |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.headers).toEqual(["col"]);
expect(ast.blocks[0]?.tables[0]?.rows).toEqual([["value1"], ["value2"]]);
});
// The table is the last content in the document; its row is still captured.
it("T-10 table at end of file with trailing newlines", () => {
const raw = `## H
| a |
| - |
| 1 |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.rows).toEqual([["1"]]);
});
// Header and cell text are returned without the padding around the pipes.
it("T-11 table content with internal whitespace trimmed", () => {
const raw = `## H
| col1 | col2 |
| --- | --- |
| a | b |
`;
const { ast } = parseMd(raw);
expect(ast.blocks[0]?.tables[0]?.headers).toEqual(["col1", "col2"]);
expect(ast.blocks[0]?.tables[0]?.rows[0]).toEqual(["a", "b"]);
});
});

View File

@@ -0,0 +1,32 @@
import { describe, expect, it } from "vitest";
import { OcEmitSentinelError, REDACTED_SENTINEL, guardSentinel } from "../sentinel.js";
/**
 * Unit tests for `guardSentinel`: harmless values pass, the sentinel
 * literal throws `OcEmitSentinelError`, and the error carries the
 * supplied path and the `OC_EMIT_SENTINEL` code.
 */
describe("guardSentinel", () => {
  it("passes through normal strings", () => {
    const guardPlain = () => guardSentinel("normal value", "oc://SOUL.md");
    expect(guardPlain).not.toThrow();
  });
  it("passes through non-string values", () => {
    const guardNumber = () => guardSentinel(42, "oc://SOUL.md");
    const guardNull = () => guardSentinel(null, "oc://SOUL.md");
    const guardUndefined = () => guardSentinel(undefined, "oc://SOUL.md");
    expect(guardNumber).not.toThrow();
    expect(guardNull).not.toThrow();
    expect(guardUndefined).not.toThrow();
  });
  it("throws on the sentinel literal", () => {
    const guardLiteral = () => guardSentinel(REDACTED_SENTINEL, "oc://SOUL.md/[fm]/token");
    expect(guardLiteral).toThrow(OcEmitSentinelError);
  });
  it("attaches the OcPath in the error", () => {
    try {
      guardSentinel(REDACTED_SENTINEL, "oc://config/plugins.entries.foo.token");
      expect.fail("should have thrown");
    } catch (caught) {
      expect(caught).toBeInstanceOf(OcEmitSentinelError);
      const sentinelError = caught as OcEmitSentinelError;
      expect(sentinelError.path).toBe("oc://config/plugins.entries.foo.token");
      expect(sentinelError.code).toBe("OC_EMIT_SENTINEL");
    }
  });
});

View File

@@ -0,0 +1,50 @@
import { describe, expect, it } from "vitest";
import { slugify } from "../slug.js";
/**
 * Unit tests for `slugify`: lowercasing, separator normalisation,
 * trimming, hyphen collapsing, idempotence, and the ASCII-only policy.
 */
describe("slugify", () => {
  it("lowercases", () => {
    const slug = slugify("Boundaries");
    expect(slug).toBe("boundaries");
  });
  it("replaces underscores with hyphens", () => {
    const slug = slugify("API_KEY");
    expect(slug).toBe("api-key");
  });
  it("collapses multi-word headings", () => {
    const slug = slugify("Tool Guidance");
    expect(slug).toBe("tool-guidance");
  });
  it("preserves existing kebab-case", () => {
    const slug = slugify("deny-rule-1");
    expect(slug).toBe("deny-rule-1");
  });
  it("trims surrounding whitespace + non-slug chars", () => {
    const slug = slugify(" Restricted Data ");
    expect(slug).toBe("restricted-data");
  });
  it("handles colon + space patterns", () => {
    const slug = slugify("deny: secrets");
    expect(slug).toBe("deny-secrets");
  });
  it("collapses repeated hyphens", () => {
    const slug = slugify("foo----bar");
    expect(slug).toBe("foo-bar");
  });
  it("returns empty for non-slug-valid input", () => {
    const fromPunctuation = slugify("!!");
    const fromWhitespace = slugify(" ");
    expect(fromPunctuation).toBe("");
    expect(fromWhitespace).toBe("");
  });
  it("is idempotent", () => {
    const samples = ["Tool Guidance", "API_KEY", "deny-rule-1", "Multi-tenant isolation"];
    samples.forEach((sample) => {
      // Slugging an existing slug must not change it.
      expect(slugify(slugify(sample))).toBe(slugify(sample));
    });
  });
  it("handles unicode by stripping (current ASCII-only policy)", () => {
    // Caveat: unicode in headings becomes empty/lossy. Document as a
    // known limit; lint rules can flag non-ASCII headings if needed.
    const slug = slugify("Café");
    expect(slug).toBe("caf");
  });
});

View File

@@ -0,0 +1,472 @@
/**
* Universal verbs — `setOcPath` + `resolveOcPath` test surface.
*
* Every test exercises the universal entry point. The substrate
* dispatches via `ast.kind` and coerces value strings based on AST
* shape at the path location.
*/
import { describe, expect, it } from "vitest";
import { emitMd } from "../emit.js";
import { emitJsonc } from "../jsonc/emit.js";
import { parseJsonc } from "../jsonc/parse.js";
import { emitJsonl } from "../jsonl/emit.js";
import { parseJsonl } from "../jsonl/parse.js";
import { parseOcPath } from "../oc-path.js";
import { parseMd } from "../parse.js";
import { detectInsertion, resolveOcPath, setOcPath } from "../universal.js";
// ---------- detectInsertion ------------------------------------------------
/**
 * Tests for `detectInsertion`: plain paths yield `null`; a trailing
 * `+`, `+key`, or `+nnn` segment is reported as an insertion marker
 * together with the parent path.
 */
describe("detectInsertion", () => {
  // Parse the path string, then run insertion detection on it.
  const detect = (pathText: string) => detectInsertion(parseOcPath(pathText));

  it("returns null for plain paths", () => {
    const result = detect("oc://X.md/section/item/field");
    expect(result).toBeNull();
  });
  it("detects bare `+` end-insertion at section", () => {
    const result = detect("oc://X.md/tools/+");
    expect(result?.marker).toBe("+");
    expect(result?.parentPath.section).toBe("tools");
    expect(result?.parentPath.item).toBeUndefined();
  });
  it("detects `+key` keyed insertion", () => {
    const result = detect("oc://config/plugins/+gitlab");
    expect(result?.marker).toEqual({ kind: "keyed", key: "gitlab" });
  });
  it("detects `+nnn` indexed insertion", () => {
    const result = detect("oc://config/items/+2");
    expect(result?.marker).toEqual({ kind: "indexed", index: 2 });
  });
  it("detects file-root insertion", () => {
    const result = detect("oc://session.jsonl/+");
    expect(result?.marker).toBe("+");
    expect(result?.parentPath.section).toBeUndefined();
  });
});
// ---------- resolveOcPath — universal across kinds -------------------------
/**
 * Universal `resolveOcPath` against a markdown AST: frontmatter entries
 * and item fields resolve to string leaves, a section to an `md-block`
 * node, a file-only path to the root, and unknown sections to `null`.
 */
describe("resolveOcPath — md AST", () => {
  const source = "---\nname: github\n---\n\n## Boundaries\n\n- enabled: true\n";
  const { ast: md } = parseMd(source);
  // Resolve a path string against the shared markdown AST.
  const resolveAt = (pathText: string) => resolveOcPath(md, parseOcPath(pathText));

  it("returns leaf with valueText for frontmatter entry", () => {
    const match = resolveAt("oc://X.md/[frontmatter]/name");
    expect(match).toMatchObject({ kind: "leaf", valueText: "github", leafType: "string" });
  });
  it("returns leaf for item-field", () => {
    const match = resolveAt("oc://X.md/boundaries/enabled/enabled");
    expect(match).toMatchObject({ kind: "leaf", valueText: "true", leafType: "string" });
  });
  it("returns node for block", () => {
    const match = resolveAt("oc://X.md/boundaries");
    expect(match).toMatchObject({ kind: "node", descriptor: "md-block" });
  });
  it("returns root for file-only path", () => {
    const match = resolveAt("oc://X.md");
    expect(match?.kind).toBe("root");
  });
  it("returns null for unresolved", () => {
    const match = resolveAt("oc://X.md/missing");
    expect(match).toBeNull();
  });
});
/**
 * Universal `resolveOcPath` against a jsonc AST: each scalar resolves
 * to a leaf tagged with its JSON type, arrays resolve to a
 * `jsonc-array` node, and dotted indices reach array elements.
 */
describe("resolveOcPath — jsonc AST", () => {
  const source = '{ "k": 42, "s": "x", "b": true, "n": null, "arr": [1,2,3] }';
  const { ast } = parseJsonc(source);
  // Resolve a path string against the shared jsonc AST.
  const resolveAt = (pathText: string) => resolveOcPath(ast, parseOcPath(pathText));

  it("returns leaf:number for numeric value", () => {
    const match = resolveAt("oc://config/k");
    expect(match).toMatchObject({ kind: "leaf", valueText: "42", leafType: "number" });
  });
  it("returns leaf:string for string value", () => {
    const match = resolveAt("oc://config/s");
    expect(match).toMatchObject({ kind: "leaf", valueText: "x", leafType: "string" });
  });
  it("returns leaf:boolean for bool value", () => {
    const match = resolveAt("oc://config/b");
    expect(match).toMatchObject({ kind: "leaf", valueText: "true", leafType: "boolean" });
  });
  it("returns leaf:null for null value", () => {
    const match = resolveAt("oc://config/n");
    expect(match).toMatchObject({ kind: "leaf", valueText: "null", leafType: "null" });
  });
  it("returns node:jsonc-array for array value", () => {
    const match = resolveAt("oc://config/arr");
    expect(match).toMatchObject({ kind: "node", descriptor: "jsonc-array" });
  });
  it("returns leaf at array index", () => {
    const match = resolveAt("oc://config/arr.1");
    expect(match).toMatchObject({ kind: "leaf", valueText: "2", leafType: "number" });
  });
});
// resolveOcPath against a two-line JSONL AST: line addresses (`L1`, `L2`),
// the `$last` positional token, and fields on a line.
describe("resolveOcPath — jsonl AST", () => {
const ast = parseJsonl('{"event":"start","n":1}\n{"event":"step","n":2}\n').ast;
it("returns node:jsonl-line for line address", () => {
const m = resolveOcPath(ast, parseOcPath("oc://log/L1"));
expect(m).toMatchObject({ kind: "node", descriptor: "jsonl-line" });
});
it("returns leaf for field on line", () => {
const m = resolveOcPath(ast, parseOcPath("oc://log/L2/event"));
expect(m).toMatchObject({ kind: "leaf", valueText: "step", leafType: "string" });
});
it("returns leaf:number for $last/n", () => {
// `$last` names the second (final) line of the fixture, whose `n` is 2.
const m = resolveOcPath(ast, parseOcPath("oc://log/$last/n"));
expect(m).toMatchObject({ kind: "leaf", valueText: "2", leafType: "number" });
});
});
// Paths ending in an insertion marker (`+` or `+key`) resolve to an
// "insertion-point" match whose `container` names the kind of parent being
// inserted into; a non-container parent resolves to null.
describe("resolveOcPath — insertion-point detection", () => {
it("returns insertion-point for md section append", () => {
const md = parseMd("## Tools\n").ast;
const m = resolveOcPath(md, parseOcPath("oc://X.md/tools/+"));
expect(m).toMatchObject({ kind: "insertion-point", container: "md-section" });
});
it("returns insertion-point for md file-level", () => {
const md = parseMd("## Tools\n").ast;
const m = resolveOcPath(md, parseOcPath("oc://X.md/+"));
expect(m).toMatchObject({ kind: "insertion-point", container: "md-file" });
});
it("returns insertion-point for md frontmatter +key", () => {
const md = parseMd("---\nname: x\n---\n").ast;
const m = resolveOcPath(md, parseOcPath("oc://X.md/[frontmatter]/+description"));
expect(m).toMatchObject({ kind: "insertion-point", container: "md-frontmatter" });
});
it("returns insertion-point for jsonc array +", () => {
const ast = parseJsonc('{ "items": [1,2,3] }').ast;
const m = resolveOcPath(ast, parseOcPath("oc://config/items/+"));
expect(m).toMatchObject({ kind: "insertion-point", container: "jsonc-array" });
});
it("returns insertion-point for jsonc object +key", () => {
const ast = parseJsonc('{ "plugins": {} }').ast;
const m = resolveOcPath(ast, parseOcPath("oc://config/plugins/+gitlab"));
expect(m).toMatchObject({ kind: "insertion-point", container: "jsonc-object" });
});
it("returns insertion-point for jsonl file-root +", () => {
// An empty JSONL file still accepts a file-root append.
const ast = parseJsonl("").ast;
const m = resolveOcPath(ast, parseOcPath("oc://log/+"));
expect(m).toMatchObject({ kind: "insertion-point", container: "jsonl-file" });
});
it("returns null when insertion target is not a container", () => {
// `k` is a number leaf, so there is nothing to insert into.
const ast = parseJsonc('{ "k": 42 }').ast;
const m = resolveOcPath(ast, parseOcPath("oc://config/k/+"));
expect(m).toBeNull();
});
});
// ---------- setOcPath — leaf assignment ------------------------------------
// setOcPath leaf assignment on markdown: frontmatter values, item key/value
// pairs, and the "unresolved" failure for a path that does not exist.
describe("setOcPath — md leaf", () => {
it("replaces frontmatter value", () => {
const md = parseMd("---\nname: old\n---\n").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/[frontmatter]/name"), "new");
expect(r.ok).toBe(true);
if (r.ok) {
// If the result is not an md AST the `&&` yields `false`, which also fails toBe("new").
expect(r.ast.kind === "md" && r.ast.frontmatter[0]?.value).toBe("new");
}
});
it("replaces item kv value", () => {
const md = parseMd("## Boundaries\n\n- timeout: 5\n").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/boundaries/timeout/timeout"), "60");
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
expect(out).toContain("- timeout: 60");
}
});
it("returns unresolved for missing path", () => {
const md = parseMd("").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/missing/x/x"), "v");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("unresolved");
}
});
});
// setOcPath on JSONC leaves: the string argument is coerced to the type of
// the existing leaf (string / number / boolean); a value that cannot be
// coerced is rejected with reason "parse-error".
describe("setOcPath — jsonc leaf with coercion", () => {
it("replaces string leaf with string value", () => {
const ast = parseJsonc('{ "k": "old" }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/k"), "new");
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({ k: "new" });
}
});
it("coerces value to number when leaf was number", () => {
const ast = parseJsonc('{ "k": 1 }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/k"), "42");
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({ k: 42 });
}
});
it('coerces "true"/"false" when leaf was boolean', () => {
const ast = parseJsonc('{ "k": true }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/k"), "false");
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({ k: false });
}
});
it("rejects non-numeric string for number leaf", () => {
const ast = parseJsonc('{ "k": 1 }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/k"), "not-a-number");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("parse-error");
}
});
it("rejects non-bool string for boolean leaf", () => {
const ast = parseJsonc('{ "k": true }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/k"), "maybe");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("parse-error");
}
});
});
// setOcPath on JSONL: replacing one field on a line (with type coercion) and
// replacing a whole line, where the value must itself be valid JSON.
describe("setOcPath — jsonl leaf", () => {
it("replaces field on a value line with coercion", () => {
const ast = parseJsonl('{"event":"start","n":1}\n').ast;
const r = setOcPath(ast, parseOcPath("oc://log/L1/n"), "42");
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitJsonl(r.ast as Parameters<typeof emitJsonl>[0]);
// Only the first emitted line is inspected.
expect(JSON.parse(out.split("\n")[0])).toEqual({ event: "start", n: 42 });
}
});
it("replaces whole line via JSON value", () => {
const ast = parseJsonl('{"event":"start"}\n').ast;
const r = setOcPath(ast, parseOcPath("oc://log/L1"), '{"event":"replaced"}');
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitJsonl(r.ast as Parameters<typeof emitJsonl>[0]);
expect(JSON.parse(out.split("\n")[0])).toEqual({ event: "replaced" });
}
});
it("rejects malformed JSON for whole-line replacement", () => {
const ast = parseJsonl('{"event":"start"}\n').ast;
const r = setOcPath(ast, parseOcPath("oc://log/L1"), "not json");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("parse-error");
}
});
});
// ---------- setOcPath — insertion ------------------------------------------
// setOcPath insertion on markdown: append an item to a section, append a
// section at file root, add a frontmatter key, and reject a frontmatter key
// that already exists (reason "type-mismatch").
describe("setOcPath — md insertion", () => {
it("appends item to section with `+`", () => {
const md = parseMd("## Tools\n\n- gh: GitHub CLI\n").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/tools/+"), "docker: container CLI");
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
// The existing item must survive alongside the appended one.
expect(out).toContain("- gh: GitHub CLI");
expect(out).toContain("- docker: container CLI");
}
});
it("appends new section at file root with `+`", () => {
const md = parseMd("## Existing\n").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/+"), "New Section");
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
expect(out).toContain("## Existing");
expect(out).toContain("## New Section");
}
});
it("adds new frontmatter key with +key", () => {
const md = parseMd("---\nname: x\n---\n").ast;
const r = setOcPath(
md,
parseOcPath("oc://X.md/[frontmatter]/+description"),
"a new description",
);
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitMd(r.ast as Parameters<typeof emitMd>[0]);
expect(out).toContain("description: a new description");
}
});
it("rejects duplicate frontmatter key on insertion", () => {
const md = parseMd("---\nname: x\n---\n").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/[frontmatter]/+name"), "y");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("type-mismatch");
}
});
});
// setOcPath insertion on JSONC: array append (`+`), array insert at index
// (`+nnn`), object key add (`+key`), plus the rejected combinations. Values
// are passed as JSON text, so strings arrive quoted and objects as literals.
describe("setOcPath — jsonc insertion", () => {
it("appends to array with `+`", () => {
const ast = parseJsonc('{ "items": [1, 2] }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/items/+"), "3");
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({ items: [1, 2, 3] });
}
});
it("inserts at index with `+nnn`", () => {
const ast = parseJsonc('{ "items": [1, 3] }').ast;
// `+1` places the new element before the current index-1 element.
const r = setOcPath(ast, parseOcPath("oc://config/items/+1"), "2");
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({ items: [1, 2, 3] });
}
});
it("adds object key with `+key`", () => {
const ast = parseJsonc('{ "plugins": { "github": "tok" } }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/plugins/+gitlab"), '"new-tok"');
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({
plugins: { github: "tok", gitlab: "new-tok" },
});
}
});
it("rejects duplicate object key", () => {
const ast = parseJsonc('{ "plugins": { "github": "x" } }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/plugins/+github"), '"y"');
expect(r.ok).toBe(false);
if (!r.ok) {
// A duplicate JSONC key is reported as "unresolved" here.
expect(r.reason).toBe("unresolved");
}
});
it("rejects +key on array", () => {
const ast = parseJsonc('{ "items": [1, 2] }').ast;
const r = setOcPath(ast, parseOcPath("oc://config/items/+abc"), "3");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("type-mismatch");
}
});
it("inserts complex object via JSON value", () => {
const ast = parseJsonc('{ "plugins": {} }').ast;
const r = setOcPath(
ast,
parseOcPath("oc://config/plugins/+gitlab"),
'{"token":"xyz","enabled":true}',
);
expect(r.ok).toBe(true);
if (r.ok) {
const ast2 = r.ast as Parameters<typeof emitJsonc>[0];
expect(JSON.parse(emitJsonc(ast2))).toEqual({
plugins: { gitlab: { token: "xyz", enabled: true } },
});
}
});
});
// setOcPath insertion on JSONL: a file-root `+` appends one JSON line; a
// malformed value or a non-root insertion target is rejected.
describe("setOcPath — jsonl insertion (session append)", () => {
it("appends a JSON line with `+`", () => {
const ast = parseJsonl('{"event":"start"}\n').ast;
const r = setOcPath(ast, parseOcPath("oc://log/+"), '{"event":"step","n":1}');
expect(r.ok).toBe(true);
if (r.ok) {
const out = emitJsonl(r.ast as Parameters<typeof emitJsonl>[0]);
// Drop empty strings produced by the trailing newline before counting.
const lines = out.split("\n").filter((l) => l.length > 0);
expect(lines).toHaveLength(2);
expect(JSON.parse(lines[1])).toEqual({ event: "step", n: 1 });
}
});
it("rejects malformed JSON value", () => {
const ast = parseJsonl("").ast;
const r = setOcPath(ast, parseOcPath("oc://log/+"), "not json");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("parse-error");
}
});
it("rejects non-root insertion target", () => {
// Only failure is asserted; the specific reason is not pinned.
const ast = parseJsonl('{"a":1}\n').ast;
const r = setOcPath(ast, parseOcPath("oc://log/L1/+"), "{}");
expect(r.ok).toBe(false);
});
});
// ---------- Cross-cutting properties ---------------------------------------
// Properties every kind must share: setOcPath leaves its input AST untouched
// and returns an `ok`-tagged result (new AST on success, reason on failure).
describe("setOcPath — cross-cutting properties", () => {
it("is non-mutating across all kinds", () => {
// Snapshot each AST as JSON, run an edit, and require the snapshot to be unchanged.
const md = parseMd("---\nname: x\n---\n").ast;
const before = JSON.stringify(md);
setOcPath(md, parseOcPath("oc://X.md/[frontmatter]/name"), "new");
expect(JSON.stringify(md)).toBe(before);
const jsonc = parseJsonc('{ "k": 1 }').ast;
const before2 = JSON.stringify(jsonc);
setOcPath(jsonc, parseOcPath("oc://config/k"), "99");
expect(JSON.stringify(jsonc)).toBe(before2);
const jsonl = parseJsonl('{"a":1}\n').ast;
const before3 = JSON.stringify(jsonl);
setOcPath(jsonl, parseOcPath("oc://log/L1/a"), "99");
expect(JSON.stringify(jsonl)).toBe(before3);
});
it("returns ok-tagged result with new ast on success", () => {
const md = parseMd("---\nname: x\n---\n").ast;
const r = setOcPath(md, parseOcPath("oc://X.md/[frontmatter]/name"), "y");
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.kind).toBe("md");
}
});
it("returns failure-tagged result with reason on unresolved", () => {
const ast = parseJsonc("{}").ast;
const r = setOcPath(ast, parseOcPath("oc://config/missing"), "v");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("unresolved");
}
});
});

View File

@@ -0,0 +1,249 @@
/**
* YAML kind — parse / emit / resolve / set + universal verb dispatch.
*
* Real-world fixture: lobster `.lobster` workflow file shape.
*/
import { describe, expect, it } from "vitest";
import { inferKind } from "../../dispatch.js";
import { parseOcPath } from "../../oc-path.js";
import { resolveOcPath, setOcPath } from "../../universal.js";
import { setYamlOcPath } from "../../yaml/edit.js";
import { emitYaml } from "../../yaml/emit.js";
import { parseYaml } from "../../yaml/parse.js";
import { resolveYamlOcPath } from "../../yaml/resolve.js";
// Shared workflow fixture for the YAML tests below. The round-trip test
// expects `emitYaml` to reproduce this text exactly, so its whitespace is
// significant — do not reformat.
const LOBSTER = `name: inbox-triage
description: A simple example workflow
steps:
- id: fetch
command: gog.gmail.search --query 'newer_than:1d' --max 20
- id: classify
command: openclaw.invoke --tool llm-task --action json
stdin: $fetch.stdout
`;
// parseYaml basics: byte-exact round-trip through emitYaml, the `kind`
// discriminator, empty input, and the soft-error policy (syntax problems are
// reported as diagnostics rather than thrown).
describe("parseYaml — round-trip", () => {
  it("preserves bytes verbatim on round-trip", () => {
    const { ast } = parseYaml(LOBSTER);
    expect(emitYaml(ast)).toBe(LOBSTER);
  });
  it("exposes kind: yaml discriminator", () => {
    const { ast } = parseYaml(LOBSTER);
    expect(ast.kind).toBe("yaml");
  });
  it("handles empty file", () => {
    const { ast } = parseYaml("");
    expect(ast.kind).toBe("yaml");
    expect(emitYaml(ast)).toBe("");
  });
  it("reports errors as diagnostics, not throws", () => {
    // The previous assertion (`length >= 0`) could never fail. Pin both
    // halves of the contract instead: parsing must not throw, and a hard
    // syntax error must surface as an error-severity diagnostic.
    expect(() => parseYaml("key: value\n bad indent: oops\n")).not.toThrow();
    // An unterminated flow sequence is an unambiguous syntax error.
    const { diagnostics } = parseYaml("key: [1, 2\n");
    expect(
      diagnostics.some((d) => d.severity === "error" && d.code === "OC_YAML_PARSE_FAILED"),
    ).toBe(true);
  });
});
// Kind-specific resolver called directly (no universal dispatch): top-level
// scalar, dotted descent into a sequence, file-only path, and a missing key.
describe("resolveYamlOcPath — direct", () => {
it("resolves top-level scalar", () => {
const { ast } = parseYaml(LOBSTER);
const m = resolveYamlOcPath(ast, parseOcPath("oc://workflow.lobster/name"));
expect(m?.kind).toBe("pair");
if (m?.kind === "pair") {
expect(m.value).toBe("inbox-triage");
}
});
it("resolves into a sequence by index", () => {
const { ast } = parseYaml(LOBSTER);
// `steps.0.id` → `id` of the first entry under `steps`.
const m = resolveYamlOcPath(ast, parseOcPath("oc://workflow.lobster/steps.0.id"));
expect(m?.kind).toBe("pair");
if (m?.kind === "pair") {
expect(m.value).toBe("fetch");
}
});
it("returns root when no segments", () => {
const { ast } = parseYaml(LOBSTER);
const m = resolveYamlOcPath(ast, parseOcPath("oc://workflow.lobster"));
expect(m?.kind).toBe("root");
});
it("returns null for unresolved paths", () => {
const { ast } = parseYaml(LOBSTER);
expect(resolveYamlOcPath(ast, parseOcPath("oc://workflow.lobster/missing"))).toBeNull();
});
});
// Kind-specific setter called directly: replaces existing scalars (checked
// via the re-rendered `raw` text) and refuses paths that do not resolve.
describe("setYamlOcPath — direct", () => {
it("replaces a scalar value", () => {
const { ast } = parseYaml(LOBSTER);
const r = setYamlOcPath(ast, parseOcPath("oc://workflow.lobster/name"), "new-name");
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("name: new-name");
}
});
it("replaces a nested scalar", () => {
const { ast } = parseYaml(LOBSTER);
const r = setYamlOcPath(ast, parseOcPath("oc://workflow.lobster/steps.0.id"), "fetch-renamed");
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("id: fetch-renamed");
}
});
it("returns unresolved for missing path", () => {
const { ast } = parseYaml(LOBSTER);
const r = setYamlOcPath(ast, parseOcPath("oc://workflow.lobster/missing"), "x");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("unresolved");
}
});
});
// Regression suite for positional tokens on the write path: `$first`,
// `$last` and `-N` must name the same child when editing as when resolving.
describe("setYamlOcPath — positional tokens (round-11 resolve↔edit symmetry)", () => {
// ClawSweeper round-11 P2 — yaml edit forwarded segments straight
// to `setIn`, which would treat `$first` / `$last` / `-N` as
// literal map keys and silently miss the target. Pin the new
// behavior: positional tokens resolve against the live document
// BEFORE the yaml lib walks the path.
it("edits the first seq element via $first", () => {
const { ast } = parseYaml(LOBSTER);
const r = setYamlOcPath(
ast,
parseOcPath("oc://workflow.lobster/steps/$first/id"),
"fetch-renamed",
);
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("id: fetch-renamed");
}
});
it("edits the last seq element via $last", () => {
const { ast } = parseYaml(LOBSTER);
const r = setYamlOcPath(
ast,
parseOcPath("oc://workflow.lobster/steps/$last/id"),
"classify-renamed",
);
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("id: classify-renamed");
}
});
it("edits the second-to-last seq element via -2", () => {
const { ast } = parseYaml("items:\n - a\n - b\n - c\n");
// `-2` counts from the end: in [a, b, c] it names `b`.
const r = setYamlOcPath(ast, parseOcPath("oc://x.yaml/items/-2"), "B");
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("- B");
}
});
it("edits the first map entry via $first", () => {
const { ast } = parseYaml("config:\n a: 1\n b: 2\n c: 3\n");
// On a map, `$first` names the first key in document order (`a`).
const r = setYamlOcPath(ast, parseOcPath("oc://x.yaml/config/$first"), 99);
expect(r.ok).toBe(true);
if (r.ok) {
expect(r.ast.raw).toContain("a: 99");
}
});
it("returns unresolved for $first against an empty seq", () => {
const { ast } = parseYaml("items: []\n");
const r = setYamlOcPath(ast, parseOcPath("oc://x.yaml/items/$first"), "x");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("unresolved");
}
});
});
// File-extension → kind mapping: all three YAML-flavoured extensions must
// dispatch to the "yaml" kind.
describe("inferKind — yaml extensions", () => {
it("maps .yaml / .yml / .lobster to yaml", () => {
expect(inferKind("workflow.yaml")).toBe("yaml");
expect(inferKind("config.yml")).toBe("yaml");
expect(inferKind("inbox-triage.lobster")).toBe("yaml");
});
});
// Universal verbs (`resolveOcPath` / `setOcPath`) dispatching on a YAML AST:
// kind-agnostic match shapes on read, and leaf-type coercion on write.
describe("universal verbs — yaml dispatch", () => {
it("resolveOcPath returns kind-agnostic match for yaml leaf", () => {
const { ast } = parseYaml(LOBSTER);
const m = resolveOcPath(ast, parseOcPath("oc://workflow.lobster/name"));
expect(m).toMatchObject({ kind: "leaf", valueText: "inbox-triage", leafType: "string" });
});
it("resolveOcPath returns node:yaml-map for top-level seq item", () => {
const { ast } = parseYaml(LOBSTER);
const m = resolveOcPath(ast, parseOcPath("oc://workflow.lobster/steps.0"));
expect(m).toMatchObject({ kind: "node", descriptor: "yaml-map" });
});
it("resolveOcPath returns node:yaml-seq for sequence root", () => {
const { ast } = parseYaml(LOBSTER);
const m = resolveOcPath(ast, parseOcPath("oc://workflow.lobster/steps"));
expect(m).toMatchObject({ kind: "node", descriptor: "yaml-seq" });
});
it("setOcPath replaces a yaml scalar via universal verb", () => {
const { ast } = parseYaml(LOBSTER);
const r = setOcPath(ast, parseOcPath("oc://workflow.lobster/name"), "updated");
expect(r.ok).toBe(true);
// NOTE(review): if the result were ok but not kind "yaml", the content assertion below would be skipped silently.
if (r.ok && r.ast.kind === "yaml") {
expect(r.ast.raw).toContain("name: updated");
}
});
it("setOcPath coerces numeric string to number for number leaf", () => {
const { ast } = parseYaml("count: 5\n");
const r = setOcPath(ast, parseOcPath("oc://x.yaml/count"), "42");
expect(r.ok).toBe(true);
if (r.ok && r.ast.kind === "yaml") {
expect(r.ast.raw).toContain("count: 42");
}
});
it("setOcPath returns parse-error for invalid coercion", () => {
const { ast } = parseYaml("count: 5\n");
const r = setOcPath(ast, parseOcPath("oc://x.yaml/count"), "abc");
expect(r.ok).toBe(false);
if (!r.ok) {
expect(r.reason).toBe("parse-error");
}
});
});
// Universal `setOcPath` insertion on YAML: append to a sequence (`+`), add a
// map key (`+key`), and reject a key that already exists. Values are passed
// as JSON text (note the quoted `"c"`).
describe("universal verbs — yaml insertion", () => {
it("appends to a yaml seq with `+`", () => {
const { ast } = parseYaml("items:\n - a\n - b\n");
const r = setOcPath(ast, parseOcPath("oc://x.yaml/items/+"), '"c"');
expect(r.ok).toBe(true);
if (r.ok && r.ast.kind === "yaml") {
expect(r.ast.raw).toContain("- c");
}
});
it("adds key to yaml map with `+key`", () => {
const { ast } = parseYaml("config:\n a: 1\n");
const r = setOcPath(ast, parseOcPath("oc://x.yaml/config/+b"), "2");
expect(r.ok).toBe(true);
if (r.ok && r.ast.kind === "yaml") {
expect(r.ast.raw).toContain("b: 2");
}
});
it("rejects duplicate map key on insertion", () => {
// Only failure is asserted; the specific reason is not pinned.
const { ast } = parseYaml("config:\n a: 1\n");
const r = setOcPath(ast, parseOcPath("oc://x.yaml/config/+a"), "99");
expect(r.ok).toBe(false);
});
});

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
/**
* YAML AST types — wraps the `yaml` library's Document model so the
* substrate can address YAML nodes via `OcPath` while preserving the
* authoring shape (comments, anchors, etc.) for round-trip emit.
*
* **Per-kind discriminator**: `kind: 'yaml'` matches the md / jsonc /
* jsonl pattern. The universal `setOcPath` / `resolveOcPath` dispatch
* via `ast.kind`.
*
* **Byte-fidelity**: `raw` is preserved on the root for round-trip
* emit. The internal `doc` is the parsed `yaml.Document` from the
* `yaml` package — comment-preserving, anchor-aware.
*
* Lobster `.lobster` files (workflow specs) and `.craft/waves/*.yaml`
* (craft system) both flow through this kind.
*
* @module @openclaw/oc-path/yaml/ast
*/
import type { Document, LineCounter } from "yaml";
/**
 * The root YAML AST. `kind` is the discriminator the universal verbs
 * dispatch on; `raw` is the source text that round-trip emit returns
 * unchanged.
 */
export interface YamlAst {
readonly kind: "yaml";
/** Source text this AST was built from. */
readonly raw: string;
/**
* Parsed `yaml.Document` — wraps the comment-preserving CST model.
*/
readonly doc: Document.Parsed;
/**
* `LineCounter` from the `yaml` package. Pass a node's `range[0]`
* (start offset into the parsed source) to
* `lineCounter.linePos(offset)` to get `{ line, col }` (1-based). Lint
* rules use this to surface accurate line numbers in findings instead
* of hardcoding `line: 1`.
*
* NOTE(review): the `yaml` package appears to report offsets as
* source-string indices rather than UTF-8 byte offsets — confirm
* before relying on them for non-ASCII input.
*/
readonly lineCounter: LineCounter;
}

View File

@@ -0,0 +1,251 @@
/**
* Mutate a `YamlAst` at an OcPath. Returns a new AST with the value
* replaced.
*
* Implementation uses `doc.setIn(path, value)` from the `yaml` package
* — comment-preserving on edit. Adding a new key does NOT preserve
* surrounding formatting verbatim (the `yaml` library handles
* pretty-printing); for byte-exact preservation use round-trip emit
* on unmodified ASTs.
*
* @module @openclaw/oc-path/yaml/edit
*/
import {
Document,
isMap,
isScalar,
isSeq,
LineCounter,
parseDocument,
type Node,
type Pair,
} from "yaml";
import type { OcPath } from "../oc-path.js";
import {
isPositionalSeg,
isQuotedSeg,
resolvePositionalSeg,
splitRespectingBrackets,
unquoteSeg,
} from "../oc-path.js";
import type { YamlAst } from "./ast.js";
/**
 * Outcome of a YAML edit, tagged by `ok`. Success carries the new AST;
 * failure carries a `reason` code and no AST.
 */
export type YamlEditResult =
| { readonly ok: true; readonly ast: YamlAst }
| {
readonly ok: false;
readonly reason: "unresolved" | "no-root" | "parse-error";
};
/**
 * Replace the value at `path` in a YAML AST, returning a new AST.
 *
 * The input AST is never mutated: the edit is applied to a clone and the
 * result's `raw` is the clone re-rendered with `toString()`.
 *
 * @param ast - AST to edit.
 * @param path - Address of an existing node; its section/item/field slots
 *   are flattened into one key path.
 * @param newValue - Replacement value handed to the yaml library's `setIn`.
 * @returns `ok: true` with the new AST, or `ok: false` with a reason:
 *   `"no-root"` when the document is empty, `"unresolved"` when the path is
 *   empty or does not name an existing node, `"parse-error"` when the
 *   document carries parse errors and so cannot be re-rendered.
 */
export function setYamlOcPath(ast: YamlAst, path: OcPath, newValue: unknown): YamlEditResult {
  if (ast.doc.contents === null) {
    return { ok: false, reason: "no-root" };
  }
  const rawSegments = pathSegments(path);
  if (rawSegments.length === 0) {
    return { ok: false, reason: "unresolved" };
  }
  // Resolve positional tokens ($first / $last / -N) against the actual
  // map keys / seq sizes BEFORE handing the segments to the yaml lib —
  // otherwise `hasIn(['$last'])` treats the token as a literal map key
  // and silently unresolves, producing a write↔read asymmetry with
  // resolveYamlOcPath (which honors positional tokens at lookup).
  const segments = resolvePositionalSegments(ast.doc.contents as Node, rawSegments);
  if (segments === null) {
    return { ok: false, reason: "unresolved" };
  }
  // Verify the path resolves before mutating — `setIn` would create
  // missing intermediate nodes which is insertion semantics, not set.
  if (!ast.doc.hasIn(segments)) {
    return { ok: false, reason: "unresolved" };
  }
  // The clone below goes through `doc.toString()`, which throws for a
  // document that has parse errors. Report that as a tagged failure so the
  // function keeps its never-throws result contract. Checked last so that
  // every outcome that used to return normally is unchanged.
  if (ast.doc.errors.length > 0) {
    return { ok: false, reason: "parse-error" };
  }
  // Clone the document so the original AST is unchanged.
  const { doc: cloned, lineCounter } = cloneDoc(ast.doc);
  cloned.setIn(segments, newValue);
  return { ok: true, ast: { kind: "yaml", raw: cloned.toString(), doc: cloned, lineCounter } };
}
/**
 * Append-style insertion: add a new key to a map or add an element to a
 * seq at `parentPath`. Used by the universal `setOcPath` when the path
 * carries a `+` / `+key` / `+nnn` insertion marker.
 *
 * The input AST is never mutated; the edit is applied to a clone.
 *
 * @param ast - AST to edit.
 * @param parentPath - Address of the container to insert into. An empty
 *   path targets the document root.
 * @param marker - `"+"` appends to a seq; `{ kind: "indexed" }` inserts
 *   into a seq before `index` (clamped to the seq bounds);
 *   `{ kind: "keyed" }` adds `key` to a map.
 * @param newValue - Value to insert.
 * @returns `ok: true` with the new AST, or `ok: false` with a reason:
 *   `"no-root"` for an empty document; `"parse-error"` when the document
 *   carries parse errors; `"unresolved"` when the parent is missing or is
 *   not a container, the marker does not fit the container kind, or the
 *   key already exists.
 */
export function insertYamlOcPath(
  ast: YamlAst,
  parentPath: OcPath,
  marker: "+" | { kind: "keyed"; key: string } | { kind: "indexed"; index: number },
  newValue: unknown,
): YamlEditResult {
  if (ast.doc.contents === null) {
    return { ok: false, reason: "no-root" };
  }
  const rawParentSegments = pathSegments(parentPath);
  // Resolve positional tokens against the live document before walking
  // — same rationale as setYamlOcPath; `getIn(['$last'])` would treat
  // the token as a literal key and miss the actual last child.
  const segments =
    rawParentSegments.length === 0
      ? rawParentSegments
      : resolvePositionalSegments(ast.doc.contents as Node, rawParentSegments);
  if (segments === null) {
    return { ok: false, reason: "unresolved" };
  }
  // The clone goes through `doc.toString()`, which throws for a document
  // that has parse errors; surface that as a tagged failure instead.
  if (ast.doc.errors.length > 0) {
    return { ok: false, reason: "parse-error" };
  }
  const { doc: cloned, lineCounter } = cloneDoc(ast.doc);
  // Find the parent node inside the clone.
  const parent: unknown = segments.length === 0 ? cloned.contents : cloned.getIn(segments, false);
  if (parent === undefined || parent === null) {
    return { ok: false, reason: "unresolved" };
  }
  // Classify the container with the library's own type guards. Inspecting
  // the shape of `items` misreads an empty seq (`[]`) as a map, because
  // `every()` is vacuously true on an empty array.
  if (isMap(parent)) {
    // Maps only accept keyed insertion.
    if (typeof marker !== "object" || marker.kind !== "keyed") {
      return { ok: false, reason: "unresolved" };
    }
    // Reject duplicate
    if (cloned.hasIn([...segments, marker.key])) {
      return { ok: false, reason: "unresolved" };
    }
    cloned.setIn([...segments, marker.key], newValue);
    return { ok: true, ast: { kind: "yaml", raw: cloned.toString(), doc: cloned, lineCounter } };
  }
  if (isSeq(parent)) {
    // Seqs accept append or indexed insertion, never keyed.
    if (typeof marker === "object" && marker.kind === "keyed") {
      return { ok: false, reason: "unresolved" };
    }
    if (marker === "+") {
      cloned.addIn(segments, newValue);
    } else if (typeof marker === "object" && marker.kind === "indexed") {
      // `getIn` returns the YAMLSeq node, not a JS array, so splice the
      // new node into the seq's own `items`. This also leaves sibling
      // nodes (and their comments) untouched.
      const idx = Math.max(0, Math.min(marker.index, parent.items.length));
      parent.items.splice(idx, 0, cloned.createNode(newValue));
    }
    return { ok: true, ast: { kind: "yaml", raw: cloned.toString(), doc: cloned, lineCounter } };
  }
  // Scalars, aliases and anything else cannot take children.
  return { ok: false, reason: "unresolved" };
}
/**
 * Translate positional tokens (`$first` / `$last` / `-N`) in `segments`
 * into the concrete map key or seq index they name, walking the document
 * from `root` one segment at a time. Non-positional segments pass through
 * unchanged.
 *
 * Returns `null` when the walk cannot continue: a positional token that
 * does not resolve at its depth, a seq index that is not an in-range
 * integer, or further segments after the walk has run off the tree. The
 * caller treats `null` as `unresolved` and refuses to write.
 *
 * Mirrors `positionalForYaml` in resolve.ts so read and write agree on
 * which child each token names.
 */
function resolvePositionalSegments(root: Node, segments: readonly string[]): string[] | null {
  const concrete: string[] = [];
  let cursor: Node | null = root;
  for (const raw of segments) {
    if (cursor === null) {
      return null;
    }
    const resolved: string | null = isPositionalSeg(raw)
      ? positionalForYamlNode(cursor, raw)
      : raw;
    if (resolved === null) {
      return null;
    }
    concrete.push(resolved);
    if (isMap(cursor)) {
      // Descend into the pair whose stringified key equals the segment.
      const entries: readonly Pair[] = (cursor as { items: readonly Pair[] }).items;
      const hit: Pair | undefined = entries.find(
        (entry) => String(isScalar(entry.key) ? entry.key.value : entry.key) === resolved,
      );
      cursor = (hit?.value as Node | undefined) ?? null;
    } else if (isSeq(cursor)) {
      const position = Number(resolved);
      if (!Number.isInteger(position) || position < 0 || position >= cursor.items.length) {
        return null;
      }
      cursor = (cursor.items[position] as Node | null) ?? null;
    } else {
      // Scalar: the literal segment is still emitted so the caller's
      // `hasIn` check sees the same path shape and fails cleanly with
      // `unresolved`. There is nothing further to descend into.
      cursor = null;
    }
  }
  return concrete;
}
/**
 * Resolve one positional token against a single container node. Seqs are
 * resolved by size; maps by their stringified keys in document order.
 * Any other node kind yields `null`.
 */
function positionalForYamlNode(node: Node, seg: string): string | null {
  if (isSeq(node)) {
    const elements: readonly Node[] = (node as { items: readonly Node[] }).items;
    return resolvePositionalSeg(seg, { indexable: true, size: elements.length });
  }
  if (isMap(node)) {
    const entries: readonly Pair[] = (node as { items: readonly Pair[] }).items;
    const keys: readonly string[] = entries.map((entry) =>
      String(isScalar(entry.key) ? entry.key.value : entry.key),
    );
    return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
  }
  return null;
}
/**
 * Flatten an `OcPath` into the key path used for `getIn` / `setIn`.
 *
 * The section, item and field slots are visited in that order; each
 * present slot is split on `.` with `splitRespectingBrackets`, and quoted
 * pieces have their quotes removed. Splitting this way (rather than a
 * plain `.split('.')`) keeps a quoted key such as `"a/b"` in one piece, so
 * the write path sees the same segments as `resolveYamlOcPath`.
 */
function pathSegments(path: OcPath): string[] {
  const flattened: string[] = [];
  for (const slot of [path.section, path.item, path.field]) {
    if (slot === undefined) {
      continue;
    }
    for (const piece of splitRespectingBrackets(slot, ".")) {
      flattened.push(isQuotedSeg(piece) ? unquoteSeg(piece) : piece);
    }
  }
  return flattened;
}
/**
 * Copy a parsed document by rendering it with `toString()` and parsing
 * the result again, using the same parse options as the original parse.
 * A fresh `LineCounter` is attached so positions match the re-rendered
 * text rather than the original source.
 *
 * NOTE(review): `toString()` is understood to throw when the document has
 * parse errors, so callers should screen for that first — confirm against
 * the `yaml` package docs.
 */
function cloneDoc(doc: Document.Parsed): { doc: Document.Parsed; lineCounter: LineCounter } {
  const freshCounter = new LineCounter();
  const rendered = doc.toString();
  const copy = parseDocument(rendered, {
    keepSourceTokens: true,
    prettyErrors: false,
    lineCounter: freshCounter,
  });
  return { doc: copy, lineCounter: freshCounter };
}

View File

@@ -0,0 +1,49 @@
/**
* Emit a `YamlAst` to bytes.
*
* **Round-trip mode (default)** returns `ast.raw` verbatim — preserves
* comments, anchors, formatting exactly.
*
* **Render mode** uses `doc.toString()` from the `yaml` package — also
* comment-preserving, but normalizes whitespace per the package's
* options.
*
* **Sentinel guard**: render mode always scans the rendered output for
* the `__OPENCLAW_REDACTED__` literal; round-trip mode scans `raw` only
* when `acceptPreExistingSentinel` is `false`.
*
* @module @openclaw/oc-path/yaml/emit
*/
import { OcEmitSentinelError, REDACTED_SENTINEL } from "../sentinel.js";
import type { YamlAst } from "./ast.js";
/** Options accepted by `emitYaml`. Every field is optional. */
export interface YamlEmitOptions {
/**
* `"roundtrip"` returns `ast.raw` as-is; `"render"` re-renders from the
* parsed document. Defaults to `"roundtrip"`.
*/
readonly mode?: "roundtrip" | "render";
/**
* File name placed after `oc://` in the path reported by
* `OcEmitSentinelError`. When omitted the path starts with a bare `oc://`.
*/
readonly fileNameForGuard?: string;
/**
* See `JsoncEmitOptions.acceptPreExistingSentinel` for the rationale.
* Default `true` — round-trip echoes parsed bytes without scanning.
* Render mode always scans the rendered output (callers can inject
* sentinels via setYamlOcPath, so render-time scan is mandatory).
*/
readonly acceptPreExistingSentinel?: boolean;
}
/**
 * Serialize a `YamlAst` to text.
 *
 * In round-trip mode (the default) the stored `raw` text is returned
 * unchanged; it is scanned for the redaction sentinel only when
 * `acceptPreExistingSentinel` is `false`. In render mode the document is
 * re-rendered with `toString()` and the output is always scanned.
 *
 * @throws OcEmitSentinelError when a scan finds the sentinel. The reported
 *   path ends in `/[raw]` or `/[rendered]` to show which text was scanned.
 */
export function emitYaml(ast: YamlAst, opts: YamlEmitOptions = {}): string {
  const guardPath = opts.fileNameForGuard ? `oc://${opts.fileNameForGuard}` : "oc://";
  const renderRequested = (opts.mode ?? "roundtrip") !== "roundtrip";

  if (renderRequested) {
    const rendered = ast.doc.toString();
    if (rendered.includes(REDACTED_SENTINEL)) {
      throw new OcEmitSentinelError(`${guardPath}/[rendered]`);
    }
    return rendered;
  }

  const mustScanRaw = !(opts.acceptPreExistingSentinel ?? true);
  if (mustScanRaw && ast.raw.includes(REDACTED_SENTINEL)) {
    throw new OcEmitSentinelError(`${guardPath}/[raw]`);
  }
  return ast.raw;
}

View File

@@ -0,0 +1,48 @@
/**
* YAML parser — wraps `yaml.parseDocument` for comment-preserving CST
* + structured access. Soft-error policy: never throws on
* parser-tolerated input; recoverable problems surface as diagnostics.
*
* @module @openclaw/oc-path/yaml/parse
*/
import { LineCounter, parseDocument } from "yaml";
import type { Diagnostic } from "../ast.js";
import type { YamlAst } from "./ast.js";
/**
* Result of `parseYaml`: the parsed AST plus any problems the parser
* reported.
*/
export interface YamlParseResult {
/** The parsed document (`kind: "yaml"`, original `raw` text, `doc`, `lineCounter`). */
readonly ast: YamlAst;
/**
* Parser warnings (code `OC_YAML_WARN`) followed by parser errors
* (code `OC_YAML_PARSE_FAILED`), in the order the parser listed them.
*/
readonly diagnostics: readonly Diagnostic[];
}
/**
 * Parse YAML bytes into a `YamlAst`.
 *
 * The `yaml` package is comment-preserving and reports its own
 * warnings/errors; those are surfaced as `Diagnostic` entries (warnings
 * first, then errors) rather than thrown.
 *
 * Line numbers: the `yaml` package only populates `linePos` on its
 * errors/warnings when `prettyErrors` is enabled. Because this parser
 * runs with `prettyErrors: false`, the line is derived from the problem's
 * source offset (`pos[0]`) through the same `LineCounter` that was handed
 * to the parser. `linePos` is still preferred if it happens to be present.
 *
 * @param raw - YAML source text.
 * @returns The AST (`raw`, parsed `doc`, `lineCounter`) and the collected
 *   diagnostics.
 */
export function parseYaml(raw: string): YamlParseResult {
  const lineCounter = new LineCounter();
  const doc = parseDocument(raw, {
    keepSourceTokens: true,
    prettyErrors: false,
    lineCounter,
  });

  // One-indexed line of a parser problem; falls back to 1 when the
  // location is unknown (offset -1, or no line start recorded).
  const lineOf = (issue: {
    readonly pos: readonly number[];
    readonly linePos?: readonly { readonly line: number }[];
  }): number => {
    const prettyLine = issue.linePos?.[0]?.line;
    if (prettyLine !== undefined) {
      return prettyLine;
    }
    const offset = issue.pos[0];
    if (offset === undefined || offset < 0) {
      return 1;
    }
    const { line } = lineCounter.linePos(offset);
    return line > 0 ? line : 1;
  };

  const diagnostics: Diagnostic[] = [];
  for (const w of doc.warnings) {
    diagnostics.push({
      line: lineOf(w),
      message: w.message,
      severity: "warning",
      code: "OC_YAML_WARN",
    });
  }
  for (const e of doc.errors) {
    diagnostics.push({
      line: lineOf(e),
      message: e.message,
      severity: "error",
      code: "OC_YAML_PARSE_FAILED",
    });
  }
  return { ast: { kind: "yaml", raw, doc, lineCounter }, diagnostics };
}

View File

@@ -0,0 +1,162 @@
/**
* Resolve an `OcPath` against a `YamlAst`.
*
* YAML's structural shape mirrors JSONC: objects (`Map`), arrays
* (`Seq`), and scalars. Addressing follows the same dotted-path
* convention used by JSONC:
*
* oc://workflow.yaml/steps.0.command → command on first step
* oc://workflow.yaml/name → top-level name
* oc://workflow.yaml/steps.+command → insertion (handled by edit)
*
* @module @openclaw/oc-path/yaml/resolve
*/
import { isMap, isScalar, isSeq, type Node, type Pair } from "yaml";
import type { OcPath } from "../oc-path.js";
import {
isPositionalSeg,
isQuotedSeg,
resolvePositionalSeg,
splitRespectingBrackets,
unquoteSeg,
} from "../oc-path.js";
import type { YamlAst } from "./ast.js";
/**
* What an `OcPath` resolved to inside a `YamlAst`.
*
* - `root`: the path had no segments; `node` is the whole AST.
* - `scalar`: the walk ended on a scalar node that was not reached as the
*   final map key (e.g. a sequence item); `value` is the scalar's value.
* - `map` / `seq`: the walk ended on a mapping / sequence node.
* - `pair`: the final segment named a map key whose value is a scalar;
*   `key` is that segment and `value` the scalar's value.
*
* `path` lists the segments walked to reach the match, with positional
* tokens replaced by the concrete key/index they resolved to.
*/
export type YamlOcPathMatch =
| { readonly kind: "root"; readonly node: YamlAst }
| { readonly kind: "scalar"; readonly value: unknown; readonly path: readonly string[] }
| {
readonly kind: "map";
readonly path: readonly string[];
}
| {
readonly kind: "seq";
readonly path: readonly string[];
}
| {
readonly kind: "pair";
readonly key: string;
readonly value: unknown;
readonly path: readonly string[];
};
/**
 * Resolve `path` against the parsed YAML document.
 *
 * The `section`, `item` and `field` slots of the path are each split on
 * `.` (bracket-aware), quoted segments are unquoted, and the pieces are
 * concatenated in that order into one flat segment list that is walked
 * from the document root.
 *
 * @param ast - Parsed YAML document.
 * @param path - Address to resolve.
 * @returns A match describing the node reached; the `root` match when the
 *   path carries no segments; `null` when the document is empty or the
 *   walk fails.
 */
export function resolveYamlOcPath(ast: YamlAst, path: OcPath): YamlOcPathMatch | null {
  const segments: string[] = [path.section, path.item, path.field]
    .filter((slot): slot is string => slot !== undefined)
    .flatMap((slot) =>
      Array.from(splitRespectingBrackets(slot, "."), (piece) =>
        isQuotedSeg(piece) ? unquoteSeg(piece) : piece,
      ),
    );

  if (segments.length === 0) {
    return { kind: "root", node: ast };
  }

  const { contents } = ast.doc;
  return contents === null ? null : walkNode(contents, segments, 0, []);
}
/**
 * Recursive step of the YAML path walk.
 *
 * @param node - Node currently being inspected (`null` ends the walk).
 * @param segments - Full list of path segments.
 * @param i - Index of the segment to consume at this level.
 * @param walked - Concrete segments consumed so far.
 * @returns The match for the node the path lands on, or `null` when the
 *   path cannot be followed.
 */
function walkNode(
  node: Node | null,
  segments: readonly string[],
  i: number,
  walked: readonly string[],
): YamlOcPathMatch | null {
  if (node === null) {
    return null;
  }

  const requested = segments[i];
  if (requested === undefined) {
    // Path exhausted: report the shape of whatever we are standing on.
    return isMap(node)
      ? { kind: "map", path: walked }
      : isSeq(node)
        ? { kind: "seq", path: walked }
        : isScalar(node)
          ? { kind: "scalar", value: node.value, path: walked }
          : null;
  }
  if (requested.length === 0) {
    return null;
  }

  // Positional tokens (`$first` / `$last` / `-N`) are swapped for the
  // concrete key/index they denote in this container. When no concrete
  // segment is produced (e.g. `-N` on a keyed container) the token is
  // kept and looked up as a literal key (openclaw#59934 — Telegram
  // supergroup IDs are negative numbers used as map keys).
  const positional = isPositionalSeg(requested) ? positionalForYaml(node, requested) : null;
  const seg = positional !== null ? positional : requested;

  if (isMap(node)) {
    const entries = (node as { items: Pair[] }).items;
    const hit = entries.find(
      (entry) => String(isScalar(entry.key) ? entry.key.value : entry.key) === seg,
    );
    if (hit === undefined) {
      return null;
    }
    const nextWalked = [...walked, seg];
    const child = hit.value;
    const atLastSegment = i === segments.length - 1;
    if (atLastSegment && isScalar(child)) {
      return { kind: "pair", key: seg, value: child.value, path: nextWalked };
    }
    // Either more segments remain, or the value is a map/seq that the
    // next call will describe.
    return walkNode(child as Node, segments, i + 1, nextWalked);
  }

  if (isSeq(node)) {
    const idx = Number(seg);
    const inBounds = Number.isInteger(idx) && idx >= 0 && idx < node.items.length;
    return inBounds
      ? walkNode(node.items[idx] as Node, segments, i + 1, [...walked, seg])
      : null;
  }

  // Scalars (and anything else) have no children to descend into.
  return null;
}
/**
 * Translate a positional token into a concrete segment for `node`.
 *
 * Sequences are passed to `resolvePositionalSeg` as indexable containers
 * with their item count; maps as non-indexable containers with their
 * stringified keys.
 *
 * @param node - Container the token applies to.
 * @param seg - Positional token as written in the path.
 * @returns Whatever `resolvePositionalSeg` returns for a map or sequence;
 *   `null` for any other node type.
 */
function positionalForYaml(node: Node, seg: string): string | null {
  if (isSeq(node)) {
    const { length } = (node as { items: Node[] }).items;
    return resolvePositionalSeg(seg, { indexable: true, size: length });
  }
  if (!isMap(node)) {
    return null;
  }
  const keys = (node as { items: Pair[] }).items.map(({ key }) =>
    String(isScalar(key) ? key.value : key),
  );
  return resolvePositionalSeg(seg, { indexable: false, size: keys.length, keys });
}

27
pnpm-lock.yaml generated
View File

@@ -1115,6 +1115,28 @@ importers:
specifier: workspace:*
version: link:../../packages/plugin-sdk
extensions/oc-path:
dependencies:
commander:
specifier: ^14.0.3
version: 14.0.3
jsonc-parser:
specifier: ^3.3.1
version: 3.3.1
markdown-it:
specifier: 14.1.1
version: 14.1.1
yaml:
specifier: ^2.8.4
version: 2.8.4
devDependencies:
'@openclaw/plugin-sdk':
specifier: workspace:*
version: link:../../packages/plugin-sdk
openclaw:
specifier: workspace:*
version: link:../..
extensions/ollama:
dependencies:
'@mariozechner/pi-ai':
@@ -5937,6 +5959,9 @@ packages:
engines: {node: '>=6'}
hasBin: true
jsonc-parser@3.3.1:
resolution: {integrity: sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==}
jsonfile@6.2.1:
resolution: {integrity: sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==}
@@ -12992,6 +13017,8 @@ snapshots:
json5@2.2.3: {}
jsonc-parser@3.3.1: {}
jsonfile@6.2.1:
dependencies:
universalify: 2.0.1