diff --git a/packages/pipelines/README.md b/packages/pipelines/README.md new file mode 100644 index 000000000..a195ffd9a --- /dev/null +++ b/packages/pipelines/README.md @@ -0,0 +1,519 @@ +# @ucdjs/pipelines + +Pipeline framework for processing Unicode Character Database files. + +## Pipeline Flow + +``` +definePipeline({ versions, inputs, artifacts?, cacheStore?, routes }) + | + v + +---------------------------+ + | FOR EACH VERSION | + | (16.0.0, 15.1.0...) | + +---------------------------+ + | + +-------------------------------+-------------------------------+ + v v v ++-----------+ +---------------+ +-----------+ +| INPUTS |---- files ----->| ARTIFACTS |--- context --->| ROUTES | ++-----------+ | (optional) | +-----------+ + +---------------+ | + +----------------------------------------------+ + v + +---------------------+ + | FOR EACH FILE | + +---------------------+ + | + v + +---------------------+ + | ROUTE MATCHING | + | | + | byName, byDir, | + | byGlob, and, or... | + +---------------------+ + | + +-------------+-------------+ + v v v + +---------+ +---------+ +----------+ + | MATCHED | | SKIPPED | | FALLBACK | + +---------+ +---------+ +----------+ + | | + +------------+---------------+ + v + +---------------------+ + | CACHE CHECK | + | | + | key = route+version | + | + content hash | + | + artifact hash | + +---------------------+ + | + +----------+----------+ + v v + +-------------+ +-------------+ + | CACHE HIT | | CACHE MISS | + | | | | + | return | | execute | + | cached | | route | + +-------------+ +-------------+ + | | + | v + | +---------------------+ + | | PARSER | + | | | + | | file -> ParsedRow[] | + | +---------------------+ + | | + | v + | +---------------------+ + | | RESOLVER | + | | | + | | getArtifact(id) | + | | emitArtifact(id,v) | + | | return outputs | + | +---------------------+ + | | + | v + | +---------------------+ + | | CACHE STORE | + | +---------------------+ + | | + +----------+----------+ + v + +---------------------+ + | MERGE ARTIFACTS | + | | + | emitted artifacts | + | available to next | + | routes | + +---------------------+ + | + v + +---------------------+ + | RESULT | + | | + | { data, graph, | + | errors, summary } | + +---------------------+ +``` + +## Concepts + +### Source + +A **Source** is the data provider that tells the pipeline where to get files from. Sources are defined using `definePipelineSource()` and passed to the pipeline via the `inputs` array. + +Each source has: +- **id** - Unique identifier for the source +- **backend** - Object implementing `listFiles(version)` and `readFile(file)` +- **includes** - Optional filter to include only matching files +- **excludes** - Optional filter to exclude matching files + +```ts +import { definePipelineSource, definePipeline, byGlob } from "@ucdjs/pipelines"; + +// Define a source with custom backend +const mySource = definePipelineSource({ + id: "unicode-files", + backend: { + listFiles: async (version) => { + // Return array of FileContext objects + return [{ path: "LineBreak.txt", name: "LineBreak.txt", dir: "", ext: ".txt", version }]; + }, + readFile: async (file) => { + // Return file content as string + return "# File content here..."; + }, + }, + // Optional: filter files + includes: byGlob("**/*.txt"), + excludes: byGlob("**/Test*.txt"), +}); + +// Use in pipeline +const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [mySource], + routes: [myRoute], +}); +``` + +#### Multiple Sources + +Pipelines support multiple sources. 
Files from later sources with the same path will override earlier ones: + +```ts +const localSource = definePipelineSource({ + id: "local", + backend: createLocalBackend({ dir: "./ucd-files" }), +}); + +const httpSource = definePipelineSource({ + id: "unicode-http", + backend: createHttpBackend({ baseUrl: "https://unicode.org/Public" }), +}); + +const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [httpSource, localSource], // Local files override HTTP files + routes: [myRoute], +}); +``` + +#### Filtering by Source + +Use `bySource()` filter to match files from a specific source: + +```ts +import { bySource, and, byName } from "@ucdjs/pipelines"; + +const route = definePipelineRoute({ + id: "local-only", + filter: and(bySource("local"), byName("CustomData.txt")), + // ... +}); +``` + +### Artifacts + +**Artifacts** are computed values that can be shared across routes. They enable: + +- **Shared lookup tables** - e.g., a Map of codepoint -> character name that multiple routes need +- **Route dependencies** - One route produces data that another route consumes +- **Cross-file data** - Combine data from multiple files into a single structure + +There are two ways to create artifacts: + +#### Pre-defined Artifacts + +Use `definePipelineArtifact()` for artifacts that should be built **before** any routes execute. These are ideal for lookup tables that many routes need. + +```ts +import { definePipelineArtifact, definePipeline, definePipelineSource, byName } from "@ucdjs/pipelines"; + +// Define an artifact that builds a character names lookup table +const namesArtifact = definePipelineArtifact({ + id: "names", + // Optional: filter to find the source file + filter: byName("UnicodeData.txt"), + // Optional: parser to read the file + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const [codePoint, name] = line.split(";"); + yield { sourceFile: ctx.file.path, kind: "point", codePoint, value: name }; + } + }, + // Build function receives version context and optional parsed rows + build: async (ctx, rows) => { + const names = new Map(); + if (rows) { + for await (const row of rows) { + if (row.codePoint && row.value) { + names.set(row.codePoint, String(row.value)); + } + } + } + return names; + }, +}); + +// Define a source +const mySource = definePipelineSource({ + id: "my-source", + backend: myBackend, +}); + +// Use in pipeline +const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [mySource], + artifacts: [namesArtifact], // Built before routes + routes: [myRoute], +}); +``` + +Routes access pre-defined artifacts via `ctx.getArtifact(id)`: + +```ts +const myRoute = definePipelineRoute({ + id: "my-route", + filter: byName("SomeFile.txt"), + parser: async function* (ctx) { /* ... */ }, + resolver: async (ctx, rows) => { + // Access the pre-defined artifact + const names = ctx.getArtifact("names") as Map; + // Use names... + return [/* outputs */]; + }, +}); +``` + +#### Route-Emitted Artifacts + +Routes can also produce artifacts for **subsequent** routes using `ctx.emitArtifact()`. This is useful when the artifact depends on route-specific processing. + +```ts +// Route that PRODUCES an artifact +const namesRoute = definePipelineRoute({ + id: "unicode-data-names", + filter: byName("UnicodeData.txt"), + parser: async function* (ctx) { /* ... 
*/ }, + resolver: async (ctx, rows) => { + const names = new Map(); + for await (const row of rows) { + if (row.codePoint && row.value) { + names.set(row.codePoint, String(row.value)); + } + } + + // Emit artifact for subsequent routes + ctx.emitArtifact("names", names); + + return []; // No output required + }, +}); + +// Route that CONSUMES the artifact +const lineBreakRoute = definePipelineRoute({ + id: "line-break", + filter: byName("LineBreak.txt"), + parser: async function* (ctx) { /* ... */ }, + resolver: async (ctx, rows) => { + // Get artifact (throws if not found) + const names = ctx.getArtifact("names") as Map; + + // Use names to enrich output... + return [{ version: ctx.version, property: "Line_Break", entries }]; + }, +}); + +// Order matters! Producer must come before consumer +const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [mySource], + routes: [namesRoute, lineBreakRoute], // namesRoute first! +}); +``` + +#### When to Use Which + +| Use Case | Approach | +|----------|----------| +| Lookup table needed by many routes | Pre-defined artifact | +| Artifact doesn't depend on route logic | Pre-defined artifact | +| Artifact is derived from route processing | Route-emitted artifact | +| Only one or two routes need it | Route-emitted artifact | + +**Important**: `ctx.getArtifact(id)` throws if the artifact doesn't exist. Make sure: +- Pre-defined artifacts are listed in the `artifacts` array +- Route-emitted artifacts come from routes that run **before** the consuming route + +### Route + +A **Route** defines how to process a specific type of file. Each route has: + +- **id** - Unique identifier for the route +- **filter** - Predicate that matches files this route handles +- **parser** - Async generator that yields parsed rows from file content +- **resolver** - Function that transforms parsed rows into output format +- **cache** - Optional boolean to enable/disable caching for this route (default: `true`) + +### Filter + +**Filters** are predicate functions that determine which files a route handles: + +```ts +byName("LineBreak.txt") // Exact filename match +byDir("auxiliary") // Files in specific directory +byExt(".txt") // Files with extension +byGlob("**/*Test*.txt") // Glob pattern matching +byPath("ucd/LineBreak.txt") // Exact path match +byProp("Line_Break") // Match by property in row context +bySource("my-source") // Files from a specific source + +// Combinators +and(byExt(".txt"), byDir("ucd")) // All conditions must match +or(byName("A.txt"), byName("B.txt")) // Any condition matches +not(byGlob("**/Test*")) // Negate a filter +always() // Always matches +never() // Never matches +``` + +### Caching + +The pipeline supports caching route outputs to avoid reprocessing unchanged files. 
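On a cache hit, the stored outputs and any artifacts the route emitted are replayed without re-running the parser or resolver.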
Cache entries are keyed by: + +- Route ID +- Unicode version +- Input file content hash +- Hashes of consumed artifacts + +```ts +import { createMemoryCacheStore, definePipelineSource, definePipeline } from "@ucdjs/pipelines"; + +const cacheStore = createMemoryCacheStore(); + +const mySource = definePipelineSource({ + id: "my-source", + backend: myBackend, +}); + +const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [mySource], + cacheStore, // Enable caching + routes: [namesRoute, lineBreakRoute], +}); + +// First run: cache miss, routes execute +await pipeline.run(); + +// Second run: cache hit, results returned from cache +await pipeline.run(); + +// Force recompute (ignore cache) +await pipeline.run({ cache: false }); +``` + +You can disable caching per-route: + +```ts +const volatileRoute = definePipelineRoute({ + id: "volatile", + cache: false, // Never cache this route + // ... +}); +``` + +**Note**: Pre-defined artifacts are **not cached** - they are rebuilt for each pipeline run. Only route outputs and route-emitted artifacts are cached. + +## Usage + +```ts +import { + definePipeline, + definePipelineRoute, + definePipelineArtifact, + definePipelineSource, + createMemoryCacheStore, + byName, +} from "@ucdjs/pipelines"; + +// Define a source +const mySource = definePipelineSource({ + id: "unicode-files", + backend: { + listFiles: async (version) => { + // Your implementation to list files + return [ + { path: "UnicodeData.txt", name: "UnicodeData.txt", dir: "", ext: ".txt", version }, + { path: "LineBreak.txt", name: "LineBreak.txt", dir: "", ext: ".txt", version }, + ]; + }, + readFile: async (file) => { + // Your implementation to read file content + return "# File content..."; + }, + }, +}); + +// Pre-defined artifact for character names (built before routes) +const namesArtifact = definePipelineArtifact({ + id: "names", + filter: byName("UnicodeData.txt"), + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + yield { sourceFile: ctx.file.path, kind: "point", codePoint: parts[0], value: parts[1] }; + } + }, + build: async (_ctx, rows) => { + const names = new Map(); + if (rows) { + for await (const row of rows) { + if (row.codePoint && row.value) { + names.set(row.codePoint, String(row.value)); + } + } + } + return names; + }, +}); + +// Route that uses the pre-defined artifact +const lineBreakRoute = definePipelineRoute({ + id: "line-break", + filter: byName("LineBreak.txt"), + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const [range, property] = line.split(";").map((s) => s.trim()); + yield { sourceFile: ctx.file.path, kind: "range", start: range.split("..")[0], end: range.split("..")[1], value: property }; + } + }, + resolver: async (ctx, rows) => { + const names = ctx.getArtifact("names") as Map; + const entries = []; + for await (const row of rows) { + entries.push({ range: `${row.start}..${row.end}`, value: row.value }); + } + return [{ + version: ctx.version, + property: "Line_Break", + file: ctx.file.name, + entries: ctx.normalizeEntries(entries), + }]; + }, +}); + +// Create and run pipeline +const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + inputs: [mySource], + artifacts: [namesArtifact], // Pre-defined artifacts + cacheStore: createMemoryCacheStore(), + routes: [lineBreakRoute], + onEvent: (event) => console.log(event.type), +}); + +const result = await 
pipeline.run(); + +console.log(result.summary); +// { versions: [...], totalFiles: 100, matchedFiles: 2, ... } + +console.log(result.data); +// [{ version: "16.0.0", property: "Line_Break", ... }, ...] +``` + +## Events + +The pipeline emits events during execution: + +| Event | Description | +|-------|-------------| +| `pipeline:start` | Pipeline execution started | +| `pipeline:end` | Pipeline execution completed | +| `version:start` | Started processing a version | +| `version:end` | Finished processing a version | +| `artifact:start` | Started building a pre-defined artifact | +| `artifact:end` | Finished building a pre-defined artifact | +| `artifact:produced` | Route emitted an artifact | +| `artifact:consumed` | Route consumed an artifact | +| `file:matched` | File matched a route | +| `file:skipped` | File skipped (no matching route) | +| `file:fallback` | File handled by fallback | +| `parse:start` | Started parsing a file | +| `parse:end` | Finished parsing a file | +| `resolve:start` | Started resolving a file | +| `resolve:end` | Finished resolving a file | +| `cache:hit` | Route result loaded from cache | +| `cache:miss` | Route result not in cache | +| `cache:store` | Route result stored in cache | +| `error` | An error occurred | + +## License + +MIT diff --git a/packages/pipelines/package.json b/packages/pipelines/package.json new file mode 100644 index 000000000..7eb8588b0 --- /dev/null +++ b/packages/pipelines/package.json @@ -0,0 +1,60 @@ +{ + "name": "@ucdjs/pipelines", + "version": "1.0.0", + "type": "module", + "author": { + "name": "Lucas Norgaard", + "email": "lucasnrgaard@gmail.com", + "url": "https://luxass.dev" + }, + "packageManager": "pnpm@10.27.0", + "license": "MIT", + "homepage": "https://github.com/ucdjs/ucd", + "repository": { + "type": "git", + "url": "git+https://github.com/ucdjs/ucd.git", + "directory": "packages/pipelines" + }, + "bugs": { + "url": "https://github.com/ucdjs/ucd/issues" + }, + "exports": { + ".": "./dist/index.mjs", + "./package.json": "./package.json" + }, + "main": "./dist/index.mjs", + "module": "./dist/index.mjs", + "types": "./dist/index.d.mts", + "files": [ + "dist" + ], + "engines": { + "node": ">=22.18" + }, + "scripts": { + "build": "tsdown --tsconfig=./tsconfig.build.json", + "dev": "tsdown --watch", + "clean": "git clean -xdf dist node_modules", + "lint": "eslint .", + "typecheck": "tsc --noEmit -p tsconfig.build.json" + }, + "dependencies": { + "@ucdjs-internal/shared": "workspace:*", + "picomatch": "catalog:prod", + "zod": "catalog:prod" + }, + "devDependencies": { + "@luxass/eslint-config": "catalog:linting", + "@types/picomatch": "catalog:types", + "@ucdjs-tooling/tsconfig": "workspace:*", + "@ucdjs-tooling/tsdown-config": "workspace:*", + "eslint": "catalog:linting", + "publint": "catalog:dev", + "tsdown": "catalog:dev", + "tsx": "catalog:dev", + "typescript": "catalog:dev" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/packages/pipelines/playgrounds/full-pipeline.ts b/packages/pipelines/playgrounds/full-pipeline.ts new file mode 100644 index 000000000..ec36c3c33 --- /dev/null +++ b/packages/pipelines/playgrounds/full-pipeline.ts @@ -0,0 +1,360 @@ +import { z } from "zod"; +import { + artifact, + byName, + createMemoryCacheStore, + definePipeline, + definePipelineRoute, + definePipelineSource, + type FileContext, + type PipelineEvent, + type PropertyJson, + type ResolvedEntry, +} from "../src"; + +const MOCK_FILES: Record> = { + "16.0.0": { + "ucd/UnicodeData.txt": `0041;LATIN CAPITAL LETTER 
A;Lu;0;L;;;;;N;;;;0061; +0042;LATIN CAPITAL LETTER B;Lu;0;L;;;;;N;;;;0062; +0043;LATIN CAPITAL LETTER C;Lu;0;L;;;;;N;;;;0063; +0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041 +0062;LATIN SMALL LETTER B;Ll;0;L;;;;;N;;;0042;;0042 +0063;LATIN SMALL LETTER C;Ll;0;L;;;;;N;;;0043;;0043`, + "ucd/LineBreak.txt": `# LineBreak.txt +0041..005A;AL +0061..007A;AL +0030..0039;NU`, + "ucd/Scripts.txt": `# Scripts.txt +0041..005A;Latin +0061..007A;Latin +0030..0039;Common`, + }, + "15.1.0": { + "ucd/UnicodeData.txt": `0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061; +0042;LATIN CAPITAL LETTER B;Lu;0;L;;;;;N;;;;0062; +0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041 +0062;LATIN SMALL LETTER B;Ll;0;L;;;;;N;;;0042;;0042`, + "ucd/LineBreak.txt": `# LineBreak.txt +0041..005A;AL +0061..007A;AL`, + "ucd/Scripts.txt": `# Scripts.txt +0041..005A;Latin +0061..007A;Latin`, + }, +}; + +const mockSource = definePipelineSource({ + id: "mock", + backend: { + async listFiles(version: string): Promise { + const versionFiles = MOCK_FILES[version]; + if (!versionFiles) { + return []; + } + + return Object.keys(versionFiles).map((path) => { + const parts = path.split("/"); + const name = parts[parts.length - 1] ?? ""; + const dir = parts[0] ?? ""; + const ext = name.includes(".") ? `.${name.split(".").pop()}` : ""; + + return { version, path, name, dir, ext }; + }); + }, + + async readFile(file: FileContext): Promise { + const versionFiles = MOCK_FILES[file.version]; + if (!versionFiles) { + throw new Error(`Version not found: ${file.version}`); + } + + const content = versionFiles[file.path]; + if (content === undefined) { + throw new Error(`File not found: ${file.path}`); + } + + return content; + }, + }, +}); + +const unicodeDataRoute = definePipelineRoute({ + id: "unicode-data-names", + filter: byName("UnicodeData.txt"), + emits: { + names: artifact(z.map(z.string(), z.string())), + }, + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + if (parts.length < 2) continue; + + const codePoint = parts[0]; + const name = parts[1]; + + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint, + value: name, + }; + } + }, + resolver: async (ctx, rows) => { + const names = new Map(); + + for await (const row of rows) { + if (row.codePoint && row.value) { + names.set(row.codePoint, String(row.value)); + } + } + + ctx.emitArtifact("names", names); + + return []; + }, +}); + +const lineBreakRoute = definePipelineRoute({ + id: "line-break", + filter: byName("LineBreak.txt"), + depends: ["artifact:unicode-data-names:names"] as const, + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const [range, value] = line.split(";").map((s) => s.trim()); + if (!range || !value) continue; + + if (range.includes("..")) { + const [start, end] = range.split(".."); + yield { + sourceFile: ctx.file.path, + kind: "range" as const, + start, + end, + value, + }; + } else { + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: range, + value, + }; + } + } + }, + resolver: async (ctx, rows) => { + const names = ctx.getArtifact("unicode-data-names:names") as Map; + const entries: ResolvedEntry[] = []; + const meta: Record = { + enrichedWithNames: true, + namesCount: names.size, + }; + + for await (const row of rows) { + if (row.kind === "range" && row.start && row.end) { + const startInt = parseInt(row.start, 16); + const endInt = 
parseInt(row.end, 16); + const sampleNames: string[] = []; + for (let cp = startInt; cp <= Math.min(endInt, startInt + 2); cp++) { + const hex = cp.toString(16).toUpperCase().padStart(4, "0"); + const name = names.get(hex); + if (name) sampleNames.push(name); + } + entries.push({ + range: `${row.start}..${row.end}`, + value: [String(row.value), ...sampleNames], + }); + } else if (row.codePoint) { + const name = names.get(row.codePoint); + entries.push({ + codePoint: row.codePoint, + value: name ? [String(row.value), name] : String(row.value), + }); + } + } + + return [ + { + version: ctx.version, + property: "Line_Break", + file: ctx.file.name, + entries: ctx.normalizeEntries(entries), + meta, + }, + ]; + }, +}); + +const scriptsRoute = definePipelineRoute({ + id: "scripts", + filter: byName("Scripts.txt"), + depends: ["artifact:unicode-data-names:names"] as const, + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const [range, value] = line.split(";").map((s) => s.trim()); + if (!range || !value) continue; + + if (range.includes("..")) { + const [start, end] = range.split(".."); + yield { + sourceFile: ctx.file.path, + kind: "range" as const, + start, + end, + value, + }; + } else { + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: range, + value, + }; + } + } + }, + resolver: async (ctx, rows) => { + const names = ctx.getArtifact("unicode-data-names:names") as Map; + const entries: ResolvedEntry[] = []; + const meta: Record = { + enrichedWithNames: true, + namesCount: names.size, + }; + + for await (const row of rows) { + if (row.kind === "range" && row.start && row.end) { + const startInt = parseInt(row.start, 16); + const endInt = parseInt(row.end, 16); + const sampleNames: string[] = []; + for (let cp = startInt; cp <= Math.min(endInt, startInt + 2); cp++) { + const hex = cp.toString(16).toUpperCase().padStart(4, "0"); + const name = names.get(hex); + if (name) sampleNames.push(name); + } + entries.push({ + range: `${row.start}..${row.end}`, + value: [String(row.value), ...sampleNames], + }); + } else if (row.codePoint) { + const name = names.get(row.codePoint); + entries.push({ + codePoint: row.codePoint, + value: name ? 
[String(row.value), name] : String(row.value), + }); + } + } + + return [ + { + version: ctx.version, + property: "Script", + file: ctx.file.name, + entries: ctx.normalizeEntries(entries), + meta, + }, + ]; + }, +}); + +async function main() { + const cacheStore = createMemoryCacheStore(); + + const events: PipelineEvent[] = []; + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + inputs: [mockSource], + cacheStore, + routes: [unicodeDataRoute, lineBreakRoute, scriptsRoute], + onEvent: (event) => { + events.push(event); + if (event.type === "artifact:produced") { + console.log(` [ARTIFACT] ${event.artifactId} produced by ${event.routeId}`); + } else if (event.type === "artifact:consumed") { + console.log(` [ARTIFACT] ${event.artifactId} consumed by ${event.routeId}`); + } else if (event.type === "cache:hit") { + console.log(` [CACHE HIT] ${event.routeId} for ${event.file.name}`); + } else if (event.type === "cache:miss") { + console.log(` [CACHE MISS] ${event.routeId} for ${event.file.name}`); + } else if (event.type === "cache:store") { + console.log(` [CACHE STORE] ${event.routeId} for ${event.file.name}`); + } + }, + }); + + console.log("=== First Run (cache miss expected) ===\n"); + + const result1 = await pipeline.run(); + + console.log("\n--- Summary ---"); + console.log(`Versions: ${result1.summary.versions.join(", ")}`); + console.log(`Total files: ${result1.summary.totalFiles}`); + console.log(`Matched files: ${result1.summary.matchedFiles}`); + console.log(`Outputs: ${result1.summary.totalOutputs}`); + console.log(`Duration: ${result1.summary.durationMs.toFixed(2)}ms`); + console.log(`Errors: ${result1.errors.length}`); + + if (result1.errors.length > 0) { + console.log("\nErrors:"); + for (const error of result1.errors) { + console.log(` - ${error.message}`); + } + } + + console.log("\n--- Outputs ---"); + for (const output of result1.data) { + const pj = output as PropertyJson; + console.log(` ${pj.version} / ${pj.property}: ${pj.entries.length} entries`); + } + + const stats1 = await cacheStore.stats?.(); + console.log(`\n--- Cache Stats ---`); + console.log(`Entries: ${stats1?.entries}`); + console.log(`Hits: ${stats1?.hits}`); + console.log(`Misses: ${stats1?.misses}`); + + console.log("\n\n=== Second Run (cache hit expected) ===\n"); + + events.length = 0; + + const result2 = await pipeline.run(); + + console.log("\n--- Summary ---"); + console.log(`Duration: ${result2.summary.durationMs.toFixed(2)}ms`); + console.log(`Outputs: ${result2.summary.totalOutputs}`); + + const stats2 = await cacheStore.stats?.(); + console.log(`\n--- Cache Stats ---`); + console.log(`Entries: ${stats2?.entries}`); + console.log(`Hits: ${stats2?.hits}`); + console.log(`Misses: ${stats2?.misses}`); + + console.log("\n\n=== Third Run (cache disabled) ===\n"); + + events.length = 0; + + const result3 = await pipeline.run({ cache: false }); + + console.log("\n--- Summary ---"); + console.log(`Duration: ${result3.summary.durationMs.toFixed(2)}ms`); + + const stats3 = await cacheStore.stats?.(); + console.log(`\n--- Cache Stats (unchanged) ---`); + console.log(`Entries: ${stats3?.entries}`); + console.log(`Hits: ${stats3?.hits}`); + console.log(`Misses: ${stats3?.misses}`); + + console.log("\n\n=== Verification ==="); + const outputsMatch = JSON.stringify(result1.data) === JSON.stringify(result2.data); + console.log(`Outputs from run 1 and run 2 match: ${outputsMatch}`); + + const cacheHitEvents = events.filter((e) => e.type === "cache:hit").length; + const cacheMissEvents = 
events.filter((e) => e.type === "cache:miss").length; + console.log(`Run 2: ${cacheHitEvents} cache hits, ${cacheMissEvents} cache misses`); +} + +main().catch(console.error); diff --git a/packages/pipelines/playgrounds/minimal-pipeline.ts b/packages/pipelines/playgrounds/minimal-pipeline.ts new file mode 100644 index 000000000..b7b75e538 --- /dev/null +++ b/packages/pipelines/playgrounds/minimal-pipeline.ts @@ -0,0 +1,102 @@ +import { + byName, + definePipeline, + definePipelineRoute, + definePipelineSource, + type FileContext, +} from "../src"; + +// Minimal in-memory source with just one file +const MOCK_DATA = `# LineBreak.txt - Simple test data +0041..005A;AL # Latin uppercase +0061..007A;AL # Latin lowercase +0030..0039;NU # ASCII digits`; + +const minimalSource = definePipelineSource({ + id: "minimal", + backend: { + async listFiles(version: string): Promise { + return [ + { + version, + path: "ucd/LineBreak.txt", + name: "LineBreak.txt", + dir: "ucd", + ext: ".txt", + }, + ]; + }, + + async readFile(_file: FileContext): Promise { + return MOCK_DATA; + }, + }, +}); + +const lineBreakRoute = definePipelineRoute({ + id: "line-break", + filter: byName("LineBreak.txt"), + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + + const [range, value] = line.split(";").map((s) => s.trim()); + if (!range || !value) continue; + + const [start, end] = range.includes("..") + ? range.split("..") + : [range, range]; + + yield { + sourceFile: ctx.file.path, + kind: "range" as const, + start, + end, + value, + }; + } + }, + resolver: async (ctx, rows) => { + const entries = []; + for await (const row of rows) { + if (row.start && row.end) { + entries.push({ + range: `${row.start}..${row.end}` as const, + value: String(row.value), + }); + } + } + + return [ + { + version: ctx.version, + property: "Line_Break", + file: ctx.file.name, + entries, + }, + ]; + }, +}); + +async function main() { + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [minimalSource], + routes: [lineBreakRoute], + onEvent: (event) => { + console.log(`[${event.type}]`, event); + }, + }); + + const result = await pipeline.run(); + + console.log("\n=== Results ==="); + console.log(`Files processed: ${result.summary.matchedFiles}`); + console.log(`Outputs: ${result.summary.totalOutputs}`); + console.log(`Duration: ${result.summary.durationMs.toFixed(2)}ms`); + + console.log("\n=== Data ==="); + console.log(JSON.stringify(result.data, null, 2)); +} + +main().catch(console.error); diff --git a/packages/pipelines/playgrounds/multiple-routes-same-filter.ts b/packages/pipelines/playgrounds/multiple-routes-same-filter.ts new file mode 100644 index 000000000..a6269de39 --- /dev/null +++ b/packages/pipelines/playgrounds/multiple-routes-same-filter.ts @@ -0,0 +1,140 @@ +import { + byName, + definePipeline, + definePipelineRoute, + definePipelineSource, + type FileContext, +} from "../src"; + +const MOCK_DATA = `0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061; +0042;LATIN CAPITAL LETTER B;Lu;0;L;;;;;N;;;;0062; +0043;LATIN CAPITAL LETTER C;Lu;0;L;;;;;N;;;;0063;`; + +const multipleRoutesSource = definePipelineSource({ + id: "multiple-routes", + backend: { + async listFiles(version: string): Promise { + return [ + { + version, + path: "ucd/UnicodeData.txt", + name: "UnicodeData.txt", + dir: "ucd", + ext: ".txt", + }, + ]; + }, + + async readFile(_file: FileContext): Promise { + return MOCK_DATA; + }, + }, +}); + +const firstRoute = definePipelineRoute({ + id: 
"extract-names", + filter: byName("UnicodeData.txt"), + + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: parts[0], + value: parts[1], + }; + } + }, + + resolver: async (ctx, rows) => { + const names = []; + for await (const row of rows) { + names.push(`${row.codePoint}: ${row.value}`); + } + + console.log("\nāœ… FIRST ROUTE EXECUTED (extract-names)"); + console.log(` Processed ${names.length} names`); + + return [ + { + version: ctx.version, + property: "Character_Names", + file: ctx.file.name, + entries: [], + }, + ]; + }, +}); + +const secondRoute = definePipelineRoute({ + id: "extract-categories", + filter: byName("UnicodeData.txt"), + + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: parts[0], + value: parts[2], + }; + } + }, + + resolver: async (ctx, rows) => { + const categories = []; + for await (const row of rows) { + categories.push(`${row.codePoint}: ${row.value}`); + } + + console.log("\nāŒ SECOND ROUTE EXECUTED (extract-categories)"); + console.log(` Processed ${categories.length} categories`); + + return [ + { + version: ctx.version, + property: "General_Category", + file: ctx.file.name, + entries: [], + }, + ]; + }, +}); + +async function main() { + console.log("=== Testing: Two routes with same filter ===\n"); + console.log("Routes defined:"); + console.log(" 1. extract-names (filter: UnicodeData.txt)"); + console.log(" 2. extract-categories (filter: UnicodeData.txt)"); + console.log("\nExpected: Only first route should execute"); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [multipleRoutesSource], + routes: [firstRoute, secondRoute], + }); + + const result = await pipeline.run(); + + console.log("\n=== Results ==="); + console.log(`Outputs: ${result.summary.totalOutputs}`); + console.log("\nData:"); + for (const output of result.data) { + const prop = output as { property: string }; + console.log(` - ${prop.property}`); + } + + console.log("\n=== Conclusion ==="); + if (result.summary.totalOutputs === 1) { + console.log("āœ… Only the FIRST route executed (routes.find() behavior)"); + console.log(" The second route was ignored even though it matched."); + } else { + console.log("āŒ Both routes executed (unexpected!)"); + } +} + +main().catch(console.error); diff --git a/packages/pipelines/playgrounds/new-design.ts b/packages/pipelines/playgrounds/new-design.ts new file mode 100644 index 000000000..6d4811256 --- /dev/null +++ b/packages/pipelines/playgrounds/new-design.ts @@ -0,0 +1,76 @@ +const source1 = definePipelineSource({ + id: "source-1", + backend: fromFSBridgeBackend(HTTPFileSystemBridge, { + // fsbridge options + }), + includes: glob("**/*.txt"), + exclude: glob("**/excluded-*.txt"), +}) + +const route1 = definePipelineRoute({ + id: "route-1", + depends: ["artifact:route-2:names", "routes:route-2"], // This will require route1 to wait for route2 to finish, and artifact:route-2:names to be available + filter: byExt(".txt") && byVersion("15.0.0"), + // filter: "version is 15.0.0 and extension is .txt", // This can be implemented later. 
+ parser: async function* (ctx) { + // parsing logic + }, + transforms: [ + definePipelineTransform({ + id: "transform-1", + fn: async function* (ctx, rows) { + yield { /* transformed row */} + } + }), + definePipelineTransform({ + id: "transform-2", + fn: async function* (ctx, rows) { + yield { /* transformed row */} + } + }) + ], + resolver: async (ctx, rows) => { + // resolving logic + return [/* resolved entries */] + }, +}) + +const route2 = definePipelineRoute({ + id: "route-2", + filter: byName("SomeFile.txt"), + parser: async function* (ctx) { + // parsing logic + }, + resolver: async (ctx, rows) => { + await ctx.emitArtifact("names", new Map()); + // resolving logic + return [/* resolved entries */] + }, + cache: false, +}) + +const pipeline = definePipeline({ + // This will tell the pipeline, that it can run on these two versions. + // So if the run method is called with one of these versions, it can proceed. + // If the run method is called with other versions, it will error out. + versions: ["15.0.0", "16.0.0"], + inputs: [ + source1, + // More sources? + ], + routes: [ + route1, + route2, + ], + cache: { + store: new FSCacheStore("/path/to/cache/dir"), // The fs cache store will cache artifacts, route results, etc. + } +}) + +pipeline.run({ + versions: ["15.0.0", "16.0.0"], + cache: false, // Disable cache for everything this run. + onEvent: (event) => { + console.log(`[${event.type}]`, event); + } +}) diff --git a/packages/pipelines/playgrounds/route-context-sharing.ts b/packages/pipelines/playgrounds/route-context-sharing.ts new file mode 100644 index 000000000..550693723 --- /dev/null +++ b/packages/pipelines/playgrounds/route-context-sharing.ts @@ -0,0 +1,187 @@ +import { z } from "zod"; +import { + artifact, + byName, + definePipeline, + definePipelineRoute, + definePipelineSource, + type FileContext, + type ResolvedEntry, +} from "../src"; + +const UNICODE_DATA = `0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061; +0042;LATIN CAPITAL LETTER B;Lu;0;L;;;;;N;;;;0062; +0043;LATIN CAPITAL LETTER C;Lu;0;L;;;;;N;;;;0063; +0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041 +0062;LATIN SMALL LETTER B;Ll;0;L;;;;;N;;;0042;;0042 +0063;LATIN SMALL LETTER C;Ll;0;L;;;;;N;;;0043;;0043`; + +const LINE_BREAK_DATA = `# LineBreak.txt +0041..005A;AL +0061..007A;AL`; + +const FILES: Record = { + "ucd/UnicodeData.txt": UNICODE_DATA, + "ucd/LineBreak.txt": LINE_BREAK_DATA, +}; + +const contextSharingSource = definePipelineSource({ + id: "context-sharing", + backend: { + async listFiles(version: string): Promise { + return Object.keys(FILES).map((path) => { + const parts = path.split("/"); + const name = parts[parts.length - 1]!; + const dir = parts[0] || "ucd"; + return { + version, + path, + name, + dir, + ext: "." 
+ name.split(".").pop()!, + }; + }); + }, + + async readFile(file: FileContext): Promise { + return FILES[file.path] || ""; + }, + }, +}); + +console.log("=== Pattern: Route-to-Route Data Context Sharing ===\n"); + +const namesRoute = definePipelineRoute({ + id: "extract-names", + filter: byName("UnicodeData.txt"), + emits: { + names: artifact(z.map(z.string(), z.string())), + }, + parser: async function* (ctx) { + console.log("šŸ“„ Processing UnicodeData.txt (names route)"); + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: parts[0], + value: parts[1], + }; + } + }, + resolver: async (ctx, rows) => { + const names = new Map(); + + for await (const row of rows) { + if (row.codePoint && row.value) { + names.set(row.codePoint, String(row.value)); + } + } + + console.log(`āœ… Extracted ${names.size} character names`); + console.log(` Emitting artifact: "names"\n`); + + ctx.emitArtifact("names", names); + + return []; + }, +}); + +const lineBreakRoute = definePipelineRoute({ + id: "line-break-enriched", + filter: byName("LineBreak.txt"), + depends: ["artifact:extract-names:names"] as const, + parser: async function* (ctx) { + console.log("šŸ“„ Processing LineBreak.txt"); + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const [range, value] = line.split(";").map((s) => s.trim()); + if (!range || !value) continue; + + const [start, end] = range.includes("..") + ? range.split("..") + : [range, range]; + + yield { + sourceFile: ctx.file.path, + kind: "range" as const, + start, + end, + value, + }; + } + }, + resolver: async (ctx, rows) => { + console.log("šŸ” Attempting to get 'names' artifact..."); + + const names = ctx.getArtifact("extract-names:names") as Map; + + console.log(`āœ… Got names artifact with ${names.size} entries`); + console.log(" Now enriching LineBreak data with character names\n"); + + const entries: ResolvedEntry[] = []; + + for await (const row of rows) { + if (row.start && row.end) { + const startInt = parseInt(row.start, 16); + const endInt = parseInt(row.end, 16); + + const sampleNames: string[] = []; + for (let cp = startInt; cp <= Math.min(endInt, startInt + 2); cp++) { + const hex = cp.toString(16).toUpperCase().padStart(4, "0"); + const name = names.get(hex); + if (name) { + sampleNames.push(name); + } + } + + entries.push({ + range: `${row.start}..${row.end}` as const, + value: [String(row.value), `Examples: ${sampleNames.join(", ")}`], + }); + } + } + + return [ + { + version: ctx.version, + property: "Line_Break", + file: ctx.file.name, + entries, + meta: { + enrichedWithNames: true, + namesCount: names.size, + }, + }, + ]; + }, +}); + +async function main() { + console.log("Route execution order:"); + console.log(" 1. extract-names (UnicodeData.txt) → emits 'names' artifact"); + console.log(" 2. 
line-break-enriched (LineBreak.txt) → consumes 'extract-names:names' artifact\n"); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [contextSharingSource], + routes: [ + namesRoute, + lineBreakRoute, + ], + }); + + const result = await pipeline.run(); + + console.log("=== Results ==="); + console.log(`Outputs: ${result.summary.totalOutputs}`); + console.log("\n=== Enriched Data ==="); + console.log(JSON.stringify(result.data, null, 2)); + + console.log("\n=== Key Insight ==="); + console.log("āœ… With the new DAG-based execution, route order is determined by dependencies."); + console.log(" Routes declare 'depends: [\"artifact:extract-names:names\"]' to ensure correct ordering."); +} + +main().catch(console.error); diff --git a/packages/pipelines/playgrounds/same-file-context-problem.ts b/packages/pipelines/playgrounds/same-file-context-problem.ts new file mode 100644 index 000000000..52412f86b --- /dev/null +++ b/packages/pipelines/playgrounds/same-file-context-problem.ts @@ -0,0 +1,185 @@ +import { z } from "zod"; +import { + artifact, + byName, + definePipeline, + definePipelineRoute, + definePipelineSource, + type FileContext, + type ResolvedEntry, +} from "../src"; + +const UNICODE_DATA = `0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061; +0042;LATIN CAPITAL LETTER B;Lu;0;L;;;;;N;;;;0062; +0043;LATIN CAPITAL LETTER C;Lu;0;L;;;;;N;;;;0063;`; + +const sameFileSource = definePipelineSource({ + id: "same-file", + backend: { + async listFiles(version: string): Promise { + return [ + { + version, + path: "ucd/UnicodeData.txt", + name: "UnicodeData.txt", + dir: "ucd", + ext: ".txt", + }, + ]; + }, + + async readFile(_file: FileContext): Promise { + return UNICODE_DATA; + }, + }, +}); + +console.log("=== Problem: Adding context to the SAME file ===\n"); +console.log("Scenario: We want to:"); +console.log(" 1. First pass: Extract basic data"); +console.log(" 2. 
Second pass: Enrich the same data with additional context"); +console.log(" Both passes need to process UnicodeData.txt\n"); + +const firstPassRoute = definePipelineRoute({ + id: "first-pass-basic", + filter: byName("UnicodeData.txt"), + emits: { + "basic-data": artifact(z.array(z.object({ codePoint: z.string(), name: z.string() }))), + }, + parser: async function* (ctx) { + console.log("šŸ“„ FIRST PASS: Extracting basic character data"); + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: parts[0], + value: parts[1], + }; + } + }, + + resolver: async (ctx, rows) => { + const basicData: Array<{ codePoint: string; name: string }> = []; + + for await (const row of rows) { + if (row.codePoint && row.value) { + basicData.push({ + codePoint: row.codePoint, + name: String(row.value), + }); + } + } + + console.log(`āœ… First pass extracted ${basicData.length} basic entries`); + console.log(" Emitting artifact: 'basic-data'\n"); + + ctx.emitArtifact("basic-data", basicData); + + return []; + }, +}); + +const secondPassRoute = definePipelineRoute({ + id: "second-pass-enriched", + filter: byName("UnicodeData.txt"), + depends: ["artifact:first-pass-basic:basic-data"] as const, + parser: async function* (ctx) { + console.log("šŸ“„ SECOND PASS: Would extract category data"); + for await (const line of ctx.readLines()) { + if (ctx.isComment(line)) continue; + const parts = line.split(";"); + yield { + sourceFile: ctx.file.path, + kind: "point" as const, + codePoint: parts[0], + value: parts[2], + }; + } + }, + + resolver: async (ctx, rows) => { + console.log("šŸ” Attempting to get 'basic-data' artifact..."); + + const basicData = ctx.getArtifact("first-pass-basic:basic-data") as Array<{ + codePoint: string; + name: string; + }>; + + console.log(`āœ… Got basic data with ${basicData.length} entries`); + + const entries: ResolvedEntry[] = []; + + for await (const row of rows) { + if (row.codePoint && row.value) { + const basic = basicData.find((b) => b.codePoint === row.codePoint); + + entries.push({ + codePoint: row.codePoint, + value: basic + ? `${basic.name} (${row.value})` + : String(row.value), + }); + } + } + + return [ + { + version: ctx.version, + property: "Enriched_Data", + file: ctx.file.name, + entries, + }, + ]; + }, +}); + +async function main() { + console.log("Routes defined:"); + console.log(" 1. first-pass-basic (UnicodeData.txt) - emits 'basic-data'"); + console.log(" 2. 
second-pass-enriched (UnicodeData.txt) - depends on 'first-pass-basic:basic-data'"); + console.log("\nWith DAG-based execution, both routes can process the same file!\n"); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [sameFileSource], + routes: [ + firstPassRoute, + secondPassRoute, + ], + }); + + try { + const result = await pipeline.run(); + + console.log("\n=== Results ==="); + console.log(`Outputs: ${result.summary.totalOutputs}`); + console.log(`Errors: ${result.errors.length}`); + + if (result.errors.length > 0) { + console.log("\nāŒ Errors:"); + for (const error of result.errors) { + console.log(` ${error.message}`); + } + } + + if (result.data.length > 0) { + console.log("\n=== Output Data ==="); + console.log(JSON.stringify(result.data, null, 2)); + } + } catch (error) { + console.log("\nāŒ Pipeline failed:"); + console.log(` ${error}`); + } + + console.log("\n=== Solution ==="); + console.log("With the new DAG-based execution:"); + console.log(" 1. Routes declare dependencies via 'depends' array"); + console.log(" 2. Routes declare what they emit via 'emits' object with Zod schemas"); + console.log(" 3. Multiple routes CAN match the same file"); + console.log(" 4. Execution order is determined by dependency graph, not array order"); + console.log(" 5. Artifacts use prefixed keys: 'route-id:artifact-name'"); +} + +main().catch(console.error); diff --git a/packages/pipelines/src/artifact-schema.ts b/packages/pipelines/src/artifact-schema.ts new file mode 100644 index 000000000..294b3c288 --- /dev/null +++ b/packages/pipelines/src/artifact-schema.ts @@ -0,0 +1,68 @@ +import type { z } from "zod"; + +export interface Artifact { + _type: "artifact"; + schema: TSchema; + scope: "version"; +} + +export interface GlobalArtifact { + _type: "global-artifact"; + schema: TSchema; + scope: "global"; +} + +export type ArtifactDefinition = + | Artifact + | GlobalArtifact; + +export function artifact( + schema: TSchema +): Artifact; + +export function artifact( + schema: TSchema, + scope: "version" +): Artifact; + +export function artifact( + schema: TSchema, + scope: "global" +): GlobalArtifact; + +export function artifact( + schema: TSchema, + scope?: "version" | "global", +): ArtifactDefinition { + if (scope === "global") { + return { + _type: "global-artifact", + schema, + scope: "global", + }; + } + return { + _type: "artifact", + schema, + scope: "version", + }; +} + +export type InferArtifactType = + T extends ArtifactDefinition ? 
z.infer : never; + +export type InferEmittedArtifacts> = { + [K in keyof TEmits]: InferArtifactType; +}; + +export function isGlobalArtifact( + def: ArtifactDefinition, +): def is GlobalArtifact { + return def._type === "global-artifact"; +} + +export function isVersionArtifact( + def: ArtifactDefinition, +): def is Artifact { + return def._type === "artifact"; +} diff --git a/packages/pipelines/src/artifact.ts b/packages/pipelines/src/artifact.ts new file mode 100644 index 000000000..eb558d8db --- /dev/null +++ b/packages/pipelines/src/artifact.ts @@ -0,0 +1,31 @@ +import type { ParseContext, ParsedRow, PipelineFilter } from "./types"; + +export interface ArtifactBuildContext { + version: string; +} + +export interface PipelineArtifactDefinition< + TId extends string = string, + TValue = unknown, +> { + id: TId; + filter?: PipelineFilter; + parser?: (ctx: ParseContext) => AsyncIterable; + build: (ctx: ArtifactBuildContext, rows?: AsyncIterable) => Promise; +} + +export function definePipelineArtifact< + const TId extends string, + TValue, +>( + definition: PipelineArtifactDefinition, +): PipelineArtifactDefinition { + return definition; +} + +export type InferArtifactId = T extends PipelineArtifactDefinition ? TId : never; +export type InferArtifactValue = T extends PipelineArtifactDefinition ? TValue : never; + +export type InferArtifactsMap = { + [K in T[number] as InferArtifactId]: InferArtifactValue; +}; diff --git a/packages/pipelines/src/cache.ts b/packages/pipelines/src/cache.ts new file mode 100644 index 000000000..e54f75aee --- /dev/null +++ b/packages/pipelines/src/cache.ts @@ -0,0 +1,254 @@ +/** + * Unique identifier for a cache entry. + * Composed of route ID, version, input hash, and artifact dependency hashes. + */ +export interface CacheKey { + /** + * The route ID that produced this cache entry. + */ + routeId: string; + + /** + * The Unicode version being processed. + */ + version: string; + + /** + * Hash of the input file content. + */ + inputHash: string; + + /** + * Hashes of artifact dependencies used by this route. + * Key is artifact ID, value is the artifact's content hash. + */ + artifactHashes: Record; +} + +/** + * A cached result from a route execution. + */ +export interface CacheEntry { + /** + * The cache key that identifies this entry. + */ + key: CacheKey; + + /** + * The cached output data. + */ + output: TOutput[]; + + /** + * Artifacts produced by this route during execution. + * Key is artifact ID, value is the serialized artifact. + */ + producedArtifacts: Record; + + /** + * Timestamp when this entry was created (ISO 8601). + */ + createdAt: string; + + /** + * Optional metadata about the cache entry. + */ + meta?: Record; +} + +/** + * Converts a CacheKey to a string for storage lookup. + */ +export function serializeCacheKey(key: CacheKey): string { + const artifactHashStr = Object.entries(key.artifactHashes) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([id, hash]) => `${id}:${hash}`) + .join(","); + + return `${key.routeId}|${key.version}|${key.inputHash}|${artifactHashStr}`; +} + +/** + * Pluggable cache store interface. + * Implementations can use different backends (memory, filesystem, remote). + */ +export interface CacheStore { + /** + * Retrieve a cached entry by key. + * Returns undefined if not found. + */ + get: (key: CacheKey) => Promise; + + /** + * Store a cache entry. + */ + set: (entry: CacheEntry) => Promise; + + /** + * Check if a cache entry exists for the given key. 
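   * Note: the built-in memory store does not count has() lookups toward
   * hit/miss statistics; only get() does.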
+ */ + has: (key: CacheKey) => Promise; + + /** + * Delete a cache entry by key. + * Returns true if the entry existed and was deleted. + */ + delete: (key: CacheKey) => Promise; + + /** + * Clear all cache entries. + */ + clear: () => Promise; + + /** + * Get cache statistics (optional). + */ + stats?: () => Promise; +} + +/** + * Statistics about the cache store. + */ +export interface CacheStats { + /** + * Total number of entries in the cache. + */ + entries: number; + + /** + * Total size of cached data in bytes (if available). + */ + sizeBytes?: number; + + /** + * Number of cache hits since last clear. + */ + hits?: number; + + /** + * Number of cache misses since last clear. + */ + misses?: number; +} + +/** + * Options for cache behavior. + */ +export interface CacheOptions { + /** + * Whether caching is enabled. + * @default true + */ + enabled?: boolean; + + /** + * Custom hash function for content. + * Defaults to a simple hash based on content length and sample. + */ + hashFn?: (content: string) => string; +} + +/** + * Simple in-memory cache store implementation. + */ +export function createMemoryCacheStore(): CacheStore { + const cache = new Map(); + let hits = 0; + let misses = 0; + + return { + async get(key: CacheKey): Promise { + const serialized = serializeCacheKey(key); + const entry = cache.get(serialized); + if (entry) { + hits++; + } else { + misses++; + } + return entry; + }, + + async set(entry: CacheEntry): Promise { + const serialized = serializeCacheKey(entry.key); + cache.set(serialized, entry); + }, + + async has(key: CacheKey): Promise { + const serialized = serializeCacheKey(key); + return cache.has(serialized); + }, + + async delete(key: CacheKey): Promise { + const serialized = serializeCacheKey(key); + return cache.delete(serialized); + }, + + async clear(): Promise { + cache.clear(); + hits = 0; + misses = 0; + }, + + async stats(): Promise { + return { + entries: cache.size, + hits, + misses, + }; + }, + }; +} + +/** + * Default hash function for content. + * Uses a simple but fast algorithm suitable for cache keys. + */ +export function defaultHashFn(content: string): string { + // Simple djb2-like hash + let hash = 5381; + for (let i = 0; i < content.length; i++) { + hash = ((hash << 5) + hash) ^ content.charCodeAt(i); + } + // Convert to unsigned 32-bit and then to hex + return (hash >>> 0).toString(16).padStart(8, "0"); +} + +/** + * Hash an artifact value for cache key purposes. + * Handles different types of artifact values. 
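 * Map and Set values are hashed from sorted entries, so the result is
 * order-independent; arrays and plain objects fall back to JSON.stringify.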
+ */ +export function hashArtifact(value: unknown): string { + if (value === null || value === undefined) { + return "null"; + } + + if (typeof value === "string") { + return defaultHashFn(value); + } + + if (value instanceof Map) { + const entries = Array.from(value.entries()) + .sort(([a], [b]) => String(a).localeCompare(String(b))) + .map(([k, v]) => `${String(k)}=${String(v)}`) + .join(";"); + return defaultHashFn(entries); + } + + if (value instanceof Set) { + const entries = Array.from(value) + .map(String) + .sort() + .join(";"); + return defaultHashFn(entries); + } + + if (Array.isArray(value)) { + return defaultHashFn(JSON.stringify(value)); + } + + if (typeof value === "object") { + return defaultHashFn(JSON.stringify(value)); + } + + return defaultHashFn(String(value)); +} diff --git a/packages/pipelines/src/dag.ts b/packages/pipelines/src/dag.ts new file mode 100644 index 000000000..cbb856954 --- /dev/null +++ b/packages/pipelines/src/dag.ts @@ -0,0 +1,225 @@ +import type { PipelineRouteDefinition } from "./route"; +import { isArtifactDependency, isRouteDependency, parseDependency } from "./dependencies"; + +export interface DAGNode { + id: string; + dependencies: Set; + dependents: Set; + emittedArtifacts: Set; +} + +export interface DAG { + nodes: Map; + executionOrder: string[]; +} + +export interface DAGValidationError { + type: "cycle" | "missing-route" | "missing-artifact"; + message: string; + details: { + routeId?: string; + dependencyId?: string; + cycle?: string[]; + }; +} + +export interface DAGValidationResult { + valid: boolean; + errors: DAGValidationError[]; + dag?: DAG; +} + +export function buildDAG(routes: readonly PipelineRouteDefinition[]): DAGValidationResult { + const errors: DAGValidationError[] = []; + const nodes = new Map(); + const routeIds = new Set(routes.map((r) => r.id)); + const artifactsByRoute = new Map>(); + + for (const route of routes) { + const emittedArtifacts = new Set(); + if (route.emits) { + for (const artifactName of Object.keys(route.emits)) { + emittedArtifacts.add(`${route.id}:${artifactName}`); + } + } + artifactsByRoute.set(route.id, emittedArtifacts); + + nodes.set(route.id, { + id: route.id, + dependencies: new Set(), + dependents: new Set(), + emittedArtifacts, + }); + } + + for (const route of routes) { + const node = nodes.get(route.id)!; + + if (!route.depends) continue; + + for (const dep of route.depends) { + const parsed = parseDependency(dep); + + if (isRouteDependency(dep)) { + if (!routeIds.has(parsed.routeId)) { + errors.push({ + type: "missing-route", + message: `Route "${route.id}" depends on non-existent route "${parsed.routeId}"`, + details: { routeId: route.id, dependencyId: parsed.routeId }, + }); + continue; + } + node.dependencies.add(parsed.routeId); + nodes.get(parsed.routeId)!.dependents.add(route.id); + } else if (isArtifactDependency(dep)) { + const artifactParsed = parseDependency(dep); + if (artifactParsed.type !== "artifact") continue; + + if (!routeIds.has(artifactParsed.routeId)) { + errors.push({ + type: "missing-route", + message: `Route "${route.id}" depends on artifact from non-existent route "${artifactParsed.routeId}"`, + details: { routeId: route.id, dependencyId: artifactParsed.routeId }, + }); + continue; + } + + const routeArtifacts = artifactsByRoute.get(artifactParsed.routeId); + const artifactKey = `${artifactParsed.routeId}:${artifactParsed.artifactName}`; + if (!routeArtifacts?.has(artifactKey)) { + errors.push({ + type: "missing-artifact", + message: `Route "${route.id}" depends on 
non-existent artifact "${artifactParsed.artifactName}" from route "${artifactParsed.routeId}"`, + details: { routeId: route.id, dependencyId: artifactKey }, + }); + continue; + } + + node.dependencies.add(artifactParsed.routeId); + nodes.get(artifactParsed.routeId)!.dependents.add(route.id); + } + } + } + + const cycleResult = detectCycle(nodes); + if (cycleResult) { + errors.push({ + type: "cycle", + message: `Circular dependency detected: ${cycleResult.join(" -> ")}`, + details: { cycle: cycleResult }, + }); + } + + if (errors.length > 0) { + return { valid: false, errors }; + } + + const executionOrder = topologicalSort(nodes); + + return { + valid: true, + errors: [], + dag: { nodes, executionOrder }, + }; +} + +function detectCycle(nodes: Map): string[] | null { + const visited = new Set(); + const recursionStack = new Set(); + const path: string[] = []; + + function dfs(nodeId: string): string[] | null { + visited.add(nodeId); + recursionStack.add(nodeId); + path.push(nodeId); + + const node = nodes.get(nodeId); + if (node) { + for (const depId of node.dependencies) { + if (!visited.has(depId)) { + const cycle = dfs(depId); + if (cycle) return cycle; + } else if (recursionStack.has(depId)) { + const cycleStart = path.indexOf(depId); + return [...path.slice(cycleStart), depId]; + } + } + } + + path.pop(); + recursionStack.delete(nodeId); + return null; + } + + for (const nodeId of nodes.keys()) { + if (!visited.has(nodeId)) { + const cycle = dfs(nodeId); + if (cycle) return cycle; + } + } + + return null; +} + +function topologicalSort(nodes: Map): string[] { + const result: string[] = []; + const visited = new Set(); + const temp = new Set(); + + function visit(nodeId: string): void { + if (temp.has(nodeId)) return; + if (visited.has(nodeId)) return; + + temp.add(nodeId); + + const node = nodes.get(nodeId); + if (node) { + for (const depId of node.dependencies) { + visit(depId); + } + } + + temp.delete(nodeId); + visited.add(nodeId); + result.push(nodeId); + } + + for (const nodeId of nodes.keys()) { + if (!visited.has(nodeId)) { + visit(nodeId); + } + } + + return result; +} + +export function getExecutionLayers(dag: DAG): string[][] { + const layers: string[][] = []; + const scheduled = new Set(); + const remaining = new Set(dag.nodes.keys()); + + while (remaining.size > 0) { + const layer: string[] = []; + + for (const nodeId of remaining) { + const node = dag.nodes.get(nodeId)!; + const allDepsScheduled = [...node.dependencies].every((dep) => scheduled.has(dep)); + if (allDepsScheduled) { + layer.push(nodeId); + } + } + + if (layer.length === 0) { + break; + } + + for (const nodeId of layer) { + remaining.delete(nodeId); + scheduled.add(nodeId); + } + + layers.push(layer); + } + + return layers; +} diff --git a/packages/pipelines/src/dependencies.ts b/packages/pipelines/src/dependencies.ts new file mode 100644 index 000000000..7f07f608e --- /dev/null +++ b/packages/pipelines/src/dependencies.ts @@ -0,0 +1,70 @@ +type RouteDependency = `route:${string}`; +type ArtifactDependency = `artifact:${string}:${string}`; + +export type PipelineDependency = RouteDependency | ArtifactDependency; + +export interface ParsedRouteDependency { + type: "route"; + routeId: string; +} + +export interface ParsedArtifactDependency { + type: "artifact"; + routeId: string; + artifactName: string; +} + +export type ParsedDependency = ParsedRouteDependency | ParsedArtifactDependency; + +export type ParseDependencyType = + T extends `route:${infer RouteId}` + ? 
{ type: "route"; routeId: RouteId } + : T extends `artifact:${infer RouteId}:${infer ArtifactName}` + ? { type: "artifact"; routeId: RouteId; artifactName: ArtifactName } + : never; + +export type ExtractRouteDependencies = { + [K in keyof T]: T[K] extends `route:${infer RouteId}` ? RouteId : never; +}[number]; + +export type ExtractArtifactDependencies = { + [K in keyof T]: T[K] extends `artifact:${infer RouteId}:${infer ArtifactName}` + ? { routeId: RouteId; artifactName: ArtifactName } + : never; +}[number]; + +export type ExtractArtifactKeys = { + [K in keyof T]: T[K] extends `artifact:${infer RouteId}:${infer ArtifactName}` + ? `${RouteId}:${ArtifactName}` + : never; +}[number]; + +export function parseDependency(dep: PipelineDependency): ParsedDependency { + const parts = dep.split(":"); + + if (parts[0] === "route" && parts[1]) { + return { type: "route", routeId: parts[1] }; + } + + if (parts[0] === "artifact" && parts[1] && parts[2]) { + return { type: "artifact", routeId: parts[1], artifactName: parts[2] }; + } + + throw new Error(`Invalid dependency format: ${dep}. Expected "route:" or "artifact::"`); +} + +export function isRouteDependency(dep: PipelineDependency): dep is RouteDependency { + return dep.startsWith("route:"); +} + +export function isArtifactDependency(dep: PipelineDependency): dep is ArtifactDependency { + return dep.startsWith("artifact:"); +} + +export function createRouteDependency(routeId: string): RouteDependency { + return `route:${routeId}`; +} + +export function createArtifactDependency(routeId: string, artifactName: string): ArtifactDependency { + return `artifact:${routeId}:${artifactName}`; +} diff --git a/packages/pipelines/src/events.ts b/packages/pipelines/src/events.ts new file mode 100644 index 000000000..3bf9079d9 --- /dev/null +++ b/packages/pipelines/src/events.ts @@ -0,0 +1,215 @@ +import type { FileContext } from "./types"; + +export type PipelineEventType = + | "pipeline:start" + | "pipeline:end" + | "version:start" + | "version:end" + | "artifact:start" + | "artifact:end" + | "artifact:produced" + | "artifact:consumed" + | "file:matched" + | "file:skipped" + | "file:fallback" + | "parse:start" + | "parse:end" + | "resolve:start" + | "resolve:end" + | "cache:hit" + | "cache:miss" + | "cache:store" + | "error"; + +export type PipelineStartEvent = { + type: "pipeline:start"; + versions: string[]; + timestamp: number; +}; + +export type PipelineEndEvent = { + type: "pipeline:end"; + durationMs: number; + timestamp: number; +}; + +export type VersionStartEvent = { + type: "version:start"; + version: string; + timestamp: number; +}; + +export type VersionEndEvent = { + type: "version:end"; + version: string; + durationMs: number; + timestamp: number; +}; + +export type ArtifactStartEvent = { + type: "artifact:start"; + artifactId: string; + version: string; + timestamp: number; +}; + +export type ArtifactEndEvent = { + type: "artifact:end"; + artifactId: string; + version: string; + durationMs: number; + timestamp: number; +}; + +export type ArtifactProducedEvent = { + type: "artifact:produced"; + artifactId: string; + routeId: string; + version: string; + timestamp: number; +}; + +export type ArtifactConsumedEvent = { + type: "artifact:consumed"; + artifactId: string; + routeId: string; + version: string; + timestamp: number; +}; + +export type FileMatchedEvent = { + type: "file:matched"; + file: FileContext; + routeId: string; + timestamp: number; +}; + +export type FileSkippedEvent = { + type: "file:skipped"; + file: FileContext; + reason: 
"no-match" | "filtered"; + timestamp: number; +}; + +export type FileFallbackEvent = { + type: "file:fallback"; + file: FileContext; + timestamp: number; +}; + +export type ParseStartEvent = { + type: "parse:start"; + file: FileContext; + routeId: string; + timestamp: number; +}; + +export type ParseEndEvent = { + type: "parse:end"; + file: FileContext; + routeId: string; + rowCount: number; + durationMs: number; + timestamp: number; +}; + +export type ResolveStartEvent = { + type: "resolve:start"; + file: FileContext; + routeId: string; + timestamp: number; +}; + +export type ResolveEndEvent = { + type: "resolve:end"; + file: FileContext; + routeId: string; + outputCount: number; + durationMs: number; + timestamp: number; +}; + +export type CacheHitEvent = { + type: "cache:hit"; + routeId: string; + file: FileContext; + version: string; + timestamp: number; +}; + +export type CacheMissEvent = { + type: "cache:miss"; + routeId: string; + file: FileContext; + version: string; + timestamp: number; +}; + +export type CacheStoreEvent = { + type: "cache:store"; + routeId: string; + file: FileContext; + version: string; + timestamp: number; +}; + +export type PipelineErrorEvent = { + type: "error"; + error: PipelineError; + timestamp: number; +}; + +export type PipelineEvent = + | PipelineStartEvent + | PipelineEndEvent + | VersionStartEvent + | VersionEndEvent + | ArtifactStartEvent + | ArtifactEndEvent + | ArtifactProducedEvent + | ArtifactConsumedEvent + | FileMatchedEvent + | FileSkippedEvent + | FileFallbackEvent + | ParseStartEvent + | ParseEndEvent + | ResolveStartEvent + | ResolveEndEvent + | CacheHitEvent + | CacheMissEvent + | CacheStoreEvent + | PipelineErrorEvent; + +export type PipelineErrorScope = "pipeline" | "version" | "file" | "route" | "artifact"; + +export interface PipelineError { + scope: PipelineErrorScope; + message: string; + error?: unknown; + file?: FileContext; + routeId?: string; + artifactId?: string; + version?: string; +} + +export type PipelineGraphNodeType = "source" | "file" | "route" | "artifact" | "output"; + +export type PipelineGraphNode = + | { id: string; type: "source"; version: string } + | { id: string; type: "file"; file: FileContext } + | { id: string; type: "route"; routeId: string } + | { id: string; type: "artifact"; artifactId: string } + | { id: string; type: "output"; outputIndex: number; property?: string }; + +export type PipelineGraphEdgeType = "provides" | "matched" | "parsed" | "resolved" | "uses-artifact"; + +export interface PipelineGraphEdge { + from: string; + to: string; + type: PipelineGraphEdgeType; +} + +export interface PipelineGraph { + nodes: PipelineGraphNode[]; + edges: PipelineGraphEdge[]; +} diff --git a/packages/pipelines/src/filters.ts b/packages/pipelines/src/filters.ts new file mode 100644 index 000000000..7adb1c7a0 --- /dev/null +++ b/packages/pipelines/src/filters.ts @@ -0,0 +1,62 @@ +import type { FileContext, PipelineFilter } from "./types"; +import picomatch from "picomatch"; + +export function byName(name: string): PipelineFilter { + return (ctx) => ctx.file.name === name; +} + +export function byDir(dir: FileContext["dir"]): PipelineFilter { + return (ctx) => ctx.file.dir === dir; +} + +export function byExt(ext: string): PipelineFilter { + if (ext === "") { + return (ctx) => ctx.file.ext === ""; + } + const normalizedExt = ext.startsWith(".") ? 
ext : `.${ext}`; + return (ctx) => ctx.file.ext === normalizedExt; +} + +export function byGlob(pattern: string): PipelineFilter { + const matcher = picomatch(pattern); + return (ctx) => matcher(ctx.file.path); +} + +export function byPath(pathPattern: string | RegExp): PipelineFilter { + if (typeof pathPattern === "string") { + return (ctx) => ctx.file.path === pathPattern; + } + return (ctx) => pathPattern.test(ctx.file.path); +} + +export function byProp(pattern: string | RegExp): PipelineFilter { + if (typeof pattern === "string") { + return (ctx) => ctx.row?.property === pattern; + } + return (ctx) => !!ctx.row?.property && pattern.test(ctx.row.property); +} + +export function bySource(sourceIds: string | string[]): PipelineFilter { + const ids = Array.isArray(sourceIds) ? sourceIds : [sourceIds]; + return (ctx) => ctx.source != null && ids.includes(ctx.source.id); +} + +export function and(...filters: PipelineFilter[]): PipelineFilter { + return (ctx) => filters.every((f) => f(ctx)); +} + +export function or(...filters: PipelineFilter[]): PipelineFilter { + return (ctx) => filters.some((f) => f(ctx)); +} + +export function not(filter: PipelineFilter): PipelineFilter { + return (ctx) => !filter(ctx); +} + +export function always(): PipelineFilter { + return () => true; +} + +export function never(): PipelineFilter { + return () => false; +} diff --git a/packages/pipelines/src/index.ts b/packages/pipelines/src/index.ts new file mode 100644 index 000000000..f50674a41 --- /dev/null +++ b/packages/pipelines/src/index.ts @@ -0,0 +1,167 @@ +export { + definePipeline, + type FallbackRouteDefinition, + type Pipeline, + type PipelineOptions, + type PipelineRunOptions, +} from "./pipeline"; + +export { + definePipelineArtifact, + type ArtifactBuildContext, + type InferArtifactId, + type InferArtifactsMap, + type InferArtifactValue, + type PipelineArtifactDefinition, +} from "./artifact"; + +export { + artifact, + isGlobalArtifact, + isVersionArtifact, + type Artifact, + type ArtifactDefinition, + type GlobalArtifact, + type InferArtifactType, + type InferEmittedArtifacts, +} from "./artifact-schema"; + +export { + definePipelineRoute, + type InferEmittedArtifactsFromRoute, + type InferRouteDepends, + type InferRouteEmits, + type InferRouteId, + type InferRouteOutput, + type InferRoutesOutput, + type InferRouteTransforms, + type PipelineRouteDefinition, + type RouteResolveContext, +} from "./route"; + +export { + definePipelineTransform, + applyTransforms, + type ChainTransforms, + type InferTransformInput, + type InferTransformOutput, + type PipelineTransformDefinition, + type TransformContext, +} from "./transform"; + +export { + definePipelineSource, + resolveSourceFiles, + resolveMultipleSourceFiles, + type FileMetadata, + type InferSourceId, + type InferSourceIds, + type PipelineSourceDefinition, + type SourceBackend, + type SourceFileContext, + type StreamOptions, +} from "./source"; + +export { + createArtifactDependency, + createRouteDependency, + isArtifactDependency, + isRouteDependency, + parseDependency, + type ExtractArtifactDependencies, + type ExtractArtifactKeys, + type ExtractRouteDependencies, + type ParsedArtifactDependency, + type ParsedDependency, + type ParsedRouteDependency, + type ParseDependencyType, + type PipelineDependency, +} from "./dependencies"; + +export { + buildDAG, + getExecutionLayers, + type DAG, + type DAGNode, + type DAGValidationError, + type DAGValidationResult, +} from "./dag"; + +export { + always, + and, + byDir, + byExt, + byGlob, + byName, + byPath, 
+ byProp, + bySource, + never, + not, + or, +} from "./filters"; + +export type { + DefaultRange, + FileContext, + FilterContext, + ParseContext, + ParsedRow, + ParserFn, + PipelineFilter, + PropertyJson, + ResolvedEntry, + ResolveContext, + ResolverFn, + RouteOutput, + RowContext, +} from "./types"; + +export type { + ArtifactConsumedEvent, + ArtifactEndEvent, + ArtifactProducedEvent, + ArtifactStartEvent, + CacheHitEvent, + CacheMissEvent, + CacheStoreEvent, + FileMatchedEvent, + FileSkippedEvent, + FileFallbackEvent, + ParseEndEvent, + ParseStartEvent, + PipelineEndEvent, + PipelineError, + PipelineErrorEvent, + PipelineErrorScope, + PipelineEvent, + PipelineEventType, + PipelineGraph, + PipelineGraphEdge, + PipelineGraphEdgeType, + PipelineGraphNode, + PipelineGraphNodeType, + PipelineStartEvent, + ResolveEndEvent, + ResolveStartEvent, + VersionEndEvent, + VersionStartEvent, +} from "./events"; + +export type { + PipelineRunResult, + PipelineSummary, +} from "./results"; + +export { + createMemoryCacheStore, + defaultHashFn, + hashArtifact, + serializeCacheKey, + type CacheEntry, + type CacheKey, + type CacheOptions, + type CacheStats, + type CacheStore, +} from "./cache"; diff --git a/packages/pipelines/src/pipeline.ts b/packages/pipelines/src/pipeline.ts new file mode 100644 index 000000000..0090608c6 --- /dev/null +++ b/packages/pipelines/src/pipeline.ts @@ -0,0 +1,925 @@ +import type { InferArtifactsMap, PipelineArtifactDefinition } from "./artifact"; +import type { ArtifactDefinition } from "./artifact-schema"; +import { isGlobalArtifact } from "./artifact-schema"; +import type { CacheEntry, CacheKey, CacheStore } from "./cache"; +import { defaultHashFn, hashArtifact } from "./cache"; +import { buildDAG, getExecutionLayers } from "./dag"; +import type { PipelineEvent, PipelineGraph, PipelineGraphEdge, PipelineGraphNode } from "./events"; +import type { PipelineRunResult, PipelineSummary } from "./results"; +import type { InferRoutesOutput, PipelineRouteDefinition, RouteResolveContext } from "./route"; +import type { PipelineSourceDefinition, SourceBackend, SourceFileContext } from "./source"; +import { resolveMultipleSourceFiles } from "./source"; +import { applyTransforms } from "./transform"; +import type { + FileContext, + ParseContext, + ParsedRow, + PipelineFilter, + ResolvedEntry, + ResolveContext, +} from "./types"; + +interface SourceAdapter { + listFiles: (version: string) => Promise; + readFile: (file: FileContext) => Promise; +} + +export interface FallbackRouteDefinition< + TArtifacts extends Record = Record, + TOutput = unknown, +> { + filter?: PipelineFilter; + parser: (ctx: ParseContext) => AsyncIterable; + resolver: (ctx: ResolveContext, rows: AsyncIterable) => Promise; +} + +export interface PipelineOptions< + TArtifacts extends readonly PipelineArtifactDefinition[] = readonly PipelineArtifactDefinition[], + TRoutes extends readonly PipelineRouteDefinition[] = readonly PipelineRouteDefinition[], +> { + versions: string[]; + inputs: PipelineSourceDefinition[]; + artifacts?: TArtifacts; + routes: TRoutes; + include?: PipelineFilter; + strict?: boolean; + concurrency?: number; + cacheStore?: CacheStore; + fallback?: FallbackRouteDefinition>; + onEvent?: (event: PipelineEvent) => void | Promise; +} + +export interface PipelineRunOptions { + cache?: boolean; + versions?: string[]; +} + +export interface Pipeline { + run: (options?: PipelineRunOptions) => Promise>; +} + +type InferPipelineOutput< + TRoutes extends readonly PipelineRouteDefinition[], + TFallback extends 
FallbackRouteDefinition | undefined, +> = TFallback extends FallbackRouteDefinition + ? InferRoutesOutput | TFallbackOutput + : InferRoutesOutput; + +export function definePipeline< + const TArtifacts extends readonly PipelineArtifactDefinition[], + const TRoutes extends readonly PipelineRouteDefinition[], + TFallback extends FallbackRouteDefinition, unknown> | undefined = undefined, +>( + options: PipelineOptions & { fallback?: TFallback }, +): Pipeline> { + return createPipelineExecutor(options); +} + +function createPipelineExecutor( + options: PipelineOptions, +): Pipeline { + const { + versions, + inputs, + artifacts = [], + routes, + include, + strict = false, + concurrency = 4, + cacheStore, + fallback, + onEvent, + } = options; + + const dagResult = buildDAG(routes); + if (!dagResult.valid) { + throw new Error(`Pipeline DAG validation failed:\n${dagResult.errors.map((e) => ` - ${e.message}`).join("\n")}`); + } + const dag = dagResult.dag!; + + async function emit(event: PipelineEvent): Promise { + if (onEvent) { + await onEvent(event); + } + } + + async function buildCacheKey( + routeId: string, + version: string, + file: FileContext, + fileContent: string, + artifactsMap: Record, + consumedArtifactIds: string[], + ): Promise { + const artifactHashes: Record = {}; + for (const id of consumedArtifactIds) { + if (id in artifactsMap) { + artifactHashes[id] = hashArtifact(artifactsMap[id]); + } + } + + return { + routeId, + version, + inputHash: defaultHashFn(fileContent), + artifactHashes, + }; + } + + function createSourceAdapter(): SourceAdapter { + if (inputs.length === 0) { + throw new Error("Pipeline requires at least one input source"); + } + + const backends = new Map(); + for (const input of inputs) { + backends.set(input.id, input.backend); + } + + return { + listFiles: async (version: string) => { + return resolveMultipleSourceFiles(inputs, version); + }, + readFile: async (file: FileContext) => { + const sourceFile = file as SourceFileContext; + if (sourceFile.source) { + const backend = backends.get(sourceFile.source.id); + if (backend) { + return backend.readFile(file); + } + } + const firstBackend = backends.values().next().value; + if (firstBackend) { + return firstBackend.readFile(file); + } + throw new Error(`No backend found for file: ${file.path}`); + }, + }; + } + + async function run(runOptions: PipelineRunOptions = {}): Promise> { + const { cache: enableCache = true, versions: runVersions } = runOptions; + const useCache = enableCache && cacheStore != null; + const versionsToRun = runVersions ?? 
versions; + + const effectiveSource = createSourceAdapter(); + + const startTime = performance.now(); + const graphNodes: PipelineGraphNode[] = []; + const graphEdges: PipelineGraphEdge[] = []; + const allOutputs: TOutput[] = []; + const errors: PipelineRunResult["errors"] = []; + + let totalFiles = 0; + let matchedFiles = 0; + let skippedFiles = 0; + let fallbackFiles = 0; + + await emit({ type: "pipeline:start", versions: versionsToRun, timestamp: Date.now() }); + + for (const version of versionsToRun) { + const versionStartTime = performance.now(); + await emit({ type: "version:start", version, timestamp: Date.now() }); + + const sourceNodeId = `source:${version}`; + graphNodes.push({ id: sourceNodeId, type: "source", version }); + + const artifactsMap: Record = {}; + const globalArtifactsMap: Record = {}; + + for (const artifactDef of artifacts) { + const artifactStartTime = performance.now(); + await emit({ + type: "artifact:start", + artifactId: artifactDef.id, + version, + timestamp: Date.now(), + }); + + const artifactNodeId = `artifact:${version}:${artifactDef.id}`; + graphNodes.push({ id: artifactNodeId, type: "artifact", artifactId: artifactDef.id }); + graphEdges.push({ from: sourceNodeId, to: artifactNodeId, type: "provides" }); + + try { + let rows: AsyncIterable | undefined; + + if (artifactDef.filter && artifactDef.parser) { + const files = await effectiveSource.listFiles(version); + for (const file of files) { + if (artifactDef.filter({ file })) { + const parseCtx = createParseContext(file, effectiveSource); + rows = artifactDef.parser(parseCtx); + break; + } + } + } + + const value = await artifactDef.build({ version }, rows); + artifactsMap[artifactDef.id] = value; + } catch (err) { + const pipelineError = { + scope: "artifact" as const, + message: err instanceof Error ? err.message : String(err), + error: err, + artifactId: artifactDef.id, + version, + }; + errors.push(pipelineError); + await emit({ + type: "error", + error: pipelineError, + timestamp: Date.now(), + }); + } + + await emit({ + type: "artifact:end", + artifactId: artifactDef.id, + version, + durationMs: performance.now() - artifactStartTime, + timestamp: Date.now(), + }); + } + + const files = await effectiveSource.listFiles(version); + totalFiles += files.length; + + const filesToProcess = include + ? 
files.filter((file) => include({ file })) + : files; + + const executionLayers = getExecutionLayers(dag); + + for (const layer of executionLayers) { + const processingQueue = createProcessingQueue(concurrency); + const layerRoutes = routes.filter((r) => layer.includes(r.id)); + + for (const route of layerRoutes) { + const matchingFiles = filesToProcess.filter((file) => { + const sourceFile = file as SourceFileContext; + const filterCtx = { + file, + source: sourceFile.source, + }; + return route.filter(filterCtx); + }); + + for (const file of matchingFiles) { + await processingQueue.add(async () => { + const fileNodeId = `file:${version}:${file.path}`; + if (!graphNodes.some((n) => n.id === fileNodeId)) { + graphNodes.push({ id: fileNodeId, type: "file", file }); + graphEdges.push({ from: sourceNodeId, to: fileNodeId, type: "provides" }); + } + + matchedFiles++; + const routeNodeId = `route:${version}:${route.id}`; + + if (!graphNodes.some((n) => n.id === routeNodeId)) { + graphNodes.push({ id: routeNodeId, type: "route", routeId: route.id }); + } + + graphEdges.push({ from: fileNodeId, to: routeNodeId, type: "matched" }); + + await emit({ + type: "file:matched", + file, + routeId: route.id, + timestamp: Date.now(), + }); + + try { + const routeCacheEnabled = useCache && route.cache !== false; + let result: ProcessRouteResult | null = null; + let cacheHit = false; + + if (routeCacheEnabled && cacheStore) { + const fileContent = await effectiveSource.readFile(file); + const inputHash = defaultHashFn(fileContent); + + const partialKey: CacheKey = { + routeId: route.id, + version, + inputHash, + artifactHashes: {}, + }; + + const cachedEntry = await cacheStore.get(partialKey); + + if (cachedEntry) { + const currentArtifactHashes: Record = {}; + for (const id of Object.keys(cachedEntry.key.artifactHashes)) { + const combinedMap = { ...artifactsMap, ...globalArtifactsMap }; + if (id in combinedMap) { + currentArtifactHashes[id] = hashArtifact(combinedMap[id]); + } + } + + const artifactHashesMatch = Object.keys(cachedEntry.key.artifactHashes).every( + (id) => currentArtifactHashes[id] === cachedEntry.key.artifactHashes[id], + ); + + if (artifactHashesMatch) { + cacheHit = true; + result = { + outputs: cachedEntry.output, + emittedArtifacts: cachedEntry.producedArtifacts, + consumedArtifactIds: Object.keys(cachedEntry.key.artifactHashes), + }; + + await emit({ + type: "cache:hit", + routeId: route.id, + file, + version, + timestamp: Date.now(), + }); + } + } + + if (!cacheHit) { + await emit({ + type: "cache:miss", + routeId: route.id, + file, + version, + timestamp: Date.now(), + }); + } + } + + if (!result) { + result = await processRoute( + file, + route, + { ...artifactsMap, ...globalArtifactsMap }, + effectiveSource, + version, + emit, + ); + + if (routeCacheEnabled && cacheStore) { + const fileContent = await effectiveSource.readFile(file); + const combinedMap = { ...artifactsMap, ...globalArtifactsMap }; + const cacheKey = await buildCacheKey( + route.id, + version, + file, + fileContent, + combinedMap, + result.consumedArtifactIds, + ); + + const cacheEntry: CacheEntry = { + key: cacheKey, + output: result.outputs, + producedArtifacts: result.emittedArtifacts, + createdAt: new Date().toISOString(), + }; + + await cacheStore.set(cacheEntry); + + await emit({ + type: "cache:store", + routeId: route.id, + file, + version, + timestamp: Date.now(), + }); + } + } + + for (const [artifactName, artifactValue] of Object.entries(result.emittedArtifacts)) { + const prefixedKey = 
`${route.id}:${artifactName}`; + const artifactDef = route.emits?.[artifactName]; + + if (artifactDef && isGlobalArtifact(artifactDef)) { + globalArtifactsMap[prefixedKey] = artifactValue; + } else { + artifactsMap[prefixedKey] = artifactValue; + } + } + + for (const output of result.outputs) { + const outputIndex = allOutputs.length; + allOutputs.push(output as TOutput); + + const outputNodeId = `output:${version}:${outputIndex}`; + graphNodes.push({ + id: outputNodeId, + type: "output", + outputIndex, + property: (output as { property?: string }).property, + }); + graphEdges.push({ from: routeNodeId, to: outputNodeId, type: "resolved" }); + } + } catch (err) { + const pipelineError = { + scope: "route" as const, + message: err instanceof Error ? err.message : String(err), + error: err, + file, + routeId: route.id, + version, + }; + errors.push(pipelineError); + await emit({ + type: "error", + error: pipelineError, + timestamp: Date.now(), + }); + } + }); + } + } + + await processingQueue.drain(); + } + + const processedFiles = new Set(); + for (const route of routes) { + for (const file of filesToProcess) { + const sourceFile = file as SourceFileContext; + const filterCtx = { file, source: sourceFile.source }; + if (route.filter(filterCtx)) { + processedFiles.add(file.path); + } + } + } + + for (const file of filesToProcess) { + if (processedFiles.has(file.path)) continue; + + if (fallback) { + const shouldUseFallback = !fallback.filter || fallback.filter({ file }); + + if (shouldUseFallback) { + fallbackFiles++; + + const fileNodeId = `file:${version}:${file.path}`; + if (!graphNodes.some((n) => n.id === fileNodeId)) { + graphNodes.push({ id: fileNodeId, type: "file", file }); + graphEdges.push({ from: sourceNodeId, to: fileNodeId, type: "provides" }); + } + + await emit({ + type: "file:fallback", + file, + timestamp: Date.now(), + }); + + try { + const outputs = await processFallback( + file, + fallback, + { ...artifactsMap, ...globalArtifactsMap }, + effectiveSource, + version, + emit, + ); + + for (const output of outputs) { + const outputIndex = allOutputs.length; + allOutputs.push(output as TOutput); + + const outputNodeId = `output:${version}:${outputIndex}`; + graphNodes.push({ + id: outputNodeId, + type: "output", + outputIndex, + property: (output as { property?: string }).property, + }); + graphEdges.push({ from: fileNodeId, to: outputNodeId, type: "resolved" }); + } + } catch (err) { + const pipelineError = { + scope: "file" as const, + message: err instanceof Error ? 
err.message : String(err), + error: err, + file, + version, + }; + errors.push(pipelineError); + await emit({ + type: "error", + error: pipelineError, + timestamp: Date.now(), + }); + } + } else { + skippedFiles++; + await emit({ + type: "file:skipped", + file, + reason: "filtered", + timestamp: Date.now(), + }); + } + } else { + skippedFiles++; + + if (strict) { + const pipelineError = { + scope: "file" as const, + message: `No matching route for file: ${file.path}`, + file, + version, + }; + errors.push(pipelineError); + await emit({ + type: "error", + error: pipelineError, + timestamp: Date.now(), + }); + } else { + await emit({ + type: "file:skipped", + file, + reason: "no-match", + timestamp: Date.now(), + }); + } + } + } + + await emit({ + type: "version:end", + version, + durationMs: performance.now() - versionStartTime, + timestamp: Date.now(), + }); + } + + const durationMs = performance.now() - startTime; + + await emit({ + type: "pipeline:end", + durationMs, + timestamp: Date.now(), + }); + + const summary: PipelineSummary = { + versions: versionsToRun, + totalFiles, + matchedFiles, + skippedFiles, + fallbackFiles, + totalOutputs: allOutputs.length, + durationMs, + }; + + const graph: PipelineGraph = { + nodes: graphNodes, + edges: graphEdges, + }; + + return { + data: allOutputs, + graph, + errors, + summary, + }; + } + + return { run }; +} + +function createParseContext(file: FileContext, source: SourceAdapter): ParseContext { + let cachedContent: string | null = null; + + return { + file, + readContent: async () => { + if (cachedContent === null) { + cachedContent = await source.readFile(file); + } + return cachedContent!; + }, + readLines: async function* () { + const content = await source.readFile(file); + const lines = content.split(/\r?\n/); + for (const line of lines) { + yield line; + } + }, + isComment: (line: string) => line.startsWith("#") || line.trim() === "", + }; +} + +interface ResolveContextOptions { + version: string; + file: FileContext; + routeId: string; + artifactsMap: Record; + emittedArtifacts: Record; + emitsDefinition?: Record; + onArtifactEmit?: (id: string, value: unknown) => void; + onArtifactGet?: (id: string) => void; +} + +function createRouteResolveContext( + options: ResolveContextOptions, +): RouteResolveContext> { + const { version, file, routeId, artifactsMap, emittedArtifacts, emitsDefinition, onArtifactEmit, onArtifactGet } = options; + + return { + version, + file, + getArtifact: (key: K): unknown => { + if (!(key in artifactsMap)) { + throw new Error(`Artifact "${key}" not found. Make sure a route that produces this artifact runs before route "${routeId}".`); + } + onArtifactGet?.(key); + return artifactsMap[key]; + }, + emitArtifact: (id: K, value: unknown): void => { + if (emitsDefinition) { + const def = emitsDefinition[id]; + if (def) { + const result = def.schema.safeParse(value); + if (!result.success) { + throw new Error(`Artifact "${id}" validation failed: ${result.error.message}`); + } + } + } + emittedArtifacts[id] = value; + onArtifactEmit?.(id, value); + }, + normalizeEntries: (entries) => { + return entries.sort((a, b) => { + const aStart = a.range?.split("..")[0] ?? a.codePoint ?? ""; + const bStart = b.range?.split("..")[0] ?? b.codePoint ?? 
""; + return aStart.localeCompare(bStart); + }); + }, + now: () => new Date().toISOString(), + }; +} + +function createResolveContext>( + options: Omit, +): ResolveContext { + const { version, file, routeId, artifactsMap, emittedArtifacts, onArtifactEmit, onArtifactGet } = options; + + return { + version, + file, + getArtifact: (id: K): TArtifacts[K] => { + if (!(id in artifactsMap)) { + throw new Error(`Artifact "${String(id)}" not found. Make sure a route that produces this artifact runs before route "${routeId}".`); + } + onArtifactGet?.(String(id)); + return artifactsMap[id as string] as TArtifacts[K]; + }, + emitArtifact: (id: K, value: V): void => { + emittedArtifacts[id] = value; + onArtifactEmit?.(id, value); + }, + normalizeEntries: (entries: ResolvedEntry[]) => { + return entries.sort((a, b) => { + const aStart = a.range?.split("..")[0] ?? a.codePoint ?? ""; + const bStart = b.range?.split("..")[0] ?? b.codePoint ?? ""; + return aStart.localeCompare(bStart); + }); + }, + now: () => new Date().toISOString(), + }; +} + +interface ProcessRouteResult { + outputs: unknown[]; + emittedArtifacts: Record; + consumedArtifactIds: string[]; +} + +async function processRoute( + file: FileContext, + route: PipelineRouteDefinition, + artifactsMap: Record, + source: SourceAdapter, + version: string, + emit: (event: PipelineEvent) => Promise, +): Promise { + const parseStartTime = performance.now(); + await emit({ + type: "parse:start", + file, + routeId: route.id, + timestamp: Date.now(), + }); + + const parseCtx = createParseContext(file, source); + let rows: AsyncIterable = route.parser(parseCtx); + + const collectedRows: ParsedRow[] = []; + const filteredRows = filterRows(rows as AsyncIterable, file, route.filter, collectedRows); + + if (route.transforms && route.transforms.length > 0) { + rows = applyTransforms( + { version, file }, + filteredRows, + route.transforms, + ); + } else { + rows = filteredRows; + } + + await emit({ + type: "parse:end", + file, + routeId: route.id, + rowCount: collectedRows.length, + durationMs: performance.now() - parseStartTime, + timestamp: Date.now(), + }); + + const resolveStartTime = performance.now(); + await emit({ + type: "resolve:start", + file, + routeId: route.id, + timestamp: Date.now(), + }); + + const emittedArtifacts: Record = {}; + const consumedArtifactIds: string[] = []; + + const resolveCtx = createRouteResolveContext({ + version, + file, + routeId: route.id, + artifactsMap, + emittedArtifacts, + emitsDefinition: route.emits, + onArtifactEmit: async (id) => { + await emit({ + type: "artifact:produced", + artifactId: `${route.id}:${id}`, + routeId: route.id, + version, + timestamp: Date.now(), + }); + }, + onArtifactGet: async (id) => { + if (!consumedArtifactIds.includes(id)) { + consumedArtifactIds.push(id); + await emit({ + type: "artifact:consumed", + artifactId: id, + routeId: route.id, + version, + timestamp: Date.now(), + }); + } + }, + }); + + const outputs = await route.resolver(resolveCtx, rows); + + const outputArray = Array.isArray(outputs) ? 
outputs : [outputs]; + + await emit({ + type: "resolve:end", + file, + routeId: route.id, + outputCount: outputArray.length, + durationMs: performance.now() - resolveStartTime, + timestamp: Date.now(), + }); + + return { outputs: outputArray, emittedArtifacts, consumedArtifactIds }; +} + +async function processFallback( + file: FileContext, + fallback: FallbackRouteDefinition, + artifactsMap: Record, + source: SourceAdapter, + version: string, + emit: (event: PipelineEvent) => Promise, +): Promise { + const parseStartTime = performance.now(); + await emit({ + type: "parse:start", + file, + routeId: "__fallback__", + timestamp: Date.now(), + }); + + const parseCtx = createParseContext(file, source); + const rows = fallback.parser(parseCtx); + + const collectedRows: ParsedRow[] = []; + const filteredRows = filterRows(rows, file, fallback.filter, collectedRows); + + await emit({ + type: "parse:end", + file, + routeId: "__fallback__", + rowCount: collectedRows.length, + durationMs: performance.now() - parseStartTime, + timestamp: Date.now(), + }); + + const resolveStartTime = performance.now(); + await emit({ + type: "resolve:start", + file, + routeId: "__fallback__", + timestamp: Date.now(), + }); + + const emittedArtifacts: Record = {}; + + const resolveCtx = createResolveContext({ + version, + file, + routeId: "__fallback__", + artifactsMap, + emittedArtifacts, + }); + const outputs = await fallback.resolver(resolveCtx, filteredRows); + + const outputArray = Array.isArray(outputs) ? outputs : [outputs]; + + await emit({ + type: "resolve:end", + file, + routeId: "__fallback__", + outputCount: outputArray.length, + durationMs: performance.now() - resolveStartTime, + timestamp: Date.now(), + }); + + return outputArray; +} + +async function* filterRows( + rows: AsyncIterable, + file: FileContext, + filter: PipelineFilter | undefined, + collector: ParsedRow[], +): AsyncIterable { + for await (const row of rows) { + collector.push(row); + + if (!filter) { + yield row; + continue; + } + + const shouldInclude = filter({ + file, + row: { property: row.property }, + }); + + if (shouldInclude) { + yield row; + } + } +} + +interface ProcessingQueue { + add: (task: () => Promise) => Promise; + drain: () => Promise; +} + +function createProcessingQueue(concurrency: number): ProcessingQueue { + const queue: (() => Promise)[] = []; + let running = 0; + let resolveIdle: (() => void) | null = null; + + async function runNext(): Promise { + if (running >= concurrency || queue.length === 0) { + if (running === 0 && queue.length === 0 && resolveIdle) { + resolveIdle(); + } + return; + } + + running++; + const task = queue.shift()!; + + try { + await task(); + } finally { + running--; + runNext(); + } + } + + return { + add: async (task) => { + queue.push(task); + runNext(); + }, + drain: () => { + if (running === 0 && queue.length === 0) { + return Promise.resolve(); + } + return new Promise((resolve) => { + resolveIdle = resolve; + }); + }, + }; +} diff --git a/packages/pipelines/src/results.ts b/packages/pipelines/src/results.ts new file mode 100644 index 000000000..09db2b28c --- /dev/null +++ b/packages/pipelines/src/results.ts @@ -0,0 +1,18 @@ +import type { PipelineError, PipelineGraph } from "./events"; + +export interface PipelineSummary { + versions: string[]; + totalFiles: number; + matchedFiles: number; + skippedFiles: number; + fallbackFiles: number; + totalOutputs: number; + durationMs: number; +} + +export interface PipelineRunResult { + data: TData[]; + graph: PipelineGraph; + errors: 
PipelineError[]; + summary: PipelineSummary; +} diff --git a/packages/pipelines/src/route.ts b/packages/pipelines/src/route.ts new file mode 100644 index 000000000..25bdfc02a --- /dev/null +++ b/packages/pipelines/src/route.ts @@ -0,0 +1,89 @@ +import type { z } from "zod"; +import type { ArtifactDefinition, InferArtifactType } from "./artifact-schema"; +import type { ExtractArtifactKeys, PipelineDependency } from "./dependencies"; +import type { ChainTransforms, PipelineTransformDefinition } from "./transform"; +import type { + FileContext, + ParserFn, + ParsedRow, + PipelineFilter, + PropertyJson, + RouteOutput, +} from "./types"; + +export interface RouteResolveContext< + TArtifactKeys extends string = string, + TEmits extends Record = Record, +> { + version: string; + file: FileContext; + getArtifact: (key: K) => unknown; + emitArtifact: ( + key: K, + value: InferArtifactType, + ) => void; + normalizeEntries: (entries: Array<{ range?: string; codePoint?: string; sequence?: string[]; value: string | string[] }>) => Array<{ range?: string; codePoint?: string; sequence?: string[]; value: string | string[] }>; + now: () => string; +} + +export interface PipelineRouteDefinition< + TId extends string = string, + TDepends extends readonly PipelineDependency[] = readonly PipelineDependency[], + TEmits extends Record = Record, + TTransforms extends readonly PipelineTransformDefinition[] = readonly PipelineTransformDefinition[], + TOutput = PropertyJson[], +> { + id: TId; + filter: PipelineFilter; + depends?: TDepends; + emits?: TEmits; + parser: ParserFn; + transforms?: TTransforms; + resolver: ( + ctx: RouteResolveContext, TEmits>, + rows: AsyncIterable>, + ) => Promise; + out?: RouteOutput; + cache?: boolean; +} + +export function definePipelineRoute< + const TId extends string, + const TDepends extends readonly PipelineDependency[] = readonly [], + const TEmits extends Record = Record, + const TTransforms extends readonly PipelineTransformDefinition[] = readonly [], + TOutput = PropertyJson[], +>( + definition: PipelineRouteDefinition, +): PipelineRouteDefinition { + return definition; +} + +export type InferRouteId = T extends PipelineRouteDefinition + ? TId + : never; + +export type InferRouteDepends = T extends PipelineRouteDefinition + ? TDepends + : never; + +export type InferRouteEmits = T extends PipelineRouteDefinition + ? TEmits + : never; + +export type InferRouteTransforms = T extends PipelineRouteDefinition + ? TTransforms + : never; + +export type InferRouteOutput = T extends PipelineRouteDefinition + ? TOutput + : never; + +export type InferRoutesOutput[]> = + T[number] extends PipelineRouteDefinition + ? TOutput extends unknown[] ? TOutput[number] : TOutput + : never; + +export type InferEmittedArtifactsFromRoute = T extends PipelineRouteDefinition + ? { [K in keyof TEmits]: TEmits[K] extends ArtifactDefinition ? z.infer : never } + : never; diff --git a/packages/pipelines/src/source.ts b/packages/pipelines/src/source.ts new file mode 100644 index 000000000..5ac84a6b3 --- /dev/null +++ b/packages/pipelines/src/source.ts @@ -0,0 +1,191 @@ +import type { FileContext, PipelineFilter } from "./types"; + +/** + * Options for streaming file content. + */ +export interface StreamOptions { + /** + * The size of each chunk in bytes. + */ + chunkSize?: number; + + /** + * The byte offset to start reading from. + */ + start?: number; + + /** + * The byte offset to stop reading at (exclusive). + */ + end?: number; +} + +/** + * Metadata about a file. 
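+ *
+ * An illustrative shape of a metadata value a backend's `getMetadata()` might
+ * return (values below are placeholders, not from this package):
+ *
+ * @example
+ * const meta: FileMetadata = {
+ *   size: 245_120,                        // bytes
+ *   hash: "3f2a…",                        // e.g. a SHA-256 digest, if the backend computes one
+ *   lastModified: "2024-09-10T12:00:00Z", // ISO 8601
+ * };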
+ */ +export interface FileMetadata { + /** + * The size of the file in bytes. + */ + size: number; + + /** + * Optional hash of the file content (e.g., SHA-256). + */ + hash?: string; + + /** + * Optional last modified timestamp (ISO 8601 format). + */ + lastModified?: string; +} + +/** + * Backend interface for providing files to the pipeline. + * Implementations can wrap different storage systems (fs-bridge, HTTP, in-memory, etc.). + */ +export interface SourceBackend { + /** + * List all files available for a given Unicode version. + */ + listFiles(version: string): Promise; + + /** + * Read the full content of a file as a string. + */ + readFile(file: FileContext): Promise; + + /** + * Optional: Stream file content as chunks. + * Useful for large files to avoid loading everything into memory. + */ + readFileStream?(file: FileContext, options?: StreamOptions): AsyncIterable; + + /** + * Optional: Get metadata about a file without reading its content. + */ + getMetadata?(file: FileContext): Promise; +} + +/** + * Definition for a pipeline source. + * A source provides files from a specific backend with optional filtering. + */ +export interface PipelineSourceDefinition { + /** + * Unique identifier for this source. + */ + id: TId; + + /** + * The backend that provides file access. + */ + backend: SourceBackend; + + /** + * Optional filter to include only matching files. + * If not specified, all files from the backend are included. + */ + includes?: PipelineFilter; + + /** + * Optional filter to exclude matching files. + * Applied after includes filter. + */ + excludes?: PipelineFilter; +} + +/** + * A source with resolved file context that includes source metadata. + */ +export interface SourceFileContext extends FileContext { + /** + * The source this file came from. + */ + source: { + /** + * The source ID. + */ + id: string; + }; +} + +/** + * Define a pipeline source with a specific backend and optional filters. + * + * @example + * ```ts + * const httpSource = definePipelineSource({ + * id: "unicode-http", + * backend: createHttpBackend({ baseUrl: "https://unicode.org/Public" }), + * includes: byGlob("**\/*.txt"), + * excludes: byGlob("**\/Test*.txt"), + * }); + * ``` + */ +export function definePipelineSource( + definition: PipelineSourceDefinition, +): PipelineSourceDefinition { + return definition; +} + +/** + * Resolve files from a source for a given version, applying include/exclude filters. + */ +export async function resolveSourceFiles( + source: PipelineSourceDefinition, + version: string, +): Promise { + const allFiles = await source.backend.listFiles(version); + + const filteredFiles = allFiles.filter((file) => { + const ctx = { file }; + + if (source.includes && !source.includes(ctx)) { + return false; + } + + if (source.excludes && source.excludes(ctx)) { + return false; + } + + return true; + }); + + return filteredFiles.map((file) => ({ + ...file, + source: { id: source.id }, + })); +} + +/** + * Resolve files from multiple sources, merging results. + * Files from later sources with the same path will override earlier ones. + */ +export async function resolveMultipleSourceFiles( + sources: PipelineSourceDefinition[], + version: string, +): Promise { + const filesByPath = new Map(); + + for (const source of sources) { + const files = await resolveSourceFiles(source, version); + for (const file of files) { + filesByPath.set(file.path, file); + } + } + + return Array.from(filesByPath.values()); +} + +/** + * Type helper to extract the ID type from a source definition. 
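+ *
+ * A small sketch (the source and backend names are illustrative, not part of this package),
+ * assuming the source was defined with a literal `id`:
+ *
+ * @example
+ * const localSource = definePipelineSource({ id: "local", backend: someBackend });
+ * type LocalId = InferSourceId<typeof localSource>; // resolves to the literal type "local"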
+ */ +export type InferSourceId = T extends PipelineSourceDefinition ? TId : never; + +/** + * Type helper to extract IDs from multiple source definitions. + */ +export type InferSourceIds = { + [K in keyof T]: InferSourceId; +}[number]; diff --git a/packages/pipelines/src/transform.ts b/packages/pipelines/src/transform.ts new file mode 100644 index 000000000..0c3725471 --- /dev/null +++ b/packages/pipelines/src/transform.ts @@ -0,0 +1,144 @@ +import type { FileContext } from "./types"; + +export interface TransformContext { + version: string; + file: FileContext; +} + +export interface PipelineTransformDefinition { + id: string; + fn: (ctx: TransformContext, rows: AsyncIterable) => AsyncIterable; +} + +export function definePipelineTransform( + definition: PipelineTransformDefinition, +): PipelineTransformDefinition { + return definition; +} + +export type InferTransformInput = + T extends PipelineTransformDefinition ? TInput : never; + +export type InferTransformOutput = + T extends PipelineTransformDefinition ? TOutput : never; + +type ChainTwo = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? O2 + : never + : never; + +type ChainThree = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? T3 extends PipelineTransformDefinition + ? O3 + : never + : never + : never; + +type ChainFour = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? T3 extends PipelineTransformDefinition + ? T4 extends PipelineTransformDefinition + ? O4 + : never + : never + : never + : never; + +type ChainFive = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? T3 extends PipelineTransformDefinition + ? T4 extends PipelineTransformDefinition + ? T5 extends PipelineTransformDefinition + ? O5 + : never + : never + : never + : never + : never; + +type ChainSix = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? T3 extends PipelineTransformDefinition + ? T4 extends PipelineTransformDefinition + ? T5 extends PipelineTransformDefinition + ? T6 extends PipelineTransformDefinition + ? O6 + : never + : never + : never + : never + : never + : never; + +type ChainSeven = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? T3 extends PipelineTransformDefinition + ? T4 extends PipelineTransformDefinition + ? T5 extends PipelineTransformDefinition + ? T6 extends PipelineTransformDefinition + ? T7 extends PipelineTransformDefinition + ? O7 + : never + : never + : never + : never + : never + : never + : never; + +type ChainEight = T1 extends PipelineTransformDefinition + ? T2 extends PipelineTransformDefinition + ? T3 extends PipelineTransformDefinition + ? T4 extends PipelineTransformDefinition + ? T5 extends PipelineTransformDefinition + ? T6 extends PipelineTransformDefinition + ? T7 extends PipelineTransformDefinition + ? T8 extends PipelineTransformDefinition + ? O8 + : never + : never + : never + : never + : never + : never + : never + : never; + +export type ChainTransforms< + TInput, + TTransforms extends readonly PipelineTransformDefinition[], +> = TTransforms extends readonly [] + ? TInput + : TTransforms extends readonly [infer T1 extends PipelineTransformDefinition] + ? O1 + : TTransforms extends readonly [infer T1, infer T2] + ? ChainTwo + : TTransforms extends readonly [infer T1, infer T2, infer T3] + ? ChainThree + : TTransforms extends readonly [infer T1, infer T2, infer T3, infer T4] + ? 
ChainFour + : TTransforms extends readonly [infer T1, infer T2, infer T3, infer T4, infer T5] + ? ChainFive + : TTransforms extends readonly [infer T1, infer T2, infer T3, infer T4, infer T5, infer T6] + ? ChainSix + : TTransforms extends readonly [infer T1, infer T2, infer T3, infer T4, infer T5, infer T6, infer T7] + ? ChainSeven + : TTransforms extends readonly [infer T1, infer T2, infer T3, infer T4, infer T5, infer T6, infer T7, infer T8] + ? ChainEight + : unknown; + +export async function* applyTransforms( + ctx: TransformContext, + rows: AsyncIterable, + transforms: readonly PipelineTransformDefinition[], +): AsyncIterable { + let current: AsyncIterable = rows; + + for (const transform of transforms) { + current = transform.fn(ctx, current); + } + + yield* current; +} diff --git a/packages/pipelines/src/types.ts b/packages/pipelines/src/types.ts new file mode 100644 index 000000000..dfa39e1e1 --- /dev/null +++ b/packages/pipelines/src/types.ts @@ -0,0 +1,267 @@ +/** + * Represents the context of a file being processed in the pipeline. + */ +export interface FileContext { + /** + * The Unicode version being processed (e.g., "16.0.0"). + */ + version: string; + + /** + * The directory category of the file. + */ + dir: "ucd" | "extracted" | "auxiliary" | "emoji" | "unihan" | string; + + /** + * The relative path from the version root (e.g., "ucd/LineBreak.txt"). + */ + path: string; + + /** + * The file name (e.g., "LineBreak.txt"). + */ + name: string; + + /** + * The file extension (e.g., ".txt"). + */ + ext: string; +} + +/** + * Context for a specific row/line within a file. + * Used during row-level filtering in multi-property files. + */ +export interface RowContext { + /** + * The property name for multi-property files (e.g., "NFKC_Casefold"). + */ + property?: string; +} + +/** + * Combined context passed to filter predicates. + * During file routing, only `file` is defined. + * During row filtering, both `file` and `row` are defined. + */ +export interface FilterContext { + /** + * The file context. + */ + file: FileContext; + + /** + * The row context (only defined during row-level filtering). + */ + row?: RowContext; + + /** + * The source context (only defined when using multiple sources). + */ + source?: { + /** + * The source ID. + */ + id: string; + }; +} + +/** + * A predicate function that determines if a file or row should be processed. + */ +export type PipelineFilter = (ctx: FilterContext) => boolean; + +/** + * A parsed row from a UCD file. + */ +export interface ParsedRow { + /** + * The source file path relative to the version root. + */ + sourceFile: string; + + /** + * The kind of entry. + */ + kind: "range" | "point" | "sequence" | "alias"; + + /** + * Start of range (hex string, e.g., "0041"). + */ + start?: string; + + /** + * End of range (hex string, e.g., "005A"). + */ + end?: string; + + /** + * Single code point (hex string). + */ + codePoint?: string; + + /** + * Sequence of code points (hex strings). + */ + sequence?: string[]; + + /** + * Property name for multi-property files. + */ + property?: string; + + /** + * The value(s) associated with this entry. + */ + value?: string | string[]; + + /** + * Additional metadata (comments, line numbers, etc.). + */ + meta?: Record; +} + +/** + * Context passed to parser functions. + */ +export interface ParseContext { + /** + * The file being parsed. + */ + file: FileContext; + + /** + * Read the raw content of the file. + */ + readContent: () => Promise; + + /** + * Read the file line by line. 
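+ *
+ * Typical use inside a parser, sketched after the parser patterns exercised in this
+ * package's tests (the field layout of the parsed line is illustrative):
+ *
+ * @example
+ * async function* parser(ctx: ParseContext) {
+ *   for await (const line of ctx.readLines()) {
+ *     if (ctx.isComment(line)) continue;
+ *     const [codePoint, value] = line.split(";");
+ *     yield { sourceFile: ctx.file.path, kind: "point", codePoint, value } satisfies ParsedRow;
+ *   }
+ * }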
+ */ + readLines: () => AsyncIterable; + + /** + * Check if a line is a comment. + */ + isComment: (line: string) => boolean; +} + +/** + * A parser function that converts file content to parsed rows. + */ +export type ParserFn = (ctx: ParseContext) => AsyncIterable; + +/** + * A resolved entry in the output JSON. + */ +export interface ResolvedEntry { + /** + * Range in "XXXX..YYYY" format (hex, inclusive). + */ + range?: `${string}..${string}`; + + /** + * Single code point in hex. + */ + codePoint?: string; + + /** + * Sequence of code points. + */ + sequence?: string[]; + + /** + * The value(s) for this entry. + */ + value: string | string[]; +} + +/** + * A default range from @missing declarations. + */ +export interface DefaultRange { + /** + * The range this default applies to. + */ + range: `${string}..${string}`; + + /** + * The default value. + */ + value: string | string[]; +} + +/** + * The standardized JSON output for a property. + */ +export interface PropertyJson { + /** + * The Unicode version (e.g., "16.0.0"). + */ + version: string; + + /** + * The property name (e.g., "Line_Break"). + */ + property: string; + + /** + * The source file name (e.g., "LineBreak.txt"). + */ + file: string; + + /** + * The resolved entries. + */ + entries: ResolvedEntry[]; + + /** + * Default ranges from @missing (in encounter order). + */ + defaults?: DefaultRange[]; + + /** + * Additional metadata. + */ + meta?: Record; +} + +export interface ResolveContext = Record> { + version: string; + + file: FileContext; + + getArtifact: (id: K) => TArtifacts[K]; + + emitArtifact: (id: K, value: V) => void; + + normalizeEntries: (entries: ResolvedEntry[]) => ResolvedEntry[]; + + now: () => string; +} + +/** + * A resolver function that converts parsed rows to property JSON. + */ +export type ResolverFn< + TArtifacts extends Record = Record, + TOutput = PropertyJson[], +> = ( + ctx: ResolveContext, + rows: AsyncIterable, +) => Promise; + +/** + * Output configuration for a route. + */ +export interface RouteOutput { + /** + * Custom output directory. + */ + dir?: string; + + /** + * Custom file name generator. + */ + fileName?: (pj: PropertyJson) => string; +} diff --git a/packages/pipelines/test/artifact.test.ts b/packages/pipelines/test/artifact.test.ts new file mode 100644 index 000000000..336762608 --- /dev/null +++ b/packages/pipelines/test/artifact.test.ts @@ -0,0 +1,392 @@ +import type { FileContext, ParseContext, ParsedRow } from "../src/types"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { + definePipelineArtifact, + type InferArtifactId, + type InferArtifactsMap, + type InferArtifactValue, + type PipelineArtifactDefinition, +} from "../src/artifact"; +import { byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { definePipelineRoute } from "../src/route"; +import { definePipelineSource } from "../src/source"; + +let mockSourceCounter = 0; + +function createMockSource(files: Record>) { + return definePipelineSource({ + id: `mock-${++mockSourceCounter}`, + backend: { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? 
{}; + return versionFiles[file.path] ?? ""; + }, + }, + }); +} + +function createRow(ctx: ParseContext, props: Partial): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? "point" : "range", + ...props, + }; +} + +describe("definePipelineArtifact", () => { + it("should create an artifact definition with id and build function", () => { + const artifact = definePipelineArtifact({ + id: "test-artifact", + build: async () => ({ value: 42 }), + }); + + expect(artifact.id).toBe("test-artifact"); + expect(typeof artifact.build).toBe("function"); + }); + + it("should preserve the artifact id as a literal type", () => { + const artifact = definePipelineArtifact({ + id: "my-specific-id", + build: async () => "result", + }); + + expectTypeOf(artifact.id).toEqualTypeOf<"my-specific-id">(); + }); + + it("should infer the build return type", () => { + const mapArtifact = definePipelineArtifact({ + id: "map-artifact", + build: async () => new Map(), + }); + + const setArtifact = definePipelineArtifact({ + id: "set-artifact", + build: async () => new Set(), + }); + + const objectArtifact = definePipelineArtifact({ + id: "object-artifact", + build: async () => ({ count: 0, names: ["a", "b"] }), + }); + + expectTypeOf>().toEqualTypeOf>(); + expectTypeOf>().toEqualTypeOf>(); + expectTypeOf>().toEqualTypeOf<{ count: number; names: string[] }>(); + }); + + it("should support optional filter and parser", () => { + const artifact = definePipelineArtifact({ + id: "with-parser", + filter: byName("PropertyValueAliases.txt"), + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (!ctx.isComment(line)) { + yield createRow(ctx, { property: line, value: line }); + } + } + }, + build: async (_ctx, rows) => { + const aliases = new Map(); + if (rows) { + for await (const row of rows) { + if (row.property) { + aliases.set(row.property, [row.value as string]); + } + } + } + return aliases; + }, + }); + + expect(artifact.filter).toBeDefined(); + expect(artifact.parser).toBeDefined(); + expectTypeOf>().toEqualTypeOf>(); + }); + + it("should receive version in build context", async () => { + let receivedVersion: string | undefined; + + const artifact = definePipelineArtifact({ + id: "version-check", + build: async (ctx) => { + receivedVersion = ctx.version; + return ctx.version; + }, + }); + + const route = definePipelineRoute({ + id: "dummy", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx) => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + artifacts: [artifact], + routes: [route], + }); + + await pipeline.run(); + + expect(receivedVersion).toBe("16.0.0"); + }); + + it("should build artifacts before routes execute", async () => { + const executionOrder: string[] = []; + + const artifact = definePipelineArtifact({ + id: "first", + build: async () => { + executionOrder.push("artifact:build"); + return 42; + }, + }); + + const route = definePipelineRoute({ + id: "second", + filter: byName("test.txt"), + parser: async function* (ctx) { + executionOrder.push("route:parse"); + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows) => { + for await (const _row of rows) { + // consume rows to trigger parser + } + executionOrder.push("route:resolve"); + return [{ 
version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + artifacts: [artifact], + routes: [route], + }); + + await pipeline.run(); + + expect(executionOrder[0]).toBe("artifact:build"); + expect(executionOrder).toContain("route:parse"); + expect(executionOrder).toContain("route:resolve"); + }); + + it("should make artifact values available in resolver via getArtifact", async () => { + const aliasArtifact = definePipelineArtifact({ + id: "aliases", + build: async () => new Map([["A", "LATIN CAPITAL LETTER A"]]), + }); + + let retrievedValue: Map | undefined; + + const route = definePipelineRoute({ + id: "consumer", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", property: "A", value: "A" }); + }, + resolver: async (ctx) => { + retrievedValue = ctx.getArtifact("aliases") as Map | undefined; + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + artifacts: [aliasArtifact], + routes: [route], + }); + + await pipeline.run(); + + expect(retrievedValue).toBeInstanceOf(Map); + expect(retrievedValue?.get("A")).toBe("LATIN CAPITAL LETTER A"); + }); + + it("should rebuild artifacts for each version", async () => { + const buildCalls: string[] = []; + + const artifact = definePipelineArtifact({ + id: "per-version", + build: async (ctx) => { + buildCalls.push(ctx.version); + return ctx.version; + }, + }); + + const route = definePipelineRoute({ + id: "dummy", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx) => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0", "14.0.0"], + inputs: [createMockSource({ + "16.0.0": { "test.txt": "a" }, + "15.1.0": { "test.txt": "b" }, + "14.0.0": { "test.txt": "c" }, + })], + artifacts: [artifact], + routes: [route], + }); + + await pipeline.run(); + + expect(buildCalls).toEqual(["16.0.0", "15.1.0", "14.0.0"]); + }); + + it("should emit artifact events", async () => { + const events: string[] = []; + + const artifact = definePipelineArtifact({ + id: "event-test", + build: async () => "value", + }); + + const route = definePipelineRoute({ + id: "dummy", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx) => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + artifacts: [artifact], + routes: [route], + onEvent: (event) => { + if (event.type.startsWith("artifact:")) { + events.push(event.type); + } + }, + }); + + await pipeline.run(); + + expect(events).toContain("artifact:start"); + expect(events).toContain("artifact:end"); + }); + + it("should handle artifact build errors", async () => { + const artifact = definePipelineArtifact({ + id: "failing", + build: async () => { + throw new Error("Artifact build failed"); + }, + }); + + const route = definePipelineRoute({ + id: "dummy", + filter: 
byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx) => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + artifacts: [artifact], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.scope).toBe("artifact"); + expect(result.errors[0]!.artifactId).toBe("failing"); + expect(result.errors[0]!.message).toBe("Artifact build failed"); + }); +}); + +describe("artifact type inference", () => { + it("should infer artifact id type", () => { + const artifact = definePipelineArtifact({ + id: "specific-id", + build: async () => 42, + }); + + type Id = InferArtifactId; + expectTypeOf().toEqualTypeOf<"specific-id">(); + }); + + it("should infer artifact value type", () => { + const artifact = definePipelineArtifact({ + id: "typed-value", + build: async () => ({ nested: { deep: true }, array: [1, 2, 3] }), + }); + + type Value = InferArtifactValue; + expectTypeOf().toEqualTypeOf<{ nested: { deep: boolean }; array: number[] }>(); + }); + + it("should infer artifacts map from array of artifacts", () => { + const aliasArtifact = definePipelineArtifact({ + id: "aliases", + build: async () => new Map(), + }); + + const countArtifact = definePipelineArtifact({ + id: "count", + build: async () => 42, + }); + + const configArtifact = definePipelineArtifact({ + id: "config", + build: async () => ({ enabled: true, threshold: 0.5 }), + }); + + type ArtifactsMap = InferArtifactsMap<[typeof aliasArtifact, typeof countArtifact, typeof configArtifact]>; + + expectTypeOf().toEqualTypeOf<{ + aliases: Map; + count: number; + config: { enabled: boolean; threshold: number }; + }>(); + }); + + it("should type PipelineArtifactDefinition with generics", () => { + type MapArtifact = PipelineArtifactDefinition<"map-id", Map>; + + const artifact: MapArtifact = { + id: "map-id", + build: async () => new Map(), + }; + + expectTypeOf(artifact.id).toEqualTypeOf<"map-id">(); + expectTypeOf(artifact.build).returns.resolves.toEqualTypeOf>(); + }); +}); diff --git a/packages/pipelines/test/cache.test.ts b/packages/pipelines/test/cache.test.ts new file mode 100644 index 000000000..1af2c2720 --- /dev/null +++ b/packages/pipelines/test/cache.test.ts @@ -0,0 +1,249 @@ +import { describe, expect, it } from "vitest"; +import { + type CacheEntry, + type CacheKey, + createMemoryCacheStore, + defaultHashFn, + hashArtifact, + serializeCacheKey, +} from "../src/cache"; + +describe("CacheKey", () => { + it("should serialize cache key correctly", () => { + const key: CacheKey = { + routeId: "line-break", + version: "16.0.0", + inputHash: "abc123", + artifactHashes: { names: "def456", blocks: "ghi789" }, + }; + + const serialized = serializeCacheKey(key); + expect(serialized).toBe("line-break|16.0.0|abc123|blocks:ghi789,names:def456"); + }); + + it("should serialize cache key with empty artifact hashes", () => { + const key: CacheKey = { + routeId: "simple-route", + version: "15.0.0", + inputHash: "xyz", + artifactHashes: {}, + }; + + const serialized = serializeCacheKey(key); + expect(serialized).toBe("simple-route|15.0.0|xyz|"); + }); + + it("should sort artifact hashes alphabetically", () => { + const key: CacheKey = { + routeId: "test", + version: "1.0.0", + inputHash: "hash", + artifactHashes: { zebra: 
"z", alpha: "a", middle: "m" }, + }; + + const serialized = serializeCacheKey(key); + expect(serialized).toBe("test|1.0.0|hash|alpha:a,middle:m,zebra:z"); + }); +}); + +describe("defaultHashFn", () => { + it("should return consistent hash for same input", () => { + const input = "Hello, World!"; + const hash1 = defaultHashFn(input); + const hash2 = defaultHashFn(input); + expect(hash1).toBe(hash2); + }); + + it("should return different hashes for different inputs", () => { + const hash1 = defaultHashFn("Hello"); + const hash2 = defaultHashFn("World"); + expect(hash1).not.toBe(hash2); + }); + + it("should return 8-character hex string", () => { + const hash = defaultHashFn("test input"); + expect(hash).toMatch(/^[0-9a-f]{8}$/); + }); + + it("should handle empty string", () => { + const hash = defaultHashFn(""); + expect(hash).toMatch(/^[0-9a-f]{8}$/); + }); +}); + +describe("hashArtifact", () => { + it("should hash null/undefined as 'null'", () => { + expect(hashArtifact(null)).toBe("null"); + expect(hashArtifact(undefined)).toBe("null"); + }); + + it("should hash strings", () => { + const hash = hashArtifact("test string"); + expect(hash).toMatch(/^[0-9a-f]{8}$/); + }); + + it("should hash Map consistently", () => { + const map1 = new Map([ + ["a", "1"], + ["b", "2"], + ]); + const map2 = new Map([ + ["b", "2"], + ["a", "1"], + ]); + + expect(hashArtifact(map1)).toBe(hashArtifact(map2)); + }); + + it("should hash Set consistently", () => { + const set1 = new Set(["a", "b", "c"]); + const set2 = new Set(["c", "a", "b"]); + + expect(hashArtifact(set1)).toBe(hashArtifact(set2)); + }); + + it("should hash arrays", () => { + const arr = [1, 2, 3]; + const hash = hashArtifact(arr); + expect(hash).toMatch(/^[0-9a-f]{8}$/); + }); + + it("should hash objects", () => { + const obj = { foo: "bar", baz: 123 }; + const hash = hashArtifact(obj); + expect(hash).toMatch(/^[0-9a-f]{8}$/); + }); +}); + +describe("createMemoryCacheStore", () => { + it("should store and retrieve cache entries", async () => { + const store = createMemoryCacheStore(); + const key: CacheKey = { + routeId: "test", + version: "1.0.0", + inputHash: "abc", + artifactHashes: {}, + }; + + const entry: CacheEntry = { + key, + output: [{ data: "test" }], + producedArtifacts: {}, + createdAt: new Date().toISOString(), + }; + + await store.set(entry); + const retrieved = await store.get(key); + + expect(retrieved).toEqual(entry); + }); + + it("should return undefined for missing entries", async () => { + const store = createMemoryCacheStore(); + const key: CacheKey = { + routeId: "nonexistent", + version: "1.0.0", + inputHash: "xyz", + artifactHashes: {}, + }; + + const result = await store.get(key); + expect(result).toBeUndefined(); + }); + + it("should check existence with has()", async () => { + const store = createMemoryCacheStore(); + const key: CacheKey = { + routeId: "test", + version: "1.0.0", + inputHash: "abc", + artifactHashes: {}, + }; + + expect(await store.has(key)).toBe(false); + + await store.set({ + key, + output: [], + producedArtifacts: {}, + createdAt: new Date().toISOString(), + }); + + expect(await store.has(key)).toBe(true); + }); + + it("should delete entries", async () => { + const store = createMemoryCacheStore(); + const key: CacheKey = { + routeId: "test", + version: "1.0.0", + inputHash: "abc", + artifactHashes: {}, + }; + + await store.set({ + key, + output: [], + producedArtifacts: {}, + createdAt: new Date().toISOString(), + }); + + expect(await store.delete(key)).toBe(true); + expect(await 
store.has(key)).toBe(false); + expect(await store.delete(key)).toBe(false); + }); + + it("should clear all entries", async () => { + const store = createMemoryCacheStore(); + + for (let i = 0; i < 3; i++) { + await store.set({ + key: { + routeId: `route-${i}`, + version: "1.0.0", + inputHash: `hash-${i}`, + artifactHashes: {}, + }, + output: [], + producedArtifacts: {}, + createdAt: new Date().toISOString(), + }); + } + + const statsBefore = await store.stats?.(); + expect(statsBefore?.entries).toBe(3); + + await store.clear(); + + const statsAfter = await store.stats?.(); + expect(statsAfter?.entries).toBe(0); + }); + + it("should track hits and misses", async () => { + const store = createMemoryCacheStore(); + const key: CacheKey = { + routeId: "test", + version: "1.0.0", + inputHash: "abc", + artifactHashes: {}, + }; + + await store.get(key); + await store.get(key); + + await store.set({ + key, + output: [], + producedArtifacts: {}, + createdAt: new Date().toISOString(), + }); + + await store.get(key); + await store.get(key); + await store.get(key); + + const stats = await store.stats?.(); + expect(stats?.misses).toBe(2); + expect(stats?.hits).toBe(3); + }); +}); diff --git a/packages/pipelines/test/events.test.ts b/packages/pipelines/test/events.test.ts new file mode 100644 index 000000000..da2874c70 --- /dev/null +++ b/packages/pipelines/test/events.test.ts @@ -0,0 +1,829 @@ +import type { FileContext, ParseContext, ParsedRow, PropertyJson } from "../src/types"; +import type { + ArtifactEndEvent, + ArtifactStartEvent, + FileMatchedEvent, + FileSkippedEvent, + ParseEndEvent, + ParseStartEvent, + PipelineEndEvent, + PipelineError, + PipelineErrorEvent, + PipelineEvent, + PipelineGraph, + PipelineGraphEdge, + PipelineGraphNode, + PipelineStartEvent, + ResolveEndEvent, + ResolveStartEvent, + VersionEndEvent, + VersionStartEvent, +} from "../src/events"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { definePipelineArtifact } from "../src/artifact"; +import { definePipelineRoute } from "../src/route"; +import { definePipelineSource } from "../src/source"; + +let mockSourceCounter = 0; + +function createMockSource(files: Record>) { + return definePipelineSource({ + id: `mock-${++mockSourceCounter}`, + backend: { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? {}; + return versionFiles[file.path] ?? ""; + }, + }, + }); +} + +function createRow(ctx: ParseContext, props: Partial): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? 
"point" : "range", + ...props, + }; +} + +describe("PipelineEvent types", () => { + it("should have correct PipelineStartEvent structure", () => { + const event: PipelineStartEvent = { + type: "pipeline:start", + versions: ["16.0.0"], + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"pipeline:start">(); + expectTypeOf(event.versions).toEqualTypeOf(); + expectTypeOf(event.timestamp).toEqualTypeOf(); + }); + + it("should have correct PipelineEndEvent structure", () => { + const event: PipelineEndEvent = { + type: "pipeline:end", + durationMs: 100, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"pipeline:end">(); + expectTypeOf(event.durationMs).toEqualTypeOf(); + expectTypeOf(event.timestamp).toEqualTypeOf(); + }); + + it("should have correct VersionStartEvent structure", () => { + const event: VersionStartEvent = { + type: "version:start", + version: "16.0.0", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"version:start">(); + expectTypeOf(event.version).toEqualTypeOf(); + }); + + it("should have correct VersionEndEvent structure", () => { + const event: VersionEndEvent = { + type: "version:end", + version: "16.0.0", + durationMs: 100, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"version:end">(); + expectTypeOf(event.durationMs).toEqualTypeOf(); + }); + + it("should have correct ArtifactStartEvent structure", () => { + const event: ArtifactStartEvent = { + type: "artifact:start", + artifactId: "names", + version: "16.0.0", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"artifact:start">(); + expectTypeOf(event.artifactId).toEqualTypeOf(); + expectTypeOf(event.version).toEqualTypeOf(); + }); + + it("should have correct ArtifactEndEvent structure", () => { + const event: ArtifactEndEvent = { + type: "artifact:end", + artifactId: "names", + version: "16.0.0", + durationMs: 50, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"artifact:end">(); + expectTypeOf(event.durationMs).toEqualTypeOf(); + }); + + it("should have correct FileMatchedEvent structure", () => { + const file: FileContext = { + path: "LineBreak.txt", + name: "LineBreak.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: FileMatchedEvent = { + type: "file:matched", + file, + routeId: "line-break", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"file:matched">(); + expectTypeOf(event.file).toEqualTypeOf(); + expectTypeOf(event.routeId).toEqualTypeOf(); + }); + + it("should have correct FileSkippedEvent structure", () => { + const file: FileContext = { + path: "Unknown.txt", + name: "Unknown.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: FileSkippedEvent = { + type: "file:skipped", + file, + reason: "no-match", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"file:skipped">(); + expectTypeOf(event.reason).toEqualTypeOf<"no-match" | "filtered">(); + }); + + it("should have correct ParseStartEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ParseStartEvent = { + type: "parse:start", + file, + routeId: "test-route", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"parse:start">(); + expectTypeOf(event.file).toEqualTypeOf(); + expectTypeOf(event.routeId).toEqualTypeOf(); + }); + + it("should have correct ParseEndEvent structure", () => { + 
const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ParseEndEvent = { + type: "parse:end", + file, + routeId: "test-route", + rowCount: 100, + durationMs: 25, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"parse:end">(); + expectTypeOf(event.rowCount).toEqualTypeOf(); + expectTypeOf(event.durationMs).toEqualTypeOf(); + }); + + it("should have correct ResolveStartEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ResolveStartEvent = { + type: "resolve:start", + file, + routeId: "test-route", + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"resolve:start">(); + }); + + it("should have correct ResolveEndEvent structure", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const event: ResolveEndEvent = { + type: "resolve:end", + file, + routeId: "test-route", + outputCount: 5, + durationMs: 10, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"resolve:end">(); + expectTypeOf(event.outputCount).toEqualTypeOf(); + }); + + it("should have correct PipelineErrorEvent structure", () => { + const error: PipelineError = { + scope: "route", + message: "Something went wrong", + routeId: "test-route", + version: "16.0.0", + }; + + const event: PipelineErrorEvent = { + type: "error", + error, + timestamp: Date.now(), + }; + + expectTypeOf(event.type).toEqualTypeOf<"error">(); + expectTypeOf(event.error).toEqualTypeOf(); + }); +}); + +describe("PipelineError", () => { + it("should support all error scopes", () => { + const scopes: PipelineError["scope"][] = [ + "pipeline", + "version", + "file", + "route", + "artifact", + ]; + + expect(scopes).toHaveLength(5); + }); + + it("should have optional file context", () => { + const errorWithFile: PipelineError = { + scope: "file", + message: "File error", + file: { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }, + }; + + const errorWithoutFile: PipelineError = { + scope: "pipeline", + message: "Pipeline error", + }; + + expect(errorWithFile.file).toBeDefined(); + expect(errorWithoutFile.file).toBeUndefined(); + }); + + it("should have optional routeId", () => { + const error: PipelineError = { + scope: "route", + message: "Route error", + routeId: "test-route", + }; + + expectTypeOf(error.routeId).toEqualTypeOf(); + }); + + it("should have optional artifactId", () => { + const error: PipelineError = { + scope: "artifact", + message: "Artifact error", + artifactId: "names", + }; + + expectTypeOf(error.artifactId).toEqualTypeOf(); + }); +}); + +describe("PipelineGraph", () => { + it("should have nodes and edges", () => { + const graph: PipelineGraph = { + nodes: [], + edges: [], + }; + + expectTypeOf(graph.nodes).toEqualTypeOf(); + expectTypeOf(graph.edges).toEqualTypeOf(); + }); + + it("should support source node type", () => { + const node: PipelineGraphNode = { + id: "source:16.0.0", + type: "source", + version: "16.0.0", + }; + + expect(node.type).toBe("source"); + expect(node.version).toBe("16.0.0"); + }); + + it("should support file node type", () => { + const file: FileContext = { + path: "test.txt", + name: "test.txt", + dir: "", + ext: ".txt", + version: "16.0.0", + }; + + const node: PipelineGraphNode = { + id: "file:16.0.0:test.txt", + type: "file", + file, + 
}; + + expect(node.type).toBe("file"); + expect(node.file).toEqual(file); + }); + + it("should support route node type", () => { + const node: PipelineGraphNode = { + id: "route:16.0.0:line-break", + type: "route", + routeId: "line-break", + }; + + expect(node.type).toBe("route"); + expect(node.routeId).toBe("line-break"); + }); + + it("should support artifact node type", () => { + const node: PipelineGraphNode = { + id: "artifact:16.0.0:names", + type: "artifact", + artifactId: "names", + }; + + expect(node.type).toBe("artifact"); + expect(node.artifactId).toBe("names"); + }); + + it("should support output node type with optional property", () => { + const nodeWithProperty: PipelineGraphNode = { + id: "output:16.0.0:0", + type: "output", + outputIndex: 0, + property: "Line_Break", + }; + + const nodeWithoutProperty: PipelineGraphNode = { + id: "output:16.0.0:1", + type: "output", + outputIndex: 1, + }; + + expect(nodeWithProperty.property).toBe("Line_Break"); + expect(nodeWithoutProperty.property).toBeUndefined(); + }); + + it("should support all edge types", () => { + const edgeTypes: PipelineGraphEdge["type"][] = [ + "provides", + "matched", + "parsed", + "resolved", + "uses-artifact", + ]; + + expect(edgeTypes).toHaveLength(5); + }); + + it("should have from and to on edges", () => { + const edge: PipelineGraphEdge = { + from: "source:16.0.0", + to: "file:16.0.0:test.txt", + type: "provides", + }; + + expectTypeOf(edge.from).toEqualTypeOf(); + expectTypeOf(edge.to).toEqualTypeOf(); + expectTypeOf(edge.type).toEqualTypeOf<"provides" | "matched" | "parsed" | "resolved" | "uses-artifact">(); + }); +}); + +describe("Event emission during pipeline run", () => { + it("should emit pipeline:start and pipeline:end events", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const startEvent = events.find((e) => e.type === "pipeline:start"); + const endEvent = events.find((e) => e.type === "pipeline:end"); + + expect(startEvent).toBeDefined(); + expect(endEvent).toBeDefined(); + expect(startEvent?.type).toBe("pipeline:start"); + expect(endEvent?.type).toBe("pipeline:end"); + }); + + it("should emit version:start and version:end for each version", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + inputs: [createMockSource({ + "16.0.0": { "test.txt": "content" }, + "15.1.0": { "test.txt": "content" }, + })], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const versionStarts = events.filter((e) => e.type === "version:start"); + 
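+    // two versions are configured above, so each lifecycle event should be emitted once per version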
const versionEnds = events.filter((e) => e.type === "version:end"); + + expect(versionStarts).toHaveLength(2); + expect(versionEnds).toHaveLength(2); + }); + + it("should emit artifact events when artifacts are defined", async () => { + const events: PipelineEvent[] = []; + + const artifact = definePipelineArtifact({ + id: "names", + build: async () => new Map([["0041", "LATIN CAPITAL LETTER A"]]), + }); + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + artifacts: [artifact], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const artifactStart = events.find((e) => e.type === "artifact:start"); + const artifactEnd = events.find((e) => e.type === "artifact:end"); + + expect(artifactStart).toBeDefined(); + expect(artifactEnd).toBeDefined(); + if (artifactStart?.type === "artifact:start") { + expect(artifactStart.artifactId).toBe("names"); + } + }); + + it("should emit file:matched for matched files", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "content", + }, + })], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const matchedEvents = events.filter((e) => e.type === "file:matched"); + expect(matchedEvents).toHaveLength(1); + + if (matchedEvents[0]?.type === "file:matched") { + expect(matchedEvents[0].file.name).toBe("matched.txt"); + expect(matchedEvents[0].routeId).toBe("test"); + } + }); + + it("should emit file:skipped for unmatched files", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "content", + }, + })], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const skippedEvents = events.filter((e) => e.type === "file:skipped"); + expect(skippedEvents).toHaveLength(1); + + if (skippedEvents[0]?.type === "file:skipped") { + expect(skippedEvents[0].file.name).toBe("unmatched.txt"); + expect(skippedEvents[0].reason).toBe("no-match"); + } + }); + + it("should emit parse and resolve events", async () => { + const events: PipelineEvent[] = []; + + const route = 
definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const parseStart = events.find((e) => e.type === "parse:start"); + const parseEnd = events.find((e) => e.type === "parse:end"); + const resolveStart = events.find((e) => e.type === "resolve:start"); + const resolveEnd = events.find((e) => e.type === "resolve:end"); + + expect(parseStart).toBeDefined(); + expect(parseEnd).toBeDefined(); + expect(resolveStart).toBeDefined(); + expect(resolveEnd).toBeDefined(); + }); + + it("should emit error events on failures", async () => { + const events: PipelineEvent[] = []; + + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Resolver failed"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + onEvent: (event) => { events.push(event); }, + }); + + await pipeline.run(); + + const errorEvent = events.find((e) => e.type === "error"); + expect(errorEvent).toBeDefined(); + + if (errorEvent?.type === "error") { + expect(errorEvent.error.message).toBe("Resolver failed"); + expect(errorEvent.error.scope).toBe("route"); + } + }); +}); + +describe("Graph construction", () => { + it("should build graph with source nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + const sourceNodes = result.graph.nodes.filter((n) => n.type === "source"); + expect(sourceNodes).toHaveLength(1); + expect(sourceNodes[0]?.type === "source" && sourceNodes[0].version).toBe("16.0.0"); + }); + + it("should build graph with file nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + const fileNodes = result.graph.nodes.filter((n) => n.type === "file"); + expect(fileNodes).toHaveLength(1); + expect(fileNodes[0]?.type === "file" && fileNodes[0].file.name).toBe("test.txt"); + }); + + 
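+  // each matched route should also be recorded as a dedicated "route" node in the result graph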
it("should build graph with route nodes", async () => { + const route = definePipelineRoute({ + id: "line-break", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + const routeNodes = result.graph.nodes.filter((n) => n.type === "route"); + expect(routeNodes).toHaveLength(1); + expect(routeNodes[0]?.type === "route" && routeNodes[0].routeId).toBe("line-break"); + }); + + it("should build graph with output nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + const outputNodes = result.graph.nodes.filter((n) => n.type === "output"); + expect(outputNodes).toHaveLength(1); + }); + + it("should create edges between nodes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.graph.edges.length).toBeGreaterThan(0); + + const providesEdges = result.graph.edges.filter((e) => e.type === "provides"); + const matchedEdges = result.graph.edges.filter((e) => e.type === "matched"); + const resolvedEdges = result.graph.edges.filter((e) => e.type === "resolved"); + + expect(providesEdges.length).toBeGreaterThan(0); + expect(matchedEdges.length).toBeGreaterThan(0); + expect(resolvedEdges.length).toBeGreaterThan(0); + }); +}); diff --git a/packages/pipelines/test/filters.test.ts b/packages/pipelines/test/filters.test.ts new file mode 100644 index 000000000..0fc2f0135 --- /dev/null +++ b/packages/pipelines/test/filters.test.ts @@ -0,0 +1,382 @@ +import type { FileContext } from "../src/types"; +import { describe, expect, it } from "vitest"; +import { + always, + and, + byDir, + byExt, + byGlob, + byName, + byPath, + byProp, + never, + not, + or, +} from "../src/filters"; + +function createFile(path: string, version = "16.0.0"): FileContext { + return { + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? 
path.substring(path.lastIndexOf(".")) : "", + version, + }; +} + +describe("byName", () => { + it("should match exact file name", () => { + const filter = byName("LineBreak.txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(true); + }); + + it("should not match different file names", () => { + const filter = byName("LineBreak.txt"); + + expect(filter({ file: createFile("WordBreak.txt") })).toBe(false); + expect(filter({ file: createFile("linebreak.txt") })).toBe(false); + expect(filter({ file: createFile("LineBreak.html") })).toBe(false); + }); + + it("should match file name regardless of directory", () => { + const filter = byName("UnicodeData.txt"); + + expect(filter({ file: createFile("UnicodeData.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + expect(filter({ file: createFile("deep/nested/path/UnicodeData.txt") })).toBe(true); + }); +}); + +describe("byDir", () => { + it("should match files in specific directory", () => { + const filter = byDir("ucd"); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + }); + + it("should not match files in different directories", () => { + const filter = byDir("ucd"); + + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("extracted/DerivedName.txt") })).toBe(false); + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + }); + + it("should match root directory with empty string", () => { + const filter = byDir(""); + + expect(filter({ file: createFile("ReadMe.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(false); + }); + + it("should match nested directories", () => { + const filter = byDir("ucd/auxiliary"); + + expect(filter({ file: createFile("ucd/auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(false); + }); +}); + +describe("byExt", () => { + it("should match files by extension", () => { + const filter = byExt(".txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + }); + + it("should not match different extensions", () => { + const filter = byExt(".txt"); + + expect(filter({ file: createFile("ReadMe.html") })).toBe(false); + expect(filter({ file: createFile("emoji-data.json") })).toBe(false); + expect(filter({ file: createFile("config.xml") })).toBe(false); + }); + + it("should handle extension with or without dot", () => { + const filterWithDot = byExt(".json"); + const filterWithoutDot = byExt("json"); + + expect(filterWithDot({ file: createFile("data.json") })).toBe(true); + expect(filterWithoutDot({ file: createFile("data.json") })).toBe(true); + }); + + it("should match files without extension", () => { + const filter = byExt(""); + + expect(filter({ file: createFile("Makefile") })).toBe(true); + expect(filter({ file: createFile("LICENSE") })).toBe(true); + }); +}); + +describe("byPath", () => { + it("should match exact path", () => { + const filter = byPath("ucd/LineBreak.txt"); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + }); + + it("should not match different paths", () => { + const filter = byPath("ucd/LineBreak.txt"); + + 
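+    // byPath compares the full relative path, so a bare file name or a different directory should not match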
expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/WordBreak.txt") })).toBe(false); + }); + + it("should be case-sensitive", () => { + const filter = byPath("ucd/LineBreak.txt"); + + expect(filter({ file: createFile("UCD/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/linebreak.txt") })).toBe(false); + }); +}); + +describe("byGlob", () => { + it("should match files with glob pattern", () => { + const filter = byGlob("**/*.txt"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/UnicodeData.txt") })).toBe(true); + expect(filter({ file: createFile("deep/nested/file.txt") })).toBe(true); + }); + + it("should not match non-matching files", () => { + const filter = byGlob("**/*.txt"); + + expect(filter({ file: createFile("ReadMe.html") })).toBe(false); + expect(filter({ file: createFile("data.json") })).toBe(false); + }); + + it("should support directory-specific patterns", () => { + const filter = byGlob("ucd/*.txt"); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + }); + + it("should support complex glob patterns", () => { + const filter = byGlob("**/auxiliary/*Test*.txt"); + + expect(filter({ file: createFile("auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/auxiliary/LineBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + }); + + it("should support negation patterns", () => { + const filter = byGlob("!**/*.html"); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ReadMe.html") })).toBe(false); + }); + + it("should support brace expansion", () => { + const filter = byGlob("**/*.{txt,json}"); + + expect(filter({ file: createFile("data.txt") })).toBe(true); + expect(filter({ file: createFile("config.json") })).toBe(true); + expect(filter({ file: createFile("readme.html") })).toBe(false); + }); +}); + +describe("byProp", () => { + it("should match rows with specific property", () => { + const filter = byProp("Line_Break"); + + expect(filter({ + file: createFile("test.txt"), + row: { property: "Line_Break" }, + })).toBe(true); + }); + + it("should not match rows with different property", () => { + const filter = byProp("Line_Break"); + + expect(filter({ + file: createFile("test.txt"), + row: { property: "Word_Break" }, + })).toBe(false); + }); + + it("should not match when row is undefined", () => { + const filter = byProp("Line_Break"); + + expect(filter({ file: createFile("test.txt") })).toBe(false); + }); + + it("should not match when row property is undefined", () => { + const filter = byProp("Line_Break"); + + expect(filter({ + file: createFile("test.txt"), + row: {}, + })).toBe(false); + }); +}); + +describe("and", () => { + it("should return true when all filters match", () => { + const filter = and(byExt(".txt"), byDir("ucd")); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + }); + + it("should return false when any filter does not match", () => { + const filter = and(byExt(".txt"), byDir("ucd")); + + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(false); + expect(filter({ file: 
createFile("ucd/data.json") })).toBe(false); + }); + + it("should short-circuit evaluation", () => { + let secondCalled = false; + const filter = and( + () => false, + () => { secondCalled = true; return true; }, + ); + + filter({ file: createFile("test.txt") }); + + expect(secondCalled).toBe(false); + }); + + it("should handle multiple filters", () => { + const filter = and( + byExt(".txt"), + byDir("ucd"), + byName("LineBreak.txt"), + ); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/WordBreak.txt") })).toBe(false); + }); + + it("should return true for empty filter list", () => { + const filter = and(); + + expect(filter({ file: createFile("any.txt") })).toBe(true); + }); +}); + +describe("or", () => { + it("should return true when any filter matches", () => { + const filter = or(byName("LineBreak.txt"), byName("WordBreak.txt")); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("WordBreak.txt") })).toBe(true); + }); + + it("should return false when no filter matches", () => { + const filter = or(byName("LineBreak.txt"), byName("WordBreak.txt")); + + expect(filter({ file: createFile("GraphemeBreak.txt") })).toBe(false); + }); + + it("should short-circuit evaluation", () => { + let secondCalled = false; + const filter = or( + () => true, + () => { secondCalled = true; return false; }, + ); + + filter({ file: createFile("test.txt") }); + + expect(secondCalled).toBe(false); + }); + + it("should handle multiple filters", () => { + const filter = or( + byName("LineBreak.txt"), + byName("WordBreak.txt"), + byName("GraphemeBreak.txt"), + ); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("WordBreak.txt") })).toBe(true); + expect(filter({ file: createFile("GraphemeBreak.txt") })).toBe(true); + expect(filter({ file: createFile("SentenceBreak.txt") })).toBe(false); + }); + + it("should return false for empty filter list", () => { + const filter = or(); + + expect(filter({ file: createFile("any.txt") })).toBe(false); + }); +}); + +describe("not", () => { + it("should negate the filter result", () => { + const filter = not(byName("LineBreak.txt")); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("WordBreak.txt") })).toBe(true); + }); + + it("should work with complex filters", () => { + const filter = not(and(byExt(".txt"), byDir("ucd"))); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("auxiliary/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/data.json") })).toBe(true); + }); +}); + +describe("always", () => { + it("should always return true", () => { + const filter = always(); + + expect(filter({ file: createFile("any.txt") })).toBe(true); + expect(filter({ file: createFile("ucd/data.json") })).toBe(true); + expect(filter({ file: createFile("deep/nested/path.xml") })).toBe(true); + }); +}); + +describe("never", () => { + it("should always return false", () => { + const filter = never(); + + expect(filter({ file: createFile("any.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/data.json") })).toBe(false); + expect(filter({ file: createFile("deep/nested/path.xml") })).toBe(false); + }); +}); + +describe("filter composition", () => { + it("should support complex compositions", () => { + const filter = or( + and(byDir("ucd"), byExt(".txt")), + and(byDir("auxiliary"), 
byGlob("**/*Test*.txt")), + ); + + expect(filter({ file: createFile("ucd/LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("auxiliary/data.txt") })).toBe(false); + expect(filter({ file: createFile("other/file.txt") })).toBe(false); + }); + + it("should support exclusion patterns", () => { + const filter = and( + byExt(".txt"), + not(byGlob("**/*Test*.txt")), + ); + + expect(filter({ file: createFile("LineBreak.txt") })).toBe(true); + expect(filter({ file: createFile("TestLineBreak.txt") })).toBe(false); + expect(filter({ file: createFile("ucd/WordBreakTest.txt") })).toBe(false); + }); + + it("should support version-aware filtering", () => { + const filter = and( + byName("UnicodeData.txt"), + (ctx) => ctx.file.version === "16.0.0", + ); + + expect(filter({ file: createFile("UnicodeData.txt", "16.0.0") })).toBe(true); + expect(filter({ file: createFile("UnicodeData.txt", "15.1.0") })).toBe(false); + }); +}); diff --git a/packages/pipelines/test/pipeline.test.ts b/packages/pipelines/test/pipeline.test.ts new file mode 100644 index 000000000..f9abe28f3 --- /dev/null +++ b/packages/pipelines/test/pipeline.test.ts @@ -0,0 +1,560 @@ +import type { FileContext, ParseContext, ParsedRow, PropertyJson, ResolveContext } from "../src/types"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { definePipelineArtifact } from "../src/artifact"; +import { byDir, byExt, byGlob, byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { definePipelineRoute } from "../src/route"; +import { definePipelineSource } from "../src/source"; + +let mockSourceCounter = 0; + +function createMockSource(files: Record>) { + return definePipelineSource({ + id: `mock-${++mockSourceCounter}`, + backend: { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? {}; + return versionFiles[file.path] ?? ""; + }, + }, + }); +} + +function createRow(ctx: ParseContext, props: Partial & { codePoint?: string; property?: string }): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? 
"point" : "range", + ...props, + }; +} + +describe("definePipeline", () => { + it("should create a pipeline with routes", () => { + const route = definePipelineRoute({ + id: "test-route", + filter: byName("test.txt"), + parser: async function* (ctx) { + const content = await ctx.readContent(); + yield createRow(ctx, { codePoint: "0000", property: "test", value: content }); + }, + resolver: async (ctx, rows): Promise => { + const entries = []; + for await (const row of rows) { + entries.push({ codePoint: row.codePoint, value: row.value as string }); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({})], + routes: [route], + }); + + expect(pipeline).toBeDefined(); + expect(typeof pipeline.run).toBe("function"); + }); + + it("should run a simple pipeline and return results", async () => { + const route = definePipelineRoute({ + id: "simple", + filter: byName("data.txt"), + parser: async function* (ctx) { + const content = await ctx.readContent(); + for (const line of content.split("\n")) { + if (line.trim()) { + const [codePoint, prop] = line.split(";").map((s) => s.trim()); + yield createRow(ctx, { codePoint, property: prop ?? "", value: prop ?? "" }); + } + } + }, + resolver: async (ctx, rows): Promise => { + const entries = []; + for await (const row of rows) { + entries.push({ codePoint: row.codePoint, value: row.property ?? "" }); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries }]; + }, + }); + + const source = createMockSource({ + "16.0.0": { + "data.txt": "0041;Letter\n0042;Letter", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(1); + expect(result.data[0]).toMatchObject({ + version: "16.0.0", + property: "Test", + }); + expect(result.summary.totalFiles).toBe(1); + expect(result.summary.matchedFiles).toBe(1); + expect(result.summary.skippedFiles).toBe(0); + }); + + it("should emit events during pipeline execution", async () => { + const events: string[] = []; + + const route = definePipelineRoute({ + id: "event-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { "test.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + onEvent: (event) => { + events.push(event.type); + }, + }); + + await pipeline.run(); + + expect(events).toContain("pipeline:start"); + expect(events).toContain("pipeline:end"); + expect(events).toContain("version:start"); + expect(events).toContain("version:end"); + expect(events).toContain("file:matched"); + expect(events).toContain("parse:start"); + expect(events).toContain("parse:end"); + expect(events).toContain("resolve:start"); + expect(events).toContain("resolve:end"); + }); + + it("should build a pipeline graph", async () => { + const route = definePipelineRoute({ + id: "graph-test", + filter: byName("file.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "x" }); + }, + resolver: async 
(ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { "file.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.graph.nodes.length).toBeGreaterThan(0); + expect(result.graph.edges.length).toBeGreaterThan(0); + + const nodeTypes = result.graph.nodes.map((n) => n.type); + expect(nodeTypes).toContain("source"); + expect(nodeTypes).toContain("file"); + expect(nodeTypes).toContain("route"); + expect(nodeTypes).toContain("output"); + + const edgeTypes = result.graph.edges.map((e) => e.type); + expect(edgeTypes).toContain("provides"); + expect(edgeTypes).toContain("matched"); + expect(edgeTypes).toContain("resolved"); + }); + + it("should support artifacts", async () => { + const aliasArtifact = definePipelineArtifact({ + id: "aliases", + build: async () => { + return new Map([["A", ["Letter_A", "Uppercase_A"]]]); + }, + }); + + const route = definePipelineRoute({ + id: "with-artifact", + filter: byName("data.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", property: "A", value: "A" }); + }, + resolver: async (ctx, rows): Promise => { + const aliases = (ctx.getArtifact as (k: string) => Map | undefined)("aliases"); + const entries = []; + for await (const row of rows) { + const propertyAliases = aliases?.get(row.property ?? "") ?? []; + entries.push({ + codePoint: row.codePoint, + value: propertyAliases.join(",") || (row.property ?? ""), + }); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries }]; + }, + }); + + const source = createMockSource({ + "16.0.0": { "data.txt": "0041;A" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + artifacts: [aliasArtifact], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(1); + }); + + it("should use fallback for unmatched files", async () => { + const route = definePipelineRoute({ + id: "specific", + filter: byName("known.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "known", value: "known" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Known", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "known.txt": "content", + "unknown.txt": "other content", + }, + }); + + const fallbackRoute = { + parser: async function* (ctx: ParseContext) { + yield createRow(ctx, { codePoint: "FFFF", property: "fallback", value: await ctx.readContent() }); + }, + resolver: async (ctx: ResolveContext): Promise => [ + { version: ctx.version, property: "Fallback", file: ctx.file.name, entries: [] }, + ], + }; + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + fallback: fallbackRoute, + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(2); + expect(result.summary.matchedFiles).toBe(1); + expect(result.summary.fallbackFiles).toBe(1); + }); + + it("should skip files with no match in non-strict mode", async () => { + const route = definePipelineRoute({ + id: "strict-test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", 
property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Matched", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "other", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + strict: false, + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + expect(result.data).toHaveLength(1); + expect(result.summary.skippedFiles).toBe(1); + }); + + it("should report error for unmatched files in strict mode", async () => { + const route = definePipelineRoute({ + id: "strict-test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Matched", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "matched.txt": "content", + "unmatched.txt": "other", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + strict: true, + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.scope).toBe("file"); + expect(result.errors[0]!.message).toContain("No matching route"); + }); + + it("should handle route errors gracefully", async () => { + const route = definePipelineRoute({ + id: "error-route", + filter: byName("error.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (): Promise => { + throw new Error("Resolver failed"); + }, + }); + + const source = createMockSource({ + "16.0.0": { "error.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.scope).toBe("route"); + expect(result.errors[0]!.message).toBe("Resolver failed"); + expect(result.errors[0]!.routeId).toBe("error-route"); + }); + + it("should process multiple versions", async () => { + const route = definePipelineRoute({ + id: "multi-version", + filter: byName("data.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { "data.txt": "content" }, + "15.1.0": { "data.txt": "content" }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + inputs: [source], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + const versions = result.data.map((d) => (d as PropertyJson).version); + expect(versions).toContain("16.0.0"); + expect(versions).toContain("15.1.0"); + expect(result.summary.versions).toEqual(["16.0.0", "15.1.0"]); + }); + + it("should apply include filter to limit processed files", async () => { + const route = definePipelineRoute({ + id: "include-test", + filter: byExt(".txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, 
property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const source = createMockSource({ + "16.0.0": { + "include.txt": "content", + "exclude.txt": "content", + "data/nested.txt": "content", + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [source], + routes: [route], + include: byDir(""), + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + const files = result.data.map((d) => (d as PropertyJson).file); + expect(files).toContain("include.txt"); + expect(files).toContain("exclude.txt"); + expect(files).not.toContain("nested.txt"); + }); +}); + +describe("filters", () => { + const createFile = (path: string, version = "16.0.0"): FileContext => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + }); + + describe("byName", () => { + it("should match exact file name", () => { + const filter = byName("test.txt"); + expect(filter({ file: createFile("test.txt") })).toBe(true); + expect(filter({ file: createFile("other.txt") })).toBe(false); + expect(filter({ file: createFile("dir/test.txt") })).toBe(true); + }); + }); + + describe("byDir", () => { + it("should match files in directory", () => { + const filter = byDir("ucd"); + expect(filter({ file: createFile("ucd/test.txt") })).toBe(true); + expect(filter({ file: createFile("other/test.txt") })).toBe(false); + expect(filter({ file: createFile("test.txt") })).toBe(false); + }); + }); + + describe("byExt", () => { + it("should match files by extension", () => { + const filter = byExt(".txt"); + expect(filter({ file: createFile("test.txt") })).toBe(true); + expect(filter({ file: createFile("test.html") })).toBe(false); + }); + }); + + describe("byGlob", () => { + it("should match files by glob pattern", () => { + const filter = byGlob("**/*.txt"); + expect(filter({ file: createFile("test.txt") })).toBe(true); + expect(filter({ file: createFile("dir/test.txt") })).toBe(true); + expect(filter({ file: createFile("test.html") })).toBe(false); + }); + + it("should support complex glob patterns", () => { + const filter = byGlob("auxiliary/*.txt"); + expect(filter({ file: createFile("auxiliary/WordBreakTest.txt") })).toBe(true); + expect(filter({ file: createFile("UnicodeData.txt") })).toBe(false); + }); + }); +}); + +describe("type inference", () => { + it("should infer route output types", () => { + const route = definePipelineRoute({ + id: "typed-route", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", property: "test", value: "test" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [{ codePoint: "0000", value: "X" }] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({})], + routes: [route] as const, + }); + + const _typeTest = async () => { + const result = await pipeline.run(); + expectTypeOf(result.data).toBeArray(); + }; + }); + + it("should have properly typed resolver context", () => { + const aliasArtifact = definePipelineArtifact({ + id: "aliases", + build: async () => new Map(), + }); + + const countArtifact = definePipelineArtifact({ + id: "count", + build: async () => 42, + }); + + const route = definePipelineRoute({ + id: "typed-artifacts", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield 
createRow(ctx, { codePoint: "0000", property: "test", value: "test" });
+      },
+      resolver: async (ctx, _rows): Promise<PropertyJson[]> => {
+        const getArtifact = ctx.getArtifact as (k: string) => unknown;
+        const aliases = getArtifact("aliases");
+        const count = getArtifact("count");
+
+        expect(aliases).toBeUndefined();
+        expect(count).toBeUndefined();
+
+        return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }];
+      },
+    });
+
+    const _pipeline = definePipeline({
+      versions: ["16.0.0"],
+      inputs: [createMockSource({})],
+      artifacts: [aliasArtifact, countArtifact],
+      routes: [route],
+    });
+  });
+});
diff --git a/packages/pipelines/test/results.test.ts b/packages/pipelines/test/results.test.ts
new file mode 100644
index 000000000..cd640ccb1
--- /dev/null
+++ b/packages/pipelines/test/results.test.ts
@@ -0,0 +1,462 @@
+import type { FileContext, ParseContext, ParsedRow, PropertyJson } from "../src/types";
+import type { PipelineGraph } from "../src/events";
+import type { PipelineRunResult, PipelineSummary } from "../src/results";
+import { describe, expect, expectTypeOf, it } from "vitest";
+import { byName } from "../src/filters";
+import { definePipeline } from "../src/pipeline";
+import { definePipelineRoute } from "../src/route";
+import { definePipelineSource } from "../src/source";
+
+let mockSourceCounter = 0;
+
+function createMockSource(files: Record<string, Record<string, string>>) {
+  return definePipelineSource({
+    id: `mock-${++mockSourceCounter}`,
+    backend: {
+      listFiles: async (version: string): Promise<FileContext[]> => {
+        const versionFiles = files[version] ?? {};
+        return Object.keys(versionFiles).map((path) => ({
+          path,
+          name: path.split("/").pop() ?? path,
+          dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "",
+          ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "",
+          version,
+        }));
+      },
+      readFile: async (file: FileContext): Promise<string> => {
+        const versionFiles = files[file.version] ?? {};
+        return versionFiles[file.path] ?? "";
+      },
+    },
+  });
+}
+
+function createRow(ctx: ParseContext, props: Partial<ParsedRow>): ParsedRow {
+  return {
+    sourceFile: ctx.file.path,
+    kind: props.codePoint ? "point" : "range",
"point" : "range", + ...props, + }; +} + +describe("PipelineSummary", () => { + it("should have correct structure", () => { + const summary: PipelineSummary = { + versions: ["16.0.0", "15.1.0"], + totalFiles: 100, + matchedFiles: 80, + skippedFiles: 15, + fallbackFiles: 5, + totalOutputs: 120, + durationMs: 500, + }; + + expectTypeOf(summary.versions).toEqualTypeOf(); + expectTypeOf(summary.totalFiles).toEqualTypeOf(); + expectTypeOf(summary.matchedFiles).toEqualTypeOf(); + expectTypeOf(summary.skippedFiles).toEqualTypeOf(); + expectTypeOf(summary.fallbackFiles).toEqualTypeOf(); + expectTypeOf(summary.totalOutputs).toEqualTypeOf(); + expectTypeOf(summary.durationMs).toEqualTypeOf(); + }); + + it("should contain all processed versions", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0", "15.0.0"], + inputs: [createMockSource({ + "16.0.0": { "test.txt": "content" }, + "15.1.0": { "test.txt": "content" }, + "15.0.0": { "test.txt": "content" }, + })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.versions).toEqual(["16.0.0", "15.1.0", "15.0.0"]); + }); + + it("should track total files across all versions", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + inputs: [createMockSource({ + "16.0.0": { "test.txt": "a", "other.txt": "b" }, + "15.1.0": { "test.txt": "c", "another.txt": "d", "third.txt": "e" }, + })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.totalFiles).toBe(5); + }); + + it("should track matched files", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { + "matched.txt": "a", + "unmatched1.txt": "b", + "unmatched2.txt": "c", + }, + })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.matchedFiles).toBe(1); + }); + + it("should track skipped files", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("matched.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { + "matched.txt": "a", + 
"unmatched1.txt": "b", + "unmatched2.txt": "c", + }, + })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.skippedFiles).toBe(2); + }); + + it("should track total outputs", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", value: "A" }); + yield createRow(ctx, { codePoint: "0042", value: "B" }); + }, + resolver: async (ctx, rows): Promise => { + const outputs: PropertyJson[] = []; + for await (const row of rows) { + outputs.push({ + version: ctx.version, + property: row.value as string, + file: ctx.file.name, + entries: [], + }); + } + return outputs; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.totalOutputs).toBe(2); + }); + + it("should track duration", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.summary.durationMs).toBeGreaterThanOrEqual(0); + }); +}); + +describe("PipelineRunResult", () => { + it("should have correct structure", () => { + type TestData = { id: string }; + const result: PipelineRunResult = { + data: [{ id: "1" }, { id: "2" }], + graph: { nodes: [], edges: [] }, + errors: [], + summary: { + versions: ["16.0.0"], + totalFiles: 10, + matchedFiles: 8, + skippedFiles: 2, + fallbackFiles: 0, + totalOutputs: 8, + durationMs: 100, + }, + }; + + expectTypeOf(result.data).toEqualTypeOf(); + expectTypeOf(result.graph).toEqualTypeOf(); + expectTypeOf(result.errors).toBeArray(); + expectTypeOf(result.summary).toEqualTypeOf(); + }); + + it("should contain all outputs in data array", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", value: "A" }); + yield createRow(ctx, { codePoint: "0042", value: "B" }); + }, + resolver: async (ctx, rows): Promise => { + const outputs: PropertyJson[] = []; + for await (const row of rows) { + outputs.push({ + version: ctx.version, + property: row.value as string, + file: ctx.file.name, + entries: [{ codePoint: row.codePoint, value: row.value as string }], + }); + } + return outputs; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + expect(result.data[0]?.property).toBe("A"); + expect(result.data[1]?.property).toBe("B"); + }); + + it("should contain graph structure", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + 
return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.graph).toBeDefined(); + expect(result.graph.nodes).toBeInstanceOf(Array); + expect(result.graph.edges).toBeInstanceOf(Array); + expect(result.graph.nodes.length).toBeGreaterThan(0); + expect(result.graph.edges.length).toBeGreaterThan(0); + }); + + it("should contain errors when they occur", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Test error"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]?.message).toBe("Test error"); + expect(result.errors[0]?.scope).toBe("route"); + }); + + it("should have empty errors array when no errors occur", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(0); + }); + + it("should accumulate multiple errors", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Route error"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0", "15.1.0"], + inputs: [createMockSource({ + "16.0.0": { "test.txt": "content" }, + "15.1.0": { "test.txt": "content" }, + })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors.length).toBe(2); + }); +}); + +describe("Result data typing", () => { + it("should infer output type from routes", async () => { + const route = definePipelineRoute({ + id: "test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expectTypeOf(result.data).toEqualTypeOf(); + }); + + it("should combine multiple route output types when using same base type", async () => { + const route1 = definePipelineRoute({ + id: "route1", + filter: byName("a.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, 
rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "A", file: ctx.file.name, entries: [] }]; + }, + }); + + const route2 = definePipelineRoute({ + id: "route2", + filter: byName("b.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) {} + return [{ version: ctx.version, property: "B", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { "a.txt": "content", "b.txt": "content" }, + })], + routes: [route1, route2], + }); + + const result = await pipeline.run(); + + expectTypeOf(result.data).toEqualTypeOf(); + expect(result.data).toHaveLength(2); + }); +}); diff --git a/packages/pipelines/test/route.test.ts b/packages/pipelines/test/route.test.ts new file mode 100644 index 000000000..8c90ccbf0 --- /dev/null +++ b/packages/pipelines/test/route.test.ts @@ -0,0 +1,445 @@ +import type { FileContext, ParseContext, ParsedRow, PropertyJson } from "../src/types"; +import { describe, expect, expectTypeOf, it } from "vitest"; +import { byName } from "../src/filters"; +import { definePipeline } from "../src/pipeline"; +import { + definePipelineRoute, + type InferRouteId, + type InferRouteOutput, + type InferRoutesOutput, + type PipelineRouteDefinition, +} from "../src/route"; +import { definePipelineSource } from "../src/source"; + +let mockSourceCounter = 0; + +function createMockSource(files: Record>) { + return definePipelineSource({ + id: `mock-${++mockSourceCounter}`, + backend: { + listFiles: async (version: string): Promise => { + const versionFiles = files[version] ?? {}; + return Object.keys(versionFiles).map((path) => ({ + path, + name: path.split("/").pop() ?? path, + dir: path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "", + ext: path.includes(".") ? path.substring(path.lastIndexOf(".")) : "", + version, + })); + }, + readFile: async (file: FileContext): Promise => { + const versionFiles = files[file.version] ?? {}; + return versionFiles[file.path] ?? ""; + }, + }, + }); +} + +function createRow(ctx: ParseContext, props: Partial): ParsedRow { + return { + sourceFile: ctx.file.path, + kind: props.codePoint ? 
"point" : "range", + ...props, + }; +} + +describe("definePipelineRoute", () => { + it("should create a route definition with required fields", () => { + const route = definePipelineRoute({ + id: "test-route", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + expect(route.id).toBe("test-route"); + expect(typeof route.filter).toBe("function"); + expect(typeof route.parser).toBe("function"); + expect(typeof route.resolver).toBe("function"); + }); + + it("should preserve the route id as a literal type", () => { + const route = definePipelineRoute({ + id: "line-break-route", + filter: byName("LineBreak.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Line_Break", file: ctx.file.name, entries: [] }, + ], + }); + + expectTypeOf(route.id).toEqualTypeOf<"line-break-route">(); + }); + + it("should support optional out configuration", () => { + const route = definePipelineRoute({ + id: "with-output-config", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + out: { + dir: "custom-output", + fileName: (pj) => `${pj.property.toLowerCase()}.json`, + }, + }); + + expect(route.out).toBeDefined(); + expect(route.out?.dir).toBe("custom-output"); + expect(typeof route.out?.fileName).toBe("function"); + }); + + it("should parse file content correctly", async () => { + const parsedRows: ParsedRow[] = []; + + const route = definePipelineRoute({ + id: "parsing-test", + filter: byName("data.txt"), + parser: async function* (ctx) { + for await (const line of ctx.readLines()) { + if (!ctx.isComment(line) && line.trim()) { + const [codePoint, value] = line.split(";").map((s) => s.trim()); + yield createRow(ctx, { codePoint, value }); + } + } + }, + resolver: async (ctx, rows): Promise => { + for await (const row of rows) { + parsedRows.push(row); + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { "data.txt": "# comment\n0041;A\n0042;B\n\n0043;C" }, + })], + routes: [route], + }); + + await pipeline.run(); + + expect(parsedRows).toHaveLength(3); + expect(parsedRows[0]!.codePoint).toBe("0041"); + expect(parsedRows[1]!.codePoint).toBe("0042"); + expect(parsedRows[2]!.codePoint).toBe("0043"); + }); + + it("should receive file context in parser", async () => { + let receivedFile: FileContext | undefined; + + const route = definePipelineRoute({ + id: "file-context-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + receivedFile = ctx.file; + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + // Must consume rows to trigger parser execution + for await (const _row of rows) { + // consume + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + await 
pipeline.run(); + + expect(receivedFile).toBeDefined(); + expect(receivedFile?.name).toBe("test.txt"); + expect(receivedFile?.version).toBe("16.0.0"); + }); + + it("should receive version and file in resolver context", async () => { + let receivedVersion: string | undefined; + let receivedFileName: string | undefined; + + const route = definePipelineRoute({ + id: "resolver-context-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => { + receivedVersion = ctx.version; + receivedFileName = ctx.file.name; + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + await pipeline.run(); + + expect(receivedVersion).toBe("16.0.0"); + expect(receivedFileName).toBe("test.txt"); + }); + + it("should provide normalizeEntries utility in resolver", async () => { + let hasNormalizeEntries = false; + + const route = definePipelineRoute({ + id: "normalize-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => { + hasNormalizeEntries = typeof ctx.normalizeEntries === "function"; + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + await pipeline.run(); + + expect(hasNormalizeEntries).toBe(true); + }); + + it("should provide now utility for timestamps", async () => { + let timestamp: string | undefined; + + const route = definePipelineRoute({ + id: "timestamp-test", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => { + timestamp = ctx.now(); + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + await pipeline.run(); + + expect(timestamp).toBeDefined(); + expect(new Date(timestamp!).toISOString()).toBe(timestamp); + }); + + it("should match files based on filter", async () => { + const matchedFiles: string[] = []; + + const route = definePipelineRoute({ + id: "filter-test", + filter: byName("target.txt"), + parser: async function* (ctx) { + matchedFiles.push(ctx.file.name); + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) { + // consume to trigger parser + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ + "16.0.0": { + "target.txt": "content", + "other.txt": "content", + "another.txt": "content", + }, + })], + routes: [route], + }); + + await pipeline.run(); + + expect(matchedFiles).toEqual(["target.txt"]); + }); + + it("should support returning single output instead of array", async () => { + const route = definePipelineRoute({ + id: "single-output", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield 
createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(1); + }); + + it("should support returning multiple outputs from resolver", async () => { + const route = definePipelineRoute({ + id: "multi-output", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0041", property: "A", value: "A" }); + yield createRow(ctx, { codePoint: "0042", property: "B", value: "B" }); + }, + resolver: async (ctx, rows): Promise => { + const outputs: PropertyJson[] = []; + for await (const row of rows) { + outputs.push({ + version: ctx.version, + property: row.property ?? "Unknown", + file: ctx.file.name, + entries: [{ codePoint: row.codePoint, value: row.value as string }], + }); + } + return outputs; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.data).toHaveLength(2); + }); + + it("should handle parser errors", async () => { + const route = definePipelineRoute({ + id: "parser-error", + filter: byName("test.txt"), + parser: async function* () { + throw new Error("Parser failed"); + }, + resolver: async (ctx, rows): Promise => { + for await (const _row of rows) { + // consume to trigger parser error + } + return [{ version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }]; + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.message).toBe("Parser failed"); + }); + + it("should handle resolver errors", async () => { + const route = definePipelineRoute({ + id: "resolver-error", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (): Promise => { + throw new Error("Resolver failed"); + }, + }); + + const pipeline = definePipeline({ + versions: ["16.0.0"], + inputs: [createMockSource({ "16.0.0": { "test.txt": "content" } })], + routes: [route], + }); + + const result = await pipeline.run(); + + expect(result.errors).toHaveLength(1); + expect(result.errors[0]!.message).toBe("Resolver failed"); + expect(result.errors[0]!.routeId).toBe("resolver-error"); + }); +}); + +describe("route type inference", () => { + it("should infer route id type", () => { + const route = definePipelineRoute({ + id: "inferred-id", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): Promise => [ + { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] }, + ], + }); + + type Id = InferRouteId; + expectTypeOf().toEqualTypeOf<"inferred-id">(); + }); + + it("should infer route output type", () => { + const route = definePipelineRoute({ + id: "typed-output", + filter: byName("test.txt"), + parser: async function* (ctx) { + yield createRow(ctx, { codePoint: "0000", value: "x" }); + }, + resolver: async (ctx): 
Promise<PropertyJson[]> => [
+        { version: ctx.version, property: "Test", file: ctx.file.name, entries: [] },
+      ],
+    });
+
+    type Output = InferRouteOutput<typeof route>;
+    expectTypeOf<Output>().toEqualTypeOf<PropertyJson[]>();
+  });
+
+  it("should infer combined output from multiple routes", () => {
+    type Routes = readonly [
+      PipelineRouteDefinition<"a", Record<string, unknown>, PropertyJson[]>,
+      PipelineRouteDefinition<"b", Record<string, unknown>, PropertyJson[]>,
+    ];
+
+    type Output = InferRoutesOutput<Routes>;
+    expectTypeOf<Output>().toEqualTypeOf<PropertyJson[]>();
+  });
+
+  it("should type PipelineRouteDefinition with generics", () => {
+    type CustomRoute = PipelineRouteDefinition<"custom-id", { cache: Map<string, string> }, PropertyJson[]>;
+
+    expectTypeOf<CustomRoute["id"]>().toEqualTypeOf<"custom-id">();
+  });
+});
diff --git a/packages/pipelines/test/types.test.ts b/packages/pipelines/test/types.test.ts
new file mode 100644
index 000000000..862f4a643
--- /dev/null
+++ b/packages/pipelines/test/types.test.ts
@@ -0,0 +1,447 @@
+import { describe, expectTypeOf, it } from "vitest";
+import type {
+  InferArtifactId,
+  InferArtifactsMap,
+  InferArtifactValue,
+  PipelineArtifactDefinition,
+} from "../src/artifact";
+import type {
+  PipelineEvent,
+  PipelineGraph,
+  PipelineGraphEdge,
+  PipelineGraphNode,
+  PipelineError,
+} from "../src/events";
+import type { PipelineRunResult, PipelineSummary } from "../src/results";
+import type {
+  InferRouteId,
+  InferRouteOutput,
+  InferRoutesOutput,
+  PipelineRouteDefinition,
+} from "../src/route";
+import type {
+  DefaultRange,
+  FileContext,
+  FilterContext,
+  ParseContext,
+  ParsedRow,
+  ParserFn,
+  PipelineFilter,
+  PropertyJson,
+  ResolvedEntry,
+  ResolveContext,
+  ResolverFn,
+  RowContext,
+} from "../src/types";
+
+describe("FileContext type", () => {
+  it("should have correct structure", () => {
+    expectTypeOf<FileContext>().toMatchTypeOf<{
+      version: string;
+      dir: string;
+      path: string;
+      name: string;
+      ext: string;
+    }>();
+  });
+
+  it("should allow specific dir values", () => {
+    const file: FileContext = {
+      version: "16.0.0",
+      dir: "ucd",
+      path: "ucd/LineBreak.txt",
+      name: "LineBreak.txt",
+      ext: ".txt",
+    };
+
+    expectTypeOf(file.dir).toMatchTypeOf<string>();
+  });
+});
+
+describe("RowContext type", () => {
+  it("should have optional property field", () => {
+    expectTypeOf<RowContext>().toMatchTypeOf<{ property?: string }>();
+  });
+});
+
+describe("FilterContext type", () => {
+  it("should have file and optional row", () => {
+    expectTypeOf<FilterContext>().toMatchTypeOf<{
+      file: FileContext;
+      row?: RowContext;
+    }>();
+  });
+});
+
+describe("PipelineFilter type", () => {
+  it("should be a predicate function", () => {
+    expectTypeOf<PipelineFilter>().toEqualTypeOf<(ctx: FilterContext) => boolean>();
+  });
+});
+
+describe("ParsedRow type", () => {
+  it("should have required sourceFile and kind", () => {
+    expectTypeOf<ParsedRow>().toMatchTypeOf<{
+      sourceFile: string;
+      kind: "range" | "point" | "sequence" | "alias";
+    }>();
+  });
+
+  it("should have optional range fields", () => {
+    expectTypeOf<ParsedRow>().toMatchTypeOf<{
+      start?: string;
+      end?: string;
+      codePoint?: string;
+      sequence?: string[];
+    }>();
+  });
+
+  it("should have optional property and value", () => {
+    expectTypeOf<ParsedRow>().toMatchTypeOf<{
+      property?: string;
+      value?: string | string[];
+      meta?: Record<string, unknown>;
+    }>();
+  });
+});
+
+describe("ParseContext type", () => {
+  it("should have file and reader methods", () => {
+    expectTypeOf<ParseContext>().toMatchTypeOf<{
+      file: FileContext;
+      readContent: () => Promise<string>;
+      readLines: () => AsyncIterable<string>;
+      isComment: (line: string) => boolean;
+    }>();
+  });
+});
+
+describe("ParserFn type", () => {
+  it("should take ParseContext and return AsyncIterable of 
ParsedRow", () => { + expectTypeOf().toEqualTypeOf<(ctx: ParseContext) => AsyncIterable>(); + }); +}); + +describe("ResolvedEntry type", () => { + it("should have value and optional location fields", () => { + expectTypeOf().toMatchTypeOf<{ + value: string | string[]; + range?: `${string}..${string}`; + codePoint?: string; + sequence?: string[]; + }>(); + }); +}); + +describe("DefaultRange type", () => { + it("should have range and value", () => { + expectTypeOf().toMatchTypeOf<{ + range: `${string}..${string}`; + value: string | string[]; + }>(); + }); +}); + +describe("PropertyJson type", () => { + it("should have version, property, file, and entries", () => { + expectTypeOf().toMatchTypeOf<{ + version: string; + property: string; + file: string; + entries: ResolvedEntry[]; + }>(); + }); + + it("should have optional defaults and meta", () => { + expectTypeOf().toMatchTypeOf<{ + defaults?: DefaultRange[]; + meta?: Record; + }>(); + }); +}); + +describe("ResolveContext type", () => { + it("should have version and file", () => { + expectTypeOf().toMatchTypeOf<{ + version: string; + file: FileContext; + }>(); + }); + + it("should have getArtifact method", () => { + type Ctx = ResolveContext<{ aliases: Map; count: number }>; + + expectTypeOf().toBeFunction(); + }); + + it("should have utility methods", () => { + expectTypeOf().toMatchTypeOf<{ + normalizeEntries: (entries: ResolvedEntry[]) => ResolvedEntry[]; + now: () => string; + }>(); + }); +}); + +describe("ResolverFn type", () => { + it("should take context and rows and return promise", () => { + type Fn = ResolverFn, PropertyJson[]>; + + expectTypeOf().toMatchTypeOf< + (ctx: ResolveContext>, rows: AsyncIterable) => Promise + >(); + }); + + it("should support custom artifact types", () => { + type CustomArtifacts = { cache: Map }; + type Fn = ResolverFn; + + expectTypeOf().toMatchTypeOf< + (ctx: ResolveContext, rows: AsyncIterable) => Promise + >(); + }); +}); + +describe("PipelineArtifactDefinition type", () => { + it("should have id and build function", () => { + expectTypeOf().toMatchTypeOf<{ + id: string; + build: (ctx: { version: string }, rows?: AsyncIterable) => Promise; + }>(); + }); + + it("should have optional filter and parser", () => { + expectTypeOf().toMatchTypeOf<{ + filter?: PipelineFilter; + parser?: ParserFn; + }>(); + }); + + it("should preserve generic id type", () => { + type Specific = PipelineArtifactDefinition<"my-id", number>; + expectTypeOf().toEqualTypeOf<"my-id">(); + }); + + it("should preserve generic value type", () => { + type Specific = PipelineArtifactDefinition<"id", Map>; + expectTypeOf().returns.resolves.toEqualTypeOf>(); + }); +}); + +describe("InferArtifactId type", () => { + it("should extract id from artifact definition", () => { + type Def = PipelineArtifactDefinition<"extracted-id", unknown>; + expectTypeOf>().toEqualTypeOf<"extracted-id">(); + }); +}); + +describe("InferArtifactValue type", () => { + it("should extract value type from artifact definition", () => { + type Def = PipelineArtifactDefinition<"id", Set>; + expectTypeOf>().toEqualTypeOf>(); + }); +}); + +describe("InferArtifactsMap type", () => { + it("should create a map from artifact array", () => { + type Artifacts = [ + PipelineArtifactDefinition<"a", string>, + PipelineArtifactDefinition<"b", number>, + PipelineArtifactDefinition<"c", boolean>, + ]; + + expectTypeOf>().toEqualTypeOf<{ + a: string; + b: number; + c: boolean; + }>(); + }); + + it("should handle complex value types", () => { + type Artifacts = [ + 
PipelineArtifactDefinition<"map", Map>, + PipelineArtifactDefinition<"set", Set>, + PipelineArtifactDefinition<"obj", { nested: { value: boolean } }>, + ]; + + expectTypeOf>().toEqualTypeOf<{ + map: Map; + set: Set; + obj: { nested: { value: boolean } }; + }>(); + }); +}); + +describe("PipelineRouteDefinition type", () => { + it("should have id, filter, parser, and resolver", () => { + expectTypeOf().toMatchTypeOf<{ + id: string; + filter: PipelineFilter; + parser: ParserFn; + }>(); + }); + + it("should preserve generic id type", () => { + type Route = PipelineRouteDefinition<"my-route">; + + expectTypeOf().toEqualTypeOf<"my-route">(); + }); +}); + +describe("InferRouteId type", () => { + it("should extract id from route definition", () => { + type Route = PipelineRouteDefinition<"line-break">; + expectTypeOf>().toEqualTypeOf<"line-break">(); + }); +}); + +describe("InferRouteOutput type", () => { + it("should extract output type from route definition", () => { + type Route = PipelineRouteDefinition<"id", readonly [], Record, readonly [], { custom: true }[]>; + expectTypeOf>().toEqualTypeOf<{ custom: true }[]>(); + }); +}); + +describe("InferRoutesOutput type", () => { + it("should union output types from route array", () => { + type Routes = readonly [ + PipelineRouteDefinition<"a", readonly [], Record, readonly [], PropertyJson[]>, + PipelineRouteDefinition<"b", readonly [], Record, readonly [], PropertyJson[]>, + ]; + + expectTypeOf>().toEqualTypeOf(); + }); +}); + +describe("PipelineEvent type", () => { + it("should be a union of event types", () => { + expectTypeOf().toMatchTypeOf<{ type: string; timestamp: number }>(); + }); + + it("should include pipeline lifecycle events", () => { + const startEvent: PipelineEvent = { + type: "pipeline:start", + versions: ["16.0.0"], + timestamp: Date.now(), + }; + + const endEvent: PipelineEvent = { + type: "pipeline:end", + durationMs: 100, + timestamp: Date.now(), + }; + + expectTypeOf(startEvent).toMatchTypeOf(); + expectTypeOf(endEvent).toMatchTypeOf(); + }); + + it("should include version events", () => { + const startEvent: PipelineEvent = { + type: "version:start", + version: "16.0.0", + timestamp: Date.now(), + }; + + expectTypeOf(startEvent).toMatchTypeOf(); + }); + + it("should include file events", () => { + const matchedEvent: PipelineEvent = { + type: "file:matched", + file: { version: "16.0.0", dir: "", path: "test.txt", name: "test.txt", ext: ".txt" }, + routeId: "route-id", + timestamp: Date.now(), + }; + + expectTypeOf(matchedEvent).toMatchTypeOf(); + }); + + it("should include error events", () => { + const errorEvent: PipelineEvent = { + type: "error", + error: { scope: "route", message: "Failed", routeId: "id", version: "16.0.0" }, + timestamp: Date.now(), + }; + + expectTypeOf(errorEvent).toMatchTypeOf(); + }); +}); + +describe("PipelineGraphNode type", () => { + it("should have id and type", () => { + expectTypeOf().toMatchTypeOf<{ + id: string; + type: "source" | "artifact" | "file" | "route" | "output"; + }>(); + }); +}); + +describe("PipelineGraphEdge type", () => { + it("should have from, to, and type", () => { + expectTypeOf().toMatchTypeOf<{ + from: string; + to: string; + type: "provides" | "matched" | "parsed" | "resolved" | "uses-artifact"; + }>(); + }); +}); + +describe("PipelineGraph type", () => { + it("should have nodes and edges", () => { + expectTypeOf().toMatchTypeOf<{ + nodes: PipelineGraphNode[]; + edges: PipelineGraphEdge[]; + }>(); + }); +}); + +describe("PipelineError type", () => { + it("should have 
scope and message", () => { + expectTypeOf().toMatchTypeOf<{ + scope: "artifact" | "route" | "file" | "pipeline" | "version"; + message: string; + }>(); + }); + + it("should have optional version", () => { + expectTypeOf().toMatchTypeOf<{ version?: string }>(); + }); + + it("should have optional context fields", () => { + expectTypeOf().toMatchTypeOf<{ + error?: unknown; + file?: FileContext; + routeId?: string; + artifactId?: string; + }>(); + }); +}); + +describe("PipelineSummary type", () => { + it("should have file counts", () => { + expectTypeOf().toMatchTypeOf<{ + versions: string[]; + totalFiles: number; + matchedFiles: number; + skippedFiles: number; + fallbackFiles: number; + totalOutputs: number; + durationMs: number; + }>(); + }); +}); + +describe("PipelineRunResult type", () => { + it("should have data, graph, errors, and summary", () => { + expectTypeOf>().toMatchTypeOf<{ + data: PropertyJson[]; + graph: PipelineGraph; + errors: PipelineError[]; + summary: PipelineSummary; + }>(); + }); + + it("should preserve generic data type", () => { + type CustomOutput = { custom: true }; + expectTypeOf["data"]>().toEqualTypeOf(); + }); +}); diff --git a/packages/pipelines/tsconfig.build.json b/packages/pipelines/tsconfig.build.json new file mode 100644 index 000000000..36c889e0c --- /dev/null +++ b/packages/pipelines/tsconfig.build.json @@ -0,0 +1,5 @@ +{ + "extends": "./tsconfig.json", + "include": ["src"], + "exclude": ["dist", "test"] +} diff --git a/packages/pipelines/tsconfig.json b/packages/pipelines/tsconfig.json new file mode 100644 index 000000000..07edf31d8 --- /dev/null +++ b/packages/pipelines/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "@ucdjs-tooling/tsconfig/base", + "include": [ + "src", + "test", + "playgrounds" + ], + "exclude": ["dist"] +} diff --git a/packages/pipelines/tsdown.config.ts b/packages/pipelines/tsdown.config.ts new file mode 100644 index 000000000..dee0149e6 --- /dev/null +++ b/packages/pipelines/tsdown.config.ts @@ -0,0 +1,7 @@ +import { createTsdownConfig } from "@ucdjs-tooling/tsdown-config"; + +export default createTsdownConfig({ + entry: [ + "./src/index.ts", + ], +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e1476863a..b4ed3cd63 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -913,6 +913,46 @@ importers: specifier: catalog:testing version: 4.3.0(vitest@4.0.16) + packages/pipelines: + dependencies: + '@ucdjs-internal/shared': + specifier: workspace:* + version: link:../shared + picomatch: + specifier: catalog:prod + version: 4.0.3 + zod: + specifier: catalog:prod + version: 4.3.5 + devDependencies: + '@luxass/eslint-config': + specifier: catalog:linting + version: 6.0.3(@eslint-react/eslint-plugin@2.3.12(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(@vue/compiler-sfc@3.5.25)(eslint-plugin-format@1.2.0(eslint@9.39.2(jiti@2.6.1)))(eslint-plugin-react-hooks@7.0.1(eslint@9.39.2(jiti@2.6.1)))(eslint-plugin-react-refresh@0.4.24(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)(vitest@4.0.16) + '@types/picomatch': + specifier: catalog:types + version: 4.0.2 + '@ucdjs-tooling/tsconfig': + specifier: workspace:* + version: link:../../tooling/tsconfig + '@ucdjs-tooling/tsdown-config': + specifier: workspace:* + version: link:../../tooling/tsdown-config + eslint: + specifier: catalog:linting + version: 9.39.2(jiti@2.6.1) + publint: + specifier: catalog:dev + version: 0.3.16 + tsdown: + specifier: catalog:dev + version: 0.18.4(publint@0.3.16)(synckit@0.11.11)(typescript@5.9.3) + tsx: + specifier: catalog:dev + version: 
4.21.0 + typescript: + specifier: catalog:dev + version: 5.9.3 + packages/schema-gen: dependencies: '@ai-sdk/openai':