diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8846ee8..f69aa38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,6 +59,7 @@ jobs: - name: Run benchmarks id: bench + continue-on-error: true run: | BENCH_ARGS="--ci --markdown-file benchmark-report.md --json-file benchmark-results.json" if [ -f perf-baseline.json ]; then @@ -77,11 +78,9 @@ jobs: console.log('No benchmark report found, skipping comment.'); return; } - let body = fs.readFileSync(reportPath, 'utf-8').trim(); + const body = fs.readFileSync(reportPath, 'utf-8').trim(); if (!body) return; - body += '\n\n---\n*Compared against master baseline. Threshold: 25% regression.*'; - const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, diff --git a/bun.lock b/bun.lock index 49850c4..3f10d15 100644 --- a/bun.lock +++ b/bun.lock @@ -10,6 +10,8 @@ "mongodb": "^7.1.0", "openai": "^6.22.0", "papaparse": "^5.5.3", + "string-width": "^8.2.0", + "xlsx": "^0.18.5", }, "devDependencies": { "@types/better-sqlite3": "^7.6.13", @@ -295,8 +297,12 @@ "@vueuse/shared": ["@vueuse/shared@12.8.2", "", { "dependencies": { "vue": "^3.5.13" } }, "sha512-dznP38YzxZoNloI0qpEfpkms8knDtaoQ6Y/sfS0L7Yki4zh40LFHEhur0odJC6xTHG5dxWVPiUWBXn+wCG2s5w=="], + "adler-32": ["adler-32@1.3.1", "", {}, "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A=="], + "algoliasearch": ["algoliasearch@5.49.0", "", { "dependencies": { "@algolia/abtesting": "1.15.0", "@algolia/client-abtesting": "5.49.0", "@algolia/client-analytics": "5.49.0", "@algolia/client-common": "5.49.0", "@algolia/client-insights": "5.49.0", "@algolia/client-personalization": "5.49.0", "@algolia/client-query-suggestions": "5.49.0", "@algolia/client-search": "5.49.0", "@algolia/ingestion": "1.49.0", "@algolia/monitoring": "1.49.0", "@algolia/recommend": "5.49.0", "@algolia/requester-browser-xhr": "5.49.0", "@algolia/requester-fetch": "5.49.0", "@algolia/requester-node-http": "5.49.0" } }, "sha512-Tse7vx7WOvbU+kpq/L3BrBhSWTPbtMa59zIEhMn+Z2NoxZlpcCRUDCRxQ7kDFs1T3CHxDgvb+mDuILiBBpBaAA=="], + "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], + "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], "better-sqlite3": ["better-sqlite3@12.6.2", "", { "dependencies": { "bindings": "^1.5.0", "prebuild-install": "^7.1.1" } }, "sha512-8VYKM3MjCa9WcaSAI3hzwhmyHVlH8tiGFwf0RlTsZPWJ1I5MkzjiudCo4KC4DxOaL/53A5B1sI/IbldNFDbsKA=="], @@ -315,16 +321,22 @@ "ccount": ["ccount@2.0.1", "", {}, "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="], + "cfb": ["cfb@1.2.2", "", { "dependencies": { "adler-32": "~1.3.0", "crc-32": "~1.2.0" } }, "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA=="], + "character-entities-html4": ["character-entities-html4@2.1.0", "", {}, "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA=="], "character-entities-legacy": ["character-entities-legacy@3.0.0", "", {}, "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ=="], "chownr": ["chownr@1.1.4", "", {}, "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="], + "codepage": ["codepage@1.15.0", "", {}, "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA=="], + "comma-separated-tokens": ["comma-separated-tokens@2.0.3", "", {}, "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg=="], "copy-anything": ["copy-anything@4.0.5", "", { "dependencies": { "is-what": "^5.2.0" } }, "sha512-7Vv6asjS4gMOuILabD3l739tsaxFQmC+a7pLZm02zyvs8p977bL3zEgq3yDk5rn9B0PbYgIv++jmHcuUab4RhA=="], + "crc-32": ["crc-32@1.2.2", "", { "bin": { "crc32": "bin/crc32.njs" } }, "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ=="], + "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], "decompress-response": ["decompress-response@6.0.0", "", { "dependencies": { "mimic-response": "^3.1.0" } }, "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ=="], @@ -355,10 +367,14 @@ "focus-trap": ["focus-trap@7.8.0", "", { "dependencies": { "tabbable": "^6.4.0" } }, "sha512-/yNdlIkpWbM0ptxno3ONTuf+2g318kh2ez3KSeZN5dZ8YC6AAmgeWz+GasYYiBJPFaYcSAPeu4GfhUaChzIJXA=="], + "frac": ["frac@1.1.2", "", {}, "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA=="], + "fs-constants": ["fs-constants@1.0.0", "", {}, "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="], "fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], + "get-east-asian-width": ["get-east-asian-width@1.5.0", "", {}, "sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA=="], + "github-from-package": ["github-from-package@0.0.0", "", {}, "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="], "hast-util-to-html": ["hast-util-to-html@9.0.5", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/unist": "^3.0.0", "ccount": "^2.0.0", "comma-separated-tokens": "^2.0.0", "hast-util-whitespace": "^3.0.0", "html-void-elements": "^3.0.0", "mdast-util-to-hast": "^13.0.0", "property-information": "^7.0.0", "space-separated-tokens": "^2.0.0", "stringify-entities": "^4.0.0", "zwitch": "^2.0.4" } }, "sha512-OguPdidb+fbHQSU4Q4ZiLKnzWo8Wwsf5bZfbvu7//a9oTYoqD/fWpe96NuHkoS9h0ccGOTe0C4NGXdtS0iObOw=="], @@ -497,10 +513,16 @@ "speakingurl": ["speakingurl@14.0.1", "", {}, "sha512-1POYv7uv2gXoyGFpBCmpDVSNV74IfsWlDW216UPjbWufNf+bSU6GdbDsxdcxtfwb4xlI3yxzOTKClUosxARYrQ=="], + "ssf": ["ssf@0.11.2", "", { "dependencies": { "frac": "~1.1.2" } }, "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g=="], + + "string-width": ["string-width@8.2.0", "", { "dependencies": { "get-east-asian-width": "^1.5.0", "strip-ansi": "^7.1.2" } }, "sha512-6hJPQ8N0V0P3SNmP6h2J99RLuzrWz2gvT7VnK5tKvrNqJoyS9W4/Fb8mo31UiPvy00z7DQXkP2hnKBVav76thw=="], + "string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="], "stringify-entities": ["stringify-entities@4.0.4", "", { "dependencies": { "character-entities-html4": "^2.0.0", "character-entities-legacy": "^3.0.0" } }, "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg=="], + "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + "strip-json-comments": ["strip-json-comments@2.0.1", "", {}, "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ=="], "strnum": ["strnum@2.1.2", "", {}, "sha512-l63NF9y/cLROq/yqKXSLtcMeeyOfnSQlfMSlzFt/K73oIaD8DGaQWd7Z34X9GPiKqP5rbSh84Hl4bOlLcjiSrQ=="], @@ -549,8 +571,14 @@ "whatwg-url": ["whatwg-url@14.2.0", "", { "dependencies": { "tr46": "^5.1.0", "webidl-conversions": "^7.0.0" } }, "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw=="], + "wmf": ["wmf@1.0.2", "", {}, "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw=="], + + "word": ["word@0.3.0", "", {}, "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA=="], + "wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="], + "xlsx": ["xlsx@0.18.5", "", { "dependencies": { "adler-32": "~1.3.0", "cfb": "~1.2.1", "codepage": "~1.15.0", "crc-32": "~1.2.1", "ssf": "~0.11.2", "wmf": "~1.0.1", "word": "~0.3.0" }, "bin": { "xlsx": "bin/xlsx.njs" } }, "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ=="], + "zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="], } } diff --git a/man/man1/recs-decollate.1 b/man/man1/recs-decollate.1 index 1554dcd..b9911d5 100644 --- a/man/man1/recs-decollate.1 +++ b/man/man1/recs-decollate.1 @@ -15,6 +15,9 @@ Reverse of collate: takes a single record and produces multiple records using de \fB--deaggregator\fR, \fB-d\fR \fI\fR Deaggregator specification (colon\-separated). .TP +\fB--only\fR, \fB-o\fR +Only output deaggregated fields, excluding original record fields. Useful when you only want the expanded data, not the source record. +.TP \fB--list-deaggregators\fR List available deaggregators and exit. @@ -27,6 +30,14 @@ Split the \'hosts\' field into individual \'host\' fields .fi .RE +Decollate and only keep deaggregated fields +.PP +.RS 4 +.nf +\fBrecs decollate --only -d \'unarray,items,,item\'\fR +.fi +.RE + .SH SEE ALSO \fBrecs\-collate\fR(1) diff --git a/man/man1/recs-fromxls.1 b/man/man1/recs-fromxls.1 new file mode 100644 index 0000000..621bc66 --- /dev/null +++ b/man/man1/recs-fromxls.1 @@ -0,0 +1,60 @@ +.TH RECS\-FROMXLS 1 "2026-02-22" "recs 0.1.0" "RecordStream Manual" + +.SH NAME +recs\-fromxls \- Parse Excel files (xls, xlsx, xlsb, xlsm) into records + +.SH SYNOPSIS +.B recs fromxls [options] + +.SH DESCRIPTION +Parse Excel files (xls, xlsx, xlsb, xlsm) into records. By default, reads the first sheet and uses the first row as header names. +.PP + +.SH OPTIONS +.TP +\fB--key\fR, \fB-k\fR \fI\fR +Comma separated list of field names. Overrides header detection. +.TP +\fB--field\fR, \fB-f\fR \fI\fR +Comma separated list of field names. Overrides header detection. +.TP +\fB--no-header\fR, \fB-n\fR +Do not treat the first row as a header. Fields will be named numerically (0, 1, 2, ...). +.TP +\fB--sheet\fR, \fB-s\fR \fI\fR +Specify a sheet name to read. Defaults to the first sheet. +.TP +\fB--all-sheets\fR +Read all sheets in the workbook, adding a \'sheet\' field to each record. + +.SH EXAMPLES +Read an Excel file using headers from the first row +.PP +.RS 4 +.nf +\fBrecs fromxls data.xlsx\fR +.fi +.RE + +Read a specific sheet without headers +.PP +.RS 4 +.nf +\fBrecs fromxls --sheet \'Sheet2\' --no-header -k name,value data.xlsx\fR +.fi +.RE + +Read all sheets +.PP +.RS 4 +.nf +\fBrecs fromxls --all-sheets data.xlsx\fR +.fi +.RE + +.SH SEE ALSO +\fBrecs\-fromcsv\fR(1) + +.SH AUTHOR +Benjamin Bernard + diff --git a/man/man1/recs-multiplex.1 b/man/man1/recs-multiplex.1 index 225f74d..33b3bac 100644 --- a/man/man1/recs-multiplex.1 +++ b/man/man1/recs-multiplex.1 @@ -21,6 +21,12 @@ Domain language key: name=expression where the expression evaluates as a valuati \fB--line-key\fR, \fB-L\fR \fI\fR Use the value of this key as line input for the nested operation (rather than the entire record). Use with recs from* operations generally. .TP +\fB--output-file-key\fR, \fB-o\fR \fI\fR +Write each group\'s output to a separate file, using the value of the given key as the filename. +.TP +\fB--output-file-eval\fR, \fB-O\fR \fI\fR +Write each group\'s output to a separate file, with filename determined by the given expression. Supports {{key}} interpolation with group key values. +.TP \fB--adjacent\fR, \fB-1\fR Only group together adjacent records. Avoids spooling records into memory. .TP @@ -59,6 +65,14 @@ Separate out a stream of text by PID into separate invocations of an operation .fi .RE +Write each group\'s CSV output to separate files by department +.PP +.RS 4 +.nf +\fBrecs multiplex -k department -O \'output-{{department}}.csv\' -- recs tocsv\fR +.fi +.RE + .SH SEE ALSO \fBrecs\-collate\fR(1), \fBrecs\-chain\fR(1) diff --git a/man/man1/recs-parsedate.1 b/man/man1/recs-parsedate.1 new file mode 100644 index 0000000..f888365 --- /dev/null +++ b/man/man1/recs-parsedate.1 @@ -0,0 +1,66 @@ +.TH RECS\-PARSEDATE 1 "2026-02-22" "recs 0.1.0" "RecordStream Manual" + +.SH NAME +recs\-parsedate \- Parse date/time strings from a field and output them in a normalized format + +.SH SYNOPSIS +.B recs parsedate [options] [files...] + +.SH DESCRIPTION +Parse date/time strings from a field and output them in a normalized format. Supports epoch seconds, ISO 8601, and custom strftime\-style format strings for both input and output. +.PP + +.SH OPTIONS +.TP +\fB--key\fR, \fB-k\fR \fI\fR +Key field containing the date/time string to parse. +.TP +\fB--output\fR, \fB-o\fR \fI\fR +Output key for the parsed date (defaults to \'parsed_\'). +.TP +\fB--format\fR, \fB-f\fR \fI\fR +Input format for parsing (strftime\-style). Common directives: %Y (4\-digit year), %m (month), %d (day), %H (hour 24h), %M (minute), %S (second), %b (abbreviated month name), %p (AM/PM). +.TP +\fB--output-format\fR, \fB-F\fR \fI\fR +Output format (strftime\-style). Defaults to ISO 8601 if not specified. +.TP +\fB--epoch\fR, \fB-e\fR +Input date is in epoch seconds. +.TP +\fB--output-epoch\fR, \fB-E\fR +Output as epoch seconds instead of a formatted string. +.TP +\fB--timezone\fR, \fB-z\fR \fI\fR +Timezone for output formatting (IANA name like \'America/New_York\'). + +.SH EXAMPLES +Parse dates and output as epoch seconds +.PP +.RS 4 +.nf +\fBrecs parsedate -k date -E\fR +.fi +.RE + +Parse a custom date format and reformat it +.PP +.RS 4 +.nf +\fBrecs parsedate -k timestamp -f \'%d/%b/%Y:%H:%M:%S\' -F \'%Y-%m-%d %H:%M:%S\'\fR +.fi +.RE + +Convert epoch seconds to ISO 8601 +.PP +.RS 4 +.nf +\fBrecs parsedate -k time -e -o iso_time\fR +.fi +.RE + +.SH SEE ALSO +\fBrecs\-normalizetime\fR(1) + +.SH AUTHOR +Benjamin Bernard + diff --git a/man/man1/recs.1 b/man/man1/recs.1 index 5253ba3..4b09d80 100644 --- a/man/man1/recs.1 +++ b/man/man1/recs.1 @@ -53,6 +53,9 @@ Runs tcpdump and puts out records, one for each packet \fBrecs fromxferlog\fR Each line of input (or lines of ) is parsed as an FTP transfer log (xferlog format) to produce an output record .TP +\fBrecs fromxls\fR +Parse Excel files (xls, xlsx, xlsb, xlsm) into records +.TP \fBrecs fromxml\fR Reads either from STDIN or from the specified URIs @@ -100,6 +103,9 @@ Take records, grouped together by \-\-keys, and run a separate operation instanc \fBrecs normalizetime\fR Given a single key field containing a date/time value, construct a normalized version of the value and place it into a field named \'n_\' .TP +\fBrecs parsedate\fR +Parse date/time strings from a field and output them in a normalized format +.TP \fBrecs sort\fR Sort records from input or from files .TP diff --git a/package.json b/package.json index 12d625d..7ec4f89 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,8 @@ "fast-xml-parser": "^5.3.7", "mongodb": "^7.1.0", "openai": "^6.22.0", - "papaparse": "^5.5.3" + "papaparse": "^5.5.3", + "string-width": "^8.2.0", + "xlsx": "^0.18.5" } } diff --git a/src/Operation.ts b/src/Operation.ts index ff8eb29..38f90fb 100644 --- a/src/Operation.ts +++ b/src/Operation.ts @@ -50,12 +50,18 @@ export class PrinterReceiver implements RecordReceiver { */ export class CollectorReceiver implements RecordReceiver { readonly records: Record[] = []; + readonly lines: string[] = []; acceptRecord(record: Record): boolean { this.records.push(record); return true; } + acceptLine(line: string): boolean { + this.lines.push(line); + return true; + } + finish(): void { // nothing to do } diff --git a/src/cli/dispatcher.ts b/src/cli/dispatcher.ts index f4f6962..8dcd987 100644 --- a/src/cli/dispatcher.ts +++ b/src/cli/dispatcher.ts @@ -20,6 +20,7 @@ import { FromRe } from "../operations/input/fromre.ts"; import { FromSplit } from "../operations/input/fromsplit.ts"; import { FromTcpdump } from "../operations/input/fromtcpdump.ts"; import { FromXferlog } from "../operations/input/fromxferlog.ts"; +import { FromXls } from "../operations/input/fromxls.ts"; import { FromXml } from "../operations/input/fromxml.ts"; // -- Transform operations -- @@ -40,6 +41,7 @@ import { JoinOperation } from "../operations/transform/join.ts"; import { CollateOperation } from "../operations/transform/collate.ts"; import { DecollateOperation } from "../operations/transform/decollate.ts"; import { ExpandJsonOperation } from "../operations/transform/expandjson.ts"; +import { ParseDateOperation } from "../operations/transform/parsedate.ts"; import { ChainOperation, registerOperationFactory } from "../operations/transform/chain.ts"; import { MultiplexOperation } from "../operations/transform/multiplex.ts"; @@ -75,6 +77,7 @@ const operationRegistry = new Map([ ["fromsplit", FromSplit], ["fromtcpdump", FromTcpdump], ["fromxferlog", FromXferlog], + ["fromxls", FromXls], ["fromxml", FromXml], // Transform ["grep", GrepOperation], @@ -94,6 +97,7 @@ const operationRegistry = new Map([ ["join", JoinOperation], ["collate", CollateOperation], ["decollate", DecollateOperation], + ["parsedate", ParseDateOperation], ["chain", ChainOperation], ["multiplex", MultiplexOperation], // Output diff --git a/src/cli/operation-registry.ts b/src/cli/operation-registry.ts index 96c4a6c..0c1ca5f 100644 --- a/src/cli/operation-registry.ts +++ b/src/cli/operation-registry.ts @@ -22,6 +22,7 @@ import { documentation as fromre } from "../operations/input/fromre.ts"; import { documentation as fromsplit } from "../operations/input/fromsplit.ts"; import { documentation as fromtcpdump } from "../operations/input/fromtcpdump.ts"; import { documentation as fromxferlog } from "../operations/input/fromxferlog.ts"; +import { documentation as fromxls } from "../operations/input/fromxls.ts"; import { documentation as fromxml } from "../operations/input/fromxml.ts"; // ── Transform operations ──────────────────────────────────────── @@ -39,6 +40,7 @@ import { documentation as grep } from "../operations/transform/grep.ts"; import { documentation as join } from "../operations/transform/join.ts"; import { documentation as multiplex } from "../operations/transform/multiplex.ts"; import { documentation as normalizetime } from "../operations/transform/normalizetime.ts"; +import { documentation as parsedate } from "../operations/transform/parsedate.ts"; import { documentation as sort } from "../operations/transform/sort.ts"; import { documentation as stream2table } from "../operations/transform/stream2table.ts"; import { documentation as substream } from "../operations/transform/substream.ts"; @@ -72,6 +74,7 @@ export const allDocs: CommandDoc[] = [ fromsplit, fromtcpdump, fromxferlog, + fromxls, fromxml, // Transform annotate, @@ -88,6 +91,7 @@ export const allDocs: CommandDoc[] = [ join, multiplex, normalizetime, + parsedate, sort, stream2table, substream, diff --git a/src/operations/input/fromxls.ts b/src/operations/input/fromxls.ts new file mode 100644 index 0000000..856ea90 --- /dev/null +++ b/src/operations/input/fromxls.ts @@ -0,0 +1,196 @@ +import { readFileSync } from "node:fs"; +import { read, utils } from "xlsx"; +import { Operation, type OptionDef } from "../../Operation.ts"; +import { Record } from "../../Record.ts"; +import { setKey } from "../../KeySpec.ts"; + +/** + * Parse Excel files (xls, xlsx) into records. + * Each row becomes a record with field names taken from headers or numeric indices. + */ +export class FromXls extends Operation { + fields: string[] = []; + headerLine = true; + sheet: string | null = null; + allSheets = false; + extraArgs: string[] = []; + + init(args: string[]): void { + const defs: OptionDef[] = [ + { + long: "key", + short: "k", + type: "string", + handler: (v) => { + this.fields.push(...(v as string).split(",")); + }, + description: "Comma separated list of field names", + }, + { + long: "field", + short: "f", + type: "string", + handler: (v) => { + this.fields.push(...(v as string).split(",")); + }, + description: "Comma separated list of field names", + }, + { + long: "no-header", + short: "n", + type: "boolean", + handler: () => { this.headerLine = false; }, + description: "Do not treat the first row as a header line", + }, + { + long: "sheet", + short: "s", + type: "string", + handler: (v) => { this.sheet = v as string; }, + description: "Sheet name to read (defaults to first sheet)", + }, + { + long: "all-sheets", + type: "boolean", + handler: () => { this.allSheets = true; }, + description: "Read all sheets, adding a 'sheet' field to each record", + }, + ]; + + this.extraArgs = this.parseOptions(args, defs); + + if (this.extraArgs.length === 0) { + throw new Error("fromxls requires at least one file argument"); + } + } + + acceptRecord(_record: Record): boolean { + return true; + } + + override wantsInput(): boolean { + return false; + } + + override streamDone(): void { + for (const file of this.extraArgs) { + this.updateCurrentFilename(file); + this.parseFile(file); + } + } + + parseFile(file: string): void { + const buffer = readFileSync(file); + const workbook = read(buffer, { type: "buffer" }); + + const sheetNames = this.allSheets + ? workbook.SheetNames + : [this.sheet ?? workbook.SheetNames[0]!]; + + for (const sheetName of sheetNames) { + const sheet = workbook.Sheets[sheetName]; + if (!sheet) { + throw new Error(`Sheet '${sheetName}' not found in ${file}`); + } + + const rows = utils.sheet_to_json(sheet, { + header: 1, + defval: "", + raw: false, + }) as string[][]; + + if (rows.length === 0) continue; + + let fields = [...this.fields]; + let startRow = 0; + + if (this.headerLine && fields.length === 0) { + fields = rows[0]!.map((v) => String(v).trim()); + startRow = 1; + } + + for (let i = startRow; i < rows.length; i++) { + const row = rows[i]!; + // Skip entirely empty rows + if (row.every((v) => String(v).trim() === "")) continue; + + const record = new Record(); + const data = record.dataRef(); + + for (let j = 0; j < row.length; j++) { + const key = fields[j] ?? String(j); + const val = String(row[j] ?? ""); + // Try to parse numbers + const num = Number(val); + if (val !== "" && !isNaN(num) && isFinite(num)) { + setKey(data, key, num); + } else { + setKey(data, key, val); + } + } + + if (this.allSheets) { + setKey(data, "sheet", sheetName); + } + + this.pushRecord(record); + } + } + } +} + +import type { CommandDoc } from "../../types/CommandDoc.ts"; + +export const documentation: CommandDoc = { + name: "fromxls", + category: "input", + synopsis: "recs fromxls [options] ", + description: + "Parse Excel files (xls, xlsx, xlsb, xlsm) into records. " + + "By default, reads the first sheet and uses the first row as header names.", + options: [ + { + flags: ["--key", "-k"], + argument: "", + description: + "Comma separated list of field names. Overrides header detection.", + }, + { + flags: ["--field", "-f"], + argument: "", + description: + "Comma separated list of field names. Overrides header detection.", + }, + { + flags: ["--no-header", "-n"], + description: + "Do not treat the first row as a header. Fields will be named numerically (0, 1, 2, ...).", + }, + { + flags: ["--sheet", "-s"], + argument: "", + description: + "Specify a sheet name to read. Defaults to the first sheet.", + }, + { + flags: ["--all-sheets"], + description: + "Read all sheets in the workbook, adding a 'sheet' field to each record.", + }, + ], + examples: [ + { + description: "Read an Excel file using headers from the first row", + command: "recs fromxls data.xlsx", + }, + { + description: "Read a specific sheet without headers", + command: "recs fromxls --sheet 'Sheet2' --no-header -k name,value data.xlsx", + }, + { + description: "Read all sheets", + command: "recs fromxls --all-sheets data.xlsx", + }, + ], + seeAlso: ["fromcsv"], +}; diff --git a/src/operations/output/toptable.ts b/src/operations/output/toptable.ts index db30554..95e1f64 100644 --- a/src/operations/output/toptable.ts +++ b/src/operations/output/toptable.ts @@ -1,3 +1,4 @@ +import stringWidth from "string-width"; import { Record } from "../../Record.ts"; import { Operation, type RecordReceiver, type OptionDef } from "../../Operation.ts"; import { Accumulator } from "../../Accumulator.ts"; @@ -5,6 +6,10 @@ import { KeyGroups } from "../../KeyGroups.ts"; import { KeySpec } from "../../KeySpec.ts"; import type { JsonObject } from "../../types/json.ts"; +function displayWidth(str: string): number { + return stringWidth(str); +} + /** * A node in the values tree used to track unique value tuples. * [0] = children map, [1] = insertion-order keys, [2] = assigned index (-1 if unset) @@ -433,14 +438,14 @@ export class ToPtable extends Operation { table[heightOffset + j]![widthOffset + i] = v; } - // Calculate column widths + // Calculate column widths using visual display width const colWidths: number[] = []; for (const row of table) { while (colWidths.length < row.length) { colWidths.push(0); } for (let i = 0; i < row.length; i++) { - const l = row[i]!.length; + const l = displayWidth(row[i]!); if (l > colWidths[i]!) { colWidths[i] = l; } @@ -615,7 +620,7 @@ function formatTableRow( let s = delim; for (let i = 0; i < widths.length; i++) { let cell = cellFn(i, widths[i]!); - cell += " ".repeat(Math.max(0, widths[i]! - cell.length)); + cell += " ".repeat(Math.max(0, widths[i]! - displayWidth(cell))); s += cell + delim; } return s; diff --git a/src/operations/output/totable.ts b/src/operations/output/totable.ts index c04d5d9..0ee9cf8 100644 --- a/src/operations/output/totable.ts +++ b/src/operations/output/totable.ts @@ -1,9 +1,30 @@ +import stringWidth from "string-width"; import { Record } from "../../Record.ts"; import { Operation, type RecordReceiver, type OptionDef } from "../../Operation.ts"; import { Accumulator } from "../../Accumulator.ts"; import { KeyGroups } from "../../KeyGroups.ts"; import { findKey } from "../../KeySpec.ts"; +/** + * Compute the visual display width of a string, accounting for + * East Asian wide characters, emoji, and other Unicode oddities. + */ +function displayWidth(str: string): number { + return stringWidth(str); +} + +/** + * Escape control characters (newlines, tabs, etc.) in a string so + * they don't break table alignment. + */ +function escapeForTable(str: string): string { + return str + .replace(/\\/g, "\\\\") + .replace(/\n/g, "\\n") + .replace(/\r/g, "\\r") + .replace(/\t/g, "\\t"); +} + /** * Pretty prints records as an ASCII table with column alignment. * Reads the entire record stream to determine column sizes. @@ -99,8 +120,9 @@ export class ToTable extends Operation { } const val = this.extractField(record, field); const currentMax = widths.get(field)!; - if (val.length > currentMax) { - widths.set(field, val.length); + const w = displayWidth(val); + if (w > currentMax) { + widths.set(field, w); } } } @@ -109,8 +131,9 @@ export class ToTable extends Operation { if (!this.noHeader) { for (const field of fields) { const currentMax = widths.get(field)!; - if (field.length > currentMax) { - widths.set(field, field.length); + const w = displayWidth(field); + if (w > currentMax) { + widths.set(field, w); } } } @@ -179,8 +202,8 @@ export class ToTable extends Operation { for (const field of fields) { let fieldStr = formatter(field, field); - if (!this.spreadsheet && fieldStr.length < widths.get(field)!) { - fieldStr += " ".repeat(widths.get(field)! - fieldStr.length); + if (!this.spreadsheet && displayWidth(fieldStr) < widths.get(field)!) { + fieldStr += " ".repeat(widths.get(field)! - displayWidth(fieldStr)); } if (first) { @@ -199,8 +222,8 @@ export class ToTable extends Operation { const data = record.dataRef(); const val = findKey(data, field, true); if (val === undefined || val === null) return ""; - if (typeof val === "object") return JSON.stringify(val); - return String(val); + if (typeof val === "object") return escapeForTable(JSON.stringify(val)); + return escapeForTable(String(val)); } override doesRecordOutput(): boolean { diff --git a/src/operations/transform/decollate.ts b/src/operations/transform/decollate.ts index 8ad77ea..fca9233 100644 --- a/src/operations/transform/decollate.ts +++ b/src/operations/transform/decollate.ts @@ -3,6 +3,7 @@ import type { OptionDef } from "../../Operation.ts"; import { deaggregatorRegistry } from "../../Deaggregator.ts"; import type { Deaggregator } from "../../Deaggregator.ts"; import { Record } from "../../Record.ts"; +import type { JsonValue } from "../../types/json.ts"; /** * Reverse of collate: takes a single record and produces multiple records @@ -12,6 +13,7 @@ import { Record } from "../../Record.ts"; */ export class DecollateOperation extends Operation { deaggregators: Deaggregator[] = []; + onlyDeaggregated = false; init(args: string[]): void { const deaggSpecs: string[] = []; @@ -27,6 +29,13 @@ export class DecollateOperation extends Operation { }, description: "Deaggregator specification (colon-separated)", }, + { + long: "only", + short: "o", + type: "boolean", + handler: () => { this.onlyDeaggregated = true; }, + description: "Only output deaggregated fields, excluding original record fields", + }, { long: "list-deaggregators", type: "boolean", @@ -55,11 +64,24 @@ export class DecollateOperation extends Operation { const results = deaggregator.deaggregate(record); for (const deaggregated of results) { - // Merge original record with deaggregated fields - const merged = new Record({ - ...record.toJSON(), - ...deaggregated.toJSON(), - }); + let merged: Record; + if (this.onlyDeaggregated) { + // Extract only the fields that differ from the original record + const origData = record.toJSON(); + const deaggData = deaggregated.toJSON(); + const onlyNew: { [key: string]: JsonValue } = {}; + for (const [k, v] of Object.entries(deaggData)) { + if (!(k in origData) || origData[k] !== v) { + onlyNew[k] = v; + } + } + merged = new Record(onlyNew); + } else { + merged = new Record({ + ...record.toJSON(), + ...deaggregated.toJSON(), + }); + } this.deaggregateRecursive(depth + 1, merged); } } else { @@ -83,6 +105,12 @@ export const documentation: CommandDoc = { description: "Deaggregator specification (colon-separated).", argument: "", }, + { + flags: ["--only", "-o"], + description: + "Only output deaggregated fields, excluding original record fields. " + + "Useful when you only want the expanded data, not the source record.", + }, { flags: ["--list-deaggregators"], description: "List available deaggregators and exit.", @@ -93,6 +121,10 @@ export const documentation: CommandDoc = { description: "Split the 'hosts' field into individual 'host' fields", command: "recs decollate --deaggregator 'split,hosts,/\\s*,\\s*/,host'", }, + { + description: "Decollate and only keep deaggregated fields", + command: "recs decollate --only -d 'unarray,items,,item'", + }, ], seeAlso: ["collate"], }; diff --git a/src/operations/transform/index.ts b/src/operations/transform/index.ts index 42ebdcd..c9a47ff 100644 --- a/src/operations/transform/index.ts +++ b/src/operations/transform/index.ts @@ -14,5 +14,6 @@ export { SubstreamOperation } from "./substream.ts"; export { JoinOperation } from "./join.ts"; export { CollateOperation } from "./collate.ts"; export { DecollateOperation } from "./decollate.ts"; +export { ParseDateOperation } from "./parsedate.ts"; export { ChainOperation, registerOperationFactory, isRecsOperation, createOperation } from "./chain.ts"; export { MultiplexOperation } from "./multiplex.ts"; diff --git a/src/operations/transform/multiplex.ts b/src/operations/transform/multiplex.ts index 746ae44..8f7b7bf 100644 --- a/src/operations/transform/multiplex.ts +++ b/src/operations/transform/multiplex.ts @@ -1,3 +1,5 @@ +import { writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs"; +import { dirname } from "node:path"; import { Operation, CollectorReceiver } from "../../Operation.ts"; import type { OptionDef } from "../../Operation.ts"; import type { ClumperCallback } from "../../Clumper.ts"; @@ -16,34 +18,63 @@ class MultiplexClumperCallback implements ClumperCallback { operationName: string; operationArgs: string[]; lineKey: string | null; + outputFileKey: string | null; + outputFileEval: string | null; pushRecordCb: (record: Record) => boolean; pushLineCb: (line: string) => void; + initializedFiles = new Set(); constructor( operationName: string, operationArgs: string[], lineKey: string | null, + outputFileKey: string | null, + outputFileEval: string | null, pushRecordCb: (record: Record) => boolean, pushLineCb: (line: string) => void ) { this.operationName = operationName; this.operationArgs = operationArgs; this.lineKey = lineKey; + this.outputFileKey = outputFileKey; + this.outputFileEval = outputFileEval; this.pushRecordCb = pushRecordCb; this.pushLineCb = pushLineCb; } - clumperCallbackBegin(_options: { [key: string]: unknown }): unknown { + resolveOutputFile(options: { [key: string]: unknown }): string | null { + if (this.outputFileKey) { + const val = options[this.outputFileKey]; + if (val !== undefined && val !== null) { + return String(val); + } + return null; + } + if (this.outputFileEval) { + // Replace {{key}} placeholders with group values + let filename = this.outputFileEval; + for (const [key, val] of Object.entries(options)) { + filename = filename.replace( + new RegExp(`\\{\\{${key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\}\\}`, "g"), + String(val ?? "") + ); + } + return filename; + } + return null; + } + + clumperCallbackBegin(options: { [key: string]: unknown }): unknown { const collector = new CollectorReceiver(); const op = createOperation(this.operationName, [...this.operationArgs], collector); - return { operation: op, collector }; + const outputFile = this.resolveOutputFile(options); + return { operation: op, collector, outputFile }; } clumperCallbackPushRecord(cookie: unknown, record: Record): void { - const state = cookie as { operation: Operation; collector: CollectorReceiver }; + const state = cookie as { operation: Operation; collector: CollectorReceiver; outputFile: string | null }; if (this.lineKey) { - // Use the value of lineKey as input line to the operation const data = record.dataRef() as JsonObject; const lineValue = findKey(data, this.lineKey, true); if (lineValue !== undefined && lineValue !== null) { @@ -55,12 +86,40 @@ class MultiplexClumperCallback implements ClumperCallback { } clumperCallbackEnd(cookie: unknown): void { - const state = cookie as { operation: Operation; collector: CollectorReceiver }; + const state = cookie as { operation: Operation; collector: CollectorReceiver; outputFile: string | null }; state.operation.finish(); - // Push all collected records - for (const record of state.collector.records) { - this.pushRecordCb(record); + if (state.outputFile) { + // Write output to file + const dir = dirname(state.outputFile); + if (dir && dir !== "." && !existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + + // Initialize file (truncate) on first write, then append + if (!this.initializedFiles.has(state.outputFile)) { + writeFileSync(state.outputFile, ""); + this.initializedFiles.add(state.outputFile); + } + + // Write collected lines (from output operations like tocsv) + for (const line of state.collector.lines) { + appendFileSync(state.outputFile, line + "\n"); + } + + // Write collected records (from transform operations) + for (const record of state.collector.records) { + appendFileSync(state.outputFile, record.toString() + "\n"); + } + } else { + // Push lines to downstream + for (const line of state.collector.lines) { + this.pushLineCb(line); + } + // Push records to downstream + for (const record of state.collector.records) { + this.pushRecordCb(record); + } } } } @@ -85,6 +144,8 @@ export class MultiplexOperation extends Operation { init(args: string[]): void { const clumperOptions = new ClumperOptions(); let lineKey: string | null = null; + let outputFileKey: string | null = null; + let outputFileEval: string | null = null; const defs: OptionDef[] = [ { @@ -118,6 +179,20 @@ export class MultiplexOperation extends Operation { handler: (v) => { lineKey = v as string; }, description: "Use this key's value as line input for the nested operation", }, + { + long: "output-file-key", + short: "o", + type: "string", + handler: (v) => { outputFileKey = v as string; }, + description: "Write each group's output to a file named by the value of this key field", + }, + { + long: "output-file-eval", + short: "O", + type: "string", + handler: (v) => { outputFileEval = v as string; }, + description: "Write each group's output to a filename derived from the given expression (supports {{key}} interpolation)", + }, { long: "adjacent", short: "1", @@ -186,6 +261,8 @@ export class MultiplexOperation extends Operation { operationName, operationArgs, lineKey, + outputFileKey, + outputFileEval, (record: Record) => this.pushRecord(record), (line: string) => this.pushLine(line) ); @@ -231,6 +308,19 @@ export const documentation: CommandDoc = { "(rather than the entire record). Use with recs from* operations generally.", argument: "", }, + { + flags: ["--output-file-key", "-o"], + description: + "Write each group's output to a separate file, using the value of the given key as the filename.", + argument: "", + }, + { + flags: ["--output-file-eval", "-O"], + description: + "Write each group's output to a separate file, with filename determined by the given expression. " + + "Supports {{key}} interpolation with group key values.", + argument: "", + }, { flags: ["--adjacent", "-1"], description: "Only group together adjacent records. Avoids spooling records into memory.", @@ -275,6 +365,11 @@ export const documentation: CommandDoc = { command: "recs fromre '^(.*PID=([0-9]*).*)$' -f line,pid | recs multiplex -L line -k pid -- recs frommultire ...", }, + { + description: "Write each group's CSV output to separate files by department", + command: + "recs multiplex -k department -O 'output-{{department}}.csv' -- recs tocsv", + }, ], seeAlso: ["collate", "chain"], }; diff --git a/src/operations/transform/parsedate.ts b/src/operations/transform/parsedate.ts new file mode 100644 index 0000000..b7adf33 --- /dev/null +++ b/src/operations/transform/parsedate.ts @@ -0,0 +1,333 @@ +import { Operation } from "../../Operation.ts"; +import type { OptionDef } from "../../Operation.ts"; +import { findKey, setKey } from "../../KeySpec.ts"; +import { Record } from "../../Record.ts"; +import type { JsonObject } from "../../types/json.ts"; + +/** + * Parse date/time strings and reformat them. Reads a date string from a key, + * parses it, and writes the result (epoch seconds or formatted string) to an + * output key. + * + * Inspired by the Perl PR #74 parsedate operation. + */ +export class ParseDateOperation extends Operation { + inputKey = ""; + outputKey = ""; + inputFormat: string | null = null; + outputFormat: string | null = null; + epoch = false; + outputEpoch = false; + timezone: string | null = null; + + init(args: string[]): void { + const defs: OptionDef[] = [ + { + long: "key", + short: "k", + type: "string", + handler: (v) => { this.inputKey = v as string; }, + description: "Key field containing the date/time string to parse", + }, + { + long: "output", + short: "o", + type: "string", + handler: (v) => { this.outputKey = v as string; }, + description: "Output key for the parsed date (defaults to parsed_)", + }, + { + long: "format", + short: "f", + type: "string", + handler: (v) => { this.inputFormat = v as string; }, + description: "Input format for parsing (strftime-style: %Y-%m-%d %H:%M:%S)", + }, + { + long: "output-format", + short: "F", + type: "string", + handler: (v) => { this.outputFormat = v as string; }, + description: "Output format (strftime-style: %Y-%m-%d %H:%M:%S). Default is ISO 8601.", + }, + { + long: "epoch", + short: "e", + type: "boolean", + handler: () => { this.epoch = true; }, + description: "Input date is in epoch seconds", + }, + { + long: "output-epoch", + short: "E", + type: "boolean", + handler: () => { this.outputEpoch = true; }, + description: "Output as epoch seconds instead of a formatted string", + }, + { + long: "timezone", + short: "z", + type: "string", + handler: (v) => { this.timezone = v as string; }, + description: "Timezone for output (IANA name like America/New_York)", + }, + ]; + + this.parseOptions(args, defs); + + if (!this.inputKey) { + throw new Error("Must specify --key"); + } + if (!this.outputKey) { + this.outputKey = `parsed_${this.inputKey.replace(/\//g, "_")}`; + } + } + + acceptRecord(record: Record): boolean { + const data = record.dataRef() as JsonObject; + const value = findKey(data, this.inputKey, true); + + if (value === undefined || value === null || value === "") { + this.pushRecord(record); + return true; + } + + let date: Date; + + if (this.epoch) { + date = new Date(Number(value) * 1000); + } else if (this.inputFormat) { + date = parseWithFormat(String(value), this.inputFormat); + } else { + // Try JavaScript's native date parsing + date = new Date(String(value)); + } + + if (isNaN(date.getTime())) { + throw new Error(`Cannot parse date from key '${this.inputKey}', value: ${String(value)}`); + } + + let output: string | number; + if (this.outputEpoch) { + output = Math.floor(date.getTime() / 1000); + } else if (this.outputFormat) { + output = formatDate(date, this.outputFormat, this.timezone ?? undefined); + } else { + output = date.toISOString(); + } + + setKey(data, this.outputKey, output); + this.pushRecord(record); + return true; + } +} + +/** + * Parse a date string given a strftime-style format. + * Supports common directives: %Y, %m, %d, %H, %M, %S, %b, %B, %p, %Z, %z + */ +function parseWithFormat(input: string, format: string): Date { + const monthNames: { [key: string]: number } = { + jan: 0, january: 0, feb: 1, february: 1, mar: 2, march: 2, + apr: 3, april: 3, may: 4, jun: 5, june: 5, + jul: 6, july: 6, aug: 7, august: 7, sep: 8, september: 8, + oct: 9, october: 9, nov: 10, november: 10, dec: 11, december: 11, + }; + + let year = 1970, month = 0, day = 1, hour = 0, minute = 0, second = 0; + let isPM = false; + + // Build a regex from the format string + let regexStr = "^"; + const captures: string[] = []; + let i = 0; + while (i < format.length) { + if (format[i] === "%") { + i++; + const directive = format[i]!; + switch (directive) { + case "Y": regexStr += "(\\d{4})"; captures.push("Y"); break; + case "y": regexStr += "(\\d{2})"; captures.push("y"); break; + case "m": regexStr += "(\\d{1,2})"; captures.push("m"); break; + case "d": regexStr += "(\\d{1,2})"; captures.push("d"); break; + case "H": regexStr += "(\\d{1,2})"; captures.push("H"); break; + case "I": regexStr += "(\\d{1,2})"; captures.push("I"); break; + case "M": regexStr += "(\\d{1,2})"; captures.push("M"); break; + case "S": regexStr += "(\\d{1,2})"; captures.push("S"); break; + case "b": case "B": + regexStr += "([A-Za-z]+)"; captures.push("b"); break; + case "p": regexStr += "(AM|PM|am|pm)"; captures.push("p"); break; + case "Z": regexStr += "(\\S+)"; captures.push("Z"); break; + case "z": regexStr += "([+-]\\d{2}:?\\d{2})"; captures.push("z"); break; + case "%": regexStr += "%"; break; + default: regexStr += directive; break; + } + } else { + regexStr += format[i]!.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + } + i++; + } + regexStr += "$"; + + const match = input.match(new RegExp(regexStr)); + if (!match) { + return new Date(NaN); + } + + for (let j = 0; j < captures.length; j++) { + const val = match[j + 1]!; + switch (captures[j]) { + case "Y": year = parseInt(val, 10); break; + case "y": year = 2000 + parseInt(val, 10); break; + case "m": month = parseInt(val, 10) - 1; break; + case "d": day = parseInt(val, 10); break; + case "H": hour = parseInt(val, 10); break; + case "I": hour = parseInt(val, 10); break; + case "M": minute = parseInt(val, 10); break; + case "S": second = parseInt(val, 10); break; + case "b": { + const idx = monthNames[val.toLowerCase()]; + if (idx !== undefined) month = idx; + break; + } + case "p": isPM = val.toLowerCase() === "pm"; break; + } + } + + if (isPM && hour < 12) hour += 12; + if (!isPM && hour === 12 && captures.includes("p")) hour = 0; + + return new Date(year, month, day, hour, minute, second); +} + +/** + * Format a Date object using strftime-style directives. + */ +function formatDate(date: Date, format: string, timezone?: string): string { + // Use Intl.DateTimeFormat for timezone conversion if needed + let d = date; + if (timezone) { + // Create a new date adjusted for the target timezone + const formatter = new Intl.DateTimeFormat("en-US", { + timeZone: timezone, + year: "numeric", month: "2-digit", day: "2-digit", + hour: "2-digit", minute: "2-digit", second: "2-digit", + hour12: false, + }); + const parts = formatter.formatToParts(date); + const get = (type: string) => parts.find((p) => p.type === type)?.value ?? "0"; + d = new Date( + parseInt(get("year"), 10), + parseInt(get("month"), 10) - 1, + parseInt(get("day"), 10), + parseInt(get("hour"), 10), + parseInt(get("minute"), 10), + parseInt(get("second"), 10), + ); + } + + const pad = (n: number, len = 2) => String(n).padStart(len, "0"); + const monthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; + const fullMonths = ["January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"]; + const dayNames = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; + + let result = ""; + let i = 0; + while (i < format.length) { + if (format[i] === "%") { + i++; + const directive = format[i]!; + switch (directive) { + case "Y": result += pad(d.getFullYear(), 4); break; + case "y": result += pad(d.getFullYear() % 100); break; + case "m": result += pad(d.getMonth() + 1); break; + case "d": result += pad(d.getDate()); break; + case "H": result += pad(d.getHours()); break; + case "I": result += pad(d.getHours() % 12 || 12); break; + case "M": result += pad(d.getMinutes()); break; + case "S": result += pad(d.getSeconds()); break; + case "b": result += monthNames[d.getMonth()]; break; + case "B": result += fullMonths[d.getMonth()]; break; + case "a": result += dayNames[d.getDay()]; break; + case "p": result += d.getHours() < 12 ? "AM" : "PM"; break; + case "s": result += Math.floor(d.getTime() / 1000); break; + case "%": result += "%"; break; + default: result += "%" + directive; break; + } + } else { + result += format[i]; + } + i++; + } + return result; +} + +import type { CommandDoc } from "../../types/CommandDoc.ts"; + +export const documentation: CommandDoc = { + name: "parsedate", + category: "transform", + synopsis: "recs parsedate [options] [files...]", + description: + "Parse date/time strings from a field and output them in a normalized format. " + + "Supports epoch seconds, ISO 8601, and custom strftime-style format strings for " + + "both input and output.", + options: [ + { + flags: ["--key", "-k"], + argument: "", + description: "Key field containing the date/time string to parse.", + required: true, + }, + { + flags: ["--output", "-o"], + argument: "", + description: "Output key for the parsed date (defaults to 'parsed_').", + }, + { + flags: ["--format", "-f"], + argument: "", + description: + "Input format for parsing (strftime-style). Common directives: " + + "%Y (4-digit year), %m (month), %d (day), %H (hour 24h), %M (minute), %S (second), " + + "%b (abbreviated month name), %p (AM/PM).", + }, + { + flags: ["--output-format", "-F"], + argument: "", + description: + "Output format (strftime-style). Defaults to ISO 8601 if not specified.", + }, + { + flags: ["--epoch", "-e"], + description: "Input date is in epoch seconds.", + }, + { + flags: ["--output-epoch", "-E"], + description: "Output as epoch seconds instead of a formatted string.", + }, + { + flags: ["--timezone", "-z"], + argument: "", + description: + "Timezone for output formatting (IANA name like 'America/New_York').", + }, + ], + examples: [ + { + description: "Parse dates and output as epoch seconds", + command: "recs parsedate -k date -E", + }, + { + description: "Parse a custom date format and reformat it", + command: "recs parsedate -k timestamp -f '%d/%b/%Y:%H:%M:%S' -F '%Y-%m-%d %H:%M:%S'", + }, + { + description: "Convert epoch seconds to ISO 8601", + command: "recs parsedate -k time -e -o iso_time", + }, + ], + seeAlso: ["normalizetime"], +}; diff --git a/tests/cli/help.test.ts b/tests/cli/help.test.ts index 1b67010..82994f8 100644 --- a/tests/cli/help.test.ts +++ b/tests/cli/help.test.ts @@ -8,9 +8,9 @@ import { import type { CommandDoc } from "../../src/types/CommandDoc.ts"; describe("loadAllDocs", () => { - test("loads documentation for all 43 operations", async () => { + test("loads documentation for all 45 operations", async () => { const docs = await loadAllDocs(); - expect(docs.length).toBe(43); + expect(docs.length).toBe(45); }); test("every doc has required fields", async () => { diff --git a/tests/cli/manpages.test.ts b/tests/cli/manpages.test.ts index dc6462e..080b387 100644 --- a/tests/cli/manpages.test.ts +++ b/tests/cli/manpages.test.ts @@ -25,10 +25,10 @@ describe("man page generation", () => { const expectedCommands = [ "fromapache", "fromatomfeed", "fromcsv", "fromdb", "fromjsonarray", "fromkv", "frommongo", "frommultire", "fromps", "fromre", - "fromsplit", "fromtcpdump", "fromxferlog", "fromxml", + "fromsplit", "fromtcpdump", "fromxferlog", "fromxls", "fromxml", "annotate", "assert", "chain", "collate", "decollate", "delta", "eval", "expandjson", "flatten", "generate", "grep", "join", "multiplex", - "normalizetime", "sort", "stream2table", "substream", "topn", "xform", + "normalizetime", "parsedate", "sort", "stream2table", "substream", "topn", "xform", "tochart", "tocsv", "todb", "togdgraph", "tognuplot", "tohtml", "tojsonarray", "toprettyprint", "toptable", "totable", ]; @@ -39,12 +39,12 @@ describe("man page generation", () => { } }); - test("generates exactly 44 man pages (43 commands + recs.1)", () => { + test("generates exactly 46 man pages (45 commands + recs.1)", () => { const { readdirSync } = require("node:fs"); const files = (readdirSync(MAN_DIR) as string[]).filter( (f: string) => f.endsWith(".1") ); - expect(files.length).toBe(44); + expect(files.length).toBe(46); }); test("recs.1 contains proper troff header", () => { diff --git a/tests/operations/output/totable-unicode.test.ts b/tests/operations/output/totable-unicode.test.ts new file mode 100644 index 0000000..948d91b --- /dev/null +++ b/tests/operations/output/totable-unicode.test.ts @@ -0,0 +1,50 @@ +import { describe, test, expect } from "bun:test"; +import { ToTable } from "../../../src/operations/output/totable.ts"; +import { testOutput } from "./testHelper.ts"; + +describe("ToTable - Unicode and special characters (issue #71)", () => { + test("CJK wide characters align correctly", () => { + const stream = `{"name":"你好","val":"1"} +{"name":"hi","val":"2"}`; + const actual = testOutput(ToTable, [], stream); + // "你好" is 4 display columns wide (2 chars * 2 width each) + // "name" is 4 display columns wide + // So both should be padded to 4 + const lines = actual.split("\n").filter(l => l !== ""); + // All lines should have the same visual width pattern + expect(lines.length).toBe(4); // header, dash, 2 data rows + // The separator line should use the correct width + expect(lines[1]).toContain("----"); + }); + + test("emoji characters align correctly", () => { + const stream = `{"icon":"🎉","val":"a"} +{"icon":"x","val":"b"}`; + const actual = testOutput(ToTable, [], stream); + const lines = actual.split("\n").filter(l => l !== ""); + expect(lines.length).toBe(4); + // 🎉 is 2 display columns; "icon" is 4; so column width should be 4 + }); + + test("newlines in field values are escaped", () => { + const stream = `{"text":"line1\\nline2","val":"ok"}`; + const actual = testOutput(ToTable, [], stream); + // Newlines should be escaped as \\n so they don't break alignment + expect(actual).toContain("\\n"); + expect(actual).not.toContain("line1\nline2"); + }); + + test("tab characters in field values are escaped", () => { + const stream = `{"text":"col1\\tcol2","val":"ok"}`; + const actual = testOutput(ToTable, [], stream); + expect(actual).toContain("\\t"); + }); + + test("ellipsis character aligns correctly", () => { + const stream = `{"text":"hello…","val":"1"} +{"text":"world!","val":"2"}`; + const actual = testOutput(ToTable, [], stream); + const lines = actual.split("\n").filter(l => l !== ""); + expect(lines.length).toBe(4); + }); +}); diff --git a/tests/operations/transform/collate-empty-stream.test.ts b/tests/operations/transform/collate-empty-stream.test.ts new file mode 100644 index 0000000..7bf474e --- /dev/null +++ b/tests/operations/transform/collate-empty-stream.test.ts @@ -0,0 +1,68 @@ +import { describe, test, expect } from "bun:test"; +import { CollectorReceiver } from "../../../src/Operation.ts"; +import { CollateOperation } from "../../../src/operations/transform/collate.ts"; + +// Import aggregators to ensure they're registered +import "../../../src/aggregators/Average.ts"; +import "../../../src/aggregators/Sum.ts"; +import "../../../src/aggregators/Count.ts"; +import "../../../src/aggregators/Maximum.ts"; +import "../../../src/aggregators/Minimum.ts"; + +function makeOp(args: string[]): { op: CollateOperation; collector: CollectorReceiver } { + const collector = new CollectorReceiver(); + const op = new CollateOperation(collector); + op.init(args); + return { op, collector }; +} + +describe("CollateOperation - empty stream (issue #65)", () => { + test("avg on empty stream does not crash", () => { + const { op, collector } = makeOp(["--aggregator", "avg,value"]); + // Feed no records at all - should not throw + op.finish(); + + // With no keys and no input, collate produces nothing (correct per issue discussion) + expect(collector.records.length).toBe(0); + }); + + test("count on empty stream does not crash", () => { + const { op, collector } = makeOp(["--aggregator", "count"]); + op.finish(); + expect(collector.records.length).toBe(0); + }); + + test("sum on empty stream does not crash", () => { + const { op, collector } = makeOp(["--aggregator", "sum,x"]); + op.finish(); + expect(collector.records.length).toBe(0); + }); + + test("max on empty stream does not crash", () => { + const { op, collector } = makeOp(["--aggregator", "max,x"]); + op.finish(); + expect(collector.records.length).toBe(0); + }); + + test("min on empty stream does not crash", () => { + const { op, collector } = makeOp(["--aggregator", "min,x"]); + op.finish(); + expect(collector.records.length).toBe(0); + }); + + test("avg on empty stream with key produces no records", () => { + const { op, collector } = makeOp(["--key", "group", "--aggregator", "avg,value"]); + op.finish(); + expect(collector.records.length).toBe(0); + }); + + test("multiple aggregators on empty stream does not crash", () => { + const { op, collector } = makeOp([ + "--aggregator", "avg,value", + "--aggregator", "count", + "--aggregator", "sum,value", + ]); + op.finish(); + expect(collector.records.length).toBe(0); + }); +}); diff --git a/tests/operations/transform/decollate-only.test.ts b/tests/operations/transform/decollate-only.test.ts new file mode 100644 index 0000000..3666486 --- /dev/null +++ b/tests/operations/transform/decollate-only.test.ts @@ -0,0 +1,59 @@ +import { describe, test, expect } from "bun:test"; +import { Record } from "../../../src/Record.ts"; +import { CollectorReceiver } from "../../../src/Operation.ts"; +import { DecollateOperation } from "../../../src/operations/transform/decollate.ts"; +import { deaggregatorRegistry } from "../../../src/Deaggregator.ts"; +import type { Deaggregator } from "../../../src/Deaggregator.ts"; + +function registerTestDeaggregators(): void { + if (deaggregatorRegistry.has("split")) return; + + deaggregatorRegistry.register("split", { + create: (field: string, delim: string, outputField: string) => ({ + deaggregate: (record: Record): Record[] => { + const val = record.get(field); + if (typeof val !== "string") return [new Record()]; + const parts = val.split(delim); + return parts.map((part) => new Record({ [outputField]: part })); + }, + }) as Deaggregator, + argCounts: [3], + shortUsage: "Split a field", + longUsage: "Splits a field by delimiter into separate records", + }); +} + +function makeOp(args: string[]): { op: DecollateOperation; collector: CollectorReceiver } { + registerTestDeaggregators(); + const collector = new CollectorReceiver(); + const op = new DecollateOperation(collector); + op.init(args); + return { op, collector }; +} + +describe("DecollateOperation --only flag (issue #81)", () => { + test("without --only: includes original fields", () => { + const { op, collector } = makeOp(["--deaggregator", "split,hosts, ,host"]); + + op.acceptRecord(new Record({ hosts: "a b", group: "g1" })); + op.finish(); + + expect(collector.records.length).toBe(2); + expect(collector.records[0]!.get("host")).toBe("a"); + expect(collector.records[0]!.get("group")).toBe("g1"); // Original field preserved + expect(collector.records[0]!.get("hosts")).toBe("a b"); // Original field preserved + }); + + test("with --only: excludes original fields", () => { + const { op, collector } = makeOp(["--only", "--deaggregator", "split,hosts, ,host"]); + + op.acceptRecord(new Record({ hosts: "a b", group: "g1" })); + op.finish(); + + expect(collector.records.length).toBe(2); + expect(collector.records[0]!.get("host")).toBe("a"); + expect(collector.records[0]!.get("group")).toBeUndefined(); // Original excluded + expect(collector.records[0]!.get("hosts")).toBeUndefined(); // Original excluded + expect(collector.records[1]!.get("host")).toBe("b"); + }); +}); diff --git a/tests/operations/transform/multiplex.test.ts b/tests/operations/transform/multiplex.test.ts index d4e2258..8828313 100644 --- a/tests/operations/transform/multiplex.test.ts +++ b/tests/operations/transform/multiplex.test.ts @@ -1,11 +1,16 @@ -import { describe, test, expect } from "bun:test"; +import { describe, test, expect, afterAll } from "bun:test"; +import { readFileSync, mkdirSync, rmSync, existsSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; import { Record } from "../../../src/Record.ts"; import { CollectorReceiver, Operation } from "../../../src/Operation.ts"; import type { RecordReceiver } from "../../../src/Operation.ts"; import { MultiplexOperation } from "../../../src/operations/transform/multiplex.ts"; import { registerOperationFactory } from "../../../src/operations/transform/chain.ts"; +import { ToCsv } from "../../../src/operations/output/tocsv.ts"; +import { XformOperation } from "../../../src/operations/transform/xform.ts"; -// Register a test passthrough operation so multiplex can create it +// Register operations so multiplex can create them class PassthroughOp extends Operation { init(_args: string[]): void { // no-op @@ -16,6 +21,18 @@ class PassthroughOp extends Operation { } registerOperationFactory("passthrough", (next: RecordReceiver) => new PassthroughOp(next)); +registerOperationFactory("tocsv", (next: RecordReceiver) => new ToCsv(next)); +registerOperationFactory("xform", (next: RecordReceiver) => new XformOperation(next)); + +// Temp directory for file-output tests +const tmpBase = join(tmpdir(), `recs-multiplex-test-${process.pid}-${Date.now()}`); +mkdirSync(tmpBase, { recursive: true }); + +afterAll(() => { + if (existsSync(tmpBase)) { + rmSync(tmpBase, { recursive: true, force: true }); + } +}); function makeOp(args: string[]): { op: MultiplexOperation; collector: CollectorReceiver } { const collector = new CollectorReceiver(); @@ -39,7 +56,6 @@ describe("MultiplexOperation", () => { new Record({ x: 2 }), ]); - // Passthrough operation just passes records through expect(collector.records.length).toBe(2); expect(collector.records[0]!.get("x")).toBe(1); expect(collector.records[1]!.get("x")).toBe(2); @@ -53,7 +69,6 @@ describe("MultiplexOperation", () => { new Record({ group: "b", val: 3 }), ]); - // All records pass through, grouped by key expect(collector.records.length).toBe(3); }); @@ -62,4 +77,161 @@ describe("MultiplexOperation", () => { makeOp([]); }).toThrow("multiplex requires an operation"); }); + + test("multiplex with line-based output op (tocsv) to stdout", () => { + const { op, collector } = makeOp(["-k", "dept", "--", "tocsv", "--noheader", "-k", "name,score"]); + feedRecords(op, [ + new Record({ dept: "eng", name: "alice", score: 90 }), + new Record({ dept: "eng", name: "bob", score: 85 }), + new Record({ dept: "sales", name: "carol", score: 70 }), + ]); + + // tocsv pushes lines, not records — the collector must receive them + expect(collector.lines.length).toBe(3); + expect(collector.records.length).toBe(0); + + // Verify CSV content: each group gets its own tocsv instance + const allLines = collector.lines.join("\n"); + expect(allLines).toContain("alice,90"); + expect(allLines).toContain("bob,85"); + expect(allLines).toContain("carol,70"); + }); + + test("multiplex with tocsv including header per group", () => { + const { op, collector } = makeOp(["-k", "dept", "--", "tocsv", "-k", "name,score"]); + feedRecords(op, [ + new Record({ dept: "eng", name: "alice", score: 90 }), + new Record({ dept: "eng", name: "bob", score: 85 }), + new Record({ dept: "sales", name: "carol", score: 70 }), + ]); + + // Each group gets its own tocsv instance, so each emits a header + // eng group: header + 2 data rows = 3 lines + // sales group: header + 1 data row = 2 lines + expect(collector.lines.length).toBe(5); + + // Both groups should produce a "name,score" header line + const headerCount = collector.lines.filter(l => l === "name,score").length; + expect(headerCount).toBe(2); + }); + + test("multiplex with --output-file-key writes line-based output to files", () => { + const outDir = join(tmpBase, "output-file-key"); + mkdirSync(outDir, { recursive: true }); + + const fileA = join(outDir, "eng.csv"); + const fileB = join(outDir, "sales.csv"); + + // Group by outfile so it's available in group options for resolveOutputFile + const { op, collector } = makeOp([ + "-k", "outfile", + "--output-file-key", "outfile", + "--", "tocsv", "--noheader", "-k", "name,score", + ]); + feedRecords(op, [ + new Record({ dept: "eng", name: "alice", score: 90, outfile: fileA }), + new Record({ dept: "eng", name: "bob", score: 85, outfile: fileA }), + new Record({ dept: "sales", name: "carol", score: 70, outfile: fileB }), + ]); + + // Lines should NOT go to the collector when writing to files + expect(collector.lines.length).toBe(0); + expect(collector.records.length).toBe(0); + + // Files should exist with correct CSV content + const contentA = readFileSync(fileA, "utf-8"); + expect(contentA).toContain("alice,90"); + expect(contentA).toContain("bob,85"); + expect(contentA.trim().split("\n").length).toBe(2); + + const contentB = readFileSync(fileB, "utf-8"); + expect(contentB).toContain("carol,70"); + expect(contentB.trim().split("\n").length).toBe(1); + }); + + test("multiplex with --output-file-eval and {{key}} interpolation", () => { + const outDir = join(tmpBase, "output-file-eval"); + mkdirSync(outDir, { recursive: true }); + + const { op, collector } = makeOp([ + "-k", "dept", + "--output-file-eval", join(outDir, "report-{{dept}}.csv"), + "--", "tocsv", "--noheader", "-k", "name,score", + ]); + feedRecords(op, [ + new Record({ dept: "eng", name: "alice", score: 90 }), + new Record({ dept: "eng", name: "bob", score: 85 }), + new Record({ dept: "sales", name: "carol", score: 70 }), + ]); + + expect(collector.lines.length).toBe(0); + expect(collector.records.length).toBe(0); + + // Verify files created with interpolated names + const engFile = join(outDir, "report-eng.csv"); + const salesFile = join(outDir, "report-sales.csv"); + + expect(existsSync(engFile)).toBe(true); + expect(existsSync(salesFile)).toBe(true); + + const engContent = readFileSync(engFile, "utf-8"); + expect(engContent).toContain("alice,90"); + expect(engContent).toContain("bob,85"); + + const salesContent = readFileSync(salesFile, "utf-8"); + expect(salesContent).toContain("carol,70"); + }); + + test("multiplex with record-based transform op (xform)", () => { + const { op, collector } = makeOp([ + "-k", "dept", "--", + "xform", "{{tagged}} = {{dept}} + '-' + {{name}}", + ]); + feedRecords(op, [ + new Record({ dept: "eng", name: "alice" }), + new Record({ dept: "eng", name: "bob" }), + new Record({ dept: "sales", name: "carol" }), + ]); + + // xform pushes records, not lines + expect(collector.records.length).toBe(3); + expect(collector.lines.length).toBe(0); + + const tags = collector.records.map(r => r.get("tagged") as string); + expect(tags).toContain("eng-alice"); + expect(tags).toContain("eng-bob"); + expect(tags).toContain("sales-carol"); + }); + + test("multiplex with record-based passthrough and --output-file-key", () => { + const outDir = join(tmpBase, "record-output-file"); + mkdirSync(outDir, { recursive: true }); + + const fileA = join(outDir, "groupA.jsonl"); + const fileB = join(outDir, "groupB.jsonl"); + + // Group by outfile so it's available in group options + const { op, collector } = makeOp([ + "-k", "outfile", + "--output-file-key", "outfile", + "--", "passthrough", + ]); + feedRecords(op, [ + new Record({ group: "a", val: 1, outfile: fileA }), + new Record({ group: "a", val: 2, outfile: fileA }), + new Record({ group: "b", val: 3, outfile: fileB }), + ]); + + expect(collector.records.length).toBe(0); + expect(collector.lines.length).toBe(0); + + const contentA = readFileSync(fileA, "utf-8").trim().split("\n"); + expect(contentA.length).toBe(2); + expect(JSON.parse(contentA[0]!)).toMatchObject({ group: "a", val: 1 }); + expect(JSON.parse(contentA[1]!)).toMatchObject({ group: "a", val: 2 }); + + const contentB = readFileSync(fileB, "utf-8").trim().split("\n"); + expect(contentB.length).toBe(1); + expect(JSON.parse(contentB[0]!)).toMatchObject({ group: "b", val: 3 }); + }); }); diff --git a/tests/operations/transform/parsedate.test.ts b/tests/operations/transform/parsedate.test.ts new file mode 100644 index 0000000..2f4438f --- /dev/null +++ b/tests/operations/transform/parsedate.test.ts @@ -0,0 +1,105 @@ +import { describe, test, expect } from "bun:test"; +import { Record } from "../../../src/Record.ts"; +import { CollectorReceiver } from "../../../src/Operation.ts"; +import { ParseDateOperation } from "../../../src/operations/transform/parsedate.ts"; + +function makeOp(args: string[]): { op: ParseDateOperation; collector: CollectorReceiver } { + const collector = new CollectorReceiver(); + const op = new ParseDateOperation(collector); + op.init(args); + return { op, collector }; +} + +describe("ParseDateOperation", () => { + test("parses ISO date string to ISO output", () => { + const { op, collector } = makeOp(["-k", "date"]); + op.acceptRecord(new Record({ date: "2024-01-15T12:30:00Z", val: 1 })); + op.finish(); + + expect(collector.records.length).toBe(1); + expect(collector.records[0]!.get("parsed_date")).toBe("2024-01-15T12:30:00.000Z"); + expect(collector.records[0]!.get("val")).toBe(1); // Original field preserved + }); + + test("parses epoch seconds to ISO", () => { + const { op, collector } = makeOp(["-k", "ts", "-e"]); + op.acceptRecord(new Record({ ts: 1705312200 })); + op.finish(); + + expect(collector.records.length).toBe(1); + const parsed = collector.records[0]!.get("parsed_ts") as string; + expect(parsed).toContain("2024-01-15"); + }); + + test("outputs epoch seconds with --output-epoch", () => { + const { op, collector } = makeOp(["-k", "date", "-E"]); + op.acceptRecord(new Record({ date: "2024-01-15T12:30:00Z" })); + op.finish(); + + expect(collector.records.length).toBe(1); + // The exact epoch depends on timezone, so just verify it's a reasonable number + const epoch = collector.records[0]!.get("parsed_date") as number; + expect(typeof epoch).toBe("number"); + // Should be within a day of the expected value + expect(Math.abs(epoch - 1705318200)).toBeLessThan(86400); + }); + + test("custom output format", () => { + const { op, collector } = makeOp(["-k", "date", "-F", "%Y/%m/%d"]); + op.acceptRecord(new Record({ date: "2024-01-15T00:00:00Z" })); + op.finish(); + + // Since the date is parsed at UTC but formatted at local time, + // we just check the format pattern + const parsed = collector.records[0]!.get("parsed_date") as string; + expect(parsed).toMatch(/^\d{4}\/\d{2}\/\d{2}$/); + }); + + test("custom input format", () => { + const { op, collector } = makeOp(["-k", "date", "-f", "%d/%m/%Y", "-E"]); + op.acceptRecord(new Record({ date: "15/01/2024" })); + op.finish(); + + const epoch = collector.records[0]!.get("parsed_date") as number; + const parsed = new Date(epoch * 1000); + expect(parsed.getFullYear()).toBe(2024); + expect(parsed.getMonth()).toBe(0); // January + expect(parsed.getDate()).toBe(15); + }); + + test("custom output key", () => { + const { op, collector } = makeOp(["-k", "date", "-o", "my_date"]); + op.acceptRecord(new Record({ date: "2024-01-15T00:00:00Z" })); + op.finish(); + + expect(collector.records[0]!.get("my_date")).toBeDefined(); + expect(collector.records[0]!.get("parsed_date")).toBeUndefined(); + }); + + test("null/empty values pass through", () => { + const { op, collector } = makeOp(["-k", "date"]); + op.acceptRecord(new Record({ date: null, val: 1 })); + op.acceptRecord(new Record({ date: "", val: 2 })); + op.finish(); + + expect(collector.records.length).toBe(2); + // Records should pass through without parsed_date being set + expect(collector.records[0]!.get("val")).toBe(1); + expect(collector.records[1]!.get("val")).toBe(2); + }); + + test("throws on unparseable date", () => { + const { op } = makeOp(["-k", "date"]); + expect(() => { + op.acceptRecord(new Record({ date: "not-a-date" })); + }).toThrow("Cannot parse date"); + }); + + test("requires --key", () => { + expect(() => { + const collector = new CollectorReceiver(); + const op = new ParseDateOperation(collector); + op.init([]); + }).toThrow("Must specify --key"); + }); +}); diff --git a/tests/perf/bench.ts b/tests/perf/bench.ts index 86670c0..878b98b 100644 --- a/tests/perf/bench.ts +++ b/tests/perf/bench.ts @@ -30,6 +30,7 @@ export interface BenchmarkResult { export interface CIResult { name: string; + suite: string; median: number; mean: number; min: number; @@ -151,11 +152,11 @@ function deltaPercent(current: number, baseline: number): number { return ((current - baseline) / baseline) * 100; } -function fmtDeltaMarkdown(pct: number): string { +function fmtDeltaMarkdown(pct: number, threshold: number = 10): string { const sign = pct > 0 ? "+" : ""; const text = `${sign}${pct.toFixed(1)}%`; - if (pct > 5) return `**${text}** :red_circle:`; - if (pct < -5) return `${text} :green_circle:`; + if (pct > threshold) return `**${text}** :red_circle:`; + if (pct < -threshold) return `${text} :green_circle:`; return `${text}`; } @@ -305,12 +306,14 @@ export class BenchmarkSuite { function buildCIResults( results: BenchmarkResult[], baseline: BaselineData | null, + suite: string = "", ): CIResult[] { return results.map((r) => { const base = baseline?.[r.name]; const delta = base ? deltaPercent(r.median, base.median) : undefined; return { name: r.name, + suite, median: r.median, mean: r.mean, min: r.min, @@ -324,48 +327,93 @@ function buildCIResults( }); } -export function generateMarkdown(ciResults: CIResult[]): string { +export function generateMarkdown(ciResults: CIResult[], failThreshold: number = 25): string { const lines: string[] = []; - lines.push("## Performance Benchmark Results\n"); - lines.push( - "| Benchmark | Median | Baseline | Delta | Throughput |", - ); - lines.push( - "|-----------|--------|----------|-------|------------|", - ); + // Classify results using the visual indicator threshold (10%) + const indicatorThreshold = 10; let faster = 0; let slower = 0; let unchanged = 0; + const regressions: CIResult[] = []; for (const r of ciResults) { - const medianStr = fmtMs(r.median); - const baseStr = r.baselineMedian != null ? fmtMs(r.baselineMedian) : "—"; - let deltaStr = "—"; if (r.deltaPercent != null) { - deltaStr = fmtDeltaMarkdown(r.deltaPercent); - if (r.deltaPercent > 5) slower++; - else if (r.deltaPercent < -5) faster++; + if (r.deltaPercent > indicatorThreshold) slower++; + else if (r.deltaPercent < -indicatorThreshold) faster++; else unchanged++; + if (r.deltaPercent > failThreshold) regressions.push(r); } else { unchanged++; } + } - const throughputParts: string[] = []; - if (r.recordsPerSec) throughputParts.push(`${fmtRate(r.recordsPerSec)} rec/s`); - if (r.mbPerSec) throughputParts.push(`${r.mbPerSec.toFixed(1)} MB/s`); - const throughputStr = throughputParts.join(", ") || "—"; + // --- Top-level summary --- + lines.push("## Performance Benchmark Results\n"); - lines.push( - `| ${r.name} | ${medianStr} | ${baseStr} | ${deltaStr} | ${throughputStr} |`, - ); + if (regressions.length === 0) { + lines.push(`:white_check_mark: **All ${ciResults.length} benchmarks passed** (threshold: ${failThreshold}%)`); + } else { + lines.push(`:warning: **${regressions.length} regression${regressions.length > 1 ? "s" : ""} detected** out of ${ciResults.length} benchmarks (threshold: ${failThreshold}%)`); + lines.push(""); + lines.push("| Benchmark | Median | Baseline | Delta |"); + lines.push("|-----------|--------|----------|-------|"); + for (const r of regressions) { + lines.push( + `| ${r.name} | ${fmtMs(r.median)} | ${r.baselineMedian != null ? fmtMs(r.baselineMedian) : "—"} | ${fmtDeltaMarkdown(r.deltaPercent!, indicatorThreshold)} |`, + ); + } } lines.push(""); - const total = ciResults.length; lines.push( - `**Summary**: ${total} benchmarks: ${faster} faster, ${slower} slower, ${unchanged} unchanged`, + `${ciResults.length} benchmarks: ${faster} faster, ${slower} slower, ${unchanged} within noise (${indicatorThreshold}%)`, ); + lines.push(""); + lines.push( + "> ℹ️ **Note:** Benchmarks are advisory-only. GitHub Actions shared runners have variable performance, so results may fluctuate ±25% between runs. For reliable benchmarking, run locally with `bun run bench`.", + ); + + // --- Grouped details per suite --- + const suites = new Map(); + for (const r of ciResults) { + const key = r.suite || "other"; + if (!suites.has(key)) suites.set(key, []); + suites.get(key)!.push(r); + } + + lines.push(""); + lines.push("
"); + lines.push("Full benchmark results"); + lines.push(""); + + for (const [suite, results] of suites) { + lines.push(`### ${suite}\n`); + lines.push("| Benchmark | Median | Baseline | Delta | Throughput |"); + lines.push("|-----------|--------|----------|-------|------------|"); + + for (const r of results) { + const medianStr = fmtMs(r.median); + const baseStr = r.baselineMedian != null ? fmtMs(r.baselineMedian) : "—"; + let deltaStr = "—"; + if (r.deltaPercent != null) { + deltaStr = fmtDeltaMarkdown(r.deltaPercent, indicatorThreshold); + } + + const throughputParts: string[] = []; + if (r.recordsPerSec) throughputParts.push(`${fmtRate(r.recordsPerSec)} rec/s`); + if (r.mbPerSec) throughputParts.push(`${r.mbPerSec.toFixed(1)} MB/s`); + const throughputStr = throughputParts.join(", ") || "—"; + + lines.push( + `| ${r.name} | ${medianStr} | ${baseStr} | ${deltaStr} | ${throughputStr} |`, + ); + } + + lines.push(""); + } + + lines.push("
"); return lines.join("\n"); } @@ -404,10 +452,12 @@ export async function runAllSuites( console.log(`Date: ${new Date().toISOString()}`); const allResults: BenchmarkResult[] = []; + const suiteResults: { suite: BenchmarkSuite; results: BenchmarkResult[] }[] = []; for (const suite of suites) { const results = await suite.run(); allResults.push(...results); + suiteResults.push({ suite, results }); } console.log(`\n${"=".repeat(70)}`); @@ -442,7 +492,10 @@ export async function runAllSuites( baseline = loadBaseline(); } - const ciResults = buildCIResults(allResults, baseline); + const ciResults: CIResult[] = []; + for (const { suite, results } of suiteResults) { + ciResults.push(...buildCIResults(results, baseline, suite.name)); + } if (options.jsonFile) { writeFileSync(options.jsonFile, JSON.stringify(ciResults, null, 2) + "\n"); @@ -450,7 +503,7 @@ export async function runAllSuites( } if (options.markdownFile) { - const md = generateMarkdown(ciResults); + const md = generateMarkdown(ciResults, options.failThreshold ?? 25); writeFileSync(options.markdownFile, md + "\n"); console.log(`Markdown report saved to ${options.markdownFile}`); } @@ -458,11 +511,11 @@ export async function runAllSuites( if (options.failThreshold != null) { const failures = checkThreshold(ciResults, options.failThreshold); if (failures.length > 0) { - console.error(`\nPerformance regression detected (threshold: ${options.failThreshold}%):`); + console.warn(`\nPerformance regression detected (threshold: ${options.failThreshold}%):`); for (const f of failures) { - console.error(` - ${f}`); + console.warn(` - ${f}`); } - process.exit(1); + console.warn(`\nBenchmarks are advisory-only — not failing the build.`); } else { console.log(`\nAll benchmarks within ${options.failThreshold}% threshold.`); } diff --git a/tests/perf/run.ts b/tests/perf/run.ts index 610fd02..6b0b9de 100644 --- a/tests/perf/run.ts +++ b/tests/perf/run.ts @@ -63,8 +63,8 @@ Options: --save-baseline Save results as baseline for future comparison --ci Enable CI mode (machine-readable output, threshold checking, and markdown/JSON report generation) - --fail-threshold Exit with code 1 if any benchmark regresses more - than % vs baseline (requires --ci, default: 25) + --fail-threshold Warn if any benchmark regresses more than % + vs baseline (advisory-only, requires --ci, default: 25) --markdown-file Write a markdown report table to (requires --ci) --json-file Write JSON results to (requires --ci) --baseline-file Read/write baseline from instead of the